import { z } from 'zod';

import { HumanJobSource, JobStatus } from '@prisma/client';
import { DataColumn, InferenceColumn } from '@scale/llm-shared/interfaces/data';
import { Variant } from '@scale/llm-shared/interfaces/variant';
import { IncompleteJobStatus } from '@scale/llm-shared/types';

/**
 * Closed set of evaluation kinds known to this module. Every key maps to an
 * identical string value, so the object serves as a runtime lookup while the
 * type alias of the same name provides the matching string-literal union.
 */
export const EvaluationType = {
  ClassificationEvaluation: 'ClassificationEvaluation',
  MauveEvaluation: 'MauveEvaluation',
  HumanEvaluation: 'HumanEvaluation',
  AIFeedback: 'AIFeedback',
} as const;

type EvaluationTypeMap = typeof EvaluationType;

/** Union of the string literal values held by the `EvaluationType` object. */
export type EvaluationType = EvaluationTypeMap[keyof EvaluationTypeMap];

/** Builds a string-keyed record schema whose values must lie in [0, 1]. */
const ratioByClass = () => z.record(z.string(), z.number().min(0).max(1));

/**
 * Statistics schema for a classification evaluation.
 *
 * The four `*ByClass` fields are string-keyed records whose values are
 * validated to lie in [0, 1]. The four `micro*` fields accept any number.
 * NOTE(review): the `micro*` fields are not range-checked, unlike the
 * per-class ones — confirm this is intentional.
 */
export const ClassificationEvaluationStats = z.object({
  f1ScoreByClass: ratioByClass(),
  accuracyByClass: ratioByClass(),
  precisionByClass: ratioByClass(),
  recallByClass: ratioByClass(),
  microF1Score: z.number(),
  microAccuracy: z.number(),
  microPrecision: z.number(),
  microRecall: z.number(),
});

/** Static type inferred from the `ClassificationEvaluationStats` schema. */
export type ClassificationEvaluationStats = z.infer<typeof ClassificationEvaluationStats>;

/**
 * Statistics schema used for the `stats` field of completed Mauve
 * evaluations: two numbers with no further range constraints.
 */
export const MauveEvaluationStats = z.object({
  score: z.number(),
  tokensConsumed: z.number(),
});

/** Static type inferred from the `MauveEvaluationStats` schema. */
export type MauveEvaluationStats = z.infer<typeof MauveEvaluationStats>;

/**
 * Statistics schema used for the `stats` field of completed AI feedback
 * evaluations: a single number with no further range constraints.
 */
export const AIFeedbackEvaluationStats = z.object({
  score: z.number(),
});

/** Static type inferred from the `AIFeedbackEvaluationStats` schema. */
export type AIFeedbackEvaluationStats = z.infer<typeof AIFeedbackEvaluationStats>;

/**
 * Statistics schema used for the `stats` field of completed human
 * evaluations: a single numeric `hitRate` with no further range constraints.
 *
 * Exported together with its inferred type so that it can be consumed the same
 * way as the other `*EvaluationStats` schemas in this module.
 */
export const HumanEvaluationStats = z.object({ hitRate: z.number() });

/** Static type inferred from the `HumanEvaluationStats` schema. */
export type HumanEvaluationStats = z.infer<typeof HumanEvaluationStats>;

// Per-type payload schemas; each one is merged into the matching completed
// evaluation schema by createGenericCompletedEvaluationType.
const completedClassificationMetadata = z.object({
  stats: ClassificationEvaluationStats,
  actualOutputColumnId: z.string(),
});

const completedMauveMetadata = z.object({
  stats: MauveEvaluationStats,
  actualOutputColumnId: z.string(),
  scaleLaunchTaskId: z.string(),
});

const completedHumanMetadata = z.object({
  stats: HumanEvaluationStats,
  inferenceOutputColumnId: z.string(),
  humanOutputColumnId: z.string(),
});

const completedAIFeedbackMetadata = z.object({
  stats: AIFeedbackEvaluationStats,
  aiFeedbackInferenceId: z.string(),
});

/**
 * Lookup from evaluation type to the extra fields (statistics plus related
 * ids) that a completed evaluation of that type carries.
 */
export const CompletedMetadataByEvaluationType = {
  [EvaluationType.ClassificationEvaluation]: completedClassificationMetadata,
  [EvaluationType.MauveEvaluation]: completedMauveMetadata,
  [EvaluationType.HumanEvaluation]: completedHumanMetadata,
  [EvaluationType.AIFeedback]: completedAIFeedbackMetadata,
};

/**
 * Lookup from evaluation type to the extra fields a create request carries on
 * top of the evaluation's specs. createGenericEvaluationCreateType merges the
 * matching entry into the create schema.
 *
 * Classification and Mauve evaluations add no fields, human evaluations
 * require a boolean `skipDeduplication`, and AI feedback requires a `criteria`
 * string.
 */
export const CreateMetadataByEvaluationType = {
  [EvaluationType.ClassificationEvaluation]: z.object({}),
  [EvaluationType.MauveEvaluation]: z.object({}),
  [EvaluationType.HumanEvaluation]: z.object({
    skipDeduplication: z.boolean(),
  }),
  [EvaluationType.AIFeedback]: z.object({ criteria: z.string() }),
};

/** Leading fields shared by every spec schema below. */
const commonSpecFields = () => ({
  variantId: z.string(),
  inputDataId: z.string(),
});

/** Spec fields of a classification evaluation. */
const ClassificationEvaluationSpecs = z.object({
  ...commonSpecFields(),
  expectedDataColumnId: z.string(),
  trimWhitespace: z.boolean().optional(),
});

/** Spec fields of a Mauve evaluation; `scaleLaunchTaskId` may be absent. */
const MauveEvaluationSpecs = z.object({
  ...commonSpecFields(),
  expectedDataColumnId: z.string(),
  scaleLaunchTaskId: z.optional(z.string()),
});

/** Spec fields of an AI feedback evaluation; `aiFeedbackInferenceId` may be absent. */
const AIFeedbackEvaluationSpecs = z.object({
  ...commonSpecFields(),
  aiFeedbackInferenceId: z.optional(z.string()),
  trimWhitespace: z.boolean().optional(),
});

/** Instruction text attached to a human evaluation. */
const humanInstructionSchema = z.object({
  taskDescription: z.string(),
  goodInput: z.string(),
  badInput: z.string(),
});

/** Project/batch identifier pair attached to a human evaluation. */
const scaleProjectAndBatchSchema = z.object({
  projectId: z.string(),
  batchId: z.string(),
});

/** Spec fields of a human evaluation. */
const HumanEvaluationSpecs = z.object({
  ...commonSpecFields(),
  humanEvaluationInstruction: humanInstructionSchema,
  humanJobSource: z.nativeEnum(HumanJobSource),
  // This only exists on Running human evals
  scaleProjectAndBatch: z.optional(scaleProjectAndBatchSchema),
});

/**
 * Lookup from evaluation type to its spec schema. createGenericEvaluationBaseType
 * merges the matching entry into every evaluation schema of that type.
 */
export const SpecsByEvaluationType = {
  [EvaluationType.ClassificationEvaluation]: ClassificationEvaluationSpecs,
  [EvaluationType.MauveEvaluation]: MauveEvaluationSpecs,
  [EvaluationType.HumanEvaluation]: HumanEvaluationSpecs,
  [EvaluationType.AIFeedback]: AIFeedbackEvaluationSpecs,
};

/**
 * Builds the schema fields common to every state of one evaluation type.
 *
 * @param evaluationType - Evaluation type the schema is pinned to.
 * @returns An object schema with a string `id`, a `type` literal equal to
 *   `evaluationType`, a `createdAt` date, and the fields of that type's entry
 *   in `SpecsByEvaluationType`.
 */
function createGenericEvaluationBaseType<ET extends EvaluationType>(evaluationType: ET) {
  const identityFields = z.object({
    id: z.string(),
    type: z.literal(evaluationType),
    createdAt: z.date(),
  });
  return identityFields.merge(SpecsByEvaluationType[evaluationType]);
}

/**
 * Builds the schema of a completed evaluation of the given type.
 *
 * @param evaluationType - Evaluation type the schema is pinned to.
 * @returns The base schema for that type, extended with a `status` limited to
 *   `JobStatus.Completed` or `JobStatus.Backfilled` and an `errorRate` in
 *   [0, 1], then merged with the type's entry in
 *   `CompletedMetadataByEvaluationType`.
 */
function createGenericCompletedEvaluationType<ET extends EvaluationType>(evaluationType: ET) {
  const finishedStatus = z.union([z.literal(JobStatus.Completed), z.literal(JobStatus.Backfilled)]);
  return createGenericEvaluationBaseType(evaluationType)
    .extend({
      status: finishedStatus,
      errorRate: z.number().min(0).max(1),
    })
    .merge(CompletedMetadataByEvaluationType[evaluationType]);
}

/**
 * Builds the schema of an evaluation of the given type that has not completed.
 *
 * @param evaluationType - Evaluation type the schema is pinned to.
 * @returns The base schema for that type, extended with a `status` that must
 *   be a member of `IncompleteJobStatus`.
 */
function createGenericIncompleteEvaluationType<ET extends EvaluationType>(evaluationType: ET) {
  return createGenericEvaluationBaseType(evaluationType).extend({
    status: z.nativeEnum(IncompleteJobStatus),
  });
}

/**
 * Builds the schema of a create request for the given evaluation type.
 *
 * @param evaluationType - Evaluation type the schema is pinned to.
 * @returns The incomplete-evaluation schema for that type with `id`, `status`
 *   and `createdAt` removed, merged with the type's entry in
 *   `CreateMetadataByEvaluationType`.
 */
function createGenericEvaluationCreateType<ET extends EvaluationType>(evaluationType: ET) {
  const withoutOmittedFields = createGenericIncompleteEvaluationType(evaluationType).omit({
    id: true,
    status: true,
    createdAt: true,
  });
  return withoutOmittedFields.merge(CreateMetadataByEvaluationType[evaluationType]);
}

/** Schema and inferred type of a completed classification evaluation. */
export const CompletedClassificationEvaluation = createGenericCompletedEvaluationType(
  EvaluationType.ClassificationEvaluation,
);
export type CompletedClassificationEvaluation = z.infer<typeof CompletedClassificationEvaluation>;

/** Schema and inferred type of a completed Mauve evaluation. */
export const CompletedMauveEvaluation = createGenericCompletedEvaluationType(
  EvaluationType.MauveEvaluation,
);
export type CompletedMauveEvaluation = z.infer<typeof CompletedMauveEvaluation>;

/** Schema and inferred type of a completed human evaluation. */
export const CompletedHumanEvaluation = createGenericCompletedEvaluationType(
  EvaluationType.HumanEvaluation,
);
export type CompletedHumanEvaluation = z.infer<typeof CompletedHumanEvaluation>;

/** Schema and inferred type of a completed AI feedback evaluation. */
export const CompletedAIFeedbackEvaluation = createGenericCompletedEvaluationType(
  EvaluationType.AIFeedback,
);
export type CompletedAIFeedbackEvaluation = z.infer<typeof CompletedAIFeedbackEvaluation>;

/** Union schema accepting a completed evaluation of any of the four types. */
export const CompletedEvaluation = z.union([
  CompletedClassificationEvaluation,
  CompletedMauveEvaluation,
  CompletedHumanEvaluation,
  CompletedAIFeedbackEvaluation,
]);
export type CompletedEvaluation = z.infer<typeof CompletedEvaluation>;

/** Schema and inferred type of a classification evaluation that has not completed. */
export const IncompleteClassificationEvaluation = createGenericIncompleteEvaluationType(
  EvaluationType.ClassificationEvaluation,
);
export type IncompleteClassificationEvaluation = z.infer<typeof IncompleteClassificationEvaluation>;

/** Schema and inferred type of a Mauve evaluation that has not completed. */
export const IncompleteMauveEvaluation = createGenericIncompleteEvaluationType(
  EvaluationType.MauveEvaluation,
);
export type IncompleteMauveEvaluation = z.infer<typeof IncompleteMauveEvaluation>;

/** Schema and inferred type of a human evaluation that has not completed. */
export const IncompleteHumanEvaluation = createGenericIncompleteEvaluationType(
  EvaluationType.HumanEvaluation,
);
export type IncompleteHumanEvaluation = z.infer<typeof IncompleteHumanEvaluation>;

/** Schema and inferred type of an AI feedback evaluation that has not completed. */
export const IncompleteAIFeedbackEvaluation = createGenericIncompleteEvaluationType(
  EvaluationType.AIFeedback,
);
export type IncompleteAIFeedbackEvaluation = z.infer<typeof IncompleteAIFeedbackEvaluation>;

// Members of the IncompleteEvaluation union, in the order they are tried.
const IncompleteEvaluationTypes = [
  IncompleteClassificationEvaluation,
  IncompleteMauveEvaluation,
  IncompleteHumanEvaluation,
  IncompleteAIFeedbackEvaluation,
] as const;

/** Union schema accepting a not-yet-completed evaluation of any of the four types. */
export const IncompleteEvaluation = z.union(IncompleteEvaluationTypes);
export type IncompleteEvaluation = z.infer<typeof IncompleteEvaluation>;

/** Union schema accepting any evaluation, completed or not. */
export const Evaluation = z.union([CompletedEvaluation, IncompleteEvaluation]);
export type Evaluation = z.infer<typeof Evaluation>;

// Currently an empty object; nothing in the visible part of this module reads it.
// NOTE(review): presumably a placeholder for the set of fields omitted when
// building create schemas (see createGenericEvaluationCreateType) — confirm or remove.
export const EvaluationCreateOmitFields = {};

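// Eric couldn't get the typing to work properly here. Ideally the per-type
// create schemas below could be derived generically instead of being declared
// one by one, along these lines: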
//const IncompleteEvaluationOmittedTypes = IncompleteEvaluationTypes.map((evalType, index) => {
//const omittedType = evalType.omit({
//id: true,
//status: true,
//createdAt: true,
//});
//return omittedType;
//});

// Create-request schemas, one per evaluation type. Each is produced by
// createGenericEvaluationCreateType: the incomplete-evaluation schema without
// `id`, `status` and `createdAt`, plus that type's create metadata.

/** Create-request schema for a classification evaluation. */
export const EvaluationCreateClassificationEvaluation = createGenericEvaluationCreateType(
  EvaluationType.ClassificationEvaluation,
);
/** Static type inferred from `EvaluationCreateClassificationEvaluation`. */
export type EvaluationCreateClassificationEvaluation = z.infer<
  typeof EvaluationCreateClassificationEvaluation
>;

/** Create-request schema for a Mauve evaluation. */
export const EvaluationCreateMauveEvaluation = createGenericEvaluationCreateType(
  EvaluationType.MauveEvaluation,
);
/** Static type inferred from `EvaluationCreateMauveEvaluation`. */
export type EvaluationCreateMauveEvaluation = z.infer<typeof EvaluationCreateMauveEvaluation>;

/** Create-request schema for a human evaluation. */
export const EvaluationCreateHumanEvaluation = createGenericEvaluationCreateType(
  EvaluationType.HumanEvaluation,
);
/** Static type inferred from `EvaluationCreateHumanEvaluation`. */
export type EvaluationCreateHumanEvaluation = z.infer<typeof EvaluationCreateHumanEvaluation>;

/** Create-request schema for an AI feedback evaluation. */
export const EvaluationCreateAIFeedbackEvaluation = createGenericEvaluationCreateType(
  EvaluationType.AIFeedback,
);
/** Static type inferred from `EvaluationCreateAIFeedbackEvaluation`. */
export type EvaluationCreateAIFeedbackEvaluation = z.infer<
  typeof EvaluationCreateAIFeedbackEvaluation
>;

/** Union schema accepting a create request for any of the four evaluation types. */
export const EvaluationCreate = z.union([
  EvaluationCreateClassificationEvaluation,
  EvaluationCreateMauveEvaluation,
  EvaluationCreateHumanEvaluation,
  EvaluationCreateAIFeedbackEvaluation,
]);

/** Static type inferred from the `EvaluationCreate` schema. */
export type EvaluationCreate = z.infer<typeof EvaluationCreate>;

/**
 * Schema identifying a single evaluation: a string `id` together with a `type`
 * that must be one of the `EvaluationType` values.
 */
export const GetEvaluation = z.object({
  id: z.string(),
  type: z.nativeEnum(EvaluationType),
});

/** Static type inferred from the `GetEvaluation` schema. */
export type GetEvaluation = z.infer<typeof GetEvaluation>;

/**
 * Schema bundling the columns and variant associated with an evaluation.
 *
 * `inputColumns`, `inferenceOutputColumns` and `variant` are required; the
 * expected-output, human-evaluation and AI-feedback columns are optional.
 * NOTE(review): presumably each optional column is only populated for the
 * evaluation types that produce it — confirm against callers.
 */
export const EvaluationInputOutputs = z.object({
  inputColumns: z.array(DataColumn),
  inferenceOutputColumns: z.array(InferenceColumn),
  expectedOutputColumn: z.optional(DataColumn),
  humanEvaluationOutputColumn: z.optional(DataColumn),
  aiFeedbackOutputColumn: z.optional(InferenceColumn),
  variant: Variant,
});

/** Static type inferred from the `EvaluationInputOutputs` schema. */
export type EvaluationInputOutputs = z.infer<typeof EvaluationInputOutputs>;

/**
 * Result of a successful create call: `success` is the literal `true` and
 * `evaluation` holds an evaluation matching `IncompleteEvaluation`.
 */
export const SuccessfulEvaluationCreateResult = z.object({
  success: z.literal(true),
  evaluation: IncompleteEvaluation,
});
/** Static type inferred from `SuccessfulEvaluationCreateResult`. */
export type SuccessfulEvaluationCreateResult = z.infer<typeof SuccessfulEvaluationCreateResult>;

/**
 * Result of a failed create call: `success` is the literal `false` and
 * `message` is a string.
 */
export const UnsuccessfulEvaluationCreateResult = z.object({
  success: z.literal(false),
  message: z.string(),
});

/** Static type inferred from `UnsuccessfulEvaluationCreateResult`. */
export type UnsuccessfulEvaluationCreateResult = z.infer<typeof UnsuccessfulEvaluationCreateResult>;

/** Union of the two result shapes; the `success` literal tells them apart. */
export const EvaluationCreateResult = z.union([
  SuccessfulEvaluationCreateResult,
  UnsuccessfulEvaluationCreateResult,
]);

/** Static type inferred from the `EvaluationCreateResult` schema. */
export type EvaluationCreateResult = z.infer<typeof EvaluationCreateResult>;
