from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, Field


class HFInferenceModel(str, Enum):
    """Generator models for content creation."""

    QWEN_7B = "Qwen/Qwen2.5-7B-Instruct"
    QWEN_72B = "Qwen/Qwen2.5-72B-Instruct"
    GPT_OSS_120B = "openai/gpt-oss-120b"


class HFJudgeModel(str, Enum):
    """Judge models for quality scoring."""

    QWEN_7B = "Qwen/Qwen2.5-7B-Instruct"
    GLM_5 = "z-ai/glm-5"


# Provider identifiers; GenerateRequest uses this enum for both the
# generator (`provider`) and the judge (`judge_provider`).
class HFInferenceProvider(str, Enum):
    CEREBRAS = "cerebras"
    TOGETHER = "together"


# Request payload for a generation run. Only `seed_dataset` is required;
# every other field falls back to the default declared below.
class GenerateRequest(BaseModel):
    # Bounded to 1..100 records per request.
    num_records: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Number of records to generate",
    )
    seed_dataset: str = Field(
        ...,
        description="HF Hub dataset ID for seed data",
    )

    # Generator model and the provider that serves it.
    model: HFInferenceModel = Field(
        default=HFInferenceModel.QWEN_7B,
        description="HF model for generation",
    )
    provider: HFInferenceProvider = Field(
        default=HFInferenceProvider.TOGETHER,
        description="HF Inference provider",
    )

    # Judge model and provider, configured independently of the generator.
    judge_model: HFJudgeModel = Field(
        default=HFJudgeModel.QWEN_7B,
        description="Judge model for quality scoring",
    )
    judge_provider: HFInferenceProvider = Field(
        default=HFInferenceProvider.TOGETHER,
        description="Judge model provider",
    )

    # Validation accepts temperatures in the closed range [0.0, 1.5].
    temperature: float = Field(
        default=0.7,
        ge=0.0,
        le=1.5,
        description="Sampling temperature",
    )
    use_judge: bool = Field(
        default=True,
        description="Enable LLM Judge scoring",
    )


# Request payload naming the seed dataset to load.
class LoadSeedsRequest(BaseModel):
    seed_dataset: str = Field(
        ...,
        description="HF Hub dataset ID for seed data",
    )


# One generated Q&A record. `model` and `provider` are plain strings here,
# not the enums used by GenerateRequest.
class GeneratedRecord(BaseModel):
    instruction: str = Field(
        ...,
        description="Generated question/instruction",
    )
    output: str = Field(
        ...,
        description="Generated answer/response",
    )
    topic: str = Field(
        ...,
        description="Topic of the Q&A",
    )
    difficulty: str = Field(
        ...,
        description="Difficulty level",
    )
    # Defaults to 7 and carries no range constraint; the scoring scale is
    # not defined in this module.
    quality_score: int = Field(
        default=7,
        description="Initial quality estimate",
    )
    model: str = Field(
        ...,
        description="Model used for generation",
    )
    provider: str = Field(
        ...,
        description="Provider used",
    )


# Result envelope for a generation request. Only `success` is required.
# NOTE(review): presumably `error` is populated when `success` is False and
# `data` otherwise -- confirm against the code that builds this response.
class GenerateResponse(BaseModel):
    success: bool
    data: Optional[List[GeneratedRecord]] = None
    record_count: int = 0
    seeds_used: int = 0
    error: Optional[str] = None


# Result envelope for a seed-loading request. Only `success` is required.
class LoadSeedsResponse(BaseModel):
    success: bool
    loaded_count: int = 0
    dataset_id: Optional[str] = None
    error: Optional[str] = None


# Health/status report; every field is required and has no default.
class HealthResponse(BaseModel):
    status: str
    model: str
    provider: str
    judge_model: str
    judge_provider: str
    seeds_loaded: bool