# argilla-curator / models.py
# Uploaded by mindchain via huggingface_hub (commit 3f16f79, verified).
from pydantic import BaseModel, Field
from typing import Optional, List
from enum import Enum
class HFInferenceModel(str, Enum):
    """Judge models selectable for scoring, keyed by their hub/provider IDs.

    Subclassing ``str`` lets members serialize as plain strings in
    Pydantic request/response bodies.
    """

    GLM_5 = "z-ai/glm-5"
    GPT_4O_MINI = "openai/gpt-4o-mini"
    QWEN_7B = "Qwen/Qwen2.5-7B-Instruct"  # default judge (see CurateRequest)
class HFInferenceProvider(str, Enum):
    """Inference backends routable through the HF Inference API.

    ``str`` mixin makes members JSON-serializable as their string values.
    """

    HF = "hf"                  # Hugging Face's own inference endpoints
    TOGETHER = "together"      # Together AI (default provider in requests)
    OPENROUTER = "openrouter"  # OpenRouter aggregator
class CurateRequest(BaseModel):
    """Request body for curating a raw dataset: score every row with a
    judge model and keep only rows at or above ``min_score``.
    """

    # Source dataset produced by an earlier pipeline phase.
    raw_dataset: str = Field(..., description="Raw dataset ID from Phase 1 (e.g., mindchain/synthetic-distilabel-raw)")
    # Judge model used to assign quality scores.
    model: HFInferenceModel = Field(default=HFInferenceModel.QWEN_7B, description="Judge model for scoring")
    # Backend the judge model is served through.
    provider: HFInferenceProvider = Field(default=HFInferenceProvider.TOGETHER, description="HF Inference provider")
    # Inclusive cutoff on the 1-10 quality scale.
    min_score: int = Field(default=7, ge=1, le=10, description="Minimum quality score to include")
    # Where to push the curated rows; None presumably means a derived
    # default chosen by the handler — TODO confirm against the caller.
    target_dataset: Optional[str] = Field(default=None, description="Target dataset for curated output")
class ScoreRequest(BaseModel):
    """Request body for scoring a single instruction/output pair.

    Uses the same judge-model and provider defaults as CurateRequest.
    """

    instruction: str = Field(..., description="Question or instruction")
    output: str = Field(..., description="Response or answer")
    model: HFInferenceModel = Field(default=HFInferenceModel.QWEN_7B)
    provider: HFInferenceProvider = Field(default=HFInferenceProvider.TOGETHER)
class CurateResponse(BaseModel):
    """Result of a curation run.

    On success, ``data`` carries the curated rows and the count fields
    summarize the filtering; on failure, ``success`` is False and
    ``error`` holds the message.
    """

    success: bool
    # Curated rows as plain dicts; None when the run failed or no data
    # is returned.
    data: Optional[List[dict]] = None
    curated_count: int = 0   # rows kept (score >= min_score)
    total_count: int = 0     # rows examined in the raw dataset
    filtered_count: int = 0  # rows rejected by the score filter
    # default_factory instead of a shared `{}` literal: the idiomatic
    # Pydantic way to default a mutable field, guaranteeing each
    # instance gets its own fresh dict.
    score_distribution: dict = Field(default_factory=dict)
    error: Optional[str] = None
class ScoreResponse(BaseModel):
    """Result of scoring one instruction/output pair (see ScoreRequest).

    On success, ``score`` holds the judge's rating and ``explanation``
    its rationale; on failure, ``error`` carries the message.
    """

    success: bool
    score: Optional[int] = None        # 1-10 quality score, per CurateRequest.min_score bounds
    explanation: Optional[str] = None  # judge's reasoning, if provided
    error: Optional[str] = None
class HealthResponse(BaseModel):
    """Health-check payload reporting service status and configuration."""

    status: str
    model: str                 # identifier of the currently configured judge model
    hf_token_configured: bool  # whether an HF API token is present in the environment
    version: str = "1.0.0"