Spaces:
Sleeping
Sleeping
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, Field
class HFInferenceModel(str, Enum):
    """Closed set of model identifiers accepted by the request schemas.

    Mixing in ``str`` makes every member compare equal to (and serialize as)
    its raw identifier string, so members can be passed wherever a plain
    model-id string is expected.
    """

    GLM_5 = "z-ai/glm-5"
    GPT_4O_MINI = "openai/gpt-4o-mini"
    QWEN_7B = "Qwen/Qwen2.5-7B-Instruct"
class HFInferenceProvider(str, Enum):
    """Closed set of inference-provider identifiers accepted by the schemas.

    Members are ``str`` subclasses, so each one compares equal to its raw
    provider string.
    """

    HF = "hf"
    TOGETHER = "together"
    OPENROUTER = "openrouter"
class CurateRequest(BaseModel):
    """Request schema for curating a raw dataset with a judge model.

    Only ``raw_dataset`` is required; every other field has a default.
    """

    # Required: identifier of the raw dataset to curate.
    raw_dataset: str = Field(
        ...,
        description="Raw dataset ID from Phase 1 (e.g., mindchain/synthetic-distilabel-raw)",
    )
    # Judge model used for scoring; defaults to QWEN_7B.
    model: HFInferenceModel = Field(
        default=HFInferenceModel.QWEN_7B,
        description="Judge model for scoring",
    )
    # Inference provider; defaults to TOGETHER.
    provider: HFInferenceProvider = Field(
        default=HFInferenceProvider.TOGETHER,
        description="HF Inference provider",
    )
    # Inclusion threshold, validated to lie in the inclusive range 1..10.
    min_score: int = Field(
        default=7,
        ge=1,
        le=10,
        description="Minimum quality score to include",
    )
    # Optional destination dataset; None when the caller does not name one.
    target_dataset: Optional[str] = Field(
        default=None,
        description="Target dataset for curated output",
    )
class ScoreRequest(BaseModel):
    """Request schema for scoring a single instruction/output pair.

    ``instruction`` and ``output`` are required; ``model`` and ``provider``
    fall back to the same defaults used by ``CurateRequest``.
    """

    # Required: the prompt side of the pair.
    instruction: str = Field(..., description="Question or instruction")
    # Required: the answer side of the pair.
    output: str = Field(..., description="Response or answer")
    # Model used for scoring; defaults to QWEN_7B.
    model: HFInferenceModel = Field(default=HFInferenceModel.QWEN_7B)
    # Inference provider; defaults to TOGETHER.
    provider: HFInferenceProvider = Field(default=HFInferenceProvider.TOGETHER)
class CurateResponse(BaseModel):
    """Response schema for a curation request.

    ``success`` is the only required field; all others default to an
    "empty" value so a failure response can be built from ``success`` and
    ``error`` alone.
    """

    success: bool
    # Result rows as plain dicts; None when no data is returned.
    data: Optional[List[dict]] = None
    # Counters default to 0. Their exact meaning is decided by the code that
    # fills them in -- presumably rows kept / rows seen / rows dropped;
    # confirm against the producer.
    curated_count: int = 0
    total_count: int = 0
    filtered_count: int = 0
    # Mapping describing how scores were distributed (key/value types are not
    # constrained here). A literal {} default is safe on a Pydantic model:
    # Pydantic copies field defaults per instance, so it is not shared.
    score_distribution: dict = {}
    # Error message; None when no error is reported.
    error: Optional[str] = None
class ScoreResponse(BaseModel):
    """Response schema for a single scoring request.

    ``success`` is required; the remaining fields default to None so either
    a result (``score``/``explanation``) or an ``error`` can be supplied.
    """

    success: bool
    # Integer score; no range is enforced by this schema.
    score: Optional[int] = None
    # Free-text explanation accompanying the score, if any.
    explanation: Optional[str] = None
    # Error message; None when no error is reported.
    error: Optional[str] = None
class HealthResponse(BaseModel):
    """Response schema for a health/status report.

    ``status``, ``model`` and ``hf_token_configured`` are required;
    ``version`` defaults to ``"1.0.0"``.
    """

    status: str
    # Model name as a plain string (not constrained to HFInferenceModel).
    model: str
    # Boolean flag; by its name, whether an HF token is configured.
    hf_token_configured: bool
    version: str = "1.0.0"