# Spaces: Sleeping  (appears to be hosting-page status text captured with this file; not part of the module)
"""
Pydantic models for API data validation.

Defines request and response schemas with validation rules.
"""
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
class IssueInput(BaseModel):
    """Input model for GitHub issue or pull request classification."""

    # Only `issue_text` is required; every other field defaults to None.
    issue_text: str = Field(
        ...,
        min_length=1,
        description="Issue title text",
        examples=["Fix bug in authentication module"],
    )
    issue_description: Optional[str] = Field(
        default=None,
        description="Issue body text",
        examples=["The authentication module fails when handling expired tokens"],
    )
    repo_name: Optional[str] = Field(
        default=None, description="Repository name", examples=["user/repo-name"]
    )
    pr_number: Optional[int] = Field(
        default=None, ge=1, description="Pull request number", examples=[123]
    )
    created_at: Optional[datetime] = Field(
        default=None, description="Issue creation timestamp", examples=["2024-01-15T10:30:00Z"]
    )
    author_name: Optional[str] = Field(
        default=None, description="Issue author username", examples=["johndoe"]
    )

    # The decorators are required: without them `clean_text` is an ordinary
    # method that pydantic never calls, so whitespace-only text would pass
    # the `min_length=1` check unmodified.
    @field_validator("issue_text", "issue_description")
    @classmethod
    def clean_text(cls, v: Optional[str]) -> Optional[str]:
        """Validate and clean text fields.

        Args:
            v: The incoming field value; ``None`` is passed through untouched.

        Returns:
            The value with leading and trailing whitespace removed, or
            ``None`` when ``None`` was supplied.

        Raises:
            ValueError: If the value is empty once whitespace is stripped.
        """
        if v is None:
            return v
        v = v.strip()
        if not v:
            raise ValueError("Text cannot be empty or whitespace only")
        return v

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issue_text": "Add support for OAuth authentication",
                "issue_description": "Implement OAuth 2.0 flow for third-party providers",
                "repo_name": "myorg/myproject",
                "pr_number": 456,
                "author_name": "developer123",
            }
        }
    )
class SkillPrediction(BaseModel):
    """Single skill prediction with confidence score."""

    # Schema-level sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "skill_name": "Language/Java",
                "confidence": 0.92,
            }
        }
    )

    # Required label of the predicted skill.
    skill_name: str = Field(
        ...,
        description="Name of the predicted skill (domain/subdomain)",
        examples=["Language/Java", "DevOps/CI-CD"],
    )

    # Required score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence score (0.0 to 1.0)",
        examples=[0.85],
    )
class PredictionResponse(BaseModel):
    """Response model for skill classification predictions."""

    # Schema-level sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
            }
        }
    )

    # Defaults to a fresh empty list per instance.
    predictions: list[SkillPrediction] = Field(
        default_factory=list,
        description="List of predicted skills with confidence scores",
    )

    # Required and non-negative; it is supplied by the caller, not derived
    # from `predictions` by this model.
    num_predictions: int = Field(
        ...,
        ge=0,
        description="Total number of predicted skills",
        examples=[5],
    )

    model_version: str = Field(
        default="1.0.0",
        description="Model version",
        examples=["1.0.0"],
    )

    # Optional; when given it must be non-negative.
    processing_time_ms: Optional[float] = Field(
        default=None,
        ge=0.0,
        description="Processing time in milliseconds",
        examples=[125.5],
    )
class BatchIssueInput(BaseModel):
    """Input model for batch prediction."""

    # Schema-level sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issues": [
                    {
                        "issue_text": "Fix authentication bug",
                        "issue_description": "Users cannot login with OAuth",
                    },
                    {
                        "issue_text": "Add database migration",
                        "issue_description": "Create migration for new user table",
                    },
                ]
            }
        }
    )

    # Required list holding between 1 and 100 issues, inclusive.
    issues: list[IssueInput] = Field(
        ...,
        min_length=1,
        max_length=100,
        description="Issues to classify (max 100)",
    )
class BatchPredictionResponse(BaseModel):
    """Response model for batch predictions."""

    # Schema-level sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "results": [
                    {
                        "predictions": [{"skill_name": "Language/Java", "confidence": 0.92}],
                        "num_predictions": 1,
                        "model_version": "1.0.0",
                    }
                ],
                "total_issues": 2,
                "total_processing_time_ms": 250.0,
            }
        }
    )

    # Defaults to a fresh empty list per instance.
    results: list[PredictionResponse] = Field(
        default_factory=list,
        description="Prediction results, one per issue",
    )

    # Required and non-negative.
    total_issues: int = Field(
        ...,
        ge=0,
        description="Number of issues processed",
        examples=[2],
    )

    # Optional; when given it must be non-negative.
    total_processing_time_ms: Optional[float] = Field(
        default=None,
        ge=0.0,
        description="Processing time in milliseconds",
        examples=[250.0],
    )
class ErrorResponse(BaseModel):
    """Error response model."""

    error: str = Field(..., description="Error message", examples=["Invalid input"])
    detail: Optional[str] = Field(
        default=None, description="Detailed error", examples=["Field 'issue_text' is required"]
    )
    # NOTE(review): `datetime.now` with no tz argument yields a naive
    # local-time value, while the schema example below shows a "Z"-suffixed
    # string. Confirm which is intended before changing the default.
    timestamp: datetime = Field(default_factory=datetime.now, description="Error timestamp")

    # The decorator is what registers this method with pydantic; without it
    # the method is never called during serialization.
    @field_serializer("timestamp")
    def serialize_timestamp(self, value: datetime) -> str:
        """Serialize ``timestamp`` as an ISO 8601 string.

        Args:
            value: The ``timestamp`` field value of this instance.

        Returns:
            The result of ``value.isoformat()``.
        """
        return value.isoformat()

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error": "Validation Error",
                "detail": "issue_text: field required",
                "timestamp": "2024-01-15T10:30:00Z",
            }
        }
    )
class HealthCheckResponse(BaseModel):
    """Health check response model."""

    status: str = Field(
        default="healthy",
        description="Service status",
        examples=["healthy"],
    )

    # Required: the caller must state whether the model is ready.
    model_loaded: bool = Field(
        ...,
        description="Model ready status",
        examples=[True],
    )

    version: str = Field(
        default="1.0.0",
        description="API version",
        examples=["1.0.0"],
    )

    # Filled in at instantiation time via `datetime.now` when not supplied.
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="Timestamp",
    )
class PredictionRecord(PredictionResponse):
    """Extended prediction model with metadata from MLflow."""

    # Schema-level sample payload; includes the inherited response fields.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
                "run_id": "a1b2c3d4e5f6",
                "timestamp": "2024-01-15T10:30:00Z",
                "input_text": "Fix bug in authentication module",
            }
        }
    )

    # Both identifiers below are required on top of the inherited fields.
    run_id: str = Field(
        ...,
        description="MLflow Run ID",
    )
    timestamp: datetime = Field(
        ...,
        description="Prediction timestamp",
    )

    # Optional; defaults to an empty string rather than None.
    input_text: Optional[str] = Field(
        default="",
        description="Input text classified",
    )