"""
Pydantic models for API data validation.

Defines request and response schemas with validation rules.
"""

from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime.

    Used as the ``default_factory`` for response timestamps so serialized
    values carry an explicit UTC offset instead of an ambiguous naive
    local time.
    """
    return datetime.now(timezone.utc)


class IssueInput(BaseModel):
    """Input model for GitHub issue or pull request classification.

    Only ``issue_text`` is required; every other field is optional
    metadata and defaults to ``None``.
    """

    issue_text: str = Field(
        ...,
        min_length=1,
        description="Issue title text",
        examples=["Fix bug in authentication module"],
    )
    issue_description: Optional[str] = Field(
        default=None,
        description="Issue body text",
        examples=["The authentication module fails when handling expired tokens"],
    )
    repo_name: Optional[str] = Field(
        default=None, description="Repository name", examples=["user/repo-name"]
    )
    pr_number: Optional[int] = Field(
        default=None, ge=1, description="Pull request number", examples=[123]
    )
    created_at: Optional[datetime] = Field(
        default=None, description="Issue creation timestamp", examples=["2024-01-15T10:30:00Z"]
    )
    author_name: Optional[str] = Field(
        default=None, description="Issue author username", examples=["johndoe"]
    )

    @field_validator("issue_text", "issue_description")
    @classmethod
    def clean_text(cls, v: Optional[str]) -> Optional[str]:
        """Validate and clean text fields.

        Args:
            v: Raw field value; ``None`` is passed through untouched.

        Returns:
            The value with leading and trailing whitespace removed, or
            ``None`` when no value was supplied.

        Raises:
            ValueError: If the value is empty once stripped. This applies
                to both ``issue_text`` and ``issue_description``.
        """
        if v is None:
            return v
        v = v.strip()
        if not v:
            raise ValueError("Text cannot be empty or whitespace only")
        return v

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issue_text": "Add support for OAuth authentication",
                "issue_description": "Implement OAuth 2.0 flow for third-party providers",
                "repo_name": "myorg/myproject",
                "pr_number": 456,
                "author_name": "developer123",
            }
        }
    )


class SkillPrediction(BaseModel):
    """Single skill prediction with confidence score."""

    skill_name: str = Field(
        ...,
        description="Name of the predicted skill (domain/subdomain)",
        examples=["Language/Java", "DevOps/CI-CD"],
    )
    # Confidence is constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence score (0.0 to 1.0)", examples=[0.85]
    )

    model_config = ConfigDict(
        json_schema_extra={"example": {"skill_name": "Language/Java", "confidence": 0.92}}
    )


class PredictionResponse(BaseModel):
    """Response model for skill classification predictions."""

    predictions: list[SkillPrediction] = Field(
        default_factory=list, description="List of predicted skills with confidence scores"
    )
    # Required; not derived automatically from ``predictions``.
    num_predictions: int = Field(
        ..., ge=0, description="Total number of predicted skills", examples=[5]
    )
    model_version: str = Field(default="1.0.0", description="Model version", examples=["1.0.0"])
    processing_time_ms: Optional[float] = Field(
        default=None, ge=0.0, description="Processing time in milliseconds", examples=[125.5]
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
            }
        }
    )


class BatchIssueInput(BaseModel):
    """Input model for batch prediction.

    A batch must contain between 1 and 100 issues.
    """

    issues: list[IssueInput] = Field(
        ...,
        min_length=1,
        max_length=100,
        description="Issues to classify (max 100)",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issues": [
                    {
                        "issue_text": "Fix authentication bug",
                        "issue_description": "Users cannot login with OAuth",
                    },
                    {
                        "issue_text": "Add database migration",
                        "issue_description": "Create migration for new user table",
                    },
                ]
            }
        }
    )


class BatchPredictionResponse(BaseModel):
    """Response model for batch predictions."""

    results: list[PredictionResponse] = Field(
        default_factory=list, description="Prediction results, one per issue"
    )
    total_issues: int = Field(..., ge=0, description="Number of issues processed", examples=[2])
    total_processing_time_ms: Optional[float] = Field(
        default=None, ge=0.0, description="Processing time in milliseconds", examples=[250.0]
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "results": [
                    {
                        "predictions": [{"skill_name": "Language/Java", "confidence": 0.92}],
                        "num_predictions": 1,
                        "model_version": "1.0.0",
                    }
                ],
                "total_issues": 2,
                "total_processing_time_ms": 250.0,
            }
        }
    )


class ErrorResponse(BaseModel):
    """Error response model.

    ``timestamp`` defaults to the current time in UTC (timezone-aware).
    """

    error: str = Field(..., description="Error message", examples=["Invalid input"])
    detail: Optional[str] = Field(
        default=None, description="Detailed error", examples=["Field 'issue_text' is required"]
    )
    # Timezone-aware default: a naive local time would be ambiguous to clients.
    timestamp: datetime = Field(default_factory=_utc_now, description="Error timestamp")

    @field_serializer("timestamp")
    def serialize_timestamp(self, value: datetime) -> str:
        """Serialize the timestamp as an ISO 8601 string via ``datetime.isoformat``."""
        return value.isoformat()

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error": "Validation Error",
                "detail": "issue_text: field required",
                "timestamp": "2024-01-15T10:30:00Z",
            }
        }
    )


class HealthCheckResponse(BaseModel):
    """Health check response model.

    ``timestamp`` defaults to the current time in UTC (timezone-aware).
    """

    status: str = Field(default="healthy", description="Service status", examples=["healthy"])
    model_loaded: bool = Field(..., description="Model ready status", examples=[True])
    version: str = Field(default="1.0.0", description="API version", examples=["1.0.0"])
    # Timezone-aware default, consistent with ErrorResponse.timestamp.
    timestamp: datetime = Field(default_factory=_utc_now, description="Timestamp")


class PredictionRecord(PredictionResponse):
    """Extended prediction model with metadata from MLflow.

    Inherits every field of ``PredictionResponse`` and adds the run
    identifier, a required prediction timestamp and the classified text.
    """

    run_id: str = Field(..., description="MLflow Run ID")
    timestamp: datetime = Field(..., description="Prediction timestamp")
    input_text: Optional[str] = Field(default="", description="Input text classified")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
                "run_id": "a1b2c3d4e5f6",
                "timestamp": "2024-01-15T10:30:00Z",
                "input_text": "Fix bug in authentication module",
            }
        }
    )