Spaces:
Build error
Build error
| """ | |
| Pydantic models for API request/response validation. | |
| """ | |
| from datetime import datetime | |
| from typing import Optional | |
| from pydantic import BaseModel, Field | |
class UploadResponse(BaseModel):
    """Response model for file upload endpoint."""

    # No default: must be supplied when the model is built.
    success: bool

    # Field(...) marks this as required while attaching a schema description.
    session_id: str = Field(
        ...,
        description="Unique session identifier for tracking files",
    )

    # Source name is required; the target name may be omitted (None).
    source_filename: str
    target_filename: Optional[str] = None

    # Required message string.
    message: str
class ExtractedData(BaseModel):
    """Model for extracted T1 tax data."""

    # String keys to optional string values; default_factory builds a fresh
    # empty dict for each instance rather than sharing one.
    line_values: dict[str, Optional[str]] = Field(
        default_factory=dict, description="Extracted line numbers and their values"
    )

    # Plain str (not an enum): the description lists 'text' and 'ocr', but the
    # model itself does not restrict the value.
    extraction_method: str = Field("text", description="Method used: 'text' or 'ocr'")

    # Defaults to True when not supplied.
    has_text: bool = Field(True, description="Whether PDF had extractable text")

    # Omitted unless supplied; defaults to None.
    raw_text: Optional[str] = Field(None, description="Raw extracted text (optional)")
class ProcessingRequest(BaseModel):
    """Request model for processing endpoint."""

    # The only required field of the request.
    session_id: str = Field(..., description="Session ID from upload")

    # None is the default and, per the description, means "no restriction".
    line_numbers: Optional[list[str]] = Field(
        None,
        description="Specific line numbers to extract. If None, extracts all.",
    )

    # Both switches are off unless the caller turns them on.
    use_ocr: bool = Field(False, description="Force OCR processing")
    include_raw_text: bool = Field(
        False, description="Include raw extracted text in response"
    )
class ProcessingResponse(BaseModel):
    """Response model for processing endpoint."""

    # Always required.
    success: bool
    session_id: str

    # Result payload; each part defaults to None when not supplied.
    extracted_data: Optional[ExtractedData] = None
    mapped_fields: Optional[dict[str, str]] = None
    output_filename: Optional[str] = None

    # Message lists; default_factory yields a fresh empty list per instance.
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
class BatchUploadItem(BaseModel):
    """Model for a single item in batch upload."""

    # Required source name.
    source_filename: str

    # Optional target name; None when not supplied.
    target_filename: Optional[str] = None
class BatchProcessingRequest(BaseModel):
    """Request model for batch processing."""

    # Required list of session identifiers.
    session_ids: list[str] = Field(..., description="List of session IDs to process")

    # The next two fields carry the same names, types and defaults as on
    # ProcessingRequest; the descriptions are copied from there so the
    # generated schema documents both request models alike. Defaults and
    # validation are unchanged.
    # NOTE(review): assumes the batch handler gives these fields the same
    # meaning as the single-request handler - confirm against the endpoint.
    line_numbers: Optional[list[str]] = Field(
        default=None,
        description="Specific line numbers to extract. If None, extracts all.",
    )
    use_ocr: bool = Field(
        default=False,
        description="Force OCR processing",
    )
class BatchProcessingResponse(BaseModel):
    """Response model for batch processing."""

    # Required outcome flag.
    success: bool

    # Required integer counters. The model declares no validator relating
    # them to each other or to the length of `results`.
    total: int
    processed: int
    failed: int

    # Required list of ProcessingResponse entries.
    results: list[ProcessingResponse]
class FieldMapping(BaseModel):
    """Model for a field mapping entry."""

    # Both halves of the mapping are required strings.
    line_number: str
    field_name: str

    # Optional free text; None when not supplied.
    description: Optional[str] = None
class MappingConfigRequest(BaseModel):
    """Request model for updating mapping configuration."""

    # Required list of FieldMapping entries.
    mappings: list[FieldMapping]
class MappingConfigResponse(BaseModel):
    """Response model for mapping configuration."""

    # Required outcome flag.
    success: bool

    # String keys to untyped dicts: the inner dict's keys and values are not
    # constrained by this model.
    mappings: dict[str, dict]

    # Required message string.
    message: str
class HealthResponse(BaseModel):
    """Response model for health check endpoint."""

    # Required strings; no set of allowed values is enforced here.
    status: str
    version: str

    # Required datetime; the model does not say whether it is timezone-aware.
    timestamp: datetime

    # Required boolean flags. How they are determined is not visible in this
    # module.
    tesseract_available: bool
    openai_configured: bool
class ErrorResponse(BaseModel):
    """Standard error response model."""

    # Defaults to False, so it need not be passed when building an error.
    success: bool = False

    # Required error string.
    error: str

    # Optional untyped dict; None when not supplied.
    details: Optional[dict] = None