# Web-page header text captured together with this file (not part of the module):
#   "Hamza4100's picture" / "Upload 23 files" / "aa8e38b verified"
"""
Pydantic models for API request/response validation.
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, Field
class UploadResponse(BaseModel):
    """Response model for file upload endpoint."""

    # Every field except target_filename is required (no default given).
    success: bool
    session_id: str = Field(
        ...,  # Ellipsis marks the field as required
        description="Unique session identifier for tracking files",
    )
    source_filename: str
    # May be omitted by the producer; defaults to None.
    target_filename: Optional[str] = None
    message: str
class ExtractedData(BaseModel):
    """Model for extracted T1 tax data."""

    # All four fields have defaults, so the model can be built with no arguments.

    # String keys mapped to string-or-None values; a new dict is created per
    # instance through the factory.
    line_values: dict[str, Optional[str]] = Field(
        description="Extracted line numbers and their values",
        default_factory=dict,
    )
    # Typed as plain str: the annotation itself does not restrict the value to
    # the two methods named in the description.
    extraction_method: str = Field("text", description="Method used: 'text' or 'ocr'")
    has_text: bool = Field(True, description="Whether PDF had extractable text")
    raw_text: Optional[str] = Field(None, description="Raw extracted text (optional)")
class ProcessingRequest(BaseModel):
    """Request model for processing endpoint."""

    # The only required field.
    session_id: str = Field(
        ...,  # Ellipsis marks the field as required
        description="Session ID from upload",
    )

    # Optional filter; left as None when the caller does not supply one.
    line_numbers: Optional[list[str]] = Field(
        None,
        description="Specific line numbers to extract. If None, extracts all.",
    )

    # Boolean switches, both off unless the caller sets them.
    use_ocr: bool = Field(False, description="Force OCR processing")
    include_raw_text: bool = Field(
        False,
        description="Include raw extracted text in response",
    )
class ProcessingResponse(BaseModel):
    """Response model for processing endpoint."""

    # Required on every response.
    success: bool
    session_id: str

    # Optional payload fields; each is None unless explicitly provided.
    extracted_data: Optional[ExtractedData] = None
    mapped_fields: Optional[dict[str, str]] = None
    output_filename: Optional[str] = None

    # Message lists; the factory gives each instance its own empty list.
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
class BatchUploadItem(BaseModel):
    """Model for a single item in batch upload."""

    # Required.
    source_filename: str
    # Optional; None when no target file accompanies the source.
    target_filename: Optional[str] = None
class BatchProcessingRequest(BaseModel):
    """Request model for batch processing."""

    # The only required field.
    session_ids: list[str] = Field(
        ...,  # Ellipsis marks the field as required
        description="List of session IDs to process",
    )
    # Optional list of line numbers; defaults to None.
    # NOTE(review): presumably None means "extract all", as in the single-item
    # request - confirm against the batch handler.
    line_numbers: Optional[list[str]] = None
    # Off unless the caller enables it.
    use_ocr: bool = False
class BatchProcessingResponse(BaseModel):
    """Response model for batch processing."""

    # All fields are required; none has a default.
    success: bool

    # Integer counters. The model does not enforce any relationship between
    # them (e.g. processed + failed == total) - that is up to the producer.
    total: int
    processed: int
    failed: int

    # One ProcessingResponse entry per reported item.
    results: list[ProcessingResponse]
class FieldMapping(BaseModel):
    """Model for a field mapping entry."""

    # Both identifiers are required strings.
    line_number: str
    field_name: str
    # Free-text note; optional and None by default.
    description: Optional[str] = None
class MappingConfigRequest(BaseModel):
    """Request model for updating mapping configuration."""

    # Required list of FieldMapping entries; no default is defined, so the
    # key must be present in the payload.
    mappings: list[FieldMapping]
class MappingConfigResponse(BaseModel):
    """Response model for mapping configuration."""

    # All three fields are required.
    success: bool
    # String keys mapped to untyped dicts; the inner structure is not
    # constrained by this model.
    mappings: dict[str, dict]
    message: str
class HealthResponse(BaseModel):
    """Response model for health check endpoint."""

    # All fields are required; none has a default.
    status: str
    version: str
    timestamp: datetime

    # Dependency flags.
    # NOTE(review): presumably these report whether the Tesseract OCR engine is
    # usable and whether OpenAI credentials are set - confirm in the endpoint.
    tesseract_available: bool
    openai_configured: bool
class ErrorResponse(BaseModel):
    """Standard error response model."""

    # Defaults to False, so callers need not pass it when building an error.
    success: bool = False
    # Required error text.
    error: str
    # Optional untyped dict of extra information; None by default.
    details: Optional[dict] = None