File size: 3,321 Bytes
aa8e38b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Pydantic models for API request/response validation.
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, Field


class UploadResponse(BaseModel):
    """Payload returned by the file upload endpoint."""

    success: bool  # whether the upload was accepted
    session_id: str = Field(
        ...,
        description="Unique session identifier for tracking files",
    )
    source_filename: str  # name of the uploaded source file
    target_filename: Optional[str] = None  # companion file, when one was uploaded
    message: str  # human-readable status message


class ExtractedData(BaseModel):
    """Extracted T1 tax data produced by the extraction step."""

    # Mapping of tax-form line number -> extracted value (value may be None).
    line_values: dict[str, Optional[str]] = Field(
        description="Extracted line numbers and their values",
        default_factory=dict,
    )
    # How the values were obtained from the document.
    extraction_method: str = Field(
        description="Method used: 'text' or 'ocr'",
        default="text",
    )
    has_text: bool = Field(
        description="Whether PDF had extractable text",
        default=True,
    )
    raw_text: Optional[str] = Field(
        description="Raw extracted text (optional)",
        default=None,
    )


class ProcessingRequest(BaseModel):
    """Body accepted by the processing endpoint."""

    session_id: str = Field(..., description="Session ID from upload")
    # None means "extract everything"; a list restricts extraction to those lines.
    line_numbers: Optional[list[str]] = Field(
        description="Specific line numbers to extract. If None, extracts all.",
        default=None,
    )
    use_ocr: bool = Field(
        description="Force OCR processing",
        default=False,
    )
    include_raw_text: bool = Field(
        description="Include raw extracted text in response",
        default=False,
    )


class ProcessingResponse(BaseModel):
    """Response model for processing endpoint."""
    success: bool
    session_id: str
    # Optional payload fields: None when the corresponding stage produced nothing.
    extracted_data: Optional[ExtractedData] = None
    mapped_fields: Optional[dict[str, str]] = None
    output_filename: Optional[str] = None
    # Problems collected during processing; empty lists when the run was clean.
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)


class BatchUploadItem(BaseModel):
    """Model for a single item in batch upload.

    Pairs a source file with an optional target file, mirroring the
    filename fields on UploadResponse.
    """
    source_filename: str
    # None when the item has no companion target document.
    target_filename: Optional[str] = None


class BatchProcessingRequest(BaseModel):
    """Body accepted by the batch processing endpoint."""

    session_ids: list[str] = Field(
        ...,
        description="List of session IDs to process",
    )
    # Same semantics as ProcessingRequest.line_numbers: None extracts all lines.
    line_numbers: Optional[list[str]] = None
    use_ocr: bool = False  # force OCR for every session in the batch


class BatchProcessingResponse(BaseModel):
    """Response model for batch processing."""
    success: bool
    # Counts over the batch. Presumably total == processed + failed, but that
    # invariant is not enforced here — confirm against the endpoint handler.
    total: int
    processed: int
    failed: int
    # One ProcessingResponse per session in the batch.
    results: list[ProcessingResponse]


class FieldMapping(BaseModel):
    """Model for a field mapping entry.

    Associates a form line number with a named field.
    """
    line_number: str
    field_name: str
    # Optional human-readable note about the mapping.
    description: Optional[str] = None


class MappingConfigRequest(BaseModel):
    """Request model for updating mapping configuration."""
    # The mapping entries to apply; see FieldMapping for the per-entry shape.
    mappings: list[FieldMapping]


class MappingConfigResponse(BaseModel):
    """Response model for mapping configuration."""
    success: bool
    # NOTE(review): the inner dict's schema is not pinned down here — presumably
    # the FieldMapping fields keyed by line number; confirm against the handler.
    mappings: dict[str, dict]
    message: str


class HealthResponse(BaseModel):
    """Response model for health check endpoint."""
    # Service status string; the set of valid values is defined by the endpoint.
    status: str
    version: str
    # Time the health check was taken; naive vs. aware is not enforced here.
    timestamp: datetime
    tesseract_available: bool  # whether the Tesseract OCR binary is available
    openai_configured: bool  # whether OpenAI credentials are configured


class ErrorResponse(BaseModel):
    """Standard error response model."""
    # Always False for error payloads; default pins it so callers need not set it.
    success: bool = False
    # Short error description.
    error: str
    # Optional structured context about the failure.
    details: Optional[dict] = None