"""Pydantic models for bank statement summaries and PDF text extraction."""

# NOTE: `date` and `Optional` are not referenced by the definitions below;
# they are kept so that nothing importing them from this module breaks.
from datetime import date
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class AccountDetails(BaseModel):
    """Model for individual account details."""

    # All fields are required (`...` as the Field default).
    account_name: str = Field(..., description="Name of the account")
    account_number: str = Field(..., description="Account number")
    starting_balance: float = Field(
        ..., description="Starting balance of the account"
    )
    ending_balance: float = Field(
        ..., description="Ending balance of the account"
    )
    # Dates are typed as plain strings; the YYYY-MM-DD format named in the
    # descriptions is not enforced by the field type itself.
    statement_start_date: str = Field(
        ..., description="Statement start date in YYYY-MM-DD format"
    )
    statement_end_date: str = Field(
        ..., description="Statement end date in YYYY-MM-DD format"
    )


class AccountSummary(BaseModel):
    """Model for bank account summary extracted from LLM."""

    bank_name: str = Field(..., description="Name of the bank")
    account_holder: str = Field(..., description="Name of the account holder")
    # One entry per account; each entry is validated as AccountDetails.
    accounts: List[AccountDetails] = Field(
        ..., description="List of account details"
    )


class BankStatementData(BaseModel):
    """Model for processed bank statement data."""

    # NOTE(review): typed as a flat str -> str mapping, not as AccountSummary;
    # confirm against callers whether that is intentional.
    account_summary: Dict[str, str] = Field(
        ..., description="Account summary information"
    )
    # Values are unconstrained (Any); the table structure is not defined here.
    transaction_tables: Dict[str, Any] = Field(
        ..., description="Extracted transaction tables"
    )


class WordData(BaseModel):
    """Model for word data with bounding box."""

    word: str = Field(..., description="Extracted word text")
    # The list length is not constrained by the type; the description names
    # four coordinates.
    bbox: List[float] = Field(
        ..., description="Bounding box coordinates [x0, y0, x1, y1]"
    )


class LineData(BaseModel):
    """Model for line data with words."""

    line: str = Field(..., description="Complete line text")
    # NOTE(review): the description says "[x, y]" while WordData.bbox uses
    # four coordinates; confirm which shape producers actually emit.
    bbox: List[float] = Field(..., description="Line bounding box [x, y]")
    words: List[WordData] = Field(
        ..., description="List of words in the line"
    )


class ExtractedTextData(BaseModel):
    """Model for extracted text data from PDF."""

    # Outer list: pages; inner list: the LineData entries of that page.
    pages: List[List[LineData]] = Field(
        ..., description="List of pages, each containing lines"
    )