Spaces:
Running
Running
| """Pydantic schemas — API request/response DTOs. | |
| All responses use camelCase serialization to match the existing frontend contract | |
| (originally served by the Spring Boot backend). | |
| """ | |
| from __future__ import annotations | |
| from datetime import datetime | |
| from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator | |
| def _to_camel(name: str) -> str: | |
| parts = name.split("_") | |
| return parts[0] + "".join(w.capitalize() for w in parts[1:]) | |
class _CamelModel(BaseModel):
    """Shared base for DTOs whose serialized keys are camelCase.

    Aliases are generated from the snake_case field names with ``_to_camel``.
    """

    model_config = ConfigDict(
        # Dump models using the generated aliases rather than the field names.
        serialize_by_alias=True,
        # Allow population by the Python field name as well as by the alias.
        populate_by_name=True,
        alias_generator=_to_camel,
    )
class DocumentResponse(_CamelModel):
    """Response DTO describing a document."""

    id: str
    filename: str

    # Document status (always "uploaded" for now).
    status: str = "uploaded"

    # Optional metadata; each field defaults to None when not supplied.
    content_type: str | None = None
    file_size: int | None = None
    page_count: int | None = None

    # Required; accepts either a string or a datetime.
    created_at: str | datetime
class AnalysisResponse(_CamelModel):
    """Response DTO describing an analysis and its outputs."""

    id: str

    # Owning document; the id falls back to an empty string when not given.
    document_id: str = ""
    document_filename: str | None = None

    status: str

    # Output payloads; each one is None when not supplied.
    content_markdown: str | None = None
    content_html: str | None = None
    pages_json: str | None = None
    chunks_json: str | None = None
    has_document_json: bool = False

    error_message: str | None = None

    # Timestamps accept either a string or a datetime; only created_at is
    # required.
    started_at: str | datetime | None = None
    completed_at: str | datetime | None = None
    created_at: str | datetime
class PipelineOptionsRequest(BaseModel):
    """Docling pipeline configuration options.

    Every field can be supplied under its snake_case name or its camelCase
    equivalent (see the ``AliasChoices`` on each field).

    Raises:
        pydantic.ValidationError: if ``table_mode`` is not ``"accurate"`` or
            ``"fast"``, or if ``images_scale`` is outside ``(0, 10]``.
    """

    model_config = ConfigDict(populate_by_name=True)

    do_ocr: bool = Field(default=True, validation_alias=AliasChoices("do_ocr", "doOcr"))
    do_table_structure: bool = Field(
        default=True, validation_alias=AliasChoices("do_table_structure", "doTableStructure")
    )
    table_mode: str = Field(
        default="accurate", validation_alias=AliasChoices("table_mode", "tableMode")
    )
    do_code_enrichment: bool = Field(
        default=False, validation_alias=AliasChoices("do_code_enrichment", "doCodeEnrichment")
    )
    do_formula_enrichment: bool = Field(
        default=False, validation_alias=AliasChoices("do_formula_enrichment", "doFormulaEnrichment")
    )
    do_picture_classification: bool = Field(
        default=False,
        validation_alias=AliasChoices("do_picture_classification", "doPictureClassification"),
    )
    do_picture_description: bool = Field(
        default=False,
        validation_alias=AliasChoices("do_picture_description", "doPictureDescription"),
    )
    generate_picture_images: bool = Field(
        default=False,
        validation_alias=AliasChoices("generate_picture_images", "generatePictureImages"),
    )
    generate_page_images: bool = Field(
        default=False, validation_alias=AliasChoices("generate_page_images", "generatePageImages")
    )
    images_scale: float = Field(
        default=1.0, validation_alias=AliasChoices("images_scale", "imagesScale")
    )

    # The validators below must be registered with @field_validator; as plain
    # methods Pydantic would never call them and invalid values would pass.
    @field_validator("table_mode")
    @classmethod
    def validate_table_mode(cls, v: str) -> str:
        """Reject any table mode other than "accurate" or "fast"."""
        if v not in ("accurate", "fast"):
            raise ValueError('table_mode must be "accurate" or "fast"')
        return v

    @field_validator("images_scale")
    @classmethod
    def validate_images_scale(cls, v: float) -> float:
        """Require ``0 < images_scale <= 10``."""
        # Written as a positive range check so that NaN (which fails every
        # comparison) is rejected as well.
        if not (0 < v <= 10):
            raise ValueError("images_scale must be between 0 (exclusive) and 10")
        return v
class ChunkingOptionsRequest(BaseModel):
    """Docling chunking configuration options.

    Every field can be supplied under its snake_case name or its camelCase
    equivalent (see the ``AliasChoices`` on each field).

    Raises:
        pydantic.ValidationError: if ``chunker_type`` is not ``"hybrid"`` or
            ``"hierarchical"``, or if ``max_tokens`` is outside ``[64, 8192]``.
    """

    model_config = ConfigDict(populate_by_name=True)

    chunker_type: str = Field(
        default="hybrid", validation_alias=AliasChoices("chunker_type", "chunkerType")
    )
    max_tokens: int = Field(default=512, validation_alias=AliasChoices("max_tokens", "maxTokens"))
    merge_peers: bool = Field(
        default=True, validation_alias=AliasChoices("merge_peers", "mergePeers")
    )
    repeat_table_header: bool = Field(
        default=True, validation_alias=AliasChoices("repeat_table_header", "repeatTableHeader")
    )

    # The validators below must be registered with @field_validator; as plain
    # methods Pydantic would never call them and invalid values would pass.
    @field_validator("chunker_type")
    @classmethod
    def validate_chunker_type(cls, v: str) -> str:
        """Reject any chunker type other than "hybrid" or "hierarchical"."""
        if v not in ("hybrid", "hierarchical"):
            raise ValueError('chunker_type must be "hybrid" or "hierarchical"')
        return v

    @field_validator("max_tokens")
    @classmethod
    def validate_max_tokens(cls, v: int) -> int:
        """Require ``64 <= max_tokens <= 8192``."""
        if not (64 <= v <= 8192):
            raise ValueError("max_tokens must be between 64 and 8192")
        return v
class ChunkBboxResponse(_CamelModel):
    """A bounding box paired with the page it belongs to."""

    page: int
    # Box given as a list of floats.
    # NOTE(review): coordinate order/units are not visible here; confirm against the producer.
    bbox: list[float]
class ChunkResponse(_CamelModel):
    """Response DTO for a single text chunk."""

    text: str

    # Defaults to an empty list when no headings are supplied.
    headings: list[str] = []

    source_page: int | None = None
    token_count: int = 0

    # Bounding boxes for the chunk; empty by default.
    bboxes: list[ChunkBboxResponse] = []
class CreateAnalysisRequest(BaseModel):
    """Request body for creating an analysis.

    Each field is accepted under its camelCase name or the snake_case
    alternative listed in its ``AliasChoices``.
    """

    # Required identifier of the document to analyse.
    documentId: str = Field(
        validation_alias=AliasChoices("documentId", "document_id"),
    )

    # Both option groups are optional and default to None.
    pipelineOptions: PipelineOptionsRequest | None = Field(
        validation_alias=AliasChoices("pipelineOptions", "pipeline_options"),
        default=None,
    )
    chunkingOptions: ChunkingOptionsRequest | None = Field(
        validation_alias=AliasChoices("chunkingOptions", "chunking_options"),
        default=None,
    )
class RechunkRequest(BaseModel):
    """Request body carrying the chunking options for a re-chunk.

    ``chunkingOptions`` is required and is accepted under either its camelCase
    name or ``chunking_options``.
    """

    chunkingOptions: ChunkingOptionsRequest = Field(
        validation_alias=AliasChoices("chunkingOptions", "chunking_options"),
    )