Spaces:
Sleeping
Sleeping
| from datetime import datetime | |
| from typing import Optional, Dict | |
| from .process_status import ProcessStatus | |
| from pydantic import BaseModel, Field | |
# Data-transfer model for a document record: identity and descriptive
# metadata, three processing-status fields, raw content, and optional
# insight/summary payloads. Documented with comments (not a class docstring)
# so the model's generated schema description stays unchanged.
class DocumentDTO(BaseModel):
    id: Optional[int] = None
    name: str = Field(default="")
    title: Optional[str] = None
    # All three processing stages default to ProcessStatus.INITIALIZED.
    extract_status: ProcessStatus = Field(default=ProcessStatus.INITIALIZED)
    embedding_status: ProcessStatus = Field(default=ProcessStatus.INITIALIZED)
    analyze_status: ProcessStatus = Field(default=ProcessStatus.INITIALIZED)
    mime_type: Optional[str] = None
    raw_content: Optional[str] = None
    user_description: Optional[str] = None
    create_time: Optional[datetime] = None
    url: Optional[str] = None
    document_size: int = Field(default=0)
    insight: Optional[Dict] = None
    summary: Optional[Dict] = None

    class Config:
        # JSON encoders: datetimes as ISO-8601 strings, statuses as their
        # enum value.
        json_encoders = {
            datetime: lambda v: v.isoformat() if v else None,
            ProcessStatus: lambda v: v.value if v else None,
        }

    def dict(self, *args, **kwargs):
        """Return the model as a dict with status enums replaced by their values.

        All positional and keyword arguments are forwarded unchanged to the
        parent ``dict()`` implementation.

        Only entries that are actually ``ProcessStatus`` members are
        converted; a status key that is absent from the exported dict, or
        whose value is not a ``ProcessStatus`` instance, is left untouched
        instead of raising ``AttributeError`` on ``.value``.
        """
        exported = super().dict(*args, **kwargs)
        for key in ("extract_status", "embedding_status", "analyze_status"):
            status = exported.get(key)
            if isinstance(status, ProcessStatus):
                exported[key] = status.value
        return exported

    @classmethod
    def from_dict(cls, data: Dict) -> Optional["DocumentDTO"]:
        """Build an instance from a plain dict; the inverse of ``dict()``.

        Args:
            data: Mapping of field names to values. Status fields, when
                present, are passed through ``ProcessStatus(...)``; a string
                ``create_time`` is parsed with ``datetime.fromisoformat``.

        Returns:
            A new instance of ``cls``, or ``None`` when ``data`` is empty
            or ``None``.

        Raises:
            ValueError: If a status value is not a valid ``ProcessStatus``
                or ``create_time`` is not a valid ISO-format string.
        """
        if not data:
            return None

        # Work on a shallow copy so the caller's dict is not mutated.
        payload = {**data}

        for key in ("extract_status", "embedding_status", "analyze_status"):
            if key in payload:
                payload[key] = ProcessStatus(payload[key])

        created = payload.get("create_time")
        if isinstance(created, str):
            payload["create_time"] = datetime.fromisoformat(created)

        return cls(**payload)
# Request model for creating a document. Only `name` is required; every
# other field has a default. `to_dto()` converts the request into a
# DocumentDTO. Documented with comments (not a class docstring) so the
# model's generated schema description stays unchanged.
class CreateDocumentRequest(BaseModel):
    # --- descriptive metadata -------------------------------------------
    name: str = Field(
        default=...,
        description="Document name",
        max_length=255,
        example="example.pdf",
    )
    title: Optional[str] = Field(
        default=None,
        description="Document title",
        max_length=255,
        example="Example Document",
    )
    mime_type: Optional[str] = Field(
        default=None,
        description="MIME type",
        max_length=100,
        example="application/pdf",
    )
    user_description: Optional[str] = Field(
        default=None,
        description="User provided description",
        example="This is an example document",
    )
    url: Optional[str] = Field(
        default=None,
        description="Document URL or file path",
        example="https://example.com/doc.pdf or /path/to/file",
    )
    document_size: int = Field(
        default=0,
        description="Document size in bytes",
        ge=0,
        example=1024,
    )

    # --- content and processing state -----------------------------------
    raw_content: Optional[str] = Field(
        default=None,
        description="Extracted raw content from the document",
    )
    extract_status: ProcessStatus = Field(
        default=ProcessStatus.INITIALIZED,
        description="Extraction status",
    )
    embedding_status: ProcessStatus = Field(
        default=ProcessStatus.INITIALIZED,
        description="Embedding status",
    )
    analyze_status: ProcessStatus = Field(
        default=ProcessStatus.INITIALIZED,
        description="Analysis status",
    )

    def to_dto(self) -> "DocumentDTO":
        """Return a DocumentDTO carrying this request's field values.

        Every field declared on the request is copied by name; DTO fields
        not listed here keep their DocumentDTO defaults.
        """
        copied_fields = (
            "name",
            "title",
            "mime_type",
            "user_description",
            "url",
            "document_size",
            "raw_content",
            "extract_status",
            "embedding_status",
            "analyze_status",
        )
        dto_kwargs = {field: getattr(self, field) for field in copied_fields}
        return DocumentDTO(**dto_kwargs)