Spaces:
Running
Running
| """ | |
| Domain Layer - Document Entity | |
| Represents a document in the knowledge base. | |
| """ | |
| from dataclasses import dataclass, field | |
| from datetime import datetime | |
| from enum import Enum | |
| from typing import Optional | |
| from uuid import UUID, uuid4 | |
class DocumentStatus(str, Enum):
    """Processing state of a document.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Initial state (the default status of a new Document).
    PENDING = "pending"
    # Set by Document.mark_as_processing().
    PROCESSING = "processing"
    # Set by Document.mark_as_indexed().
    INDEXED = "indexed"
    # Set by Document.mark_as_failed().
    FAILED = "failed"
class DocumentType(str, Enum):
    """File formats a document may have.

    Members are also ``str`` instances; each value is the lowercase
    format name.
    """

    PDF = "pdf"
    DOCX = "docx"
    TXT = "txt"
    MD = "md"
    HTML = "html"
@dataclass
class Document:
    """Document entity - core business object.

    Declared as a dataclass so that the annotated fields below produce an
    ``__init__`` and the ``field(default_factory=...)`` defaults are evaluated
    once per instance. Without the decorator those defaults would be plain
    ``Field`` objects stored on the class.

    The required fields (no default) come first, followed by the optional
    ones; this ordering is required by ``dataclasses``.
    """

    # --- required fields -------------------------------------------------
    title: str
    filename: str
    file_type: DocumentType
    file_size: int  # NOTE(review): presumably bytes - confirm with the uploader
    storage_path: str
    department: str

    # --- fields with defaults --------------------------------------------
    id: UUID = field(default_factory=uuid4)
    status: DocumentStatus = DocumentStatus.PENDING
    upload_session_id: Optional[str] = None
    # NOTE(review): datetime.utcnow() returns a *naive* datetime and is
    # deprecated since Python 3.12. It is kept here so existing callers that
    # compare against naive timestamps keep working.
    uploaded_at: datetime = field(default_factory=datetime.utcnow)
    indexed_at: Optional[datetime] = None
    metadata: dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    def mark_as_processing(self) -> None:
        """Set status to PROCESSING and refresh ``updated_at``."""
        self.status = DocumentStatus.PROCESSING
        self.updated_at = datetime.utcnow()

    def mark_as_indexed(self) -> None:
        """Set status to INDEXED and stamp ``indexed_at`` and ``updated_at``."""
        # Read the clock once so both timestamps are exactly equal.
        now = datetime.utcnow()
        self.status = DocumentStatus.INDEXED
        self.indexed_at = now
        self.updated_at = now

    def mark_as_failed(self) -> None:
        """Set status to FAILED and refresh ``updated_at``."""
        self.status = DocumentStatus.FAILED
        self.updated_at = datetime.utcnow()

    def is_indexed(self) -> bool:
        """Return True when the current status is INDEXED."""
        return self.status == DocumentStatus.INDEXED
@dataclass
class DocumentChunk:
    """Document chunk - piece of document for vector search.

    Declared as a dataclass so that the annotated fields below produce an
    ``__init__`` and the ``field(default_factory=...)`` defaults are evaluated
    once per instance. Without the decorator those defaults would be plain
    ``Field`` objects stored on the class.
    """

    # --- required fields -------------------------------------------------
    document_id: UUID
    chunk_index: int
    content: str
    token_count: int

    # --- fields with defaults --------------------------------------------
    id: UUID = field(default_factory=uuid4)
    vector_id: Optional[str] = None  # unset until set_vector_id() is called
    metadata: dict = field(default_factory=dict)
    # NOTE(review): datetime.utcnow() returns a *naive* datetime and is
    # deprecated since Python 3.12. It is kept for compatibility with callers.
    created_at: datetime = field(default_factory=datetime.utcnow)

    def set_vector_id(self, vector_id: str) -> None:
        """Set Qdrant vector ID.

        Args:
            vector_id: Identifier to store on this chunk.
        """
        self.vector_id = vector_id