""" Domain Layer - Document Entity Represents a document in the knowledge base. """ from dataclasses import dataclass, field from datetime import datetime from enum import Enum from typing import Optional from uuid import UUID, uuid4 class DocumentStatus(str, Enum): """Document processing status""" PENDING = "pending" PROCESSING = "processing" INDEXED = "indexed" FAILED = "failed" class DocumentType(str, Enum): """Supported document types""" PDF = "pdf" DOCX = "docx" TXT = "txt" MD = "md" HTML = "html" @dataclass class Document: """Document entity - core business object""" title: str filename: str file_type: DocumentType file_size: int storage_path: str department: str id: UUID = field(default_factory=uuid4) status: DocumentStatus = DocumentStatus.PENDING upload_session_id: Optional[str] = None uploaded_at: datetime = field(default_factory=datetime.utcnow) indexed_at: Optional[datetime] = None metadata: dict = field(default_factory=dict) created_at: datetime = field(default_factory=datetime.utcnow) updated_at: datetime = field(default_factory=datetime.utcnow) def mark_as_processing(self) -> None: """Mark document as being processed""" self.status = DocumentStatus.PROCESSING self.updated_at = datetime.utcnow() def mark_as_indexed(self) -> None: """Mark document as successfully indexed""" self.status = DocumentStatus.INDEXED self.indexed_at = datetime.utcnow() self.updated_at = datetime.utcnow() def mark_as_failed(self) -> None: """Mark document processing as failed""" self.status = DocumentStatus.FAILED self.updated_at = datetime.utcnow() def is_indexed(self) -> bool: """Check if document is indexed""" return self.status == DocumentStatus.INDEXED @dataclass class DocumentChunk: """Document chunk - piece of document for vector search""" document_id: UUID chunk_index: int content: str token_count: int id: UUID = field(default_factory=uuid4) vector_id: Optional[str] = None metadata: dict = field(default_factory=dict) created_at: datetime = field(default_factory=datetime.utcnow) def set_vector_id(self, vector_id: str) -> None: """Set Qdrant vector ID""" self.vector_id = vector_id