"""SQLAlchemy ORM models: sources, their documents, and document chunks."""

from __future__ import annotations

import enum
from datetime import datetime, timezone
from uuid import uuid4

from sqlalchemy import (
    DateTime,
    ForeignKey,
    Integer,
    JSON,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from src.session import Base


def utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)


def _new_uuid() -> str:
    """Return a newly generated UUID4 rendered as a string (primary-key default)."""
    return str(uuid4())


class SourceKind(str, enum.Enum):
    """String-valued enumeration of source kinds."""

    WEBSITE = "website"


class SourceStatus(str, enum.Enum):
    """String-valued enumeration of source statuses."""

    COMPLETED = "completed"


class Source(Base):
    """Row of the ``sources`` table; parent of a collection of ``Document`` rows."""

    __tablename__ = "sources"

    # String primary key, filled with a fresh UUID4 string when none is given.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=_new_uuid,
    )
    # NOTE(review): `kind` and `status` are annotated with the enum types but
    # mapped to plain String(32) columns rather than an Enum column type --
    # confirm what type callers actually get back after a load.
    kind: Mapped[SourceKind] = mapped_column(String(32))
    status: Mapped[SourceStatus] = mapped_column(String(32), index=True)
    title: Mapped[str] = mapped_column(String(500))
    source_url: Mapped[str | None] = mapped_column(String(2048), nullable=True)
    # `dict` is passed as a callable default, so each row gets its own new dict.
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    # `utcnow` is used both as the insert default and as the `onupdate` value.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        onupdate=utcnow,
    )

    # ORM-level cascade: operations on a Source propagate to its documents,
    # and documents detached from the collection are deleted as orphans.
    documents: Mapped[list["Document"]] = relationship(
        back_populates="source",
        cascade="all, delete-orphan",
    )


class Document(Base):
    """Row of the ``documents`` table; belongs to one ``Source``, owns chunks."""

    __tablename__ = "documents"

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=_new_uuid,
    )
    # Indexed foreign key to sources.id, declared with ON DELETE CASCADE.
    source_id: Mapped[str] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"),
        index=True,
    )
    title: Mapped[str] = mapped_column(String(500))
    content: Mapped[str] = mapped_column(Text)
    language: Mapped[str] = mapped_column(String(8), default="en")
    # 64-character indexed column; presumably the hex SHA-256 digest of
    # `content` -- verify against the code that populates it.
    content_sha256: Mapped[str] = mapped_column(String(64), index=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    source: Mapped[Source] = relationship(back_populates="documents")
    chunks: Mapped[list["DocumentChunk"]] = relationship(
        back_populates="document",
        cascade="all, delete-orphan",
    )


class DocumentChunk(Base):
    """Row of the ``document_chunks`` table; one indexed chunk of a ``Document``."""

    __tablename__ = "document_chunks"
    # A given chunk_index may appear at most once per document.
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            "chunk_index",
            name="uq_document_chunk_idx",
        ),
    )

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=_new_uuid,
    )
    # Indexed foreign key to documents.id, declared with ON DELETE CASCADE.
    document_id: Mapped[str] = mapped_column(
        ForeignKey("documents.id", ondelete="CASCADE"),
        index=True,
    )
    chunk_index: Mapped[int] = mapped_column(Integer)
    content: Mapped[str] = mapped_column(Text)
    token_estimate: Mapped[int] = mapped_column(Integer)
    # Stored in a JSON column and annotated as a list of floats.
    embedding: Mapped[list[float]] = mapped_column(JSON)
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    document: Mapped[Document] = relationship(back_populates="chunks")