Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import enum | |
| from datetime import datetime, timezone | |
| from uuid import uuid4 | |
| from sqlalchemy import DateTime, ForeignKey, Integer, JSON, String, Text, UniqueConstraint | |
| from sqlalchemy.orm import Mapped, mapped_column, relationship | |
| from src.session import Base | |
def utcnow() -> datetime:
    """Return the current time as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
class SourceKind(str, enum.Enum):
    """String-valued enumeration of source kinds.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    WEBSITE = "website"
class SourceStatus(str, enum.Enum):
    """String-valued enumeration of source processing statuses.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    COMPLETED = "completed"
class Source(Base):
    """ORM model mapped to the ``sources`` table.

    A source owns a collection of ``Document`` rows, reachable through
    ``documents``.
    """

    __tablename__ = "sources"

    # 36-character string primary key; defaults to a new UUID4 rendered as text.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )

    # Annotated with the enum types but stored in plain String(32) columns.
    # NOTE(review): values loaded from the database presumably come back as
    # plain ``str`` rather than enum members -- confirm against callers.
    kind: Mapped[SourceKind] = mapped_column(String(32))
    status: Mapped[SourceStatus] = mapped_column(String(32), index=True)

    title: Mapped[str] = mapped_column(String(500))
    # Optional; the column is explicitly nullable.
    source_url: Mapped[str | None] = mapped_column(
        String(2048),
        nullable=True,
    )
    # JSON column; ``dict`` is passed as a callable default.
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)

    # Timezone-aware timestamps; ``updated_at`` is also given ``utcnow`` as
    # its ``onupdate`` callable.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        onupdate=utcnow,
    )

    # Paired with ``Document.source``; cascade is "all, delete-orphan".
    documents: Mapped[list["Document"]] = relationship(
        back_populates="source",
        cascade="all, delete-orphan",
    )
class Document(Base):
    """ORM model mapped to the ``documents`` table.

    Each document references one ``Source`` via ``source_id`` and owns a
    collection of ``DocumentChunk`` rows, reachable through ``chunks``.
    """

    __tablename__ = "documents"

    # 36-character string primary key; defaults to a new UUID4 rendered as text.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )
    # Indexed foreign key to ``sources.id`` declared with ON DELETE CASCADE.
    source_id: Mapped[str] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"),
        index=True,
    )

    title: Mapped[str] = mapped_column(String(500))
    content: Mapped[str] = mapped_column(Text)
    # Up to 8 characters; defaults to "en".
    language: Mapped[str] = mapped_column(String(8), default="en")
    # Indexed 64-character column.
    # NOTE(review): presumably the hex SHA-256 digest of ``content`` --
    # nothing in this block computes it; confirm against the writer.
    content_sha256: Mapped[str] = mapped_column(String(64), index=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    # Paired with ``Source.documents``.
    source: Mapped[Source] = relationship(back_populates="documents")
    # Paired with ``DocumentChunk.document``; cascade is "all, delete-orphan".
    chunks: Mapped[list["DocumentChunk"]] = relationship(
        back_populates="document",
        cascade="all, delete-orphan",
    )
class DocumentChunk(Base):
    """ORM model mapped to the ``document_chunks`` table.

    Each chunk references one ``Document`` via ``document_id``. The pair
    ``(document_id, chunk_index)`` is constrained to be unique.
    """

    __tablename__ = "document_chunks"
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            "chunk_index",
            name="uq_document_chunk_idx",
        ),
    )

    # 36-character string primary key; defaults to a new UUID4 rendered as text.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )
    # Indexed foreign key to ``documents.id`` declared with ON DELETE CASCADE.
    document_id: Mapped[str] = mapped_column(
        ForeignKey("documents.id", ondelete="CASCADE"),
        index=True,
    )
    chunk_index: Mapped[int] = mapped_column(Integer)

    content: Mapped[str] = mapped_column(Text)
    token_estimate: Mapped[int] = mapped_column(Integer)
    # Annotated as a list of floats and stored in a JSON column.
    embedding: Mapped[list[float]] = mapped_column(JSON)
    # JSON column; ``dict`` is passed as a callable default.
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    # Paired with ``Document.chunks``.
    document: Mapped[Document] = relationship(back_populates="chunks")