chatbot-rag-fi / src /models.py
ABAO77's picture
Upload 147 files
0df80b4 verified
from __future__ import annotations
import enum
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import DateTime, ForeignKey, Integer, JSON, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from src.session import Base
def utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
class SourceKind(str, enum.Enum):
    """String-valued enumeration of the kinds a source can have."""

    # Members also subclass ``str``, so each compares equal to its raw value.
    WEBSITE = "website"
class SourceStatus(str, enum.Enum):
    """String-valued enumeration of the statuses a source can have."""

    # Members also subclass ``str``, so each compares equal to its raw value.
    COMPLETED = "completed"
class Source(Base):
    """ORM model for the ``sources`` table; parent of ``Document`` rows."""

    __tablename__ = "sources"

    # Primary key: a fresh UUID4 rendered as a 36-character string per row.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )

    # Annotated with the enum types but persisted as plain String(32) columns.
    # NOTE(review): values loaded from the database are presumably plain
    # ``str`` rather than enum members -- confirm against callers.
    kind: Mapped[SourceKind] = mapped_column(String(32))
    status: Mapped[SourceStatus] = mapped_column(String(32), index=True)

    title: Mapped[str] = mapped_column(String(500))
    source_url: Mapped[str | None] = mapped_column(
        String(2048),
        nullable=True,
    )

    # JSON column; ``dict`` is passed as a callable so each new row gets its
    # own empty dict when no value is supplied.
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)

    # Timezone-aware timestamps; ``updated_at`` is refreshed on every UPDATE
    # issued through the ORM via ``onupdate``.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        onupdate=utcnow,
    )

    # Child documents; the cascade setting removes them together with the
    # source and also when they are detached from this collection.
    documents: Mapped[list["Document"]] = relationship(
        back_populates="source",
        cascade="all, delete-orphan",
    )
class Document(Base):
    """ORM model for the ``documents`` table.

    Each row belongs to one ``Source`` and owns a list of ``DocumentChunk``
    rows.
    """

    __tablename__ = "documents"

    # Primary key: a fresh UUID4 rendered as a 36-character string per row.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )

    # Indexed foreign key to the owning source; the database is asked to
    # cascade deletes from ``sources``.
    source_id: Mapped[str] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"),
        index=True,
    )

    title: Mapped[str] = mapped_column(String(500))
    content: Mapped[str] = mapped_column(Text)
    language: Mapped[str] = mapped_column(String(8), default="en")

    # Indexed 64-character column.
    # NOTE(review): presumably the hex SHA-256 digest of ``content`` --
    # confirm against the code that writes it.
    content_sha256: Mapped[str] = mapped_column(String(64), index=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    # Both sides of the two relationships are kept in sync through
    # ``back_populates``.
    source: Mapped[Source] = relationship(back_populates="documents")
    chunks: Mapped[list["DocumentChunk"]] = relationship(
        back_populates="document",
        cascade="all, delete-orphan",
    )
class DocumentChunk(Base):
    """ORM model for the ``document_chunks`` table.

    Each row is one indexed chunk of a ``Document``, stored together with its
    embedding.
    """

    __tablename__ = "document_chunks"

    # A given chunk_index may appear only once per document.
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            "chunk_index",
            name="uq_document_chunk_idx",
        ),
    )

    # Primary key: a fresh UUID4 rendered as a 36-character string per row.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
    )

    # Indexed foreign key to the owning document; the database is asked to
    # cascade deletes from ``documents``.
    document_id: Mapped[str] = mapped_column(
        ForeignKey("documents.id", ondelete="CASCADE"),
        index=True,
    )

    chunk_index: Mapped[int] = mapped_column(Integer)
    content: Mapped[str] = mapped_column(Text)
    token_estimate: Mapped[int] = mapped_column(Integer)

    # The embedding is annotated as a list of floats and stored in a JSON
    # column.
    embedding: Mapped[list[float]] = mapped_column(JSON)

    # JSON column; ``dict`` is passed as a callable so each new row gets its
    # own empty dict when no value is supplied.
    extra_json: Mapped[dict] = mapped_column(JSON, default=dict)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    # Back-reference to the owning document, paired with ``Document.chunks``.
    document: Mapped[Document] = relationship(back_populates="chunks")