"""SQLAlchemy ORM models: datasources, their documents, chunks and embeddings.

The tables form a strict ownership chain::

    Datasource 1--* Document 1--* Chunk 1--1 Embedding

Deletes propagate down the chain twice over: at the ORM level through
``cascade="all, delete-orphan"`` on each parent-side relationship, and at the
database level through ``ondelete="CASCADE"`` on each foreign key.
"""

from pgvector.sqlalchemy import Vector
from sqlalchemy import JSON, Column, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func

from backend.config import settings
from backend.database import Base


class Datasource(Base):
    """An origin from which documents are ingested."""

    __tablename__ = "datasources"

    # NOTE(review): index=True on a primary key is usually redundant (the PK
    # already has an index); kept as-is here and on the other models so the
    # emitted schema does not change.
    id = Column(Integer, primary_key=True, index=True)
    type = Column(String(50), nullable=False)  # pdf, url, text
    source_uri = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    documents = relationship(
        "Document",
        back_populates="datasource",
        cascade="all, delete-orphan",
    )


class Document(Base):
    """A single document belonging to a :class:`Datasource`."""

    __tablename__ = "documents"

    id = Column(Integer, primary_key=True, index=True)
    datasource_id = Column(
        Integer,
        ForeignKey("datasources.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    title = Column(String(500), nullable=False)
    bytes = Column(Integer)  # presumably the document size in bytes -- confirm
    pages = Column(Integer)  # presumably the page count -- confirm
    # Callable default: a fresh dict is built for every insert. A literal ``{}``
    # would be one shared object reused as the default for every row.
    meta_json = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    datasource = relationship("Datasource", back_populates="documents")
    chunks = relationship(
        "Chunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )


class Chunk(Base):
    """A piece of text belonging to a :class:`Document`."""

    __tablename__ = "chunks"

    id = Column(Integer, primary_key=True, index=True)
    doc_id = Column(
        Integer,
        ForeignKey("documents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    ordinal = Column(Integer, nullable=False)  # presumably position within the document -- confirm
    text = Column(Text, nullable=False)
    # Callable default: a fresh dict is built for every insert. A literal ``{}``
    # would be one shared object reused as the default for every row.
    meta_json = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    document = relationship("Document", back_populates="chunks")
    # uselist=False makes this a scalar (one-to-one) attribute.
    embedding = relationship(
        "Embedding",
        back_populates="chunk",
        uselist=False,
        cascade="all, delete-orphan",
    )


class Embedding(Base):
    """The vector stored for a :class:`Chunk` (at most one per chunk)."""

    __tablename__ = "embeddings"

    id = Column(Integer, primary_key=True, index=True)
    # unique=True enforces the one-embedding-per-chunk side of the relationship.
    chunk_id = Column(
        Integer,
        ForeignKey("chunks.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    # Vector width is taken from application settings.
    embedding = Column(Vector(settings.VECTOR_DIMENSIONS))
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    chunk = relationship("Chunk", back_populates="embedding")