# career_app/backend/models/document.py
# Youngger9765
# feat: implement core RAG services with TDD and enhanced pre-commit hooks
# 19a176c
from pgvector.sqlalchemy import Vector
from sqlalchemy import JSON, Column, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from backend.config import settings
from backend.database import Base
class Datasource(Base):
    """ORM model mapped to the ``datasources`` table.

    Each row describes a source by its type and an optional URI, and acts
    as the parent of a collection of ``Document`` rows.
    """

    __tablename__ = "datasources"

    id = Column(Integer, index=True, primary_key=True)
    # Kind of source; values noted by the original author: pdf, url, text.
    type = Column(String(50), nullable=False)
    source_uri = Column(Text)
    # Populated by the database (server-side ``now()``) when the row is inserted.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Relationships
    # Counterpart of ``Document.datasource``. The cascade setting makes the
    # ORM delete child documents together with their datasource, and also
    # delete documents that are removed from this collection.
    documents = relationship(
        "Document",
        cascade="all, delete-orphan",
        back_populates="datasource",
    )
class Document(Base):
    """ORM model mapped to the ``documents`` table.

    A document belongs to exactly one ``Datasource`` (``datasource_id`` is
    non-nullable) and owns a collection of ``Chunk`` rows.
    """

    __tablename__ = "documents"

    id = Column(Integer, primary_key=True, index=True)
    # Parent datasource; the foreign key cascades deletes at the database level.
    datasource_id = Column(
        Integer,
        ForeignKey("datasources.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    title = Column(String(500), nullable=False)
    # NOTE(review): presumably the document size in bytes and its page
    # count; both are optional integers -- confirm against the ingest code.
    bytes = Column(Integer)
    pages = Column(Integer)
    # Free-form JSON metadata. The default is the ``dict`` callable rather
    # than a ``{}`` literal: a literal would be one dict object shared by
    # every insert that relies on the default, so mutating it on one
    # instance would leak into later rows. A callable is invoked per insert.
    meta_json = Column(JSON, default=dict)
    # Both timestamps start at the database's ``now()``; ``updated_at`` is
    # additionally set to ``now()`` on UPDATE statements emitted by SQLAlchemy.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    datasource = relationship("Datasource", back_populates="documents")
    # Chunks are deleted with the document, and when removed from this collection.
    chunks = relationship(
        "Chunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )
class Chunk(Base):
    """ORM model mapped to the ``chunks`` table.

    A chunk holds a piece of text belonging to one ``Document`` and has at
    most one associated ``Embedding``.
    """

    __tablename__ = "chunks"

    id = Column(Integer, primary_key=True, index=True)
    # Owning document; the foreign key cascades deletes at the database level.
    doc_id = Column(
        Integer,
        ForeignKey("documents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # NOTE(review): presumably the chunk's position within its document --
    # confirm against the chunking code.
    ordinal = Column(Integer, nullable=False)
    text = Column(Text, nullable=False)
    # Free-form JSON metadata. The default is the ``dict`` callable rather
    # than a ``{}`` literal: a literal would be one dict object shared by
    # every insert that relies on the default, so mutating it on one
    # instance would leak into later rows. A callable is invoked per insert.
    meta_json = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    document = relationship("Document", back_populates="chunks")
    # ``uselist=False`` exposes the embedding as a single object (or None)
    # instead of a list; it is deleted together with the chunk.
    embedding = relationship(
        "Embedding",
        back_populates="chunk",
        uselist=False,
        cascade="all, delete-orphan",
    )
class Embedding(Base):
    """ORM model mapped to the ``embeddings`` table.

    Stores one vector per ``Chunk``; the unique ``chunk_id`` keeps the
    relationship one-to-one.
    """

    __tablename__ = "embeddings"

    id = Column(Integer, index=True, primary_key=True)
    # Owning chunk. Unique so that a chunk can have only one embedding row;
    # the foreign key cascades deletes at the database level.
    chunk_id = Column(
        Integer,
        ForeignKey("chunks.id", ondelete="CASCADE"),
        index=True,
        unique=True,
        nullable=False,
    )
    # pgvector column whose dimensionality is read from application settings.
    embedding = Column(Vector(settings.VECTOR_DIMENSIONS))
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Relationships
    chunk = relationship("Chunk", back_populates="embedding")