# natural-hygiene-rag/src/db/models.py
# LS8's picture
# Upload folder using huggingface_hub
# 847db01 verified
import uuid
from datetime import datetime
from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
def generate_uuid() -> str:
    """Return a new random (version 4) UUID in its canonical string form.

    The result is always 36 characters long (32 hex digits plus 4 hyphens),
    which matches the ``String(36)`` primary-key columns that use this
    function as their Python-side ``default``.
    """
    return str(uuid.uuid4())
class Author(Base):
    """ORM model mapped to the ``authors`` table.

    Only ``name`` is required; the remaining descriptive columns are
    nullable because their annotations include ``None``.
    """

    __tablename__ = "authors"

    # Primary key; generate_uuid() supplies a value on insert when none is given.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid)
    # Required and unique across the table.
    name: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
    birth_year: Mapped[int | None] = mapped_column(Integer)
    death_year: Mapped[int | None] = mapped_column(Integer)
    bio: Mapped[str | None] = mapped_column(Text)
    # Optional short string (declared length 50).
    # NOTE(review): the set of expected values is not defined here - confirm.
    era: Mapped[str | None] = mapped_column(String(50))

    # Books linked through Book.author_id; the reverse side is Book.author_rel.
    books: Mapped[list["Book"]] = relationship(back_populates="author_rel")
class Book(Base):
    """ORM model mapped to the ``books`` table.

    Holds bibliographic columns, the source file reference, and ingestion
    bookkeeping (status, date, chunk count) for one book, plus relationships
    to its optional ``Author`` row and to its ``Chunk`` rows.
    """

    __tablename__ = "books"

    # Primary key; generate_uuid() supplies a value on insert when none is given.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid)
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    # Required string stored on the book row itself, independent of author_id.
    # NOTE(review): presumably the author's display name - confirm.
    author: Mapped[str] = mapped_column(String(255), nullable=False)
    # Optional link to authors.id; a book may exist without an Author row.
    author_id: Mapped[str | None] = mapped_column(ForeignKey("authors.id"))
    publication_year: Mapped[int | None] = mapped_column(Integer)
    edition: Mapped[str | None] = mapped_column(String(100))
    source_file: Mapped[str | None] = mapped_column(String(500))
    # Unique when present.
    # NOTE(review): length 64 suggests a hex SHA-256 digest - confirm.
    file_hash: Mapped[str | None] = mapped_column(String(64), unique=True)
    # Python-side default "pending"; other status values are not defined here.
    ingestion_status: Mapped[str] = mapped_column(String(20), default="pending")
    ingestion_date: Mapped[datetime | None] = mapped_column(DateTime)
    total_chunks: Mapped[int] = mapped_column(Integer, default=0)
    notes: Mapped[str | None] = mapped_column(Text)
    # Filled in by the database through a server-side default of func.now().
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    # Optional Author row; the reverse side is Author.books.
    author_rel: Mapped[Author | None] = relationship(back_populates="books")
    # Chunks of this book. "all, delete-orphan" cascades session operations
    # (including delete) to the chunks and deletes a chunk that is removed
    # from this collection.
    chunks: Mapped[list["Chunk"]] = relationship(
        back_populates="book",
        cascade="all, delete-orphan",
    )
class Topic(Base):
    """ORM model mapped to the ``topics`` table: a uniquely named topic."""

    __tablename__ = "topics"

    # Primary key; generate_uuid() supplies a value on insert when none is given.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid)
    # Required and unique across the table.
    name: Mapped[str] = mapped_column(String(100), nullable=False, unique=True)
    description: Mapped[str | None] = mapped_column(Text)
class ChunkTopic(Base):
    """Association model mapped to the ``chunk_topics`` table.

    Each row pairs one chunk with one topic. Both foreign-key columns are
    part of the primary key, so a given (chunk, topic) pair can be stored
    only once.
    """

    __tablename__ = "chunk_topics"

    chunk_id: Mapped[str] = mapped_column(ForeignKey("chunks.id"), primary_key=True)
    topic_id: Mapped[str] = mapped_column(ForeignKey("topics.id"), primary_key=True)
class Chunk(Base):
    """ORM model mapped to the ``chunks`` table: one piece of a book's text.

    Every chunk belongs to exactly one book (``book_id`` is required) and
    carries its content, its position within the book, and optional
    chapter/page metadata. Topics are attached through the ``chunk_topics``
    association table.
    """

    __tablename__ = "chunks"

    # Primary key; generate_uuid() supplies a value on insert when none is given.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid)
    book_id: Mapped[str] = mapped_column(ForeignKey("books.id"), nullable=False)
    content: Mapped[str] = mapped_column(Text, nullable=False)
    chapter_number: Mapped[int | None] = mapped_column(Integer)
    chapter_title: Mapped[str | None] = mapped_column(String(500))
    page_start: Mapped[int | None] = mapped_column(Integer)
    page_end: Mapped[int | None] = mapped_column(Integer)
    # Required position value.
    # NOTE(review): presumably the chunk's ordinal within its book; no
    # uniqueness per book is enforced here - confirm.
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
    # Python-side default "text"; other values are not defined here.
    content_type: Mapped[str] = mapped_column(String(50), default="text")
    # Optional float.
    # NOTE(review): scale (0-1 vs 0-100) is not shown here - confirm.
    ocr_confidence: Mapped[float | None] = mapped_column(Float)
    # Python-side default "digital"; other values are not defined here.
    extraction_method: Mapped[str] = mapped_column(String(20), default="digital")
    # Unique when present.
    # NOTE(review): presumably the id of this chunk's vector in an external
    # embedding store - confirm.
    embedding_id: Mapped[str | None] = mapped_column(String(36), unique=True)
    # Optional self-referential foreign key to another chunk; no ORM
    # relationship is declared for it in this class.
    parent_chunk_id: Mapped[str | None] = mapped_column(ForeignKey("chunks.id"))

    # Owning book; the reverse side is Book.chunks.
    book: Mapped[Book] = relationship(back_populates="chunks")
    # Many-to-many to Topic through the chunk_topics table; no
    # back_populates is given, so the link is declared from this side only.
    topics: Mapped[list[Topic]] = relationship(secondary="chunk_topics")
class QueryLog(Base):
    """ORM model mapped to the ``query_log`` table: one logged query.

    Stores the query text, the model name, and optionally the response,
    the ids of the source chunks, and a feedback value.
    """

    __tablename__ = "query_log"

    # Primary key; generate_uuid() supplies a value on insert when none is given.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid)
    query_text: Mapped[str] = mapped_column(Text, nullable=False)
    # Annotated Mapped[str] (no None) with no default, so the column is
    # NOT NULL and a value must be supplied on insert.
    model_used: Mapped[str] = mapped_column(String(100))
    response_text: Mapped[str | None] = mapped_column(Text)
    # Stored as text rather than as foreign keys, so the database does not
    # check that the referenced chunks exist.
    source_chunk_ids: Mapped[str | None] = mapped_column(Text)  # JSON array
    # Filled in by the database through a server-side default of func.now().
    timestamp: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    # Optional integer.
    # NOTE(review): the encoding (e.g. +1/-1 or a rating scale) is not
    # defined here - confirm.
    feedback: Mapped[int | None] = mapped_column(Integer)