Rifqi Hafizuddin
[KM-652] refactor(db): align analysis-family models to the dedorch schema
4f0ff12
Raw
History Blame
9.34 kB
"""SQLAlchemy database models."""
from uuid import uuid4
from sqlalchemy import (
Boolean,
Column,
DateTime,
ForeignKey,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from src.db.postgres.connection import Base
class User(Base):
"""User model."""
__tablename__ = "users"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
fullname = Column(String, nullable=False)
email = Column(String, nullable=False, unique=True, index=True)
password = Column(String, nullable=False) # bcrypt-hashed
company = Column(String)
company_size = Column(String)
function = Column(String)
site = Column(String)
role = Column(String)
status = Column(String, nullable=False, default="active") # active | inactive
created_at = Column(DateTime(timezone=True), server_default=func.now())
class Document(Base):
"""Document model."""
__tablename__ = "documents"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
filename = Column(String, nullable=False)
blob_name = Column(String, nullable=False, unique=True)
file_size = Column(Integer)
file_type = Column(String) # pdf, docx, txt, etc.
status = Column(String, default="uploaded") # uploaded, processing, completed, failed
processed_at = Column(DateTime(timezone=True))
error_message = Column(Text)
created_at = Column(DateTime(timezone=True), server_default=func.now())
class Room(Base):
"""Room model for chat sessions."""
__tablename__ = "rooms"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
title = Column(String, default="New Chat")
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
status = Column(String, nullable=False, default="active") # active | inactive
messages = relationship("ChatMessage", back_populates="room", cascade="all, delete-orphan")
class ChatMessage(Base):
"""Chat message model."""
__tablename__ = "chat_messages"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
room_id = Column(String, ForeignKey("rooms.id"), nullable=False, index=True)
role = Column(String, nullable=False) # user, assistant
content = Column(Text, nullable=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
room = relationship("Room", back_populates="messages")
sources = relationship("MessageSource", back_populates="message", cascade="all, delete-orphan")
class MessageSource(Base):
"""Sources (RAG references) attached to an assistant message."""
__tablename__ = "message_sources"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
message_id = Column(String, ForeignKey("chat_messages.id", ondelete="CASCADE"), nullable=False, index=True)
document_id = Column(String)
filename = Column(Text)
page_label = Column(Text)
created_at = Column(DateTime(timezone=True), server_default=func.now())
message = relationship("ChatMessage", back_populates="sources")
class DatabaseClient(Base):
"""User-registered external database connections."""
__tablename__ = "databases"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
name = Column(String, nullable=False) # display name, e.g. "Prod DB"
db_type = Column(String, nullable=False) # postgres|mysql|sqlserver|supabase|bigquery|snowflake
credentials = Column(JSONB, nullable=False) # per-type JSON; sensitive fields Fernet-encrypted
status = Column(String, nullable=False, default="active") # active | inactive
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
class Catalog(Base):
"""Per-user data catalog stored as a single jsonb row.
`data` holds the full Pydantic Catalog (src/catalog/models.py:Catalog)
serialized via `model_dump(mode="json")`. Read path uses
`Catalog.model_validate(...)` to rehydrate.
Dedicated table — kept separate from `langchain_pg_embedding` so unstructured
embeddings and structured-catalog metadata never share storage.
"""
__tablename__ = "data_catalog"
user_id = Column(String, primary_key=True)
data = Column(JSONB, nullable=False)
schema_version = Column(String, nullable=False, default="1.0")
generated_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
class AnalysisRecordRow(Base):
"""One row per completed slow-path analysis (the report's source of truth).
`data` holds the full Pydantic AnalysisRecord
(src/agents/slow_path/schemas.py:AnalysisRecord) serialized via
`model_dump(mode="json")`; the read path rehydrates with
`AnalysisRecord.model_validate(...)`. Many records accumulate per analysis
session — `generate_report` reads them by `analysis_id`, oldest-first.
`analysis_id` is nullable until the Analysis State (owned upstream) is wired
into the slow path; records still persist (and carry `user_id`) before then.
"""
__tablename__ = "analysis_records"
id = Column(String, primary_key=True) # AnalysisRecord.record_id
analysis_id = Column(String, index=True) # FK to the analysis session (nullable for now)
user_id = Column(String, nullable=False, index=True)
plan_id = Column(String, nullable=False)
data = Column(JSONB, nullable=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
class AnalysisReportRow(Base):
"""One immutable row per generated report version — dedorch `reports` (Go-owned).
dedorch stores the rendered markdown `content` + `title` + `version` (no jsonb
snapshot — markdown-only per the 2026-06-23 checkpoint). The read path rebuilds a
minimal `AnalysisReport` (structured fields empty; `rendered_markdown` = content).
Versions accumulate per analysis; versioning is serialized by a per-analysis
advisory lock in `ReportStore`. Class name kept; table + shape changed for dedorch.
"""
__tablename__ = "reports"
id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisReport.report_id (uuid)
analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True)
title = Column(String, nullable=False)
content = Column(Text, nullable=False) # rendered markdown
generated_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
version = Column(Integer, nullable=False)
class AnalysisStateRow(Base):
"""Per-analysis session state — the dedorch `analysis` table (Go-owned migration).
One session = one analysis = one conversation; `id` is the shared session id
(canonical UUID). The orchestrator gate + Help skill read this every turn;
`problem_validated` gates structured analysis; the Problem Statement skill flips
it; `report_id` is null until a report exists. `id`/`report_id` are Postgres
`uuid` in dedorch, so they bind as UUID (canonical-string in/out). Class name
kept as `AnalysisStateRow`; only the table + id types changed for dedorch.
"""
__tablename__ = "analysis"
id = Column(UUID(as_uuid=False), primary_key=True) # shared session id (uuid)
analysis_title = Column(String, nullable=False, default="New analysis")
problem_statement = Column(Text, nullable=False, default="")
problem_validated = Column(Boolean, nullable=False, default=False)
owner_id = Column(String, nullable=False, index=True)
report_id = Column(UUID(as_uuid=False), nullable=True)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
)
class AnalysisDataSourceRow(Base):
"""Per-analysis data-source binding (#10) — dedorch `data_sources` (Go-owned).
Which catalog sources an analysis is scoped to. `reference_id` is the catalog
`Source.source_id`; `type`/`name` snapshot the source kind + label. Written at
`/analysis/create`; read by `structured_flow` scoping + the report appendix.
`source_metadata` maps to the `metadata` column (`metadata` is reserved by the
declarative API). Class name kept; table + shape changed for dedorch.
"""
__tablename__ = "data_sources"
id = Column(UUID(as_uuid=False), primary_key=True)
analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True)
type = Column(String, nullable=False)
name = Column(String, nullable=False)
reference_id = Column(String, nullable=False) # == catalog Source.source_id
bound_by = Column(String, nullable=False)
bound_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
source_metadata = Column("metadata", JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())