"""SQLAlchemy database models.""" from uuid import uuid4 from sqlalchemy import ( Column, DateTime, ForeignKey, Integer, String, Text, ) from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.orm import relationship from sqlalchemy.sql import func from src.db.postgres.connection import Base class User(Base): """User model.""" __tablename__ = "users" id = Column(String, primary_key=True, default=lambda: str(uuid4())) fullname = Column(String, nullable=False) email = Column(String, nullable=False, unique=True, index=True) password = Column(String, nullable=False) # bcrypt-hashed company = Column(String) company_size = Column(String) function = Column(String) site = Column(String) role = Column(String) status = Column(String, nullable=False, default="active") # active | inactive created_at = Column(DateTime(timezone=True), server_default=func.now()) class Document(Base): """Document model.""" __tablename__ = "documents" id = Column(String, primary_key=True, default=lambda: str(uuid4())) user_id = Column(String, nullable=False, index=True) filename = Column(String, nullable=False) blob_name = Column(String, nullable=False, unique=True) file_size = Column(Integer) file_type = Column(String) # pdf, docx, txt, etc. status = Column(String, default="uploaded") # uploaded, processing, completed, failed processed_at = Column(DateTime(timezone=True)) error_message = Column(Text) created_at = Column(DateTime(timezone=True), server_default=func.now()) class Room(Base): """Room model for chat sessions.""" __tablename__ = "rooms" id = Column(String, primary_key=True, default=lambda: str(uuid4())) user_id = Column(String, nullable=False, index=True) title = Column(String, default="New Chat") created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) status = Column(String, nullable=False, default="active") # active | inactive messages = relationship("ChatMessage", back_populates="room", cascade="all, delete-orphan") class ChatMessage(Base): """Chat message model.""" __tablename__ = "chat_messages" id = Column(String, primary_key=True, default=lambda: str(uuid4())) room_id = Column(String, ForeignKey("rooms.id"), nullable=False, index=True) role = Column(String, nullable=False) # user, assistant content = Column(Text, nullable=False) created_at = Column(DateTime(timezone=True), server_default=func.now()) room = relationship("Room", back_populates="messages") sources = relationship("MessageSource", back_populates="message", cascade="all, delete-orphan") class MessageSource(Base): """Sources (RAG references) attached to an assistant message.""" __tablename__ = "message_sources" id = Column(String, primary_key=True, default=lambda: str(uuid4())) message_id = Column(String, ForeignKey("chat_messages.id", ondelete="CASCADE"), nullable=False, index=True) document_id = Column(String) filename = Column(Text) page_label = Column(Text) created_at = Column(DateTime(timezone=True), server_default=func.now()) message = relationship("ChatMessage", back_populates="sources") class DatabaseClient(Base): """User-registered external database connections.""" __tablename__ = "databases" id = Column(String, primary_key=True, default=lambda: str(uuid4())) user_id = Column(String, nullable=False, index=True) name = Column(String, nullable=False) # display name, e.g. "Prod DB" db_type = Column(String, nullable=False) # postgres|mysql|sqlserver|supabase|bigquery|snowflake credentials = Column(JSONB, nullable=False) # per-type JSON; sensitive fields Fernet-encrypted status = Column(String, nullable=False, default="active") # active | inactive created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) class Catalog(Base): """Per-user data catalog stored as a single jsonb row. `data` holds the full Pydantic Catalog (src/catalog/models.py:Catalog) serialized via `model_dump(mode="json")`. Read path uses `Catalog.model_validate(...)` to rehydrate. Dedicated table — kept separate from `langchain_pg_embedding` so unstructured embeddings and structured-catalog metadata never share storage. """ __tablename__ = "data_catalog" user_id = Column(String, primary_key=True) data = Column(JSONB, nullable=False) schema_version = Column(String, nullable=False, default="1.0") generated_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) class ReportInputRow(Base): """One row per completed slow-path analysis (the report's source of truth). `data` holds the full Pydantic AnalysisRecord (src/agents/slow_path/schemas.py:AnalysisRecord) serialized via `model_dump(mode="json")`; the read path rehydrates with `AnalysisRecord.model_validate(...)`. Many records accumulate per analysis session — `generate_report` reads them by `analysis_id`, oldest-first. `analysis_id` is nullable until the Analysis State (owned upstream) is wired into the slow path; records still persist (and carry `user_id`) before then. OWNERSHIP / HANDOFF (#21/#22, 2026-06-25 checkpoint): table **renamed `analysis_records` → `report_inputs`** — it holds the inputs report generation reads (the slow-path run records). "report_inputs" avoids clashing with Go's `analyses_messages` and with Langfuse observability. **Python-owned for now** (Python still creates it locally); the finalized schema goes to Harry so the dedorch migration creates it post-cutover (#22), where `id`/`analysis_id` will be `uuid` (+ FK to `analyses(id)`). The Pydantic `AnalysisRecord` (the in-memory run object) is intentionally kept. Slated to migrate to Go ownership later — keep this + DEV_PLAN #21/#22 as the handoff record. NOTE: dedorch currently still has the OLD `analysis_records` table (empty) until Harry's rename migration lands. """ __tablename__ = "report_inputs" # id/analysis_id are `uuid` to match dedorch's `report_inputs` + the analysis-family # (analyses/reports/data_sources). No FK declared in Python (dedorch's migration owns it, #22). id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisRecord.record_id (uuid hex ok) analysis_id = Column(UUID(as_uuid=False), index=True) # the analysis session id (nullable for now) user_id = Column(String, nullable=False, index=True) plan_id = Column(String, nullable=False) data = Column(JSONB, nullable=False) created_at = Column(DateTime(timezone=True), server_default=func.now()) class AnalysisReportRow(Base): """One immutable row per generated report version — dedorch `reports` (Go-owned). dedorch stores the rendered markdown `content` + `title` + `version` (no jsonb snapshot — markdown-only per the 2026-06-23 checkpoint). The read path rebuilds a minimal `AnalysisReport` (structured fields empty; `rendered_markdown` = content). Versions accumulate per analysis; versioning is serialized by a per-analysis advisory lock in `ReportStore`. Class name kept; table + shape changed for dedorch. """ __tablename__ = "reports" id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisReport.report_id (uuid) analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) title = Column(String, nullable=False) content = Column(Text, nullable=False) # rendered markdown generated_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) version = Column(Integer, nullable=False) class AnalysisStateRow(Base): """Per-analysis session state — the dedorch **`analyses`** table (plural; Go-owned). One session = one analysis = one conversation; `id` is the shared session id (canonical UUID). Verified against the dedorch DB 2026-06-25. dedorch `analyses` columns (reconciled 2026-07-01 — Harry's #3 landed): `id` (uuid), `analysis_title`, `objective` (text), `business_questions` (jsonb), `user_id` (text), `report_id` (uuid), `status` (text 'active'|'inactive' — soft-delete), `data_bind` (jsonb), `data_bind_version` (int), `report_collection` (jsonb), `created_at`, `updated_at`. `problem_statement`/`problem_validated` were DROPPED in dedorch (#3) and removed here; `objective` + `business_questions` (the user-entered goal, set at onboarding by Go) replace them. The FE/Go columns (`status`/`data_bind*`/`report_collection`) are carried to match dedorch but are NOT surfaced in the `AnalysisState` pydantic contract. `analysis` (singular) is the deprecated DUPLICATE table Harry will drop — never use it. Class name kept. """ __tablename__ = "analyses" id = Column(UUID(as_uuid=False), primary_key=True) # shared session id (uuid) analysis_title = Column(String, nullable=False, default="New analysis") objective = Column(Text, nullable=False, default="") business_questions = Column(JSONB, nullable=False, default=list) user_id = Column(String, nullable=False, index=True) # was owner_id (dedorch uses user_id) report_id = Column(UUID(as_uuid=False), nullable=True) # dedorch `analyses` columns (FE/Go concerns; carried so create_all matches dedorch). status = Column(String, nullable=False, default="active") # active | inactive (soft-delete) data_bind = Column(JSONB, nullable=False, default=list) data_bind_version = Column(Integer, nullable=False, default=1) report_collection = Column(JSONB, nullable=False, default=list) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() ) class AnalysisDataSourceRow(Base): """Per-analysis data-source binding (#10) — dedorch `data_sources` (Go-owned). Which catalog sources an analysis is scoped to. `reference_id` is the catalog `Source.source_id`; `type`/`name` snapshot the source kind + label. Written at `/analysis/create`; read by `structured_flow` scoping + the report appendix. `source_metadata` maps to the `metadata` column (`metadata` is reserved by the declarative API). Class name kept; table + shape changed for dedorch. """ __tablename__ = "data_sources" id = Column(UUID(as_uuid=False), primary_key=True) analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) type = Column(String, nullable=False) name = Column(String, nullable=False) reference_id = Column(String, nullable=False) # == catalog Source.source_id bound_by = Column(String, nullable=False) bound_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) source_metadata = Column("metadata", JSONB, nullable=True) created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) class AnalysesMessageRow(Base): """One conversation message — dedorch `analyses_messages` (Go-owned table). The analysis chat room (user question + AI answer), replacing the deprecated `rooms`/`chat_messages`. Python is a **consumer/writer** here: it INSERTs and reads rows but does NOT own the table (Go's migration creates it). Shape mirrors the Go contract (`API_CONTRACT_BE_GOLANG.md` §Analysis Messages): `role ∈ user|ai`. RAG source citations are NOT persisted here — the old `message_sources` table is deprecated along with `chat_messages`. """ __tablename__ = "analyses_messages" id = Column(UUID(as_uuid=False), primary_key=True) analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) user_id = Column(String, nullable=False, index=True) role = Column(String, nullable=False) # user | ai content = Column(Text, nullable=False) created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())