Rifqi Hafizuddin
[NOTICKET] fix(db): reconcile analyses schema + migrate chat to analyses_messages
283eb0e | """SQLAlchemy database models.""" | |
| from uuid import uuid4 | |
| from sqlalchemy import ( | |
| Column, | |
| DateTime, | |
| ForeignKey, | |
| Integer, | |
| String, | |
| Text, | |
| ) | |
| from sqlalchemy.dialects.postgresql import JSONB, UUID | |
| from sqlalchemy.orm import relationship | |
| from sqlalchemy.sql import func | |
| from src.db.postgres.connection import Base | |
| class User(Base): | |
| """User model.""" | |
| __tablename__ = "users" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| fullname = Column(String, nullable=False) | |
| email = Column(String, nullable=False, unique=True, index=True) | |
| password = Column(String, nullable=False) # bcrypt-hashed | |
| company = Column(String) | |
| company_size = Column(String) | |
| function = Column(String) | |
| site = Column(String) | |
| role = Column(String) | |
| status = Column(String, nullable=False, default="active") # active | inactive | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| class Document(Base): | |
| """Document model.""" | |
| __tablename__ = "documents" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| user_id = Column(String, nullable=False, index=True) | |
| filename = Column(String, nullable=False) | |
| blob_name = Column(String, nullable=False, unique=True) | |
| file_size = Column(Integer) | |
| file_type = Column(String) # pdf, docx, txt, etc. | |
| status = Column(String, default="uploaded") # uploaded, processing, completed, failed | |
| processed_at = Column(DateTime(timezone=True)) | |
| error_message = Column(Text) | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| class Room(Base): | |
| """Room model for chat sessions.""" | |
| __tablename__ = "rooms" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| user_id = Column(String, nullable=False, index=True) | |
| title = Column(String, default="New Chat") | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| updated_at = Column(DateTime(timezone=True), onupdate=func.now()) | |
| status = Column(String, nullable=False, default="active") # active | inactive | |
| messages = relationship("ChatMessage", back_populates="room", cascade="all, delete-orphan") | |
| class ChatMessage(Base): | |
| """Chat message model.""" | |
| __tablename__ = "chat_messages" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| room_id = Column(String, ForeignKey("rooms.id"), nullable=False, index=True) | |
| role = Column(String, nullable=False) # user, assistant | |
| content = Column(Text, nullable=False) | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| room = relationship("Room", back_populates="messages") | |
| sources = relationship("MessageSource", back_populates="message", cascade="all, delete-orphan") | |
| class MessageSource(Base): | |
| """Sources (RAG references) attached to an assistant message.""" | |
| __tablename__ = "message_sources" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| message_id = Column(String, ForeignKey("chat_messages.id", ondelete="CASCADE"), nullable=False, index=True) | |
| document_id = Column(String) | |
| filename = Column(Text) | |
| page_label = Column(Text) | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| message = relationship("ChatMessage", back_populates="sources") | |
| class DatabaseClient(Base): | |
| """User-registered external database connections.""" | |
| __tablename__ = "databases" | |
| id = Column(String, primary_key=True, default=lambda: str(uuid4())) | |
| user_id = Column(String, nullable=False, index=True) | |
| name = Column(String, nullable=False) # display name, e.g. "Prod DB" | |
| db_type = Column(String, nullable=False) # postgres|mysql|sqlserver|supabase|bigquery|snowflake | |
| credentials = Column(JSONB, nullable=False) # per-type JSON; sensitive fields Fernet-encrypted | |
| status = Column(String, nullable=False, default="active") # active | inactive | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| updated_at = Column(DateTime(timezone=True), onupdate=func.now()) | |
| class Catalog(Base): | |
| """Per-user data catalog stored as a single jsonb row. | |
| `data` holds the full Pydantic Catalog (src/catalog/models.py:Catalog) | |
| serialized via `model_dump(mode="json")`. Read path uses | |
| `Catalog.model_validate(...)` to rehydrate. | |
| Dedicated table — kept separate from `langchain_pg_embedding` so unstructured | |
| embeddings and structured-catalog metadata never share storage. | |
| """ | |
| __tablename__ = "data_catalog" | |
| user_id = Column(String, primary_key=True) | |
| data = Column(JSONB, nullable=False) | |
| schema_version = Column(String, nullable=False, default="1.0") | |
| generated_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| updated_at = Column(DateTime(timezone=True), onupdate=func.now()) | |
| class ReportInputRow(Base): | |
| """One row per completed slow-path analysis (the report's source of truth). | |
| `data` holds the full Pydantic AnalysisRecord | |
| (src/agents/slow_path/schemas.py:AnalysisRecord) serialized via | |
| `model_dump(mode="json")`; the read path rehydrates with | |
| `AnalysisRecord.model_validate(...)`. Many records accumulate per analysis | |
| session — `generate_report` reads them by `analysis_id`, oldest-first. | |
| `analysis_id` is nullable until the Analysis State (owned upstream) is wired | |
| into the slow path; records still persist (and carry `user_id`) before then. | |
| OWNERSHIP / HANDOFF (#21/#22, 2026-06-25 checkpoint): table **renamed `analysis_records` | |
| → `report_inputs`** — it holds the inputs report generation reads (the slow-path run | |
| records). "report_inputs" avoids clashing with Go's `analyses_messages` and with Langfuse | |
| observability. **Python-owned for now** (Python still creates it locally); the finalized | |
| schema goes to Harry so the dedorch migration creates it post-cutover (#22), where | |
| `id`/`analysis_id` will be `uuid` (+ FK to `analyses(id)`). The Pydantic `AnalysisRecord` | |
| (the in-memory run object) is intentionally kept. Slated to migrate to Go ownership later — | |
| keep this + DEV_PLAN #21/#22 as the handoff record. NOTE: dedorch currently still has the | |
| OLD `analysis_records` table (empty) until Harry's rename migration lands. | |
| """ | |
| __tablename__ = "report_inputs" | |
| # id/analysis_id are `uuid` to match dedorch's `report_inputs` + the analysis-family | |
| # (analyses/reports/data_sources). No FK declared in Python (dedorch's migration owns it, #22). | |
| id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisRecord.record_id (uuid hex ok) | |
| analysis_id = Column(UUID(as_uuid=False), index=True) # the analysis session id (nullable for now) | |
| user_id = Column(String, nullable=False, index=True) | |
| plan_id = Column(String, nullable=False) | |
| data = Column(JSONB, nullable=False) | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| class AnalysisReportRow(Base): | |
| """One immutable row per generated report version — dedorch `reports` (Go-owned). | |
| dedorch stores the rendered markdown `content` + `title` + `version` (no jsonb | |
| snapshot — markdown-only per the 2026-06-23 checkpoint). The read path rebuilds a | |
| minimal `AnalysisReport` (structured fields empty; `rendered_markdown` = content). | |
| Versions accumulate per analysis; versioning is serialized by a per-analysis | |
| advisory lock in `ReportStore`. Class name kept; table + shape changed for dedorch. | |
| """ | |
| __tablename__ = "reports" | |
| id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisReport.report_id (uuid) | |
| analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) | |
| title = Column(String, nullable=False) | |
| content = Column(Text, nullable=False) # rendered markdown | |
| generated_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) | |
| version = Column(Integer, nullable=False) | |
| class AnalysisStateRow(Base): | |
| """Per-analysis session state — the dedorch **`analyses`** table (plural; Go-owned). | |
| One session = one analysis = one conversation; `id` is the shared session id | |
| (canonical UUID). Verified against the dedorch DB 2026-06-25. | |
| dedorch `analyses` columns (reconciled 2026-07-01 — Harry's #3 landed): `id` (uuid), | |
| `analysis_title`, `objective` (text), `business_questions` (jsonb), `user_id` (text), | |
| `report_id` (uuid), `status` (text 'active'|'inactive' — soft-delete), `data_bind` (jsonb), | |
| `data_bind_version` (int), `report_collection` (jsonb), `created_at`, `updated_at`. | |
| `problem_statement`/`problem_validated` were DROPPED in dedorch (#3) and removed here; | |
| `objective` + `business_questions` (the user-entered goal, set at onboarding by Go) replace | |
| them. The FE/Go columns (`status`/`data_bind*`/`report_collection`) are carried to match | |
| dedorch but are NOT surfaced in the `AnalysisState` pydantic contract. `analysis` (singular) | |
| is the deprecated DUPLICATE table Harry will drop — never use it. Class name kept. | |
| """ | |
| __tablename__ = "analyses" | |
| id = Column(UUID(as_uuid=False), primary_key=True) # shared session id (uuid) | |
| analysis_title = Column(String, nullable=False, default="New analysis") | |
| objective = Column(Text, nullable=False, default="") | |
| business_questions = Column(JSONB, nullable=False, default=list) | |
| user_id = Column(String, nullable=False, index=True) # was owner_id (dedorch uses user_id) | |
| report_id = Column(UUID(as_uuid=False), nullable=True) | |
| # dedorch `analyses` columns (FE/Go concerns; carried so create_all matches dedorch). | |
| status = Column(String, nullable=False, default="active") # active | inactive (soft-delete) | |
| data_bind = Column(JSONB, nullable=False, default=list) | |
| data_bind_version = Column(Integer, nullable=False, default=1) | |
| report_collection = Column(JSONB, nullable=False, default=list) | |
| created_at = Column(DateTime(timezone=True), server_default=func.now()) | |
| updated_at = Column( | |
| DateTime(timezone=True), server_default=func.now(), onupdate=func.now() | |
| ) | |
| class AnalysisDataSourceRow(Base): | |
| """Per-analysis data-source binding (#10) — dedorch `data_sources` (Go-owned). | |
| Which catalog sources an analysis is scoped to. `reference_id` is the catalog | |
| `Source.source_id`; `type`/`name` snapshot the source kind + label. Written at | |
| `/analysis/create`; read by `structured_flow` scoping + the report appendix. | |
| `source_metadata` maps to the `metadata` column (`metadata` is reserved by the | |
| declarative API). Class name kept; table + shape changed for dedorch. | |
| """ | |
| __tablename__ = "data_sources" | |
| id = Column(UUID(as_uuid=False), primary_key=True) | |
| analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) | |
| type = Column(String, nullable=False) | |
| name = Column(String, nullable=False) | |
| reference_id = Column(String, nullable=False) # == catalog Source.source_id | |
| bound_by = Column(String, nullable=False) | |
| bound_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) | |
| source_metadata = Column("metadata", JSONB, nullable=True) | |
| created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) | |
| class AnalysesMessageRow(Base): | |
| """One conversation message — dedorch `analyses_messages` (Go-owned table). | |
| The analysis chat room (user question + AI answer), replacing the deprecated | |
| `rooms`/`chat_messages`. Python is a **consumer/writer** here: it INSERTs and reads | |
| rows but does NOT own the table (Go's migration creates it). Shape mirrors the Go | |
| contract (`API_CONTRACT_BE_GOLANG.md` §Analysis Messages): `role ∈ user|ai`. RAG source | |
| citations are NOT persisted here — the old `message_sources` table is deprecated along | |
| with `chat_messages`. | |
| """ | |
| __tablename__ = "analyses_messages" | |
| id = Column(UUID(as_uuid=False), primary_key=True) | |
| analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True) | |
| user_id = Column(String, nullable=False, index=True) | |
| role = Column(String, nullable=False) # user | ai | |
| content = Column(Text, nullable=False) | |
| created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) | |