ishaq101's picture
feat/Analysis State & Report Rework (#4)
0e02a0f
Raw
History Blame
11.5 kB
"""SQLAlchemy database models."""
from uuid import uuid4
from sqlalchemy import (
Boolean,
Column,
DateTime,
ForeignKey,
Integer,
String,
Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from src.db.postgres.connection import Base
class User(Base):
"""User model."""
__tablename__ = "users"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
fullname = Column(String, nullable=False)
email = Column(String, nullable=False, unique=True, index=True)
password = Column(String, nullable=False) # bcrypt-hashed
company = Column(String)
company_size = Column(String)
function = Column(String)
site = Column(String)
role = Column(String)
status = Column(String, nullable=False, default="active") # active | inactive
created_at = Column(DateTime(timezone=True), server_default=func.now())
class Document(Base):
"""Document model."""
__tablename__ = "documents"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
filename = Column(String, nullable=False)
blob_name = Column(String, nullable=False, unique=True)
file_size = Column(Integer)
file_type = Column(String) # pdf, docx, txt, etc.
status = Column(String, default="uploaded") # uploaded, processing, completed, failed
processed_at = Column(DateTime(timezone=True))
error_message = Column(Text)
created_at = Column(DateTime(timezone=True), server_default=func.now())
class Room(Base):
"""Room model for chat sessions."""
__tablename__ = "rooms"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
title = Column(String, default="New Chat")
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
status = Column(String, nullable=False, default="active") # active | inactive
messages = relationship("ChatMessage", back_populates="room", cascade="all, delete-orphan")
class ChatMessage(Base):
"""Chat message model."""
__tablename__ = "chat_messages"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
room_id = Column(String, ForeignKey("rooms.id"), nullable=False, index=True)
role = Column(String, nullable=False) # user, assistant
content = Column(Text, nullable=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
room = relationship("Room", back_populates="messages")
sources = relationship("MessageSource", back_populates="message", cascade="all, delete-orphan")
class MessageSource(Base):
"""Sources (RAG references) attached to an assistant message."""
__tablename__ = "message_sources"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
message_id = Column(String, ForeignKey("chat_messages.id", ondelete="CASCADE"), nullable=False, index=True)
document_id = Column(String)
filename = Column(Text)
page_label = Column(Text)
created_at = Column(DateTime(timezone=True), server_default=func.now())
message = relationship("ChatMessage", back_populates="sources")
class DatabaseClient(Base):
"""User-registered external database connections."""
__tablename__ = "databases"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
user_id = Column(String, nullable=False, index=True)
name = Column(String, nullable=False) # display name, e.g. "Prod DB"
db_type = Column(String, nullable=False) # postgres|mysql|sqlserver|supabase|bigquery|snowflake
credentials = Column(JSONB, nullable=False) # per-type JSON; sensitive fields Fernet-encrypted
status = Column(String, nullable=False, default="active") # active | inactive
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
class Catalog(Base):
"""Per-user data catalog stored as a single jsonb row.
`data` holds the full Pydantic Catalog (src/catalog/models.py:Catalog)
serialized via `model_dump(mode="json")`. Read path uses
`Catalog.model_validate(...)` to rehydrate.
Dedicated table β€” kept separate from `langchain_pg_embedding` so unstructured
embeddings and structured-catalog metadata never share storage.
"""
__tablename__ = "data_catalog"
user_id = Column(String, primary_key=True)
data = Column(JSONB, nullable=False)
schema_version = Column(String, nullable=False, default="1.0")
generated_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
class ReportInputRow(Base):
"""One row per completed slow-path analysis (the report's source of truth).
`data` holds the full Pydantic AnalysisRecord
(src/agents/slow_path/schemas.py:AnalysisRecord) serialized via
`model_dump(mode="json")`; the read path rehydrates with
`AnalysisRecord.model_validate(...)`. Many records accumulate per analysis
session β€” `generate_report` reads them by `analysis_id`, oldest-first.
`analysis_id` is nullable until the Analysis State (owned upstream) is wired
into the slow path; records still persist (and carry `user_id`) before then.
OWNERSHIP / HANDOFF (#21/#22, 2026-06-25 checkpoint): table **renamed `analysis_records`
β†’ `report_inputs`** β€” it holds the inputs report generation reads (the slow-path run
records). "report_inputs" avoids clashing with Go's `analyses_messages` and with Langfuse
observability. **Python-owned for now** (Python still creates it locally); the finalized
schema goes to Harry so the dedorch migration creates it post-cutover (#22), where
`id`/`analysis_id` will be `uuid` (+ FK to `analyses(id)`). The Pydantic `AnalysisRecord`
(the in-memory run object) is intentionally kept. Slated to migrate to Go ownership later β€”
keep this + DEV_PLAN #21/#22 as the handoff record. NOTE: dedorch currently still has the
OLD `analysis_records` table (empty) until Harry's rename migration lands.
"""
__tablename__ = "report_inputs"
# id/analysis_id are `uuid` to match dedorch's `report_inputs` + the analysis-family
# (analyses/reports/data_sources). No FK declared in Python (dedorch's migration owns it, #22).
id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisRecord.record_id (uuid hex ok)
analysis_id = Column(UUID(as_uuid=False), index=True) # the analysis session id (nullable for now)
user_id = Column(String, nullable=False, index=True)
plan_id = Column(String, nullable=False)
data = Column(JSONB, nullable=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
class AnalysisReportRow(Base):
"""One immutable row per generated report version β€” dedorch `reports` (Go-owned).
dedorch stores the rendered markdown `content` + `title` + `version` (no jsonb
snapshot β€” markdown-only per the 2026-06-23 checkpoint). The read path rebuilds a
minimal `AnalysisReport` (structured fields empty; `rendered_markdown` = content).
Versions accumulate per analysis; versioning is serialized by a per-analysis
advisory lock in `ReportStore`. Class name kept; table + shape changed for dedorch.
"""
__tablename__ = "reports"
id = Column(UUID(as_uuid=False), primary_key=True) # AnalysisReport.report_id (uuid)
analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True)
title = Column(String, nullable=False)
content = Column(Text, nullable=False) # rendered markdown
generated_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
version = Column(Integer, nullable=False)
class AnalysisStateRow(Base):
"""Per-analysis session state β€” the dedorch **`analyses`** table (plural; Go-owned).
One session = one analysis = one conversation; `id` is the shared session id
(canonical UUID). Verified against the dedorch DB 2026-06-25.
dedorch `analyses` ACTUAL columns: `id` (uuid), `analysis_title`, `user_id` (text),
`report_id` (uuid), `created_at`, `updated_at`, `problem_statement`,
`problem_validated`, `status` (text 'active'|'inactive' β€” soft-delete),
`data_bind` (jsonb), `data_bind_version` (int), `report_collection` (jsonb).
Reconciled to that shape (#4, 2026-06-26): `user_id` (was `owner_id`) + `status`/`data_bind`/
`data_bind_version`/`report_collection` added. dedorch still carries `problem_statement`/
`problem_validated` and does NOT yet have `objective`/`business_questions` β€” Harry's #3 drops
the former + adds the latter; the report layer reads the goal getattr-tolerantly so that swap
stays non-breaking. The new FE/Go columns are stored to match dedorch but NOT surfaced in the
`AnalysisState` pydantic contract (no Python reader needs them yet).
`analysis` (singular) is the deprecated DUPLICATE table Harry will drop β€” never use it.
Class name kept as `AnalysisStateRow`.
"""
__tablename__ = "analyses"
id = Column(UUID(as_uuid=False), primary_key=True) # shared session id (uuid)
analysis_title = Column(String, nullable=False, default="New analysis")
problem_statement = Column(Text, nullable=False, default="")
problem_validated = Column(Boolean, nullable=False, default=False)
user_id = Column(String, nullable=False, index=True) # was owner_id (dedorch uses user_id)
report_id = Column(UUID(as_uuid=False), nullable=True)
# dedorch `analyses` columns (FE/Go concerns; carried so create_all matches dedorch).
status = Column(String, nullable=False, default="active") # active | inactive (soft-delete)
data_bind = Column(JSONB, nullable=False, default=list)
data_bind_version = Column(Integer, nullable=False, default=1)
report_collection = Column(JSONB, nullable=False, default=list)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
)
class AnalysisDataSourceRow(Base):
"""Per-analysis data-source binding (#10) β€” dedorch `data_sources` (Go-owned).
Which catalog sources an analysis is scoped to. `reference_id` is the catalog
`Source.source_id`; `type`/`name` snapshot the source kind + label. Written at
`/analysis/create`; read by `structured_flow` scoping + the report appendix.
`source_metadata` maps to the `metadata` column (`metadata` is reserved by the
declarative API). Class name kept; table + shape changed for dedorch.
"""
__tablename__ = "data_sources"
id = Column(UUID(as_uuid=False), primary_key=True)
analysis_id = Column(UUID(as_uuid=False), nullable=False, index=True)
type = Column(String, nullable=False)
name = Column(String, nullable=False)
reference_id = Column(String, nullable=False) # == catalog Source.source_id
bound_by = Column(String, nullable=False)
bound_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
source_metadata = Column("metadata", JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())