"""Pydantic models for the per-user data catalog (Cs + Ct). See ARCHITECTURE.md §6 for the full schema definition. Source.location_ref URI scheme ------------------------------ A `Source` is uniquely addressable by `location_ref`; introspectors and executors parse it to find the underlying data: schema sources → "dbclient://{database_client_id}" Resolves via `database_client_service.get(...)` which returns a `DatabaseClient` row whose Fernet-encrypted credentials are decrypted at runtime. tabular sources → "az_blob://{user_id}/{document_id}" The Source aggregates one or more sheets as Tables; each per-sheet Parquet blob is named via `parquet_service.parquet_blob_name(user_id, document_id, sheet_name)`, so executors derive the per-Table blob path from `Source.location_ref` plus `Table.name`. unstructured → reserved (deferred — see ARCHITECTURE.md §10 q2). """ from datetime import datetime from typing import Any, Literal from pydantic import BaseModel, Field SourceType = Literal["schema", "tabular", "unstructured"] DataType = Literal["int", "decimal", "string", "datetime", "date", "bool", "json"] class ColumnStats(BaseModel): min: Any | None = None max: Any | None = None mean: float | None = None median: float | None = None distinct_count: int | None = None top_values: list[Any] | None = None class Column(BaseModel): column_id: str name: str data_type: DataType nullable: bool pii_flag: bool = False sample_values: list[Any] | None = None stats: ColumnStats | None = None class ForeignKey(BaseModel): """A FK edge from one column in this table to a column in another table. All references use stable IDs derived from source/table/column names so edges survive renames at the `name` level. The target table must belong to the SAME `Source` — cross-source FKs are not modeled in v1. """ column_id: str # the column in this table that holds the FK target_table_id: str # referenced table_id, within the same Source target_column_id: str # referenced column_id class Table(BaseModel): table_id: str name: str row_count: int | None = None columns: list[Column] foreign_keys: list[ForeignKey] = Field(default_factory=list) class Source(BaseModel): source_id: str source_type: SourceType name: str location_ref: str updated_at: datetime tables: list[Table] = Field(default_factory=list) class Catalog(BaseModel): user_id: str schema_version: str = "1.0" generated_at: datetime sources: list[Source] = Field(default_factory=list)