import operator
from typing import Annotated, Optional, TypedDict

from app.models.chat import SourceRef


class ChunkMetadata(TypedDict, total=False):
    """
    Per-chunk payload stored in Qdrant.

    All fields have total=False so optional fields can be added without
    migrating existing points. Callers must .get() with a default for safety.

    chunk_type is the canonical discriminator for how a point is used:
        "leaf"           — real portfolio content; the ONLY type shown to the user.
        "raptor_summary" — synthetic cluster summary; retrieval aid only.
        "question_proxy" — hypothetical question embedding; retrieval aid only.
    """

    doc_id: str
    source_title: str
    source_url: str  # Absolute URL or "" for resume. Never a relative path.
    section: str
    source_type: str  # "blog" | "project" | "resume" | "readme"
    date: str
    tags: list[str]

    # ── Canonical chunk_type discriminator ────────────────────────────────
    chunk_type: str  # "leaf" | "raptor_summary" | "question_proxy"

    # ── Leaf-only fields ──────────────────────────────────────────────────
    # Proper nouns + domain terms extracted by capitalisation heuristic at
    # ingestion time. Used for Qdrant keyword payload filter at query time so
    # canonical name variants ("XSilica", "XSILICA") all match.
    keywords: list[str]

    # ── Positional ordering field ─────────────────────────────────────────
    # 0-based position of this chunk within its source document, set at
    # ingestion time by heading_chunker. Used for ordered sibling expansion
    # at query time so retrieve.py can prefer adjacent chunks over arbitrary
    # doc members.
    chunk_index: int

    # ── raptor_summary-only fields ────────────────────────────────────────
    # Qdrant point UUIDs of the leaf chunks that were summarised to produce
    # this cluster node. Used at query time to expand relevant cluster hits
    # to their actual evidence passages.
    child_leaf_ids: list[str]

    # ── question_proxy-only fields ────────────────────────────────────────
    # Qdrant point UUID of the parent leaf chunk. Used at query time to
    # resolve a question_proxy hit to the underlying evidence passage.
    parent_leaf_id: str

    # ── Legacy fields (kept for old points that pre-date the new schema) ──
    raptor_level: int  # deprecated — use chunk_type instead
    linked_chunks: list[str]  # deprecated — replaced by child_leaf_ids


class Chunk(TypedDict):
    """A retrieved passage: raw text plus its Qdrant payload metadata."""

    text: str
    metadata: ChunkMetadata


class PipelineState(TypedDict):
    """
    Shared state threaded through the RAG pipeline graph.

    Fields annotated with ``Annotated[..., operator.add]`` are reducer
    channels: values emitted by concurrent/sequential nodes are concatenated
    rather than overwritten. All other fields are last-write-wins.
    """

    query: str
    query_complexity: str
    session_id: str
    query_embedding: Optional[list[float]]  # set by cache node, reused by retrieve
    expanded_queries: Annotated[list[str], operator.add]
    retrieved_chunks: Annotated[list[Chunk], operator.add]
    reranked_chunks: Annotated[list[Chunk], operator.add]
    answer: str
    sources: Annotated[list[SourceRef], operator.add]
    cached: bool
    cache_key: Optional[str]
    guard_passed: bool
    thinking: bool  # True while Gemini has signalled RAG is needed

    # Last N Q/A pairs for this session — injected into prompts for
    # follow-up context. (Element shape not pinned here — TODO confirm
    # against the session store before tightening the annotation.)
    conversation_history: list

    # Stage 2: rolling conversation summary (single paragraph, ≤150 tokens).
    # Injected into generate/gemini_fast instead of raw turn list when present.
    conversation_summary: Optional[str]

    # Stage 2: self-contained query rewritten before retrieval when the
    # original contains unresolved pronouns/references. Used for embedding;
    # original query is used for display and system prompt.
    decontextualized_query: Optional[str]

    # True when the current query explicitly criticises the previous answer.
    is_criticism: bool

    # Stage 1: True when submitted via a follow-up pill button.
    # Bypasses Gemini fast-path so pill follow-ups always produce cited
    # RAG answers.
    is_followup: bool

    latency_ms: int
    error: Optional[str]
    interaction_id: Optional[int]

    # CRAG: counts retrieve node invocations; 2 = one retry was attempted.
    retrieval_attempts: int
    rewritten_query: Optional[str]

    # Top cross-encoder score from the last retrieve call. Used by CRAG routing.
    top_rerank_score: Optional[float]

    follow_ups: list[str]

    # Which pipeline branch produced the final answer.
    path: Optional[str]

    # 1–3 word topic extracted from the query by the guard node.
    query_topic: Optional[str]

    # Stage 3: SELF-RAG critic scores (1–3 each). Logged to SQLite for training.
    critic_groundedness: Optional[int]  # all claims supported by a specific chunk
    critic_completeness: Optional[int]  # answer uses all relevant available chunks
    critic_specificity: Optional[int]  # answer contains specific names/numbers
    critic_quality: Optional[str]  # "high" | "medium" | "low"

    # Fix 1: Enumeration query classifier.
    # True when the query has enumeration intent ("list all projects",
    # "how many blogs"). The enumerate_query node skips semantic retrieval
    # and does a Qdrant payload-filter scroll instead, returning a complete
    # deduplicated title list. Logged to SQLite so enumeration turns can be
    # monitored separately from RAG turns.
    is_enumeration_query: bool

    # Bug 4: canonical name forms produced by expand_query (all casing
    # variants of proper nouns in the query). Fed into the BM25 query as a
    # union so the sparse component scores positively across "XSilica",
    # "XSILICA", "xsilica", etc.
    query_canonical_forms: list[str]

    # RC-13: retrieval diagnostics logged per turn.
    sibling_expansion_count: Optional[int]  # chunks added via sibling expansion
    focused_source_type: Optional[str]  # e.g. "cv", "project", "blog", None