Spaces:
Running
Running
import operator
from typing import Annotated, Optional, TypedDict

from app.models.chat import SourceRef
class ChunkMetadata(TypedDict, total=False):
    """
    Per-chunk payload stored in Qdrant.

    The class is declared with total=False, so every key is optional: new
    fields can be added without migrating existing points. Callers must
    therefore read keys with .get() and a default rather than indexing.

    chunk_type is the canonical discriminator for how a point is used:
      "leaf"            — real portfolio content; the ONLY type shown to the user.
      "raptor_summary"  — synthetic cluster summary; retrieval aid only.
      "question_proxy"  — hypothetical question embedding; retrieval aid only.
    """

    doc_id: str
    source_title: str
    source_url: str  # Absolute URL or "" for resume. Never a relative path.
    section: str
    source_type: str  # "blog" | "project" | "resume" | "readme"
    date: str
    tags: list[str]

    # ── Canonical chunk_type discriminator ────────────────────────────────────
    chunk_type: str  # "leaf" | "raptor_summary" | "question_proxy"

    # ── Leaf-only fields ──────────────────────────────────────────────────────
    # Proper nouns + domain terms extracted by capitalisation heuristic at
    # ingestion time. Used for Qdrant keyword payload filter at query time so
    # canonical name variants ("XSilica", "XSILICA") all match.
    keywords: list[str]

    # ── Positional ordering field ─────────────────────────────────────────────
    # 0-based position of this chunk within its source document, set at ingestion
    # time by heading_chunker. Used for ordered sibling expansion at query time
    # so retrieve.py can prefer adjacent chunks over arbitrary doc members.
    chunk_index: int

    # ── raptor_summary-only fields ────────────────────────────────────────────
    # Qdrant point UUIDs of the leaf chunks that were summarised to produce
    # this cluster node. Used at query time to expand relevant cluster hits
    # to their actual evidence passages.
    child_leaf_ids: list[str]

    # ── question_proxy-only fields ────────────────────────────────────────────
    # Qdrant point UUID of the parent leaf chunk. Used at query time to resolve
    # a question_proxy hit to the underlying evidence passage.
    parent_leaf_id: str

    # ── Legacy fields (kept for old points that pre-date the new schema) ──────
    raptor_level: int  # deprecated — use chunk_type instead
    linked_chunks: list[str]  # deprecated — replaced by child_leaf_ids
class Chunk(TypedDict):
    """A piece of text paired with its ChunkMetadata payload."""

    text: str
    # Every key of the metadata payload is optional (ChunkMetadata is declared
    # with total=False), so read it with .get() and a default.
    metadata: ChunkMetadata
class PipelineState(TypedDict):
    """
    State dictionary for one query as it moves through the pipeline nodes
    (cache, guard, retrieve, generate, ...).

    Fields annotated with ``Annotated[list[...], operator.add]`` carry
    ``operator.add`` as metadata.
    NOTE(review): presumably the graph framework uses this as a reducer that
    concatenates each node's returned list onto the existing value instead of
    overwriting it — confirm against the graph construction code.
    """

    query: str
    query_complexity: str
    session_id: str
    query_embedding: Optional[list[float]]  # set by cache node, reused by retrieve
    expanded_queries: Annotated[list[str], operator.add]
    retrieved_chunks: Annotated[list[Chunk], operator.add]
    reranked_chunks: Annotated[list[Chunk], operator.add]
    answer: str
    sources: Annotated[list[SourceRef], operator.add]
    cached: bool
    cache_key: Optional[str]
    guard_passed: bool
    thinking: bool  # True while Gemini has signalled RAG is needed

    # Last N Q/A pairs for this session — injected into prompts for follow-up context.
    # NOTE(review): element type is not declared here; confirm the shape with the
    # code that populates this list before tightening the annotation.
    conversation_history: list

    # Stage 2: rolling conversation summary (single paragraph, ≤150 tokens).
    # Injected into generate/gemini_fast instead of raw turn list when present.
    conversation_summary: Optional[str]

    # Stage 2: self-contained query rewritten before retrieval when the original
    # contains unresolved pronouns/references. Used for embedding; original query
    # is used for display and system prompt.
    decontextualized_query: Optional[str]

    # True when the current query explicitly criticises the previous answer.
    is_criticism: bool

    # Stage 1: True when submitted via a follow-up pill button.
    # Bypasses Gemini fast-path so pill follow-ups always produce cited RAG answers.
    is_followup: bool

    latency_ms: int
    error: Optional[str]
    interaction_id: Optional[int]

    # CRAG: counts retrieve node invocations; 2 = one retry was attempted.
    retrieval_attempts: int
    rewritten_query: Optional[str]

    # Top cross-encoder score from the last retrieve call. Used by CRAG routing.
    top_rerank_score: Optional[float]

    follow_ups: list[str]

    # Which pipeline branch produced the final answer.
    path: Optional[str]

    # 1–3 word topic extracted from the query by the guard node.
    query_topic: Optional[str]

    # Stage 3: SELF-RAG critic scores (1–3 each). Logged to SQLite for training.
    critic_groundedness: Optional[int]  # all claims supported by a specific chunk
    critic_completeness: Optional[int]  # answer uses all relevant available chunks
    critic_specificity: Optional[int]  # answer contains specific names/numbers
    critic_quality: Optional[str]  # "high" | "medium" | "low"

    # Fix 1: Enumeration query classifier.
    # True when the query has enumeration intent ("list all projects", "how many blogs").
    # The enumerate_query node skips semantic retrieval and does a Qdrant payload-filter
    # scroll instead, returning a complete deduplicated title list.
    # Logged to SQLite so enumeration turns can be monitored separately from RAG turns.
    is_enumeration_query: bool

    # Bug 4: canonical name forms produced by expand_query (all casing variants of
    # proper nouns in the query). Fed into the BM25 query as a union so the sparse
    # component scores positively across "XSilica", "XSILICA", "xsilica", etc.
    query_canonical_forms: list[str]

    # RC-13: retrieval diagnostics logged per turn.
    sibling_expansion_count: Optional[int]  # chunks added via sibling expansion
    # NOTE(review): the example value "cv" is not among the source_type values
    # documented on ChunkMetadata ("blog" | "project" | "resume" | "readme") —
    # confirm which spelling the retrieval code actually uses.
    focused_source_type: Optional[str]  # e.g. "cv", "project", "blog", None