# personabot-api / app/models/pipeline.py
# (page header from scrape: GitHub Actions deploy 5383798, commit 8fdc5ad)
import operator
from typing import Annotated, Optional, TypedDict
from app.models.chat import SourceRef
class ChunkMetadata(TypedDict, total=False):
    """
    Payload schema for a single point stored in Qdrant.

    Declared with ``total=False`` so every key is optional: new fields can
    be introduced without migrating points already in the collection, and
    readers must use ``.get(key, default)`` instead of indexing.

    ``chunk_type`` is the canonical discriminator for how a point is used:

    * ``"leaf"`` — real portfolio content; the ONLY type shown to the user.
    * ``"raptor_summary"`` — synthetic cluster summary; retrieval aid only.
    * ``"question_proxy"`` — hypothetical question embedding; retrieval aid only.
    """

    # ── Common document fields ────────────────────────────────────────────
    doc_id: str
    source_title: str
    source_url: str  # absolute URL, or "" for the resume — never a relative path
    section: str
    source_type: str  # "blog" | "project" | "resume" | "readme"
    date: str
    tags: list[str]

    # ── Canonical chunk_type discriminator ────────────────────────────────
    chunk_type: str  # "leaf" | "raptor_summary" | "question_proxy"

    # ── Leaf-only fields ──────────────────────────────────────────────────
    # Proper nouns and domain terms picked up by a capitalisation heuristic
    # at ingestion time. Drives the Qdrant keyword payload filter at query
    # time so every casing variant ("XSilica", "XSILICA", ...) matches.
    keywords: list[str]
    # 0-based position of this chunk within its source document, assigned by
    # heading_chunker at ingestion time. Enables ordered sibling expansion at
    # query time: retrieve.py can prefer adjacent chunks over arbitrary
    # members of the same document.
    chunk_index: int

    # ── raptor_summary-only fields ────────────────────────────────────────
    # Qdrant point UUIDs of the leaf chunks that were summarised to build
    # this cluster node; used at query time to expand a relevant cluster hit
    # back to its actual evidence passages.
    child_leaf_ids: list[str]

    # ── question_proxy-only fields ────────────────────────────────────────
    # Qdrant point UUID of the parent leaf chunk; resolves a question_proxy
    # hit to the underlying evidence passage at query time.
    parent_leaf_id: str

    # ── Legacy fields (kept for points that pre-date the current schema) ──
    raptor_level: int  # deprecated — use chunk_type instead
    linked_chunks: list[str]  # deprecated — superseded by child_leaf_ids
class Chunk(TypedDict):
    """One retrievable unit: a text passage plus its Qdrant payload metadata."""

    text: str
    metadata: ChunkMetadata
class PipelineState(TypedDict):
    """
    Per-turn state dict threaded through the pipeline graph nodes.

    Fields annotated with ``operator.add`` act as reducers: when several
    nodes (or parallel branches) write the same key, the lists are merged
    by concatenation.
    """
    query: str  # original user query (decontextualized_query holds the retrieval rewrite)
    query_complexity: str  # NOTE(review): classifier label — allowed values not visible here
    session_id: str
    query_embedding: Optional[list[float]]  # set by cache node, reused by retrieve
    expanded_queries: Annotated[list[str], operator.add]
    retrieved_chunks: Annotated[list[Chunk], operator.add]
    reranked_chunks: Annotated[list[Chunk], operator.add]
    answer: str  # final answer text for this turn
    sources: Annotated[list[SourceRef], operator.add]  # citations attached to the answer
    cached: bool  # presumably True when the answer came from the cache node — confirm
    cache_key: Optional[str]
    guard_passed: bool  # set by the guard node (which also extracts query_topic below)
    thinking: bool  # True while Gemini has signalled RAG is needed
    # Last N Q/A pairs for this session — injected into prompts for follow-up context.
    conversation_history: list  # NOTE(review): element type unspecified — likely Q/A pair records
    # Stage 2: rolling conversation summary (single paragraph, ≤150 tokens).
    # Injected into generate/gemini_fast instead of raw turn list when present.
    conversation_summary: Optional[str]
    # Stage 2: self-contained query rewritten before retrieval when the original
    # contains unresolved pronouns/references. Used for embedding; original query
    # is used for display and system prompt.
    decontextualized_query: Optional[str]
    # True when the current query explicitly criticises the previous answer.
    is_criticism: bool
    # Stage 1: True when submitted via a follow-up pill button.
    # Bypasses Gemini fast-path so pill follow-ups always produce cited RAG answers.
    is_followup: bool
    latency_ms: int  # presumably end-to-end turn latency in milliseconds — confirm
    error: Optional[str]  # error description when the turn failed, else None
    interaction_id: Optional[int]  # presumably the logged interaction row id — confirm
    # CRAG: counts retrieve node invocations; 2 = one retry was attempted.
    retrieval_attempts: int
    rewritten_query: Optional[str]  # NOTE(review): likely the CRAG retry rewrite — confirm
    # Top cross-encoder score from the last retrieve call. Used by CRAG routing.
    top_rerank_score: Optional[float]
    follow_ups: list[str]  # suggested follow-up questions (cf. is_followup pill buttons)
    # Which pipeline branch produced the final answer.
    path: Optional[str]
    # 1–3 word topic extracted from the query by the guard node.
    query_topic: Optional[str]
    # Stage 3: SELF-RAG critic scores (1–3 each). Logged to SQLite for training.
    critic_groundedness: Optional[int]  # all claims supported by a specific chunk
    critic_completeness: Optional[int]  # answer uses all relevant available chunks
    critic_specificity: Optional[int]  # answer contains specific names/numbers
    critic_quality: Optional[str]  # "high" | "medium" | "low"
    # Fix 1: Enumeration query classifier.
    # True when the query has enumeration intent ("list all projects", "how many blogs").
    # The enumerate_query node skips semantic retrieval and does a Qdrant payload-filter
    # scroll instead, returning a complete deduplicated title list.
    # Logged to SQLite so enumeration turns can be monitored separately from RAG turns.
    is_enumeration_query: bool
    # Bug 4: canonical name forms produced by expand_query (all casing variants of
    # proper nouns in the query). Fed into the BM25 query as a union so the sparse
    # component scores positively across "XSilica", "XSILICA", "xsilica", etc.
    query_canonical_forms: list[str]
    # RC-13: retrieval diagnostics logged per turn.
    sibling_expansion_count: Optional[int]  # chunks added via sibling expansion
    focused_source_type: Optional[str]  # e.g. "cv", "project", "blog", None