# personabot-api / app/models/pipeline.py
# (page header from scrape: GitHub Actions deploy 5383798, commit 8fdc5ad)
import operator
from typing import Annotated, Optional, TypedDict
from app.models.chat import SourceRef
class ChunkMetadata(TypedDict, total=False):
    """
    Payload schema for a single point stored in Qdrant.

    Declared with ``total=False`` so every key is optional: new fields can
    be introduced without migrating points already in the collection, and
    readers must use ``.get(key, default)`` instead of indexing.

    ``chunk_type`` is the canonical discriminator for how a point is used:

    * ``"leaf"`` — real portfolio content; the ONLY type shown to the user.
    * ``"raptor_summary"`` — synthetic cluster summary; retrieval aid only.
    * ``"question_proxy"`` — hypothetical question embedding; retrieval aid only.
    """

    # ── Common document fields ────────────────────────────────────────────
    doc_id: str
    source_title: str
    source_url: str  # absolute URL, or "" for the resume — never a relative path
    section: str
    source_type: str  # "blog" | "project" | "resume" | "readme"
    date: str
    tags: list[str]

    # ── Canonical chunk_type discriminator ────────────────────────────────
    chunk_type: str  # "leaf" | "raptor_summary" | "question_proxy"

    # ── Leaf-only fields ──────────────────────────────────────────────────
    # Proper nouns and domain terms picked up by a capitalisation heuristic
    # at ingestion time. Drives the Qdrant keyword payload filter at query
    # time so every casing variant ("XSilica", "XSILICA", ...) matches.
    keywords: list[str]
    # 0-based position of this chunk within its source document, assigned by
    # heading_chunker at ingestion time. Enables ordered sibling expansion at
    # query time: retrieve.py can prefer adjacent chunks over arbitrary
    # members of the same document.
    chunk_index: int

    # ── raptor_summary-only fields ────────────────────────────────────────
    # Qdrant point UUIDs of the leaf chunks that were summarised to build
    # this cluster node; used at query time to expand a relevant cluster hit
    # back to its actual evidence passages.
    child_leaf_ids: list[str]

    # ── question_proxy-only fields ────────────────────────────────────────
    # Qdrant point UUID of the parent leaf chunk; resolves a question_proxy
    # hit to the underlying evidence passage at query time.
    parent_leaf_id: str

    # ── Legacy fields (kept for points that pre-date the current schema) ──
    raptor_level: int  # deprecated — use chunk_type instead
    linked_chunks: list[str]  # deprecated — superseded by child_leaf_ids
class Chunk(TypedDict):
    """One retrievable unit: a text passage plus its Qdrant payload metadata."""

    text: str
    metadata: ChunkMetadata
class PipelineState(TypedDict):
    """
    Per-turn state dict threaded through the pipeline graph nodes.

    Fields annotated with ``operator.add`` act as reducers: when several
    nodes (or parallel branches) write the same key, the lists are merged
    by concatenation.
    """
    query: str  # original user query (decontextualized_query holds the retrieval rewrite)
    query_complexity: str  # NOTE(review): classifier label — allowed values not visible here
    session_id: str
    query_embedding: Optional[list[float]]  # set by cache node, reused by retrieve
    expanded_queries: Annotated[list[str], operator.add]
    retrieved_chunks: Annotated[list[Chunk], operator.add]
    reranked_chunks: Annotated[list[Chunk], operator.add]
    answer: str  # final answer text for this turn
    sources: Annotated[list[SourceRef], operator.add]  # citations attached to the answer
    cached: bool  # presumably True when the answer came from the cache node — confirm
    cache_key: Optional[str]
    guard_passed: bool  # set by the guard node (which also extracts query_topic below)
    thinking: bool  # True while Gemini has signalled RAG is needed
    # Last N Q/A pairs for this session — injected into prompts for follow-up context.
    conversation_history: list  # NOTE(review): element type unspecified — likely Q/A pair records
    # Stage 2: rolling conversation summary (single paragraph, ≤150 tokens).
    # Injected into generate/gemini_fast instead of raw turn list when present.
    conversation_summary: Optional[str]
    # Stage 2: self-contained query rewritten before retrieval when the original
    # contains unresolved pronouns/references. Used for embedding; original query
    # is used for display and system prompt.
    decontextualized_query: Optional[str]
    # True when the current query explicitly criticises the previous answer.
    is_criticism: bool
    # Stage 1: True when submitted via a follow-up pill button.
    # Bypasses Gemini fast-path so pill follow-ups always produce cited RAG answers.
    is_followup: bool
    latency_ms: int  # presumably end-to-end turn latency in milliseconds — confirm
    error: Optional[str]  # error description when the turn failed, else None
    interaction_id: Optional[int]  # presumably the logged interaction row id — confirm
    # CRAG: counts retrieve node invocations; 2 = one retry was attempted.
    retrieval_attempts: int
    rewritten_query: Optional[str]  # NOTE(review): likely the CRAG retry rewrite — confirm
    # Top cross-encoder score from the last retrieve call. Used by CRAG routing.
    top_rerank_score: Optional[float]
    follow_ups: list[str]  # suggested follow-up questions (cf. is_followup pill buttons)
    # Which pipeline branch produced the final answer.
    path: Optional[str]
    # 1–3 word topic extracted from the query by the guard node.
    query_topic: Optional[str]
    # Stage 3: SELF-RAG critic scores (1–3 each). Logged to SQLite for training.
    critic_groundedness: Optional[int]  # all claims supported by a specific chunk
    critic_completeness: Optional[int]  # answer uses all relevant available chunks
    critic_specificity: Optional[int]  # answer contains specific names/numbers
    critic_quality: Optional[str]  # "high" | "medium" | "low"
    # Fix 1: Enumeration query classifier.
    # True when the query has enumeration intent ("list all projects", "how many blogs").
    # The enumerate_query node skips semantic retrieval and does a Qdrant payload-filter
    # scroll instead, returning a complete deduplicated title list.
    # Logged to SQLite so enumeration turns can be monitored separately from RAG turns.
    is_enumeration_query: bool
    # Bug 4: canonical name forms produced by expand_query (all casing variants of
    # proper nouns in the query). Fed into the BM25 query as a union so the sparse
    # component scores positively across "XSilica", "XSILICA", "xsilica", etc.
    query_canonical_forms: list[str]
    # RC-13: retrieval diagnostics logged per turn.
    sibling_expansion_count: Optional[int]  # chunks added via sibling expansion
    focused_source_type: Optional[str]  # e.g. "cv", "project", "blog", None