Spaces:
Running
Running
GitHub Actions committed on
Commit ·
8fdc5ad
1
Parent(s): d1766f7
Deploy 5383798
Browse files
- app/core/config.py +3 -5
- app/models/pipeline.py +8 -0
- app/pipeline/nodes/gemini_fast.py +9 -15
- app/pipeline/nodes/generate.py +37 -20
- app/pipeline/nodes/log_eval.py +7 -2
- app/pipeline/nodes/retrieve.py +17 -2
- app/services/gemini_context.toon +23 -17
- app/services/reranker.py +4 -1
app/core/config.py
CHANGED
|
@@ -44,12 +44,10 @@ class Settings(BaseSettings):
|
|
| 44 |
# HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
|
| 45 |
DB_PATH: str = "sqlite.db"
|
| 46 |
|
| 47 |
-
# Gemini fast-path —
|
| 48 |
-
#
|
| 49 |
-
#
|
| 50 |
-
# script (refresh_gemini_context.py) and MUST NOT appear in any chat logs.
|
| 51 |
GEMINI_API_KEY: Optional[str] = None
|
| 52 |
-
GEMINI_PROCESSING_API_KEY: Optional[str] = None
|
| 53 |
GEMINI_MODEL: str = "gemini-2.5-flash-lite"
|
| 54 |
GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"
|
| 55 |
|
|
|
|
| 44 |
# HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
|
| 45 |
DB_PATH: str = "sqlite.db"
|
| 46 |
|
| 47 |
+
# Gemini fast-path — live query traffic only.
|
| 48 |
+
# GEMINI_CONTEXT_PATH points to the manually maintained context file.
|
| 49 |
+
# Edit backend/app/services/gemini_context.toon to update fast-path context.
|
|
|
|
| 50 |
GEMINI_API_KEY: Optional[str] = None
|
|
|
|
| 51 |
GEMINI_MODEL: str = "gemini-2.5-flash-lite"
|
| 52 |
GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"
|
| 53 |
|
app/models/pipeline.py
CHANGED
|
@@ -30,6 +30,11 @@ class ChunkMetadata(TypedDict, total=False):
|
|
| 30 |
# ingestion time. Used for Qdrant keyword payload filter at query time so
|
| 31 |
# canonical name variants ("XSilica", "XSILICA") all match.
|
| 32 |
keywords: list[str]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# ── raptor_summary-only fields ────────────────────────────────────────────
|
| 34 |
# Qdrant point UUIDs of the leaf chunks that were summarised to produce
|
| 35 |
# this cluster node. Used at query time to expand relevant cluster hits
|
|
@@ -105,3 +110,6 @@ class PipelineState(TypedDict):
|
|
| 105 |
# proper nouns in the query). Fed into the BM25 query as a union so the sparse
|
| 106 |
# component scores positively across "XSilica", "XSILICA", "xsilica", etc.
|
| 107 |
query_canonical_forms: list[str]
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# ingestion time. Used for Qdrant keyword payload filter at query time so
|
| 31 |
# canonical name variants ("XSilica", "XSILICA") all match.
|
| 32 |
keywords: list[str]
|
| 33 |
+
# ── Positional ordering field ─────────────────────────────────────────────
|
| 34 |
+
# 0-based position of this chunk within its source document, set at ingestion
|
| 35 |
+
# time by heading_chunker. Used for ordered sibling expansion at query time
|
| 36 |
+
# so retrieve.py can prefer adjacent chunks over arbitrary doc members.
|
| 37 |
+
chunk_index: int
|
| 38 |
# ── raptor_summary-only fields ────────────────────────────────────────────
|
| 39 |
# Qdrant point UUIDs of the leaf chunks that were summarised to produce
|
| 40 |
# this cluster node. Used at query time to expand relevant cluster hits
|
|
|
|
| 110 |
# proper nouns in the query). Fed into the BM25 query as a union so the sparse
|
| 111 |
# component scores positively across "XSilica", "XSILICA", "xsilica", etc.
|
| 112 |
query_canonical_forms: list[str]
|
| 113 |
+
# RC-13: retrieval diagnostics logged per turn.
|
| 114 |
+
sibling_expansion_count: Optional[int] # chunks added via sibling expansion
|
| 115 |
+
focused_source_type: Optional[str] # e.g. "cv", "project", "blog", None
|
app/pipeline/nodes/gemini_fast.py
CHANGED
|
@@ -98,23 +98,17 @@ def _is_trivial(query: str) -> bool:
|
|
| 98 |
"""
|
| 99 |
True when the query is pure navigation — safe for Gemini fast-path.
|
| 100 |
|
| 101 |
-
A query is trivial when
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
RAG by default. RAG calls Qdrant. Qdrant has the data.
|
| 110 |
"""
|
| 111 |
stripped = query.strip().rstrip("?!.")
|
| 112 |
-
|
| 113 |
-
return True
|
| 114 |
-
words = query.split()
|
| 115 |
-
if len(words) < 4 and not _NE_RE.search(query):
|
| 116 |
-
return True
|
| 117 |
-
return False
|
| 118 |
|
| 119 |
|
| 120 |
def _is_complex(query: str) -> bool:
|
|
|
|
| 98 |
"""
|
| 99 |
True when the query is pure navigation — safe for Gemini fast-path.
|
| 100 |
|
| 101 |
+
A query is trivial ONLY when its stripped form exactly matches a known
|
| 102 |
+
navigation phrase from _TRIVIAL_PHRASES. All other queries — including
|
| 103 |
+
short career/skills/internship questions — route to full RAG so they
|
| 104 |
+
receive citations backed by Qdrant evidence, not the stale TOON summary.
|
| 105 |
+
|
| 106 |
+
Removing the <4-word bypass (RC-10): queries like "his skills?" or
|
| 107 |
+
"any internships?" previously hit the TOON fast-path and could return
|
| 108 |
+
un-cited, outdated answers. They now always go through retrieval.
|
|
|
|
| 109 |
"""
|
| 110 |
stripped = query.strip().rstrip("?!.")
|
| 111 |
+
return stripped.lower() in _TRIVIAL_PHRASES
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
def _is_complex(query: str) -> bool:
|
app/pipeline/nodes/generate.py
CHANGED
|
@@ -162,31 +162,48 @@ def _format_history(state: "PipelineState") -> str:
|
|
| 162 |
|
| 163 |
def _merge_by_source(chunks: list) -> list[dict]:
|
| 164 |
"""
|
| 165 |
-
Collapse chunks that share the same source_url (or source_title
|
| 166 |
-
absent) into a single merged chunk.
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
| 173 |
"""
|
| 174 |
-
|
|
|
|
| 175 |
order: list[str] = []
|
| 176 |
for chunk in chunks:
|
| 177 |
meta = chunk["metadata"]
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
| 185 |
else:
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
|
| 192 |
def _dedup_sources(source_refs: list[SourceRef], limit: int | None = None) -> list[SourceRef]:
|
|
|
|
| 162 |
|
| 163 |
def _merge_by_source(chunks: list) -> list[dict]:
|
| 164 |
"""
|
| 165 |
+
Collapse chunks that share the same source_url + section (or source_title
|
| 166 |
+
when both URL and section are absent) into a single merged chunk.
|
| 167 |
+
|
| 168 |
+
RC-3 fix: keying by URL alone collapsed ALL resume chunks into one [1] blob
|
| 169 |
+
because every resume chunk has the same PDF URL. Keying by URL::section
|
| 170 |
+
gives each section its own [N] number, so Work Experience, Education, and
|
| 171 |
+
Skills are separately citable.
|
| 172 |
+
|
| 173 |
+
Chunks within each group are sorted by chunk_index (document order) before
|
| 174 |
+
concatenation so the LLM reads sections top-to-bottom, not in RRF score order.
|
| 175 |
"""
|
| 176 |
+
# Collect all chunks per group key, preserving insertion order of groups.
|
| 177 |
+
groups: dict[str, list] = {}
|
| 178 |
order: list[str] = []
|
| 179 |
for chunk in chunks:
|
| 180 |
meta = chunk["metadata"]
|
| 181 |
+
url = (meta.get("source_url") or "").strip()
|
| 182 |
+
section = (meta.get("section") or "").strip()
|
| 183 |
+
# Use url::section when both are available, url alone when section is
|
| 184 |
+
# empty, title alone when URL is also empty.
|
| 185 |
+
if url and section:
|
| 186 |
+
key = f"{url}::{section}"
|
| 187 |
+
elif url:
|
| 188 |
+
key = url
|
| 189 |
else:
|
| 190 |
+
key = meta.get("source_title", "")
|
| 191 |
+
if key not in groups:
|
| 192 |
+
groups[key] = []
|
| 193 |
+
order.append(key)
|
| 194 |
+
groups[key].append(chunk)
|
| 195 |
+
|
| 196 |
+
merged: list[dict] = []
|
| 197 |
+
for key in order:
|
| 198 |
+
group = groups[key]
|
| 199 |
+
# Sort by chunk_index so we read the document top-to-bottom.
|
| 200 |
+
group.sort(key=lambda c: c["metadata"].get("chunk_index", 0))
|
| 201 |
+
# Use first chunk's metadata as canonical; deep-copy so we don't mutate state.
|
| 202 |
+
canonical_meta = dict(group[0]["metadata"])
|
| 203 |
+
text_parts = [c["text"] for c in group]
|
| 204 |
+
merged_text = "\n\n[...continued from same source...]\n\n".join(text_parts)
|
| 205 |
+
merged.append({"text": merged_text, "metadata": canonical_meta})
|
| 206 |
+
return merged
|
| 207 |
|
| 208 |
|
| 209 |
def _dedup_sources(source_refs: list[SourceRef], limit: int | None = None) -> list[SourceRef]:
|
app/pipeline/nodes/log_eval.py
CHANGED
|
@@ -84,6 +84,9 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
|
|
| 84 |
("critic_quality", "TEXT"),
|
| 85 |
# Fix 1: enumeration classifier flag
|
| 86 |
("is_enumeration_query", "BOOLEAN DEFAULT 0"),
|
|
|
|
|
|
|
|
|
|
| 87 |
]:
|
| 88 |
try:
|
| 89 |
conn.execute(f"ALTER TABLE interactions ADD COLUMN {col} {definition}")
|
|
@@ -96,8 +99,8 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
|
|
| 96 |
(timestamp, session_id, query, answer, chunks_used, rerank_scores,
|
| 97 |
reranked_chunks_json, latency_ms, cached, path,
|
| 98 |
critic_groundedness, critic_completeness, critic_specificity, critic_quality,
|
| 99 |
-
is_enumeration_query)
|
| 100 |
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 101 |
""",
|
| 102 |
(
|
| 103 |
datetime.now(tz=timezone.utc).isoformat(),
|
|
@@ -115,6 +118,8 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
|
|
| 115 |
state.get("critic_specificity"),
|
| 116 |
state.get("critic_quality"),
|
| 117 |
state.get("is_enumeration_query", False),
|
|
|
|
|
|
|
| 118 |
),
|
| 119 |
)
|
| 120 |
return cursor.lastrowid # type: ignore[return-value]
|
|
|
|
| 84 |
("critic_quality", "TEXT"),
|
| 85 |
# Fix 1: enumeration classifier flag
|
| 86 |
("is_enumeration_query", "BOOLEAN DEFAULT 0"),
|
| 87 |
+
# RC-13: retrieval diagnostics
|
| 88 |
+
("sibling_expansion_count", "INTEGER"),
|
| 89 |
+
("focused_source_type", "TEXT"),
|
| 90 |
]:
|
| 91 |
try:
|
| 92 |
conn.execute(f"ALTER TABLE interactions ADD COLUMN {col} {definition}")
|
|
|
|
| 99 |
(timestamp, session_id, query, answer, chunks_used, rerank_scores,
|
| 100 |
reranked_chunks_json, latency_ms, cached, path,
|
| 101 |
critic_groundedness, critic_completeness, critic_specificity, critic_quality,
|
| 102 |
+
is_enumeration_query, sibling_expansion_count, focused_source_type)
|
| 103 |
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 104 |
""",
|
| 105 |
(
|
| 106 |
datetime.now(tz=timezone.utc).isoformat(),
|
|
|
|
| 118 |
state.get("critic_specificity"),
|
| 119 |
state.get("critic_quality"),
|
| 120 |
state.get("is_enumeration_query", False),
|
| 121 |
+
state.get("sibling_expansion_count"),
|
| 122 |
+
state.get("focused_source_type"),
|
| 123 |
),
|
| 124 |
)
|
| 125 |
return cursor.lastrowid # type: ignore[return-value]
|
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -272,8 +272,21 @@ def make_retrieve_node(
|
|
| 272 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 273 |
break
|
| 274 |
doc_id = seed["metadata"]["doc_id"]
|
|
|
|
| 275 |
siblings = vector_store.fetch_by_doc_id(doc_id, limit=_SIBLING_FETCH_LIMIT)
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
fp = f"{sib['metadata']['doc_id']}::{sib['metadata']['section']}"
|
| 278 |
if fp not in sibling_fps:
|
| 279 |
sibling_fps.add(fp)
|
|
@@ -282,7 +295,7 @@ def make_retrieve_node(
|
|
| 282 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 283 |
break
|
| 284 |
|
| 285 |
-
reranked = await reranker.rerank(retrieval_query, unique_chunks, top_k=
|
| 286 |
|
| 287 |
# Guard: assert all reranker inputs were leaf chunks.
|
| 288 |
# Non-leaf nodes (raptor_summary / question_proxy) reaching the reranker
|
|
@@ -368,6 +381,8 @@ def make_retrieve_node(
|
|
| 368 |
"reranked_chunks": diverse_chunks,
|
| 369 |
"retrieval_attempts": attempts + 1,
|
| 370 |
"top_rerank_score": top_score,
|
|
|
|
|
|
|
| 371 |
}
|
| 372 |
|
| 373 |
return retrieve_node
|
|
|
|
| 272 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 273 |
break
|
| 274 |
doc_id = seed["metadata"]["doc_id"]
|
| 275 |
+
seed_idx = seed["metadata"].get("chunk_index", -1)
|
| 276 |
siblings = vector_store.fetch_by_doc_id(doc_id, limit=_SIBLING_FETCH_LIMIT)
|
| 277 |
+
# RC-2 fix: sort by chunk_index so we can prefer adjacent chunks.
|
| 278 |
+
siblings.sort(key=lambda c: c["metadata"].get("chunk_index", 0))
|
| 279 |
+
# If seed position is known, prefer adjacent indices (±2) first,
|
| 280 |
+
# then fall through to remaining siblings in document order.
|
| 281 |
+
if seed_idx >= 0:
|
| 282 |
+
adjacent = [s for s in siblings
|
| 283 |
+
if abs(s["metadata"].get("chunk_index", -999) - seed_idx) <= 2]
|
| 284 |
+
rest = [s for s in siblings
|
| 285 |
+
if abs(s["metadata"].get("chunk_index", -999) - seed_idx) > 2]
|
| 286 |
+
ordered_siblings = adjacent + rest
|
| 287 |
+
else:
|
| 288 |
+
ordered_siblings = siblings
|
| 289 |
+
for sib in ordered_siblings:
|
| 290 |
fp = f"{sib['metadata']['doc_id']}::{sib['metadata']['section']}"
|
| 291 |
if fp not in sibling_fps:
|
| 292 |
sibling_fps.add(fp)
|
|
|
|
| 295 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 296 |
break
|
| 297 |
|
| 298 |
+
reranked = await reranker.rerank(retrieval_query, unique_chunks, top_k=10) # RC-5: raised from 7
|
| 299 |
|
| 300 |
# Guard: assert all reranker inputs were leaf chunks.
|
| 301 |
# Non-leaf nodes (raptor_summary / question_proxy) reaching the reranker
|
|
|
|
| 381 |
"reranked_chunks": diverse_chunks,
|
| 382 |
"retrieval_attempts": attempts + 1,
|
| 383 |
"top_rerank_score": top_score,
|
| 384 |
+
"sibling_expansion_count": sibling_count if unique_chunks else 0, # RC-13
|
| 385 |
+
"focused_source_type": focused_type, # RC-13
|
| 386 |
}
|
| 387 |
|
| 388 |
return retrieve_node
|
app/services/gemini_context.toon
CHANGED
|
@@ -1,18 +1,24 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
#
|
| 5 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
textops,Engineered polyglot microservices text editor with custom Go API gateway achieving >5ms latency overhead. Migrated to serverless architecture reducing operational costs by 94% while maintaining 99.9% uptime SLOs.,"['Kubernetes', 'Docker', 'AWS', 'GCP', 'Prometheus', 'GitLab CI/CD']",/projects/textops,""
|
| 16 |
-
blogs[2]{title,summary,url,tags}:
|
| 17 |
-
60 FPS Object Detection on Android using YOLOv8,"How I built a realtime Android vision loop with YOLO + NCNN, IOU tracking, and distance-adaptive PID control all running at 60 FPS.",/blog/assistive-vision,"['Computer Vision', 'Real-Time Systems', 'Android']"
|
| 18 |
-
Mongo Tom is back with GPT-5,"How I used JSON-structured prompts with fictional character framing to bypass safety guardrails in GPT-5, Claude, Gemini, and Grok.",/blog/prompt-engineering-jailbreak,"['Prompt Engineering', 'LLMs', 'AI Safety']"
|
|
|
|
| 1 |
+
# PersonaBot — Gemini fast-path context
|
| 2 |
+
# Manually maintained. Edit this file whenever your portfolio changes.
|
| 3 |
+
# This file is loaded once at startup and passed to Gemini as primary context
|
| 4 |
+
# for conversational/trivial questions. Evidence-based questions always go
|
| 5 |
+
# through full RAG (Qdrant retrieval) regardless of what is written here.
|
| 6 |
+
#
|
| 7 |
+
# Format: free text. Write naturally — Gemini reads this as a context block.
|
| 8 |
+
# Keep it concise; ~500-800 words is ideal. Longer = more tokens per request.
|
| 9 |
+
#
|
| 10 |
+
# HINT: Only include things that are genuinely fast-path-safe:
|
| 11 |
+
# - Who Darshan is (1-2 sentences)
|
| 12 |
+
# - Overview of the portfolio site
|
| 13 |
+
# - What topics the bot can answer questions about
|
| 14 |
+
# Do NOT put internship details, tech stacks, or project specifics here —
|
| 15 |
+
# those answers must come from Qdrant, fully cited.
|
| 16 |
|
| 17 |
+
Darshan Chheda is a software engineer and CS student whose portfolio covers
|
| 18 |
+
projects in systems programming, distributed systems, RAG/LLM systems,
|
| 19 |
+
computer vision, and full-stack web development.
|
| 20 |
+
|
| 21 |
+
This portfolio chatbot can answer questions about his projects, blog posts,
|
| 22 |
+
technical skills, work experience, education, hackathons, and general background.
|
| 23 |
+
For any specific factual question, it searches his actual portfolio content
|
| 24 |
+
and provides cited answers.
|
|
|
|
|
|
|
|
|
|
|
|
app/services/reranker.py
CHANGED
|
@@ -37,7 +37,10 @@ class Reranker:
|
|
| 37 |
self._min_score = 0.0
|
| 38 |
return []
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
if self._remote:
|
| 43 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
|
|
| 37 |
self._min_score = 0.0
|
| 38 |
return []
|
| 39 |
|
| 40 |
+
# RC-12: prefer contextualised_text (doc title + section prefix) so the
|
| 41 |
+
# cross-encoder sees the same enriched text as the dense retriever.
|
| 42 |
+
# Falls back to raw chunk text for old points that pre-date contextualisation.
|
| 43 |
+
texts = [chunk.get("contextualised_text") or chunk["text"] for chunk in chunks]
|
| 44 |
|
| 45 |
if self._remote:
|
| 46 |
async with httpx.AsyncClient(timeout=30.0) as client:
|