Spaces:
Running
Running
GitHub Actions committed on
Commit ·
8da917e
1
Parent(s): c44df3b
Deploy 8e14626
Browse files- app/api/chat.py +1 -1
- app/pipeline/nodes/gemini_fast.py +24 -4
- app/pipeline/nodes/retrieve.py +17 -1
- app/services/transcriber.py +22 -2
- tests/test_enumerate_query.py +6 -0
- tests/test_generate_not_found_fallback.py +20 -0
- tests/test_retrieve_query_normalization.py +9 -0
- tests/test_transcriber_normalization.py +15 -0
app/api/chat.py
CHANGED
|
@@ -18,7 +18,7 @@ _SSE_HEARTBEAT_SECONDS: float = 10.0
|
|
| 18 |
|
| 19 |
# Query pre-processing budgets must stay low to avoid delaying first byte.
|
| 20 |
_DECONTEXT_TIMEOUT_SECONDS: float = 0.35
|
| 21 |
-
_EXPANSION_TIMEOUT_SECONDS: float = 0.
|
| 22 |
|
| 23 |
# Phrases a visitor uses when telling the bot it gave a wrong answer.
|
| 24 |
# Matched on the lowercased raw message before any LLM call — O(1), zero cost.
|
|
|
|
| 18 |
|
| 19 |
# Query pre-processing budgets must stay low to avoid delaying first byte.
|
| 20 |
_DECONTEXT_TIMEOUT_SECONDS: float = 0.35
|
| 21 |
+
_EXPANSION_TIMEOUT_SECONDS: float = 0.60
|
| 22 |
|
| 23 |
# Phrases a visitor uses when telling the bot it gave a wrong answer.
|
| 24 |
# Matched on the lowercased raw message before any LLM call — O(1), zero cost.
|
app/pipeline/nodes/gemini_fast.py
CHANGED
|
@@ -27,6 +27,7 @@ from typing import Any
|
|
| 27 |
from langgraph.config import get_stream_writer
|
| 28 |
|
| 29 |
from app.core.portfolio_context import is_portfolio_relevant
|
|
|
|
| 30 |
from app.models.pipeline import PipelineState
|
| 31 |
from app.services.gemini_client import GeminiClient
|
| 32 |
from app.core.quality import is_low_trust
|
|
@@ -94,6 +95,8 @@ _TRIVIAL_PHRASES: frozenset[str] = frozenset({
|
|
| 94 |
"what do you do",
|
| 95 |
})
|
| 96 |
|
|
|
|
|
|
|
| 97 |
|
| 98 |
def _is_trivial(query: str) -> bool:
|
| 99 |
"""
|
|
@@ -131,6 +134,19 @@ def _is_complex(query: str) -> bool:
|
|
| 131 |
return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
| 135 |
"""
|
| 136 |
Returns a LangGraph-compatible async node function.
|
|
@@ -173,10 +189,14 @@ def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
|
| 173 |
|
| 174 |
complexity = "complex" if _is_complex(query) else "simple"
|
| 175 |
|
| 176 |
-
# Force RAG for
|
| 177 |
-
#
|
| 178 |
-
#
|
| 179 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
|
| 181 |
writer({"type": "status", "label": "Searching portfolio..."})
|
| 182 |
return {
|
|
|
|
| 27 |
from langgraph.config import get_stream_writer
|
| 28 |
|
| 29 |
from app.core.portfolio_context import is_portfolio_relevant
|
| 30 |
+
from app.core.portfolio_context import KNOWN_ORGS, KNOWN_PROJECTS
|
| 31 |
from app.models.pipeline import PipelineState
|
| 32 |
from app.services.gemini_client import GeminiClient
|
| 33 |
from app.core.quality import is_low_trust
|
|
|
|
| 95 |
"what do you do",
|
| 96 |
})
|
| 97 |
|
| 98 |
+
_ENTITY_SPECIFIC_NOUNS: frozenset[str] = KNOWN_PROJECTS | KNOWN_ORGS
|
| 99 |
+
|
| 100 |
|
| 101 |
def _is_trivial(query: str) -> bool:
|
| 102 |
"""
|
|
|
|
| 134 |
return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
|
| 135 |
|
| 136 |
|
| 137 |
+
def _is_entity_specific_portfolio_query(query: str) -> bool:
    """Return True when the query mentions a known project or organisation.

    Tokenises the lowercased query into alphanumeric words, then checks
    every single word and every adjacent word pair (bigram) against the
    entity-noun set built from KNOWN_PROJECTS and KNOWN_ORGS.
    """
    words = re.findall(r"[a-z0-9]+", query.lower())
    # Single-word entity names ("textops", "acme", ...).
    if any(word in _ENTITY_SPECIFIC_NOUNS for word in words):
        return True
    # Two-word entity names ("acme corp", ...), matched on adjacent pairs.
    return any(
        f"{first} {second}" in _ENTITY_SPECIFIC_NOUNS
        for first, second in zip(words, words[1:])
    )
|
| 148 |
+
|
| 149 |
+
|
| 150 |
def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
| 151 |
"""
|
| 152 |
Returns a LangGraph-compatible async node function.
|
|
|
|
| 189 |
|
| 190 |
complexity = "complex" if _is_complex(query) else "simple"
|
| 191 |
|
| 192 |
+
# Force RAG for entity-specific portfolio queries (project/org names).
|
| 193 |
+
# Broad intent-only phrasing (e.g., "what tech stack does he use") first
|
| 194 |
+
# goes through Gemini fast-path and falls back to RAG if low-trust.
|
| 195 |
+
if (
|
| 196 |
+
not _is_trivial(query)
|
| 197 |
+
and is_portfolio_relevant(query)
|
| 198 |
+
and _is_entity_specific_portfolio_query(query)
|
| 199 |
+
):
|
| 200 |
logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
|
| 201 |
writer({"type": "status", "label": "Searching portfolio..."})
|
| 202 |
return {
|
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -483,9 +483,25 @@ def make_retrieve_node(
|
|
| 483 |
and low_confidence
|
| 484 |
and top_score is not None
|
| 485 |
and top_score >= _MIN_RESCUE_SCORE
|
| 486 |
-
and (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
)
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
if not reranked or (low_confidence and not rescue_low_confidence):
|
| 490 |
return {
|
| 491 |
"answer": "",
|
|
|
|
| 483 |
and low_confidence
|
| 484 |
and top_score is not None
|
| 485 |
and top_score >= _MIN_RESCUE_SCORE
|
| 486 |
+
and (
|
| 487 |
+
capability_query
|
| 488 |
+
or _focused_source_type(retrieval_query) is not None
|
| 489 |
+
or attempts >= 1
|
| 490 |
+
or len(unique_chunks) >= 6
|
| 491 |
+
)
|
| 492 |
)
|
| 493 |
|
| 494 |
+
if low_confidence and not rescue_low_confidence and attempts >= 1 and unique_chunks:
|
| 495 |
+
writer(
|
| 496 |
+
{
|
| 497 |
+
"type": "status",
|
| 498 |
+
"label": "Using broader retrieval fallback after low-confidence rerank...",
|
| 499 |
+
}
|
| 500 |
+
)
|
| 501 |
+
reranked = unique_chunks[:10]
|
| 502 |
+
top_score = reranked[0]["metadata"].get("rerank_score", top_score)
|
| 503 |
+
low_confidence = False
|
| 504 |
+
|
| 505 |
if not reranked or (low_confidence and not rescue_low_confidence):
|
| 506 |
return {
|
| 507 |
"answer": "",
|
app/services/transcriber.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import asyncio
|
|
|
|
| 2 |
|
| 3 |
import httpx
|
| 4 |
from groq import AsyncGroq
|
|
@@ -6,6 +7,25 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fi
|
|
| 6 |
|
| 7 |
from app.core.exceptions import GenerationError
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
class GroqTranscriber:
|
| 11 |
def __init__(
|
|
@@ -46,11 +66,11 @@ class GroqTranscriber:
|
|
| 46 |
)
|
| 47 |
text = getattr(response, "text", None)
|
| 48 |
if isinstance(text, str) and text.strip():
|
| 49 |
-
return
|
| 50 |
if isinstance(response, dict):
|
| 51 |
value = response.get("text")
|
| 52 |
if isinstance(value, str) and value.strip():
|
| 53 |
-
return
|
| 54 |
raise GenerationError("Transcription response did not contain text")
|
| 55 |
|
| 56 |
try:
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import re
|
| 3 |
|
| 4 |
import httpx
|
| 5 |
from groq import AsyncGroq
|
|
|
|
| 7 |
|
| 8 |
from app.core.exceptions import GenerationError
|
| 9 |
|
| 10 |
+
_FILLER_PREFIX_RE = re.compile(r"^\s*(uh+|um+|erm+|like|you know|please|hey)\s+", re.IGNORECASE)
|
| 11 |
+
_MULTISPACE_RE = re.compile(r"\s+")
|
| 12 |
+
_TRANSCRIPT_REPLACEMENTS: tuple[tuple[re.Pattern[str], str], ...] = (
|
| 13 |
+
(re.compile(r"\bwalk experience\b", re.IGNORECASE), "work experience"),
|
| 14 |
+
(re.compile(r"\btext stack\b", re.IGNORECASE), "tech stack"),
|
| 15 |
+
(re.compile(r"\bprofessional sitting\b", re.IGNORECASE), "professional setting"),
|
| 16 |
+
(re.compile(r"\btech stocks\b", re.IGNORECASE), "tech stack"),
|
| 17 |
+
(re.compile(r"\bwhat tech stack does he\s+used\b", re.IGNORECASE), "what tech stack does he use"),
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _normalise_transcript_text(text: str) -> str:
|
| 22 |
+
cleaned = text.strip()
|
| 23 |
+
cleaned = _FILLER_PREFIX_RE.sub("", cleaned)
|
| 24 |
+
for pattern, replacement in _TRANSCRIPT_REPLACEMENTS:
|
| 25 |
+
cleaned = pattern.sub(replacement, cleaned)
|
| 26 |
+
cleaned = _MULTISPACE_RE.sub(" ", cleaned)
|
| 27 |
+
return cleaned.strip()
|
| 28 |
+
|
| 29 |
|
| 30 |
class GroqTranscriber:
|
| 31 |
def __init__(
|
|
|
|
| 66 |
)
|
| 67 |
text = getattr(response, "text", None)
|
| 68 |
if isinstance(text, str) and text.strip():
|
| 69 |
+
return _normalise_transcript_text(text)
|
| 70 |
if isinstance(response, dict):
|
| 71 |
value = response.get("text")
|
| 72 |
if isinstance(value, str) and value.strip():
|
| 73 |
+
return _normalise_transcript_text(value)
|
| 74 |
raise GenerationError("Transcription response did not contain text")
|
| 75 |
|
| 76 |
try:
|
tests/test_enumerate_query.py
CHANGED
|
@@ -220,3 +220,9 @@ class TestIsPortfolioRelevant:
|
|
| 220 |
|
| 221 |
def test_tech_stack_intent_is_relevant(self):
|
| 222 |
assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
def test_tech_stack_intent_is_relevant(self):
|
| 222 |
assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
|
| 223 |
+
|
| 224 |
+
    def test_professional_setting_work_experience_is_relevant(self):
        # Voice-transcript phrasing ("professional setting", no question
        # mark) must still be classified as portfolio-relevant.
        assert is_portfolio_relevant("What work experience do you have in a professional setting") is True

    def test_tech_stack_use_phrase_is_relevant(self):
        # Broad "tech stack" intent with no entity name is still relevant.
        assert is_portfolio_relevant("What tech stack does he use") is True
|
tests/test_generate_not_found_fallback.py
CHANGED
|
@@ -47,3 +47,23 @@ async def test_portfolio_specific_query_forces_rag() -> None:
|
|
| 47 |
assert "answer" not in result
|
| 48 |
assert result["expanded_queries"] == ["How does TextOps work?"]
|
| 49 |
gemini.fast_answer.assert_not_awaited()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
assert "answer" not in result
|
| 48 |
assert result["expanded_queries"] == ["How does TextOps work?"]
|
| 49 |
gemini.fast_answer.assert_not_awaited()
|
| 50 |
+
|
| 51 |
+
@pytest.mark.asyncio
async def test_broad_portfolio_intent_can_use_gemini_fast_path() -> None:
    """Broad, intent-only phrasing (no project/org name) stays on the
    Gemini fast path instead of being forced into RAG."""
    gemini = MagicMock()
    gemini.is_configured = True
    # fast_answer returns (answer_text, metadata) — metadata unused here.
    gemini.fast_answer = AsyncMock(return_value=("He uses a broad stack.", None))

    node = make_gemini_fast_node(gemini)
    state = {
        "query": "What tech stack does he use?",
        "is_followup": False,
        "conversation_history": [],
    }

    # Stub the LangGraph stream writer so status events are swallowed.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = await node(state)

    assert result["answer"] == "He uses a broad stack."
    assert result["path"] == "gemini_fast"
    gemini.fast_answer.assert_awaited_once()
|
tests/test_retrieve_query_normalization.py
CHANGED
|
@@ -18,3 +18,12 @@ def test_capability_query_detection_handles_punctuation() -> None:
|
|
| 18 |
|
| 19 |
def test_focus_source_type_for_tech_stack_query() -> None:
|
| 20 |
assert _focused_source_type("What technologies and skills does he work with?") == "cv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def test_focus_source_type_for_tech_stack_query() -> None:
|
| 20 |
assert _focused_source_type("What technologies and skills does he work with?") == "cv"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_focus_source_type_for_professional_work_experience_query() -> None:
    # Transcript-style phrasing without punctuation should still focus
    # retrieval on the CV source type.
    query = "What work experience do you have in a professional setting"
    assert _focused_source_type(query) == "cv"


def test_focus_source_type_for_tech_stack_use_query() -> None:
    # "tech stack ... use" wording also maps to the CV source type.
    assert _focused_source_type("What tech stack does he use") == "cv"
|
tests/test_transcriber_normalization.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.transcriber import _normalise_transcript_text


def test_normalise_walk_experience_to_work_experience() -> None:
    """Leading filler is dropped and 'walk experience' is corrected."""
    heard = "uh what is his walk experience in a professional setting"
    meant = "what is his work experience in a professional setting"
    assert _normalise_transcript_text(heard) == meant


def test_normalise_text_stack_to_tech_stack() -> None:
    """'text stack' mis-hearing is rewritten to 'tech stack'."""
    heard = "what text stack does he use"
    assert _normalise_transcript_text(heard) == "what tech stack does he use"


def test_keeps_clean_transcript_unchanged() -> None:
    """A transcript with no fillers or mis-hearings passes through as-is."""
    clean = "What technologies and skills does he work with?"
    assert _normalise_transcript_text(clean) == clean
|