GitHub Actions committed on
Commit
8da917e
·
1 Parent(s): c44df3b

Deploy 8e14626

Browse files
app/api/chat.py CHANGED
@@ -18,7 +18,7 @@ _SSE_HEARTBEAT_SECONDS: float = 10.0
18
 
19
  # Query pre-processing budgets must stay low to avoid delaying first byte.
20
  _DECONTEXT_TIMEOUT_SECONDS: float = 0.35
21
- _EXPANSION_TIMEOUT_SECONDS: float = 0.25
22
 
23
  # Phrases a visitor uses when telling the bot it gave a wrong answer.
24
  # Matched on the lowercased raw message before any LLM call — O(1), zero cost.
 
18
 
19
  # Query pre-processing budgets must stay low to avoid delaying first byte.
20
  _DECONTEXT_TIMEOUT_SECONDS: float = 0.35
21
+ _EXPANSION_TIMEOUT_SECONDS: float = 0.60
22
 
23
  # Phrases a visitor uses when telling the bot it gave a wrong answer.
24
  # Matched on the lowercased raw message before any LLM call — O(1), zero cost.
app/pipeline/nodes/gemini_fast.py CHANGED
@@ -27,6 +27,7 @@ from typing import Any
27
  from langgraph.config import get_stream_writer
28
 
29
  from app.core.portfolio_context import is_portfolio_relevant
 
30
  from app.models.pipeline import PipelineState
31
  from app.services.gemini_client import GeminiClient
32
  from app.core.quality import is_low_trust
@@ -94,6 +95,8 @@ _TRIVIAL_PHRASES: frozenset[str] = frozenset({
94
  "what do you do",
95
  })
96
 
 
 
97
 
98
  def _is_trivial(query: str) -> bool:
99
  """
@@ -131,6 +134,19 @@ def _is_complex(query: str) -> bool:
131
  return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
135
  """
136
  Returns a LangGraph-compatible async node function.
@@ -173,10 +189,14 @@ def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
173
 
174
  complexity = "complex" if _is_complex(query) else "simple"
175
 
176
- # Force RAG for portfolio-relevant factual queries.
177
- # Conversational / non-portfolio messages are allowed to use Gemini
178
- # so they do not fall through to a generic not-found RAG response.
179
- if not _is_trivial(query) and is_portfolio_relevant(query):
 
 
 
 
180
  logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
181
  writer({"type": "status", "label": "Searching portfolio..."})
182
  return {
 
27
  from langgraph.config import get_stream_writer
28
 
29
  from app.core.portfolio_context import is_portfolio_relevant
30
+ from app.core.portfolio_context import KNOWN_ORGS, KNOWN_PROJECTS
31
  from app.models.pipeline import PipelineState
32
  from app.services.gemini_client import GeminiClient
33
  from app.core.quality import is_low_trust
 
95
  "what do you do",
96
  })
97
 
98
+ _ENTITY_SPECIFIC_NOUNS: frozenset[str] = KNOWN_PROJECTS | KNOWN_ORGS
99
+
100
 
101
  def _is_trivial(query: str) -> bool:
102
  """
 
134
  return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
135
 
136
 
137
+ def _is_entity_specific_portfolio_query(query: str) -> bool:
138
+ tokens = re.findall(r"[a-z0-9]+", query.lower())
139
+ if not tokens:
140
+ return False
141
+ for token in tokens:
142
+ if token in _ENTITY_SPECIFIC_NOUNS:
143
+ return True
144
+ for a, b in zip(tokens, tokens[1:]):
145
+ if f"{a} {b}" in _ENTITY_SPECIFIC_NOUNS:
146
+ return True
147
+ return False
148
+
149
+
150
  def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
151
  """
152
  Returns a LangGraph-compatible async node function.
 
189
 
190
  complexity = "complex" if _is_complex(query) else "simple"
191
 
192
+ # Force RAG for entity-specific portfolio queries (project/org names).
193
+ # Broad intent-only phrasing (e.g., "what tech stack does he use") first
194
+ # goes through Gemini fast-path and falls back to RAG if low-trust.
195
+ if (
196
+ not _is_trivial(query)
197
+ and is_portfolio_relevant(query)
198
+ and _is_entity_specific_portfolio_query(query)
199
+ ):
200
  logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
201
  writer({"type": "status", "label": "Searching portfolio..."})
202
  return {
app/pipeline/nodes/retrieve.py CHANGED
@@ -483,9 +483,25 @@ def make_retrieve_node(
483
  and low_confidence
484
  and top_score is not None
485
  and top_score >= _MIN_RESCUE_SCORE
486
- and (capability_query or _focused_source_type(retrieval_query) is not None)
 
 
 
 
 
487
  )
488
 
 
 
 
 
 
 
 
 
 
 
 
489
  if not reranked or (low_confidence and not rescue_low_confidence):
490
  return {
491
  "answer": "",
 
483
  and low_confidence
484
  and top_score is not None
485
  and top_score >= _MIN_RESCUE_SCORE
486
+ and (
487
+ capability_query
488
+ or _focused_source_type(retrieval_query) is not None
489
+ or attempts >= 1
490
+ or len(unique_chunks) >= 6
491
+ )
492
  )
493
 
494
+ if low_confidence and not rescue_low_confidence and attempts >= 1 and unique_chunks:
495
+ writer(
496
+ {
497
+ "type": "status",
498
+ "label": "Using broader retrieval fallback after low-confidence rerank...",
499
+ }
500
+ )
501
+ reranked = unique_chunks[:10]
502
+ top_score = reranked[0]["metadata"].get("rerank_score", top_score)
503
+ low_confidence = False
504
+
505
  if not reranked or (low_confidence and not rescue_low_confidence):
506
  return {
507
  "answer": "",
app/services/transcriber.py CHANGED
@@ -1,4 +1,5 @@
1
  import asyncio
 
2
 
3
  import httpx
4
  from groq import AsyncGroq
@@ -6,6 +7,25 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fi
6
 
7
  from app.core.exceptions import GenerationError
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class GroqTranscriber:
11
  def __init__(
@@ -46,11 +66,11 @@ class GroqTranscriber:
46
  )
47
  text = getattr(response, "text", None)
48
  if isinstance(text, str) and text.strip():
49
- return text.strip()
50
  if isinstance(response, dict):
51
  value = response.get("text")
52
  if isinstance(value, str) and value.strip():
53
- return value.strip()
54
  raise GenerationError("Transcription response did not contain text")
55
 
56
  try:
 
1
  import asyncio
2
+ import re
3
 
4
  import httpx
5
  from groq import AsyncGroq
 
7
 
8
  from app.core.exceptions import GenerationError
9
 
10
+ _FILLER_PREFIX_RE = re.compile(r"^\s*(uh+|um+|erm+|like|you know|please|hey)\s+", re.IGNORECASE)
11
+ _MULTISPACE_RE = re.compile(r"\s+")
12
+ _TRANSCRIPT_REPLACEMENTS: tuple[tuple[re.Pattern[str], str], ...] = (
13
+ (re.compile(r"\bwalk experience\b", re.IGNORECASE), "work experience"),
14
+ (re.compile(r"\btext stack\b", re.IGNORECASE), "tech stack"),
15
+ (re.compile(r"\bprofessional sitting\b", re.IGNORECASE), "professional setting"),
16
+ (re.compile(r"\btech stocks\b", re.IGNORECASE), "tech stack"),
17
+ (re.compile(r"\bwhat tech stack does he\s+used\b", re.IGNORECASE), "what tech stack does he use"),
18
+ )
19
+
20
+
21
+ def _normalise_transcript_text(text: str) -> str:
22
+ cleaned = text.strip()
23
+ cleaned = _FILLER_PREFIX_RE.sub("", cleaned)
24
+ for pattern, replacement in _TRANSCRIPT_REPLACEMENTS:
25
+ cleaned = pattern.sub(replacement, cleaned)
26
+ cleaned = _MULTISPACE_RE.sub(" ", cleaned)
27
+ return cleaned.strip()
28
+
29
 
30
  class GroqTranscriber:
31
  def __init__(
 
66
  )
67
  text = getattr(response, "text", None)
68
  if isinstance(text, str) and text.strip():
69
+ return _normalise_transcript_text(text)
70
  if isinstance(response, dict):
71
  value = response.get("text")
72
  if isinstance(value, str) and value.strip():
73
+ return _normalise_transcript_text(value)
74
  raise GenerationError("Transcription response did not contain text")
75
 
76
  try:
tests/test_enumerate_query.py CHANGED
@@ -220,3 +220,9 @@ class TestIsPortfolioRelevant:
220
 
221
  def test_tech_stack_intent_is_relevant(self):
222
  assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
 
 
 
 
 
 
 
220
 
221
  def test_tech_stack_intent_is_relevant(self):
222
  assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
223
+
224
+ def test_professional_setting_work_experience_is_relevant(self):
225
+ assert is_portfolio_relevant("What work experience do you have in a professional setting") is True
226
+
227
+ def test_tech_stack_use_phrase_is_relevant(self):
228
+ assert is_portfolio_relevant("What tech stack does he use") is True
tests/test_generate_not_found_fallback.py CHANGED
@@ -47,3 +47,23 @@ async def test_portfolio_specific_query_forces_rag() -> None:
47
  assert "answer" not in result
48
  assert result["expanded_queries"] == ["How does TextOps work?"]
49
  gemini.fast_answer.assert_not_awaited()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  assert "answer" not in result
48
  assert result["expanded_queries"] == ["How does TextOps work?"]
49
  gemini.fast_answer.assert_not_awaited()
50
+
51
+ @pytest.mark.asyncio
52
+ async def test_broad_portfolio_intent_can_use_gemini_fast_path() -> None:
53
+ gemini = MagicMock()
54
+ gemini.is_configured = True
55
+ gemini.fast_answer = AsyncMock(return_value=("He uses a broad stack.", None))
56
+
57
+ node = make_gemini_fast_node(gemini)
58
+ state = {
59
+ "query": "What tech stack does he use?",
60
+ "is_followup": False,
61
+ "conversation_history": [],
62
+ }
63
+
64
+ with patch(_WRITER_PATCH, return_value=MagicMock()):
65
+ result = await node(state)
66
+
67
+ assert result["answer"] == "He uses a broad stack."
68
+ assert result["path"] == "gemini_fast"
69
+ gemini.fast_answer.assert_awaited_once()
tests/test_retrieve_query_normalization.py CHANGED
@@ -18,3 +18,12 @@ def test_capability_query_detection_handles_punctuation() -> None:
18
 
19
  def test_focus_source_type_for_tech_stack_query() -> None:
20
  assert _focused_source_type("What technologies and skills does he work with?") == "cv"
 
 
 
 
 
 
 
 
 
 
18
 
19
  def test_focus_source_type_for_tech_stack_query() -> None:
20
  assert _focused_source_type("What technologies and skills does he work with?") == "cv"
21
+
22
+
23
+ def test_focus_source_type_for_professional_work_experience_query() -> None:
24
+ query = "What work experience do you have in a professional setting"
25
+ assert _focused_source_type(query) == "cv"
26
+
27
+
28
+ def test_focus_source_type_for_tech_stack_use_query() -> None:
29
+ assert _focused_source_type("What tech stack does he use") == "cv"
tests/test_transcriber_normalization.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.services.transcriber import _normalise_transcript_text
2
+
3
+
4
+ def test_normalise_walk_experience_to_work_experience() -> None:
5
+ query = "uh what is his walk experience in a professional setting"
6
+ assert _normalise_transcript_text(query) == "what is his work experience in a professional setting"
7
+
8
+
9
+ def test_normalise_text_stack_to_tech_stack() -> None:
10
+ assert _normalise_transcript_text("what text stack does he use") == "what tech stack does he use"
11
+
12
+
13
+ def test_keeps_clean_transcript_unchanged() -> None:
14
+ original = "What technologies and skills does he work with?"
15
+ assert _normalise_transcript_text(original) == original