Spaces:
Running
Running
GitHub Actions committed on
Commit ·
8da917e
1
Parent(s): c44df3b
Deploy 8e14626
Browse files- app/api/chat.py +1 -1
- app/pipeline/nodes/gemini_fast.py +24 -4
- app/pipeline/nodes/retrieve.py +17 -1
- app/services/transcriber.py +22 -2
- tests/test_enumerate_query.py +6 -0
- tests/test_generate_not_found_fallback.py +20 -0
- tests/test_retrieve_query_normalization.py +9 -0
- tests/test_transcriber_normalization.py +15 -0
app/api/chat.py
CHANGED
|
@@ -18,7 +18,7 @@ _SSE_HEARTBEAT_SECONDS: float = 10.0
|
|
| 18 |
|
| 19 |
# Query pre-processing budgets must stay low to avoid delaying first byte.
|
| 20 |
_DECONTEXT_TIMEOUT_SECONDS: float = 0.35
|
| 21 |
-
_EXPANSION_TIMEOUT_SECONDS: float = 0.
|
| 22 |
|
| 23 |
# Phrases a visitor uses when telling the bot it gave a wrong answer.
|
| 24 |
# Matched on the lowercased raw message before any LLM call — O(1), zero cost.
|
|
|
|
| 18 |
|
| 19 |
# Query pre-processing budgets must stay low to avoid delaying first byte.
|
| 20 |
_DECONTEXT_TIMEOUT_SECONDS: float = 0.35
|
| 21 |
+
_EXPANSION_TIMEOUT_SECONDS: float = 0.60
|
| 22 |
|
| 23 |
# Phrases a visitor uses when telling the bot it gave a wrong answer.
|
| 24 |
# Matched on the lowercased raw message before any LLM call — O(1), zero cost.
|
app/pipeline/nodes/gemini_fast.py
CHANGED
|
@@ -27,6 +27,7 @@ from typing import Any
|
|
| 27 |
from langgraph.config import get_stream_writer
|
| 28 |
|
| 29 |
from app.core.portfolio_context import is_portfolio_relevant
|
|
|
|
| 30 |
from app.models.pipeline import PipelineState
|
| 31 |
from app.services.gemini_client import GeminiClient
|
| 32 |
from app.core.quality import is_low_trust
|
|
@@ -94,6 +95,8 @@ _TRIVIAL_PHRASES: frozenset[str] = frozenset({
|
|
| 94 |
"what do you do",
|
| 95 |
})
|
| 96 |
|
|
|
|
|
|
|
| 97 |
|
| 98 |
def _is_trivial(query: str) -> bool:
|
| 99 |
"""
|
|
@@ -131,6 +134,19 @@ def _is_complex(query: str) -> bool:
|
|
| 131 |
return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
| 135 |
"""
|
| 136 |
Returns a LangGraph-compatible async node function.
|
|
@@ -173,10 +189,14 @@ def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
|
| 173 |
|
| 174 |
complexity = "complex" if _is_complex(query) else "simple"
|
| 175 |
|
| 176 |
-
# Force RAG for
|
| 177 |
-
#
|
| 178 |
-
#
|
| 179 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
|
| 181 |
writer({"type": "status", "label": "Searching portfolio..."})
|
| 182 |
return {
|
|
|
|
| 27 |
from langgraph.config import get_stream_writer
|
| 28 |
|
| 29 |
from app.core.portfolio_context import is_portfolio_relevant
|
| 30 |
+
from app.core.portfolio_context import KNOWN_ORGS, KNOWN_PROJECTS
|
| 31 |
from app.models.pipeline import PipelineState
|
| 32 |
from app.services.gemini_client import GeminiClient
|
| 33 |
from app.core.quality import is_low_trust
|
|
|
|
| 95 |
"what do you do",
|
| 96 |
})
|
| 97 |
|
| 98 |
+
_ENTITY_SPECIFIC_NOUNS: frozenset[str] = KNOWN_PROJECTS | KNOWN_ORGS
|
| 99 |
+
|
| 100 |
|
| 101 |
def _is_trivial(query: str) -> bool:
|
| 102 |
"""
|
|
|
|
| 134 |
return bool(tokens & _COMPLEX_SIGNALS) and token_count > _COMPLEX_MIN_WORDS
|
| 135 |
|
| 136 |
|
| 137 |
+
def _is_entity_specific_portfolio_query(query: str) -> bool:
    """Return True when the query mentions a known project or organisation.

    Tokenises the lowercased query into alphanumeric words, then checks
    every single word and every adjacent word pair (bigram) against the
    entity-noun set built from KNOWN_PROJECTS and KNOWN_ORGS.
    """
    words = re.findall(r"[a-z0-9]+", query.lower())
    # Single-word entity names ("textops", "acme", ...).
    if any(word in _ENTITY_SPECIFIC_NOUNS for word in words):
        return True
    # Two-word entity names ("acme corp", ...), matched on adjacent pairs.
    return any(
        f"{first} {second}" in _ENTITY_SPECIFIC_NOUNS
        for first, second in zip(words, words[1:])
    )
|
| 148 |
+
|
| 149 |
+
|
| 150 |
def make_gemini_fast_node(gemini_client: GeminiClient) -> Any:
|
| 151 |
"""
|
| 152 |
Returns a LangGraph-compatible async node function.
|
|
|
|
| 189 |
|
| 190 |
complexity = "complex" if _is_complex(query) else "simple"
|
| 191 |
|
| 192 |
+
# Force RAG for entity-specific portfolio queries (project/org names).
|
| 193 |
+
# Broad intent-only phrasing (e.g., "what tech stack does he use") first
|
| 194 |
+
# goes through Gemini fast-path and falls back to RAG if low-trust.
|
| 195 |
+
if (
|
| 196 |
+
not _is_trivial(query)
|
| 197 |
+
and is_portfolio_relevant(query)
|
| 198 |
+
and _is_entity_specific_portfolio_query(query)
|
| 199 |
+
):
|
| 200 |
logger.debug("Non-trivial query — routing directly to RAG: %r", query[:60])
|
| 201 |
writer({"type": "status", "label": "Searching portfolio..."})
|
| 202 |
return {
|
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -483,9 +483,25 @@ def make_retrieve_node(
|
|
| 483 |
and low_confidence
|
| 484 |
and top_score is not None
|
| 485 |
and top_score >= _MIN_RESCUE_SCORE
|
| 486 |
-
and (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
)
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
if not reranked or (low_confidence and not rescue_low_confidence):
|
| 490 |
return {
|
| 491 |
"answer": "",
|
|
|
|
| 483 |
and low_confidence
|
| 484 |
and top_score is not None
|
| 485 |
and top_score >= _MIN_RESCUE_SCORE
|
| 486 |
+
and (
|
| 487 |
+
capability_query
|
| 488 |
+
or _focused_source_type(retrieval_query) is not None
|
| 489 |
+
or attempts >= 1
|
| 490 |
+
or len(unique_chunks) >= 6
|
| 491 |
+
)
|
| 492 |
)
|
| 493 |
|
| 494 |
+
if low_confidence and not rescue_low_confidence and attempts >= 1 and unique_chunks:
|
| 495 |
+
writer(
|
| 496 |
+
{
|
| 497 |
+
"type": "status",
|
| 498 |
+
"label": "Using broader retrieval fallback after low-confidence rerank...",
|
| 499 |
+
}
|
| 500 |
+
)
|
| 501 |
+
reranked = unique_chunks[:10]
|
| 502 |
+
top_score = reranked[0]["metadata"].get("rerank_score", top_score)
|
| 503 |
+
low_confidence = False
|
| 504 |
+
|
| 505 |
if not reranked or (low_confidence and not rescue_low_confidence):
|
| 506 |
return {
|
| 507 |
"answer": "",
|
app/services/transcriber.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import asyncio
|
|
|
|
| 2 |
|
| 3 |
import httpx
|
| 4 |
from groq import AsyncGroq
|
|
@@ -6,6 +7,25 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fi
|
|
| 6 |
|
| 7 |
from app.core.exceptions import GenerationError
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
class GroqTranscriber:
|
| 11 |
def __init__(
|
|
@@ -46,11 +66,11 @@ class GroqTranscriber:
|
|
| 46 |
)
|
| 47 |
text = getattr(response, "text", None)
|
| 48 |
if isinstance(text, str) and text.strip():
|
| 49 |
-
return
|
| 50 |
if isinstance(response, dict):
|
| 51 |
value = response.get("text")
|
| 52 |
if isinstance(value, str) and value.strip():
|
| 53 |
-
return
|
| 54 |
raise GenerationError("Transcription response did not contain text")
|
| 55 |
|
| 56 |
try:
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import re
|
| 3 |
|
| 4 |
import httpx
|
| 5 |
from groq import AsyncGroq
|
|
|
|
| 7 |
|
| 8 |
from app.core.exceptions import GenerationError
|
| 9 |
|
| 10 |
+
_FILLER_PREFIX_RE = re.compile(r"^\s*(uh+|um+|erm+|like|you know|please|hey)\s+", re.IGNORECASE)
|
| 11 |
+
_MULTISPACE_RE = re.compile(r"\s+")
|
| 12 |
+
_TRANSCRIPT_REPLACEMENTS: tuple[tuple[re.Pattern[str], str], ...] = (
|
| 13 |
+
(re.compile(r"\bwalk experience\b", re.IGNORECASE), "work experience"),
|
| 14 |
+
(re.compile(r"\btext stack\b", re.IGNORECASE), "tech stack"),
|
| 15 |
+
(re.compile(r"\bprofessional sitting\b", re.IGNORECASE), "professional setting"),
|
| 16 |
+
(re.compile(r"\btech stocks\b", re.IGNORECASE), "tech stack"),
|
| 17 |
+
(re.compile(r"\bwhat tech stack does he\s+used\b", re.IGNORECASE), "what tech stack does he use"),
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _normalise_transcript_text(text: str) -> str:
|
| 22 |
+
cleaned = text.strip()
|
| 23 |
+
cleaned = _FILLER_PREFIX_RE.sub("", cleaned)
|
| 24 |
+
for pattern, replacement in _TRANSCRIPT_REPLACEMENTS:
|
| 25 |
+
cleaned = pattern.sub(replacement, cleaned)
|
| 26 |
+
cleaned = _MULTISPACE_RE.sub(" ", cleaned)
|
| 27 |
+
return cleaned.strip()
|
| 28 |
+
|
| 29 |
|
| 30 |
class GroqTranscriber:
|
| 31 |
def __init__(
|
|
|
|
| 66 |
)
|
| 67 |
text = getattr(response, "text", None)
|
| 68 |
if isinstance(text, str) and text.strip():
|
| 69 |
+
return _normalise_transcript_text(text)
|
| 70 |
if isinstance(response, dict):
|
| 71 |
value = response.get("text")
|
| 72 |
if isinstance(value, str) and value.strip():
|
| 73 |
+
return _normalise_transcript_text(value)
|
| 74 |
raise GenerationError("Transcription response did not contain text")
|
| 75 |
|
| 76 |
try:
|
tests/test_enumerate_query.py
CHANGED
|
@@ -220,3 +220,9 @@ class TestIsPortfolioRelevant:
|
|
| 220 |
|
| 221 |
def test_tech_stack_intent_is_relevant(self):
|
| 222 |
assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
def test_tech_stack_intent_is_relevant(self):
|
| 222 |
assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
|
| 223 |
+
|
| 224 |
+
    def test_professional_setting_work_experience_is_relevant(self):
        # Voice-transcript phrasing ("professional setting", no question
        # mark) must still be classified as portfolio-relevant.
        assert is_portfolio_relevant("What work experience do you have in a professional setting") is True

    def test_tech_stack_use_phrase_is_relevant(self):
        # Broad "tech stack" intent with no entity name is still relevant.
        assert is_portfolio_relevant("What tech stack does he use") is True
|
tests/test_generate_not_found_fallback.py
CHANGED
|
@@ -47,3 +47,23 @@ async def test_portfolio_specific_query_forces_rag() -> None:
|
|
| 47 |
assert "answer" not in result
|
| 48 |
assert result["expanded_queries"] == ["How does TextOps work?"]
|
| 49 |
gemini.fast_answer.assert_not_awaited()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
assert "answer" not in result
|
| 48 |
assert result["expanded_queries"] == ["How does TextOps work?"]
|
| 49 |
gemini.fast_answer.assert_not_awaited()
|
| 50 |
+
|
| 51 |
+
@pytest.mark.asyncio
async def test_broad_portfolio_intent_can_use_gemini_fast_path() -> None:
    """Broad, intent-only phrasing (no project/org name) stays on the
    Gemini fast path instead of being forced into RAG."""
    gemini = MagicMock()
    gemini.is_configured = True
    # fast_answer returns (answer_text, metadata) — metadata unused here.
    gemini.fast_answer = AsyncMock(return_value=("He uses a broad stack.", None))

    node = make_gemini_fast_node(gemini)
    state = {
        "query": "What tech stack does he use?",
        "is_followup": False,
        "conversation_history": [],
    }

    # Stub the LangGraph stream writer so status events are swallowed.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = await node(state)

    assert result["answer"] == "He uses a broad stack."
    assert result["path"] == "gemini_fast"
    gemini.fast_answer.assert_awaited_once()
|
tests/test_retrieve_query_normalization.py
CHANGED
|
@@ -18,3 +18,12 @@ def test_capability_query_detection_handles_punctuation() -> None:
|
|
| 18 |
|
| 19 |
def test_focus_source_type_for_tech_stack_query() -> None:
|
| 20 |
assert _focused_source_type("What technologies and skills does he work with?") == "cv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def test_focus_source_type_for_tech_stack_query() -> None:
|
| 20 |
assert _focused_source_type("What technologies and skills does he work with?") == "cv"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_focus_source_type_for_professional_work_experience_query() -> None:
    # Transcript-style phrasing without punctuation should still focus
    # retrieval on the CV source type.
    query = "What work experience do you have in a professional setting"
    assert _focused_source_type(query) == "cv"


def test_focus_source_type_for_tech_stack_use_query() -> None:
    # "tech stack ... use" wording also maps to the CV source type.
    assert _focused_source_type("What tech stack does he use") == "cv"
|
tests/test_transcriber_normalization.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.transcriber import _normalise_transcript_text


def test_normalise_walk_experience_to_work_experience() -> None:
    """Leading filler is dropped and 'walk experience' is corrected."""
    heard = "uh what is his walk experience in a professional setting"
    meant = "what is his work experience in a professional setting"
    assert _normalise_transcript_text(heard) == meant


def test_normalise_text_stack_to_tech_stack() -> None:
    """'text stack' mis-hearing is rewritten to 'tech stack'."""
    heard = "what text stack does he use"
    assert _normalise_transcript_text(heard) == "what tech stack does he use"


def test_keeps_clean_transcript_unchanged() -> None:
    """A transcript with no fillers or mis-hearings passes through as-is."""
    clean = "What technologies and skills does he work with?"
    assert _normalise_transcript_text(clean) == clean
|