GitHub Actions committed on
Commit
c44df3b
·
1 Parent(s): b616cc1

Deploy 2e8cff3

Browse files
app/api/tts.py CHANGED
@@ -22,5 +22,8 @@ async def synthesize_endpoint(
22
  detail="TTS service is not configured.",
23
  )
24
 
25
- audio_bytes = await tts_client.synthesize(payload.text.strip())
 
 
 
26
  return Response(content=audio_bytes, media_type="audio/wav")
 
22
  detail="TTS service is not configured.",
23
  )
24
 
25
+ audio_bytes = await tts_client.synthesize(
26
+ payload.text.strip(),
27
+ voice=payload.voice.strip().lower(),
28
+ )
29
  return Response(content=audio_bytes, media_type="audio/wav")
app/core/portfolio_context.py CHANGED
@@ -87,6 +87,9 @@ KNOWN_INTENTS: frozenset[str] = frozenset({
87
  "work", "experience", "work experience", "career", "employment", "job", "role",
88
  "internship", "internships", "skills", "skill", "education", "degree", "university",
89
  "resume", "cv", "background", "certification", "certifications",
 
 
 
90
  })
91
 
92
  # ---------------------------------------------------------------------------
 
87
  "work", "experience", "work experience", "career", "employment", "job", "role",
88
  "internship", "internships", "skills", "skill", "education", "degree", "university",
89
  "resume", "cv", "background", "certification", "certifications",
90
+ "tech", "stack", "tech stack", "technology", "technologies",
91
+ "framework", "frameworks", "tool", "tools", "tooling",
92
+ "language", "languages",
93
  })
94
 
95
  # ---------------------------------------------------------------------------
app/models/speech.py CHANGED
@@ -7,3 +7,4 @@ class TranscribeResponse(BaseModel):
7
 
8
  class SynthesizeRequest(BaseModel):
9
  text: str = Field(..., min_length=1, max_length=300)
 
 
7
 
8
  class SynthesizeRequest(BaseModel):
9
  text: str = Field(..., min_length=1, max_length=300)
10
+ voice: str = Field(default="am_adam", min_length=2, max_length=32)
app/pipeline/graph.py CHANGED
@@ -97,8 +97,12 @@ def route_retrieve_result(state: PipelineState) -> str:
97
  # also failed (still empty after the first CRAG rewrite). When the query
98
  # mentions a known portfolio entity, attempt one more vocabulary-shifted rewrite
99
  # before admitting the not-found path.
100
- if attempts == 3 and not reranked and is_portfolio_relevant(query):
101
- return "rewrite"
 
 
 
 
102
 
103
  return "generate"
104
 
 
97
  # also failed (still empty after the first CRAG rewrite). When the query
98
  # mentions a known portfolio entity, attempt one more vocabulary-shifted rewrite
99
  # before admitting the not-found path.
100
+ if attempts == 3 and is_portfolio_relevant(query):
101
+ if not reranked:
102
+ return "rewrite"
103
+ top_score = state.get("top_rerank_score")
104
+ if top_score is not None and top_score < _CRAG_LOW_CONFIDENCE_SCORE:
105
+ return "rewrite"
106
 
107
  return "generate"
108
 
app/pipeline/nodes/retrieve.py CHANGED
@@ -1,5 +1,6 @@
1
  import asyncio
2
  import logging
 
3
  from typing import Callable
4
 
5
  from langgraph.config import get_stream_writer
@@ -21,6 +22,7 @@ from app.services.sparse_encoder import SparseEncoder
21
  # unrelated (noise), while –3.5 to –1.0 still captures valid skill/project
22
  # passages that answer tech-stack or experience questions.
23
  _MIN_TOP_SCORE: float = -3.5
 
24
 
25
  # Default cap: max chunks per source document for BROAD queries.
26
  # Without this, a verbose doc can crowd out all 5 context slots, hiding other
@@ -45,7 +47,9 @@ _FOCUS_KEYWORDS: dict[frozenset[str], str] = {
45
  frozenset({"experience", "work", "job", "role", "career", "internship",
46
  "skills", "skill", "education", "degree", "university",
47
  "certification", "certifications", "qualified", "resume", "cv",
48
- "employment", "professional", "placement", "history"}): "cv",
 
 
49
  frozenset({"project", "built", "build", "developed", "architecture",
50
  "system", "platform", "app", "application"}): "project",
51
  frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",
@@ -59,6 +63,43 @@ _RRF_K: int = 60
59
  # Module-level singleton — BM25 model downloads once (~5 MB), cached in memory.
60
  _sparse_encoder = SparseEncoder()
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  def _focused_source_type(query: str) -> str | None:
64
  """
@@ -69,7 +110,7 @@ def _focused_source_type(query: str) -> str | None:
69
  that don't match any category retain the 2-per-doc default cap so no single
70
  source dominates the 5 context slots.
71
  """
72
- tokens = frozenset(query.lower().split())
73
  for keyword_set, source_type in _FOCUS_KEYWORDS.items():
74
  if tokens & keyword_set:
75
  return source_type
@@ -166,6 +207,9 @@ def _normalise_focus_typos(query: str) -> str:
166
  if len(stripped) < 4 or stripped in _FOCUS_VOCAB:
167
  corrected.append(token)
168
  continue
 
 
 
169
 
170
  replacement = _best_focus_replacement(stripped)
171
 
@@ -177,6 +221,11 @@ def _normalise_focus_typos(query: str) -> str:
177
  return " ".join(corrected)
178
 
179
 
 
 
 
 
 
180
  def make_retrieve_node(
181
  vector_store: VectorStore, embedder: Embedder, reranker: Reranker
182
  ) -> Callable[[PipelineState], dict]:
@@ -427,13 +476,31 @@ def make_retrieve_node(
427
 
428
  # ── Relevance gate ─────────────────────────────────────────────────────
429
  top_score = reranked[0]["metadata"].get("rerank_score", 0.0) if reranked else None
430
- if not reranked or (top_score is not None and top_score < _MIN_TOP_SCORE):
 
 
 
 
 
 
 
 
 
 
431
  return {
432
  "answer": "",
433
  "retrieved_chunks": [],
434
  "reranked_chunks": [],
435
  "retrieval_attempts": attempts + 1, "top_rerank_score": top_score, }
436
 
 
 
 
 
 
 
 
 
437
  # ── Source diversity cap (query-aware) ─────────────────────────────────
438
  focused_type = _focused_source_type(retrieval_query)
439
  doc_counts: dict[str, int] = {}
 
1
  import asyncio
2
  import logging
3
+ import re
4
  from typing import Callable
5
 
6
  from langgraph.config import get_stream_writer
 
22
  # unrelated (noise), while –3.5 to –1.0 still captures valid skill/project
23
  # passages that answer tech-stack or experience questions.
24
  _MIN_TOP_SCORE: float = -3.5
25
+ _MIN_RESCUE_SCORE: float = -6.0
26
 
27
  # Default cap: max chunks per source document for BROAD queries.
28
  # Without this, a verbose doc can crowd out all 5 context slots, hiding other
 
47
  frozenset({"experience", "work", "job", "role", "career", "internship",
48
  "skills", "skill", "education", "degree", "university",
49
  "certification", "certifications", "qualified", "resume", "cv",
50
+ "employment", "professional", "placement", "history",
51
+ "tech", "stack", "technology", "technologies", "framework",
52
+ "frameworks", "tool", "tools", "tooling", "language", "languages"}): "cv",
53
  frozenset({"project", "built", "build", "developed", "architecture",
54
  "system", "platform", "app", "application"}): "project",
55
  frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",
 
63
  # Module-level singleton — BM25 model downloads once (~5 MB), cached in memory.
64
  _sparse_encoder = SparseEncoder()
65
 
66
+ _CAPABILITY_QUERY_HINTS: frozenset[str] = frozenset(
67
+ {
68
+ "tech",
69
+ "stack",
70
+ "technology",
71
+ "technologies",
72
+ "framework",
73
+ "frameworks",
74
+ "tool",
75
+ "tools",
76
+ "tooling",
77
+ "language",
78
+ "languages",
79
+ "skills",
80
+ "skill",
81
+ }
82
+ )
83
+
84
+ _NORMALISATION_STOPWORDS: frozenset[str] = frozenset(
85
+ {
86
+ "tell",
87
+ "about",
88
+ "what",
89
+ "which",
90
+ "where",
91
+ "when",
92
+ "could",
93
+ "would",
94
+ "should",
95
+ "your",
96
+ "with",
97
+ "from",
98
+ "that",
99
+ "this",
100
+ }
101
+ )
102
+
103
 
104
  def _focused_source_type(query: str) -> str | None:
105
  """
 
110
  that don't match any category retain the 2-per-doc default cap so no single
111
  source dominates the 5 context slots.
112
  """
113
+ tokens = frozenset(re.findall(r"[a-z0-9]+", query.lower()))
114
  for keyword_set, source_type in _FOCUS_KEYWORDS.items():
115
  if tokens & keyword_set:
116
  return source_type
 
207
  if len(stripped) < 4 or stripped in _FOCUS_VOCAB:
208
  corrected.append(token)
209
  continue
210
+ if stripped in _NORMALISATION_STOPWORDS:
211
+ corrected.append(token)
212
+ continue
213
 
214
  replacement = _best_focus_replacement(stripped)
215
 
 
221
  return " ".join(corrected)
222
 
223
 
224
+ def _is_capability_query(query: str) -> bool:
225
+ tokens = frozenset(re.findall(r"[a-z0-9]+", query.lower()))
226
+ return bool(tokens & _CAPABILITY_QUERY_HINTS)
227
+
228
+
229
  def make_retrieve_node(
230
  vector_store: VectorStore, embedder: Embedder, reranker: Reranker
231
  ) -> Callable[[PipelineState], dict]:
 
476
 
477
  # ── Relevance gate ─────────────────────────────────────────────────────
478
  top_score = reranked[0]["metadata"].get("rerank_score", 0.0) if reranked else None
479
+ low_confidence = top_score is not None and top_score < _MIN_TOP_SCORE
480
+ capability_query = _is_capability_query(retrieval_query)
481
+ rescue_low_confidence = bool(
482
+ reranked
483
+ and low_confidence
484
+ and top_score is not None
485
+ and top_score >= _MIN_RESCUE_SCORE
486
+ and (capability_query or _focused_source_type(retrieval_query) is not None)
487
+ )
488
+
489
+ if not reranked or (low_confidence and not rescue_low_confidence):
490
  return {
491
  "answer": "",
492
  "retrieved_chunks": [],
493
  "reranked_chunks": [],
494
  "retrieval_attempts": attempts + 1, "top_rerank_score": top_score, }
495
 
496
+ if rescue_low_confidence:
497
+ writer(
498
+ {
499
+ "type": "status",
500
+ "label": "Applying retrieval rescue for portfolio capability query...",
501
+ }
502
+ )
503
+
504
  # ── Source diversity cap (query-aware) ─────────────────────────────────
505
  focused_type = _focused_source_type(retrieval_query)
506
  doc_counts: dict[str, int] = {}
app/services/reranker.py CHANGED
@@ -6,6 +6,7 @@
6
  from typing import Any, Optional
7
 
8
  import httpx
 
9
 
10
  from app.models.pipeline import Chunk
11
 
@@ -43,29 +44,35 @@ class Reranker:
43
  texts = [chunk.get("contextualised_text") or chunk["text"] for chunk in chunks]
44
 
45
  if self._remote:
46
- # Reverted to 60.0s to allow HF Space cold starts. Timeout handling
47
- # will be managed on the frontend.
48
- async with httpx.AsyncClient(timeout=60.0) as client:
49
- # Truncate individual texts to 1500 chars — cross-encoders truncate
50
- # anyway at their token limit (512 tokens ≈ ~1800 chars), so we lose
51
- # nothing while staying safely within the remote Space schema constraint.
52
- truncated = [t[:1500] for t in texts]
53
- resp = await client.post(
54
- f"{self._url}/rerank",
55
- json={"query": query[:512], "texts": truncated, "top_k": top_k},
56
- )
57
- resp.raise_for_status()
58
- data = resp.json()
59
- # HF Space returns {indices: [...], scores: [...]} already sorted
60
- indices: list[int] = data["indices"]
61
- scores: list[float] = data["scores"]
62
- result = []
63
- for idx, score in zip(indices, scores):
64
- chunk_copy = dict(chunks[idx])
65
- chunk_copy["metadata"]["rerank_score"] = score
66
- result.append(chunk_copy)
67
- self._min_score = scores[-1] if scores else 0.0
68
- return result # type: ignore[return-value]
 
 
 
 
 
 
69
 
70
  model = _get_local_model()
71
  pairs = [(query, text) for text in texts]
 
6
  from typing import Any, Optional
7
 
8
  import httpx
9
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
10
 
11
  from app.models.pipeline import Chunk
12
 
 
44
  texts = [chunk.get("contextualised_text") or chunk["text"] for chunk in chunks]
45
 
46
  if self._remote:
47
+ @retry(
48
+ stop=stop_after_attempt(2),
49
+ wait=wait_exponential(multiplier=0.4, min=0.4, max=1.2),
50
+ retry=retry_if_exception_type((httpx.TimeoutException, httpx.HTTPError)),
51
+ reraise=True,
52
+ )
53
+ async def _remote_call() -> tuple[list[int], list[float]]:
54
+ async with httpx.AsyncClient(timeout=60.0) as client:
55
+ truncated = [t[:1500] for t in texts]
56
+ resp = await client.post(
57
+ f"{self._url}/rerank",
58
+ json={"query": query[:512], "texts": truncated, "top_k": top_k},
59
+ )
60
+ resp.raise_for_status()
61
+ data = resp.json()
62
+ indices = data.get("indices")
63
+ scores = data.get("scores")
64
+ if not isinstance(indices, list) or not isinstance(scores, list):
65
+ raise httpx.HTTPError("Invalid reranker response schema")
66
+ return [int(i) for i in indices], [float(s) for s in scores]
67
+
68
+ indices, scores = await _remote_call()
69
+ result = []
70
+ for idx, score in zip(indices, scores):
71
+ chunk_copy = dict(chunks[idx])
72
+ chunk_copy["metadata"]["rerank_score"] = score
73
+ result.append(chunk_copy)
74
+ self._min_score = scores[-1] if scores else 0.0
75
+ return result # type: ignore[return-value]
76
 
77
  model = _get_local_model()
78
  pairs = [(query, text) for text in texts]
app/services/tts_client.py CHANGED
@@ -14,7 +14,7 @@ class TTSClient:
14
  def is_configured(self) -> bool:
15
  return bool(self._tts_space_url)
16
 
17
- async def synthesize(self, text: str) -> bytes:
18
  if not self.is_configured:
19
  raise GenerationError("TTS client is not configured")
20
 
@@ -22,7 +22,7 @@ class TTSClient:
22
  async with httpx.AsyncClient(timeout=self._timeout_seconds) as client:
23
  response = await client.post(
24
  f"{self._tts_space_url}/synthesize",
25
- json={"text": text},
26
  headers={"Content-Type": "application/json"},
27
  )
28
  response.raise_for_status()
 
14
  def is_configured(self) -> bool:
15
  return bool(self._tts_space_url)
16
 
17
+ async def synthesize(self, text: str, voice: str = "am_adam") -> bytes:
18
  if not self.is_configured:
19
  raise GenerationError("TTS client is not configured")
20
 
 
22
  async with httpx.AsyncClient(timeout=self._timeout_seconds) as client:
23
  response = await client.post(
24
  f"{self._tts_space_url}/synthesize",
25
+ json={"text": text, "voice": voice},
26
  headers={"Content-Type": "application/json"},
27
  )
28
  response.raise_for_status()
tests/test_enumerate_query.py CHANGED
@@ -217,3 +217,6 @@ class TestIsPortfolioRelevant:
217
 
218
  def test_stt_typo_work_experience_is_still_relevant(self):
219
  assert is_portfolio_relevant("tell me about his walk experience") is True
 
 
 
 
217
 
218
  def test_stt_typo_work_experience_is_still_relevant(self):
219
  assert is_portfolio_relevant("tell me about his walk experience") is True
220
+
221
+ def test_tech_stack_intent_is_relevant(self):
222
+ assert is_portfolio_relevant("Could you tell me about his tech stack?") is True
tests/test_graph_routing.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.pipeline.graph import route_retrieve_result
2
+
3
+
4
+ def test_attempt_three_portfolio_empty_rewrites() -> None:
5
+ state = {
6
+ "retrieval_attempts": 3,
7
+ "reranked_chunks": [],
8
+ "query": "Could you tell me about his tech stack?",
9
+ }
10
+ assert route_retrieve_result(state) == "rewrite"
11
+
12
+
13
+ def test_attempt_three_portfolio_low_confidence_rewrites() -> None:
14
+ state = {
15
+ "retrieval_attempts": 3,
16
+ "reranked_chunks": [{"text": "x", "metadata": {}}],
17
+ "top_rerank_score": -2.0,
18
+ "query": "Could you tell me about his tech stack?",
19
+ }
20
+ assert route_retrieve_result(state) == "rewrite"
21
+
22
+
23
+ def test_attempt_three_unrelated_low_confidence_generates() -> None:
24
+ state = {
25
+ "retrieval_attempts": 3,
26
+ "reranked_chunks": [{"text": "x", "metadata": {}}],
27
+ "top_rerank_score": -2.0,
28
+ "query": "what is the weather in london",
29
+ }
30
+ assert route_retrieve_result(state) == "generate"
tests/test_models.py CHANGED
@@ -5,6 +5,7 @@
5
  import pytest
6
  from pydantic import ValidationError
7
  from app.models.chat import ChatRequest, SourceRef, ChatResponse
 
8
 
9
  VALID_UUID = "a1b2c3d4-e5f6-4789-8abc-def012345678"
10
 
@@ -85,3 +86,13 @@ class TestChatResponse:
85
  assert resp.cached is False
86
  assert resp.latency_ms == 312
87
  assert len(resp.sources) == 1
 
 
 
 
 
 
 
 
 
 
 
5
  import pytest
6
  from pydantic import ValidationError
7
  from app.models.chat import ChatRequest, SourceRef, ChatResponse
8
+ from app.models.speech import SynthesizeRequest
9
 
10
  VALID_UUID = "a1b2c3d4-e5f6-4789-8abc-def012345678"
11
 
 
86
  assert resp.cached is False
87
  assert resp.latency_ms == 312
88
  assert len(resp.sources) == 1
89
+
90
+
91
+ class TestSynthesizeRequest:
92
+ def test_default_voice_is_male(self):
93
+ req = SynthesizeRequest(text="hello")
94
+ assert req.voice == "am_adam"
95
+
96
+ def test_voice_too_long_rejected(self):
97
+ with pytest.raises(ValidationError):
98
+ SynthesizeRequest(text="hello", voice="x" * 33)
tests/test_retrieve_query_normalization.py CHANGED
@@ -1,4 +1,4 @@
1
- from app.pipeline.nodes.retrieve import _normalise_focus_typos
2
 
3
 
4
  def test_walk_experience_normalises_to_work_experience() -> None:
@@ -10,3 +10,11 @@ def test_walk_experience_normalises_to_work_experience() -> None:
10
  def test_non_focus_text_is_not_overwritten() -> None:
11
  original = "Tell me about widget orchestration internals"
12
  assert _normalise_focus_typos(original) == original.lower()
 
 
 
 
 
 
 
 
 
1
+ from app.pipeline.nodes.retrieve import _focused_source_type, _is_capability_query, _normalise_focus_typos
2
 
3
 
4
  def test_walk_experience_normalises_to_work_experience() -> None:
 
10
  def test_non_focus_text_is_not_overwritten() -> None:
11
  original = "Tell me about widget orchestration internals"
12
  assert _normalise_focus_typos(original) == original.lower()
13
+
14
+
15
+ def test_capability_query_detection_handles_punctuation() -> None:
16
+ assert _is_capability_query("What tech stack does he use?") is True
17
+
18
+
19
+ def test_focus_source_type_for_tech_stack_query() -> None:
20
+ assert _focused_source_type("What technologies and skills does he work with?") == "cv"
tests/test_speech_endpoints.py CHANGED
@@ -44,8 +44,12 @@ def test_tts_requires_auth(app_client):
44
 
45
 
46
  def test_tts_success(app_client, valid_token):
47
- async def fake_synthesize(text):
 
 
48
  await asyncio.sleep(0)
 
 
49
  return b"RIFF....fake"
50
 
51
  app_client.app.state.tts_client.synthesize = fake_synthesize
@@ -59,3 +63,25 @@ def test_tts_success(app_client, valid_token):
59
  assert response.status_code == 200
60
  assert response.headers.get("content-type", "").startswith("audio/wav")
61
  assert response.content == b"RIFF....fake"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
 
46
  def test_tts_success(app_client, valid_token):
47
+ captured: dict[str, str] = {}
48
+
49
+ async def fake_synthesize(text, voice="am_adam"):
50
  await asyncio.sleep(0)
51
+ captured["text"] = text
52
+ captured["voice"] = voice
53
  return b"RIFF....fake"
54
 
55
  app_client.app.state.tts_client.synthesize = fake_synthesize
 
63
  assert response.status_code == 200
64
  assert response.headers.get("content-type", "").startswith("audio/wav")
65
  assert response.content == b"RIFF....fake"
66
+ assert captured["text"] == "Hello world"
67
+ assert captured["voice"] == "am_adam"
68
+
69
+
70
+ def test_tts_uses_provided_voice(app_client, valid_token):
71
+ captured: dict[str, str] = {}
72
+
73
+ async def fake_synthesize(text, voice="am_adam"):
74
+ await asyncio.sleep(0)
75
+ captured["voice"] = voice
76
+ return b"RIFF....fake"
77
+
78
+ app_client.app.state.tts_client.synthesize = fake_synthesize
79
+
80
+ response = app_client.post(
81
+ "/tts",
82
+ json={"text": "Hello world", "voice": "af_heart"},
83
+ headers={"Authorization": f"Bearer {valid_token}"},
84
+ )
85
+
86
+ assert response.status_code == 200
87
+ assert captured["voice"] == "af_heart"