Spaces:

1337XCode
/

personabot-api

Running

App Files Community

GitHub Actions committed on Apr 25

Commit

7ff4fe5

1 Parent(s): 4a9ec15

Deploy 8e59863

Browse files

Files changed (7) hide show

app/main.py +48 -0
app/models/speech.py +1 -1
app/services/transcriber.py +9 -2
app/services/tts_client.py +55 -51
tests/test_models.py +2 -2
tests/test_retrieve_query_normalization.py +1 -1
tests/test_speech_endpoints.py +2 -2

app/main.py CHANGED Viewed

@@ -113,6 +113,34 @@ async def _qdrant_keepalive_loop(
             logger.warning("Qdrant keepalive ping failed: %s", exc)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     settings = get_settings()
@@ -219,6 +247,16 @@ async def lifespan(app: FastAPI):
     app.state.qdrant_keepalive_stop = keepalive_stop
     app.state.qdrant_keepalive_task = keepalive_task
     logger.info("Startup complete")
     yield
@@ -230,8 +268,18 @@ async def lifespan(app: FastAPI):
         app.state.qdrant_keepalive_task.cancel()
     except Exception:
         pass
     app.state.semantic_cache = None
     app.state.qdrant.close()
     # Only attempt to end an MLflow run when DagsHub tracking was enabled at startup.
     if settings.DAGSHUB_TOKEN:
         import mlflow

             logger.warning("Qdrant keepalive ping failed: %s", exc)
+async def _tts_keepalive_loop(
+    tts_client: TTSClient,
+    stop_event: asyncio.Event,
+) -> None:
+    """
+    Periodically ping the external TTS service to keep the HuggingFace Space awake.
+    """
+    if not tts_client.is_configured:
+        return
+    # Ping every 4 minutes (240 seconds) to prevent HF Space from spinning down
+    interval_seconds = 240
+    while not stop_event.is_set():
+        try:
+            await asyncio.wait_for(stop_event.wait(), timeout=interval_seconds)
+            break
+        except TimeoutError:
+            pass
+        try:
+            await tts_client.ping()
+            logger.debug("TTS keepalive ping succeeded")
+        except Exception as exc:
+            logger.warning("TTS keepalive ping failed: %s", exc)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     settings = get_settings()
     app.state.qdrant_keepalive_stop = keepalive_stop
     app.state.qdrant_keepalive_task = keepalive_task
+    tts_keepalive_stop = asyncio.Event()
+    tts_keepalive_task = asyncio.create_task(
+        _tts_keepalive_loop(
+            tts_client=app.state.tts_client,
+            stop_event=tts_keepalive_stop,
+        )
+    )
+    app.state.tts_keepalive_stop = tts_keepalive_stop
+    app.state.tts_keepalive_task = tts_keepalive_task
     logger.info("Startup complete")
     yield
         app.state.qdrant_keepalive_task.cancel()
     except Exception:
         pass
+    app.state.tts_keepalive_stop.set()
+    try:
+        await asyncio.wait_for(app.state.tts_keepalive_task, timeout=2)
+    except TimeoutError:
+        app.state.tts_keepalive_task.cancel()
+    except Exception:
+        pass
     app.state.semantic_cache = None
     app.state.qdrant.close()
+    if app.state.tts_client:
+        await app.state.tts_client.close()
     # Only attempt to end an MLflow run when DagsHub tracking was enabled at startup.
     if settings.DAGSHUB_TOKEN:
         import mlflow

app/models/speech.py CHANGED Viewed

@@ -7,4 +7,4 @@ class TranscribeResponse(BaseModel):
 class SynthesizeRequest(BaseModel):
     text: str = Field(..., min_length=1, max_length=300)
-    voice: str = Field(default="am_adam", min_length=2, max_length=32)

 class SynthesizeRequest(BaseModel):
     text: str = Field(..., min_length=1, max_length=300)
+    voice: str = Field(default="bf_emma", min_length=2, max_length=32)

app/services/transcriber.py CHANGED Viewed

@@ -65,14 +65,21 @@ class GroqTranscriber:
                 model=self._model,
                 temperature=0,
                 language=target_language,
             )
             text = getattr(response, "text", None)
             if isinstance(text, str) and text.strip():
-                return _normalise_transcript_text(text, self._replacements)
             if isinstance(response, dict):
                 value = response.get("text")
                 if isinstance(value, str) and value.strip():
-                    return _normalise_transcript_text(value, self._replacements)
             raise GenerationError("Transcription response did not contain text")
         try:

                 model=self._model,
                 temperature=0,
                 language=target_language,
+                prompt="PersonaBot, Darshan, RAG, portfolio, software engineering",
             )
             text = getattr(response, "text", None)
             if isinstance(text, str) and text.strip():
+                cleaned = _normalise_transcript_text(text, self._replacements)
+                if len(cleaned) < 3:
+                    raise GenerationError("Transcription too short to be valid")
+                return cleaned
             if isinstance(response, dict):
                 value = response.get("text")
                 if isinstance(value, str) and value.strip():
+                    cleaned = _normalise_transcript_text(value, self._replacements)
+                    if len(cleaned) < 3:
+                        raise GenerationError("Transcription too short to be valid")
+                    return cleaned
             raise GenerationError("Transcription response did not contain text")
         try:

app/services/tts_client.py CHANGED Viewed

@@ -7,31 +7,48 @@ class TTSClient:
     def __init__(self, tts_space_url: str, timeout_seconds: float) -> None:
         self._tts_space_url = tts_space_url.rstrip("/")
         self._timeout_seconds = timeout_seconds
     @property
     def is_configured(self) -> bool:
         return bool(self._tts_space_url)
-    async def synthesize(self, text: str, voice: str = "am_adam") -> bytes:
         if not self.is_configured:
-            raise GenerationError("TTS client is not configured")
-        async def _call() -> bytes:
-            async with httpx.AsyncClient(timeout=self._timeout_seconds) as client:
-                response = await client.post(
-                    f"{self._tts_space_url}/synthesize",
-                    json={"text": text, "voice": voice},
-                    headers={"Content-Type": "application/json"},
-                )
-                response.raise_for_status()
-                audio_bytes = response.content
-                if not audio_bytes:
-                    raise GenerationError("TTS response was empty")
-                return audio_bytes
         try:
-            return await asyncio.wait_for(_call(), timeout=self._timeout_seconds)
-        except TimeoutError as exc:
             raise GenerationError("TTS request timed out") from exc
         except httpx.HTTPStatusError as exc:
             raise GenerationError(
@@ -43,43 +60,30 @@ class TTSClient:
         except Exception as exc:
             raise GenerationError("TTS synthesis failed", context={"error": str(exc)}) from exc
-    async def synthesize_stream(self, text: str, voice: str = "am_adam"):
         text = text.strip()
         if not text:
             raise GenerationError("TTS request text is empty")
-        loop = asyncio.get_running_loop()
-        queue = asyncio.Queue()
-        def _worker():
-            try:
-                generator = self._pipeline(text, voice=voice, speed=1, split_pattern=r'\n+')
-                for gs, ps, audio in generator:
-                    if audio is not None:
-                        import numpy as np
-                        pcm_audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
-                        loop.call_soon_threadsafe(queue.put_nowait, pcm_audio)
-            except Exception as e:
-                loop.call_soon_threadsafe(queue.put_nowait, e)
-            finally:
-                loop.call_soon_threadsafe(queue.put_nowait, None)
-        import threading
-        thread = threading.Thread(target=_worker)
-        thread.start()
-        import struct
-        # 44-byte WAV header with 0xFFFFFFFF for sizes (streaming)
-        yield struct.pack('<4sI4s4sIHHIIHH4sI',
-            b'RIFF', 0xFFFFFFFF, b'WAVE',
-            b'fmt ', 16, 1, 1, 24000, 48000, 2, 16,
-            b'data', 0xFFFFFFFF
-        )
-        while True:
-            chunk = await queue.get()
-            if chunk is None:
-                break
-            if isinstance(chunk, Exception):
-                raise GenerationError("TTS synthesis stream failed", context={"error": str(chunk)}) from chunk
-            yield chunk

     def __init__(self, tts_space_url: str, timeout_seconds: float) -> None:
         self._tts_space_url = tts_space_url.rstrip("/")
         self._timeout_seconds = timeout_seconds
+        # Persistent client — reuses connections
+        self._http = httpx.AsyncClient(
+            timeout=timeout_seconds,
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+        )
+    async def close(self):
+        await self._http.aclose()
     @property
     def is_configured(self) -> bool:
         return bool(self._tts_space_url)
+    async def ping(self) -> None:
         if not self.is_configured:
+            return
+        try:
+            await self._http.post(
+                f"{self._tts_space_url}/synthesize",
+                json={"text": "hi", "voice": "bf_emma"},
+                headers={"Content-Type": "application/json"},
+                timeout=5.0,
+            )
+        except Exception:
+            pass
+    async def synthesize(self, text: str, voice: str = "bf_emma") -> bytes:
+        if not self.is_configured:
+            raise GenerationError("TTS client is not configured")
         try:
+            response = await self._http.post(
+                f"{self._tts_space_url}/synthesize",
+                json={"text": text, "voice": voice},
+                headers={"Content-Type": "application/json"},
+            )
+            response.raise_for_status()
+            audio_bytes = response.content
+            if not audio_bytes:
+                raise GenerationError("TTS response was empty")
+            return audio_bytes
+        except httpx.TimeoutException as exc:
             raise GenerationError("TTS request timed out") from exc
         except httpx.HTTPStatusError as exc:
             raise GenerationError(
         except Exception as exc:
             raise GenerationError("TTS synthesis failed", context={"error": str(exc)}) from exc
+    async def synthesize_stream(self, text: str, voice: str = "bf_emma"):
         text = text.strip()
         if not text:
             raise GenerationError("TTS request text is empty")
+        if not self.is_configured:
+            raise GenerationError("TTS client is not configured")
+        try:
+            async with self._http.stream(
+                "POST",
+                f"{self._tts_space_url}/synthesize",
+                json={"text": text, "voice": voice},
+                headers={"Content-Type": "application/json"},
+            ) as response:
+                response.raise_for_status()
+                async for chunk in response.aiter_bytes():
+                    yield chunk
+        except httpx.TimeoutException as exc:
+            raise GenerationError("TTS request timed out") from exc
+        except httpx.HTTPStatusError as exc:
+            raise GenerationError(
+                "TTS upstream returned an error",
+                context={"status_code": exc.response.status_code},
+            ) from exc
+        except Exception as exc:
+            raise GenerationError("TTS synthesis stream failed", context={"error": str(exc)}) from exc

tests/test_models.py CHANGED Viewed

@@ -89,9 +89,9 @@ class TestChatResponse:
 class TestSynthesizeRequest:
-    def test_default_voice_is_male(self):
         req = SynthesizeRequest(text="hello")
-        assert req.voice == "am_adam"
     def test_voice_too_long_rejected(self):
         with pytest.raises(ValidationError):

 class TestSynthesizeRequest:
+    def test_default_voice_is_emma(self):
         req = SynthesizeRequest(text="hello")
+        assert req.voice == "bf_emma"
     def test_voice_too_long_rejected(self):
         with pytest.raises(ValidationError):

tests/test_retrieve_query_normalization.py CHANGED Viewed

@@ -26,4 +26,4 @@ def test_focus_source_type_for_professional_work_experience_query() -> None:
 def test_focus_source_type_for_tech_stack_use_query() -> None:
-    assert _focused_source_type("What tech stack does he use") == "cv"


 def test_focus_source_type_for_tech_stack_use_query() -> None:
+    assert _focused_source_type("What tech stack does he use") is None

tests/test_speech_endpoints.py CHANGED Viewed

@@ -46,7 +46,7 @@ def test_tts_requires_auth(app_client):
 def test_tts_success(app_client, valid_token):
     captured: dict[str, str] = {}
-    async def fake_synthesize_stream(text, voice="am_adam"):
         await asyncio.sleep(0)
         captured["text"] = text
         captured["voice"] = voice
@@ -65,7 +65,7 @@ def test_tts_success(app_client, valid_token):
     # StreamingResponse returns chunks, so response.content concatenates them
     assert response.content == b"RIFF....fake"
     assert captured["text"] == "Hello world"
-    assert captured["voice"] == "am_adam"
 def test_tts_uses_provided_voice(app_client, valid_token):

 def test_tts_success(app_client, valid_token):
     captured: dict[str, str] = {}
+    async def fake_synthesize_stream(text, voice="bf_emma"):
         await asyncio.sleep(0)
         captured["text"] = text
         captured["voice"] = voice
     # StreamingResponse returns chunks, so response.content concatenates them
     assert response.content == b"RIFF....fake"
     assert captured["text"] == "Hello world"
+    assert captured["voice"] == "bf_emma"
 def test_tts_uses_provided_voice(app_client, valid_token):