GitHub Actions commited on
Commit
7ff4fe5
·
1 Parent(s): 4a9ec15

Deploy 8e59863

Browse files
app/main.py CHANGED
@@ -113,6 +113,34 @@ async def _qdrant_keepalive_loop(
113
  logger.warning("Qdrant keepalive ping failed: %s", exc)
114
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  @asynccontextmanager
117
  async def lifespan(app: FastAPI):
118
  settings = get_settings()
@@ -219,6 +247,16 @@ async def lifespan(app: FastAPI):
219
  app.state.qdrant_keepalive_stop = keepalive_stop
220
  app.state.qdrant_keepalive_task = keepalive_task
221
 
 
 
 
 
 
 
 
 
 
 
222
  logger.info("Startup complete")
223
  yield
224
 
@@ -230,8 +268,18 @@ async def lifespan(app: FastAPI):
230
  app.state.qdrant_keepalive_task.cancel()
231
  except Exception:
232
  pass
 
 
 
 
 
 
 
 
233
  app.state.semantic_cache = None
234
  app.state.qdrant.close()
 
 
235
  # Only attempt to end an MLflow run when DagsHub tracking was enabled at startup.
236
  if settings.DAGSHUB_TOKEN:
237
  import mlflow
 
113
  logger.warning("Qdrant keepalive ping failed: %s", exc)
114
 
115
 
116
+ async def _tts_keepalive_loop(
117
+ tts_client: TTSClient,
118
+ stop_event: asyncio.Event,
119
+ ) -> None:
120
+ """
121
+ Periodically ping the external TTS service to keep the HuggingFace Space awake.
122
+ """
123
+ if not tts_client.is_configured:
124
+ return
125
+
126
+ # Ping every 4 minutes (240 seconds) to prevent HF Space from spinning down
127
+ interval_seconds = 240
128
+
129
+ while not stop_event.is_set():
130
+ try:
131
+ await asyncio.wait_for(stop_event.wait(), timeout=interval_seconds)
132
+ break
133
+ except TimeoutError:
134
+ pass
135
+
136
+ try:
137
+ await tts_client.ping()
138
+ logger.debug("TTS keepalive ping succeeded")
139
+ except Exception as exc:
140
+ logger.warning("TTS keepalive ping failed: %s", exc)
141
+
142
+
143
+
144
  @asynccontextmanager
145
  async def lifespan(app: FastAPI):
146
  settings = get_settings()
 
247
  app.state.qdrant_keepalive_stop = keepalive_stop
248
  app.state.qdrant_keepalive_task = keepalive_task
249
 
250
+ tts_keepalive_stop = asyncio.Event()
251
+ tts_keepalive_task = asyncio.create_task(
252
+ _tts_keepalive_loop(
253
+ tts_client=app.state.tts_client,
254
+ stop_event=tts_keepalive_stop,
255
+ )
256
+ )
257
+ app.state.tts_keepalive_stop = tts_keepalive_stop
258
+ app.state.tts_keepalive_task = tts_keepalive_task
259
+
260
  logger.info("Startup complete")
261
  yield
262
 
 
268
  app.state.qdrant_keepalive_task.cancel()
269
  except Exception:
270
  pass
271
+
272
+ app.state.tts_keepalive_stop.set()
273
+ try:
274
+ await asyncio.wait_for(app.state.tts_keepalive_task, timeout=2)
275
+ except TimeoutError:
276
+ app.state.tts_keepalive_task.cancel()
277
+ except Exception:
278
+ pass
279
  app.state.semantic_cache = None
280
  app.state.qdrant.close()
281
+ if app.state.tts_client:
282
+ await app.state.tts_client.close()
283
  # Only attempt to end an MLflow run when DagsHub tracking was enabled at startup.
284
  if settings.DAGSHUB_TOKEN:
285
  import mlflow
app/models/speech.py CHANGED
@@ -7,4 +7,4 @@ class TranscribeResponse(BaseModel):
7
 
8
  class SynthesizeRequest(BaseModel):
9
  text: str = Field(..., min_length=1, max_length=300)
10
- voice: str = Field(default="am_adam", min_length=2, max_length=32)
 
7
 
8
  class SynthesizeRequest(BaseModel):
9
  text: str = Field(..., min_length=1, max_length=300)
10
+ voice: str = Field(default="bf_emma", min_length=2, max_length=32)
app/services/transcriber.py CHANGED
@@ -65,14 +65,21 @@ class GroqTranscriber:
65
  model=self._model,
66
  temperature=0,
67
  language=target_language,
 
68
  )
69
  text = getattr(response, "text", None)
70
  if isinstance(text, str) and text.strip():
71
- return _normalise_transcript_text(text, self._replacements)
 
 
 
72
  if isinstance(response, dict):
73
  value = response.get("text")
74
  if isinstance(value, str) and value.strip():
75
- return _normalise_transcript_text(value, self._replacements)
 
 
 
76
  raise GenerationError("Transcription response did not contain text")
77
 
78
  try:
 
65
  model=self._model,
66
  temperature=0,
67
  language=target_language,
68
+ prompt="PersonaBot, Darshan, RAG, portfolio, software engineering",
69
  )
70
  text = getattr(response, "text", None)
71
  if isinstance(text, str) and text.strip():
72
+ cleaned = _normalise_transcript_text(text, self._replacements)
73
+ if len(cleaned) < 3:
74
+ raise GenerationError("Transcription too short to be valid")
75
+ return cleaned
76
  if isinstance(response, dict):
77
  value = response.get("text")
78
  if isinstance(value, str) and value.strip():
79
+ cleaned = _normalise_transcript_text(value, self._replacements)
80
+ if len(cleaned) < 3:
81
+ raise GenerationError("Transcription too short to be valid")
82
+ return cleaned
83
  raise GenerationError("Transcription response did not contain text")
84
 
85
  try:
app/services/tts_client.py CHANGED
@@ -7,31 +7,48 @@ class TTSClient:
7
  def __init__(self, tts_space_url: str, timeout_seconds: float) -> None:
8
  self._tts_space_url = tts_space_url.rstrip("/")
9
  self._timeout_seconds = timeout_seconds
 
 
 
 
 
 
 
 
10
 
11
  @property
12
  def is_configured(self) -> bool:
13
  return bool(self._tts_space_url)
14
 
15
- async def synthesize(self, text: str, voice: str = "am_adam") -> bytes:
16
  if not self.is_configured:
17
- raise GenerationError("TTS client is not configured")
 
 
 
 
 
 
 
 
 
18
 
19
- async def _call() -> bytes:
20
- async with httpx.AsyncClient(timeout=self._timeout_seconds) as client:
21
- response = await client.post(
22
- f"{self._tts_space_url}/synthesize",
23
- json={"text": text, "voice": voice},
24
- headers={"Content-Type": "application/json"},
25
- )
26
- response.raise_for_status()
27
- audio_bytes = response.content
28
- if not audio_bytes:
29
- raise GenerationError("TTS response was empty")
30
- return audio_bytes
31
 
32
  try:
33
- return await asyncio.wait_for(_call(), timeout=self._timeout_seconds)
34
- except TimeoutError as exc:
 
 
 
 
 
 
 
 
 
35
  raise GenerationError("TTS request timed out") from exc
36
  except httpx.HTTPStatusError as exc:
37
  raise GenerationError(
@@ -43,43 +60,30 @@ class TTSClient:
43
  except Exception as exc:
44
  raise GenerationError("TTS synthesis failed", context={"error": str(exc)}) from exc
45
 
46
- async def synthesize_stream(self, text: str, voice: str = "am_adam"):
47
  text = text.strip()
48
  if not text:
49
  raise GenerationError("TTS request text is empty")
50
 
51
- loop = asyncio.get_running_loop()
52
- queue = asyncio.Queue()
53
-
54
- def _worker():
55
- try:
56
- generator = self._pipeline(text, voice=voice, speed=1, split_pattern=r'\n+')
57
- for gs, ps, audio in generator:
58
- if audio is not None:
59
- import numpy as np
60
- pcm_audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
61
- loop.call_soon_threadsafe(queue.put_nowait, pcm_audio)
62
- except Exception as e:
63
- loop.call_soon_threadsafe(queue.put_nowait, e)
64
- finally:
65
- loop.call_soon_threadsafe(queue.put_nowait, None)
66
-
67
- import threading
68
- thread = threading.Thread(target=_worker)
69
- thread.start()
70
-
71
- import struct
72
- # 44-byte WAV header with 0xFFFFFFFF for sizes (streaming)
73
- yield struct.pack('<4sI4s4sIHHIIHH4sI',
74
- b'RIFF', 0xFFFFFFFF, b'WAVE',
75
- b'fmt ', 16, 1, 1, 24000, 48000, 2, 16,
76
- b'data', 0xFFFFFFFF
77
- )
78
 
79
- while True:
80
- chunk = await queue.get()
81
- if chunk is None:
82
- break
83
- if isinstance(chunk, Exception):
84
- raise GenerationError("TTS synthesis stream failed", context={"error": str(chunk)}) from chunk
85
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def __init__(self, tts_space_url: str, timeout_seconds: float) -> None:
8
  self._tts_space_url = tts_space_url.rstrip("/")
9
  self._timeout_seconds = timeout_seconds
10
+ # Persistent client — reuses connections
11
+ self._http = httpx.AsyncClient(
12
+ timeout=timeout_seconds,
13
+ limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
14
+ )
15
+
16
+ async def close(self):
17
+ await self._http.aclose()
18
 
19
  @property
20
  def is_configured(self) -> bool:
21
  return bool(self._tts_space_url)
22
 
23
+ async def ping(self) -> None:
24
  if not self.is_configured:
25
+ return
26
+ try:
27
+ await self._http.post(
28
+ f"{self._tts_space_url}/synthesize",
29
+ json={"text": "hi", "voice": "bf_emma"},
30
+ headers={"Content-Type": "application/json"},
31
+ timeout=5.0,
32
+ )
33
+ except Exception:
34
+ pass
35
 
36
+ async def synthesize(self, text: str, voice: str = "bf_emma") -> bytes:
37
+ if not self.is_configured:
38
+ raise GenerationError("TTS client is not configured")
 
 
 
 
 
 
 
 
 
39
 
40
  try:
41
+ response = await self._http.post(
42
+ f"{self._tts_space_url}/synthesize",
43
+ json={"text": text, "voice": voice},
44
+ headers={"Content-Type": "application/json"},
45
+ )
46
+ response.raise_for_status()
47
+ audio_bytes = response.content
48
+ if not audio_bytes:
49
+ raise GenerationError("TTS response was empty")
50
+ return audio_bytes
51
+ except httpx.TimeoutException as exc:
52
  raise GenerationError("TTS request timed out") from exc
53
  except httpx.HTTPStatusError as exc:
54
  raise GenerationError(
 
60
  except Exception as exc:
61
  raise GenerationError("TTS synthesis failed", context={"error": str(exc)}) from exc
62
 
63
+ async def synthesize_stream(self, text: str, voice: str = "bf_emma"):
64
  text = text.strip()
65
  if not text:
66
  raise GenerationError("TTS request text is empty")
67
 
68
+ if not self.is_configured:
69
+ raise GenerationError("TTS client is not configured")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
+ try:
72
+ async with self._http.stream(
73
+ "POST",
74
+ f"{self._tts_space_url}/synthesize",
75
+ json={"text": text, "voice": voice},
76
+ headers={"Content-Type": "application/json"},
77
+ ) as response:
78
+ response.raise_for_status()
79
+ async for chunk in response.aiter_bytes():
80
+ yield chunk
81
+ except httpx.TimeoutException as exc:
82
+ raise GenerationError("TTS request timed out") from exc
83
+ except httpx.HTTPStatusError as exc:
84
+ raise GenerationError(
85
+ "TTS upstream returned an error",
86
+ context={"status_code": exc.response.status_code},
87
+ ) from exc
88
+ except Exception as exc:
89
+ raise GenerationError("TTS synthesis stream failed", context={"error": str(exc)}) from exc
tests/test_models.py CHANGED
@@ -89,9 +89,9 @@ class TestChatResponse:
89
 
90
 
91
  class TestSynthesizeRequest:
92
- def test_default_voice_is_male(self):
93
  req = SynthesizeRequest(text="hello")
94
- assert req.voice == "am_adam"
95
 
96
  def test_voice_too_long_rejected(self):
97
  with pytest.raises(ValidationError):
 
89
 
90
 
91
  class TestSynthesizeRequest:
92
+ def test_default_voice_is_emma(self):
93
  req = SynthesizeRequest(text="hello")
94
+ assert req.voice == "bf_emma"
95
 
96
  def test_voice_too_long_rejected(self):
97
  with pytest.raises(ValidationError):
tests/test_retrieve_query_normalization.py CHANGED
@@ -26,4 +26,4 @@ def test_focus_source_type_for_professional_work_experience_query() -> None:
26
 
27
 
28
  def test_focus_source_type_for_tech_stack_use_query() -> None:
29
- assert _focused_source_type("What tech stack does he use") == "cv"
 
26
 
27
 
28
  def test_focus_source_type_for_tech_stack_use_query() -> None:
29
+ assert _focused_source_type("What tech stack does he use") is None
tests/test_speech_endpoints.py CHANGED
@@ -46,7 +46,7 @@ def test_tts_requires_auth(app_client):
46
  def test_tts_success(app_client, valid_token):
47
  captured: dict[str, str] = {}
48
 
49
- async def fake_synthesize_stream(text, voice="am_adam"):
50
  await asyncio.sleep(0)
51
  captured["text"] = text
52
  captured["voice"] = voice
@@ -65,7 +65,7 @@ def test_tts_success(app_client, valid_token):
65
  # StreamingResponse returns chunks, so response.content concatenates them
66
  assert response.content == b"RIFF....fake"
67
  assert captured["text"] == "Hello world"
68
- assert captured["voice"] == "am_adam"
69
 
70
 
71
  def test_tts_uses_provided_voice(app_client, valid_token):
 
46
  def test_tts_success(app_client, valid_token):
47
  captured: dict[str, str] = {}
48
 
49
+ async def fake_synthesize_stream(text, voice="bf_emma"):
50
  await asyncio.sleep(0)
51
  captured["text"] = text
52
  captured["voice"] = voice
 
65
  # StreamingResponse returns chunks, so response.content concatenates them
66
  assert response.content == b"RIFF....fake"
67
  assert captured["text"] == "Hello world"
68
+ assert captured["voice"] == "bf_emma"
69
 
70
 
71
  def test_tts_uses_provided_voice(app_client, valid_token):