updating app
Browse files
app.py
CHANGED
|
@@ -340,10 +340,9 @@ def streaming_process(audio_input, state):
|
|
| 340 |
state.transcript_yo.append(yoruba)
|
| 341 |
|
| 342 |
if audio_out is not None and len(audio_out) > 0:
|
| 343 |
-
#
|
| 344 |
-
audio_out = np.clip(audio_out, -1.0, 1.0)
|
| 345 |
-
|
| 346 |
-
return (sr_out, audio_int16), format_live_log(state), state
|
| 347 |
else:
|
| 348 |
return None, format_live_log(state), state
|
| 349 |
|
|
@@ -382,6 +381,8 @@ DESCRIPTION = """
|
|
| 382 |
# Live Football Commentary \u2014 English \u2192 Yoruba
|
| 383 |
|
| 384 |
Translate English football commentary into Yoruba speech in real-time.
|
|
|
|
|
|
|
| 385 |
"""
|
| 386 |
|
| 387 |
STREAMING_INSTRUCTIONS = """
|
|
@@ -392,6 +393,7 @@ STREAMING_INSTRUCTIONS = """
|
|
| 392 |
4. The transcript updates live below
|
| 393 |
5. Click **Clear** to reset
|
| 394 |
|
|
|
|
| 395 |
""".format(chunk_dur=CHUNK_DURATION_S)
|
| 396 |
|
| 397 |
EXAMPLES_TEXT = [
|
|
@@ -431,7 +433,6 @@ with gr.Blocks(
|
|
| 431 |
label="Yoruba Output",
|
| 432 |
type="numpy",
|
| 433 |
autoplay=True,
|
| 434 |
-
streaming=True,
|
| 435 |
)
|
| 436 |
stream_log = gr.Markdown(
|
| 437 |
label="Live Transcript",
|
|
@@ -442,6 +443,8 @@ with gr.Blocks(
|
|
| 442 |
fn=streaming_process,
|
| 443 |
inputs=[stream_input, stream_state],
|
| 444 |
outputs=[stream_output, stream_log, stream_state],
|
|
|
|
|
|
|
| 445 |
)
|
| 446 |
|
| 447 |
clear_btn.click(
|
|
|
|
| 340 |
state.transcript_yo.append(yoruba)
|
| 341 |
|
| 342 |
if audio_out is not None and len(audio_out) > 0:
|
| 343 |
+
# Ensure float32 in [-1, 1] range for autoplay Audio component
|
| 344 |
+
audio_out = np.clip(audio_out, -1.0, 1.0).astype(np.float32)
|
| 345 |
+
return (sr_out, audio_out), format_live_log(state), state
|
|
|
|
| 346 |
else:
|
| 347 |
return None, format_live_log(state), state
|
| 348 |
|
|
|
|
| 381 |
# Live Football Commentary \u2014 English \u2192 Yoruba
|
| 382 |
|
| 383 |
Translate English football commentary into Yoruba speech in real-time.
|
| 384 |
+
|
| 385 |
+
**Pipeline:** ASR (Whisper) \u2192 MT (NLLB-200) \u2192 TTS (MMS-TTS Yoruba)
|
| 386 |
"""
|
| 387 |
|
| 388 |
STREAMING_INSTRUCTIONS = """
|
|
|
|
| 393 |
4. The transcript updates live below
|
| 394 |
5. Click **Clear** to reset
|
| 395 |
|
| 396 |
+
**Expected latency:** ~3\u20135 seconds behind your speech.
|
| 397 |
""".format(chunk_dur=CHUNK_DURATION_S)
|
| 398 |
|
| 399 |
EXAMPLES_TEXT = [
|
|
|
|
| 433 |
label="Yoruba Output",
|
| 434 |
type="numpy",
|
| 435 |
autoplay=True,
|
|
|
|
| 436 |
)
|
| 437 |
stream_log = gr.Markdown(
|
| 438 |
label="Live Transcript",
|
|
|
|
| 443 |
fn=streaming_process,
|
| 444 |
inputs=[stream_input, stream_state],
|
| 445 |
outputs=[stream_output, stream_log, stream_state],
|
| 446 |
+
time_limit=600,
|
| 447 |
+
stream_every=1.0,
|
| 448 |
)
|
| 449 |
|
| 450 |
clear_btn.click(
|