PlotweaverModel committed on
Commit
aa5b642
·
verified ·
1 Parent(s): 7f24b54

updating app

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -340,10 +340,9 @@ def streaming_process(audio_input, state):
340
  state.transcript_yo.append(yoruba)
341
 
342
  if audio_out is not None and len(audio_out) > 0:
343
- # Convert to int16 PCM format for streaming Audio output
344
- audio_out = np.clip(audio_out, -1.0, 1.0)
345
- audio_int16 = (audio_out * 32767).astype(np.int16)
346
- return (sr_out, audio_int16), format_live_log(state), state
347
  else:
348
  return None, format_live_log(state), state
349
 
@@ -382,6 +381,8 @@ DESCRIPTION = """
382
  # Live Football Commentary \u2014 English \u2192 Yoruba
383
 
384
  Translate English football commentary into Yoruba speech in real-time.
 
 
385
  """
386
 
387
  STREAMING_INSTRUCTIONS = """
@@ -392,6 +393,7 @@ STREAMING_INSTRUCTIONS = """
392
  4. The transcript updates live below
393
  5. Click **Clear** to reset
394
 
 
395
  """.format(chunk_dur=CHUNK_DURATION_S)
396
 
397
  EXAMPLES_TEXT = [
@@ -431,7 +433,6 @@ with gr.Blocks(
431
  label="Yoruba Output",
432
  type="numpy",
433
  autoplay=True,
434
- streaming=True,
435
  )
436
  stream_log = gr.Markdown(
437
  label="Live Transcript",
@@ -442,6 +443,8 @@ with gr.Blocks(
442
  fn=streaming_process,
443
  inputs=[stream_input, stream_state],
444
  outputs=[stream_output, stream_log, stream_state],
 
 
445
  )
446
 
447
  clear_btn.click(
 
340
  state.transcript_yo.append(yoruba)
341
 
342
  if audio_out is not None and len(audio_out) > 0:
343
+ # Ensure float32 in [-1, 1] range for autoplay Audio component
344
+ audio_out = np.clip(audio_out, -1.0, 1.0).astype(np.float32)
345
+ return (sr_out, audio_out), format_live_log(state), state
 
346
  else:
347
  return None, format_live_log(state), state
348
 
 
381
  # Live Football Commentary \u2014 English \u2192 Yoruba
382
 
383
  Translate English football commentary into Yoruba speech in real-time.
384
+
385
+ **Pipeline:** ASR (Whisper) \u2192 MT (NLLB-200) \u2192 TTS (MMS-TTS Yoruba)
386
  """
387
 
388
  STREAMING_INSTRUCTIONS = """
 
393
  4. The transcript updates live below
394
  5. Click **Clear** to reset
395
 
396
+ **Expected latency:** ~3\u20135 seconds behind your speech.
397
  """.format(chunk_dur=CHUNK_DURATION_S)
398
 
399
  EXAMPLES_TEXT = [
 
433
  label="Yoruba Output",
434
  type="numpy",
435
  autoplay=True,
 
436
  )
437
  stream_log = gr.Markdown(
438
  label="Live Transcript",
 
443
  fn=streaming_process,
444
  inputs=[stream_input, stream_state],
445
  outputs=[stream_output, stream_log, stream_state],
446
+ time_limit=600,
447
+ stream_every=1.0,
448
  )
449
 
450
  clear_btn.click(