Spaces:
Sleeping
Sleeping
Olaolu Olugbenle
committed on
Commit
·
909d8d8
1
Parent(s):
6640d8a
removed tts elems in gradio
Browse files
app.py
CHANGED
|
@@ -193,7 +193,7 @@ def transcribe_and_translate(file_path, mode: str):
|
|
| 193 |
# sr = audio.frame_rate
|
| 194 |
# samples = np.array(audio.get_array_of_samples())
|
| 195 |
|
| 196 |
-
# #TODO
|
| 197 |
# if audio.channels > 1:
|
| 198 |
# samples = samples.reshape((-1, audio.channels))
|
| 199 |
# # normalize integer samples -> float32 in [-1, 1]
|
|
@@ -310,62 +310,59 @@ with gr.Blocks(title="Olùkọ́ | Learn Yoruba") as app:
|
|
| 310 |
gr.Markdown("# 🇳🇬 Olùkọ́")
|
| 311 |
gr.Markdown("Comprehensive Yoruba learning tool!")
|
| 312 |
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
)
|
| 320 |
|
| 321 |
-
#
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
placeholder="Type here or press the mic to speak..."
|
| 328 |
-
)
|
| 329 |
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
)
|
| 337 |
|
| 338 |
#Store translation textbox + TTS model in same row
|
| 339 |
with gr.Row():
|
| 340 |
#Translation textbox
|
| 341 |
output_translation = gr.Textbox(label="💬 Translation")
|
| 342 |
|
| 343 |
-
#Button for TTS
|
| 344 |
-
tts_button = gr.Button("Play TTS")
|
| 345 |
|
| 346 |
-
#Audio for TTS playback
|
| 347 |
-
tts_audio = gr.Audio(label="TTS Playback", type="numpy", interactive=False)
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
|
|
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
|
| 370 |
# when the mic finishes: transcribe + translate and populate both boxes
|
| 371 |
mic_recorder.change(
|
|
@@ -381,13 +378,13 @@ with gr.Blocks(title="Olùkọ́ | Learn Yoruba") as app:
|
|
| 381 |
outputs=output_translation,
|
| 382 |
)
|
| 383 |
|
| 384 |
-
#If the TTS_Button is pushed, call the _on_tts_click function
|
| 385 |
#Send the output audio (sr, numpy_array) to the tts_audio block
|
| 386 |
-
tts_button.click(
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
)
|
| 391 |
|
| 392 |
# ===========================
|
| 393 |
# APP LAUNCH
|
|
@@ -396,3 +393,6 @@ with gr.Blocks(title="Olùkọ́ | Learn Yoruba") as app:
|
|
| 396 |
|
| 397 |
if __name__ == "__main__":
|
| 398 |
app.launch() #server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
# sr = audio.frame_rate
|
| 194 |
# samples = np.array(audio.get_array_of_samples())
|
| 195 |
|
| 196 |
+
# #TODO
|
| 197 |
# if audio.channels > 1:
|
| 198 |
# samples = samples.reshape((-1, audio.channels))
|
| 199 |
# # normalize integer samples -> float32 in [-1, 1]
|
|
|
|
| 310 |
gr.Markdown("# 🇳🇬 Olùkọ́")
|
| 311 |
gr.Markdown("Comprehensive Yoruba learning tool!")
|
| 312 |
|
| 313 |
+
# direction selector
|
| 314 |
+
mode = gr.Radio(
|
| 315 |
+
choices=["Yoruba → English", "English → Yoruba"],
|
| 316 |
+
value="Yoruba → English",
|
| 317 |
+
label="Direction"
|
| 318 |
+
)
|
|
|
|
| 319 |
|
| 320 |
+
# User input microphone
|
| 321 |
+
mic_recorder = gr.Audio(
|
| 322 |
+
type="filepath",
|
| 323 |
+
label="🎙️",
|
| 324 |
+
show_label=True
|
| 325 |
+
)
|
|
|
|
|
|
|
| 326 |
|
| 327 |
+
# single editable textbox + microphone next to it
|
| 328 |
+
#User input textbox
|
| 329 |
+
output_transcription = gr.Textbox(
|
| 330 |
+
label="✍️ Speak/Type...",
|
| 331 |
+
interactive=True
|
| 332 |
+
)
|
|
|
|
| 333 |
|
| 334 |
#Store translation textbox + TTS model in same row
|
| 335 |
with gr.Row():
|
| 336 |
#Translation textbox
|
| 337 |
output_translation = gr.Textbox(label="💬 Translation")
|
| 338 |
|
| 339 |
+
#Button for TTS. TODO
|
| 340 |
+
# tts_button = gr.Button("Play TTS")
|
| 341 |
|
| 342 |
+
#Audio for TTS playback. TODO
|
| 343 |
+
# tts_audio = gr.Audio(label="TTS Playback", type="numpy", interactive=False)
|
| 344 |
|
| 345 |
+
#TODO
|
| 346 |
+
# def _on_tts_click(text, direction):
|
| 347 |
+
# """Generate TTS from the translation textbox (no disk write)
|
| 348 |
+
# and return (sr, samples)."""
|
| 349 |
+
# if not text:
|
| 350 |
+
# return None
|
| 351 |
|
| 352 |
+
# # select language/voice mapping as needed
|
| 353 |
+
# if direction == "English → Yoruba":
|
| 354 |
+
# lang = "yo"
|
| 355 |
+
# voice = "Femi"
|
| 356 |
+
# else:
|
| 357 |
+
# lang = "en"
|
| 358 |
+
# voice = "Mary"
|
| 359 |
+
|
| 360 |
+
# try:
|
| 361 |
+
# result = synthesize_tts_to_array(text, language=lang, voice=voice)
|
| 362 |
+
# return result # (sr, numpy_array) or None
|
| 363 |
+
# except Exception as e:
|
| 364 |
+
# print("TTS generation failed:", e)
|
| 365 |
+
# return None
|
| 366 |
|
| 367 |
# when the mic finishes: transcribe + translate and populate both boxes
|
| 368 |
mic_recorder.change(
|
|
|
|
| 378 |
outputs=output_translation,
|
| 379 |
)
|
| 380 |
|
| 381 |
+
#TODO If the TTS_Button is pushed, call the _on_tts_click function
|
| 382 |
#Send the output audio (sr, numpy_array) to the tts_audio block
|
| 383 |
+
# tts_button.click(
|
| 384 |
+
# _on_tts_click,
|
| 385 |
+
# inputs=[output_translation, mode],
|
| 386 |
+
# outputs=tts_audio
|
| 387 |
+
# )
|
| 388 |
|
| 389 |
# ===========================
|
| 390 |
# APP LAUNCH
|
|
|
|
| 393 |
|
| 394 |
if __name__ == "__main__":
|
| 395 |
app.launch() #server_name="0.0.0.0", server_port=7860)
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
|