Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,47 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from
|
| 3 |
from tts_engine import TTSEngine
|
| 4 |
-
from translator import TranslatorEngine
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# Shared engines, constructed once at import time:
# TTS backed by Coqui, plus the project-local translator.
tts_engine = TTSEngine(use_coqui=True)
translator = TranslatorEngine()

# Language choices offered in the UI (lowercase identifiers used throughout).
LANGUAGES = [
    "english", "yoruba", "igbo", "hausa", "pidgin",
    "esan", "tiv", "calabar", "benin"
]
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
    """One side speaks -> STT -> Translate -> TTS.

    Parameters
    ----------
    audio : recording accepted by the STT engine (e.g. a file path).
    src_lang / tgt_lang : language identifiers from LANGUAGES.
    clone_voice : forwarded to the TTS engine's voice-cloning option.

    Returns
    -------
    (str, str) : the translated text and the path of the synthesized audio.
    """
    # Speech to text.
    # NOTE(review): stt_engine is not defined in the visible part of this
    # file (its import line is truncated) — confirm it exists at module level.
    text = stt_engine.transcribe(audio, language=src_lang)

    # Translate.
    # NOTE(review): the original assignment was truncated ("translated ="),
    # which is a SyntaxError as written; TranslatorEngine's exact signature
    # is not visible here — confirm the argument order against translator.py.
    translated = translator.translate(text, src_lang, tgt_lang)

    # Text to speech.
    audio_path = tts_engine.speak(translated, lang=tgt_lang, voice_clone=clone_voice)

    return translated, audio_path
|
|
|
|
| 1 |
+
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
from transformers import pipeline
|
| 4 |
from tts_engine import TTSEngine
|
|
|
|
| 5 |
|
| 6 |
+
# STT: Whisper small on CPU (device=-1). Loaded once at import time.
stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=-1)

# Translation: MarianMT (generic English <-> multilingual).
# NOTE(review): opus-mt-mul-en translates multilingual -> English only;
# other directions are handled by the placeholder logic in translate().
translator_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")

# Text-to-speech engine (Coqui backend).
tts_engine = TTSEngine(use_coqui=True)

# Language choices offered in the UI (lowercase identifiers used throughout).
LANGUAGES = [
    "english", "yoruba", "igbo", "hausa", "pidgin",
    "esan", "tiv", "calabar", "benin"
]
|
| 18 |
|
| 19 |
+
def transcribe(audio, language="en"):
    """Speech to text using Whisper.

    Parameters
    ----------
    audio : input accepted by the ASR pipeline (file path or waveform),
        or None when nothing was recorded.
    language : Whisper language-code hint, or None/"" to let Whisper
        auto-detect the spoken language.

    Returns
    -------
    str : the transcribed text, "" when no audio was provided.
    """
    if audio is None:
        return ""
    # Only forward a language hint when one was actually supplied.
    # handle_conversation passes language=None for non-English speakers,
    # and forwarding {"language": None} overrides Whisper's auto-detection
    # with an invalid value instead of enabling it.
    generate_kwargs = {"language": language} if language else {}
    result = stt_pipeline(audio, generate_kwargs=generate_kwargs)
    return result["text"]
|
| 25 |
+
|
| 26 |
+
def translate(text, src_lang, tgt_lang):
    """Placeholder translation via Marian; non-English pairs pass through.

    Empty input yields "". When both sides are English, the Marian
    pipeline is used; any pair involving a language Marian does not
    cover is returned unchanged, tagged with its direction.
    """
    if not text:
        return ""
    english_names = ("english", "en")
    if src_lang in english_names and tgt_lang in english_names:
        return translator_pipeline(text)[0]["translation_text"]
    # Nigerian languages are not covered by Marian — echo the text back.
    return f"[{src_lang}->{tgt_lang}] {text}"
|
| 35 |
+
|
| 36 |
def handle_conversation(audio, src_lang, tgt_lang, clone_voice):
    """One side speaks -> STT -> Translate -> TTS"""
    # Step 1: STT — hint Whisper only when the speaker uses English,
    # otherwise leave the language unspecified.
    lang_hint = "en" if src_lang == "english" else None
    spoken_text = transcribe(audio, language=lang_hint)

    # Step 2: Translate
    translated_text = translate(spoken_text, src_lang, tgt_lang)

    # Step 3: TTS
    output_path = tts_engine.speak(translated_text, lang=tgt_lang, voice_clone=clone_voice)

    return translated_text, output_path
|