Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,81 +2,54 @@ import gradio as gr
|
|
| 2 |
from transformers import pipeline
|
| 3 |
from gtts import gTTS
|
| 4 |
import io
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
|
| 8 |
-
# Load
|
| 9 |
am_en_translator = pipeline("text2text-generation", model="Atnafu/Amharic-English-MT")
|
| 10 |
-
|
| 11 |
-
# Load English to Amharic model
|
| 12 |
en_am_translator = pipeline("text2text-generation", model="Atnafu/English-Amharic-MT")
|
| 13 |
-
|
| 14 |
-
# Load Speech-to-Text (STT) models
|
| 15 |
-
# For Amharic STT
|
| 16 |
-
am_stt_pipeline = pipeline("automatic-speech-recognition", model="speechbrain/asr-wav2vec2-commonvoice-amharic")
|
| 17 |
-
# For English STT
|
| 18 |
en_stt_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 19 |
|
| 20 |
-
def
|
| 21 |
-
if not text:
|
| 22 |
-
return None
|
| 23 |
-
tts = gTTS(text=text, lang='en')
|
| 24 |
-
audio_fp = io.BytesIO()
|
| 25 |
-
tts.write_to_fp(audio_fp)
|
| 26 |
-
audio_fp.seek(0)
|
| 27 |
-
# Gradio expects (sample_rate, audio_data) for audio output
|
| 28 |
-
# We'll save to a temp file and load it to get sample rate and data
|
| 29 |
-
# Or, more directly, use soundfile to read the BytesIO into the correct format if possible
|
| 30 |
-
# For simplicity, let's return the BytesIO directly and let Gradio handle it.
|
| 31 |
-
# In some Gradio versions, returning BytesIO of mp3 works directly.
|
| 32 |
-
# If not, a temp .wav file conversion might be needed.
|
| 33 |
-
|
| 34 |
-
# Let's try to convert to wav using pydub for better Gradio compatibility if needed,
|
| 35 |
-
# but for now, return BytesIO (which Gradio usually handles)
|
| 36 |
-
return audio_fp.getvalue() # Returning bytes directly
|
| 37 |
-
|
| 38 |
-
def text_to_audio_am(text):
|
| 39 |
if not text:
|
| 40 |
return None
|
| 41 |
-
tts = gTTS(text=text, lang=
|
| 42 |
-
|
| 43 |
-
tts.write_to_fp(
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def am_to_en_full(amharic_text_input, amharic_audio_input):
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
amharic_text = am_stt_pipeline(amharic_audio_input)['text']
|
| 51 |
-
elif amharic_text_input:
|
| 52 |
amharic_text = amharic_text_input
|
|
|
|
|
|
|
| 53 |
else:
|
| 54 |
-
return "", None
|
| 55 |
-
|
| 56 |
-
# Translate Amharic to English
|
| 57 |
translated_en_text = am_en_translator(amharic_text)[0]['generated_text']
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
translated_en_audio = text_to_audio_en(translated_en_text)
|
| 61 |
-
|
| 62 |
-
return translated_en_text, (44100, translated_en_audio) # Assuming 44100 Hz sample rate for gTTS, adjust if actual is different
|
| 63 |
|
| 64 |
def en_to_am_full(english_text_input, english_audio_input):
|
| 65 |
if english_audio_input is not None:
|
| 66 |
-
# Transcribe audio to text
|
| 67 |
english_text = en_stt_pipeline(english_audio_input)['text']
|
| 68 |
elif english_text_input:
|
| 69 |
english_text = english_text_input
|
| 70 |
else:
|
| 71 |
-
return "", None
|
| 72 |
-
|
| 73 |
-
# Translate English to Amharic
|
| 74 |
translated_am_text = en_am_translator(english_text)[0]['generated_text']
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
translated_am_audio = text_to_audio_am(translated_am_text)
|
| 78 |
-
|
| 79 |
-
return translated_am_text, (44100, translated_am_audio) # Assuming 44100 Hz sample rate for gTTS, adjust if actual is different
|
| 80 |
|
| 81 |
with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
|
| 82 |
gr.Markdown("# Amharic-English Two-Way Translator with Voice")
|
|
@@ -98,7 +71,6 @@ with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
|
|
| 98 |
am_text_out = gr.Textbox(lines=3, label="Amharic Text Output", interactive=False)
|
| 99 |
am_audio_out = gr.Audio(label="Amharic Audio Output")
|
| 100 |
|
| 101 |
-
# Event listeners
|
| 102 |
am_en_btn.click(
|
| 103 |
am_to_en_full,
|
| 104 |
inputs=[am_text_in, am_audio_in],
|
|
@@ -110,4 +82,4 @@ with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
|
|
| 110 |
outputs=[am_text_out, am_audio_out]
|
| 111 |
)
|
| 112 |
|
| 113 |
-
demo.launch(
|
|
|
|
| 2 |
from transformers import pipeline
|
| 3 |
from gtts import gTTS
|
| 4 |
import io
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
import numpy as np
|
| 7 |
|
| 8 |
+
# Load translation models (Hugging Face `transformers` pipelines).
# NOTE(review): these run at import time, so the first launch blocks while the
# model weights download — confirm this is acceptable for the Space's cold start.
am_en_translator = pipeline("text2text-generation", model="Atnafu/Amharic-English-MT")
en_am_translator = pipeline("text2text-generation", model="Atnafu/English-Amharic-MT")
# English STT model
# (used by en_to_am_full to transcribe microphone input before translation)
en_stt_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 13 |
|
| 14 |
+
def text_to_audio(text, lang):
    """Synthesize *text* with gTTS and return audio for a Gradio Audio output.

    Parameters:
        text: the string to speak; falsy input short-circuits.
        lang: gTTS language code (e.g. 'en', 'am').
              NOTE(review): gTTS only supports the codes listed by
              gtts.lang.tts_langs(); an unsupported code raises at synthesis
              time — confirm 'am' is available before relying on it.

    Returns:
        (sample_rate, np.int16 samples) as Gradio expects, or None for empty text.
    """
    if not text:
        return None
    tts = gTTS(text=text, lang=lang)
    mp3_bytes = io.BytesIO()
    tts.write_to_fp(mp3_bytes)
    mp3_bytes.seek(0)
    # Decode the MP3 so we can hand Gradio raw PCM instead of a container file.
    audio = AudioSegment.from_file(mp3_bytes, format="mp3")
    # Bug fix: the previous code exported to WAV and ran np.frombuffer over the
    # whole file, so the 44-byte RIFF header was reinterpreted as int16 samples
    # (an audible click/garbage at the start, and wrong for multi-channel data).
    # get_array_of_samples() yields only the PCM payload.
    audio_np = np.array(audio.get_array_of_samples(), dtype=np.int16)
    if audio.channels > 1:
        # Gradio expects multi-channel data shaped (n_samples, n_channels).
        audio_np = audio_np.reshape((-1, audio.channels))
    return (audio.frame_rate, audio_np)
|
| 30 |
|
| 31 |
def am_to_en_full(amharic_text_input, amharic_audio_input):
    """Translate Amharic input to English; returns (english_text, english_audio).

    Only text input is supported for Amharic: audio-only input returns an
    explanatory message, and empty input returns ("", None).
    """
    if amharic_text_input:
        amharic_text = amharic_text_input
    elif amharic_audio_input is not None:
        # Consistency/robustness fix: en_to_am_full tests `is not None`, while
        # this branch used bare truthiness — which raises "truth value of an
        # array is ambiguous" if Gradio ever hands over a bare numpy array.
        return "Sorry, Amharic voice feature not supported yet.", None
    else:
        return "", None
    # Translate, then voice the translation in English.
    translated_en_text = am_en_translator(amharic_text)[0]['generated_text']
    translated_en_audio = text_to_audio(translated_en_text, 'en')
    return translated_en_text, translated_en_audio
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def en_to_am_full(english_text_input, english_audio_input):
    """Translate English input (speech or text) to Amharic.

    Returns (amharic_text, amharic_audio); empty input yields ("", None).
    """
    # Speech takes priority: transcribe the recording when one is supplied.
    if english_audio_input is not None:
        source_text = en_stt_pipeline(english_audio_input)['text']
    elif english_text_input:
        source_text = english_text_input
    else:
        # Neither audio nor text was provided — nothing to translate.
        return "", None
    amharic_text = en_am_translator(source_text)[0]['generated_text']
    return amharic_text, text_to_audio(amharic_text, 'am')
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
|
| 55 |
gr.Markdown("# Amharic-English Two-Way Translator with Voice")
|
|
|
|
| 71 |
am_text_out = gr.Textbox(lines=3, label="Amharic Text Output", interactive=False)
|
| 72 |
am_audio_out = gr.Audio(label="Amharic Audio Output")
|
| 73 |
|
|
|
|
| 74 |
am_en_btn.click(
|
| 75 |
am_to_en_full,
|
| 76 |
inputs=[am_text_in, am_audio_in],
|
|
|
|
| 82 |
outputs=[am_text_out, am_audio_out]
|
| 83 |
)
|
| 84 |
|
| 85 |
+
demo.launch()
|