biruktafese commited on
Commit
3e0e167
·
verified ·
1 Parent(s): 62dc52c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -57
app.py CHANGED
@@ -2,81 +2,54 @@ import gradio as gr
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import io
5
- import os
6
- import soundfile as sf
7
 
8
- # Load Amharic to English model
9
  am_en_translator = pipeline("text2text-generation", model="Atnafu/Amharic-English-MT")
10
-
11
- # Load English to Amharic model
12
  en_am_translator = pipeline("text2text-generation", model="Atnafu/English-Amharic-MT")
13
-
14
- # Load Speech-to-Text (STT) models
15
- # For Amharic STT
16
- am_stt_pipeline = pipeline("automatic-speech-recognition", model="speechbrain/asr-wav2vec2-commonvoice-amharic")
17
- # For English STT
18
  en_stt_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
19
 
20
- def text_to_audio_en(text):
21
- if not text:
22
- return None
23
- tts = gTTS(text=text, lang='en')
24
- audio_fp = io.BytesIO()
25
- tts.write_to_fp(audio_fp)
26
- audio_fp.seek(0)
27
- # Gradio expects (sample_rate, audio_data) for audio output
28
- # We'll save to a temp file and load it to get sample rate and data
29
- # Or, more directly, use soundfile to read the BytesIO into the correct format if possible
30
- # For simplicity, let's return the BytesIO directly and let Gradio handle it.
31
- # In some Gradio versions, returning BytesIO of mp3 works directly.
32
- # If not, a temp .wav file conversion might be needed.
33
-
34
- # Let's try to convert to wav using pydub for better Gradio compatibility if needed,
35
- # but for now, return BytesIO (which Gradio usually handles)
36
- return audio_fp.getvalue() # Returning bytes directly
37
-
38
- def text_to_audio_am(text):
39
  if not text:
40
  return None
41
- tts = gTTS(text=text, lang='am')
42
- audio_fp = io.BytesIO()
43
- tts.write_to_fp(audio_fp)
44
- audio_fp.seek(0)
45
- return audio_fp.getvalue() # Returning bytes directly
 
 
 
 
 
 
 
 
46
 
47
  def am_to_en_full(amharic_text_input, amharic_audio_input):
48
- if amharic_audio_input is not None:
49
- # Transcribe audio to text
50
- amharic_text = am_stt_pipeline(amharic_audio_input)['text']
51
- elif amharic_text_input:
52
  amharic_text = amharic_text_input
 
 
53
  else:
54
- return "", None # No input, return empty text and no audio
55
-
56
- # Translate Amharic to English
57
  translated_en_text = am_en_translator(amharic_text)[0]['generated_text']
58
-
59
- # Convert translated English text to audio
60
- translated_en_audio = text_to_audio_en(translated_en_text)
61
-
62
- return translated_en_text, (44100, translated_en_audio) # Assuming 44100 Hz sample rate for gTTS, adjust if actual is different
63
 
64
  def en_to_am_full(english_text_input, english_audio_input):
65
  if english_audio_input is not None:
66
- # Transcribe audio to text
67
  english_text = en_stt_pipeline(english_audio_input)['text']
68
  elif english_text_input:
69
  english_text = english_text_input
70
  else:
71
- return "", None # No input, return empty text and no audio
72
-
73
- # Translate English to Amharic
74
  translated_am_text = en_am_translator(english_text)[0]['generated_text']
75
-
76
- # Convert translated Amharic text to audio
77
- translated_am_audio = text_to_audio_am(translated_am_text)
78
-
79
- return translated_am_text, (44100, translated_am_audio) # Assuming 44100 Hz sample rate for gTTS, adjust if actual is different
80
 
81
  with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
82
  gr.Markdown("# Amharic-English Two-Way Translator with Voice")
@@ -98,7 +71,6 @@ with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
98
  am_text_out = gr.Textbox(lines=3, label="Amharic Text Output", interactive=False)
99
  am_audio_out = gr.Audio(label="Amharic Audio Output")
100
 
101
- # Event listeners
102
  am_en_btn.click(
103
  am_to_en_full,
104
  inputs=[am_text_in, am_audio_in],
@@ -110,4 +82,4 @@ with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
110
  outputs=[am_text_out, am_audio_out]
111
  )
112
 
113
- demo.launch(share=True)
 
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import io
5
+ from pydub import AudioSegment
6
+ import numpy as np
7
 
8
+ # Load translation models
9
  am_en_translator = pipeline("text2text-generation", model="Atnafu/Amharic-English-MT")
 
 
10
  en_am_translator = pipeline("text2text-generation", model="Atnafu/English-Amharic-MT")
11
+ # English STT model
 
 
 
 
12
  en_stt_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
13
 
14
def text_to_audio(text, lang):
    """Synthesize `text` with gTTS and return audio for a gr.Audio output.

    Args:
        text: The string to speak. Falsy input short-circuits to None.
        lang: gTTS language code, e.g. 'en' or 'am'.

    Returns:
        (sample_rate, samples) where `samples` is an int16 numpy array
        (shape (n,) for mono, (n, channels) otherwise), or None for
        empty input.
    """
    if not text:
        return None
    tts = gTTS(text=text, lang=lang)
    mp3_bytes = io.BytesIO()
    tts.write_to_fp(mp3_bytes)
    mp3_bytes.seek(0)
    # Decode the MP3 so Gradio gets raw PCM instead of a container blob.
    audio = AudioSegment.from_file(mp3_bytes, format="mp3")
    # BUGFIX: previously the whole exported WAV file — header included —
    # was reinterpreted via np.frombuffer as int16 samples, producing a
    # click/garbage burst at the start of playback. Pull the decoded PCM
    # samples directly from pydub instead.
    samples = np.array(audio.get_array_of_samples(), dtype=np.int16)
    if audio.channels > 1:
        # Interleaved frames -> (n_samples, n_channels), as Gradio expects.
        samples = samples.reshape((-1, audio.channels))
    return (audio.frame_rate, samples)
30
 
31
def am_to_en_full(amharic_text_input, amharic_audio_input):
    """Translate Amharic text to English text plus synthesized speech.

    Amharic voice input is not supported yet: when only audio is given,
    a notice string is returned instead of a translation.

    Args:
        amharic_text_input: Amharic text from the textbox (may be empty).
        amharic_audio_input: value from gr.Audio, or None.

    Returns:
        (english_text, audio) where `audio` is whatever text_to_audio
        produces (or None when nothing is synthesized).
    """
    if amharic_text_input:
        amharic_text = amharic_text_input
    # BUGFIX: compare against None instead of truthiness — a Gradio audio
    # value can be a (sample_rate, ndarray) tuple, so an explicit None
    # check is the reliable "was audio provided" test. This also matches
    # the en_to_am_full sibling's style.
    elif amharic_audio_input is not None:
        return "Sorry, Amharic voice feature not supported yet.", None
    else:
        return "", None
    translated_en_text = am_en_translator(amharic_text)[0]['generated_text']
    translated_en_audio = text_to_audio(translated_en_text, 'en')
    return translated_en_text, translated_en_audio
 
 
 
42
 
43
def en_to_am_full(english_text_input, english_audio_input):
    """Translate English (speech or text) into Amharic text and audio.

    A recording, when present, takes priority over the textbox. Returns
    (amharic_text, audio); empty input yields ("", None).
    """
    if english_audio_input is None:
        # No recording: fall back to the textbox, or bail out on empty input.
        if not english_text_input:
            return "", None
        source_text = english_text_input
    else:
        # Transcribe the recording; typed text is ignored in this case.
        source_text = en_stt_pipeline(english_audio_input)['text']
    amharic_text = en_am_translator(source_text)[0]['generated_text']
    return amharic_text, text_to_audio(amharic_text, 'am')
 
 
 
53
 
54
  with gr.Blocks(title="Amharic-English Two-Way Translator with Voice") as demo:
55
  gr.Markdown("# Amharic-English Two-Way Translator with Voice")
 
71
  am_text_out = gr.Textbox(lines=3, label="Amharic Text Output", interactive=False)
72
  am_audio_out = gr.Audio(label="Amharic Audio Output")
73
 
 
74
  am_en_btn.click(
75
  am_to_en_full,
76
  inputs=[am_text_in, am_audio_in],
 
82
  outputs=[am_text_out, am_audio_out]
83
  )
84
 
85
+ demo.launch()