innocentpeter committed on
Commit
500fc2e
·
verified ·
1 Parent(s): 82e3c78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -24
app.py CHANGED
@@ -3,22 +3,29 @@ import gradio as gr
3
  from pathlib import Path
4
  import torch
5
  from transformers import pipeline
 
6
 
7
- # --- Import your OOP modules ---
8
  from translation import Translator, CONFIG
9
  from tts_engine import TTSEngine
10
 
11
- # --- Init core objects ---
12
  translator = Translator(
13
  n2n_enabled=CONFIG["features"]["nigerian_to_nigerian_enabled"]
14
  )
15
- tts_engine = TTSEngine(use_coqui=False) # default to pyttsx3
16
 
17
- # --- Whisper STT pipeline (CPU safe) ---
 
 
 
 
18
  device = 0 if torch.cuda.is_available() else -1
19
- stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=device)
 
 
 
 
20
 
21
- # --- Core translate + TTS function ---
22
  def translate_and_speak(input_text, input_lang, output_lang, use_coqui, clone_voice):
23
  if not input_text:
24
  return "⚠️ No input detected", None
@@ -36,18 +43,50 @@ def translate_and_speak(input_text, input_lang, output_lang, use_coqui, clone_vo
36
 
37
  return translated, audio_path
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # --- Gradio App ---
40
  with gr.Blocks() as demo:
41
  gr.Markdown("## 🌍 Nigerian Voice Translator Assistant")
42
 
43
  with gr.Row():
44
  input_lang = gr.Dropdown(
45
- choices=["yoruba", "hausa", "igbo", "pidgin", "esan", "tiv", "calabar", "benin"],
46
- value="yoruba",
47
- label="Input Language"
48
  )
49
  output_lang = gr.Dropdown(
50
- choices=["english", "yoruba", "hausa", "igbo", "pidgin", "esan", "tiv", "calabar", "benin"],
51
  value="english",
52
  label="Output Language"
53
  )
@@ -58,7 +97,7 @@ with gr.Blocks() as demo:
58
  with gr.Tab("⌨️ Text Input"):
59
  text_input = gr.Textbox(label="Enter text")
60
 
61
- use_coqui = gr.Checkbox(label="Use Coqui TTS (natural accents)", value=False)
62
  clone_voice = gr.Checkbox(label="Clone my voice if available", value=False)
63
 
64
  translate_btn = gr.Button("Translate & Speak")
@@ -66,24 +105,11 @@ with gr.Blocks() as demo:
66
  output_text = gr.Textbox(label="Translation")
67
  output_audio = gr.Audio(label="Spoken Output", type="filepath")
68
 
69
- # --- Events ---
70
- def handle_input(mic_input, text_input, input_lang, output_lang, use_coqui, clone_voice):
71
- # If mic audio provided → transcribe with Whisper
72
- if mic_input:
73
- print(f"🎙️ Running Whisper STT on {mic_input}")
74
- result = stt_pipeline(mic_input)
75
- input_text = result["text"].strip()
76
- else:
77
- input_text = text_input.strip() if text_input else ""
78
-
79
- return translate_and_speak(input_text, input_lang, output_lang, use_coqui, clone_voice)
80
-
81
  translate_btn.click(
82
  fn=handle_input,
83
  inputs=[mic_input, text_input, input_lang, output_lang, use_coqui, clone_voice],
84
  outputs=[output_text, output_audio]
85
  )
86
 
87
- # --- Run ---
88
  if __name__ == "__main__":
89
  demo.launch()
 
3
import os
from pathlib import Path

import torch
from langdetect import detect
from transformers import pipeline

# --- Import OOP modules ---
from translation import Translator, CONFIG
from tts_engine import TTSEngine
11
 
12
+ # --- Init core ---
13
  translator = Translator(
14
  n2n_enabled=CONFIG["features"]["nigerian_to_nigerian_enabled"]
15
  )
 
16
 
17
+ # On Hugging Face: disable pyttsx3 (no espeak). Default = Coqui
18
+ USE_PYTTSX3 = os.environ.get("USE_PYTTSX3", "false").lower() == "true"
19
+ tts_engine = TTSEngine(use_coqui=not USE_PYTTSX3)
20
+
21
+ # --- Whisper STT ---
22
  device = 0 if torch.cuda.is_available() else -1
23
+ stt_pipeline = pipeline(
24
+ "automatic-speech-recognition",
25
+ model="openai/whisper-small",
26
+ device=device
27
+ )
28
 
 
29
  def translate_and_speak(input_text, input_lang, output_lang, use_coqui, clone_voice):
30
  if not input_text:
31
  return "⚠️ No input detected", None
 
43
 
44
  return translated, audio_path
45
 
46
+
47
+ def handle_input(mic_input, text_input, input_lang, output_lang, use_coqui, clone_voice):
48
+ input_text = ""
49
+
50
+ # Mic input → Whisper
51
+ if mic_input:
52
+ result = stt_pipeline(mic_input)
53
+ input_text = result["text"].strip()
54
+
55
+ # Auto language detect
56
+ try:
57
+ detected = detect(input_text)
58
+ print(f"🌍 Auto-detected: {detected}")
59
+ # Map detection to supported langs
60
+ if detected.startswith("yo"):
61
+ input_lang = "yoruba"
62
+ elif detected.startswith("ha"):
63
+ input_lang = "hausa"
64
+ elif detected.startswith("ig"):
65
+ input_lang = "igbo"
66
+ elif detected.startswith("en"):
67
+ input_lang = "english"
68
+ # else leave user selection
69
+ except Exception as e:
70
+ print("⚠️ Language detection failed:", e)
71
+
72
+ elif text_input:
73
+ input_text = text_input.strip()
74
+
75
+ return translate_and_speak(input_text, input_lang, output_lang, use_coqui, clone_voice)
76
+
77
+
78
  # --- Gradio App ---
79
  with gr.Blocks() as demo:
80
  gr.Markdown("## 🌍 Nigerian Voice Translator Assistant")
81
 
82
  with gr.Row():
83
  input_lang = gr.Dropdown(
84
+ choices=["auto", "yoruba", "hausa", "igbo", "pidgin", "esan", "tiv", "calabar", "benin", "english"],
85
+ value="auto",
86
+ label="Input Language (auto-detect by default)"
87
  )
88
  output_lang = gr.Dropdown(
89
+ choices=["english","yoruba","hausa","igbo","pidgin","esan","tiv","calabar","benin"],
90
  value="english",
91
  label="Output Language"
92
  )
 
97
  with gr.Tab("⌨️ Text Input"):
98
  text_input = gr.Textbox(label="Enter text")
99
 
100
+ use_coqui = gr.Checkbox(label="Use Coqui TTS (natural accents)", value=True)
101
  clone_voice = gr.Checkbox(label="Clone my voice if available", value=False)
102
 
103
  translate_btn = gr.Button("Translate & Speak")
 
105
  output_text = gr.Textbox(label="Translation")
106
  output_audio = gr.Audio(label="Spoken Output", type="filepath")
107
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  translate_btn.click(
109
  fn=handle_input,
110
  inputs=[mic_input, text_input, input_lang, output_lang, use_coqui, clone_voice],
111
  outputs=[output_text, output_audio]
112
  )
113
 
 
114
  if __name__ == "__main__":
115
  demo.launch()