Spaces:

minte-atnafu
/

GihonTech

Sleeping

App Files Files Community

Minte committed on Oct 6, 2025

Commit

0717063

1 Parent(s): e04060f

refactor: replace subprocess with uroman for romanization and update TTS generation

Browse files

Files changed (2) hide show

app.py +13 -19
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ from transformers import (
 import gradio as gr
 import resampy
 import tempfile
-import subprocess
 # --- Load ASR model ---
 try:
@@ -62,11 +62,11 @@ except Exception as e:
 # --- Romanization helper ---
 def romanize(text):
     try:
-        result = subprocess.run(["uroman"], input=text.encode("utf-8"), stdout=subprocess.PIPE)
-        return result.stdout.decode("utf-8").strip()
     except Exception as e:
         print("[ERROR] Romanization failed:", e)
-        return text  # fallback
 # --- ASR ---
 def transcribe_amharic(audio_file):
@@ -117,18 +117,8 @@ def generate_chat_response(text):
     if chat_model is None:
         return "Chat model not loaded"
     try:
-        inputs = chat_model.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-        with torch.no_grad():
-            outputs = chat_model.model.generate(
-                inputs.input_ids,
-                max_length=128,
-                num_beams=4,
-                no_repeat_ngram_size=2,
-                early_stopping=True,
-                repetition_penalty=1.3,
-                do_sample=True
-            )
-        response = chat_model.tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response.strip()
     except Exception as e:
         print("[ERROR] Chat generation failed:", e)
@@ -145,7 +135,11 @@ def generate_tts(text):
         romanized_text = romanize(text)
         inputs = tts_processor(text=romanized_text, return_tensors="pt")
         with torch.no_grad():
-            speech = tts_model.generate_speech(inputs["input_ids"], tts_vocoder)
         audio_data = speech.numpy()
         max_val = np.max(np.abs(audio_data))
         if max_val > 0:
@@ -226,10 +220,10 @@ def assistant_pipeline(audio):
 # --- Gradio UI ---
 with gr.Blocks(title="🌍 Local Language AI Assistant") as demo:
     gr.Markdown("# 🌍 Local Language AI Assistant")
-    gr.Markdown("🎙️ Speak **or upload** Amharic audio and get AI responses with synthesized Amharic speech!")
     with gr.Row():
-        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="🎤 Record or Upload your voice")
     submit_btn = gr.Button("Process", variant="primary")

 import gradio as gr
 import resampy
 import tempfile
+from uroman import uroman  # ✅ Use Python version instead of subprocess
 # --- Load ASR model ---
 try:
 # --- Romanization helper ---
 def romanize(text):
     try:
+        romanized = uroman.romanize_string(text)
+        return romanized.strip()
     except Exception as e:
         print("[ERROR] Romanization failed:", e)
+        return text
 # --- ASR ---
 def transcribe_amharic(audio_file):
     if chat_model is None:
         return "Chat model not loaded"
     try:
+        result = chat_model(text, max_length=128, num_beams=4, do_sample=True)
+        response = result[0]["generated_text"]
         return response.strip()
     except Exception as e:
         print("[ERROR] Chat generation failed:", e)
         romanized_text = romanize(text)
         inputs = tts_processor(text=romanized_text, return_tensors="pt")
         with torch.no_grad():
+            speech = tts_model.generate_speech(
+                inputs["input_ids"],
+                speaker_embeddings=torch.zeros((1, 512)),  # ✅ fixed
+                vocoder=tts_vocoder
+            )
         audio_data = speech.numpy()
         max_val = np.max(np.abs(audio_data))
         if max_val > 0:
 # --- Gradio UI ---
 with gr.Blocks(title="🌍 Local Language AI Assistant") as demo:
     gr.Markdown("# 🌍 Local Language AI Assistant")
+    gr.Markdown("Speak or upload Amharic audio and get AI responses with voice output!")
     with gr.Row():
+        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="🎤 Record or Upload Audio")
     submit_btn = gr.Button("Process", variant="primary")

requirements.txt CHANGED Viewed

@@ -8,4 +8,6 @@ accelerate
 sentencepiece
 scipy
 numpy
-sacremoses

 sentencepiece
 scipy
 numpy
+sacremoses
+sacremoses
+git+https://github.com/isi-nlp/uroman.git