Update app.py
app.py CHANGED
@@ -16,12 +16,9 @@ from transformers import (
 )
 
 # ---------- Configuration ----------
-# Use smaller models suitable for CPU-only Hugging Face Spaces (free tier)
 WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "openai/whisper-small")
 NLLB_MODEL = os.environ.get("NLLB_MODEL", "facebook/nllb-200-distilled-600M")
 
-# Map frontend language names -> (whisper_lang_arg, nllb_src_lang_tag)
-# Adjust tags if you have different NLLB language tags for specific dialects
 LANG_MAP = {
     "akan": (None, "aka_Latn"),
     "hausa": ("ha", "hau_Latn"),
@@ -31,13 +28,12 @@ LANG_MAP = {
     "english": ("en", None),
 }
 
-#
-DEVICE = torch.device("cpu")
+DEVICE = torch.device("cpu")  # Free HF Spaces = CPU
 
 app = Flask(__name__)
 CORS(app)
 
-# ---------- Model manager
+# ---------- Model manager ----------
 class ModelManager:
     def __init__(self):
         self.whisper_processor = None
@@ -70,7 +66,6 @@ class ModelManager:
         if self.whisper_processor is None or self.whisper_model is None:
             raise RuntimeError("Whisper model not loaded")
 
-        # Load audio and resample if needed
         waveform, sr = torchaudio.load(audio_path)
         if sr != 16000:
             waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(waveform)
@@ -97,13 +92,11 @@ class ModelManager:
         if not src_text:
             return ""
         if not nllb_src_lang_tag:
-            # Already English or no NLLB mapping — return source
             return src_text
 
         if self.nllb_tokenizer is None or self.nllb_model is None:
             raise RuntimeError("NLLB model not loaded")
 
-        # Set tokenizer source lang if supported
         try:
             self.nllb_tokenizer.src_lang = nllb_src_lang_tag
         except Exception:
@@ -111,7 +104,6 @@ class ModelManager:
 
         inputs = self.nllb_tokenizer(src_text, return_tensors="pt").to(DEVICE)
 
-        # Attempt to get forced BOS token id for English; fallback to no forced token
         forced_bos = None
         try:
             forced_bos = self.nllb_tokenizer.convert_tokens_to_ids("eng_Latn")
@@ -129,7 +121,6 @@ class ModelManager:
 
         with torch.no_grad():
             translated_tokens = self.nllb_model.generate(**inputs, **gen_kwargs)
-
         translated = self.nllb_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
         return translated.strip()
 
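
For reference, the translation path in this hunk follows standard NLLB usage in transformers. A minimal standalone sketch of the same steps (the Hausa sample sentence and the max_new_tokens value are illustrative only, not taken from the app):

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

tok.src_lang = "hau_Latn"  # source-language tag, as stored in LANG_MAP
inputs = tok("Ina kwana", return_tensors="pt")  # illustrative Hausa input
with torch.no_grad():
    out = model.generate(
        **inputs,
        # force English output; same token lookup the app performs
        forced_bos_token_id=tok.convert_tokens_to_ids("eng_Latn"),
        max_new_tokens=64,
    )
print(tok.decode(out[0], skip_special_tokens=True))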
@@ -138,13 +129,6 @@ model_manager = ModelManager()
 # ---------- REST endpoint ----------
 @app.route("/transcribe", methods=["POST"])
 def transcribe_endpoint():
-    """
-    POST multipart/form-data:
-      - field 'audio': file (wav/mp3/ogg etc.)
-      - field 'language': string key (akan, hausa, swahili, french, arabic, english)
-    Response:
-      - Plain text body with the translated text (Content-Type: text/plain)
-    """
     if "audio" not in request.files:
         return Response("No audio file provided", status=400, mimetype="text/plain")
 
@@ -156,13 +140,11 @@ def transcribe_endpoint():
 
     whisper_lang_arg, nllb_src_tag = LANG_MAP[language]
 
-    # Load models (lazy)
    try:
         model_manager.load()
     except Exception as e:
         return Response(f"Model loading failed: {e}", status=500, mimetype="text/plain")
 
-    # Save audio to a temp file
     tmp_fd, tmp_path = tempfile.mkstemp(suffix=Path(audio_file.filename).suffix or ".wav")
     os.close(tmp_fd)
     audio_file.save(tmp_path)
@@ -170,7 +152,6 @@ def transcribe_endpoint():
     try:
         transcription = model_manager.transcribe(tmp_path, whisper_language_arg=whisper_lang_arg)
         if not transcription:
-            # nothing transcribed -> return empty body (204)
             return Response("", status=204, mimetype="text/plain")
 
         translation = model_manager.translate_to_english(transcription, nllb_src_tag)
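
As a usage reference for the endpoint above, a minimal client sketch using the requests library; the base URL and file name are placeholders, and the language value must be one of the LANG_MAP keys:

import requests

BASE_URL = "http://localhost:7860"  # placeholder; point at the running Space

with open("sample.wav", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/transcribe",
        files={"audio": ("sample.wav", f, "audio/wav")},
        data={"language": "hausa"},
        timeout=300,
    )

print(resp.status_code)  # 204 means nothing was transcribed
print(resp.text)         # plain-text English translation otherwise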
@@ -183,102 +164,131 @@ def transcribe_endpoint():
     except Exception:
         pass
 
-# ---------- Robust Gradio UI mount
 gradio_mounted = False
-            # try cleanup
-            try:
-                if audio_path and Path(audio_path).exists() and "/tmp" in str(audio_path):
-                    os.remove(audio_path)
-            except Exception:
-                pass
-
-    try:
-        # modern API
-        audio_component = gr.Audio(source="microphone", type="filepath")
-        dropdown = gr.Dropdown(choices=list(LANG_MAP.keys()), value="english", label="Language")
-        demo = gr.Interface(
-            fn=_ui_transcribe,
-            inputs=[audio_component, dropdown],
-            outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Translation (English)")],
-            title="Multilingual Transcriber (server)"
-        )
-    except TypeError:
-        # fallback for older gradio versions
-        try:
-            print("Gradio fallback constructor failed:", e)
-            demo = None
-    except Exception as e:
-        print("Gradio constructor failed:", e)
-        demo = None
+# ---------- Robust Gradio UI mount ----------
+if os.environ.get("DISABLE_GRADIO", "0") != "1":
+    try:
+        import gradio as gr
+        import soundfile as sf
+        import numpy as np
+
+        def _ui_transcribe(audio, language):
+            if audio is None:
+                return "No audio", ""
+
+            audio_path = None
+            if isinstance(audio, str) and Path(audio).exists():
+                audio_path = audio
+            elif isinstance(audio, (tuple, list)) and len(audio) >= 2:
+                sr, data = audio[0], audio[1]
+                tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+                sf.write(tmp.name, data, sr)
+                audio_path = tmp.name
+            elif isinstance(audio, (np.ndarray,)) or hasattr(audio, "shape"):
+                sr = 16000
+                tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+                sf.write(tmp.name, audio, sr)
+                audio_path = tmp.name
+            else:
+                try:
+                    audio_path = getattr(audio, "name", None)
+                except Exception:
+                    audio_path = None
+
+            if not audio_path:
+                return "Unsupported audio format from Gradio", ""
+
+            try:
+                model_manager.load()
+                whisper_lang, nllb_tag = LANG_MAP.get(language.lower(), (None, None))
+                transcription = model_manager.transcribe(audio_path, whisper_language_arg=whisper_lang)
+                translation = model_manager.translate_to_english(transcription, nllb_tag)
+                return transcription, translation
+            finally:
+                try:
+                    if audio_path and Path(audio_path).exists() and "/tmp" in str(audio_path):
+                        os.remove(audio_path)
+                except Exception:
+                    pass
+
+        demo = None
+        # Create components robustly across gradio versions
+        audio_component = None
+        dropdown_component = None
+        textbox_out1 = None
+        textbox_out2 = None
+
+        # Option A: modern simple API (gr.Audio)
+        try:
+            if hasattr(gr, "Audio"):
+                audio_component = gr.Audio(source="microphone", type="filepath")
+            elif hasattr(gr, "components") and hasattr(gr.components, "Audio"):
+                audio_component = gr.components.Audio(source="microphone", type="filepath")
+        except Exception:
+            audio_component = None
+
+        # Dropdown
+        try:
+            if hasattr(gr, "Dropdown"):
+                dropdown_component = gr.Dropdown(choices=list(LANG_MAP.keys()), value="english", label="Language")
+            elif hasattr(gr, "components") and hasattr(gr.components, "Dropdown"):
+                dropdown_component = gr.components.Dropdown(choices=list(LANG_MAP.keys()), value="english", label="Language")
+        except Exception:
+            dropdown_component = None
+
+        # Output textboxes
+        try:
+            if hasattr(gr, "Textbox"):
+                textbox_out1 = gr.Textbox(label="Transcription")
+                textbox_out2 = gr.Textbox(label="Translation (English)")
+            elif hasattr(gr, "components") and hasattr(gr.components, "Textbox"):
+                textbox_out1 = gr.components.Textbox(label="Transcription")
+                textbox_out2 = gr.components.Textbox(label="Translation (English)")
+        except Exception:
+            textbox_out1 = textbox_out2 = None
+
+        # If any component missing, try old 'inputs/outputs' API as final fallback
+        if audio_component is None or dropdown_component is None or textbox_out1 is None:
+            try:
+                if hasattr(gr, "inputs") and hasattr(gr, "outputs"):
+                    audio_component = getattr(gr.inputs, "Audio")(source="microphone", type="filepath")
+                    dropdown_component = getattr(gr.inputs, "Dropdown")(choices=list(LANG_MAP.keys()), default="english")
+                    textbox_out1 = getattr(gr.outputs, "Textbox")()
+                    textbox_out2 = getattr(gr.outputs, "Textbox")()
+            except Exception:
+                pass
+
+        # If we have required components, create the Interface
+        if audio_component is not None and dropdown_component is not None and textbox_out1 is not None:
+            try:
+                demo = gr.Interface(
+                    fn=_ui_transcribe,
+                    inputs=[audio_component, dropdown_component],
+                    outputs=[textbox_out1, textbox_out2],
+                    title="Multilingual Transcriber (server)"
+                )
+            except Exception as e:
+                print("Failed to create gr.Interface:", e)
+                demo = None
+
+        if demo is not None:
+            try:
+                app = gr.mount_gradio_app(app, demo, path="/ui")
+                gradio_mounted = True
+                print("Gradio mounted at /ui")
+            except Exception as e:
+                print("Failed to mount Gradio app:", e)
+                gradio_mounted = False
+        else:
+            print("Gradio demo not created; continuing without mounted UI.")
+    except Exception as e:
+        print("Gradio UI unavailable or failed to mount:", e)
+        gradio_mounted = False
+else:
+    print("Gradio mounting disabled via DISABLE_GRADIO=1")
+    gradio_mounted = False
 
 # Root endpoint
 @app.route("/")
 def index():
     if gradio_mounted:
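
One likely trigger for the TypeError fallback handled above: Gradio 4.x renamed the Audio microphone argument from source to sources (a list), so source="microphone" fails on newer releases. A version-tolerant construction could look like the sketch below; this is an illustration, not the code in this commit:

import gradio as gr

def make_mic_audio():
    # Gradio 4.x expects sources=[...]; Gradio 3.x expects source="..."
    try:
        return gr.Audio(sources=["microphone"], type="filepath")
    except TypeError:
        return gr.Audio(source="microphone", type="filepath")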