Spaces:

nexusbert
/

milestone3

Sleeping

App Files Files Community

nexusbert committed on Oct 10, 2025

Commit

12f5131

1 Parent(s): 4e0cc0a

final fix

Browse files

Files changed (1) hide show

app.py +23 -66

app.py CHANGED Viewed

@@ -192,85 +192,42 @@ def preprocess_audio_ffmpeg(audio_data: bytes, target_sr: int = 16000) -> np.nda
         logger.error(f"FFmpeg preprocessing failed: {e}")
         raise HTTPException(status_code=400, detail="Audio preprocessing failed. Ensure ffmpeg is installed.")
-def _score_transcription_quality(text: str) -> float:
-    if not text or not text.strip():
-        return 0.0
-    text_lower = text.lower()
-    score = 0.0
-    if len(text.strip()) > 3:
-        score += 0.3
-    if any(char.isalpha() for char in text):
-        score += 0.2
-    if len(text.split()) > 1:
-        score += 0.2
-    if not any(char in text for char in "[]{}()"):
-        score += 0.1
-    if not text.endswith("..."):
-        score += 0.1
-    if len(text.strip()) > 10:
-        score += 0.1
-    return min(score, 1.0)
 def speech_to_text(audio_data: bytes) -> str:
     audio_array = preprocess_audio_ffmpeg(audio_data)
-    mms_text = ""
-    igbo_text = ""
     mms_result = _get_mms()
     if mms_result and mms_result[0] is not None and mms_result[1] is not None:
         mms_model, mms_proc = mms_result
         mms_text = _run_mms(mms_model, mms_proc, audio_array)
-        logger.info(f"MMS result: '{mms_text}'")
     igbo_result = _get_igbo_asr()
     if igbo_result[0] is not None and igbo_result[1] is not None:
         igbo_model, igbo_proc = igbo_result
         igbo_text = _run_whisper(igbo_model, igbo_proc, audio_array, language="igbo")
-        logger.info(f"Igbo ASR result: '{igbo_text}'")
-    if not mms_text and not igbo_text:
-        return ""
-    if not mms_text:
-        logger.info("Using Igbo ASR result (MMS failed)")
-        return igbo_text
-    if not igbo_text:
-        logger.info("Using MMS ASR result (Igbo ASR failed)")
-        return mms_text
-    mms_score = _score_transcription_quality(mms_text)
-    igbo_score = _score_transcription_quality(igbo_text)
-    mms_lang = detect_language(mms_text)
-    igbo_lang = detect_language(igbo_text)
-    logger.info(f"MMS: '{mms_text}' (score: {mms_score:.2f}, lang: {mms_lang})")
-    logger.info(f"Igbo: '{igbo_text}' (score: {igbo_score:.2f}, lang: {igbo_lang})")
-    if igbo_lang == "ig" and mms_lang != "ig":
-        logger.info("Using Igbo ASR result (detected Igbo language)")
-        return igbo_text
-    if mms_lang == "ig" and igbo_lang != "ig":
-        logger.info("Using MMS ASR result (Igbo ASR didn't detect Igbo)")
-        return mms_text
-    if igbo_score > mms_score + 0.1:
-        logger.info("Using Igbo ASR result (higher quality score)")
-        return igbo_text
-    else:
-        logger.info("Using MMS ASR result (higher quality score)")
-        return mms_text
 def get_ai_response(text: str, response_language: str = None) -> str:

         logger.error(f"FFmpeg preprocessing failed: {e}")
         raise HTTPException(status_code=400, detail="Audio preprocessing failed. Ensure ffmpeg is installed.")
 def speech_to_text(audio_data: bytes) -> str:
     """Transcribe audio with the available ASR models and return one result.

     The audio is preprocessed once with ``preprocess_audio_ffmpeg`` and then
     passed to each model whose loader returns both a model and a processor:
     the MMS model (``_get_mms`` / ``_run_mms``) and the Igbo Whisper model
     (``_get_igbo_asr`` / ``_run_whisper`` with ``language="igbo"``).

     Selection, in order:
       1. Candidates are checked MMS first, then Igbo. The first candidate
          whose detected language matches its model is returned: "ig" for
          the Igbo model, or "ha" / "yo" / "en" for MMS.
       2. Otherwise the longest candidate text is returned (MMS wins a tie
          because it is listed first).
       3. If no model produced usable text, "" is returned.

     Args:
         audio_data: Raw audio bytes, forwarded to ``preprocess_audio_ffmpeg``.

     Returns:
         The chosen transcription, or an empty string when there is none.

     Raises:
         Whatever ``preprocess_audio_ffmpeg`` raises; its visible tail raises
         ``HTTPException(status_code=400)`` when FFmpeg preprocessing fails.
     """
     audio_array = preprocess_audio_ffmpeg(audio_data)
     # (model_name, text) pairs, kept in priority order: MMS first, then Igbo.
     candidates = []
     mms_result = _get_mms()
     if mms_result and mms_result[0] is not None and mms_result[1] is not None:
         mms_model, mms_proc = mms_result
         mms_text = _run_mms(mms_model, mms_proc, audio_array)
         # Whitespace-only output carries no transcription; without this check
         # it could still win the length-based fallback below.
         if mms_text and mms_text.strip():
             candidates.append(("mms", mms_text))
             logger.info(f"MMS result: '{mms_text}'")
     igbo_result = _get_igbo_asr()
     # Same falsy guard as the MMS branch, so a loader that returns None is
     # skipped instead of raising TypeError on indexing.
     if igbo_result and igbo_result[0] is not None and igbo_result[1] is not None:
         igbo_model, igbo_proc = igbo_result
         igbo_text = _run_whisper(igbo_model, igbo_proc, audio_array, language="igbo")
         if igbo_text and igbo_text.strip():
             candidates.append(("igbo", igbo_text))
             logger.info(f"Igbo ASR result: '{igbo_text}'")
     # Prefer a transcription whose detected language matches its model.
     for model_name, text in candidates:
         detected_lang = detect_language(text)
         if detected_lang == "ig" and model_name == "igbo":
             logger.info(f"Using {model_name} ASR result (detected {detected_lang} language)")
             return text
         if detected_lang in ("ha", "yo", "en") and model_name == "mms":
             logger.info(f"Using {model_name} ASR result (detected {detected_lang} language)")
             return text
     # No language match: fall back to the longest transcription. max() keeps
     # the first maximal item, so MMS wins when both texts have equal length.
     if candidates:
         best_text = max((text for _, text in candidates), key=len)
         logger.info(f"Using best result by length: '{best_text}'")
         return best_text
     return ""
 def get_ai_response(text: str, response_language: str = None) -> str: