Spaces:

RobotsMali
/

RobotsMali_ASR_DEMO

Runtime error

App Files Files Community

binaryMao committed on Mar 7

Commit

5c26bdc

verified ·

1 Parent(s): 3fc0845

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -42

app.py CHANGED Viewed

@@ -1,16 +1,13 @@
-# -*- coding: utf-8 -*-
 import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
 import torch
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
-# Imports spécifiques pour éviter l'erreur "Abstract Class"
 from nemo.collections.asr.models import EncDecCTCModel, EncDecRNNTModel
 import gradio as gr
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-SEGMENT_DURATION = 10.0
-# Dictionnaire complet (Nom: (Repo, Type))
 MODELS = {
     "Soloba V3 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v3", "ctc"),
     "Soloba V2 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v2", "ctc"),
@@ -26,36 +23,30 @@ MODELS = {
 _cache = {}
 def get_model(name):
-    """Charge le modèle en forçant la classe concrète (CTC ou RNNT)."""
     if name in _cache:
         return _cache[name]
-    # Libération agressive de la RAM avant chargement
     if len(_cache) >= 1:
         _cache.clear()
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     repo, arch_type = MODELS[name]
-    print(f"⏳ Préparation du modèle {name} ({arch_type})...")
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
-    # Utilisation de la classe spécifique pour contourner l'erreur ASRModel
     try:
         if arch_type == "ctc":
             model = EncDecCTCModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
         else:
             model = EncDecRNNTModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
-    except Exception as e:
-        print(f"⚠️ Erreur de chargement spécifique, tentative générique : {e}")
         model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
     model.eval()
     if DEVICE == "cuda":
         model = model.half()
     _cache[name] = model
     return model
@@ -67,59 +58,34 @@ def pipeline(audio_in, model_name):
     tmp_dir = tempfile.mkdtemp()
     try:
         yield "⏳ Traitement de l'audio...", ""
-        # Normalisation FFmpeg
         wav_path = os.path.join(tmp_dir, "input.wav")
         subprocess.run(f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {wav_path}", shell=True, check=True)
-        # Segmentation
         subprocess.run(f"ffmpeg -i {wav_path} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
-        valid_segments = [f for f in valid_segments if os.path.getsize(f) > 1000]
         if not valid_segments:
             yield "❌ Erreur", "Fichier audio vide ou incompatible."
             return
-        yield f"🎙️ Transcription ({len(valid_segments)} segments)...", ""
         model = get_model(model_name)
         with torch.inference_mode():
-            # Mode stable sans Lhotse
-            batch_hyp = model.transcribe(
-                valid_segments,
-                batch_size=4,
-                return_hypotheses=True,
-                num_workers=0
-            )
-        results = []
-        for hyp in batch_hyp:
-            # Gère les formats de sortie CTC et RNNT
-            text = hyp.text if hasattr(hyp, 'text') else str(hyp)
-            if text: results.append(text)
         yield "✅ Succès", " ".join(results)
     except Exception as e:
-        print(traceback.format_exc())
         yield "❌ Erreur", str(e)
     finally:
-        if os.path.exists(tmp_dir):
-            shutil.rmtree(tmp_dir)
-# --- UI GRADIO ---
-with gr.Blocks(theme=gr.themes.Default()) as demo:
     gr.Markdown("# 🤖 RobotsMali - Reconnaissance Vocale")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(label="Audio", type="filepath", sources=["upload", "microphone"])
             model_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
             run_btn = gr.Button("🚀 DÉMARRER", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : En attente")
             text_output = gr.Textbox(label="Transcription", lines=12)
@@ -127,4 +93,5 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     run_btn.click(fn=pipeline, inputs=[audio_input, model_input], outputs=[status, text_output])
 if __name__ == "__main__":
-    demo.launch()

 import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
 import torch
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 from nemo.collections.asr.models import EncDecCTCModel, EncDecRNNTModel
 import gradio as gr
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+SEGMENT_DURATION = 10.0
 MODELS = {
     "Soloba V3 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v3", "ctc"),
     "Soloba V2 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v2", "ctc"),
 _cache = {}
 def get_model(name):
     """Return the ASR model registered under ``name``, loading it on first use.

     At most one model is kept in ``_cache``: asking for a model that is not
     cached clears the cache before the new checkpoint is restored.

     Args:
         name: A key of ``MODELS``; its value is a ``(repo, arch_type)`` pair.

     Returns:
         The restored model, switched to eval mode and, when ``DEVICE`` is
         ``"cuda"``, converted with ``.half()``.

     Raises:
         KeyError: If ``name`` is not a key of ``MODELS``.
         FileNotFoundError: If the downloaded snapshot contains no ``.nemo`` file.
     """
     if name in _cache:
         return _cache[name]

     # Drop whatever model is currently cached before loading another one.
     if _cache:
         _cache.clear()
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()

     repo, arch_type = MODELS[name]
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next(
         (os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")),
         None,
     )
     # Fail with a clear message instead of handing None to restore_from.
     if nemo_file is None:
         raise FileNotFoundError(f"No .nemo checkpoint found in snapshot of {repo!r}")

     target = torch.device(DEVICE)
     model_cls = EncDecCTCModel if arch_type == "ctc" else EncDecRNNTModel
     try:
         model = model_cls.restore_from(nemo_file, map_location=target)
     except Exception:
         # The concrete class could not restore the checkpoint; retry through
         # the generic ASRModel entry point.
         model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=target)

     model.eval()
     if DEVICE == "cuda":
         model = model.half()
     _cache[name] = model
     return model
     tmp_dir = tempfile.mkdtemp()
     try:
         yield "⏳ Traitement de l'audio...", ""
         wav_path = os.path.join(tmp_dir, "input.wav")
         subprocess.run(f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {wav_path}", shell=True, check=True)
         subprocess.run(f"ffmpeg -i {wav_path} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         if not valid_segments:
             yield "❌ Erreur", "Fichier audio vide ou incompatible."
             return
         model = get_model(model_name)
         with torch.inference_mode():
+            batch_hyp = model.transcribe(valid_segments, batch_size=4, return_hypotheses=True)
+        results = [hyp.text if hasattr(hyp, 'text') else str(hyp) for hyp in batch_hyp]
         yield "✅ Succès", " ".join(results)
     except Exception as e:
         yield "❌ Erreur", str(e)
     finally:
+        if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
+with gr.Blocks(title="RobotsMali ASR") as demo:
     # Page layout: a Markdown title, then one row holding an input column
     # (audio, model choice, start button) and an output column (status, text).
     gr.Markdown("# 🤖 RobotsMali - Reconnaissance Vocale")
     with gr.Row():
         with gr.Column():
             # type="filepath": the callback receives the audio as a path string.
             audio_input = gr.Audio(label="Audio", type="filepath", sources=["upload", "microphone"])
             # NOTE(review): the default value must be one of MODELS' keys; that
             # entry is not visible in this view, so confirm it exists.
             model_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
             run_btn = gr.Button("🚀 DÉMARRER", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : En attente")
             text_output = gr.Textbox(label="Transcription", lines=12)
     # Clicking the button calls pipeline(audio_input, model_input); its outputs
     # are routed to the status Markdown and the transcription Textbox.
     run_btn.click(fn=pipeline, inputs=[audio_input, model_input], outputs=[status, text_output])
 if __name__ == "__main__":
+    # CRITICAL settings for Docker on Hugging Face: serve on 0.0.0.0, port 7860,
+    # with show_api turned off.
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)