Spaces:

RobotsMali
/

RobotsMali_ASR_DEMO

Runtime error

App Files Community

binaryMao committed on Mar 16

Commit

55e23fc

verified ·

1 Parent(s): 78be54b

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -37

app.py CHANGED Viewed

@@ -1,18 +1,14 @@
-import os, shlex, subprocess, tempfile, traceback, time, glob, gc, shutil
 import torch
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 from nemo.collections.asr.models import EncDecCTCModel, EncDecRNNTModel
 import gradio as gr
-# Mise à jour de Gradio (optionnel, peut être commenté après la première exécution)
-try:
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio", "gradio-client"])
-except:
-    pass
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 SEGMENT_DURATION = 10.0
 # Dictionnaire des modèles RobotsMali
 MODELS = {
@@ -33,30 +29,46 @@ def get_model(name):
     if name in _cache:
         return _cache[name]
     # Gestion agressive de la mémoire
     if len(_cache) >= 1:
         _cache.clear()
         gc.collect()
-        if torch.cuda.is_available(): torch.cuda.empty_cache()
-    repo, arch_type = MODELS[name]
-    folder = snapshot_download(repo, local_dir_use_symlinks=False)
-    nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
     try:
         if arch_type == "ctc":
             model = EncDecCTCModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
         else:
             model = EncDecRNNTModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
-    except Exception:
-        model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
-    model.eval()
-    if DEVICE == "cuda":
-        model = model.half()
-    _cache[name] = model
-    return model
 def pipeline(audio_in, model_name):
     if not audio_in:
@@ -66,17 +78,40 @@ def pipeline(audio_in, model_name):
     tmp_dir = tempfile.mkdtemp()
     try:
         yield "⏳ Traitement de l'audio...", ""
         wav_path = os.path.join(tmp_dir, "input.wav")
-        # Normalisation audio via FFmpeg
-        subprocess.run(f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {wav_path}", shell=True, check=True)
-        subprocess.run(f"ffmpeg -i {wav_path} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         if not valid_segments:
             yield "❌ Erreur", "Fichier audio vide ou incompatible."
             return
         model = get_model(model_name)
         with torch.inference_mode():
             batch_hyp = model.transcribe(valid_segments, batch_size=4, return_hypotheses=True)
@@ -87,28 +122,42 @@ def pipeline(audio_in, model_name):
         print(traceback.format_exc())
         yield "❌ Erreur", str(e)
     finally:
-        if os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
-# Interface Gradio optimisée
-with gr.Blocks(title="RobotsMali ASR") as demo:
     gr.Markdown("# 🤖 RobotsMali - Reconnaissance Vocale")
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="Audio", type="filepath", sources=["upload", "microphone"])
-            model_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
             run_btn = gr.Button("🚀 DÉMARRER", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : En attente")
             text_output = gr.Textbox(label="Transcription", lines=12)
-    run_btn.click(fn=pipeline, inputs=[audio_input, model_input], outputs=[status, text_output])
 if __name__ == "__main__":
-    # Configuration pour les environnements cloud
     demo.queue().launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_api=False,
-        share=True,  # Important : crée un lien public
-        debug=True   # Ajoutez ceci pour voir plus de détails en cas d'erreur
     )

+import os, shlex, subprocess, tempfile, traceback, glob, gc, shutil
 import torch
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 from nemo.collections.asr.models import EncDecCTCModel, EncDecRNNTModel
 import gradio as gr
+# Configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 SEGMENT_DURATION = 10.0
+print(f"✅ Démarrage sur device: {DEVICE}")
 # Dictionnaire des modèles RobotsMali
 MODELS = {
     if name in _cache:
         return _cache[name]
+    print(f"📥 Chargement du modèle: {name}")
     # Gestion agressive de la mémoire
     if len(_cache) >= 1:
         _cache.clear()
         gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
     try:
+        repo, arch_type = MODELS[name]
+        print(f"📦 Téléchargement depuis {repo}...")
+        folder = snapshot_download(repo, local_dir_use_symlinks=False)
+        print(f"📁 Dossier: {folder}")
+        nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
+        if nemo_file is None:
+            raise FileNotFoundError(f"Aucun fichier .nemo trouvé dans {folder}")
+        print(f"🔧 Restauration du modèle depuis {nemo_file}")
         if arch_type == "ctc":
             model = EncDecCTCModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
         else:
             model = EncDecRNNTModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
+        model.eval()
+        if DEVICE == "cuda":
+            model = model.half()
+        print(f"✅ Modèle {name} chargé avec succès")
+        _cache[name] = model
+        return model
+    except Exception as e:
+        print(f"❌ Erreur lors du chargement du modèle {name}:")
+        print(traceback.format_exc())
+        raise e
 def pipeline(audio_in, model_name):
     if not audio_in:
     tmp_dir = tempfile.mkdtemp()
     try:
         yield "⏳ Traitement de l'audio...", ""
+        # Vérification que le fichier audio existe
+        if not os.path.exists(audio_in):
+            yield "❌ Erreur", f"Fichier audio introuvable: {audio_in}"
+            return
         wav_path = os.path.join(tmp_dir, "input.wav")
+        # Conversion audio
+        cmd = f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {shlex.quote(wav_path)}"
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+        if result.returncode != 0:
+            yield "❌ Erreur", f"Erreur FFmpeg: {result.stderr}"
+            return
+        if not os.path.exists(wav_path) or os.path.getsize(wav_path) == 0:
+            yield "❌ Erreur", "Fichier audio converti vide"
+            return
+        # Segmentation
+        seg_pattern = os.path.join(tmp_dir, 'seg_%03d.wav')
+        cmd = f"ffmpeg -i {shlex.quote(wav_path)} -f segment -segment_time {SEGMENT_DURATION} -c copy {shlex.quote(seg_pattern)}"
+        subprocess.run(cmd, shell=True, capture_output=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         if not valid_segments:
             yield "❌ Erreur", "Fichier audio vide ou incompatible."
             return
+        print(f"🔊 {len(valid_segments)} segments à transcrire")
         model = get_model(model_name)
         with torch.inference_mode():
             batch_hyp = model.transcribe(valid_segments, batch_size=4, return_hypotheses=True)
         print(traceback.format_exc())
         yield "❌ Erreur", str(e)
     finally:
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+# Interface Gradio
+with gr.Blocks(title="RobotsMali ASR", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 RobotsMali - Reconnaissance Vocale")
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(
+                label="Audio",
+                type="filepath",
+                sources=["upload", "microphone"]
+            )
+            model_input = gr.Dropdown(
+                choices=list(MODELS.keys()),
+                value="Soloni V3 (TDT-CTC)",
+                label="Modèle"
+            )
             run_btn = gr.Button("🚀 DÉMARRER", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : En attente")
             text_output = gr.Textbox(label="Transcription", lines=12)
+    run_btn.click(
+        fn=pipeline,
+        inputs=[audio_input, model_input],
+        outputs=[status, text_output]
+    )
+# Point d'entrée - CORRECTION ICI
 if __name__ == "__main__":
+    print("🚀 Lancement de l'application RobotsMali ASR...")
+    # Désactiver l'API pour éviter le bug
     demo.queue().launch(
+        server_name="0.0.0.0",
+        show_api=False  # ← Ceci corrige l'erreur
     )