Spaces:

RobotsMali
/

RobotsMali_ASR_DEMO

Runtime error

App Files Files Community

binaryMao committed on Mar 2

Commit

7b18ccd

verified ·

1 Parent(s): c446e17

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -54

app.py CHANGED Viewed

@@ -8,16 +8,15 @@ import gradio as gr
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
-# Configuration du matériel
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 SEGMENT_DURATION = 10.0
-# Dictionnaire des modèles
 MODELS = {
     "Soloni V3 (Recommandé CPU)": "RobotsMali/soloni-114m-tdt-ctc-v3"
 }
-# Cache pour éviter de recharger le modèle à chaque clic
 _cache = {}
 # ==========================
@@ -28,12 +27,12 @@ def get_model(model_name):
         return _cache[model_name]
     repo_id = MODELS[model_name]
-    print(f"⏳ Téléchargement du modèle depuis {repo_id}...")
-    # Téléchargement depuis Hugging Face
     folder = snapshot_download(repo_id, local_dir_use_symlinks=False)
-    # Recherche automatique du fichier .nemo dans le dossier téléchargé
     try:
         nemo_file = next(
             os.path.join(folder, f)
@@ -41,11 +40,9 @@ def get_model(model_name):
             if f.endswith(".nemo")
         )
     except StopIteration:
-        raise FileNotFoundError("Aucun fichier .nemo trouvé dans le dépôt.")
-    print(f"📦 Restauration du modèle NeMo sur {DEVICE}...")
-    # Utilisation de ASRModel pour l'auto-détection (CTC/RNNT)
     model = nemo_asr.models.ASRModel.restore_from(
         nemo_file,
         map_location=torch.device(DEVICE)
@@ -56,100 +53,82 @@ def get_model(model_name):
     return model
 # ==========================
-# PIPELINE DE TRANSCRIPTION
 # ==========================
 def pipeline(audio_path, model_name):
     if not audio_path:
-        yield "❌ Aucun fichier audio fourni", ""
         return
-    # Création d'un dossier temporaire unique pour les segments
     tmp_dir = tempfile.mkdtemp()
     try:
-        yield "⏳ Préparation de l'audio (FFmpeg)...", ""
-        # Normalisation et segmentation de l'audio
-        # -ac 1 (mono), -ar 16000 (16kHz requis par NeMo)
-        command = (
             f"ffmpeg -y -i '{audio_path}' -f segment -segment_time {SEGMENT_DURATION} "
             f"-ac 1 -ar 16000 {tmp_dir}/seg_%03d.wav > /dev/null 2>&1"
         )
-        os.system(command)
-        # Liste et tri des segments générés
         segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
-        # Filtrer les fichiers trop petits (silences ou erreurs FFmpeg)
         segments = [s for s in segments if os.path.getsize(s) > 1000]
         if not segments:
-            yield "❌ Erreur lors de la segmentation audio", ""
             return
-        yield f"🎙️ Transcription de {len(segments)} segments en cours...", ""
-        # Récupération du modèle
         model = get_model(model_name)
-        # Inférence
         with torch.inference_mode():
-            results = model.transcribe(
-                segments,
-                batch_size=4,
-                num_workers=0 # Important sur CPU pour éviter les fuites de mémoire
-            )
-        # Reconstruction du texte
-        # Note: selon le modèle, results peut être une liste de chaînes ou un objet complexe
         if isinstance(results, tuple):
-            text_results = results[0]
         else:
-            text_results = results
-        final_text = " ".join(text_results)
-        yield "✅ Transcription terminée", final_text
     except Exception as e:
-        yield f"❌ Erreur système : {str(e)}", ""
     finally:
-        # Nettoyage automatique du dossier temporaire
         if os.path.exists(tmp_dir):
             shutil.rmtree(tmp_dir)
 # ==========================
-# INTERFACE GRADIO (UI)
 # ==========================
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    # 🤖 RobotsMali ASR – Edition CPU
-    Déposez un fichier audio pour obtenir une transcription automatique.
-    """)
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Fichier Audio (WAV, MP3, etc.)")
             model_input = gr.Dropdown(
                 choices=list(MODELS.keys()),
                 value="Soloni V3 (Recommandé CPU)",
-                label="Sélection du Modèle"
             )
-            btn = gr.Button("🚀 Lancer la Transcription", variant="primary")
         with gr.Column():
-            status = gr.Markdown("### Statut : Prêt")
-            output = gr.Textbox(label="Résultat de la transcription", lines=12, show_copy_button=True)
-    # Interaction
     btn.click(
         fn=pipeline,
         inputs=[audio_input, model_input],
-        outputs=[status, output]
     )
-    gr.Markdown("--- \n *Optimisé pour les environnements à ressources limitées.*")
 if __name__ == "__main__":
     demo.launch()

 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
+# Configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 SEGMENT_DURATION = 10.0
+# Modèle unique pour CPU (CTC est plus léger)
 MODELS = {
     "Soloni V3 (Recommandé CPU)": "RobotsMali/soloni-114m-tdt-ctc-v3"
 }
 _cache = {}
 # ==========================
         return _cache[model_name]
     repo_id = MODELS[model_name]
+    print(f"⏳ Chargement du modèle : {repo_id}")
+    # Téléchargement local
     folder = snapshot_download(repo_id, local_dir_use_symlinks=False)
+    # Trouve le fichier .nemo
     try:
         nemo_file = next(
             os.path.join(folder, f)
             if f.endswith(".nemo")
         )
     except StopIteration:
+        raise FileNotFoundError("Fichier .nemo introuvable.")
+    # Restauration générique (Auto-détecte CTC/RNNT)
     model = nemo_asr.models.ASRModel.restore_from(
         nemo_file,
         map_location=torch.device(DEVICE)
     return model
 # ==========================
+# LOGIQUE DE TRANSCRIPTION
 # ==========================
 def pipeline(audio_path, model_name):
     if not audio_path:
+        yield "❌ Erreur", "Veuillez fournir un fichier audio."
         return
     tmp_dir = tempfile.mkdtemp()
     try:
+        yield "⏳ Traitement audio...", ""
+        # Segmentation FFmpeg (16kHz Mono requis)
+        # On utilise une syntaxe simple pour éviter les erreurs de shell
+        os.system(
             f"ffmpeg -y -i '{audio_path}' -f segment -segment_time {SEGMENT_DURATION} "
             f"-ac 1 -ar 16000 {tmp_dir}/seg_%03d.wav > /dev/null 2>&1"
         )
         segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         segments = [s for s in segments if os.path.getsize(s) > 1000]
         if not segments:
+            yield "❌ Erreur", "Impossible de segmenter l'audio."
             return
+        yield f"🎙️ Transcription ({len(segments)} segments)...", ""
         model = get_model(model_name)
         with torch.inference_mode():
+            # Transcription par batch pour le CPU
+            results = model.transcribe(segments, batch_size=2, num_workers=0)
+        # Gestion des différents formats de sortie de NeMo
         if isinstance(results, tuple):
+            text_list = results[0]
         else:
+            text_list = results
+        final_text = " ".join(text_list)
+        yield "✅ Terminé", final_text
     except Exception as e:
+        yield "❌ Erreur Système", str(e)
     finally:
         if os.path.exists(tmp_dir):
             shutil.rmtree(tmp_dir)
 # ==========================
+# INTERFACE UTILISATEUR
 # ==========================
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 RobotsMali ASR")
+    gr.Markdown("Outil de transcription automatique optimisé pour le CPU.")
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(type="filepath", label="Audio")
             model_input = gr.Dropdown(
                 choices=list(MODELS.keys()),
                 value="Soloni V3 (Recommandé CPU)",
+                label="Modèle"
             )
+            btn = gr.Button("🚀 Transcrire", variant="primary")
         with gr.Column():
+            status_output = gr.Textbox(label="Statut", interactive=False)
+            text_output = gr.Textbox(label="Texte Transcrit", lines=10)
     btn.click(
         fn=pipeline,
         inputs=[audio_input, model_input],
+        outputs=[status_output, text_output]
     )
 if __name__ == "__main__":
     demo.launch()