Spaces:

RobotsMali
/

RobotsMali_ASR_DEMO

Runtime error

App Files Files Community

binaryMao committed on Mar 2

Commit

0c8d6fd

verified ·

1 Parent(s): 599bc18

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -43

app.py CHANGED Viewed

@@ -6,9 +6,8 @@ from nemo.collections import asr as nemo_asr
 import gradio as gr
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-SEGMENT_DURATION = 10.0  # Augmenté à 10s pour l'audio pur (plus efficace)
-# --- CONFIGURATION DES MODÈLES (Identique au script vidéo) ---
 MODELS = {
     "Soloba V3 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v3", "ctc"),
     "Soloba V2 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v2", "ctc"),
@@ -24,23 +23,21 @@ MODELS = {
 _cache = {}
 def get_model(name):
-    """Charge le modèle avec gestion de la mémoire."""
     if name in _cache:
         return _cache[name]
-    # Libérer la mémoire des anciens modèles si nécessaire
-    if len(_cache) > 0:
         _cache.clear()
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     repo, _ = MODELS[name]
-    print(f"⏳ Chargement de {name} depuis {repo}...")
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
-    # Chargement via ASRModel (Factory)
     model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
     model.eval()
@@ -50,36 +47,34 @@ def get_model(name):
     _cache[name] = model
     return model
-# --- PIPELINE ASR ---
 def pipeline(audio_in, model_name):
     tmp_dir = tempfile.mkdtemp()
     try:
-        if not audio_in:
-            yield "❌ Fichier audio manquant", ""
-            return
-        yield "⏳ Traitement audio & Segmentation...", ""
-        # Normalisation : Mono, 16kHz
         wav_path = os.path.join(tmp_dir, "clean.wav")
         subprocess.run(f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {wav_path}", shell=True, check=True)
-        # Découpage en segments
         subprocess.run(f"ffmpeg -i {wav_path} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         valid_segments = [f for f in valid_segments if os.path.getsize(f) > 1000]
         if not valid_segments:
-            yield "❌ Erreur de segmentation", ""
             return
-        yield f"🎙️ Transcription de {len(valid_segments)} segments...", ""
         model = get_model(model_name)
         with torch.inference_mode():
-            # Utilisation de la méthode stable sans Lhotse
             batch_hyp = model.transcribe(
                 valid_segments,
                 batch_size=4,
@@ -87,50 +82,35 @@ def pipeline(audio_in, model_name):
                 num_workers=0
             )
-        # Extraction du texte
         results = []
         for hyp in batch_hyp:
             text = hyp.text if hasattr(hyp, 'text') else str(hyp)
-            if text:
-                results.append(text)
-        final_text = " ".join(results)
-        yield "✅ Transcription terminée", final_text
     except Exception as e:
-        print(traceback.format_exc())
-        yield f"❌ Erreur : {str(e)}", ""
     finally:
         if os.path.exists(tmp_dir):
             shutil.rmtree(tmp_dir)
-# --- INTERFACE GRADIO ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 RobotsMali Speech-to-Text")
-    gr.Markdown("Transcription multi-modèles pour les langues du Mali.")
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="Audio (Fichier ou Micro)", type="filepath")
-            model_input = gr.Dropdown(
-                choices=list(MODELS.keys()),
-                value="Soloni V3 (TDT-CTC)",
-                label="Choisir un modèle"
-            )
             run_btn = gr.Button("🚀 TRANSCRIRE", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : Prêt")
-            text_output = gr.Textbox(label="Résultat", lines=15, show_copy_button=True)
-    run_btn.click(
-        fn=pipeline,
-        inputs=[audio_input, model_input],
-        outputs=[status, text_output]
-    )
-    gr.HTML("<center><p style='color: gray;'>Optimisé pour CPU & GPU Hugging Face</p></center>")
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+SEGMENT_DURATION = 10.0
 MODELS = {
     "Soloba V3 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v3", "ctc"),
     "Soloba V2 (CTC)":           ("RobotsMali/soloba-ctc-0.6b-v2", "ctc"),
 _cache = {}
 def get_model(name):
     if name in _cache:
         return _cache[name]
+    # NETTOYAGE MÉMOIRE AGRESSIF (Crucial pour les modèles 0.6b sur CPU Free)
+    if len(_cache) >= 1:
         _cache.clear()
         gc.collect()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     repo, _ = MODELS[name]
+    print(f"⏳ Chargement de {name}...")
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
     model = nemo_asr.models.ASRModel.restore_from(nemo_file, map_location=torch.device(DEVICE))
     model.eval()
     _cache[name] = model
     return model
 def pipeline(audio_in, model_name):
+    if not audio_in:
+        yield "❌ Erreur", "Veuillez fournir un fichier audio."
+        return
     tmp_dir = tempfile.mkdtemp()
     try:
+        yield "⏳ Préparation de l'audio...", ""
         wav_path = os.path.join(tmp_dir, "clean.wav")
+        # Conversion simple et robuste
         subprocess.run(f"ffmpeg -y -i {shlex.quote(audio_in)} -ac 1 -ar 16000 {wav_path}", shell=True, check=True)
+        # Segmentation
         subprocess.run(f"ffmpeg -i {wav_path} -f segment -segment_time {SEGMENT_DURATION} -c copy {os.path.join(tmp_dir, 'seg_%03d.wav')}", shell=True)
         valid_segments = sorted(glob.glob(os.path.join(tmp_dir, "seg_*.wav")))
         valid_segments = [f for f in valid_segments if os.path.getsize(f) > 1000]
         if not valid_segments:
+            yield "❌ Erreur", "Audio trop court ou illisible."
             return
+        yield f"🎙️ Transcription ({len(valid_segments)} segments)...", ""
         model = get_model(model_name)
         with torch.inference_mode():
             batch_hyp = model.transcribe(
                 valid_segments,
                 batch_size=4,
                 num_workers=0
             )
         results = []
         for hyp in batch_hyp:
             text = hyp.text if hasattr(hyp, 'text') else str(hyp)
+            if text: results.append(text)
+        yield "✅ Terminé", " ".join(results)
     except Exception as e:
+        yield "❌ Erreur", str(e)
     finally:
         if os.path.exists(tmp_dir):
             shutil.rmtree(tmp_dir)
+# --- INTERFACE (Argument show_copy_button supprimé) ---
+with gr.Blocks() as demo:
     gr.Markdown("# 🤖 RobotsMali Speech-to-Text")
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(label="Audio", type="filepath")
+            model_input = gr.Dropdown(choices=list(MODELS.keys()), value="Soloni V3 (TDT-CTC)", label="Modèle")
             run_btn = gr.Button("🚀 TRANSCRIRE", variant="primary")
         with gr.Column():
             status = gr.Markdown("### État : Prêt")
+            # Correction ici : show_copy_button est retiré pour compatibilité
+            text_output = gr.Textbox(label="Résultat", lines=15)
+    run_btn.click(fn=pipeline, inputs=[audio_input, model_input], outputs=[status, text_output])
 if __name__ == "__main__":
     demo.launch()