Spaces:

RobotsMali
/

RobotsMali_Video_captionning

Runtime error

App Files Files Community

binaryMao committed on Dec 16, 2025

Commit

bd6b31b

verified ·

1 Parent(s): 0e0456f

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -66

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-ROBOTSMALI — Sous-titrage Bambara (VERSION INTÉGRALE V6.0)
-Incrustation de sous-titres avec tous les modèles RobotsMali.
 """
 import os
 import shlex
@@ -11,6 +10,7 @@ import traceback
 import random
 import textwrap
 import time
 from pathlib import Path
 import numpy as np
@@ -27,7 +27,6 @@ random.seed(1234)
 np.random.seed(1234)
 torch.manual_seed(1234)
-# TOUS VOS MODÈLES SONT ICI
 MODELS = {
     "Soloni V1 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
     "Soloni V0 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
@@ -37,7 +36,6 @@ MODELS = {
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
-# EXEMPLE CONFIGURÉ
 VIDEO_EXAMPLES = [
     ["examples/MARALINKE.mp4", "Soloba V1 (CTC)"]
 ]
@@ -47,27 +45,17 @@ _cache = {}
 # ---------------------------- # FONCTIONS TECHNIQUES # ----------------------------
 def run_cmd(cmd):
-    """Exécute une commande système."""
     res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
     if res.returncode != 0:
         raise RuntimeError(f"Erreur FFmpeg: {res.stdout}")
     return res.stdout
-def ffprobe_duration(path):
-    cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(path)}'
-    out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-    try: return float(out.stdout.strip())
-    except: return None
 def load_model(name):
-    """Charge le modèle sélectionné et nettoie le cache si nécessaire."""
     if name in _cache: return _cache[name]
-    # Nettoyage pour économiser la RAM
     if len(_cache) > 0:
         _cache.clear()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     repo, mode = MODELS[name]
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
@@ -79,59 +67,55 @@ def load_model(name):
     else:
         try: model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
         except: model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
     model.to(DEVICE).eval()
     _cache[name] = model
     return model
-def extract_audio(video_path, out_wav):
-    """Stabilisation du codec (pour la webcam) et extraction audio."""
-    tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
-    os.close(tmp_fd)
-    # On force le H.264 pour éviter les erreurs de lecture
-    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
-    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
-    if os.path.exists(stabilized_mp4): os.remove(stabilized_mp4)
 def burn_subtitles(video_path, words, duration):
-    """Génère le fichier SRT et l'incruste dans la vidéo finale."""
-    out_path = f"output_{int(time.time())}.mp4"
     chunk_size = 7
     with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
         for i, idx in enumerate(range(0, len(words), chunk_size)):
             chunk = words[idx : idx + chunk_size]
             start = (idx / len(words)) * duration
             end = (min(idx + chunk_size, len(words)) / len(words)) * duration
             def t_srt(sec):
                 h=int(sec//3600); m=int((sec%3600)//60); s=int(sec%60); ms=int((sec-int(sec))*1000)
                 return f"{h:02}:{m:02}:{s:02},{ms:03}"
             txt = "\n".join(textwrap.wrap(" ".join(chunk), 40))
             tf.write(f"{i+1}\n{t_srt(start)} --> {t_srt(end)}\n{txt}\n\n")
         srt_name = tf.name
-    # Encodage ultra-rapide pour éviter le timeout
     vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
-    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset ultrafast -crf 28 -c:a copy {shlex.quote(out_path)}')
     os.remove(srt_name)
     return out_path
-# ---------------------------- # PIPELINE # ----------------------------
 def pipeline(video_input, model_name):
     try:
         if not video_input: return "❌ Veuillez charger une vidéo", None
-        video_path = video_input
-        yield "⏳ Phase 1/3 : Stabilisation & Audio...", None
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
             wav_path = tf.name
-        extract_audio(video_path, wav_path)
-        duration = ffprobe_duration(video_path) or 10.0 # fallback
-        yield f"⏳ Phase 2/3 : Analyse IA ({model_name})...", None
         model = load_model(model_name)
         res = model.transcribe([wav_path])[0]
         text = res.text if hasattr(res, 'text') else str(res)
@@ -139,17 +123,19 @@ def pipeline(video_input, model_name):
         if not words: return "⚠️ Pas de parole détectée.", None
-        yield "⏳ Phase 3/3 : Génération des sous-titres...", None
-        final_v = burn_subtitles(video_path, words, duration)
         if os.path.exists(wav_path): os.remove(wav_path)
-        yield "✅ Sous-titrage terminé !", final_v
     except Exception as e:
         traceback.print_exc()
-        yield f"❌ Erreur critique : {str(e)}", None
-# ---------------------------- # INTERFACE ARTISTIQUE # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
@@ -160,34 +146,18 @@ body { background-color: #0b0e14; }
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="title-header"):
-        gr.HTML("""
-            <h1 style='color:#facc15; font-size: 2.5rem; margin:0;'>🤖 ROBOTSMALI</h1>
-            <p style='color:#94a3b8; font-style:italic;'>Intelligence Artificielle pour le Bambara</p>
-            <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
-        """)
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### 📥 Source Vidéo")
-            v_in = gr.Video(label=None, mirror_webcam=False)
-            m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
-            btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
         with gr.Column():
-            gr.Markdown("### 📤 Résultat")
-            status = gr.Markdown("*Prêt pour le traitement...*")
-            v_out = gr.Video(label=None)
-    # EXEMPLES : cache_examples=False est crucial pour que le clic fonctionne
-    gr.Examples(
-        examples=VIDEO_EXAMPLES,
-        inputs=[v_in, m_sel],
-        label="📺 Vidéo d'exemple",
-        cache_examples=False
-    )
-    gr.HTML("<div style='text-align: center; color: #475569; padding-top: 20px;'>© 2025 RobotsMali - Bamako</div>")
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":

 # -*- coding: utf-8 -*-
 """
+ROBOTSMALI — Sous-titrage Bambara (VERSION INTÉGRALE V6.1 - FIX FINAL OUTPUT)
 """
 import os
 import shlex
 import random
 import textwrap
 import time
+import shutil
 from pathlib import Path
 import numpy as np
 np.random.seed(1234)
 torch.manual_seed(1234)
 MODELS = {
     "Soloni V1 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
     "Soloni V0 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
 VIDEO_EXAMPLES = [
     ["examples/MARALINKE.mp4", "Soloba V1 (CTC)"]
 ]
 # ---------------------------- # FONCTIONS TECHNIQUES # ----------------------------
 def run_cmd(cmd):
     """Run ``cmd`` through the shell and return everything it printed.

     stderr is redirected into stdout, so the returned text (and the text
     embedded in the error message) contains both streams.

     Args:
         cmd: Command line, passed as-is to the shell.

     Returns:
         The captured output of the command, decoded as text.

     Raises:
         RuntimeError: If the command exits with a non-zero status.
     """
     completed = subprocess.run(
         cmd,
         shell=True,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
     )
     output = completed.stdout
     if completed.returncode == 0:
         return output
     raise RuntimeError(f"Erreur FFmpeg: {output}")
 def load_model(name):
     if name in _cache: return _cache[name]
     if len(_cache) > 0:
         _cache.clear()
         if torch.cuda.is_available(): torch.cuda.empty_cache()
     repo, mode = MODELS[name]
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
     else:
         try: model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
         except: model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
     model.to(DEVICE).eval()
     _cache[name] = model
     return model
 def _srt_timestamp(seconds):
     """Format a position in seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
     # Work in whole milliseconds so float noise (e.g. 2.3 s -> 299.99 ms)
     # cannot truncate the last digit.
     total_ms = max(0, int(round(seconds * 1000)))
     hours, rest = divmod(total_ms, 3600000)
     minutes, rest = divmod(rest, 60000)
     secs, millis = divmod(rest, 1000)
     return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

 def burn_subtitles(video_path, words, duration):
     """Burn evenly spaced subtitles for ``words`` into ``video_path``.

     The words are grouped in chunks of 7; each chunk is displayed for a
     share of ``duration`` proportional to its position in the word list
     (there is no real word-level alignment).

     Args:
         video_path: Path of the source video handed to ffmpeg.
         words: Sequence of transcribed words.
         duration: Total duration of the video, in seconds.

     Returns:
         Path of the newly encoded video, created in the system temp dir.

     Raises:
         RuntimeError: Propagated from ``run_cmd`` when ffmpeg fails.
     """
     chunk_size = 7
     total = len(words)
     # mkstemp reserves a unique file name; a timestamp with one-second
     # resolution lets two concurrent requests overwrite each other (-y).
     fd, out_path = tempfile.mkstemp(prefix="final_output_", suffix=".mp4")
     os.close(fd)
     srt_name = None
     try:
         with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
             srt_name = tf.name
             for i, idx in enumerate(range(0, total, chunk_size)):
                 chunk = words[idx : idx + chunk_size]
                 start = (idx / total) * duration
                 end = (min(idx + chunk_size, total) / total) * duration
                 txt = "\n".join(textwrap.wrap(" ".join(chunk), 40))
                 tf.write(f"{i+1}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{txt}\n\n")
         # Web-friendly encode: H.264, yuv420p pixel format, faststart.
         vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
         cmd = (
             f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} '
             f'-vf {shlex.quote(vf)} -c:v libx264 -pix_fmt yuv420p -preset ultrafast -crf 28 '
             f'-movflags +faststart -c:a copy {shlex.quote(out_path)}'
         )
         run_cmd(cmd)
     except Exception:
         # Do not leave an empty or partial video behind on failure.
         if os.path.exists(out_path):
             os.remove(out_path)
         raise
     finally:
         # The subtitle file is only an ffmpeg input; always clean it up,
         # including when the encode fails.
         if srt_name is not None and os.path.exists(srt_name):
             os.remove(srt_name)
     return out_path
 def pipeline(video_input, model_name):
     try:
         if not video_input: return "❌ Veuillez charger une vidéo", None
+        yield "⏳ Phase 1/3 : Analyse Audio...", None
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
             wav_path = tf.name
+        # Extraction stable
+        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_input)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(wav_path)}')
+        # Récupération durée
+        dur_out = subprocess.run(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(video_input)}',
+                                 shell=True, stdout=subprocess.PIPE, text=True).stdout
+        duration = float(dur_out.strip()) if dur_out.strip() else 10.0
+        yield f"⏳ Phase 2/3 : Transcription IA ({model_name})...", None
         model = load_model(model_name)
         res = model.transcribe([wav_path])[0]
         text = res.text if hasattr(res, 'text') else str(res)
         if not words: return "⚠️ Pas de parole détectée.", None
+        yield "⏳ Phase 3/3 : Encodage vidéo final...", None
+        final_v = burn_subtitles(video_input, words, duration)
         if os.path.exists(wav_path): os.remove(wav_path)
+        # On force Gradio à renvoyer le chemin absolu
+        yield "✅ Succès !", gr.update(value=final_v)
     except Exception as e:
         traceback.print_exc()
+        yield f"❌ Erreur: {str(e)}", None
+# ---------------------------- # INTERFACE # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="title-header"):
+        gr.HTML("<h1 style='color:#facc15;'>🤖 ROBOTSMALI</h1><p style='color:#94a3b8;'>Sous-titrage Bambara Professionnel</p>")
     with gr.Row():
         with gr.Column():
+            v_in = gr.Video(label="Entrée", mirror_webcam=False)
+            m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle")
+            btn = gr.Button("🚀 GÉNÉRER", variant="primary")
         with gr.Column():
+            status = gr.Markdown("*Prêt*")
+            v_out = gr.Video(label="Résultat Final")
+    gr.Examples(examples=VIDEO_EXAMPLES, inputs=[v_in, m_sel], cache_examples=False)
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":