Spaces:

RobotsMali
/

RobotsMali_Video_captionning

Running

App Files Community

binaryMao committed on Dec 19, 2025

Commit

4fb6b44

verified ·

1 Parent(s): 555494f

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -25

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 """
-ROBOTSMALI — Sous-titrage Bambara (VERSION 7.2 - FIX DURATION & STREAMING)
-- Correction Moov Atom (+faststart) pour affichage instantané
-- Case de résultat unique (Lecture + Téléchargement)
-- Suivi des phases de traitement (Audio, IA, Rendu)
 """
 import os
 import shlex
@@ -17,8 +17,6 @@ from pathlib import Path
 import numpy as np
 import torch
-import soundfile as sf
-import librosa
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
@@ -35,7 +33,7 @@ MODELS = {
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
-# Détection automatique de la vidéo d'exemple
 def get_example():
     paths = ["examples/MARALINKE.mp4", "MARALINKE.mp4"]
     for p in paths:
@@ -48,12 +46,14 @@ _cache = {}
 # ---------------------------- # MOTEUR DE TRAITEMENT # ----------------------------
 def run_cmd(cmd):
     res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
     if res.returncode != 0:
-        raise RuntimeError(f"FFmpeg Error: {res.stdout}")
     return res.stdout
 def load_model(name):
     if name in _cache: return _cache[name]
     _cache.clear()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
@@ -74,12 +74,12 @@ def load_model(name):
     _cache[name] = model
     return model
-def burn_subtitles(video_path, words, duration):
-    # Création du nom de fichier unique
     out_name = f"robotsmali_final_{int(time.time())}.mp4"
     out_path = os.path.abspath(out_name)
-    # Création du fichier de sous-titres (SRT)
     chunk_size = 7
     with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
         for i, idx in enumerate(range(0, len(words), chunk_size)):
@@ -93,8 +93,7 @@ def burn_subtitles(video_path, words, duration):
             tf.write(f"{i+1}\n{t_srt(start)} --> {t_srt(end)}\n{txt}\n\n")
         srt_name = tf.name
-    # FFmpeg avec correction du Moov Atom (+faststart) et format Web standard
-    # Cela permet au navigateur de connaître la durée dès le début du fichier.
     vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
     cmd = (
         f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} '
@@ -110,14 +109,13 @@ def burn_subtitles(video_path, words, duration):
 def pipeline(video_input, model_name):
     try:
         if not video_input:
-            yield "### ❌ État\n*Aucune vidéo chargée.*", None
             return
-        yield "### ⏳ État\n*Phase 1/3 : Analyse audio et extraction...*", None
         wav_path = os.path.abspath("temp_audio.wav")
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_input)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(wav_path)}')
-        # Récupération de la durée exacte pour synchroniser les sous-titres
         dur_out = subprocess.run(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(video_input)}',
                                  shell=True, stdout=subprocess.PIPE, text=True).stdout
         duration = float(dur_out.strip()) if dur_out.strip() else 10.0
@@ -132,17 +130,17 @@ def pipeline(video_input, model_name):
             yield "### ⚠️ État\n*Aucune parole détectée.*", None
             return
-        yield "### ⏳ État\n*Phase 3/3 : Encodage vidéo et optimisation streaming...*", None
-        final_v = burn_subtitles(video_input, words, duration)
         if os.path.exists(wav_path): os.remove(wav_path)
         yield "### ✅ État\n*Traitement terminé avec succès !*", final_v
     except Exception as e:
         traceback.print_exc()
-        yield f"### ❌ État\n*Erreur : {str(e)}*", None
-# ---------------------------- # INTERFACE # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
@@ -153,7 +151,7 @@ body { background-color: #0b0e14; }
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="header"):
-        gr.HTML("<h1 style='color:#facc15; margin:0;'>🤖 ROBOTSMALI</h1><p style='color:#94a3b8;'>Sous-titrage Automatique Bambara</p>")
     with gr.Row():
         with gr.Column():
@@ -164,13 +162,16 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
         with gr.Column():
             gr.Markdown("### 📤 2. RÉSULTAT")
-            status = gr.Markdown("### État\n*En attente...*")
-            v_out = gr.Video(label="Vidéo finale (Synchronisée)")
     if EXAMPLE_PATH:
-        gr.Examples(examples=[[EXAMPLE_PATH, "Soloba V1 (CTC)"]], inputs=[v_in, m_sel], label="📺 Exemple")
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":
-    demo.launch(debug=True, share=True)

 # -*- coding: utf-8 -*-
 """
+ROBOTSMALI — Sous-titrage Bambara (VERSION 7.4 - DEBUG & SHARE)
+- Correction Moov Atom pour le streaming (FastStart)
+- Interface stable avec partage public activé
+- Suivi détaillé des phases de transcription
 """
 import os
 import shlex
 import numpy as np
 import torch
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
+# Détection automatique du fichier d'exemple
 def get_example():
     paths = ["examples/MARALINKE.mp4", "MARALINKE.mp4"]
     for p in paths:
 # ---------------------------- # MOTEUR DE TRAITEMENT # ----------------------------
 def run_cmd(cmd):
+    """Exécute une commande système et capture les erreurs."""
     res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
     if res.returncode != 0:
+        raise RuntimeError(f"Erreur système : {res.stdout}")
     return res.stdout
 def load_model(name):
+    """Charge le modèle IA en mémoire avec mise en cache."""
     if name in _cache: return _cache[name]
     _cache.clear()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
     _cache[name] = model
     return model
+def process_video(video_path, words, duration):
+    """Génère la vidéo finale avec sous-titres et optimisation streaming."""
     out_name = f"robotsmali_final_{int(time.time())}.mp4"
     out_path = os.path.abspath(out_name)
+    # Génération du fichier SRT temporaire
     chunk_size = 7
     with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
         for i, idx in enumerate(range(0, len(words), chunk_size)):
             tf.write(f"{i+1}\n{t_srt(start)} --> {t_srt(end)}\n{txt}\n\n")
         srt_name = tf.name
+    # Encodage FFmpeg avec le flag crucial +faststart
     vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
     cmd = (
         f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} '
 def pipeline(video_input, model_name):
     try:
         if not video_input:
+            yield "### ❌ État\n*Aucune vidéo détectée.*", None
             return
+        yield "### ⏳ État\n*Phase 1/3 : Analyse audio en cours...*", None
         wav_path = os.path.abspath("temp_audio.wav")
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_input)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(wav_path)}')
         dur_out = subprocess.run(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(video_input)}',
                                  shell=True, stdout=subprocess.PIPE, text=True).stdout
         duration = float(dur_out.strip()) if dur_out.strip() else 10.0
             yield "### ⚠️ État\n*Aucune parole détectée.*", None
             return
+        yield "### ⏳ État\n*Phase 3/3 : Finalisation de la vidéo...*", None
+        final_v = process_video(video_input, words, duration)
         if os.path.exists(wav_path): os.remove(wav_path)
         yield "### ✅ État\n*Traitement terminé avec succès !*", final_v
     except Exception as e:
         traceback.print_exc()
+        yield f"### ❌ État\n*Erreur critique : {str(e)}*", None
+# ---------------------------- # INTERFACE GRADIO # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="header"):
+        gr.HTML("<h1 style='color:#facc15; margin:0;'>🤖 ROBOTSMALI</h1><p style='color:#94a3b8;'>Intelligence Artificielle pour le Bambara</p>")
     with gr.Row():
         with gr.Column():
         with gr.Column():
             gr.Markdown("### 📤 2. RÉSULTAT")
+            status = gr.Markdown("### État\n*Prêt*")
+            v_out = gr.Video(label="Vidéo finale")
+    # Désactivation du cache pour éviter le gel de l'interface sur Hugging Face
     if EXAMPLE_PATH:
+        gr.Examples(examples=[[EXAMPLE_PATH, "Soloba V1 (CTC)"]], inputs=[v_in, m_sel], cache_examples=False)
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":
+    # share=True : Crée un lien public .gradio.live
+    # debug=True : Affiche les erreurs détaillées dans la console
+    demo.launch(share=True, debug=True)