Spaces:

RobotsMali
/

RobotsMali_Video_captionning

Sleeping

App Files Files Community

binaryMao committed on Dec 19, 2025

Commit

900ddc5

verified ·

1 Parent(s): 8a33271

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -36

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 """
-ROBOTSMALI — Sous-titrage Bambara (VERSION 7.0 - STABLE & ÉPURÉE)
 - Case de résultat unique (Lecture + Téléchargement)
-- Statut de traitement détaillé (Audio -> IA -> Vidéo)
-- Correction automatique des chemins d'exemples
 """
 import os
 import shlex
@@ -23,7 +23,7 @@ from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
-# ---------------------------- # CONFIGURATION TECHNIQUE # ----------------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MODELS = {
@@ -35,15 +35,14 @@ MODELS = {
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
-# Recherche intelligente du fichier exemple
def find_example():
    """Return the first bundled example video that exists on disk.

    The candidate locations are relative paths and are probed in order.

    Returns:
        The first candidate path that exists, or None when none does.
    """
    candidates = (
        "examples/MARALINKE.mp4",
        "MARALINKE.mp4",
        "examples/maralinke.mp4",
    )
    return next((path for path in candidates if os.path.exists(path)), None)
-EXAMPLE_PATH = find_example()
 _cache = {}
 # ---------------------------- # MOTEUR DE TRAITEMENT # ----------------------------
@@ -51,7 +50,7 @@ _cache = {}
 def run_cmd(cmd):
     """Run an external command and return its combined output.

     Args:
         cmd: Either a shell command line (str), executed through the shell
             as before, or a sequence of arguments, executed directly
             without a shell so that no quoting is needed.

     Returns:
         The text written by the command; stderr is merged into stdout.

     Raises:
         RuntimeError: If the command exits with a non-zero status. The
             message carries the captured output.
     """
     # Only a string command needs shell parsing; an argument list is passed
     # to the program untouched.
     res = subprocess.run(
         cmd,
         shell=isinstance(cmd, str),
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
     )
     if res.returncode != 0:
         raise RuntimeError(f"Erreur système: {res.stdout}")
     return res.stdout
 def load_model(name):
@@ -76,11 +75,11 @@ def load_model(name):
     return model
 def burn_subtitles(video_path, words, duration):
-    # Sortie dans le dossier courant pour éviter les pertes de fichiers temporaires
     out_name = f"robotsmali_final_{int(time.time())}.mp4"
     out_path = os.path.abspath(out_name)
-    # Génération du fichier SRT
     chunk_size = 7
     with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
         for i, idx in enumerate(range(0, len(words), chunk_size)):
@@ -94,7 +93,8 @@ def burn_subtitles(video_path, words, duration):
             tf.write(f"{i+1}\n{t_srt(start)} --> {t_srt(end)}\n{txt}\n\n")
         srt_name = tf.name
-    # Encodage H.264 ultra-compatible (MP4 Progressif)
     vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
     cmd = (
         f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} '
@@ -105,74 +105,72 @@ def burn_subtitles(video_path, words, duration):
     if os.path.exists(srt_name): os.remove(srt_name)
     return out_path
-# ---------------------------- # PIPELINE PRINCIPALE # ----------------------------
 def pipeline(video_input, model_name):
     """Transcribe a video and burn the transcript in as subtitles.

     This is a generator that reports progress. Every yielded item is a
     (status_markdown, video_path_or_None) pair, and only the final item of
     a successful run carries the path of the rendered video.

     Args:
         video_input: Path of the source video. A falsy value yields an
             error status and stops.
         model_name: Model label passed to load_model() and shown in the
             phase 2 status.

     Yields:
         Tuples of (Markdown status text, output video path or None).
     """
     wav_path = None
     try:
         if not video_input:
             yield "### ❌ État\n*Veuillez charger une vidéo.*", None
             return

         yield "### ⏳ État\n*Phase 1 : Extraction du signal audio...*", None
         # Use a unique scratch file per call: a fixed name in the working
         # directory would be overwritten by another run in progress.
         fd, wav_path = tempfile.mkstemp(prefix="robotsmali_audio_", suffix=".wav")
         os.close(fd)
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_input)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(wav_path)}')

         # The total duration is handed to burn_subtitles() for timing.
         dur_out = subprocess.run(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(video_input)}',
                                  shell=True, stdout=subprocess.PIPE, text=True).stdout
         try:
             duration = float(dur_out.strip())
         except ValueError:
             # Empty or non-numeric ffprobe output: use the default length.
             duration = 10.0

         yield f"### ⏳ État\n*Phase 2 : Transcription Bambara ({model_name})...*", None
         model = load_model(model_name)
         res = model.transcribe([wav_path])[0]
         text = res.text if hasattr(res, 'text') else str(res)
         # Single-character tokens are dropped from the subtitle text.
         words = [w for w in text.split() if len(w) > 1]
         if not words:
             yield "### ⚠️ État\n*Aucune parole détectée dans la vidéo.*", None
             return

         yield "### ⏳ État\n*Phase 3 : Incrustation et rendu final...*", None
         final_video = burn_subtitles(video_input, words, duration)
         yield "### ✅ État\n*Traitement terminé !*", final_video
     except Exception as e:
         traceback.print_exc()
         yield f"### ❌ État\n*Erreur technique : {str(e)}*", None
     finally:
         # Remove the scratch audio on every exit path, including the early
         # "no words" return and errors.
         if wav_path and os.path.exists(wav_path):
             os.remove(wav_path)
-# ---------------------------- # INTERFACE UTILISATEUR # ----------------------------
 # Page styling: dark page background, translucent rounded container, centered
 # header and a green gradient on primary buttons.
 custom_css = """
 body { background-color: #0b0e14; }
-.gradio-container { background: rgba(17, 25, 40, 0.9) !important; border-radius: 15px; border: 1px solid rgba(255, 255, 255, 0.1); }
-#header { text-align: center; padding: 15px; }
 .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
 """
 # Build the UI: a header, then an input column and a result column.
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="header"):
-        gr.HTML("<h1 style='color:#facc15; margin:0;'>🤖 ROBOTSMALI</h1><p style='color:#94a3b8;'>Intelligence Artificielle de Sous-titrage Bambara</p>")
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### 📥 1. ENTRÉE")
-            v_in = gr.Video(label="Vidéo à traiter")
             m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
             btn = gr.Button("🚀 GÉNÉRER", variant="primary")
         with gr.Column():
-            gr.Markdown("### 📤 2. SORTIE")
-            status = gr.Markdown("### État\n*Prêt*")
-            v_out = gr.Video(label="Résultat final")
-    # Example handling
     if EXAMPLE_PATH:
         gr.Examples(examples=[[EXAMPLE_PATH, "Soloba V1 (CTC)"]], inputs=[v_in, m_sel], label="📺 Exemple")
-    else:
-        gr.Markdown("⚠️ *Note : Aucun fichier exemple détecté sur le serveur.*")
     # pipeline yields (status, video) pairs, matching the two outputs listed here.
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":
     # NOTE(review): share=True requests a public link; confirm it is wanted where this app is hosted.
-    demo.launch(debug=True,share=True)

 # -*- coding: utf-8 -*-
 """
+ROBOTSMALI — Sous-titrage Bambara (VERSION 7.2 - FIX DURATION & STREAMING)
+- Correction Moov Atom (+faststart) pour affichage instantané
 - Case de résultat unique (Lecture + Téléchargement)
+- Suivi des phases de traitement (Audio, IA, Rendu)
 """
 import os
 import shlex
 from nemo.collections import asr as nemo_asr
 import gradio as gr
+# ---------------------------- # CONFIGURATION # ----------------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MODELS = {
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
+# Détection automatique de la vidéo d'exemple
def get_example():
    """Return the first bundled example video that exists on disk.

    The candidate locations are relative paths and are probed in order.

    Returns:
        The first candidate path that exists, or None when none does.
    """
    candidates = ("examples/MARALINKE.mp4", "MARALINKE.mp4")
    return next((path for path in candidates if os.path.exists(path)), None)
+EXAMPLE_PATH = get_example()
 _cache = {}
 # ---------------------------- # MOTEUR DE TRAITEMENT # ----------------------------
 def run_cmd(cmd):
     """Run an external command and return its combined output.

     Args:
         cmd: Either a shell command line (str), executed through the shell
             as before, or a sequence of arguments, executed directly
             without a shell so that no quoting is needed.

     Returns:
         The text written by the command; stderr is merged into stdout.

     Raises:
         RuntimeError: If the command exits with a non-zero status. The
             message carries the captured output.
     """
     # Only a string command needs shell parsing; an argument list is passed
     # to the program untouched.
     res = subprocess.run(
         cmd,
         shell=isinstance(cmd, str),
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
     )
     if res.returncode != 0:
         raise RuntimeError(f"FFmpeg Error: {res.stdout}")
     return res.stdout
 def load_model(name):
     return model
 def burn_subtitles(video_path, words, duration):
+    # Création du nom de fichier unique
     out_name = f"robotsmali_final_{int(time.time())}.mp4"
     out_path = os.path.abspath(out_name)
+    # Création du fichier de sous-titres (SRT)
     chunk_size = 7
     with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
         for i, idx in enumerate(range(0, len(words), chunk_size)):
             tf.write(f"{i+1}\n{t_srt(start)} --> {t_srt(end)}\n{txt}\n\n")
         srt_name = tf.name
+    # FFmpeg avec correction du Moov Atom (+faststart) et format Web standard
+    # Cela permet au navigateur de connaître la durée dès le début du fichier.
     vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
     cmd = (
         f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} '
     if os.path.exists(srt_name): os.remove(srt_name)
     return out_path
+# ---------------------------- # PIPELINE # ----------------------------
 def pipeline(video_input, model_name):
     """Transcribe a video and burn the transcript in as subtitles.

     This is a generator that reports progress. Every yielded item is a
     (status_markdown, video_path_or_None) pair, and only the final item of
     a successful run carries the path of the rendered video.

     Args:
         video_input: Path of the source video. A falsy value yields an
             error status and stops.
         model_name: Model label passed to load_model() and shown in the
             phase 2 status.

     Yields:
         Tuples of (Markdown status text, output video path or None).
     """
     wav_path = None
     try:
         if not video_input:
             yield "### ❌ État\n*Aucune vidéo chargée.*", None
             return

         yield "### ⏳ État\n*Phase 1/3 : Analyse audio et extraction...*", None
         # Use a unique scratch file per call: a fixed name in the working
         # directory would be overwritten by another run in progress.
         fd, wav_path = tempfile.mkstemp(prefix="robotsmali_audio_", suffix=".wav")
         os.close(fd)
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_input)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(wav_path)}')

         # Fetch the duration used to synchronize the subtitles.
         dur_out = subprocess.run(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(video_input)}',
                                  shell=True, stdout=subprocess.PIPE, text=True).stdout
         try:
             duration = float(dur_out.strip())
         except ValueError:
             # Empty or non-numeric ffprobe output: use the default length.
             duration = 10.0

         yield f"### ⏳ État\n*Phase 2/3 : Transcription IA ({model_name})...*", None
         model = load_model(model_name)
         res = model.transcribe([wav_path])[0]
         text = res.text if hasattr(res, 'text') else str(res)
         # Single-character tokens are dropped from the subtitle text.
         words = [w for w in text.split() if len(w) > 1]
         if not words:
             yield "### ⚠️ État\n*Aucune parole détectée.*", None
             return

         yield "### ⏳ État\n*Phase 3/3 : Encodage vidéo et optimisation streaming...*", None
         final_v = burn_subtitles(video_input, words, duration)
         yield "### ✅ État\n*Traitement terminé avec succès !*", final_v
     except Exception as e:
         traceback.print_exc()
         yield f"### ❌ État\n*Erreur : {str(e)}*", None
     finally:
         # Remove the scratch audio on every exit path, including the early
         # "no words" return and errors.
         if wav_path and os.path.exists(wav_path):
             os.remove(wav_path)
+# ---------------------------- # INTERFACE # ----------------------------
 # Page styling: dark page background, translucent rounded container, centered
 # header and a green gradient on primary buttons.
 custom_css = """
 body { background-color: #0b0e14; }
+.gradio-container { background: rgba(17, 25, 40, 0.9) !important; border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); }
+#header { text-align: center; padding: 20px; }
 .gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
 """
 # Build the UI: a header, then an input column and a result column.
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="header"):
+        gr.HTML("<h1 style='color:#facc15; margin:0;'>🤖 ROBOTSMALI</h1><p style='color:#94a3b8;'>Sous-titrage Automatique Bambara</p>")
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 📥 1. CHARGEMENT")
+            v_in = gr.Video(label="Vidéo source", mirror_webcam=False)
             m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
             btn = gr.Button("🚀 GÉNÉRER", variant="primary")
         with gr.Column():
+            gr.Markdown("### 📤 2. RÉSULTAT")
+            status = gr.Markdown("### État\n*En attente...*")
+            v_out = gr.Video(label="Vidéo finale (Synchronisée)")
     # The example row is only offered when an example file was found.
     if EXAMPLE_PATH:
         gr.Examples(examples=[[EXAMPLE_PATH, "Soloba V1 (CTC)"]], inputs=[v_in, m_sel], label="📺 Exemple")
     # pipeline yields (status, video) pairs, matching the two outputs listed here.
     btn.click(pipeline, [v_in, m_sel], [status, v_out])
 if __name__ == "__main__":
     # NOTE(review): share=True requests a public link; confirm it is wanted where this app is hosted.
+    demo.launch(debug=True, share=True)