Spaces:

RobotsMali
/

RobotsMali_Video_captionning

Running

App Files Files Community

binaryMao committed on Dec 16, 2025

Commit

e7976e4

verified ·

1 Parent(s): 95a2204

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -72

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-ROBOTSMALI — Sous-titrage Bambara (V5.5 - Production Ready)
-- Vidéo d'exemple : examples/MARALINKE.mp4
-- Correction AttributeError: Gradio Div -> Column/HTML
-- Correction Codec Webcam : VP8 -> H.264 (Stabilisation forcée)
 """
 import os
 import shlex
@@ -22,27 +20,12 @@ from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
-# ---------------------------- # VÉRIFICATION DIAGNOSTIC # ----------------------------
-print("--- DIAGNOSTIC DES FICHIERS ---")
-example_path = "examples/MARALINKE.mp4"
-if os.path.exists(example_path):
-    print(f"✅ SUCCÈS : {example_path} est bien présent.")
-else:
-    print(f"❌ ERREUR : {example_path} est introuvable !")
-    if os.path.exists("examples"):
-        print(f"Contenu réel du dossier examples/ : {os.listdir('examples')}")
-    else:
-        print("Le dossier 'examples' n'existe pas à la racine du projet.")
-print("-------------------------------")
-# ---------------------------- # CONFIGURATION # ----------------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 random.seed(1234)
 np.random.seed(1234)
 torch.manual_seed(1234)
-SEGMENT_DURATION = 10.0
 MODELS = {
     "Soloni V1 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
     "Soloni V0 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
@@ -52,7 +35,10 @@ MODELS = {
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
-VIDEO_EXAMPLES = [[example_path, "Soloba V1 (CTC)"]]
 _cache = {}
@@ -61,7 +47,7 @@ _cache = {}
 def run_cmd(cmd):
     res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
     if res.returncode != 0:
-        raise RuntimeError(f"Erreur FFmpeg: {res.stdout}")
     return res.stdout
 def ffprobe_duration(path):
@@ -89,10 +75,10 @@ def load_model(name):
     return model
 def extract_audio(video_path, out_wav):
-    """Stabilisation pour flux webcam et extraction audio."""
     tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
     os.close(tmp_fd)
-    # Correction WebM/Webcam : réencodage libx264 forcé
     run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
     run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
     if os.path.exists(stabilized_mp4): os.remove(stabilized_mp4)
@@ -100,21 +86,45 @@ def extract_audio(video_path, out_wav):
 def clean_audio(wav_path):
     audio, sr = sf.read(wav_path)
     if audio.ndim == 2: audio = audio.mean(axis=1)
-    if sr != 16000: audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=16000)
     max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
     if max_val > 1e-6: audio = audio / max_val * 0.9
     clean_path = wav_path.replace(".wav", "_clean.wav")
     sf.write(clean_path, audio, 16000)
     return clean_path, audio, 16000
-# ---------------------------- # PIPELINE PRINCIPAL # ----------------------------
 def pipeline(video_input, model_name):
     try:
-        if not video_input: return "❌ Vidéo introuvable. Veuillez réessayer.", None
         video_path = video_input
-        yield "⏳ Phase 1/3 : Analyse du fichier et extraction audio...", None
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
             wav_path = tf.name
@@ -122,66 +132,42 @@ def pipeline(video_input, model_name):
         clean_wav, audio, sr = clean_audio(wav_path)
         duration = ffprobe_duration(video_path) or (len(audio)/sr)
-        yield f"⏳ Phase 2/3 : Transcription IA avec {model_name}...", None
         model = load_model(model_name)
-        text_out = model.transcribe([clean_wav])[0]
-        text_str = text_out.text if hasattr(text_out, 'text') else str(text_out)
-        words = [w for w in text_str.split() if len(w) > 1]
-        if not words: return "⚠️ Aucune parole détectée dans la vidéo.", None
         yield "⏳ Phase 3/3 : Incrustation des sous-titres...", None
-        # Heuristique d'alignement simple
-        subs = []
-        chunk_size = 7
-        for i in range(0, len(words), chunk_size):
-            chunk = words[i:i+chunk_size]
-            s = (i / len(words)) * duration
-            e = (min(i + chunk_size, len(words)) / len(words)) * duration
-            subs.append((s, e, "\n".join(textwrap.wrap(" ".join(chunk), 40))))
-        res_v = burn(video_path, subs)
-        yield "✅ Succès ! Votre vidéo est prête.", res_v
     except Exception as e:
         traceback.print_exc()
-        yield f"❌ Erreur : {str(e)}", None
-def burn(video_path, subs):
-    out_path = "RobotsMali_Subtitled.mp4"
-    with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
-        for idx, (start, end, text) in enumerate(subs, 1):
-            def t_srt(sec):
-                h=int(sec//3600); m=int((sec%3600)//60); s=int(sec%60); ms=int((sec-int(sec))*1000)
-                return f"{h:02}:{m:02}:{s:02},{ms:03}"
-            tf.write(f"{idx}\n{t_srt(start)} --> {t_srt(end)}\n{text}\n\n")
-        srt_name = tf.name
-    vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=24,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
-    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac {shlex.quote(out_path)}')
-    os.remove(srt_name)
-    return out_path
-# ---------------------------- # INTERFACE ARTISTIQUE # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
-.gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37); }
-#title-block { text-align: center; padding: 20px; }
-.gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; font-weight: bold !important; transition: all 0.3s ease !important; }
-.gr-button-primary:hover { transform: scale(1.02); box-shadow: 0 0 15px rgba(16, 185, 129, 0.4); }
 """
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-    with gr.Column(elem_id="title-block"):
         gr.HTML("""
-            <h1 style='color:#facc15; font-size: 2.5rem; margin-bottom:0;'>🤖 ROBOTSMALI</h1>
-            <p style='color:#94a3b8; font-size: 1.1rem;'>Intelligence Artificielle pour la Langue Bambara</p>
             <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
         """)
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### 📥 Source Vidéo")
             v_in = gr.Video(label=None, mirror_webcam=False)
             m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
             btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
@@ -191,14 +177,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
             status = gr.Markdown("*En attente de traitement...*")
             v_out = gr.Video(label=None)
-    # Section Exemples
     gr.Examples(
         examples=VIDEO_EXAMPLES,
         inputs=[v_in, m_sel],
-        label="📺 Sélectionner une vidéo d'exemple"
     )
-    gr.HTML("<div style='text-align: center; color: #475569; margin-top: 30px; font-size: 0.9rem;'>© 2025 RobotsMali • Bamako, Mali</div>")
     btn.click(pipeline, [v_in, m_sel], [status, v_out])

 # -*- coding: utf-8 -*-
 """
+ROBOTSMALI — Sous-titrage Bambara (V5.6 - Production Final)
+Logiciel de transcription et d'incrustation vidéo pour le Bambara.
 """
 import os
 import shlex
 from nemo.collections import asr as nemo_asr
 import gradio as gr
+# ---------------------------- # CONFIGURATION IA # ----------------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 random.seed(1234)
 np.random.seed(1234)
 torch.manual_seed(1234)
 MODELS = {
     "Soloni V1 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
     "Soloni V0 (RNNT)":        ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
     "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
+# Chemin vers la vidéo d'exemple
+VIDEO_EXAMPLES = [
+    ["examples/MARALINKE.mp4", "Soloba V1 (CTC)"]
+]
 _cache = {}
 def run_cmd(cmd):
     """Run an external command and return its combined stdout/stderr text.

     Args:
         cmd: Either a shell command line (``str``), executed through the
             shell exactly as before, or a sequence of arguments, executed
             directly without a shell so that no quoting is required.

     Returns:
         The captured output; stderr is merged into stdout.

     Raises:
         RuntimeError: If the command exits with a non-zero status. The
             message carries the captured output.
     """
     res = subprocess.run(
         cmd,
         # Only plain strings need the shell; argument lists bypass it.
         shell=isinstance(cmd, str),
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
     )
     if res.returncode != 0:
         raise RuntimeError(f"Erreur système: {res.stdout}")
     return res.stdout
 def ffprobe_duration(path):
     return model
 def extract_audio(video_path, out_wav):
     """Re-encode the video to H.264/AAC, then extract 16 kHz mono WAV audio.

     Args:
         video_path: Path of the input video file.
         out_wav: Path where the extracted WAV file is written.

     Raises:
         RuntimeError: Propagated from ``run_cmd`` when an ffmpeg call fails.
     """
     tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
     # mkstemp returns an open descriptor; ffmpeg writes by path, so close it.
     os.close(tmp_fd)
     try:
         # Re-encode first so every input format (webcam/WebM included) is supported.
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
         run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
     finally:
         # Remove the intermediate file even when one of the ffmpeg calls fails.
         if os.path.exists(stabilized_mp4):
             os.remove(stabilized_mp4)
 def clean_audio(wav_path):
     """Downmix to mono, resample to 16 kHz and peak-normalize a WAV file.

     Args:
         wav_path: Path of the WAV file to read.

     Returns:
         A tuple ``(clean_path, audio, 16000)``: the path of the written
         file, the processed samples, and the output sample rate.
     """
     audio, sr = sf.read(wav_path)
     if audio.ndim == 2:
         # Two-dimensional data is averaged over its second axis.
         audio = audio.mean(axis=1)
     if sr != 16000:
         audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=16000)
     max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
     # Scale the peak to 0.9; near-silent input is left as is to avoid
     # dividing by (almost) zero.
     if max_val > 1e-6:
         audio = audio / max_val * 0.9
     # Only touch the final extension: str.replace would rewrite every ".wav"
     # in the path and return the input path itself when none is present.
     root, _ext = os.path.splitext(wav_path)
     clean_path = f"{root}_clean.wav"
     sf.write(clean_path, audio, 16000)
     return clean_path, audio, 16000
+# ---------------------------- # TRANSCRIPTION & BURNING # ----------------------------
def _srt_timestamp(sec):
    """Format a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    # Work in integer milliseconds: truncating a float remainder can lose a
    # millisecond (2.3 s would otherwise be rendered as ...,299).
    total_ms = max(0, int(round(sec * 1000)))
    hours, rem = divmod(total_ms, 3600000)
    minutes, rem = divmod(rem, 60000)
    seconds, millis = divmod(rem, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def burn_subtitles(video_path, words, duration,
                   out_path="RobotsMali_Subtitled.mp4", chunk_size=7):
    """Write an SRT file from ``words`` and burn it into the video.

    Words are grouped into chunks of ``chunk_size``; each chunk is shown for
    a share of ``duration`` proportional to its position in the word list.

    Args:
        video_path: Path of the input video.
        words: List of words to display.
        duration: Total duration, in seconds, over which to spread the words.
        out_path: Path of the output video. Every call that uses the default
            writes to the same file in the working directory.
        chunk_size: Number of words per subtitle entry.

    Returns:
        ``out_path``.

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when ffmpeg fails.
    """
    total = len(words)
    with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as tf:
        srt_name = tf.name
        for number, idx in enumerate(range(0, total, chunk_size), 1):
            chunk = words[idx:idx + chunk_size]
            start = (idx / total) * duration
            end = (min(idx + chunk_size, total) / total) * duration
            # Wrap long entries at 40 characters per line.
            txt = "\n".join(textwrap.wrap(" ".join(chunk), 40))
            tf.write(f"{number}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{txt}\n\n")
    try:
        vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -crf 23 -c:a aac {shlex.quote(out_path)}')
    finally:
        # Delete the temporary SRT even when ffmpeg fails.
        os.remove(srt_name)
    return out_path
 def pipeline(video_input, model_name):
     """Generate a subtitled video, yielding progress updates along the way.

     This is a generator: each item is a ``(status_message, video_path)``
     tuple, where ``video_path`` is ``None`` until the final success item.

     Args:
         video_input: Path of the input video; a falsy value means no video.
         model_name: Name of the model, passed to ``load_model``.

     Yields:
         ``(status_message, video_path_or_None)`` tuples. Any exception is
         caught, printed, and reported as a final error item.
     """
     try:
         if not video_input:
             # In a generator, ``return value`` is never delivered to the
             # consumer, so the message must be yielded before stopping.
             yield "❌ Aucune vidéo", None
             return
         video_path = video_input
         yield "⏳ Phase 1/3 : Analyse du fichier...", None
         # Reserve a temporary .wav path; the file outlives the context manager.
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
             wav_path = tf.name
         # NOTE(review): the diff view elides one line at this point
         # (presumably the extract_audio(video_path, wav_path) call) —
         # confirm against the full file.
         clean_wav, audio, sr = clean_audio(wav_path)
         # Fall back to the audio length when ffprobe_duration returns a falsy value.
         duration = ffprobe_duration(video_path) or (len(audio)/sr)
         yield f"⏳ Phase 2/3 : Transcription IA ({model_name})...", None
         model = load_model(model_name)
         res = model.transcribe([clean_wav])[0]
         # The result either exposes a .text attribute or is converted with str().
         text = res.text if hasattr(res, 'text') else str(res)
         # Single-character tokens are dropped.
         words = [w for w in text.split() if len(w) > 1]
         if not words:
             yield "⚠️ Pas de parole détectée", None
             return
         yield "⏳ Phase 3/3 : Incrustation des sous-titres...", None
         final_video = burn_subtitles(video_path, words, duration)
         yield "✅ Succès !", final_video
     except Exception as e:
         traceback.print_exc()
         yield f"❌ Erreur: {str(e)}", None
+# ---------------------------- # INTERFACE GRADIO # ----------------------------
 custom_css = """
 body { background-color: #0b0e14; }
+.gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); }
+#title-container { text-align: center; padding: 20px; }
+.gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
 """
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    with gr.Column(elem_id="title-container"):
         gr.HTML("""
+            <h1 style='color:#facc15; font-size: 2.5rem; margin:0;'>🤖 ROBOTSMALI</h1>
+            <p style='color:#94a3b8; font-style:italic;'>Intelligence Artificielle pour le Bambara</p>
             <div style="height: 3px; width: 60px; background: #facc15; margin: 15px auto;"></div>
         """)
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 📥 Source")
             v_in = gr.Video(label=None, mirror_webcam=False)
             m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle IA")
             btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")
             status = gr.Markdown("*En attente de traitement...*")
             v_out = gr.Video(label=None)
+    # Section des exemples avec cache_examples=False pour débloquer le clic
     gr.Examples(
         examples=VIDEO_EXAMPLES,
         inputs=[v_in, m_sel],
+        label="📺 Exemples Disponibles",
+        cache_examples=False
     )
+    gr.HTML("<div style='text-align: center; color: #475569; padding-top: 20px;'>© 2025 RobotsMali - Bamako</div>")
     btn.click(pipeline, [v_in, m_sel], [status, v_out])