Update app.py
app.py
CHANGED
Removed in this commit:

- the old module docstring
- import noisereduce as nr, together with the noisereduce denoising call in clean_audio() and its bare "except: pass"
- a truncated normalization line in clean_audio() ("audio = audio / max_val * 0."), completed to "* 0.9" in the new version
- a bare "except:" around the EncDecCTCModelBPE fallback in load_model()
- an incomplete "if isinstance(out, list)" check in transcribe()
- the old Gradio layout: a two-column gr.Row holding a gr.Examples block wired to get_example_video() with run_on_click=True

The new version of app.py follows in full.
# -*- coding: utf-8 -*-
""" ROBOTSMALI — Bambara subtitling (V4.8 Colab Ready - video remuxing) """

# =========================================================================
# 1. DEPENDENCY INSTALLATION AND UPDATES
# =========================================================================
print("Starting dependency installation...")

# Install FFmpeg (needed for ffprobe and for audio extraction / subtitle burning)
!apt-get update && apt-get install -y ffmpeg

# Install the core Python libraries and the NeMo tooling
!pip install torch numpy soundfile librosa huggingface_hub gradio
!pip install nemo_toolkit[asr]
!pip install ctc-segmentation
!pip install --upgrade gradio  # upgrade Gradio for compatibility
print("Dependency installation finished.")
# =========================================================================

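# NOTE: the "!" lines above are IPython shell escapes; they only work in
# Colab/Jupyter. In a plain Python script you would install the same
# packages beforehand, e.g. (illustrative sketch, not part of the app):
#   python -m pip install "nemo_toolkit[asr]" ctc-segmentation gradio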
import os
import shlex
import subprocess, tempfile, traceback  # not shown in this hunk; required by run_cmd, tempfile.mkstemp and traceback.print_exc below
import random
import textwrap
from pathlib import Path

import numpy as np
import torch
import soundfile as sf
import librosa
from huggingface_hub import snapshot_download
from nemo.collections import asr as nemo_asr
import gradio as gr

# Try to import the alignment library
try:
    from ctc_segmentation import ctc_segmentation, CtcSegmentationParameters, prepare_text
    HAS_CTC_SEGMENTATION = True
except ImportError:
    HAS_CTC_SEGMENTATION = False
    print("WARNING: ctc_segmentation is not installed. Alignment will fall back to a simple equal split of the time axis.")

# ---------------------------- # CONFIG # ----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
random.seed(1234)
np.random.seed(1234)
torch.manual_seed(1234)

# Segment size for chunked transcription
SEGMENT_DURATION = 10.0

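# Illustrative: at the 16 kHz target sample rate, a 10 s segment is
# int(10.0 * 16000) = 160000 samples per chunk in segment_and_align().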
MODELS = {
    "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
    "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
    "Soloba V1 (CTC)": ("RobotsMali/soloba-ctc-0.6b-v1", "ctc"),
    "Soloba V0 (CTC)": ("RobotsMali/soloba-ctc-0.6b-v0", "ctc"),
    "QuartzNet V1 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v1", "ctc_char"),
    "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
}

_cache = {}

# Path to the example video.
VIDEO_EXAMPLES = [
    "examples/MARALINKE-Wii (Lève-toi) Black lives matter (Clip officiel) - MARALINKE (360p, H264).mp4"
]

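# Illustrative: MODELS["Soloba V1 (CTC)"] yields
# ("RobotsMali/soloba-ctc-0.6b-v1", "ctc"); the second field selects the
# NeMo class used in load_model() below.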
# ---------------------------- # UTIL: run_cmd, ffprobe_duration # ----------------------------
def run_cmd(cmd):
    """Execute a shell command and raise on non-zero exit."""
    print("RUN:", cmd)
    res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if res.returncode != 0:
        raise RuntimeError(f"Command failed [{cmd}]\nOutput:\n{res.stdout}")
    return res.stdout

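# Illustrative usage: run_cmd("ffmpeg -version") returns the combined
# stdout/stderr text, or raises RuntimeError if the command exits non-zero.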
def ffprobe_duration(path):
    """Get the video duration via ffprobe (for sanity checks / debugging)."""
    cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(path)}'
    out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if out.returncode != 0:
        # Dump the raw ffprobe error if extraction fails
        print(f"--- RAW FFPROBE ERROR --- (Code: {out.returncode})")
        print(out.stderr)
        print("----------------------------")
        return None
    try:
        return float(out.stdout.strip())
    except Exception as e:
        print(f"--- DURATION CONVERSION ERROR --- (Output: {out.stdout.strip()})")
        print(e)
        return None

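# Illustrative: for a 90-second clip, ffprobe prints something like
# "90.043000\n" on stdout, so ffprobe_duration() returns 90.043.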
# ---------------------------- # LOAD MODEL (robust) # ----------------------------
def load_model(name):
    """Load the right NeMo model for the given type (rnnt / ctc / ctc_char)."""
    if name in _cache:
        return _cache[name]

    repo, mode = MODELS[name]
    print(f"[LOAD] snapshot_download {repo} ...")
    folder = snapshot_download(repo, local_dir_use_symlinks=False)
    nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
    if not nemo_file:
        raise FileNotFoundError(f"No .nemo file found for {name} in {folder}")

    print(f"[LOAD] .nemo found: {nemo_file}; mode={mode}")

    if mode == "rnnt":
        model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
    elif mode == "ctc_char":
        # This branch's body is elided in the diff view; EncDecCTCModel is
        # the usual NeMo class for char-level CTC models such as QuartzNet.
        model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
    else:
        try:
            model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
        except Exception as e:
            print(f"[WARN] EncDecCTCModelBPE failed ({e}), fallback EncDecCTCModel")
            model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)

    model.to(DEVICE).eval()
    _cache[name] = model
    print(f"[OK] Model {name} loaded on {DEVICE}")
    return model

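# Illustrative: the first load_model("Soloba V1 (CTC)") call downloads the
# checkpoint from the Hub; subsequent calls return the instance from _cache.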
# ---------------------------- # AUDIO EXTRACTION & CLEANING (robust) # ----------------------------
def extract_audio(video_path, out_wav):
    """
    Extract the audio in two steps to stabilize webcam/corrupted files (remuxing).
    """
    # Path of the stabilized intermediate file
    tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
    os.close(tmp_fd)

    # STEP 1: stabilize by stream copy (remux) of the input into a clean MP4 container.
    # This rewrites the header metadata (duration) without re-encoding.
    remux_cmd = (
        f'ffmpeg -hide_banner -loglevel error -y '
        f'-analyzeduration 2147483647 -probesize 2147483647 -ignore_unknown '
        f'-i {shlex.quote(video_path)} -c copy {shlex.quote(stabilized_mp4)}'
    )
    print("RUN: remuxing the file for stabilization...")
    run_cmd(remux_cmd)

    # STEP 2: extract 16 kHz WAV audio from the stabilized file.
    extract_cmd = (
        f'ffmpeg -hide_banner -loglevel error -y '
        f'-i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}'
    )
    print("RUN: extracting audio from the stabilized file...")
    run_cmd(extract_cmd)

    # Remove the stabilized intermediate file
    if os.path.exists(stabilized_mp4):
        os.remove(stabilized_mp4)

def clean_audio(wav_path, target_sr=16000):
    """Load audio, ensure mono, resample to target_sr, normalize, write cleaned wav."""
    audio, sr = sf.read(wav_path)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)
    if sr != target_sr:
        audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
    if max_val > 1e-6:
        audio = audio / max_val * 0.9
    clean_path = str(Path(wav_path).with_name(Path(wav_path).stem + "_clean.wav"))
    sf.write(clean_path, audio, sr)
    return clean_path, audio, sr

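# Illustrative: a stereo 44.1 kHz input comes back mono at 16 kHz with its
# peak scaled to 0.9 (e.g. a max |sample| of 0.25 is multiplied by 3.6).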
# ---------------------------- # TRANSCRIPTION, ETC. (unchanged) # ----------------------------
# The remaining functions (transcribe, keep_bambara, pack, align_heuristic, etc.)
# are the same as in version V4.7.

def transcribe(model, wav_path):
    if not hasattr(model, "transcribe"):
        raise RuntimeError("The model does not support model.transcribe()")
    out = model.transcribe([wav_path])
    if isinstance(out, list):
        if len(out) == 0:
            return ""
        first = out[0]
        if isinstance(first, str):
            return first.strip()
        if hasattr(first, "text"):
            return first.text.strip()
        return str(first).strip()
    if hasattr(out, "text"):
        return out.text.strip()
    return str(out).strip()

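# NeMo's transcribe() may return a list of plain strings or a list of
# hypothesis objects carrying a .text attribute, depending on the model
# class and version; the checks above cover both shapes.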
def keep_bambara(words):
    res = []
    for w in words:
        wl = w.lower()
        if any(c in wl for c in ["ɛ","ɔ","ŋ"]) or sum(1 for c in wl if c in "aeiou") >= 2:
            res.append(w)
    return res

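# Illustrative: a token containing ɛ/ɔ/ŋ (e.g. "bɛ") is always kept; plain
# ASCII tokens are kept only if they contain at least two of a/e/i/o/u.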
MAX_CHARS = 45
MIN_DUR = 0.3
MAX_DUR = 3.2
MAX_WORDS = 8

def wrap2(txt):
    parts = textwrap.wrap(txt, MAX_CHARS)
    if len(parts) <= 1:
        return txt
    mid = len(txt) // 2
    left = txt.rfind(" ", 0, mid)
    right = txt.find(" ", mid)
    cut = left if (mid - left) <= ((right - mid) if right != -1 else 1e9) else right
    l1 = txt[:cut].strip(); l2 = txt[cut:].strip()
    return l1 + "\n" + l2 if l2 else l1

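# Illustrative: for a caption longer than MAX_CHARS, wrap2() breaks it into
# two lines at the space closest to its midpoint.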
def pack(spans, total):
    tmp = []
    for s, e, t in spans:
        s = max(0, min(s, total)); e = max(0, min(e, total))
        if e <= s or not t.strip(): continue
        tmp.append((s, e, t.strip()))
    merged = []
    for seg in tmp:
        if not merged:
            merged.append(seg); continue
        ps, pe, pt = merged[-1]; s, e, t = seg
        if (e - s) < MIN_DUR or (s - pe) < 0.1:
            merged[-1] = (ps, max(pe, e), (pt + " " + t).strip())
        else:
            merged.append(seg)
    out = []; last_end = 0
    for s, e, t in merged:
        dur = e - s; words = t.split()
        blocks = [" ".join(words[i:i+MAX_WORDS]) for i in range(0, len(words), MAX_WORDS)]
        step = dur / max(1, len(blocks))
        base = s
        for b in blocks:
            st = base; en = min(base + step, e); base = en
            if en <= st: en = min(st + 0.05, total)
            txt = wrap2(b)
            if st < last_end:
                st = last_end + 1e-3; en = max(en, st + 0.05)
            out.append((st, en, txt)); last_end = en
    return out

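# pack() clamps spans to [0, total], merges fragments shorter than MIN_DUR
# or closer than 0.1 s to their neighbor, then re-splits long texts into
# blocks of at most MAX_WORDS words with non-overlapping timestamps.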
def align_heuristic(words, total_dur):
    total = total_dur
    if not words:
        return pack([], total)

    spans = []
    blocks = [" ".join(words[i:i+MAX_WORDS]) for i in range(0, len(words), MAX_WORDS)]
    num_blocks = len(blocks)

    max_step = min(MAX_DUR, total / num_blocks if num_blocks > 0 else total)

    base = 0.0
    for block in blocks:
        st = base; en = min(base + max_step, total)
        spans.append((st, en, block))
        base = en

    return pack(spans, total)

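# Illustrative: 24 words over a 9 s segment give 3 blocks of 8 words, each
# assigned an equal min(MAX_DUR, 3.0) = 3.0 s slice of the timeline.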
def segment_and_align(model, audio, sr, total_dur, mode):
    """Chunk the audio, try CTC segmentation alignment, fall back to the heuristic."""
    segment_samples = int(SEGMENT_DURATION * sr)
    total_samples = len(audio)
    all_subs = []

    for i in range(0, total_samples, segment_samples):
        start_sample = i
        end_sample = min(i + segment_samples, total_samples)
        time_offset = start_sample / sr

        segment_audio = audio[start_sample:end_sample]
        segment_duration = (end_sample - start_sample) / sr

        tmp_fd, tmp_seg_wav = tempfile.mkstemp(suffix=f"_seg_{i}.wav")
        os.close(tmp_fd)
        sf.write(tmp_seg_wav, segment_audio, sr)

        try:
            segment_text = transcribe(model, tmp_seg_wav)
            words = keep_bambara(segment_text.split())

            subs = None
            if HAS_CTC_SEGMENTATION and words and mode in ["rnnt", "ctc"]:
                try:
                    x = torch.tensor(segment_audio).float().unsqueeze(0).to(DEVICE)
                    ln = torch.tensor([x.shape[1]]).to(DEVICE)

                    with torch.no_grad():
                        logits, _ = model.forward(input_signal=x, input_signal_length=ln)
                        if isinstance(logits, tuple):
                            logits = logits[0]

                    time_per_frame = segment_duration / max(1, logits.shape[1])

                    try:
                        raw = model.tokenizer.vocab
                        vocab = list(raw.keys()) if isinstance(raw, dict) else list(raw)
                    except Exception:
                        vocab = None

                    cfg = CtcSegmentationParameters()
                    if vocab:
                        cfg.char_list = vocab

                    gt = prepare_text(cfg, words)[0]

                    # Unpacking fix (star-unpacking)
                    timing, *others = ctc_segmentation(cfg, logits.detach().cpu().numpy()[0], gt)

                    spans = []
                    for k in range(len(words)):
                        start_time = timing[k] * time_per_frame
                        end_time = timing[k+1] * time_per_frame if k + 1 < len(timing) else segment_duration
                        spans.append((start_time, end_time, words[k]))

                    subs = pack(spans, segment_duration)

                except Exception as e:
                    print(f"[WARN] CTC segmentation failed for the segment at {time_offset:.2f}s ({e}) -> heuristic fallback")
                    subs = align_heuristic(words, segment_duration)
            else:
                subs = align_heuristic(words, segment_duration)

            if subs:
                for start, end, text in subs:
                    all_subs.append((start + time_offset, end + time_offset, text))

        except Exception as e:
            print(f"Critical transcription/alignment failure for the segment at {time_offset:.2f}s: {e}")

        finally:
            if os.path.exists(tmp_seg_wav):
                os.remove(tmp_seg_wav)

    return pack(all_subs, total_dur)

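# Frame-to-time mapping: the encoder emits logits.shape[1] frames for each
# segment, so frame k maps to roughly k * segment_duration / n_frames
# seconds; the per-word frame indices returned by ctc_segmentation are
# converted with that factor and then shifted by the segment's time_offset.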
def burn(video_path, subs, output_path=None):
    if output_path is None:
        output_path = "RobotsMali_Subtitled.mp4"

    tmp_fd, tmp_srt = tempfile.mkstemp(suffix=".srt")
    os.close(tmp_fd)

    def sec_to_srt(t):
        h = int(t // 3600); m = int((t % 3600) // 60); s = int(t % 60); ms = int((t - int(t)) * 1000)
        return f"{h:02}:{m:02}:{s:02},{ms:03}"

    with open(tmp_srt, "w", encoding="utf-8") as f:
        for i, (start, end, text) in enumerate(subs, 1):
            f.write(f"{i}\n{sec_to_srt(start)} --> {sec_to_srt(end)}\n{text}\n\n")

    vf = f"subtitles={shlex.quote(tmp_srt)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
    cmd = f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac -b:a 192k {shlex.quote(output_path)}'

    try:
        run_cmd(cmd)
    finally:
        if os.path.exists(tmp_srt):
            os.remove(tmp_srt)

    return output_path

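# Illustrative: sec_to_srt(3723.5) -> "01:02:03,500", the SRT timestamp
# format expected by ffmpeg's subtitles filter.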
# ---------------------------- # MAIN PIPELINE (V4.8) # ----------------------------
def pipeline(video_input, model_name):
    try:
        if isinstance(video_input, dict) and "tmp_path" in video_input:
            video_path = video_input["tmp_path"]
        else:
            video_path = video_input

        # Try to get the duration via ffprobe (as a quick sanity check)
        duration = ffprobe_duration(video_path)

        tmp_fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(tmp_fd)

        # Robust audio extraction (repair/remux attempt via ffmpeg)
        extract_audio(video_path, tmp_wav)
        clean_wav, audio, sr = clean_audio(tmp_wav)

        # FALLBACK LOGIC: if ffprobe failed, compute the duration from the extracted WAV
        if duration is None:
            if len(audio) > 0:
                duration = len(audio) / sr
                print(f"[WARN] FFprobe failed. Duration recomputed from the extracted audio: {duration:.2f}s")
            else:
                raise RuntimeError("Could not get a non-zero video duration, even after robust audio extraction.")

        model = load_model(model_name)
        mode = MODELS[model_name][1]

        subs = segment_and_align(model, audio, sr, duration, mode)

        if not subs:
            return ("⚠️ No usable subtitles (empty sub list)", None)

        out_video = burn(video_path, subs)
        return ("✅ Finished successfully", out_video)

    except Exception as e:
        traceback.print_exc()
        return (f"❌ Error — {str(e)}", None)

# ---------------------------- # GRADIO INTERFACE # ----------------------------
with gr.Blocks(title="RobotsMali - Subtitling") as demo:
    gr.Markdown("## 🤖 RobotsMali — Bambara Subtitling (Colab Ready - Max-Robust Audio)")
    gr.Markdown("Audio extraction is now extremely robust. Whether you use the webcam or an uploaded file, this script should be able to process it.")

    # Video component without 'examples'
    v = gr.Video(label="Video to subtitle (file or webcam)")

    # Use a separate gr.Examples for compatibility
    gr.Examples(
        examples=VIDEO_EXAMPLES,
        inputs=v,
        label="Example videos to try (download the file into Colab first so this path exists)"
    )

    m = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Bambara ASR model")
    b = gr.Button("▶️ Generate burned-in subtitles")
    s = gr.Markdown(label="Status")
    o = gr.Video(label="Subtitled video (MP4 H.264)")

    b.click(pipeline, [v, m], [s, o])

demo.launch(share=True, debug=True)