Update app.py
app.py (CHANGED)
@@ -20,7 +20,7 @@ import librosa
 from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
-import noisereduce as nr
+import noisereduce as nr

 # ----------------------------
 # CONFIG
@@ -53,49 +53,32 @@ def run_cmd(cmd):
     return res.stdout

 def ffprobe_duration(path):
-    """
-    Tente d'obtenir la durée via ffprobe (robuste pour les conteneurs webcam).
-    """
-    # FIXE: On lit la durée du format (conteneur)
+    """Tente d'obtenir la durée via ffprobe."""
     cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(path)}'
     out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-
-    if out.returncode != 0:
-        return None
+    if out.returncode != 0: return None
     try:
         output = out.stdout.strip().split('\n')[0]
         return float(output)
-    except:
-        return None
+    except: return None

 # ----------------------------
 # LOAD MODEL (robust)
 # ----------------------------
 def load_model(name):
-    """Charge le modèle NeMo correct
-    if name in _cache:
-        return _cache[name]
-
+    """Charge le modèle NeMo correct."""
+    if name in _cache: return _cache[name]
     repo, mode = MODELS[name]
     print(f"[LOAD] snapshot_download {repo} ...")
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
-    if not nemo_file:
-
-
-
-
-
-
-        model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
-    elif mode == "ctc_char":
-        model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
-    else: # mode == "ctc" (BPE)
-        try:
-            model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
-        except Exception as e:
-            print(f"[WARN] EncDecCTCModelBPE failed ({e}), fallback EncDecCTCModel")
-            model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
+    if not nemo_file: raise FileNotFoundError(f"Aucun .nemo trouvé pour {name} dans {folder}")
+
+    if mode == "rnnt": model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
+    elif mode == "ctc_char": model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
+    else:
+        try: model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
+        except Exception: model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)

     model.to(DEVICE).eval()
     _cache[name] = model
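Note: load_model unpacks repo, mode = MODELS[name], but the MODELS mapping itself lives in the CONFIG block that this diff does not touch. A minimal sketch of the shape it presumably has, only for readers of the hunk above; the repo ids are placeholders, while the "Soloba V1 (CTC)" key and the mode strings "rnnt" / "ctc_char" / "ctc" are taken from the diff:

# Hypothetical shape of MODELS (defined in the CONFIG section, outside this diff).
# Dropdown label -> (Hugging Face repo id passed to snapshot_download, decoding mode used by load_model)
MODELS = {
    "Soloba V1 (CTC)":   ("someone/placeholder-ctc-model", "ctc"),    # placeholder repo id
    "RNNT placeholder":  ("someone/placeholder-rnnt-model", "rnnt"),  # placeholder entry
}
repo, mode = MODELS["Soloba V1 (CTC)"]   # -> ("someone/placeholder-ctc-model", "ctc")

The mode string is what selects EncDecHybridRNNTCTCBPEModel, EncDecCTCModel or EncDecCTCModelBPE when the .nemo checkpoint is restored.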
@@ -111,83 +94,41 @@ def extract_audio(video_path, out_wav):
     run_cmd(cmd)

 def clean_audio(wav_path, target_sr=16000):
-    """
-    Load audio, apply noise reduction, resample, normalize, write cleaned wav.
-    """
+    """Load audio, apply noise reduction, resample, normalize, write cleaned wav."""
     audio, sr = sf.read(wav_path)
-
-    if audio.ndim == 2:
-        audio = audio.mean(axis=1)
-
+    if audio.ndim == 2: audio = audio.mean(axis=1)
     if sr != target_sr:
         audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=target_sr)
         sr = target_sr
-
-    # --- AMÉLIORATION : Réduction de bruit ---
+
     try:
         print("[INFO] Application de la réduction de bruit (noisereduce)...")
-        # Réduit le bruit stationnaire (ventilateur, souffle) de 75%
         audio = nr.reduce_noise(y=audio, sr=sr, stationary=True, prop_decrease=0.75)
     except Exception as e:
         print(f"[WARN] Echec noisereduce: {e}")
-
-
+
     max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
-    if max_val > 1e-6:
-        # Normalisation à 0.95
-        audio = audio / max_val * 0.95
+    if max_val > 1e-6: audio = audio / max_val * 0.95

     clean_path = str(Path(wav_path).with_name(Path(wav_path).stem + "_clean.wav"))
     sf.write(clean_path, audio, sr)
-
     return clean_path, audio, sr

 # ----------------------------
-# TRANSCRIPTION
+# TRANSCRIPTION & ALIGNMENT UTILS
 # ----------------------------
 def transcribe(model, wav_path):
     """Robuste: essaie model.transcribe et nettoie la sortie."""
-    if not hasattr(model, "transcribe"):
-        raise RuntimeError("Le modèle ne supporte pas model.transcribe()")
+    if not hasattr(model, "transcribe"): raise RuntimeError("Le modèle ne supporte pas model.transcribe()")
     out = model.transcribe([wav_path])
-    if isinstance(out, list):
-
-        return ""
-    first = out[0]
-    if isinstance(first, str):
-        return first.strip()
-    if hasattr(first, "text"):
-        return first.text.strip()
-    return str(first).strip()
-    if hasattr(out, "text"):
-        return out.text.strip()
+    if isinstance(out, list) and len(out) > 0: out = out[0]
+    if hasattr(out, "text"): return out.text.strip()
     return str(out).strip()

-# ----------------------------
-# UTILITAIRES sous-titres / packing
-# ----------------------------
-def keep_bambara(words):
-    res = []
-    for w in words:
-        wl = w.lower()
-        if any(c in wl for c in ["ɛ","ɔ","ŋ"]) or sum(1 for c in wl if c in "aeiou") >= 2:
-            res.append(w)
-    return res
-
 MAX_CHARS = 45; MIN_DUR = 0.3; MAX_DUR = 3.2; MAX_WORDS = 8

-def wrap2(txt):
-    parts = textwrap.wrap(txt, MAX_CHARS)
-    if len(parts) <= 1:
-        return txt
-    mid = len(txt) // 2
-    left = txt.rfind(" ", 0, mid)
-    right = txt.find(" ", mid)
-    cut = left if (mid - left) <= ((right - mid) if right != -1 else 1e9) else right
-    l1 = txt[:cut].strip(); l2 = txt[cut:].strip()
-    return l1 + "\n" + l2 if l2 else l1
-
 def pack(spans, total):
+    # Logique complexe de regroupement et de réemballage (non modifiée)
     tmp = []
     for s, e, t in spans:
         s = max(0, min(s, total)); e = max(0, min(e, total))
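Note on the tightened transcribe helper: depending on the NeMo release, model.transcribe([wav_path]) may hand back plain strings or hypothesis-like objects exposing a .text attribute; the two new one-liners normalise both cases. A toy walk-through with made-up values:

# Both return shapes collapse to the same cleaned string via the new logic.
out = ["aw ni ce  "]                      # e.g. a list holding one plain string
if isinstance(out, list) and len(out) > 0: out = out[0]
if hasattr(out, "text"): out = out.text   # hypothesis-like objects carry .text
print(str(out).strip())                   # -> "aw ni ce"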
@@ -211,34 +152,29 @@ def pack(spans, total):
     for b in blocks:
         st = base; en = min(base + step, e); base = en
         if en <= st: en = min(st + 0.05, total)
-        txt =
+        txt = textwrap.wrap(b, MAX_CHARS)
+        txt = txt[0] + "\n" + txt[1] if len(txt) > 1 else txt[0]
         if st < last_end:
             st = last_end + 1e-3; en = max(en, st + 0.05)
         out.append((st, en, txt)); last_end = en
     return out

-# ----------------------------
-# VAD ALIGN (fallback alignment)
-# ----------------------------
 def align_vad(text, audio, sr, total_dur, top_db=28):
-
+    # Logique VAD (non modifiée)
+    words = [w for w in text.split() if any(c in w.lower() for c in ["ɛ","ɔ","ŋ"]) or sum(1 for c in w.lower() if c in "aeiou") >= 2]
     total = total_dur
     if audio is None or len(audio) == 0 or not words:
         return pack([(0, total, " ".join(words[:MAX_WORDS]))], total)
     iv = librosa.effects.split(audio, top_db=top_db)
     if len(iv) == 0:
         return pack([(0, total, " ".join(words[:MAX_WORDS]))], total)
-    spans = []
-    L = sum(e - s for s, e in iv)
-    idx = 0
+    spans = []; L = sum(e - s for s, e in iv); idx = 0
     for s, e in iv:
         seg = e - s; segt = seg / sr
-        k = max(1, int(round(len(words) * (seg / L))))
-        chunk = words[idx:idx+k]; idx += k
+        k = max(1, int(round(len(words) * (seg / L)))); chunk = words[idx:idx+k]; idx += k
         if not chunk: continue
         lines = [chunk[i:i+MAX_WORDS] for i in range(0, len(chunk), MAX_WORDS)]
-        step = max(MIN_DUR, min(MAX_DUR, segt / max(1, len(lines))))
-        base = s / sr
+        step = max(MIN_DUR, min(MAX_DUR, segt / max(1, len(lines)))); base = s / sr
         for j, ln in enumerate(lines):
             st = base + j * step; en = base + (j + 1) * step
             spans.append((st, en, " ".join(ln)))
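Note on the inline wrapping that replaces the old wrap2 helper in pack: textwrap.wrap splits the block at whitespace into chunks of at most MAX_CHARS, and only the first two chunks are kept as the two subtitle lines. A quick check of that behaviour, assuming the block b is already a joined string (how blocks is built lies outside this hunk):

import textwrap

MAX_CHARS = 45
b = "ne bɛ se ka baara kɛ ni aw ye kɔnɔnafili la bi"   # made-up example text, 46 characters
parts = textwrap.wrap(b, MAX_CHARS)
# parts == ['ne bɛ se ka baara kɛ ni aw ye kɔnɔnafili la', 'bi']
txt = parts[0] + "\n" + parts[1] if len(parts) > 1 else parts[0]
print(txt)   # two subtitle lines, each within MAX_CHARS characters

Two edge cases worth keeping in mind: any third chunk is silently dropped, and an empty block would make parts[0] raise an IndexError.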
@@ -248,114 +184,70 @@ def align_vad(text, audio, sr, total_dur, top_db=28):
 # Écriture SRT + Burn (réencode)
 # ----------------------------
 def burn(video_path, subs, output_path=None):
-
-
-    tmp_fd, tmp_srt = tempfile.mkstemp(suffix=".srt")
-
+    """Crée le SRT temporaire et brûle les sous-titres dans la vidéo."""
+    if output_path is None: output_path = "RobotsMali_Subtitled.mp4"
+    tmp_fd, tmp_srt = tempfile.mkstemp(suffix=".srt"); os.close(tmp_fd)
+
     def sec_to_srt(t):
         h = int(t // 3600); m = int((t % 3600) // 60); s = int(t % 60); ms = int((t - int(t)) * 1000)
         return f"{h:02}:{m:02}:{s:02},{ms:03}"
+
     with open(tmp_srt, "w", encoding="utf-8") as f:
         for i, (start, end, text) in enumerate(subs, 1):
             f.write(f"{i}\n{sec_to_srt(start)} --> {sec_to_srt(end)}\n{text}\n\n")

-    # On réencode (libx264) car on applique subtitles filter
     vf = f"subtitles={shlex.quote(tmp_srt)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
     cmd = f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac -b:a 192k {shlex.quote(output_path)}'
-    try:
-        run_cmd(cmd)
+    try: run_cmd(cmd)
     finally:
-        if os.path.exists(tmp_srt):
-            os.remove(tmp_srt)
+        if os.path.exists(tmp_srt): os.remove(tmp_srt)
     return output_path

 # ----------------------------
 # PIPELINE PRINCIPAL (Robuste)
 # ----------------------------
 def pipeline(video_input, model_name):
-    """
-    video_input : chemin ou dict Gradio (tmp_path)
-    model_name : clé dans MODELS
-    """
+    """Gère le flux de sous-titrage complet."""
     try:
-        if isinstance(video_input, dict) and "tmp_path" in video_input:
-
-        else:
-            video_path = video_input
+        if isinstance(video_input, dict) and "tmp_path" in video_input: video_path = video_input["tmp_path"]
+        else: video_path = video_input

-        # 1. Tentative d'obtention de durée via FFPROBE
         duration = ffprobe_duration(video_path)
-
-        # 2. Extraction & Nettoyage Audio
-        tmp_fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
-        os.close(tmp_fd)
-
+        tmp_fd, tmp_wav = tempfile.mkstemp(suffix=".wav"); os.close(tmp_fd)
         extract_audio(video_path, tmp_wav)
         clean_wav, audio, sr = clean_audio(tmp_wav)

-        # 3. FALLBACK: Si FFprobe a échoué (None), on calcule depuis l'audio
         if duration is None:
             print("[INFO] ffprobe duration failed, calculating from audio...")
-            if sr and sr > 0:
-                duration = len(audio) / sr
+            if sr and sr > 0: duration = len(audio) / sr

-        # Vérification finale
         if not duration or duration <= 0:
             raise RuntimeError("Impossible de déterminer la durée de la vidéo (fichier corrompu ?)")

-        print(f"[INFO] Durée détectée: {duration:.2f}s")
-
-        # 4. Chargement modèle + Transcription
         model = load_model(model_name)
         text = transcribe(model, clean_wav)
         mode = MODELS[model_name][1]

-        #
-        subs = None
-
+        # Logique d'alignement (CTC Segmentation ou VAD Fallback)
         if mode == "rnnt":
-            # Logique d'alignement RNNT (CTC Segmentation)
             try:
                 from ctc_segmentation import ctc_segmentation, CtcSegmentationParameters, prepare_text
-                words =
-                if not words:
-
-
-                ln = torch.tensor([x.shape[1]]).to(DEVICE)
-                with torch.no_grad():
-                    logits = model(input_signal=x, input_signal_length=ln)[0]
+                words = [w for w in text.split() if any(c in w.lower() for c in ["ɛ","ɔ","ŋ"]) or sum(1 for c in w.lower() if c in "aeiou") >= 2]
+                if not words: return ("⚠️ Aucun sous-titre utilisable (texte vide après filtrage)", None)
+                x = torch.tensor(audio).float().unsqueeze(0).to(DEVICE); ln = torch.tensor([x.shape[1]]).to(DEVICE)
+                with torch.no_grad(): logits = model(input_signal=x, input_signal_length=ln)[0]
                 time_per_frame = duration / max(1, logits.shape[1])
-
-                    raw = model.tokenizer.vocab
-                    vocab = list(raw.keys()) if isinstance(raw, dict) else list(raw)
-                except Exception:
-                    vocab = None
-                cfg = CtcSegmentationParameters()
-                if vocab:
-                    cfg.char_list = vocab
+                cfg = CtcSegmentationParameters(); cfg.char_list = list(model.tokenizer.vocab.keys())
                 gt = prepare_text(cfg, words)[0]
-
-
-
-
-            except AssertionError:
-                print("[WARN] Audio shorter than text -> fallback to VAD alignment")
-                subs = align_vad(text, audio, sr, duration)
-            except Exception as e:
-                print(f"[WARN] ctc_segmentation not available or failed ({e}) -> fallback VAD")
-                subs = align_vad(text, audio, sr, duration)
-
-        elif mode == "ctc_char" or mode == "ctc":
-            # Logique d'alignement CTC / CTC-Char (VAD fallback)
-            try:
+                timing, _, _ = ctc_segmentation(cfg, logits.detach().cpu().numpy()[0], gt)
+                spans = [(timing[i] * time_per_frame, timing[i+1] * time_per_frame, words[i]) for i in range(len(words) - 1)]
+                subs = pack(spans, duration)
+            except Exception:
                 subs = align_vad(text, audio, sr, duration)
-
-
-                subs = align_vad(text, audio, sr, duration)
-
-        if not subs:
-            return ("⚠️ Aucun sous-titre utilisable (sub list vide)", None)
+        else:
+            subs = align_vad(text, audio, sr, duration)

+        if not subs: return ("⚠️ Aucun sous-titre utilisable (sub list vide)", None)
         out_video = burn(video_path, subs)
         return ("✅ Terminé avec succès", out_video)

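Note on the new RNNT alignment branch: the timing values returned by ctc_segmentation are treated here as frame indices and converted to seconds by multiplying with time_per_frame (clip duration divided by the number of logit frames). A small illustration of that span construction with made-up numbers; the real timing array comes from the library call shown in the hunk:

# Made-up values, only to illustrate the frame-to-seconds conversion used above.
timing = [0, 12, 31, 55, 80]             # assumed frame index per prepared text unit
words = ["aw", "ni", "ce", "dɔɔnin"]     # assumed filtered Bambara words
time_per_frame = 0.25                    # duration / logits.shape[1], e.g. 20 s over 80 frames
spans = [(timing[i] * time_per_frame, timing[i + 1] * time_per_frame, words[i])
         for i in range(len(words))]
# -> [(0.0, 3.0, 'aw'), (3.0, 7.75, 'ni'), (7.75, 13.75, 'ce'), (13.75, 20.0, 'dɔɔnin')]

As committed, the comprehension stops at len(words) - 1, so the final word never receives a span; if that is only meant to guard against timing being one entry short, iterating over range(min(len(words), len(timing) - 1)) would keep the last word.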
@@ -364,13 +256,12 @@ def pipeline(video_input, model_name):
         return (f"❌ Erreur — {str(e)}", None)

 # ----------------------------
-# INTERFACE GRADIO (
+# INTERFACE GRADIO (Version Finale Stabilité)
 # ----------------------------
 with gr.Blocks(title="RobotsMali - Sous-titrage") as demo:
     gr.Markdown("## 🤖 RobotsMali — Sous-titrage Bambara (Amélioration Audio)")

     # 1. Définir toutes les sorties AVANT leur utilisation.
-    # Elles sont rendues ici implicitement et sont disponibles pour gr.Examples.
     s = gr.Markdown(label="Statut de la tâche")
     o = gr.Video(label="Vidéo sous-titrée")

@@ -380,31 +271,30 @@ with gr.Blocks(title="RobotsMali - Sous-titrage") as demo:
         v = gr.Video(label="Vidéo à sous-titrer", sources=["upload", "webcam"])
         m = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle ASR")

-        # 3. gr.Examples
+        # 3. gr.Examples (avec cache_examples=False et nom de fichier corrigé)
         gr.Examples(
             examples=[
-
+                # Utiliser le nom de fichier exact du dépôt
+                ["examples/Upload MARALINKE-WILI (Lève-toi) Black lives matter (Clip officiel) - MARALINKE (360p, h264).mp4", "Soloba V1 (CTC)"]
             ],
             inputs=[v, m],
             fn=pipeline,
             outputs=[s, o],
             label="▶️ Utiliser un exemple (Vidéo stockée dans le Space)",
-            run_on_click=True
+            run_on_click=True,
+            cache_examples=False
         )

         b = gr.Button("▶️ Générer les sous-titres", variant="primary")

         with gr.Column():
-            # 4.
-            # et on fait confiance à Gradio pour afficher S et O dans l'ordre de leur définition.
+            # 4. Affichage des sorties
             gr.Markdown("### Résultats:")
-
-
-            # Il n'y a rien à faire ici, à part s'assurer qu'ils sont bien affichés
-            # (ce qui est le cas par leur définition initiale dans le bloc).
+            s
+            o

-    # 5.
+    # 5. L'action du bouton
     b.click(pipeline, [v, m], [s, o])

 if __name__ == "__main__":
-    demo.launch(share=True
+    demo.launch(share=True)
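For quick checks outside the Gradio UI, the pipeline can presumably be called directly, since it accepts either a plain path or a Gradio-style dict carrying tmp_path (the file name below is a placeholder):

# Hypothetical direct invocation, bypassing the web interface.
status, subtitled = pipeline("examples/some_local_clip.mp4", "Soloba V1 (CTC)")
print(status)       # "✅ Terminé avec succès" on success, "❌ Erreur — ..." otherwise
print(subtitled)    # path of the re-encoded MP4 with burned-in subtitles, or None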