Update app.py
Improvement of the Gradio interface
app.py
CHANGED
```diff
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
-"""
+"""
+ROBOTSMALI — Sous-titrage Bambara (V5.0 - Intégration Exemples & Design)
+Compatible: Webcam, Fichiers locaux et Exemples Hugging Face
+"""
 import os
 import shlex
 import subprocess
@@ -8,7 +11,7 @@ import traceback
 import random
 import textwrap
 from pathlib import Path
-
+
 import numpy as np
 import torch
 import soundfile as sf
@@ -17,403 +20,176 @@ from huggingface_hub import snapshot_download
 from nemo.collections import asr as nemo_asr
 import gradio as gr
 
-try:
-    from ctc_segmentation import ctc_segmentation, CtcSegmentationParameters, prepare_text
-    HAS_CTC_SEGMENTATION = True
-except ImportError:
-    HAS_CTC_SEGMENTATION = False
-    print("ATTENTION: ctc_segmentation non installé. L'alignement sera basé sur une simple répartition égale du temps.")
-
-# ---------------------------- # CONFIG # ----------------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 random.seed(1234)
 np.random.seed(1234)
 torch.manual_seed(1234)
 
-# Taille du segment pour la transcription par blocs
 SEGMENT_DURATION = 10.0
 MODELS = {
     "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
-    "Soloni V0 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v0", "rnnt"),
     "Soloba V1 (CTC)": ("RobotsMali/soloba-ctc-0.6b-v1", "ctc"),
-    "Soloba V0 (CTC)": ("RobotsMali/soloba-ctc-0.6b-v0", "ctc"),
     "QuartzNet V1 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v1", "ctc_char"),
-    "QuartzNet V0 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v0", "ctc_char"),
 }
-
-_cache = {}
 
 VIDEO_EXAMPLES = [
-    "examples/MARALINKE-Wii (Lève-toi) Black lives matter (Clip officiel) - MARALINKE (360p, H264).mp4"
 ]
 
 def run_cmd(cmd):
-    """Execute a shell command and raise on non-zero exit."""
-    print("RUN:", cmd)
     res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
     if res.returncode != 0:
-        raise RuntimeError(f"…
     return res.stdout
 
 def ffprobe_duration(path):
-    """Détermine la durée de la vidéo via ffprobe (pour vérification/débogage)."""
     cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(path)}'
     out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-    if out.returncode != 0:
-        print(f"--- ERREUR FFPROBE BRUTE --- (Code: {out.returncode})")
-        print(out.stderr)
-        print("----------------------------")
-        return None
-    try:
-        return float(out.stdout.strip())
-    except Exception as e:
-        print(f"--- ERREUR CONVERSION DURÉE --- (Output: {out.stdout.strip()})")
-        print(e)
-        return None
-
-# ---------------------------- # LOAD MODEL (robust) # ----------------------------
 def load_model(name):
-    if name in _cache:
-        return _cache[name]
     repo, mode = MODELS[name]
-    print(f"[LOAD] snapshot_download {repo} ...")
     folder = snapshot_download(repo, local_dir_use_symlinks=False)
     nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
-    if not nemo_file:
-        raise FileNotFoundError(f"Aucun .nemo trouvé pour {name} dans {folder}")
-    print(f"[LOAD] .nemo trouvé: {nemo_file}; mode={mode}")
     if mode == "rnnt":
         model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
-    elif mode == "ctc_char":
-        model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
     else:
-        try:
-            model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
-        except Exception as e:
-            print(f"[WARN] EncDecCTCModelBPE failed ({e}), fallback EncDecCTCModel")
-            model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
     model.to(DEVICE).eval()
     _cache[name] = model
-    print(f"[OK] Modèle {name} chargé sur {DEVICE}")
     return model
 
-# ---------------------------- # AUDIO EXTRACTION & CLEANING (ROBUSTE) # ----------------------------
 def extract_audio(video_path, out_wav):
-    """
-    Extrait l'audio en deux étapes pour stabiliser le fichier webcam/corrompu.
-    Correction : On réencode en libx264 car MP4 ne supporte pas le VP8 (Webcam).
-    """
-    # Chemin du fichier intermédiaire stabilisé
     tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
     os.close(tmp_fd)
-    # On utilise -c:v libx264 au lieu de -c copy
-    remux_cmd = (
-        f'ffmpeg -hide_banner -loglevel error -y '
-        f'-analyzeduration 2147483647 -probesize 2147483647 -ignore_unknown '
-        f'-i {shlex.quote(video_path)} '
-        f'-c:v libx264 -preset ultrafast -crf 23 -c:a aac '
-        f'{shlex.quote(stabilized_mp4)}'
-    )
-    print("RUN: Conversion et stabilisation du flux (Webcam compatible)...")
-    run_cmd(remux_cmd)
-
-    # ÉTAPE 2: Extraction de l'audio 16k WAV
-    extract_cmd = (
-        f'ffmpeg -hide_banner -loglevel error -y '
-        f'-i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}'
-    )
-    print("RUN: Extraction de l'audio depuis le fichier stabilisé...")
-    run_cmd(extract_cmd)
-
-    # Nettoyage
-    if os.path.exists(stabilized_mp4):
-        os.remove(stabilized_mp4)
-
-def clean_audio(wav_path, target_sr=16000):
-    """Load audio, ensure mono, resample to target_sr, normalize, write cleaned wav."""
     audio, sr = sf.read(wav_path)
-    if audio.ndim == 2:
-        audio = audio.mean(axis=1)
-    if sr != target_sr:
-        audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=target_sr)
-        sr = target_sr
     max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
-    if max_val > 1e-6:
-        audio = audio / max_val * 0.9
-    clean_path = wav_path.replace(".wav", "_clean.wav")
-    sf.write(clean_path, audio, sr)
-    return clean_path, audio, sr
-
-# ---------------------------- # TRANSCRIPTION, ETC. (Inchangé) # ----------------------------
-# Les autres fonctions (transcribe, keep_bambara, pack, align_heuristic, etc.)
-# restent les mêmes que dans la version V4.7.
 
 def transcribe(model, wav_path):
-    if not hasattr(model, "transcribe"):
-        raise RuntimeError("Le modèle ne supporte pas model.transcribe()")
     out = model.transcribe([wav_path])
-    if isinstance(out, list):
-        …
-        first = out[0]
-        if isinstance(first, str):
-            return first.strip()
-        if hasattr(first, "text"):
-            return first.text.strip()
-        return str(first).strip()
-    if hasattr(out, "text"):
-        return out.text.strip()
     return str(out).strip()
 
-def keep_bambara(words):
-    res = []
-    for w in words:
-        wl = w.lower()
-        if any(c in wl for c in ["ɛ","ɔ","ŋ"]) or sum(1 for c in wl if c in "aeiou") >= 2:
-            res.append(w)
-    return res
-
-MAX_CHARS = 45; MIN_DUR = 0.3; MAX_DUR = 3.2; MAX_WORDS = 8
-
-def wrap2(txt):
-    parts = textwrap.wrap(txt, MAX_CHARS)
-    if len(parts) <= 1:
-        return txt
-    mid = len(txt) // 2
-    left = txt.rfind(" ", 0, mid)
-    right = txt.find(" ", mid)
-    cut = left if (mid - left) <= ((right - mid) if right != -1 else 1e9) else right
-    l1 = txt[:cut].strip(); l2 = txt[cut:].strip()
-    return l1 + "\n" + l2 if l2 else l1
-
-def pack(spans, total):
-    tmp = []
-    for s, e, t in spans:
-        s = max(0, min(s, total)); e = max(0, min(e, total))
-        if e <= s or not t.strip(): continue
-        tmp.append((s, e, t.strip()))
-    merged = []
-    for seg in tmp:
-        if not merged:
-            merged.append(seg); continue
-        ps, pe, pt = merged[-1]; s, e, t = seg
-        if (e - s) < MIN_DUR or (s - pe) < 0.1:
-            merged[-1] = (ps, max(pe, e), (pt + " " + t).strip())
-        else:
-            merged.append(seg)
-    out = []; last_end = 0
-    for s, e, t in merged:
-        dur = e - s; words = t.split()
-        blocks = [" ".join(words[i:i+MAX_WORDS]) for i in range(0, len(words), MAX_WORDS)]
-        step = dur / max(1, len(blocks))
-        base = s
-        for b in blocks:
-            st = base; en = min(base + step, e); base = en
-            if en <= st: en = min(st + 0.05, total)
-            txt = wrap2(b)
-            if st < last_end:
-                st = last_end + 1e-3; en = max(en, st + 0.05)
-            out.append((st, en, txt)); last_end = en
-    return out
-
-def align_heuristic(words, total_dur):
-    total = total_dur
-    if not words:
-        return pack([], total)
-    spans = []
-    blocks = [" ".join(words[i:i+MAX_WORDS]) for i in range(0, len(words), MAX_WORDS)]
-    num_blocks = len(blocks)
-    max_step = min(MAX_DUR, total / num_blocks if num_blocks > 0 else total)
-    base = 0.0
-    for block in blocks:
-        st = base; en = min(base + max_step, total)
-        spans.append((st, en, block))
-        base = en
-    return pack(spans, total)
 
-def …(…):
-    …
-    all_subs = []
-    for i in range(0, total_samples, segment_samples):
-        start_sample = i
-        end_sample = min(i + segment_samples, total_samples)
-        time_offset = start_sample / sr
-        …
-        sf.write(tmp_seg_wav, segment_audio, sr)
-        …
-        subs = None
-        if HAS_CTC_SEGMENTATION and words and mode in ["rnnt", "ctc"]:
-            try:
-                x = torch.tensor(segment_audio).float().unsqueeze(0).to(DEVICE)
-                ln = torch.tensor([x.shape[1]]).to(DEVICE)
-                with torch.no_grad():
-                    logits, _ = model.forward(input_signal=x, input_signal_length=ln)
-                    if isinstance(logits, tuple):
-                        logits = logits[0]
-                time_per_frame = segment_duration / max(1, logits.shape[1])
-                try:
-                    raw = model.tokenizer.vocab
-                    vocab = list(raw.keys()) if isinstance(raw, dict) else list(raw)
-                except Exception:
-                    vocab = None
-                cfg = CtcSegmentationParameters()
-                if vocab:
-                    cfg.char_list = vocab
-                gt = prepare_text(cfg, words)[0]
-                # CORRECTION DU DÉBALLAGE (STAR-UNPACKING)
-                timing, *others = ctc_segmentation(cfg, logits.detach().cpu().numpy()[0], gt)
-                spans = []
-                for k in range(len(words)):
-                    start_time = timing[k] * time_per_frame
-                    end_time = timing[k+1] * time_per_frame if k + 1 < len(timing) else segment_duration
-                    spans.append((start_time, end_time, words[k]))
-                subs = pack(spans, segment_duration)
-            except Exception as e:
-                print(f"[WARN] CTC Segmentation échoué pour le segment à {time_offset:.2f}s ({e}) -> Fallback Heuristique")
-                subs = align_heuristic(words, segment_duration)
-        else:
-            subs = align_heuristic(words, segment_duration)
-        if subs:
-            for start, end, text in subs:
-                all_subs.append((start + time_offset, end + time_offset, text))
-        …
-    except Exception as e:
-        print(f"Échec critique de la transcription/alignement du segment à {time_offset:.2f}s: {e}")
-    os.remove(tmp_seg_wav)
-    return pack(all_subs, total_dur)
-
-def burn(video_path, subs, output_path=None):
-    if output_path is None:
-        output_path = "RobotsMali_Subtitled.mp4"
-    tmp_fd, tmp_srt = tempfile.mkstemp(suffix=".srt")
-    os.close(tmp_fd)
-    def sec_to_srt(t):
-        h = int(t // 3600); m = int((t % 3600) // 60); s = int(t % 60); ms = int((t - int(t)) * 1000)
-        return f"{h:02}:{m:02}:{s:02},{ms:03}"
-    …
-    vf = f"subtitles={shlex.quote(tmp_srt)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
-    cmd = f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -preset fast -crf 23 -c:a aac -b:a 192k {shlex.quote(output_path)}'
-    try:
-        run_cmd(cmd)
-    finally:
-        if os.path.exists(tmp_srt):
-            os.remove(tmp_srt)
-    return output_path
-
-# ---------------------------- # PIPELINE PRINCIPAL (V4.8) # ----------------------------
-def pipeline(video_input, model_name):
-    try:
-        if isinstance(video_input, dict) and "tmp_path" in video_input:
-            video_path = video_input["tmp_path"]
-        else:
-            video_path = video_input
-
-        # Tente d'obtenir la durée via ffprobe (pour un contrôle rapide)
-        duration = ffprobe_duration(video_path)
-
-        tmp_fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
-        os.close(tmp_fd)
-
-        # Extraction audio robuste (tentative de réparation/remuxage via ffmpeg)
-        extract_audio(video_path, tmp_wav)
-        clean_wav, audio, sr = clean_audio(tmp_wav)
-        …
-        out_video = burn(video_path, subs)
-        return ("✅ Terminé avec succès", out_video)
 
     except Exception as e:
-        …
-        return (f"❌ Erreur — {str(e)}", None)
-
-…
     gr.Examples(
         examples=VIDEO_EXAMPLES,
-        inputs=…
-        label="…
     )
-    …
-    b.click(pipeline, [v, m], [s, o])
-
-demo.launch(share=True, debug=True)
```
app.py after the change:

```python
# -*- coding: utf-8 -*-
"""
ROBOTSMALI — Sous-titrage Bambara (V5.0 - Intégration Exemples & Design)
Compatible: Webcam, Fichiers locaux et Exemples Hugging Face
"""
import os
import shlex
import subprocess
import tempfile
import traceback
import random
import textwrap
from pathlib import Path

import numpy as np
import torch
import soundfile as sf
import librosa
from huggingface_hub import snapshot_download
from nemo.collections import asr as nemo_asr
import gradio as gr

# ---------------------------- # CONFIG & MODELS # ----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
random.seed(1234)
np.random.seed(1234)
torch.manual_seed(1234)

SEGMENT_DURATION = 10.0

MODELS = {
    "Soloni V1 (RNNT)": ("RobotsMali/soloni-114m-tdt-ctc-v1", "rnnt"),
    "Soloba V1 (CTC)": ("RobotsMali/soloba-ctc-0.6b-v1", "ctc"),
    "QuartzNet V1 (CTC-char)": ("RobotsMali/stt-bm-quartznet15x5-v1", "ctc_char"),
}

# Example rows for gr.Examples: one [video path, default model] pair per entry
VIDEO_EXAMPLES = [
    ["examples/MARALINKE-Wii (Lève-toi) Black lives matter (Clip officiel) - MARALINKE (360p, H264).mp4", "Soloba V1 (CTC)"]
]

_cache = {}

# ---------------------------- # TECHNICAL LOGIC # ----------------------------

def run_cmd(cmd):
    """Run a shell command; raise with the captured output if it fails."""
    res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if res.returncode != 0:
        raise RuntimeError(f"Erreur FFmpeg: {res.stdout}")
    return res.stdout

def ffprobe_duration(path):
    """Return the media duration in seconds, or None if ffprobe gives no usable output."""
    cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {shlex.quote(path)}'
    out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        return float(out.stdout.strip())
    except Exception:
        return None
```
```python
def load_model(name):
    """Download the checkpoint from the Hub (cached) and restore the matching NeMo class."""
    if name in _cache:
        return _cache[name]
    repo, mode = MODELS[name]
    folder = snapshot_download(repo, local_dir_use_symlinks=False)
    nemo_file = next((os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".nemo")), None)
    if mode == "rnnt":
        model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(nemo_file)
    else:
        try:
            model = nemo_asr.models.EncDecCTCModelBPE.restore_from(nemo_file)
        except Exception:
            model = nemo_asr.models.EncDecCTCModel.restore_from(nemo_file)
    model.to(DEVICE).eval()
    _cache[name] = model
    return model
```
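A minimal smoke-test sketch for the loader, assuming the NeMo stack is installed and a mono 16 kHz recording exists at `sample.wav` (both the file name and the model choice are illustrative, not part of the app):

```python
# Hypothetical local check, not part of app.py.
model = load_model("Soloba V1 (CTC)")
hyp = model.transcribe(["sample.wav"])[0]          # NeMo accepts a list of file paths
print(hyp.text if hasattr(hyp, "text") else hyp)   # newer NeMo returns Hypothesis objects
```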
```python
def extract_audio(video_path, out_wav):
    """Stabilize the input container, then extract mono 16 kHz WAV audio."""
    tmp_fd, stabilized_mp4 = tempfile.mkstemp(suffix="_stabilized.mp4")
    os.close(tmp_fd)
    # Re-encode to H.264 first to guarantee compatibility (essential for webcam captures)
    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -c:v libx264 -preset ultrafast -crf 23 -c:a aac {shlex.quote(stabilized_mp4)}')
    run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(stabilized_mp4)} -vn -ac 1 -ar 16000 -f wav {shlex.quote(out_wav)}')
    if os.path.exists(stabilized_mp4):
        os.remove(stabilized_mp4)
```
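The two-pass design first forces a clean H.264/AAC container (webcam recordings often arrive in codecs the MP4 muxer rejects), then strips the audio. A quick sanity check of the result, with placeholder file names:

```python
# Hypothetical check that the extracted track is what the ASR models expect.
import soundfile as sf

extract_audio("input.mp4", "out.wav")      # "input.mp4" is a placeholder
info = sf.info("out.wav")
print(info.samplerate, info.channels)      # expected: 16000 1
```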
```python
def clean_audio(wav_path):
    """Force mono 16 kHz, peak-normalize, and write a *_clean.wav next to the input."""
    audio, sr = sf.read(wav_path)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)
    if sr != 16000:
        audio = librosa.resample(audio.astype(float), orig_sr=sr, target_sr=16000)
    max_val = np.max(np.abs(audio)) if audio.size > 0 else 0.0
    if max_val > 1e-6:
        audio = audio / max_val * 0.9
    clean_path = wav_path.replace(".wav", "_clean.wav")
    sf.write(clean_path, audio, 16000)
    return clean_path, audio, 16000
```
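Peak normalization rescales the waveform so its largest sample sits at 0.9 of full scale, which keeps headroom while evening out quiet recordings. A self-contained illustration:

```python
import numpy as np

x = np.array([0.05, -0.2, 0.1])        # peak magnitude is 0.2
y = x / np.max(np.abs(x)) * 0.9
print(y)                               # [ 0.225 -0.9    0.45 ], peak is now 0.9
```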
```python
# ---------------------------- # TRANSCRIPTION & SUBTITLES # ----------------------------

def transcribe(model, wav_path):
    """Run NeMo transcription and normalize the return value to a plain string."""
    out = model.transcribe([wav_path])
    if isinstance(out, list) and len(out) > 0:
        res = out[0]
        # Newer NeMo versions return Hypothesis objects; older ones return plain strings.
        return res.text.strip() if hasattr(res, "text") else str(res).strip()
    return str(out).strip()
```
```python
def pipeline(video_input, model_name):
    try:
        if not video_input:
            # In a generator handler the message must be yielded, not returned,
            # otherwise Gradio never displays it.
            yield "❌ Veuillez charger une vidéo", None
            return
        video_path = video_input

        # Initial status
        yield "⏳ Extraction de l'audio et stabilisation...", None

        # delete=False so ffmpeg can write to the path after the handle closes
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tf:
            wav_path = tf.name

        extract_audio(video_path, wav_path)
        clean_wav, audio, sr = clean_audio(wav_path)
        duration = ffprobe_duration(video_path) or (len(audio) / sr)

        yield f"⏳ Chargement du modèle {model_name}...", None
        model = load_model(model_name)

        yield "⏳ Transcription et alignement en cours...", None
        # (Simplified alignment logic)
        text = transcribe(model, clean_wav)
        words = [w for w in text.split() if len(w) > 1]  # basic filter

        if not words:
            yield "⚠️ Aucun discours détecté en Bambara.", None
            return

        # Build subtitle segments (proportional heuristic)
        total_words = len(words)
        chunk_size = 8
        subs = []
        for i in range(0, total_words, chunk_size):
            chunk = words[i:i + chunk_size]
            s = (i / total_words) * duration
            e = (min(i + chunk_size, total_words) / total_words) * duration
            txt = "\n".join(textwrap.wrap(" ".join(chunk), 40))
            subs.append((s, e, txt))

        yield "⏳ Incrustation des sous-titres dans la vidéo...", None

        # Burn subtitles
        out_v = "RobotsMali_Final.mp4"

        def t(sec):
            h = int(sec // 3600); m = int((sec % 3600) // 60)
            s = int(sec % 60); ms = int((sec - int(sec)) * 1000)
            return f"{h:02}:{m:02}:{s:02},{ms:03}"

        with tempfile.NamedTemporaryFile(suffix=".srt", mode="w", encoding="utf-8", delete=False) as srt_f:
            for idx, (start, end, text) in enumerate(subs, 1):
                srt_f.write(f"{idx}\n{t(start)} --> {t(end)}\n{text}\n\n")
            srt_name = srt_f.name

        vf = f"subtitles={shlex.quote(srt_name)}:force_style='Fontsize=22,PrimaryColour=&HFFFFFF&,OutlineColour=&H000000&'"
        run_cmd(f'ffmpeg -hide_banner -loglevel error -y -i {shlex.quote(video_path)} -vf {shlex.quote(vf)} -c:v libx264 -crf 23 -c:a aac {shlex.quote(out_v)}')

        os.remove(srt_name)
        yield "✅ Sous-titrage terminé !", out_v

    except Exception as e:
        yield f"❌ Erreur: {str(e)}", None
```
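The heuristic distributes words proportionally over the clip duration, and the `t()` helper renders SRT timestamps (SRT uses a comma before the milliseconds). A worked example, assuming a 20-second clip with 12 recognized words:

```python
# 12 words in chunks of 8 give two segments over a 20 s clip:
#   i = 0: s = 0/12 * 20 = 0.0 s,   e = 8/12 * 20 ≈ 13.33 s
#   i = 8: s ≈ 13.33 s,             e = 12/12 * 20 = 20.0 s
sec = 13.5
h = int(sec // 3600); m = int((sec % 3600) // 60)
s = int(sec % 60); ms = int((sec - int(sec)) * 1000)
print(f"{h:02}:{m:02}:{s:02},{ms:03}")  # 00:00:13,500
```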
```python
# ---------------------------- # STYLED GRADIO INTERFACE # ----------------------------

custom_css = """
body { background-color: #0b0e14; }
.gradio-container { background: rgba(17, 25, 40, 0.8) !important; backdrop-filter: blur(12px); border-radius: 20px; border: 1px solid rgba(255, 255, 255, 0.1); }
#header { text-align: center; padding: 20px; }
#header h1 { color: #facc15; font-size: 2.5rem; margin-bottom: 0; }
.gr-button-primary { background: linear-gradient(135deg, #059669, #10b981) !important; border: none !important; }
"""
```
```python
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # gr.Div does not exist in Gradio; gr.Column carries the elem_id instead.
    with gr.Column(elem_id="header"):
        gr.HTML("<h1>🤖 ROBOTSMALI</h1><p style='color:#94a3b8'>Sous-titrage Automatique en Bambara (V5.0)</p>")
        gr.HTML("<div style='height:2px; width:100px; background:#facc15; margin:10px auto;'></div>")

    with gr.Row():
        with gr.Column():
            v_in = gr.Video(label="Vidéo (Webcam ou Fichier)", mirror_webcam=False)
            m_sel = gr.Dropdown(list(MODELS.keys()), value="Soloba V1 (CTC)", label="Modèle ASR")
            btn = gr.Button("🚀 GÉNÉRER LES SOUS-TITRES", variant="primary")

        with gr.Column():
            status = gr.Markdown("### État du traitement\n*Prêt...*")
            v_out = gr.Video(label="Résultat final")

    # Examples section (the MARALINKE clip from the Space's examples/ folder)
    gr.Examples(
        examples=VIDEO_EXAMPLES,
        inputs=[v_in, m_sel],
        label="📺 Vidéos d'exemple (Hugging Face)"
    )

    gr.HTML("<div style='text-align:center; color:#475569; padding:20px'>© 2024 RobotsMali - Intelligence Artificielle pour le Mali</div>")

    btn.click(pipeline, [v_in, m_sel], [status, v_out])

if __name__ == "__main__":
    demo.launch()
```
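Because `pipeline` is a generator, each `yield` streams an intermediate status into the `status` component through Gradio's event queue. Recent Gradio releases enable the queue by default; on older 3.x installs it had to be switched on explicitly. A hedged variant for that case:

```python
# Assumption: only needed on Gradio 3.x, where generator handlers
# stream their yields only when the queue is enabled.
if __name__ == "__main__":
    demo.queue()
    demo.launch()
```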