Spaces:

omarbajouk
/

CapsulesVideo

Sleeping

App Files Files Community

omarbajouk committed on Oct 26, 2025

Commit

260dbde

verified ·

1 Parent(s): d4b545e

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -301

app.py CHANGED Viewed

@@ -1,22 +1,21 @@
-# app.py
 # ============================================================
 # CPAS Bruxelles — Créateur de Capsules (Gradio + Kokoro + SadTalker)
-# Version "Space HF" optimisée (chargement rapide, imports différés)
 # ============================================================
-import os, json, re, uuid, shutil, traceback, gc, subprocess
 from typing import Optional
 import gradio as gr
 from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
-# ---------- Config statique ----------
 ROOT = os.getcwd()
 OUT_DIR = os.path.join(ROOT, "export")
 TMP_DIR = os.path.join(ROOT, "_tmp_capsules")
 os.makedirs(OUT_DIR, exist_ok=True)
 os.makedirs(TMP_DIR, exist_ok=True)
-# Charger config externe
 CONFIG_PATH = os.path.join(ROOT, "app_config.json")
 if os.path.exists(CONFIG_PATH):
     cfg = json.load(open(CONFIG_PATH, "r", encoding="utf-8"))
@@ -24,7 +23,6 @@ if os.path.exists(CONFIG_PATH):
     FONT_REG = cfg["font_paths"]["regular"]
     FONT_BOLD = cfg["font_paths"]["bold"]
 else:
-    # Valeurs de secours
     THEMES = {
         "Bleu Professionnel": {"primary": [0, 82, 147], "secondary": [0, 126, 200]},
         "Vert Gouvernemental": {"primary": [0, 104, 55], "secondary": [0, 155, 119]},
@@ -36,7 +34,6 @@ else:
 W, H = 1920, 1080
 MARGIN_X, SAFE_Y_TOP = 140, 140
-# ---------- État runtime ----------
 capsules = []
 manifest_path = os.path.join(OUT_DIR, "manifest.json")
 if os.path.exists(manifest_path):
@@ -47,12 +44,14 @@ if os.path.exists(manifest_path):
     except Exception:
         pass
 def _save_manifest():
     with open(manifest_path, "w", encoding="utf-8") as f:
         json.dump({"capsules": capsules}, f, ensure_ascii=False, indent=2)
 # ============================================================
-# OUTILS GÉNÉRAUX (rapides)
 # ============================================================
 def _wrap_text(text, font, max_width, draw):
     lines = []
@@ -60,11 +59,7 @@ def _wrap_text(text, font, max_width, draw):
         current = []
         for word in para.split(" "):
             test = " ".join(current + [word])
-            try:
-                w = draw.textlength(test, font=font)
-            except AttributeError:
-                bbox = draw.textbbox((0, 0), test, font=font)
-                w = bbox[2] - bbox[0]
             if w <= max_width or not current:
                 current.append(word)
             else:
@@ -74,33 +69,27 @@ def _wrap_text(text, font, max_width, draw):
             lines.append(" ".join(current))
     return lines
 def _draw_text_shadow(draw, xy, text, font, fill=(255, 255, 255)):
     x, y = xy
     draw.text((x + 2, y + 2), text, font=font, fill=(0, 0, 0))
     draw.text((x, y), text, font=font, fill=fill)
 def _safe_name(stem, ext=".mp4"):
     stem = re.sub(r"[^\w\-]+", "_", stem)[:40]
     return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
-# ============================================================
-# SYNTHÈSE VOCALE — Edge-TTS multivoix (FR/NL) + gTTS fallback
-# ============================================================
-import asyncio
-import edge_tts
-from pydub import AudioSegment
-import soundfile as sf
 # ============================================================
-# 🔊 CHARGEMENT DYNAMIQUE DES VOIX EDGE-TTS (FR/NL)
 # ============================================================
 EDGE_VOICES = {}
 async def fetch_edge_voices_async():
-    """Charge dynamiquement toutes les voix FR/NL depuis Edge-TTS."""
     global EDGE_VOICES
     try:
         voices = await edge_tts.list_voices()
@@ -110,7 +99,6 @@ async def fetch_edge_voices_async():
             f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v["ShortName"]
             for v in filtered
         }
-        print(f"[Edge-TTS] {len(EDGE_VOICES)} voix FR/NL chargées.")
     except Exception as e:
         print(f"[Edge-TTS] Erreur chargement voix : {e}")
         EDGE_VOICES.update({
@@ -118,17 +106,16 @@ async def fetch_edge_voices_async():
             "nl-NL-MaaikeNeural - nl-NL (Female)": "nl-NL-MaaikeNeural",
         })
 def init_edge_voices():
-    """Démarre le chargement asynchrone sans bloquer Gradio."""
     try:
         loop = asyncio.get_event_loop()
         loop.create_task(fetch_edge_voices_async())
     except RuntimeError:
         asyncio.run(fetch_edge_voices_async())
 def get_edge_voices(lang="fr"):
-    """Retourne les voix déjà chargées (selon la langue)."""
-    global EDGE_VOICES
     if not EDGE_VOICES:
         init_edge_voices()
     if lang == "fr":
@@ -138,296 +125,178 @@ def get_edge_voices(lang="fr"):
     return list(EDGE_VOICES.values())
 async def _edge_tts_async(text, voice, outfile):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(outfile)
     return outfile
-def tts_edge(text: str, voice: str = "fr-FR-DeniseNeural") -> str:
-    """Génère un fichier WAV avec Edge-TTS (et fallback gTTS)."""
-    out_mp3 = os.path.join(TMP_DIR, f"edge_{uuid.uuid4().hex}.mp3")
-    try:
-        # Correction boucle asyncio (HF/Gradio)
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                import nest_asyncio
-                nest_asyncio.apply()
-        except RuntimeError:
-            pass
-        asyncio.run(_edge_tts_async(text, voice, out_mp3))
-        # Conversion WAV pour compatibilité MoviePy
-        out_wav = os.path.join(TMP_DIR, f"edge_{uuid.uuid4().hex}.wav")
-        AudioSegment.from_file(out_mp3).export(out_wav, format="wav")
-        os.remove(out_mp3)
-        return out_wav
-    except Exception as e:
-        print(f"[Edge-TTS] Erreur : {e} → fallback gTTS")
-        return tts_gtts(text, lang="fr" if voice.startswith("fr") else "nl")
-def tts_gtts(text: str, lang: str = "fr") -> str:
-    """Fallback via Google Text-to-Speech (gTTS)."""
     from gtts import gTTS
-    out = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.mp3")
-    gTTS(text=text, lang=lang).save(out)
-    # Conversion en WAV pour compatibilité
-    out_wav = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.wav")
-    AudioSegment.from_file(out).export(out_wav, format="wav")
-    os.remove(out)
-    return out_wav
 def _normalize_audio_to_wav(in_path: str) -> str:
-    # Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
-    from pydub import AudioSegment
     wav_path = os.path.join(TMP_DIR, f"norm_{uuid.uuid4().hex}.wav")
     snd = AudioSegment.from_file(in_path)
     snd = snd.set_frame_rate(44100).set_channels(2).set_sample_width(2)
     snd.export(wav_path, format="wav")
     return wav_path
 # ============================================================
-# FOND / GRAPHISME (PIL rapide)
 # ============================================================
 def make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos, img_fond, fond_mode="plein écran"):
     c = THEMES[theme]
     primary = tuple(c["primary"]); secondary = tuple(c["secondary"])
     bg = Image.new("RGB", (W, H), primary)
     if img_fond and os.path.exists(img_fond):
-        img = Image.open(img_fond).convert("RGB")
-        if fond_mode == "plein écran":
-            img = img.resize((W, H))
-            img = img.filter(ImageFilter.GaussianBlur(1))
-            overlay = Image.new("RGBA", (W, H), (*primary, 90))
-            bg = Image.alpha_composite(img.convert("RGBA"), overlay).convert("RGB")
-        elif fond_mode == "moitié gauche":
-            img = img.resize((W//2, H))
-            mask = Image.linear_gradient("L").resize((W//2, H))
-            color = Image.new("RGB", (W//2, H), primary)
-            comp = Image.composite(img, color, ImageOps.invert(mask))
-            bg.paste(comp, (0, 0))
-        elif fond_mode == "moitié droite":
-            img = img.resize((W//2, H))
-            mask = Image.linear_gradient("L").resize((W//2, H))
-            color = Image.new("RGB", (W//2, H), primary)
-            comp = Image.composite(color, img, mask)
-            bg.paste(comp, (W//2, 0))
-        elif fond_mode == "moitié bas":
-            img = img.resize((W, H//2))
-            mask = Image.linear_gradient("L").rotate(90).resize((W, H//2))
-            color = Image.new("RGB", (W, H//2), primary)
-            comp = Image.composite(color, img, mask)
-            bg.paste(comp, (0, H//2))
     draw = ImageDraw.Draw(bg)
     f_title = ImageFont.truetype(FONT_BOLD, 84)
-    f_sub   = ImageFont.truetype(FONT_REG, 44)
-    f_text  = ImageFont.truetype(FONT_REG, 40)
     f_small = ImageFont.truetype(FONT_REG, 30)
     draw.rectangle([(0, 0), (W, 96)], fill=secondary)
     draw.rectangle([(0, H-96), (W, H)], fill=secondary)
     _draw_text_shadow(draw, (MARGIN_X, 30), "CPAS BRUXELLES • SERVICE PUBLIC", f_small)
-    _draw_text_shadow(draw, (W//2-280, H-72), "📞 0800 35 550 • 🌐 cpasbru.irisnet.be", f_small)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP), titre, f_title)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP + 100), sous_titre, f_sub)
     y = SAFE_Y_TOP + 200
     for line in texte_ecran.split("\n"):
         for l in _wrap_text("• " + line.strip("• "), f_text, W - MARGIN_X*2, draw):
             _draw_text_shadow(draw, (MARGIN_X, y), l, f_text)
             y += 55
     if logo_path and os.path.exists(logo_path):
         logo = Image.open(logo_path).convert("RGBA")
         logo.thumbnail((260, 260))
         lw, lh = logo.size
-        if logo_pos == "haut-gauche":
-            pos = (50, 50)
-        elif logo_pos == "haut-droite":
-            pos = (W - lw - 50, 50)
-        else:
-            pos = ((W - lw)//2, 50)
         bg.paste(logo, pos, logo)
     out = os.path.join(TMP_DIR, f"fond_{uuid.uuid4().hex[:6]}.png")
     bg.save(out)
     return out
-# ============================================================
-# SadTalker — appel subprocess (image -> visage animé)
-# ============================================================
-def _check_sadtalker_ready() -> Optional[str]:
-    base = os.path.join(ROOT, "SadTalker")
-    if not os.path.isdir(base):
-        return "Dossier SadTalker manquant. Ajoutez 'SadTalker/' à la racine du Space (voir README)."
-    ck = os.path.join(base, "checkpoints")
-    needed = [
-        "audio2exp.pt",
-        "GFPGANv1.4.pth",
-        "epoch_20.pth",
-        "mapping_00229-model.pth.tar",
-        "shape_predictor_68_face_landmarks.dat",
-    ]
-    missing = [f for f in needed if not os.path.exists(os.path.join(ck, f))]
-    if missing:
-        return "Checkpoints SadTalker manquants: " + ", ".join(missing)
-    return None
-def generate_sadtalker_video(image_path, audio_path, output_dir=TMP_DIR, fps=25) -> Optional[str]:
-    err = _check_sadtalker_ready()
-    if err:
-        # Pas d’échec brutal : on renvoie None (le fond seul sera utilisé)
-        print(f"[SadTalker] {err}")
-        return None
-    try:
-        os.makedirs(output_dir, exist_ok=True)
-        out_path = os.path.join(output_dir, f"sadtalker_{uuid.uuid4().hex[:6]}.mp4")
-        cmd = [
-            "python", "inference.py",
-            "--driven_audio", audio_path,
-            "--source_image", image_path,
-            "--result_dir", output_dir,
-            "--still", "--enhancer", "gfpgan",
-            "--fps", str(fps),
-        ]
-        subprocess.run(cmd, cwd=os.path.join(ROOT, "SadTalker"), check=True)
-        # Récupérer le dernier mp4 créé
-        candidates = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".mp4")]
-        latest = max(candidates, key=os.path.getctime) if candidates else None
-        if latest:
-            # Harmoniser le nom
-            shutil.move(latest, out_path)
-            return out_path
-        return None
-    except Exception as e:
-        print("[SadTalker] Erreur:", e)
-        return None
 # ============================================================
-# SOUS-TITRES .SRT
 # ============================================================
 def write_srt(text, duration):
-    parts = re.split(r'(?<=[\.!?])\s+', text.strip())
-    parts = [p for p in parts if p]
-    total = len("".join(parts)) or 1
     cur = 0.0
     srt = []
-    for i, p in enumerate(parts, 1):
-        prop = len(p)/total
-        start = cur
-        end = min(duration, cur + duration*prop)
         cur = end
-        def ts(t):
-            m, s = divmod(t, 60)
-            h, m = divmod(m, 60)
-            return f"{int(h):02}:{int(m):02}:{int(s):02},000"
-        srt += [f"{i}", f"{ts(start)} --> {ts(end)}", p, ""]
     path = os.path.join(OUT_DIR, f"srt_{uuid.uuid4().hex[:6]}.srt")
     open(path, "w", encoding="utf-8").write("\n".join(srt))
     return path
 # ============================================================
-# EXPORT VIDÉO (MoviePy — imports différés)
 # ============================================================
 def _write_video_with_fallback(final_clip, out_path_base, fps=25):
     attempts = [
-        {"ext": ".mp4", "codec": "libx264", "audio_codec": "aac"},
-        {"ext": ".mp4", "codec": "mpeg4",  "audio_codec": "aac"},
-        {"ext": ".mp4", "codec": "libx264","audio_codec": "libmp3lame"},
     ]
-    ffmpeg_params = ["-pix_fmt", "yuv420p", "-movflags", "+faststart", "-threads", "1", "-shortest"]
-    last_err = None
-    for i, opt in enumerate(attempts, 1):
-        out = out_path_base if out_path_base.endswith(opt["ext"]) else out_path_base + opt["ext"]
         try:
-            final_clip.write_videofile(
-                out,
-                fps=fps,
-                codec=opt["codec"],
-                audio_codec=opt["audio_codec"],
-                audio=True,
-                ffmpeg_params=ffmpeg_params,
-                logger=None,
-                threads=1,
-            )
-            if os.path.exists(out) and os.path.getsize(out) > 150000:
-                return out
         except Exception as e:
-            last_err = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
-    raise RuntimeError(last_err or "FFmpeg a échoué")
 # ============================================================
-# BUILD CAPSULE — Pipeline complet (corrigé)
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
-                  fond_mode="plein écran",
-                  image_presentateur=None, voix_type="Féminine",
-                  position_presentateur="bottom-right", plein=False,
-                  moteur_voix="Parler-TTS (offline)", langue="fr", speaker=None):
-    # 1) TTS (Edge multivoix ou fallback)
-    try:
-        audio_mp = tts_edge(texte_voix, voice=speaker or ("fr-FR-DeniseNeural" if langue == "fr" else "nl-NL-MaaikeNeural"))
-    except Exception as e:
-        print(f"[Capsule] Erreur TTS Edge ({e}), fallback gTTS.")
-        audio_mp = tts_gtts(texte_voix, lang=langue)
-    # S'assurer qu'on a un WAV
-    audio_wav = audio_mp
-    if not audio_mp.lower().endswith(".wav"):
-        try:
-            audio_wav = _normalize_audio_to_wav(audio_mp)
-        except Exception as e:
-            print(f"[Audio] Normalisation échouée ({e}), on garde {audio_mp}")
-    # 2) Fond (PIL)
-    fond_path = make_background(titre, sous_titre, texte_ecran, theme,
-                                logo_path, logo_pos, image_fond, fond_mode)
-    # 3) MoviePy (imports lents ici seulement)
-    from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, VideoFileClip
     import moviepy.video.fx.all as vfx
     audio = AudioFileClip(audio_wav)
     dur = float(audio.duration or 5.0)
-    target_fps = 25
     bg = ImageClip(fond_path).set_duration(dur)
-    # 4) SadTalker (optionnel)
     clips = [bg]
-    if image_presentateur and os.path.exists(image_presentateur):
-        vpath = generate_sadtalker_video(image_presentateur, audio_wav, fps=target_fps)
-        if vpath and os.path.exists(vpath):
-            v = VideoFileClip(vpath).without_audio().fx(vfx.loop, duration=dur)
-            if plein:
-                v = v.resize((W, H)).set_position(("center", "center"))
-            else:
-                v = v.resize(width=520)
-                pos_map = {
-                    "bottom-right": ("right", "bottom"),
-                    "bottom-left": ("left", "bottom"),
-                    "top-right": ("right", "top"),
-                    "top-left": ("left", "top"),
-                    "center": ("center", "center"),
-                }
-                v = v.set_position(pos_map.get(position_presentateur, ("right", "bottom")))
-            clips.append(v)
-    # 5) Composition + export
     final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
     name = _safe_name(f"{titre}_{langue}")
-    out_base = os.path.join(OUT_DIR, name)
-    out = _write_video_with_fallback(final, out_base, fps=target_fps)
-    # 6) Sous-titres + manifest
     srt_path = write_srt(texte_voix, dur)
     capsules.append({
         "file": out,
         "title": titre,
@@ -438,18 +307,7 @@ def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
     })
     _save_manifest()
-    # 7) Nettoyage
-    try:
-        audio.close()
-        final.close()
-        bg.close()
-        if os.path.exists(audio_mp): os.remove(audio_mp)
-        if audio_wav != audio_mp and os.path.exists(audio_wav): os.remove(audio_wav)
-    except Exception as e:
-        print(f"[Clean] Erreur nettoyage : {e}")
-    gc.collect()
-    return out, f"✅ Capsule {langue.upper()} créée ({dur:.1f}s, voix {speaker or voix_type})", srt_path
 # ============================================================
@@ -461,6 +319,7 @@ def table_capsules():
              f"{c['duration']}s", c["theme"], c["voice"], os.path.basename(c["file"])]
             for i, c in enumerate(capsules)]
 def assemble_final():
     if not capsules:
         return None, "❌ Aucune capsule."
@@ -476,6 +335,7 @@ def assemble_final():
             try: c.close()
             except: pass
 def supprimer_capsule(index):
     try:
         idx = int(index) - 1
@@ -489,7 +349,8 @@ def supprimer_capsule(index):
         else:
             return "⚠️ Index invalide.", table_capsules()
     except Exception as e:
-        return f"❌ Erreur lors de la suppression : {e}", table_capsules()
 def deplacer_capsule(index, direction):
     try:
@@ -501,62 +362,44 @@ def deplacer_capsule(index, direction):
         _save_manifest()
         return f"🔁 Capsule déplacée {direction}.", table_capsules()
     except Exception as e:
-        return f"❌ Erreur de déplacement : {e}", table_capsules()
 # ============================================================
 # UI GRADIO
 # ============================================================
 print("[INIT] Lancement de Gradio...")
 init_edge_voices()
-with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
-               theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎬 Créateur de Capsules CPAS – Version complète (SadTalker + Kokoro)")
-    gr.Markdown("**Astuce** : pour un démarrage instantané, chargez le dossier `SadTalker/checkpoints/` dans le Space (voir README).")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
             with gr.Column():
                 image_fond = gr.Image(label="🖼 Image de fond", type="filepath")
                 fond_mode = gr.Radio(["plein écran", "moitié gauche", "moitié droite", "moitié bas"],
-                                     label="Mode d'affichage du fond", value="plein écran")
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
-                logo_pos = gr.Radio(["haut-gauche","haut-droite","centre"],
-                                    label="Position logo", value="haut-gauche")
-                image_presentateur = gr.Image(label="🧑‍🎨 Image du présentateur (portrait pour SadTalker)", type="filepath")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
             with gr.Column():
-                titre = gr.Textbox(label="Titre", value="Aide médicale urgente / Dringende medische hulp")
-                sous_titre = gr.Textbox(label="Sous-titre", value="Soins accessibles à tous / Toegankelijke zorg voor iedereen")
                 theme = gr.Radio(list(THEMES.keys()), label="Thème", value="Bleu Professionnel")
                 langue = gr.Radio(["fr", "nl"], label="Langue de la voix", value="fr")
-                def maj_voix(lang):
-                    try:
-                        voices = get_edge_voices(lang)
-                        return gr.update(choices=voices, value=voices[0] if voices else None)
-                    except Exception as e:
-                        return gr.update(choices=[], value=None)
                 speaker_id = gr.Dropdown(
                     label="🎙 Voix Edge-TTS",
                     choices=get_edge_voices("fr"),
-                    value="fr-FR-DeniseNeural",
-                    info="Liste dynamique des voix Edge-TTS (FR & NL)"
                 )
-                langue.change(maj_voix, [langue], [speaker_id])
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
-                moteur_voix = gr.Radio(
-                    ["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],
-                    label="Moteur voix",
-                    value="Kokoro (HuggingFace, offline)"
-                )
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
                 texte_ecran = gr.Textbox(label="Texte à l'écran", lines=4,
@@ -580,36 +423,31 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
             btn_down = gr.Button("⬇️ Descendre")
             btn_del = gr.Button("🗑 Supprimer")
         message = gr.Markdown()
         btn_up.click(lambda i: deplacer_capsule(i, "up"), [index], [message, liste])
         btn_down.click(lambda i: deplacer_capsule(i, "down"), [index], [message, liste])
         btn_del.click(supprimer_capsule, [index], [message, liste])
         gr.Markdown("### 🎬 Assemblage final")
         btn_asm = gr.Button("🎥 Assembler la vidéo complète", variant="primary")
         sortie_finale = gr.Video(label="Vidéo finale")
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
-    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo, ip, vx, pos_p, plein, motor, lang, speaker):
         try:
-            vid, msg, srt = build_capsule(t, st, tv, te, th,
-                                          img, logo, pos_logo, fmode,
-                                          ip, vx, pos_p, plein,
-                                          motor, lang, speaker=speaker)
-            return vid, srt, msg, table_capsules()
         except Exception as e:
             return None, None, f"❌ Erreur: {e}\n\n{traceback.format_exc()}", table_capsules()
-    btn.click(
-        creer_capsule_ui,
-        [titre, sous_titre, texte_voix, texte_ecran, theme,
-         image_fond, fond_mode, logo_path, logo_pos,
-         image_presentateur, voix_type, position_presentateur,
-         plein, moteur_voix, langue, speaker_id],
-        [sortie, srt_out, statut, liste]
-    )
 if __name__ == "__main__":
     demo.launch()

 # ============================================================
 # CPAS Bruxelles — Créateur de Capsules (Gradio + Kokoro + SadTalker)
+# Version complète (vidéo directe + SadTalker + sous-titres)
 # ============================================================
+import os, json, re, uuid, shutil, traceback, gc, subprocess, asyncio
 from typing import Optional
 import gradio as gr
 from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
+from pydub import AudioSegment
+# ---------- CONFIGURATION ----------
 # Base directory: the process's current working directory at import time.
 ROOT = os.getcwd()
 # Destination for generated artefacts (the manifest and subtitle files are
 # written here).
 OUT_DIR = os.path.join(ROOT, "export")
 # Scratch directory for intermediate audio files and background images.
 TMP_DIR = os.path.join(ROOT, "_tmp_capsules")
 # Create both directories up front; exist_ok keeps restarts from failing.
 os.makedirs(OUT_DIR, exist_ok=True)
 os.makedirs(TMP_DIR, exist_ok=True)
 CONFIG_PATH = os.path.join(ROOT, "app_config.json")
 if os.path.exists(CONFIG_PATH):
     cfg = json.load(open(CONFIG_PATH, "r", encoding="utf-8"))
     FONT_REG = cfg["font_paths"]["regular"]
     FONT_BOLD = cfg["font_paths"]["bold"]
 else:
     THEMES = {
         "Bleu Professionnel": {"primary": [0, 82, 147], "secondary": [0, 126, 200]},
         "Vert Gouvernemental": {"primary": [0, 104, 55], "secondary": [0, 155, 119]},
 # Width and height, in pixels, of the background image that is generated.
 W, H = 1920, 1080
 # Left x offset used for on-screen text, and the y offset at which the
 # title is drawn.
 MARGIN_X, SAFE_Y_TOP = 140, 140
 capsules = []
 manifest_path = os.path.join(OUT_DIR, "manifest.json")
 if os.path.exists(manifest_path):
     except Exception:
         pass
 def _save_manifest():
     with open(manifest_path, "w", encoding="utf-8") as f:
         json.dump({"capsules": capsules}, f, ensure_ascii=False, indent=2)
 # ============================================================
+# OUTILS GÉNÉRAUX
 # ============================================================
 def _wrap_text(text, font, max_width, draw):
     lines = []
         current = []
         for word in para.split(" "):
             test = " ".join(current + [word])
+            w = draw.textlength(test, font=font)
             if w <= max_width or not current:
                 current.append(word)
             else:
             lines.append(" ".join(current))
     return lines
 def _draw_text_shadow(draw, xy, text, font, fill=(255, 255, 255)):
     x, y = xy
     draw.text((x + 2, y + 2), text, font=font, fill=(0, 0, 0))
     draw.text((x, y), text, font=font, fill=fill)
 def _safe_name(stem, ext=".mp4"):
     stem = re.sub(r"[^\w\-]+", "_", stem)[:40]
     return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
 # ============================================================
+# SYNTHÈSE VOCALE Edge-TTS
 # ============================================================
+import edge_tts
 EDGE_VOICES = {}
 async def fetch_edge_voices_async():
     global EDGE_VOICES
     try:
         voices = await edge_tts.list_voices()
             f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v["ShortName"]
             for v in filtered
         }
     except Exception as e:
         print(f"[Edge-TTS] Erreur chargement voix : {e}")
         EDGE_VOICES.update({
             "nl-NL-MaaikeNeural - nl-NL (Female)": "nl-NL-MaaikeNeural",
         })
 def init_edge_voices():
     """Start loading the Edge-TTS voice list.

     When an event loop is already running in the calling thread, the fetch
     is scheduled on it as a task and this function returns immediately.
     Otherwise the fetch is run to completion with ``asyncio.run``.

     Scheduling a task on a loop that is not running would leave the task
     pending forever, so the loop is looked up with ``get_running_loop``
     rather than ``get_event_loop``.
     """
     try:
         loop = asyncio.get_running_loop()
     except RuntimeError:
         # No loop is running in this thread: run the coroutine ourselves.
         asyncio.run(fetch_edge_voices_async())
         return
     # Keep a strong reference: the loop only holds weak references to tasks,
     # so an unreferenced task may be garbage-collected before it finishes.
     init_edge_voices._pending_task = loop.create_task(fetch_edge_voices_async())
 def get_edge_voices(lang="fr"):
     if not EDGE_VOICES:
         init_edge_voices()
     if lang == "fr":
     return list(EDGE_VOICES.values())
 async def _edge_tts_async(text, voice, outfile):
     """Synthesize ``text`` with the given Edge-TTS voice into ``outfile``.

     Returns ``outfile`` once the save has completed.
     """
     await edge_tts.Communicate(text, voice).save(outfile)
     return outfile
def tts_edge(text: str, voice: str) -> str:
    """Synthesize ``text`` with Edge-TTS and return the path of a WAV file.

    The speech is first saved as a temporary MP3 in ``TMP_DIR`` and then
    converted to WAV. The MP3 is removed whether or not the call succeeds,
    and a partially written WAV is removed on failure, so a failed attempt
    leaves nothing behind in ``TMP_DIR``.

    Args:
        text: Text to speak.
        voice: Edge-TTS voice short name.

    Returns:
        Path of the generated WAV file.

    Raises:
        Exception: Any error raised by synthesis or conversion is
            propagated unchanged so the caller can fall back to another
            engine.
    """
    out_mp3 = os.path.join(TMP_DIR, f"tts_{uuid.uuid4().hex}.mp3")
    out_wav = os.path.join(TMP_DIR, f"tts_{uuid.uuid4().hex}.wav")
    try:
        asyncio.run(_edge_tts_async(text, voice, out_mp3))
        AudioSegment.from_file(out_mp3).export(out_wav, format="wav")
    except Exception:
        # Do not leave a truncated WAV behind for a later step to pick up.
        if os.path.exists(out_wav):
            os.remove(out_wav)
        raise
    finally:
        # The MP3 is only an intermediate file; drop it on every path.
        if os.path.exists(out_mp3):
            os.remove(out_mp3)
    return out_wav
def tts_gtts(text: str, lang="fr") -> str:
    """Synthesize ``text`` with gTTS and return the path of a WAV file.

    The speech is saved as a temporary MP3 in ``TMP_DIR`` and converted to
    WAV. The MP3 is removed on every path and a partially written WAV is
    removed on failure.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        Path of the generated WAV file.

    Raises:
        Exception: Any error raised by gTTS or by the conversion is
            propagated unchanged.
    """
    # Imported here so the module can load without gTTS until it is needed.
    from gtts import gTTS

    mp3 = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.mp3")
    wav = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.wav")
    try:
        gTTS(text=text, lang=lang).save(mp3)
        AudioSegment.from_file(mp3).export(wav, format="wav")
    except Exception:
        # Do not leave a truncated WAV behind.
        if os.path.exists(wav):
            os.remove(wav)
        raise
    finally:
        # The MP3 is only an intermediate file; drop it on every path.
        if os.path.exists(mp3):
            os.remove(mp3)
    return wav
 def _normalize_audio_to_wav(in_path: str) -> str:
     """Re-encode ``in_path`` as a WAV file in ``TMP_DIR`` and return its path.

     The output uses a 44100 Hz frame rate, 2 channels and a sample width
     of 2 bytes.
     """
     destination = os.path.join(TMP_DIR, f"norm_{uuid.uuid4().hex}.wav")
     segment = (
         AudioSegment.from_file(in_path)
         .set_frame_rate(44100)
         .set_channels(2)
         .set_sample_width(2)
     )
     segment.export(destination, format="wav")
     return destination
 # ============================================================
+# FOND / GRAPHISME
 # ============================================================
 def make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos, img_fond, fond_mode="plein écran"):
     """Render the static background image of a capsule and return its path.

     A W x H RGB canvas is filled with the theme's primary colour. When
     ``img_fond`` points to an existing file, that image is resized to the
     canvas, lightly blurred and tinted with the primary colour. Header and
     footer bands, the title, the subtitle, the bulleted on-screen text and
     an optional logo are then drawn on top.

     Args:
         titre: Title drawn at ``SAFE_Y_TOP``.
         sous_titre: Subtitle drawn 100 px below the title position.
         texte_ecran: On-screen text; each line becomes a bullet, wrapped
             to the width between the left and right margins.
         theme: Key into ``THEMES`` selecting the primary/secondary colours.
         logo_path: Optional path of a logo image.
         logo_pos: ``"haut-gauche"`` places the logo top-left; any other
             value places it top-right.
         img_fond: Optional path of a background picture.
         fond_mode: Accepted for interface compatibility; this body does
             not read it.

     Returns:
         Path of the PNG written to ``TMP_DIR``.
     """
     c = THEMES[theme]
     primary = tuple(c["primary"]); secondary = tuple(c["secondary"])
     bg = Image.new("RGB", (W, H), primary)
     if img_fond and os.path.exists(img_fond):
         # Full-canvas picture: resize, blur (radius 1), then tint it with a
         # semi-transparent layer of the primary colour (alpha 90).
+        img = Image.open(img_fond).convert("RGB").resize((W, H))
+        img = img.filter(ImageFilter.GaussianBlur(1))
+        overlay = Image.new("RGBA", (W, H), (*primary, 90))
+        bg = Image.alpha_composite(img.convert("RGBA"), overlay).convert("RGB")
     draw = ImageDraw.Draw(bg)
     f_title = ImageFont.truetype(FONT_BOLD, 84)
+    f_sub = ImageFont.truetype(FONT_REG, 44)
+    f_text = ImageFont.truetype(FONT_REG, 40)
     f_small = ImageFont.truetype(FONT_REG, 30)
     # Header and footer bands, 96 px high, in the secondary colour.
     draw.rectangle([(0, 0), (W, 96)], fill=secondary)
     draw.rectangle([(0, H-96), (W, H)], fill=secondary)
     _draw_text_shadow(draw, (MARGIN_X, 30), "CPAS BRUXELLES • SERVICE PUBLIC", f_small)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP), titre, f_title)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP + 100), sous_titre, f_sub)
     # Bulleted text starts 200 px below the title and advances 55 px per
     # wrapped line.
     y = SAFE_Y_TOP + 200
     for line in texte_ecran.split("\n"):
         # Strip any bullet characters/spaces the user typed at either end
         # of the line, then prepend exactly one bullet.
         for l in _wrap_text("• " + line.strip("• "), f_text, W - MARGIN_X*2, draw):
             _draw_text_shadow(draw, (MARGIN_X, y), l, f_text)
             y += 55
     if logo_path and os.path.exists(logo_path):
         logo = Image.open(logo_path).convert("RGBA")
         # thumbnail() shrinks in place to fit within 260 x 260.
         logo.thumbnail((260, 260))
         lw, lh = logo.size
+        pos = (50, 50) if logo_pos == "haut-gauche" else (W - lw - 50, 50)
         # The logo is passed again as the mask argument of paste().
         bg.paste(logo, pos, logo)
     out = os.path.join(TMP_DIR, f"fond_{uuid.uuid4().hex[:6]}.png")
     bg.save(out)
     return out
 # ============================================================
+# SOUS-TITRES
 # ============================================================
 def write_srt(text, duration):
     """Write an SRT subtitle file for ``text`` spread over ``duration`` seconds.

     The text is split into sentences after ``.``, ``!`` or ``?``. Each
     sentence gets a share of ``duration`` proportional to its character
     count. Timestamps carry millisecond precision, so cues shorter than
     one second keep a non-zero length. Empty or whitespace-only text
     produces a file with no cues.

     Args:
         text: Spoken text to subtitle.
         duration: Total duration of the audio, in seconds.

     Returns:
         Path of the ``.srt`` file written to ``OUT_DIR``.
     """
     def _stamp(seconds):
         # SRT timestamps are HH:MM:SS,mmm; work in whole milliseconds to
         # avoid float drift in the individual fields.
         total_ms = max(0, int(round(seconds * 1000)))
         hours, rest = divmod(total_ms, 3600000)
         minutes, rest = divmod(rest, 60000)
         secs, millis = divmod(rest, 1000)
         return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

     sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]
     total_chars = sum(len(s) for s in sentences) or 1
     cur = 0.0
     srt = []
     for i, sentence in enumerate(sentences, 1):
         start = cur
         if i == len(sentences):
             # Pin the last cue to the end of the audio so accumulated
             # rounding error cannot leave a gap or overshoot.
             end = duration
         else:
             end = min(duration, start + duration * (len(sentence) / total_chars))
         cur = end
         srt += [f"{i}", f"{_stamp(start)} --> {_stamp(end)}", sentence, ""]
     path = os.path.join(OUT_DIR, f"srt_{uuid.uuid4().hex[:6]}.srt")
     with open(path, "w", encoding="utf-8") as handle:
         handle.write("\n".join(srt))
     return path
 # ============================================================
+# EXPORT / COMPOSITION VIDÉO
 # ============================================================
 def _write_video_with_fallback(final_clip, out_path_base, fps=25):
     attempts = [
+        {"codec": "libx264", "audio_codec": "aac"},
+        {"codec": "mpeg4", "audio_codec": "aac"},
     ]
+    for opt in attempts:
+        out = out_path_base if out_path_base.endswith(".mp4") else out_path_base + ".mp4"
         try:
+            final_clip.write_videofile(out, fps=fps, codec=opt["codec"],
+                                       audio_codec=opt["audio_codec"],
+                                       ffmpeg_params=["-pix_fmt", "yuv420p", "-movflags", "+faststart"],
+                                       logger=None)
+            if os.path.exists(out): return out
         except Exception as e:
+            print("[FFmpeg] Erreur:", e)
+    raise RuntimeError("FFmpeg a échoué")
 # ============================================================
+# PIPELINE DE CRÉATION
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
                   fond_mode="plein écran", image_presentateur=None,
                   voix_type="Féminine", position_presentateur="bottom-right",
                   plein=False, langue="fr", speaker=None,
                   video_presentateur=None,
                   source_audio_option="Garder la voix originale de la vidéo"):
     """Render one capsule video plus its subtitle file and register it.

     Steps: obtain the narration audio, render the background image, overlay
     an optional presenter (video or still image), encode the composite to
     MP4, write an SRT file from ``texte_voix`` and append the capsule to the
     manifest.

     Args:
         titre, sous_titre, texte_ecran, theme, logo_path, logo_pos,
         image_fond, fond_mode: Forwarded to ``make_background``.
         texte_voix: Text synthesized as narration and used for the subtitles.
         image_presentateur: Optional still image shown as presenter when no
             presenter video is available.
         voix_type: Only used as the manifest voice label when ``speaker`` is
             not set.
         position_presentateur: One of ``bottom-right``, ``bottom-left``,
             ``top-right``, ``top-left``, ``center``; unknown values fall back
             to bottom-right.
         plein: When true the presenter video keeps its native size instead of
             being resized to a width of 520 px.
         langue: ``"fr"`` selects the French default voice, anything else the
             Dutch one; also passed to the gTTS fallback.
         speaker: Explicit Edge-TTS voice name; overrides the language default.
         video_presentateur: Optional presenter video path.
         source_audio_option: Either keep the presenter video's own audio or
             replace it with synthesized narration.

     Returns:
         ``(video_path, status_message, srt_path)``.

     Raises:
         RuntimeError: Propagated from ``_write_video_with_fallback`` when
             encoding fails. Other exceptions from the helpers propagate too.
     """
     from moviepy.editor import AudioFileClip, ImageClip, VideoFileClip, CompositeVideoClip
     # NOTE(review): vfx is not referenced below; kept in case the import is
     # relied on for its side effects — confirm before removing.
     import moviepy.video.fx.all as vfx

     pos_map = {
         "bottom-right": ("right", "bottom"),
         "bottom-left": ("left", "bottom"),
         "top-right": ("right", "top"),
         "top-left": ("left", "top"),
         "center": ("center", "center"),
     }
     position = pos_map.get(position_presentateur, ("right", "bottom"))
     # Use the same existence check for the audio and the overlay steps.
     has_video = bool(video_presentateur) and os.path.exists(video_presentateur)

     # --- AUDIO ---
     audio_wav = None
     if has_video and source_audio_option == "Garder la voix originale de la vidéo":
         vclip = VideoFileClip(video_presentateur)
         try:
             # A video without an audio track exposes ``audio`` as None; in
             # that case fall through to synthesized narration below.
             if vclip.audio is not None:
                 audio_wav = os.path.join(TMP_DIR, f"orig_{uuid.uuid4().hex}.wav")
                 vclip.audio.write_audiofile(audio_wav, fps=44100, logger=None)
         finally:
             vclip.close()
     if audio_wav is None:
         voice = speaker or ("fr-FR-DeniseNeural" if langue == "fr" else "nl-NL-MaaikeNeural")
         try:
             audio_wav = tts_edge(texte_voix, voice=voice)
         except Exception:
             # Best-effort fallback to the second TTS backend.
             audio_wav = tts_gtts(texte_voix, lang=langue)

     audio = AudioFileClip(audio_wav)
     to_close = [audio]  # clips holding file readers, released after encoding
     try:
         dur = float(audio.duration or 5.0)

         fond_path = make_background(titre, sous_titre, texte_ecran, theme,
                                     logo_path, logo_pos, image_fond, fond_mode)
         clips = [ImageClip(fond_path).set_duration(dur)]

         # --- PRESENTER ---
         if has_video:
             v = VideoFileClip(video_presentateur)
             to_close.append(v)
             if source_audio_option == "Remplacer par voix IA":
                 v = v.without_audio()
             if not plein:
                 v = v.resize(width=520)
             clips.append(v.set_position(position))
         elif image_presentateur and os.path.exists(image_presentateur):
             img_clip = ImageClip(image_presentateur).set_duration(dur).resize(width=520)
             # Honour the requested position for still images as well.
             clips.append(img_clip.set_position(position))

         # --- COMPOSITION ---
         final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
         name = _safe_name(f"{titre}_{langue}")
         out = _write_video_with_fallback(final, os.path.join(OUT_DIR, name))
     finally:
         for clip in to_close:
             try:
                 clip.close()
             except Exception:
                 # Cleanup is best-effort; never mask the real error.
                 pass

     srt_path = write_srt(texte_voix, dur)
     # The capsule table rows read "duration", "theme" and "voice" in addition
     # to "file", so store them with the entry.
     capsules.append({
         "file": out,
         "title": titre,
         "duration": round(dur, 1),
         "theme": theme,
         "voice": speaker or voix_type,
     })
     _save_manifest()
     return out, f"✅ Capsule créée ({dur:.1f}s)", srt_path
 # ============================================================
              f"{c['duration']}s", c["theme"], c["voice"], os.path.basename(c["file"])]
             for i, c in enumerate(capsules)]
 def assemble_final():
     if not capsules:
         return None, "❌ Aucune capsule."
             try: c.close()
             except: pass
 def supprimer_capsule(index):
     try:
         idx = int(index) - 1
         else:
             return "⚠️ Index invalide.", table_capsules()
     except Exception as e:
+        return f"❌ Erreur : {e}", table_capsules()
 def deplacer_capsule(index, direction):
     try:
         _save_manifest()
         return f"🔁 Capsule déplacée {direction}.", table_capsules()
     except Exception as e:
+        return f"❌ Erreur : {e}", table_capsules()
 # ============================================================
 # UI GRADIO
 # ============================================================
 print("[INIT] Lancement de Gradio...")
 init_edge_voices()
+with gr.Blocks(title="Créateur de Capsules CPAS – Complet") as demo:
+    gr.Markdown("## 🎬 Créateur de Capsules CPAS — version complète")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
             with gr.Column():
                 image_fond = gr.Image(label="🖼 Image de fond", type="filepath")
                 fond_mode = gr.Radio(["plein écran", "moitié gauche", "moitié droite", "moitié bas"],
+                                     label="Mode du fond", value="plein écran")
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
+                logo_pos = gr.Radio(["haut-gauche","haut-droite"], label="Position logo", value="haut-gauche")
+                video_presentateur = gr.Video(label="🎥 Vidéo du présentateur", type="filepath")
+                image_presentateur = gr.Image(label="🧑‍🎨 Image présentateur (SadTalker)", type="filepath")
+                source_audio_option = gr.Radio(["Garder la voix originale de la vidéo", "Remplacer par voix IA"],
+                                               label="🎧 Option audio", value="Garder la voix originale de la vidéo")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
             with gr.Column():
+                titre = gr.Textbox(label="Titre", value="Aide médicale urgente")
+                sous_titre = gr.Textbox(label="Sous-titre", value="Soins accessibles à tous")
                 theme = gr.Radio(list(THEMES.keys()), label="Thème", value="Bleu Professionnel")
                 langue = gr.Radio(["fr", "nl"], label="Langue de la voix", value="fr")
                 speaker_id = gr.Dropdown(
                     label="🎙 Voix Edge-TTS",
                     choices=get_edge_voices("fr"),
+                    value="fr-FR-DeniseNeural"
                 )
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
                 texte_ecran = gr.Textbox(label="Texte à l'écran", lines=4,
             btn_down = gr.Button("⬇️ Descendre")
             btn_del = gr.Button("🗑 Supprimer")
         message = gr.Markdown()
         btn_up.click(lambda i: deplacer_capsule(i, "up"), [index], [message, liste])
         btn_down.click(lambda i: deplacer_capsule(i, "down"), [index], [message, liste])
         btn_del.click(supprimer_capsule, [index], [message, liste])
         gr.Markdown("### 🎬 Assemblage final")
         btn_asm = gr.Button("🎥 Assembler la vidéo complète", variant="primary")
         sortie_finale = gr.Video(label="Vidéo finale")
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
+    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo,
+                         ip, vx, pos_p, plein, lang, speaker, vid, src_opt):
         try:
+            vid_path, msg, srt = build_capsule(
+                t, st, tv, te, th, img, logo, pos_logo, fmode,
+                ip, vx, pos_p, plein, lang, speaker,
+                video_presentateur=vid, source_audio_option=src_opt)
+            return vid_path, srt, msg, table_capsules()
         except Exception as e:
             return None, None, f"❌ Erreur: {e}\n\n{traceback.format_exc()}", table_capsules()
+    btn.click(creer_capsule_ui,
+              [titre, sous_titre, texte_voix, texte_ecran, theme,
+               image_fond, fond_mode, logo_path, logo_pos,
+               image_presentateur, voix_type, position_presentateur,
+               plein, langue, speaker_id, video_presentateur, source_audio_option],
+              [sortie, srt_out, statut, liste])
 # Launch the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()