Spaces:

omarbajouk
/

CapsulesVideo

Sleeping

App Files Files Community

omarbajouk committed on Oct 24, 2025

Commit

8056606

verified ·

1 Parent(s): 3554094

Update app.py

Browse files

Files changed (1) hide show

app.py +275 -394

app.py CHANGED Viewed

@@ -1,71 +1,60 @@
 # ============================================================
-# IMPORTS & CONFIG
 # ============================================================
-import os, io, json, re, uuid, time, shutil, traceback, gc, asyncio
-from moviepy.editor import *
-import moviepy.video.fx.all as vfx
-from moviepy.video.compositing.concatenate import concatenate_videoclips
-from gtts import gTTS
-from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
 import gradio as gr
-from pydub import AudioSegment
-from rich import print as rprint
-import edge_tts
 ROOT = os.getcwd()
 OUT_DIR = os.path.join(ROOT, "export")
 TMP_DIR = os.path.join(ROOT, "_tmp_capsules")
 os.makedirs(OUT_DIR, exist_ok=True)
 os.makedirs(TMP_DIR, exist_ok=True)
-FONT_REG = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
-FONT_BOLD = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
 W, H = 1920, 1080
 MARGIN_X, SAFE_Y_TOP = 140, 140
 capsules = []
 manifest_path = os.path.join(OUT_DIR, "manifest.json")
-themes = {
-    "Bleu Professionnel": {"primary": (0, 82, 147), "secondary": (0, 126, 200)},
-    "Vert Gouvernemental": {"primary": (0, 104, 55), "secondary": (0, 155, 119)},
-    "Violet Élégant": {"primary": (74, 20, 140), "secondary": (103, 58, 183)},
-}
-# ============================================================
-# 🚀 Téléchargement automatique de SadTalker (si absent)
-# ============================================================
-import os, subprocess
-if not os.path.exists("SadTalker"):
-    print("📦 Téléchargement de SadTalker depuis GitHub...")
-    subprocess.run(["git", "clone", "--depth", "1", "https://github.com/OpenTalker/SadTalker.git"], check=True)
-    # Télécharger les checkpoints nécessaires
-    os.makedirs("SadTalker/checkpoints", exist_ok=True)
-    checkpoints = {
-        "audio2exp.pt": "https://huggingface.co/OpenTalker/SadTalker/resolve/main/checkpoints/audio2exp.pt",
-        "GFPGANv1.4.pth": "https://huggingface.co/OpenTalker/SadTalker/resolve/main/checkpoints/GFPGANv1.4.pth",
-        "epoch_20.pth": "https://huggingface.co/OpenTalker/SadTalker/resolve/main/checkpoints/epoch_20.pth",
-        "mapping_00229-model.pth.tar": "https://huggingface.co/OpenTalker/SadTalker/resolve/main/checkpoints/mapping_00229-model.pth.tar",
-        "shape_predictor_68_face_landmarks.dat": "https://huggingface.co/OpenTalker/SadTalker/resolve/main/checkpoints/shape_predictor_68_face_landmarks.dat",
-    }
-    for name, url in checkpoints.items():
-        dest = f"SadTalker/checkpoints/{name}"
-        if not os.path.exists(dest):
-            print(f"⬇️ Téléchargement {name}...")
-            subprocess.run(["wget", "-q", "-O", dest, url], check=True)
-    print("✅ SadTalker prêt à l’emploi.")
 # ============================================================
-# OUTILS GÉNÉRAUX
 # ============================================================
-def wrap_text(text, font, max_width, draw):
     lines = []
     for para in text.split("\n"):
         current = []
@@ -85,122 +74,49 @@ def wrap_text(text, font, max_width, draw):
             lines.append(" ".join(current))
     return lines
-def draw_text_shadow(draw, xy, text, font, fill=(255,255,255)):
     x, y = xy
-    draw.text((x+2, y+2), text, font=font, fill=(0,0,0))
     draw.text((x, y), text, font=font, fill=fill)
-def safe_name(stem, ext=".mp4"):
     stem = re.sub(r"[^\w\-]+", "_", stem)[:40]
     return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
-def save_manifest():
-    with open(manifest_path, "w", encoding="utf-8") as f:
-        json.dump({"capsules": capsules}, f, ensure_ascii=False, indent=2)
-if os.path.exists(manifest_path):
-    try:
-        data = json.load(open(manifest_path, "r", encoding="utf-8"))
-        if isinstance(data, dict) and "capsules" in data:
-            capsules = data["capsules"]
-    except Exception as e:
-        rprint(f"[yellow]Impossible de charger le manifest: {e}[/yellow]")
 # ============================================================
-# 🧠 SYNTHÈSE VOCALE – gTTS / Edge / espeak / Kokoro (Hugging Face)
 # ============================================================
-from transformers import pipeline
-import soundfile as sf
-import asyncio
-from pydub import AudioSegment
-def tts_gtts(text, lang="fr"):
-    """🔹 Génère avec Google TTS (rapide, en ligne)"""
-    out = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.mp3")
     from gtts import gTTS
     gTTS(text=text, lang=lang).save(out)
     return out
-def tts_espeak(text, voice="fr+f3"):
-    """🔹 Génère avec eSpeak (offline, basique)"""
-    out = os.path.join(TMP_DIR, f"espeak_{uuid.uuid4().hex}.wav")
-    os.system(f'espeak-ng -v {voice} -s 165 -p 50 --stdout "{text}" > "{out}"')
-    return out
-async def _tts_edge_async(text, voice="fr-FR-DeniseNeural"):
-    """🔹 Edge-TTS (Azure Cloud)"""
-    import edge_tts
-    out = os.path.join(TMP_DIR, f"edge_{uuid.uuid4().hex}.mp3")
-    communicate = edge_tts.Communicate(text, voice)
-    await communicate.save(out)
-    return out
-def tts_edge(text, voice="fr-FR-DeniseNeural"):
-    """Génère via Edge-TTS puis convertit en WAV"""
-    try:
-        mp3_path = asyncio.run(_tts_edge_async(text, voice))
-        wav_path = os.path.join(TMP_DIR, f"edge_{uuid.uuid4().hex}.wav")
-        AudioSegment.from_file(mp3_path).export(wav_path, format="wav")
-        os.remove(mp3_path)
-        return wav_path
-    except Exception as e:
-        rprint(f"[red]Erreur Edge-TTS: {e}, fallback espeak[/red]")
-        return tts_espeak(text)
-def tts_kokoro(text, langue="fr"):
-    """🔹 Synthèse Kokoro Hugging Face (offline, voix très naturelle)"""
-    try:
-        kokoro = pipeline("text-to-speech", model="onnx-community/Kokoro-82M-v1.0-ONNX")
-        output = kokoro(text)
-        wav_path = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
-        sf.write(wav_path, output["audio"], output["sampling_rate"])
-        return wav_path
-    except Exception as e:
-        rprint(f"[red]Erreur Kokoro: {e}, fallback espeak[/red]")
-        return tts_espeak(text)
-def synth_voice(text, engine="Kokoro", gender_hint="Féminine", langue="fr"):
-    """
-    Sélectionne automatiquement le moteur TTS selon le choix utilisateur.
-    """
-    try:
-        if engine == "Kokoro":
-            return tts_kokoro(text, langue=langue)
-        elif engine == "gTTS":
-            path = tts_gtts(text, lang=langue)
-            # Ajustement léger masculin
-            if gender_hint.lower().startswith("m"):
-                snd = AudioSegment.from_file(path)
-                snd = snd._spawn(snd.raw_data, overrides={"frame_rate": int(snd.frame_rate * 0.94)}).set_frame_rate(snd.frame_rate)
-                snd.export(path, format="mp3")
-            return path
-        elif engine == "edge-tts":
-            voices = {
-                "fr": {"f": "fr-FR-DeniseNeural", "m": "fr-FR-AlainNeural"},
-                "nl": {"f": "nl-NL-ColetteNeural", "m": "nl-BE-ArnaudNeural"},
-                "en": {"f": "en-GB-LibbyNeural", "m": "en-GB-RyanNeural"},
-            }
-            v = voices.get(langue, voices["fr"])["f" if gender_hint.lower().startswith("f") else "m"]
-            return tts_edge(text, voice=v)
-        else:  # espeak fallback
-            voice = f"{langue}+f3" if gender_hint.lower().startswith("f") else f"{langue}+m3"
-            return tts_espeak(text, voice=voice)
-    except Exception as e:
-        rprint(f"[red]Erreur synthèse {engine}: {e}, fallback espeak[/red]")
-        voice = f"{langue}+f3" if gender_hint.lower().startswith("f") else f"{langue}+m3"
-        return tts_espeak(text, voice=voice)
-# ============================================================
-# AUDIO NORMALISATION WAV
-# ============================================================
 def _normalize_audio_to_wav(in_path: str) -> str:
-    # Convertit n'importe quel format (mp3/wav) en WAV standard
     wav_path = os.path.join(TMP_DIR, f"norm_{uuid.uuid4().hex}.wav")
     snd = AudioSegment.from_file(in_path)
     snd = snd.set_frame_rate(44100).set_channels(2).set_sample_width(2)
@@ -208,311 +124,277 @@ def _normalize_audio_to_wav(in_path: str) -> str:
     return wav_path
 # ============================================================
-# SOUS-TITRES
-# ============================================================
-def write_srt(text, duration):
-    parts = re.split(r'(?<=[\.!?])\s+', text.strip())
-    parts = [p for p in parts if p]
-    total = len("".join(parts)) or 1
-    cur = 0.0
-    srt = []
-    for i, p in enumerate(parts, 1):
-        prop = len(p)/total
-        start = cur
-        end = min(duration, cur + duration*prop)
-        cur = end
-        def ts(t):
-            m, s = divmod(t, 60)
-            h, m = divmod(m, 60)
-            return f"{int(h):02}:{int(m):02}:{int(s):02},000"
-        srt += [f"{i}", f"{ts(start)} --> {ts(end)}", p, ""]
-    path = os.path.join(OUT_DIR, f"srt_{uuid.uuid4().hex[:6]}.srt")
-    open(path, "w", encoding="utf-8").write("\n".join(srt))
-    return path
-# ============================================================
-# CRÉATION FOND
 # ============================================================
 def make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos, img_fond, fond_mode="plein écran"):
-    c = themes[theme]
-    bg = Image.new("RGB", (W, H), c["primary"])
     if img_fond and os.path.exists(img_fond):
         img = Image.open(img_fond).convert("RGB")
         if fond_mode == "plein écran":
             img = img.resize((W, H))
             img = img.filter(ImageFilter.GaussianBlur(1))
-            overlay = Image.new("RGBA", (W, H), (*c["primary"], 90))
             bg = Image.alpha_composite(img.convert("RGBA"), overlay).convert("RGB")
         elif fond_mode == "moitié gauche":
             img = img.resize((W//2, H))
             mask = Image.linear_gradient("L").resize((W//2, H))
-            color = Image.new("RGB", (W//2, H), c["primary"])
             comp = Image.composite(img, color, ImageOps.invert(mask))
             bg.paste(comp, (0, 0))
         elif fond_mode == "moitié droite":
             img = img.resize((W//2, H))
             mask = Image.linear_gradient("L").resize((W//2, H))
-            color = Image.new("RGB", (W//2, H), c["primary"])
             comp = Image.composite(color, img, mask)
             bg.paste(comp, (W//2, 0))
         elif fond_mode == "moitié bas":
             img = img.resize((W, H//2))
             mask = Image.linear_gradient("L").rotate(90).resize((W, H//2))
-            color = Image.new("RGB", (W, H//2), c["primary"])
             comp = Image.composite(color, img, mask)
             bg.paste(comp, (0, H//2))
     draw = ImageDraw.Draw(bg)
     f_title = ImageFont.truetype(FONT_BOLD, 84)
-    f_sub = ImageFont.truetype(FONT_REG, 44)
-    f_text = ImageFont.truetype(FONT_REG, 40)
     f_small = ImageFont.truetype(FONT_REG, 30)
-    draw.rectangle([(0, 0), (W, 96)], fill=c["secondary"])
-    draw.rectangle([(0, H-96), (W, H)], fill=c["secondary"])
-    draw_text_shadow(draw, (MARGIN_X, 30), "CPAS BRUXELLES • SERVICE PUBLIC", f_small)
-    draw_text_shadow(draw, (W//2-280, H-72), "📞 0800 35 550 • 🌐 cpasbru.irisnet.be", f_small)
-    draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP), titre, f_title)
-    draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP+100), sous_titre, f_sub)
     y = SAFE_Y_TOP + 200
     for line in texte_ecran.split("\n"):
-        for l in wrap_text("• "+line.strip("• "), f_text, W-MARGIN_X*2, draw):
-            draw_text_shadow(draw, (MARGIN_X, y), l, f_text)
             y += 55
     if logo_path and os.path.exists(logo_path):
         logo = Image.open(logo_path).convert("RGBA")
-        logo.thumbnail((260,260))
         lw, lh = logo.size
-        pos = (50,50) if logo_pos=="haut-gauche" else (W-lw-50,50) if logo_pos=="haut-droite" else ((W-lw)//2,50)
         bg.paste(logo, pos, logo)
     out = os.path.join(TMP_DIR, f"fond_{uuid.uuid4().hex[:6]}.png")
     bg.save(out)
     return out
 # ============================================================
-# FFMPEG FALLBACK (sans temp_audiofile)
 # ============================================================
-def _write_video_with_fallback(final_clip, out_path_base, fps=24):
     attempts = [
         {"ext": ".mp4", "codec": "libx264", "audio_codec": "aac"},
-        {"ext": ".mp4", "codec": "mpeg4", "audio_codec": "aac"},
-        {"ext": ".mp4", "codec": "libx264", "audio_codec": "libmp3lame"},
-    ]
-    ffmpeg_common = [
-        "-pix_fmt", "yuv420p",
-        "-movflags", "+faststart",
-        "-threads", "1",
-        "-max_muxing_queue_size", "1024",
-        "-shortest"
     ]
     last_err = None
     for i, opt in enumerate(attempts, 1):
         out = out_path_base if out_path_base.endswith(opt["ext"]) else out_path_base + opt["ext"]
         try:
-            rprint(f"[cyan]FFmpeg try #{i}: codec={opt['codec']} audio={opt['audio_codec']} -> {out}[/cyan]")
             final_clip.write_videofile(
                 out,
                 fps=fps,
                 codec=opt["codec"],
                 audio_codec=opt["audio_codec"],
                 audio=True,
-                ffmpeg_params=ffmpeg_common,
                 logger=None,
                 threads=1,
             )
             if os.path.exists(out) and os.path.getsize(out) > 150000:
                 return out
         except Exception as e:
-            last_err = f"{type(e).__name__}: {e}\n" + traceback.format_exc()
-            rprint(f"[yellow]Échec essai #{i}: {last_err}[/yellow]")
-    raise RuntimeError(last_err or "Échec inconnu de l'encodage FFmpeg")
-def generate_sadtalker_video(image_path, audio_path, output_dir=TMP_DIR, fps=25):
-    """
-    Génère une vidéo animée du visage à partir d'une image et d'un fichier audio,
-    en utilisant SadTalker.
-    """
-    try:
-        os.makedirs(output_dir, exist_ok=True)
-        out_path = os.path.join(output_dir, f"sadtalker_{uuid.uuid4().hex[:6]}.mp4")
-        cmd = (
-            f'cd SadTalker && python inference.py '
-            f'--driven_audio "{audio_path}" '
-            f'--source_image "{image_path}" '
-            f'--result_dir "{output_dir}" '
-            f'--still --enhancer gfpgan --fps {fps}'
-        )
-        os.system(cmd)
-        # Cherche le dernier fichier mp4 généré
-        candidates = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".mp4")]
-        if not candidates:
-            rprint("[red]❌ Aucune sortie SadTalker générée[/red]")
-            return None
-        latest = max(candidates, key=os.path.getctime)
-        os.rename(latest, out_path)
-        if os.path.exists(out_path):
-            return out_path
-        return None
-    except Exception as e:
-        rprint(f"[red]Erreur SadTalker: {e}[/red]")
-        return None
-#=============================================================
 # ============================================================
-# BUILD CAPSULE – Version SadTalker (qualité type Sora 2)
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
                   fond_mode="plein écran",
-                  video_presentateur=None, voix_type="Féminine",
                   position_presentateur="bottom-right", plein=False,
-                  moteur_voix="Edge-TTS (voix naturelle)", langue="fr"):
-    """
-    Construit une capsule vidéo complète avec :
-    - TTS Edge/gTTS/espeak
-    - Génération visuelle (fond + texte)
-    - Présentateur animé via SadTalker
-    """
-    try:
-        # ====================================================
-        # 1️⃣ Synthèse vocale
-        # ====================================================
-        # Sélection du moteur vocal selon le choix utilisateur
-        if moteur_voix.startswith("Kokoro"):
-            engine = "Kokoro"
-        elif moteur_voix.startswith("Edge"):
-            engine = "edge-tts"
-        elif moteur_voix.startswith("gTTS"):
-            engine = "gTTS"
-        else:
-            engine = "espeak"
-        audio_path_mp = synth_voice(texte_voix, engine=engine, gender_hint=voix_type, langue=langue)
-        audio_path = _normalize_audio_to_wav(audio_path_mp)
-        if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 1000:
-            raise RuntimeError(f"Audio non valide : {audio_path}")
-        audio = AudioFileClip(audio_path)
-        dur = float(audio.duration or 5.0)
-        target_fps = 25  # ✅ FPS fixe haute qualité
-        # ====================================================
-        # 2️⃣ Génération du fond graphique
-        # ====================================================
-        fond_path = make_background(
-            titre, sous_titre, texte_ecran, theme,
-            logo_path, logo_pos, image_fond, fond_mode
-        )
-        bg = ImageClip(fond_path).set_duration(dur)
-        # ====================================================
-        # 3️⃣ Génération présentateur (SadTalker)
-        # ====================================================
-        clips = [bg]
-        if video_presentateur and os.path.exists(video_presentateur):
-            ext = os.path.splitext(video_presentateur)[1].lower()
-            video_path = None
-            if ext in [".jpg", ".jpeg", ".png"]:
-                rprint("[cyan]🎭 Génération visage animé avec SadTalker...[/cyan]")
-                synced = generate_sadtalker_video(video_presentateur, audio_path, fps=target_fps)
-                if synced:
-                    video_path = synced
-                    rprint("[green]✅ SadTalker : visage animé généré[/green]")
-                else:
-                    rprint("[red]⚠️ SadTalker n’a pas pu produire de vidéo[/red]")
             else:
-                rprint("[yellow]⚠️ SadTalker attend une image (portrait), pas une vidéo.[/yellow]")
-                video_path = video_presentateur
-            if video_path and os.path.exists(video_path):
-                v = VideoFileClip(video_path).without_audio()
-                v = v.fx(vfx.loop, duration=dur)
-                if plein:
-                    v = v.resize((W, H))
-                else:
-                    v = v.resize(width=480)
-                    pos_map = {
-                        "bottom-right": ("right", "bottom"),
-                        "bottom-left": ("left", "bottom"),
-                        "top-right": ("right", "top"),
-                        "top-left": ("left", "top"),
-                        "center": ("center", "center"),
-                    }
-                    v = v.set_position(pos_map.get(position_presentateur, ("right", "bottom")))
-                clips.append(v)
-        # ====================================================
-        # 4️⃣ Composition finale et export
-        # ====================================================
-        final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
-        name = safe_name(f"{titre}_{langue}")
-        out_base = os.path.join(OUT_DIR, name)
-        out = _write_video_with_fallback(final, out_base, fps=target_fps)
-        # ====================================================
-        # 5️⃣ Sous-titres + Manifest
-        # ====================================================
-        srt_path = write_srt(texte_voix, dur)
-        capsules.append({
-            "file": out,
-            "title": titre,
-            "langue": langue,
-            "voice": voix_type,
-            "theme": theme,
-            "duration": round(dur, 1)
-        })
-        save_manifest()
-        # ====================================================
-        # 6️⃣ Nettoyage
-        # ====================================================
-        audio.close()
-        final.close()
-        bg.close()
-        try:
-            if os.path.exists(audio_path_mp): os.remove(audio_path_mp)
-            if os.path.exists(audio_path): os.remove(audio_path)
-        except:
-            pass
-        gc.collect()
-        return out, f"✅ Capsule {langue.upper()} créée ({dur:.1f}s, voix {voix_type})", srt_path
-    except Exception as e:
-        err_msg = f"❌ Erreur: {e}\n\nTraceback:\n{traceback.format_exc()}"
-        rprint(f"[red]{err_msg}[/red]")
-        return None, err_msg, None
 # ============================================================
-# TABLEAU, ASSEMBLAGE ET GESTION DES CAPSULES
 # ============================================================
 def table_capsules():
     return [[i+1, c["title"], c.get("langue","fr").upper(),
-             f"{c['duration']}s", c["theme"], c["voice"],
-             os.path.basename(c["file"])]
             for i, c in enumerate(capsules)]
 def assemble_final():
     if not capsules:
         return None, "❌ Aucune capsule."
     clips = [VideoFileClip(c["file"]) for c in capsules]
-    final = concatenate_videoclips(clips, method="compose")
     try:
-        out = _write_video_with_fallback(final, os.path.join(OUT_DIR, safe_name("VIDEO_COMPLETE")), fps=25)
         return out, f"🎉 Vidéo finale prête ({len(capsules)} capsules)."
     finally:
         for c in clips:
             try: c.close()
             except: pass
-        try: final.close()
-        except: pass
 def supprimer_capsule(index):
     try:
@@ -522,14 +404,13 @@ def supprimer_capsule(index):
             if os.path.exists(fichier):
                 os.remove(fichier)
             del capsules[idx]
-            save_manifest()
             return f"🗑 Capsule supprimée : {fichier}", table_capsules()
         else:
             return "⚠️ Index invalide.", table_capsules()
     except Exception as e:
         return f"❌ Erreur lors de la suppression : {e}", table_capsules()
 def deplacer_capsule(index, direction):
     try:
         idx = int(index) - 1
@@ -537,17 +418,19 @@ def deplacer_capsule(index, direction):
             capsules[idx - 1], capsules[idx] = capsules[idx], capsules[idx - 1]
         elif direction == "down" and idx < len(capsules) - 1:
             capsules[idx + 1], capsules[idx] = capsules[idx], capsules[idx + 1]
-        save_manifest()
         return f"🔁 Capsule déplacée {direction}.", table_capsules()
     except Exception as e:
         return f"❌ Erreur de déplacement : {e}", table_capsules()
 # ============================================================
-# INTERFACE GRADIO (adaptée pour SadTalker)
 # ============================================================
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎬 Créateur de Capsules CPAS – Version SadTalker (réaliste)")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
@@ -558,24 +441,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
                 logo_pos = gr.Radio(["haut-gauche","haut-droite","centre"],
                                     label="Position logo", value="haut-gauche")
-                # SadTalker prend une IMAGE, pas une vidéo
-                video_presentateur = gr.Image(label="🧑‍🎨 Image du présentateur (SadTalker)", type="filepath")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
             with gr.Column():
                 titre = gr.Textbox(label="Titre", value="Aide médicale urgente / Dringende medische hulp")
                 sous_titre = gr.Textbox(label="Sous-titre", value="Soins accessibles à tous / Toegankelijke zorg voor iedereen")
-                theme = gr.Radio(list(themes.keys()), label="Thème", value="Bleu Professionnel")
                 langue = gr.Radio(["fr","nl"], label="Langue de la voix", value="fr")
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
-                    ["Kokoro (HuggingFace, offline)", "Edge-TTS (voix naturelle)", "gTTS (en ligne)", "espeak-ng (offline)"],
                     label="Moteur voix",
                     value="Kokoro (HuggingFace, offline)"
                 )
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
                 texte_ecran = gr.Textbox(label="Texte à l'écran", lines=4,
@@ -586,7 +466,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         srt_out = gr.File(label="Sous-titres .srt")
         statut = gr.Markdown()
-    # Onglet gestion
     with gr.Tab("Gestion & Assemblage"):
         gr.Markdown("### 🗂 Gestion des capsules")
         liste = gr.Dataframe(
@@ -610,22 +489,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         sortie_finale = gr.Video(label="Vidéo finale")
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
-    # Fonction principale pour Gradio
-    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo, vp, vx, pos_p, plein, motor, langue):
-        vid, msg, srt = build_capsule(t, st, tv, te, th,
-                                      img, logo, pos_logo, fmode,
-                                      vp, vx, pos_p, plein,
-                                      motor, langue)
-        return vid, srt, msg, table_capsules()
     btn.click(
         creer_capsule_ui,
         [titre, sous_titre, texte_voix, texte_ecran, theme,
          image_fond, fond_mode, logo_path, logo_pos,
-         video_presentateur, voix_type, position_presentateur,
          plein, moteur_voix, langue],
         [sortie, srt_out, statut, liste]
     )
-print("🚀 Lancement de l'interface SadTalker FR/NL…")
-demo.launch(share=True, debug=True)

+# app.py
 # ============================================================
+# CPAS Bruxelles — Créateur de Capsules (Gradio + Kokoro + SadTalker)
+# Version "Space HF" optimisée (chargement rapide, imports différés)
 # ============================================================
+import os, json, re, uuid, shutil, traceback, gc, subprocess
+from typing import Optional
 import gradio as gr
+from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
+# ---------- Config statique ----------
 ROOT = os.getcwd()
 OUT_DIR = os.path.join(ROOT, "export")
 TMP_DIR = os.path.join(ROOT, "_tmp_capsules")
 os.makedirs(OUT_DIR, exist_ok=True)
 os.makedirs(TMP_DIR, exist_ok=True)
+# Charger config externe
+CONFIG_PATH = os.path.join(ROOT, "app_config.json")
+if os.path.exists(CONFIG_PATH):
+    cfg = json.load(open(CONFIG_PATH, "r", encoding="utf-8"))
+    THEMES = cfg["themes"]
+    FONT_REG = cfg["font_paths"]["regular"]
+    FONT_BOLD = cfg["font_paths"]["bold"]
+else:
+    # Valeurs de secours
+    THEMES = {
+        "Bleu Professionnel": {"primary": [0, 82, 147], "secondary": [0, 126, 200]},
+        "Vert Gouvernemental": {"primary": [0, 104, 55], "secondary": [0, 155, 119]},
+        "Violet Élégant": {"primary": [74, 20, 140], "secondary": [103, 58, 183]},
+    }
+    FONT_REG = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+    FONT_BOLD = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
 W, H = 1920, 1080
 MARGIN_X, SAFE_Y_TOP = 140, 140
+# ---------- État runtime ----------
 capsules = []
 manifest_path = os.path.join(OUT_DIR, "manifest.json")
+if os.path.exists(manifest_path):
+    try:
+        data = json.load(open(manifest_path, "r", encoding="utf-8"))
+        if isinstance(data, dict) and "capsules" in data:
+            capsules = data["capsules"]
+    except Exception:
+        pass
def _save_manifest():
    """Write the current ``capsules`` list to ``manifest_path`` as UTF-8 JSON.

    The file is overwritten on every call and holds a single object of the
    form ``{"capsules": [...]}``, indented by two spaces with non-ASCII
    characters kept as-is.
    """
    snapshot = {"capsules": capsules}
    with open(manifest_path, mode="w", encoding="utf-8") as handle:
        json.dump(snapshot, handle, indent=2, ensure_ascii=False)
 # ============================================================
+# OUTILS GÉNÉRAUX (rapides)
 # ============================================================
+def _wrap_text(text, font, max_width, draw):
     lines = []
     for para in text.split("\n"):
         current = []
             lines.append(" ".join(current))
     return lines
def _draw_text_shadow(draw, xy, text, font, fill=(255, 255, 255)):
    """Draw *text* with a drop shadow.

    A black copy is drawn first, shifted 2 px to the right and 2 px down,
    then the text itself is drawn at *xy* in *fill* (white by default).
    *draw* only needs a ``text(position, text, font=..., fill=...)`` method.
    """
    left, top = xy
    layers = (
        ((left + 2, top + 2), (0, 0, 0)),  # shadow goes underneath
        ((left, top), fill),               # actual text on top
    )
    for origin, colour in layers:
        draw.text(origin, text, font=font, fill=colour)
def _safe_name(stem, ext=".mp4"):
    """Build a unique file name from *stem*.

    Every run of characters that is neither a word character nor ``-`` is
    collapsed into one underscore, the result is cut to 40 characters, and a
    6-character random hex suffix plus *ext* is appended.
    """
    cleaned = re.sub(r"[^\w\-]+", "_", stem)
    prefix = cleaned[:40]
    token = uuid.uuid4().hex[:6]
    return f"{prefix}_{token}{ext}"
 # ============================================================
+# SYNTHÈSE VOCALE (Kokoro par défaut, gTTS en secours)
 # ============================================================
# Cached text-to-speech pipeline; stays None until the first synthesis request.
kokoro_pipeline = None


def _get_kokoro():
    """Return the shared Kokoro text-to-speech pipeline, creating it on first use.

    ``transformers`` is imported here rather than at module level, so the
    import and the model load only happen when a pipeline is first requested.
    """
    global kokoro_pipeline
    if kokoro_pipeline is not None:
        return kokoro_pipeline
    # Requires a recent transformers release plus onnxruntime and soundfile.
    from transformers import pipeline as build_pipeline
    kokoro_pipeline = build_pipeline("text-to-speech", model="onnx-community/Kokoro-82M-v1.0-ONNX")
    return kokoro_pipeline
def tts_kokoro(text: str, langue: str = "fr") -> str:
    """Synthesize *text* with Kokoro and return the path of the audio file.

    Args:
        text: Text to speak.
        langue: Language code. It is only forwarded to the gTTS fallback;
            the Kokoro pipeline call itself receives just the text.

    Returns:
        Path of a ``kokoro_<hex>.wav`` file in ``TMP_DIR`` on success,
        otherwise whatever ``tts_gtts`` returns.

    Any failure on the Kokoro path (missing ``soundfile`` or ``transformers``,
    model load error, inference error, write error) triggers the gTTS
    fallback instead of propagating.
    """
    out = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
    try:
        # Imported inside the try block: a missing soundfile install must fall
        # back to gTTS like every other Kokoro failure instead of raising.
        import soundfile as sf

        kokoro = _get_kokoro()
        result = kokoro(text)
        sf.write(out, result["audio"], result["sampling_rate"])
        return out
    except Exception as e:
        # Report why Kokoro was skipped so the fallback is not silent.
        print(f"⚠️ Kokoro indisponible ({e}), bascule vers gTTS")
        return tts_gtts(text, lang=langue)
def tts_gtts(text: str, lang: str = "fr") -> str:
    """Synthesize *text* with gTTS and return the path of the saved MP3.

    The file is written to ``TMP_DIR`` under a random ``gtts_<hex>.mp3`` name.
    ``gtts`` is imported on demand, so it is only needed when this engine is used.
    """
    from gtts import gTTS

    filename = f"gtts_{uuid.uuid4().hex}.mp3"
    out = os.path.join(TMP_DIR, filename)
    speech = gTTS(text=text, lang=lang)
    speech.save(out)
    return out
 def _normalize_audio_to_wav(in_path: str) -> str:
+    # Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
+    from pydub import AudioSegment
     wav_path = os.path.join(TMP_DIR, f"norm_{uuid.uuid4().hex}.wav")
     snd = AudioSegment.from_file(in_path)
     snd = snd.set_frame_rate(44100).set_channels(2).set_sample_width(2)
     return wav_path
 # ============================================================
+# FOND / GRAPHISME (PIL rapide)
 # ============================================================
 def make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos, img_fond, fond_mode="plein écran"):
     """Render the static W x H slide of a capsule with PIL and return its PNG path.

     Args:
         titre: Main title, drawn at (MARGIN_X, SAFE_Y_TOP).
         sous_titre: Subtitle, drawn 100 px below the title.
         texte_ecran: Newline-separated bullet lines; blank lines are skipped.
         theme: Key of ``THEMES`` giving the "primary"/"secondary" colours.
         logo_path: Optional logo image path; ignored when falsy or missing.
         logo_pos: "haut-gauche" or "haut-droite"; any other value centres
             the logo horizontally.
         img_fond: Optional background image path; ignored when falsy or missing.
         fond_mode: "plein écran", "moitié gauche", "moitié droite" or
             "moitié bas"; any other value keeps the plain primary colour.

     Returns:
         Path of a ``fond_<hex>.png`` file written to ``TMP_DIR``.

     Raises:
         KeyError: If ``theme`` is not a key of ``THEMES``.
     """
     palette = THEMES[theme]
     primary = tuple(palette["primary"])
     secondary = tuple(palette["secondary"])
     bg = Image.new("RGB", (W, H), primary)
     if img_fond and os.path.exists(img_fond):
         # convert() returns an independent copy, so the file handle can be
         # released immediately instead of lingering until garbage collection.
         with Image.open(img_fond) as src:
             img = src.convert("RGB")
         if fond_mode == "plein écran":
             img = img.resize((W, H))
             img = img.filter(ImageFilter.GaussianBlur(1))
             # Semi-transparent primary-colour veil over the blurred picture.
             overlay = Image.new("RGBA", (W, H), (*primary, 90))
             bg = Image.alpha_composite(img.convert("RGBA"), overlay).convert("RGB")
         elif fond_mode == "moitié gauche":
             half = (W//2, H)
             img = img.resize(half)
             # NOTE(review): Image.linear_gradient runs top-to-bottom, so this
             # blend fades vertically although the split is left/right —
             # confirm this is the intended look.
             mask = Image.linear_gradient("L").resize(half)
             color = Image.new("RGB", half, primary)
             comp = Image.composite(img, color, ImageOps.invert(mask))
             bg.paste(comp, (0, 0))
         elif fond_mode == "moitié droite":
             half = (W//2, H)
             img = img.resize(half)
             mask = Image.linear_gradient("L").resize(half)
             color = Image.new("RGB", half, primary)
             comp = Image.composite(color, img, mask)
             bg.paste(comp, (W//2, 0))
         elif fond_mode == "moitié bas":
             half = (W, H//2)
             img = img.resize(half)
             mask = Image.linear_gradient("L").rotate(90).resize(half)
             color = Image.new("RGB", half, primary)
             comp = Image.composite(color, img, mask)
             bg.paste(comp, (0, H//2))
     draw = ImageDraw.Draw(bg)
     f_title = ImageFont.truetype(FONT_BOLD, 84)
     f_sub = ImageFont.truetype(FONT_REG, 44)
     f_text = ImageFont.truetype(FONT_REG, 40)
     f_small = ImageFont.truetype(FONT_REG, 30)
     # Header and footer bands in the secondary colour.
     draw.rectangle([(0, 0), (W, 96)], fill=secondary)
     draw.rectangle([(0, H-96), (W, H)], fill=secondary)
     _draw_text_shadow(draw, (MARGIN_X, 30), "CPAS BRUXELLES • SERVICE PUBLIC", f_small)
     _draw_text_shadow(draw, (W//2-280, H-72), "📞 0800 35 550 • 🌐 cpasbru.irisnet.be", f_small)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP), titre, f_title)
     _draw_text_shadow(draw, (MARGIN_X, SAFE_Y_TOP + 100), sous_titre, f_sub)
     y = SAFE_Y_TOP + 200
     for raw_line in (texte_ecran or "").split("\n"):
         content = raw_line.strip("• ")
         if not content:
             # A blank line (or an empty text box) would otherwise be drawn
             # as a lone bullet.
             continue
         for wrapped in _wrap_text("• " + content, f_text, W - MARGIN_X*2, draw):
             _draw_text_shadow(draw, (MARGIN_X, y), wrapped, f_text)
             y += 55
     if logo_path and os.path.exists(logo_path):
         with Image.open(logo_path) as logo_src:
             logo = logo_src.convert("RGBA")
         logo.thumbnail((260, 260))
         lw = logo.size[0]
         if logo_pos == "haut-gauche":
             pos = (50, 50)
         elif logo_pos == "haut-droite":
             pos = (W - lw - 50, 50)
         else:
             pos = ((W - lw)//2, 50)
         # The logo doubles as its own mask so its alpha channel is honoured.
         bg.paste(logo, pos, logo)
     out = os.path.join(TMP_DIR, f"fond_{uuid.uuid4().hex[:6]}.png")
     bg.save(out)
     return out
 # ============================================================
+# SadTalker — appel subprocess (image -> visage animé)
+# ============================================================
def _check_sadtalker_ready() -> Optional[str]:
    """Return an error message when SadTalker cannot be used, else ``None``.

    SadTalker counts as ready when ``<ROOT>/SadTalker`` is a directory and its
    ``checkpoints`` sub-directory contains every required model file.
    """
    sadtalker_dir = os.path.join(ROOT, "SadTalker")
    if os.path.isdir(sadtalker_dir):
        checkpoint_dir = os.path.join(sadtalker_dir, "checkpoints")
        required = (
            "audio2exp.pt",
            "GFPGANv1.4.pth",
            "epoch_20.pth",
            "mapping_00229-model.pth.tar",
            "shape_predictor_68_face_landmarks.dat",
        )
        absent = []
        for filename in required:
            if not os.path.exists(os.path.join(checkpoint_dir, filename)):
                absent.append(filename)
        if absent:
            return "Checkpoints SadTalker manquants: " + ", ".join(absent)
        return None
    return "Dossier SadTalker manquant. Ajoutez 'SadTalker/' à la racine du Space (voir README)."
def generate_sadtalker_video(image_path, audio_path, output_dir=TMP_DIR, fps=25) -> Optional[str]:
    """Animate a portrait with SadTalker and return the resulting MP4 path.

    Runs ``inference.py`` from ``<ROOT>/SadTalker`` in a subprocess. This is
    best-effort: any problem is printed and ``None`` is returned so the caller
    can fall back to the background alone.

    Args:
        image_path: Source portrait image.
        audio_path: Driving audio file.
        output_dir: Directory passed as ``--result_dir`` and searched for the
            generated video.
        fps: Frame rate forwarded on the command line.

    Returns:
        Path of ``sadtalker_<hex>.mp4`` inside ``output_dir``, or ``None`` when
        SadTalker is not installed, fails, or produces no new video.
    """
    import sys

    err = _check_sadtalker_ready()
    if err:
        # No hard failure: the caller uses the background alone.
        print(f"[SadTalker] {err}")
        return None
    try:
        # The subprocess runs with cwd=SadTalker/, so every path handed to it
        # must be absolute to mean the same thing on both sides.
        output_dir = os.path.abspath(output_dir)
        os.makedirs(output_dir, exist_ok=True)
        out_path = os.path.join(output_dir, f"sadtalker_{uuid.uuid4().hex[:6]}.mp4")

        def _list_mp4():
            return {os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".mp4")}

        # Snapshot first so a video left over from an earlier run is never
        # mistaken for the result of this one.
        already_there = _list_mp4()
        cmd = [
            # Same interpreter (and virtualenv) as the running app.
            sys.executable, "inference.py",
            "--driven_audio", os.path.abspath(audio_path),
            "--source_image", os.path.abspath(image_path),
            "--result_dir", output_dir,
            "--still", "--enhancer", "gfpgan",
            # NOTE(review): confirm that the installed SadTalker inference.py
            # accepts --fps; an unknown flag makes argparse exit with an error.
            "--fps", str(fps),
        ]
        subprocess.run(cmd, cwd=os.path.join(ROOT, "SadTalker"), check=True)
        produced = _list_mp4() - already_there
        if not produced:
            return None
        latest = max(produced, key=os.path.getmtime)
        # Give the result a predictable name.
        shutil.move(latest, out_path)
        return out_path
    except Exception as e:
        print("[SadTalker] Erreur:", e)
        return None
+# ============================================================
+# SOUS-TITRES .SRT
 # ============================================================
+def write_srt(text, duration):
+    parts = re.split(r'(?<=[\.!?])\s+', text.strip())
+    parts = [p for p in parts if p]
+    total = len("".join(parts)) or 1
+    cur = 0.0
+    srt = []
+    for i, p in enumerate(parts, 1):
+        prop = len(p)/total
+        start = cur
+        end = min(duration, cur + duration*prop)
+        cur = end
+        def ts(t):
+            m, s = divmod(t, 60)
+            h, m = divmod(m, 60)
+            return f"{int(h):02}:{int(m):02}:{int(s):02},000"
+        srt += [f"{i}", f"{ts(start)} --> {ts(end)}", p, ""]
+    path = os.path.join(OUT_DIR, f"srt_{uuid.uuid4().hex[:6]}.srt")
+    open(path, "w", encoding="utf-8").write("\n".join(srt))
+    return path
+# ============================================================
+# EXPORT VIDÉO (MoviePy — imports différés)
+# ============================================================
+def _write_video_with_fallback(final_clip, out_path_base, fps=25):
     attempts = [
         {"ext": ".mp4", "codec": "libx264", "audio_codec": "aac"},
+        {"ext": ".mp4", "codec": "mpeg4",  "audio_codec": "aac"},
+        {"ext": ".mp4", "codec": "libx264","audio_codec": "libmp3lame"},
     ]
+    ffmpeg_params = ["-pix_fmt", "yuv420p", "-movflags", "+faststart", "-threads", "1", "-shortest"]
     last_err = None
     for i, opt in enumerate(attempts, 1):
         out = out_path_base if out_path_base.endswith(opt["ext"]) else out_path_base + opt["ext"]
         try:
             final_clip.write_videofile(
                 out,
                 fps=fps,
                 codec=opt["codec"],
                 audio_codec=opt["audio_codec"],
                 audio=True,
+                ffmpeg_params=ffmpeg_params,
                 logger=None,
                 threads=1,
             )
             if os.path.exists(out) and os.path.getsize(out) > 150000:
                 return out
         except Exception as e:
+            last_err = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
+    raise RuntimeError(last_err or "FFmpeg a échoué")
 # ============================================================
+# BUILD CAPSULE — Pipeline complet
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
                   fond_mode="plein écran",
                   image_presentateur=None, voix_type="Féminine",
                   position_presentateur="bottom-right", plein=False,
                   moteur_voix="Kokoro (HuggingFace, offline)", langue="fr"):
     """Build one capsule end to end: narration, slide, optional presenter, export.

     Steps: synthesise ``texte_voix`` (gTTS when ``moteur_voix`` starts with
     "gTTS", Kokoro otherwise), normalise it to WAV, render the background
     slide, optionally overlay a SadTalker animation of ``image_presentateur``,
     export the video to ``OUT_DIR``, write the subtitles, then append the
     capsule to ``capsules`` and save the manifest.

     Clips are closed and the temporary audio files removed whether or not
     the build succeeds.

     Returns:
         Tuple ``(video path, status message, srt path)``.
     """
     # 1) TTS — anything that is not explicitly gTTS falls back to Kokoro.
     if moteur_voix.startswith("gTTS"):
         audio_mp = tts_gtts(texte_voix, lang=langue)
     else:
         audio_mp = tts_kokoro(texte_voix, langue=langue)
     audio_wav = None
     opened = []  # every clip holding a resource, closed in the finally below
     try:
         audio_wav = _normalize_audio_to_wav(audio_mp)
         # 2) Background (PIL)
         fond_path = make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos, image_fond, fond_mode)
         # 3) MoviePy (imported here, not at module level)
         from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, VideoFileClip
         import moviepy.video.fx.all as vfx
         audio = AudioFileClip(audio_wav)
         opened.append(audio)
         dur = float(audio.duration or 5.0)
         target_fps = 25
         bg = ImageClip(fond_path).set_duration(dur)
         opened.append(bg)
         # 4) SadTalker (optional)
         clips = [bg]
         if image_presentateur and os.path.exists(image_presentateur):
             vpath = generate_sadtalker_video(image_presentateur, audio_wav, fps=target_fps)
             if vpath and os.path.exists(vpath):
                 # Keep a handle on the file-backed clip so its reader can be
                 # closed; the derived clips below share that reader.
                 presenter_src = VideoFileClip(vpath)
                 opened.append(presenter_src)
                 v = presenter_src.without_audio().fx(vfx.loop, duration=dur)
                 if plein:
                     v = v.resize((W, H))
                     v = v.set_position(("center", "center"))
                 else:
                     v = v.resize(width=520)
                     pos_map = {
                         "bottom-right": ("right", "bottom"),
                         "bottom-left": ("left", "bottom"),
                         "top-right": ("right", "top"),
                         "top-left": ("left", "top"),
                         "center": ("center", "center"),
                     }
                     v = v.set_position(pos_map.get(position_presentateur, ("right", "bottom")))
                 clips.append(v)
         # 5) Composition + export
         final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
         opened.append(final)
         name = _safe_name(f"{titre}_{langue}")
         out_base = os.path.join(OUT_DIR, name)
         out = _write_video_with_fallback(final, out_base, fps=target_fps)
         # 6) SRT + manifest
         srt_path = write_srt(texte_voix, dur)
         capsules.append({
             "file": out,
             "title": titre,
             "langue": langue,
             "voice": voix_type,
             "theme": theme,
             "duration": round(dur, 1)
         })
         _save_manifest()
     finally:
         # Cleanup is best-effort: report problems but never mask the real
         # outcome of the build.
         for clip in reversed(opened):
             try:
                 clip.close()
             except Exception as exc:
                 print(f"[build_capsule] Fermeture d'un clip impossible: {exc}")
         for tmp in (audio_mp, audio_wav):
             try:
                 if tmp and os.path.exists(tmp):
                     os.remove(tmp)
             except OSError as exc:
                 print(f"[build_capsule] Suppression impossible ({tmp}): {exc}")
         gc.collect()
     return out, f"✅ Capsule {langue.upper()} créée ({dur:.1f}s, voix {voix_type})", srt_path
 # ============================================================
+# GESTION / ASSEMBLAGE
 # ============================================================
 def table_capsules():
     """Return one table row per entry of ``capsules``.

     Row layout: 1-based rank, title, upper-cased language ("fr" when the
     entry has no "langue" key), duration suffixed with "s", theme, voice,
     and the base name of the video file.
     """
     import os
     rows = []
     for rank, capsule in enumerate(capsules, start=1):
         language = capsule.get("langue", "fr").upper()
         rows.append([
             rank,
             capsule["title"],
             language,
             f"{capsule['duration']}s",
             capsule["theme"],
             capsule["voice"],
             os.path.basename(capsule["file"]),
         ])
     return rows
 def assemble_final():
     """Concatenate every capsule video, in list order, into one final video.

     Returns:
         ``(None, message)`` when ``capsules`` is empty, otherwise
         ``(output path, message)`` for the file written to ``OUT_DIR``.

     Raises:
         Any error raised while opening, concatenating or exporting the clips
         is propagated after all opened clips have been closed.
     """
     if not capsules:
         return None, "❌ Aucune capsule."
     from moviepy.editor import VideoFileClip
     from moviepy.video.compositing.concatenate import concatenate_videoclips
     clips = []
     merged = None
     try:
         # Open inside the try so that a failure on the n-th file still
         # releases the clips already opened.
         for c in capsules:
             clips.append(VideoFileClip(c["file"]))
         merged = concatenate_videoclips(clips, method="compose")
         out = _write_video_with_fallback(merged,
                                          os.path.join(OUT_DIR, _safe_name("VIDEO_COMPLETE")), fps=25)
         return out, f"🎉 Vidéo finale prête ({len(capsules)} capsules)."
     finally:
         to_close = clips if merged is None else [merged] + clips
         for clip in to_close:
             try:
                 clip.close()
             except Exception as exc:
                 # Best-effort cleanup: report, never mask the real outcome.
                 print(f"[assemble_final] Fermeture d'un clip impossible: {exc}")
 def supprimer_capsule(index):
     # Delete one capsule: remove its video file from disk when it exists,
     # drop its entry from `capsules`, persist the manifest, and return a
     # (status message, refreshed table rows) pair. Any exception is turned
     # into an error message instead of being raised.
     # NOTE(review): the statements that define `idx` and `fichier`, and the
     # `if` matched by the `else` below, are not visible in this excerpt —
     # confirm them against the full file.
     try:
             if os.path.exists(fichier):
                 os.remove(fichier)
             del capsules[idx]
+            _save_manifest()
             return f"🗑 Capsule supprimée : {fichier}", table_capsules()
         else:
             return "⚠️ Index invalide.", table_capsules()
     except Exception as e:
         return f"❌ Erreur lors de la suppression : {e}", table_capsules()
 def deplacer_capsule(index, direction):
     # Swap a capsule with its neighbour in `capsules`, persist the manifest,
     # and return a (status message, refreshed table rows) pair. Any exception
     # is turned into an error message instead of being raised.
     # NOTE(review): the opening `if` of the branch matched by the `elif`
     # below is not visible in this excerpt — confirm it against the full file.
     try:
         # Convert the 1-based number to a list index (table_capsules numbers
         # its rows from 1).
         idx = int(index) - 1
             capsules[idx - 1], capsules[idx] = capsules[idx], capsules[idx - 1]
         elif direction == "down" and idx < len(capsules) - 1:
             capsules[idx + 1], capsules[idx] = capsules[idx], capsules[idx + 1]
+        _save_manifest()
         return f"🔁 Capsule déplacée {direction}.", table_capsules()
     except Exception as e:
         return f"❌ Erreur de déplacement : {e}", table_capsules()
 # ============================================================
+# UI GRADIO
 # ============================================================
 # Gradio interface with two tabs: one to create a capsule, one to manage the
 # capsule list and assemble the final video.
 # NOTE(review): several layout lines are not visible in this excerpt (the
 # definitions of `image_fond`, `fond_mode`, `btn`, `sortie`, `btn_asm` and
 # `message` among them) — confirm them against the full file.
+with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
+               theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🎬 Créateur de Capsules CPAS – Version complète (SadTalker + Kokoro)")
+    gr.Markdown("**Astuce** : pour un démarrage instantané, chargez le dossier `SadTalker/checkpoints/` dans le Space (voir README).")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
                 logo_pos = gr.Radio(["haut-gauche","haut-droite","centre"],
                                     label="Position logo", value="haut-gauche")
+                image_presentateur = gr.Image(label="🧑‍🎨 Image du présentateur (portrait pour SadTalker)", type="filepath")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
             with gr.Column():
                 titre = gr.Textbox(label="Titre", value="Aide médicale urgente / Dringende medische hulp")
                 sous_titre = gr.Textbox(label="Sous-titre", value="Soins accessibles à tous / Toegankelijke zorg voor iedereen")
+                theme = gr.Radio(list(THEMES.keys()), label="Thème", value="Bleu Professionnel")
                 langue = gr.Radio(["fr","nl"], label="Langue de la voix", value="fr")
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
+                    ["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],
                     label="Moteur voix",
                     value="Kokoro (HuggingFace, offline)"
                 )
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
                 texte_ecran = gr.Textbox(label="Texte à l'écran", lines=4,
         srt_out = gr.File(label="Sous-titres .srt")
         statut = gr.Markdown()
     with gr.Tab("Gestion & Assemblage"):
         gr.Markdown("### 🗂 Gestion des capsules")
         liste = gr.Dataframe(
         sortie_finale = gr.Video(label="Vidéo finale")
         # assemble_final returns (video path or None, status message).
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
     # Click handler for capsule creation. The UI supplies the background
     # image, background mode, logo and logo position in that order, whereas
     # build_capsule expects (image_fond, logo_path, logo_pos, fond_mode); the
     # call below reorders them. Errors are reported in the status output with
     # their traceback instead of being raised.
+    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo, ip, vx, pos_p, plein, motor, lang):
+        try:
+            vid, msg, srt = build_capsule(t, st, tv, te, th,
+                                          img, logo, pos_logo, fmode,
+                                          ip, vx, pos_p, plein,
+                                          motor, lang)
+            return vid, srt, msg, table_capsules()
+        except Exception as e:
+            return None, None, f"❌ Erreur: {e}\n\n{traceback.format_exc()}", table_capsules()
     # The input list follows the parameter order of creer_capsule_ui; the four
     # outputs receive its (video, srt, message, table rows) return value.
     btn.click(
         creer_capsule_ui,
         [titre, sous_titre, texte_voix, texte_ecran, theme,
          image_fond, fond_mode, logo_path, logo_pos,
+         image_presentateur, voix_type, position_presentateur,
          plein, moteur_voix, langue],
         [sortie, srt_out, statut, liste]
     )
 # Launch the Gradio app only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()