Spaces:

omarbajouk
/

CapsulesVideo

Sleeping

App Files Community

omarbajouk committed on Oct 26, 2025

Commit

84f0004

verified ·

1 Parent(s): ab9c83b

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -85

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # app.py
 # ============================================================
 # CPAS Bruxelles — Créateur de Capsules (Gradio + Kokoro + SadTalker)
-# Version "Space HF" optimisée (chargement rapide, imports différés)
 # ============================================================
 import os, json, re, uuid, shutil, traceback, gc, subprocess
@@ -95,8 +95,6 @@ import soundfile as sf
 # 🔊 CHARGEMENT DYNAMIQUE DES VOIX EDGE-TTS (FR/NL)
 # ============================================================
 EDGE_VOICES = {}
 async def fetch_edge_voices_async():
@@ -137,8 +135,6 @@ def get_edge_voices(lang="fr"):
         return [v for k, v in EDGE_VOICES.items() if k.startswith("nl-")]
     return list(EDGE_VOICES.values())
 async def _edge_tts_async(text, voice, outfile):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(outfile)
@@ -259,53 +255,42 @@ def make_background(titre, sous_titre, texte_ecran, theme, logo_path, logo_pos,
     return out
 # ============================================================
-# SadTalker — appel subprocess (image -> visage animé)
 # ============================================================
-def _check_sadtalker_ready() -> Optional[str]:
-    base = os.path.join(ROOT, "SadTalker")
-    if not os.path.isdir(base):
-        return "Dossier SadTalker manquant. Ajoutez 'SadTalker/' à la racine du Space (voir README)."
-    ck = os.path.join(base, "checkpoints")
-    needed = [
-        "audio2exp.pt",
-        "GFPGANv1.4.pth",
-        "epoch_20.pth",
-        "mapping_00229-model.pth.tar",
-        "shape_predictor_68_face_landmarks.dat",
-    ]
-    missing = [f for f in needed if not os.path.exists(os.path.join(ck, f))]
-    if missing:
-        return "Checkpoints SadTalker manquants: " + ", ".join(missing)
-    return None
-def generate_sadtalker_video(image_path, audio_path, output_dir=TMP_DIR, fps=25) -> Optional[str]:
-    err = _check_sadtalker_ready()
-    if err:
-        # Pas d’échec brutal : on renvoie None (le fond seul sera utilisé)
-        print(f"[SadTalker] {err}")
-        return None
     try:
-        os.makedirs(output_dir, exist_ok=True)
-        out_path = os.path.join(output_dir, f"sadtalker_{uuid.uuid4().hex[:6]}.mp4")
-        cmd = [
-            "python", "inference.py",
-            "--driven_audio", audio_path,
-            "--source_image", image_path,
-            "--result_dir", output_dir,
-            "--still", "--enhancer", "gfpgan",
-            "--fps", str(fps),
-        ]
-        subprocess.run(cmd, cwd=os.path.join(ROOT, "SadTalker"), check=True)
-        # Récupérer le dernier mp4 créé
-        candidates = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".mp4")]
-        latest = max(candidates, key=os.path.getctime) if candidates else None
-        if latest:
-            # Harmoniser le nom
-            shutil.move(latest, out_path)
-            return out_path
-        return None
     except Exception as e:
-        print("[SadTalker] Erreur:", e)
         return None
 # ============================================================
@@ -362,12 +347,12 @@ def _write_video_with_fallback(final_clip, out_path_base, fps=25):
     raise RuntimeError(last_err or "FFmpeg a échoué")
 # ============================================================
-# BUILD CAPSULE — Pipeline complet (corrigé)
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
                   fond_mode="plein écran",
-                  image_presentateur=None, voix_type="Féminine",
                   position_presentateur="bottom-right", plein=False,
                   moteur_voix="Parler-TTS (offline)", langue="fr", speaker=None):
@@ -386,7 +371,6 @@ def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
         except Exception as e:
             print(f"[Audio] Normalisation échouée ({e}), on garde {audio_mp}")
     # 2) Fond (PIL)
     fond_path = make_background(titre, sous_titre, texte_ecran, theme,
                                 logo_path, logo_pos, image_fond, fond_mode)
@@ -400,25 +384,17 @@ def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
     target_fps = 25
     bg = ImageClip(fond_path).set_duration(dur)
-    # 4) SadTalker (optionnel)
     clips = [bg]
-    if image_presentateur and os.path.exists(image_presentateur):
-        vpath = generate_sadtalker_video(image_presentateur, audio_wav, fps=target_fps)
-        if vpath and os.path.exists(vpath):
-            v = VideoFileClip(vpath).without_audio().fx(vfx.loop, duration=dur)
-            if plein:
-                v = v.resize((W, H)).set_position(("center", "center"))
-            else:
-                v = v.resize(width=520)
-                pos_map = {
-                    "bottom-right": ("right", "bottom"),
-                    "bottom-left": ("left", "bottom"),
-                    "top-right": ("right", "top"),
-                    "top-left": ("left", "top"),
-                    "center": ("center", "center"),
-                }
-                v = v.set_position(pos_map.get(position_presentateur, ("right", "bottom")))
-            clips.append(v)
     # 5) Composition + export
     final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
@@ -443,6 +419,8 @@ def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
         audio.close()
         final.close()
         bg.close()
         if os.path.exists(audio_mp): os.remove(audio_mp)
         if audio_wav != audio_mp and os.path.exists(audio_wav): os.remove(audio_wav)
     except Exception as e:
@@ -451,7 +429,6 @@ def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
     return out, f"✅ Capsule {langue.upper()} créée ({dur:.1f}s, voix {speaker or voix_type})", srt_path
 # ============================================================
 # GESTION / ASSEMBLAGE
 # ============================================================
@@ -508,11 +485,11 @@ def deplacer_capsule(index, direction):
 # ============================================================
 print("[INIT] Lancement de Gradio...")
 init_edge_voices()
-with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
                theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎬 Créateur de Capsules CPAS – Version complète (SadTalker + Kokoro)")
-    gr.Markdown("**Astuce** : pour un démarrage instantané, chargez le dossier `SadTalker/checkpoints/` dans le Space (voir README).")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
@@ -523,7 +500,8 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
                 logo_pos = gr.Radio(["haut-gauche","haut-droite","centre"],
                                     label="Position logo", value="haut-gauche")
-                image_presentateur = gr.Image(label="🧑‍🎨 Image du présentateur (portrait pour SadTalker)", type="filepath")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
@@ -540,8 +518,6 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
                     except Exception as e:
                         return gr.update(choices=[], value=None)
                 speaker_id = gr.Dropdown(
                     label="🎙 Voix Edge-TTS",
                     choices=get_edge_voices("fr"),
@@ -553,9 +529,9 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
-                    ["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],
                     label="Moteur voix",
-                    value="Kokoro (HuggingFace, offline)"
                 )
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
@@ -590,11 +566,11 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
         sortie_finale = gr.Video(label="Vidéo finale")
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
-    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo, ip, vx, pos_p, plein, motor, lang, speaker):
         try:
             vid, msg, srt = build_capsule(t, st, tv, te, th,
                                           img, logo, pos_logo, fmode,
-                                          ip, vx, pos_p, plein,
                                           motor, lang, speaker=speaker)
             return vid, srt, msg, table_capsules()
         except Exception as e:
@@ -604,12 +580,10 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
         creer_capsule_ui,
         [titre, sous_titre, texte_voix, texte_ecran, theme,
          image_fond, fond_mode, logo_path, logo_pos,
-         image_presentateur, voix_type, position_presentateur,
          plein, moteur_voix, langue, speaker_id],
         [sortie, srt_out, statut, liste]
     )
 if __name__ == "__main__":
-    demo.launch()

 # app.py
 # ============================================================
 # CPAS Bruxelles — Créateur de Capsules (Gradio + Kokoro + SadTalker)
+# Version modifiée pour utiliser une vidéo de présentateur au lieu d'une image
 # ============================================================
 import os, json, re, uuid, shutil, traceback, gc, subprocess
 # 🔊 CHARGEMENT DYNAMIQUE DES VOIX EDGE-TTS (FR/NL)
 # ============================================================
 EDGE_VOICES = {}
 async def fetch_edge_voices_async():
         return [v for k, v in EDGE_VOICES.items() if k.startswith("nl-")]
     return list(EDGE_VOICES.values())
 async def _edge_tts_async(text, voice, outfile):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(outfile)
     return out
 # ============================================================
+# SUPPRESSION DE LA PARTIE SADTALKER (plus nécessaire)
 # ============================================================
+def _prepare_video_presentateur(video_path, audio_duration, position, plein_ecran=False):
+    """Prépare la vidéo du présentateur avec la bonne durée et position."""
+    from moviepy.editor import VideoFileClip
+    import moviepy.video.fx.all as vfx
     try:
+        v = VideoFileClip(video_path).without_audio()
+        # Ajuster la durée à celle de l'audio
+        if v.duration < audio_duration:
+            # Si la vidéo est plus courte, la boucler
+            v = v.fx(vfx.loop, duration=audio_duration)
+        elif v.duration > audio_duration:
+            # Si la vidéo est plus longue, la couper
+            v = v.subclip(0, audio_duration)
+        # Ajuster la taille et la position
+        if plein_ecran:
+            v = v.resize((W, H)).set_position(("center", "center"))
+        else:
+            v = v.resize(width=520)  # Taille réduite pour le coin
+            pos_map = {
+                "bottom-right": ("right", "bottom"),
+                "bottom-left": ("left", "bottom"),
+                "top-right": ("right", "top"),
+                "top-left": ("left", "top"),
+                "center": ("center", "center"),
+            }
+            v = v.set_position(pos_map.get(position, ("right", "bottom")))
+        return v
     except Exception as e:
+        print(f"[Préparation vidéo] Erreur : {e}")
         return None
 # ============================================================
     raise RuntimeError(last_err or "FFmpeg a échoué")
 # ============================================================
+# BUILD CAPSULE — Pipeline complet (modifié pour vidéo présentateur)
 # ============================================================
 def build_capsule(titre, sous_titre, texte_voix, texte_ecran, theme,
                   image_fond=None, logo_path=None, logo_pos="haut-gauche",
                   fond_mode="plein écran",
+                  video_presentateur=None, voix_type="Féminine",
                   position_presentateur="bottom-right", plein=False,
                   moteur_voix="Parler-TTS (offline)", langue="fr", speaker=None):
         except Exception as e:
             print(f"[Audio] Normalisation échouée ({e}), on garde {audio_mp}")
     # 2) Fond (PIL)
     fond_path = make_background(titre, sous_titre, texte_ecran, theme,
                                 logo_path, logo_pos, image_fond, fond_mode)
     target_fps = 25
     bg = ImageClip(fond_path).set_duration(dur)
+    # 4) Vidéo présentateur (au lieu de SadTalker)
     clips = [bg]
+    if video_presentateur and os.path.exists(video_presentateur):
+        v_presentateur = _prepare_video_presentateur(
+            video_presentateur,
+            dur,
+            position_presentateur,
+            plein
+        )
+        if v_presentateur:
+            clips.append(v_presentateur)
     # 5) Composition + export
     final = CompositeVideoClip(clips).set_audio(audio.set_fps(44100))
         audio.close()
         final.close()
         bg.close()
+        if 'v_presentateur' in locals():
+            v_presentateur.close()
         if os.path.exists(audio_mp): os.remove(audio_mp)
         if audio_wav != audio_mp and os.path.exists(audio_wav): os.remove(audio_wav)
     except Exception as e:
     return out, f"✅ Capsule {langue.upper()} créée ({dur:.1f}s, voix {speaker or voix_type})", srt_path
 # ============================================================
 # GESTION / ASSEMBLAGE
 # ============================================================
 # ============================================================
 print("[INIT] Lancement de Gradio...")
 init_edge_voices()
+with gr.Blocks(title="Créateur de Capsules CPAS – Version avec vidéo présentateur",
                theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🎬 Créateur de Capsules CPAS – Version avec vidéo présentateur")
+    gr.Markdown("**Nouveau** : Utilisez directement une vidéo de présentateur au lieu d'une image.")
     with gr.Tab("Créer une capsule"):
         with gr.Row():
                 logo_path = gr.Image(label="🏛 Logo", type="filepath")
                 logo_pos = gr.Radio(["haut-gauche","haut-droite","centre"],
                                     label="Position logo", value="haut-gauche")
+                # REMPLACEMENT : Image → Video
+                video_presentateur = gr.Video(label="🎬 Vidéo du présentateur", type="filepath")
                 position_presentateur = gr.Radio(["bottom-right","bottom-left","top-right","top-left","center"],
                                                  label="Position", value="bottom-right")
                 plein = gr.Checkbox(label="Plein écran présentateur", value=False)
                     except Exception as e:
                         return gr.update(choices=[], value=None)
                 speaker_id = gr.Dropdown(
                     label="🎙 Voix Edge-TTS",
                     choices=get_edge_voices("fr"),
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
+                    ["Edge-TTS (recommandé)", "gTTS (fallback)"],
                     label="Moteur voix",
+                    value="Edge-TTS (recommandé)"
                 )
                 texte_voix = gr.Textbox(label="Texte voix off", lines=4,
                                         value="Bonjour, le CPAS de Bruxelles vous aide pour vos soins de santé.")
         sortie_finale = gr.Video(label="Vidéo finale")
         btn_asm.click(lambda: assemble_final(), [], [sortie_finale, message])
+    def creer_capsule_ui(t, st, tv, te, th, img, fmode, logo, pos_logo, vp, vx, pos_p, plein, motor, lang, speaker):
         try:
             vid, msg, srt = build_capsule(t, st, tv, te, th,
                                           img, logo, pos_logo, fmode,
+                                          vp, vx, pos_p, plein,
                                           motor, lang, speaker=speaker)
             return vid, srt, msg, table_capsules()
         except Exception as e:
         creer_capsule_ui,
         [titre, sous_titre, texte_voix, texte_ecran, theme,
          image_fond, fond_mode, logo_path, logo_pos,
+         video_presentateur, voix_type, position_presentateur,  # Changé ici
          plein, moteur_voix, langue, speaker_id],
         [sortie, srt_out, statut, liste]
     )
 if __name__ == "__main__":
+    demo.launch()