Spaces:

omarbajouk
/

CapsulesVideo

Running

App Files Files Community

omarbajouk committed on Oct 26, 2025

Commit

7c81ff4

verified ·

1 Parent(s): f58942a

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -33

app.py CHANGED Viewed

@@ -84,52 +84,78 @@ def _safe_name(stem, ext=".mp4"):
     return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
 # ============================================================
-# SYNTHÈSE VOCALE (Kokoro par défaut, gTTS en secours)
 # ============================================================
-kokoro_pipeline = None  # lazy load
-def _get_kokoro():
-    global kokoro_pipeline
-    if kokoro_pipeline is None:
-        from transformers import pipeline
-        # nécessite transformers récent + onnxruntime + soundfile
-        kokoro_pipeline = pipeline("text-to-speech", model="onnx-community/Kokoro-82M-v1.0-ONNX")
-    return kokoro_pipeline
 def get_kokoro_voices(lang="fr"):
-    """Retourne la liste des speakers Kokoro disponibles pour une langue."""
-    try:
-        from transformers import AutoProcessor
-        model_id = "onnx-community/Kokoro-82M-v1.0-ONNX"
-        processor = AutoProcessor.from_pretrained(model_id)
-        voices = sorted([v for v in processor.speakers if v.startswith(lang)])
-        return voices
-    except Exception as e:
-        print(f"[Kokoro] Impossible de charger les voix ({e})")
-        return []
-def tts_kokoro(text: str, langue: str = "fr", speaker: Optional[str] = None) -> str:
     import soundfile as sf
     out = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
     try:
-        kokoro = _get_kokoro()
-        args = {"text": text}
-        if speaker:
-            args["speaker_id"] = speaker
-        result = kokoro(**args)
-        sf.write(out, result["audio"], result["sampling_rate"])
         return out
     except Exception as e:
-        print(f"[Kokoro] Erreur TTS: {e}")
         return tts_gtts(text, lang=langue)
 def tts_gtts(text: str, lang: str = "fr") -> str:
     from gtts import gTTS
     out = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.mp3")
     gTTS(text=text, lang=lang).save(out)
     return out
 def _normalize_audio_to_wav(in_path: str) -> str:
     # Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
     from pydub import AudioSegment
@@ -474,20 +500,20 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
                 def maj_voix(lang):
                     try:
                         voices = get_kokoro_voices(lang)
-                        if not voices:
-                            return gr.update(choices=["(aucune disponible)"], value="(aucune disponible)")
-                        return gr.update(choices=voices, value=voices[0])
                     except Exception as e:
                         return gr.update(choices=[], value=None)
                 speaker_id = gr.Dropdown(
-                    label="👤 Voix / Speaker Kokoro",
                     choices=get_kokoro_voices("fr"),
-                    value=None
                 )
                 langue.change(maj_voix, [langue], [speaker_id])
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
                     ["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],

     return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
 # ============================================================
+# SYNTHÈSE VOCALE — Kokoro (basé sur hexgrad/Kokoro-TTS) + gTTS fallback
 # ============================================================
+from kokoro import KModel, KPipeline
+import torch
+# True when PyTorch reports a usable CUDA device.
+CUDA_AVAILABLE = torch.cuda.is_available()
+# Load the Kokoro models, keyed by a "use GPU" flag: a CPU copy is always
+# built, and a CUDA copy is added only when a GPU is available.
+models = {False: KModel().to("cpu").eval()}
+if CUDA_AVAILABLE:
+    models[True] = KModel().to("cuda").eval()
+# One model-less pipeline (model=False) per language code; tts_kokoro() picks
+# the pipeline from the first letter of the voice id.
+# NOTE(review): in Kokoro, 'a' and 'b' appear to be the American- and
+# British-English language codes — confirm against the Kokoro documentation.
+pipelines = {
+    code: KPipeline(lang_code=code, model=False)
+    for code in ("a", "b")
+}
+# FR / NL voice table (display label -> Kokoro voice id), modelled on
+# hexgrad/Kokoro-TTS.
+# NOTE(review): the "af_"/"am_" and "bf_"/"bm_" ids look like Kokoro's
+# American- and British-English voices, while the labels below present them
+# as French / Dutch — confirm against the Kokoro voice list.
+KOKORO_VOICES = {
+    # --- French ---
+    "🇫🇷 🚺 Heart ❤️": "af_heart",
+    "🇫🇷 🚺 Bella 🔥": "af_bella",
+    "🇫🇷 🚺 Nicole 🎧": "af_nicole",
+    "🇫🇷 🚹 Michael 🎙": "am_michael",
+    "🇫🇷 🚹 Adam ⚡": "am_adam",
+    # --- Dutch (or the closest available voices) ---
+    "🇳🇱 🚺 Emma 💛": "bf_emma",
+    "🇳🇱 🚺 Isabella 💬": "bf_isabella",
+    "🇳🇱 🚹 George 💼": "bm_george",
+    "🇳🇱 🚹 Lewis 🧠": "bm_lewis",
+}
 def get_kokoro_voices(lang="fr"):
+    """Return the Kokoro voice ids to offer for *lang*.
+
+    "fr" yields the ids starting with "af_" or "am_", "nl" those starting
+    with "bf_" or "bm_"; any other value yields every id in KOKORO_VOICES.
+    """
+    prefixes_by_lang = (
+        ("fr", ("af_", "am_")),
+        ("nl", ("bf_", "bm_")),
+    )
+    voice_ids = list(KOKORO_VOICES.values())
+    for code, prefixes in prefixes_by_lang:
+        if lang == code:
+            return [vid for vid in voice_ids if vid.startswith(prefixes)]
+    return voice_ids
+def tts_kokoro(text: str, langue: str = "fr", speaker: str = "af_heart") -> str:
+    """Synthesize *text* with Kokoro and return the path of the audio file.
+
+    Every segment yielded by the Kokoro pipeline is appended to the same WAV
+    file, so texts that the pipeline splits into several segments are
+    rendered in full.
+
+    Args:
+        text: Text to speak.
+        langue: Language code; only used by the gTTS fallback.
+        speaker: Kokoro voice id such as "af_heart". Its first letter selects
+            the pipeline; a falsy value selects "af_heart".
+
+    Returns:
+        Path to a 24 kHz mono WAV on success. On any error (including a
+        pipeline that yields no audio) the result of tts_gtts() instead.
+    """
     import soundfile as sf
     out = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
     try:
+        # The UI can hand over None/"" when no voice is selected.
+        voice = speaker or "af_heart"
+        pipeline = pipelines[voice[0]]  # 'a' or 'b'
+        pack = pipeline.load_voice(voice)
+        model = models[True] if CUDA_AVAILABLE else models[False]
+        speed = 1.0
+        written = 0
+        # Stream the segments into one file instead of keeping only the first.
+        with sf.SoundFile(out, mode="w", samplerate=24000, channels=1) as wav:
+            for _, ps, _ in pipeline(text, voice, speed):
+                if not ps:
+                    # An empty phoneme string would index pack[-1]; skip it.
+                    continue
+                ref_s = pack[len(ps) - 1]
+                audio = model(ps, ref_s, speed)
+                # detach()/cpu() are no-ops for a plain CPU tensor and keep
+                # .numpy() valid should the model return a CUDA tensor.
+                wav.write(audio.detach().cpu().numpy())
+                written += 1
+        if not written:
+            # Nothing was synthesized: do not return a header-only WAV.
+            raise ValueError("aucun segment audio généré")
         return out
     except Exception as e:
+        print(f"[Kokoro] Erreur TTS ({e}), fallback gTTS.")
         return tts_gtts(text, lang=langue)
 def tts_gtts(text: str, lang: str = "fr") -> str:
+    """Synthesize *text* with gTTS and return the path of the saved MP3.
+
+    Used as the fallback when Kokoro synthesis fails. The file is created in
+    TMP_DIR under a random "gtts_<hex>.mp3" name.
+    """
     from gtts import gTTS
+    mp3_name = f"gtts_{uuid.uuid4().hex}.mp3"
+    mp3_path = os.path.join(TMP_DIR, mp3_name)
+    speech = gTTS(text=text, lang=lang)
+    speech.save(mp3_path)
+    return mp3_path
 def _normalize_audio_to_wav(in_path: str) -> str:
     # Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
     from pydub import AudioSegment
                 def maj_voix(lang):
+                    """Build the dropdown update for the voices available in *lang*.
+
+                    Returns a gr.update() carrying the voice ids for *lang* with the
+                    first one preselected (None when the list is empty). If loading
+                    the voices raises, the error is printed and the choices are
+                    cleared.
+                    """
                     try:
                         voices = get_kokoro_voices(lang)
+                        if voices:
+                            return gr.update(choices=voices, value=voices[0])
+                        return gr.update(choices=voices, value=None)
                     except Exception as e:
+                        print("[UI] Erreur lors du chargement des voix:", e)
                         return gr.update(choices=[], value=None)
                 speaker_id = gr.Dropdown(
+                    label="🎙 Voix Kokoro",
                     choices=get_kokoro_voices("fr"),
+                    value="af_heart",
+                    info="Choisissez la voix pour Kokoro selon la langue (FR/NL)."
                 )
                 langue.change(maj_voix, [langue], [speaker_id])
                 voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
                 moteur_voix = gr.Radio(
                     ["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],