Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -84,52 +84,78 @@ def _safe_name(stem, ext=".mp4"):
|
|
| 84 |
return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
|
| 85 |
|
| 86 |
# ============================================================
|
| 87 |
-
# SYNTHÈSE VOCALE (Kokoro
|
| 88 |
# ============================================================
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
def _get_kokoro():
|
| 92 |
-
global kokoro_pipeline
|
| 93 |
-
if kokoro_pipeline is None:
|
| 94 |
-
from transformers import pipeline
|
| 95 |
-
# nécessite transformers récent + onnxruntime + soundfile
|
| 96 |
-
kokoro_pipeline = pipeline("text-to-speech", model="onnx-community/Kokoro-82M-v1.0-ONNX")
|
| 97 |
-
return kokoro_pipeline
|
| 98 |
|
| 99 |
def get_kokoro_voices(lang="fr"):
|
| 100 |
-
"""Retourne la liste des
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
return
|
| 107 |
-
except Exception as e:
|
| 108 |
-
print(f"[Kokoro] Impossible de charger les voix ({e})")
|
| 109 |
-
return []
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
| 112 |
import soundfile as sf
|
| 113 |
out = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
|
| 114 |
try:
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
return out
|
|
|
|
| 122 |
except Exception as e:
|
| 123 |
-
print(f"[Kokoro] Erreur TTS
|
| 124 |
return tts_gtts(text, lang=langue)
|
| 125 |
|
| 126 |
|
| 127 |
def tts_gtts(text: str, lang: str = "fr") -> str:
|
|
|
|
| 128 |
from gtts import gTTS
|
| 129 |
out = os.path.join(TMP_DIR, f"gtts_{uuid.uuid4().hex}.mp3")
|
| 130 |
gTTS(text=text, lang=lang).save(out)
|
| 131 |
return out
|
| 132 |
|
|
|
|
| 133 |
def _normalize_audio_to_wav(in_path: str) -> str:
|
| 134 |
# Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
|
| 135 |
from pydub import AudioSegment
|
|
@@ -474,20 +500,20 @@ with gr.Blocks(title="Créateur de Capsules CPAS – SadTalker + Kokoro",
|
|
| 474 |
def maj_voix(lang):
|
| 475 |
try:
|
| 476 |
voices = get_kokoro_voices(lang)
|
| 477 |
-
if
|
| 478 |
-
return gr.update(choices=["(aucune disponible)"], value="(aucune disponible)")
|
| 479 |
-
return gr.update(choices=voices, value=voices[0])
|
| 480 |
except Exception as e:
|
|
|
|
| 481 |
return gr.update(choices=[], value=None)
|
| 482 |
|
| 483 |
speaker_id = gr.Dropdown(
|
| 484 |
-
label="
|
| 485 |
choices=get_kokoro_voices("fr"),
|
| 486 |
-
value=
|
|
|
|
| 487 |
)
|
|
|
|
| 488 |
langue.change(maj_voix, [langue], [speaker_id])
|
| 489 |
|
| 490 |
-
|
| 491 |
voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
|
| 492 |
moteur_voix = gr.Radio(
|
| 493 |
["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],
|
|
|
|
| 84 |
return f"{stem}_{uuid.uuid4().hex[:6]}{ext}"
|
| 85 |
|
| 86 |
# ============================================================
|
| 87 |
+
# SPEECH SYNTHESIS — Kokoro (based on hexgrad/Kokoro-TTS) + gTTS fallback
# ============================================================
from kokoro import KModel, KPipeline
import torch

# Whether torch can see a CUDA device; decides which model copy gets built and used.
CUDA_AVAILABLE = torch.cuda.is_available()

# Load the Kokoro models and pipelines.
# One model per device, keyed by "is GPU": the CPU copy (False) is always
# created, the CUDA copy (True) only when CUDA is available.
models = {
    gpu: KModel().to("cuda" if gpu else "cpu").eval()
    for gpu in [False] + ([True] if CUDA_AVAILABLE else [])
}
# Kokoro uses two pipelines ('a' and 'b') for the voices.
# The key is the first character of a voice id (see the ids in KOKORO_VOICES).
# NOTE(review): model=False presumably builds a pipeline without its own
# model, the shared `models` entries being used for inference — confirm.
pipelines = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}

# FR / NL voice dictionary inspired by hexgrad/Kokoro-TTS.
# Maps the label shown to the user to the Kokoro voice id.
# NOTE(review): lang codes 'a' / 'b' and the af_/am_/bf_/bm_ voices look like
# Kokoro's American / British English set rather than French / Dutch —
# confirm that French or Dutch text is pronounced as intended.
KOKORO_VOICES = {
    # --- French ---
    "🇫🇷 🚺 Heart ❤️": "af_heart",
    "🇫🇷 🚺 Bella 🔥": "af_bella",
    "🇫🇷 🚺 Nicole 🎧": "af_nicole",
    "🇫🇷 🚹 Michael 🎙": "am_michael",
    "🇫🇷 🚹 Adam ⚡": "am_adam",
    # --- Dutch (or closest NL-like voices) ---
    "🇳🇱 🚺 Emma 💛": "bf_emma",
    "🇳🇱 🚺 Isabella 💬": "bf_isabella",
    "🇳🇱 🚹 George 💼": "bm_george",
    "🇳🇱 🚹 Lewis 🧠": "bm_lewis",
}
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def get_kokoro_voices(lang="fr"):
    """Return the Kokoro voice ids offered for the given language.

    Args:
        lang: Language code. "fr" selects the ids starting with "af_" or
            "am_", "nl" those starting with "bf_" or "bm_".

    Returns:
        A list of voice ids taken from the values of KOKORO_VOICES, in
        dictionary order. Any other language code yields every voice id.
    """
    prefixes_by_lang = (
        ("fr", ("af_", "am_")),
        ("nl", ("bf_", "bm_")),
    )
    for code, prefixes in prefixes_by_lang:
        if lang == code:
            selected = []
            for voice_id in KOKORO_VOICES.values():
                if voice_id.startswith(prefixes):
                    selected.append(voice_id)
            return selected
    # Unknown language: expose the whole catalogue.
    return list(KOKORO_VOICES.values())
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
+
|
| 128 |
+
def tts_kokoro(text: str, langue: str = "fr", speaker: str = "af_heart") -> str:
    """Synthesize *text* with the selected Kokoro voice and return the audio path.

    Every segment yielded by the Kokoro pipeline is synthesized and the
    segments are concatenated into one file, so long or multi-line text is
    no longer truncated after its first segment.

    Args:
        text: Text to speak.
        langue: Language code; only forwarded to the gTTS fallback.
        speaker: Kokoro voice id. Its first character selects the pipeline
            in ``pipelines``.

    Returns:
        Path of a 24 kHz WAV file under TMP_DIR on success. If anything in
        the Kokoro path fails (including no segment being produced), the
        error is printed and the result of ``tts_gtts`` is returned instead.
    """
    import soundfile as sf

    out = os.path.join(TMP_DIR, f"kokoro_{uuid.uuid4().hex}.wav")
    try:
        pipeline = pipelines[speaker[0]]  # 'a' or 'b'
        pack = pipeline.load_voice(speaker)
        # `models` is keyed by "is GPU": use the CUDA copy when it exists.
        model = models[CUDA_AVAILABLE]
        speed = 1.0

        segments = []
        for _, ps, _ in pipeline(text, speaker, speed):
            # Reference style entry is chosen by phoneme-sequence length.
            ref_s = pack[len(ps) - 1]
            audio = model(ps, ref_s, speed)
            # Bring every chunk to a flat CPU tensor so they can be joined.
            segments.append(audio.detach().cpu().reshape(-1))

        if not segments:
            # Nothing was synthesized: never return a path to a missing file.
            raise ValueError("no audio segment produced")

        # Write once, after all segments succeeded, so no partial file is left.
        sf.write(out, torch.cat(segments).numpy(), 24000)
        return out

    except Exception as e:
        print(f"[Kokoro] Erreur TTS ({e}), fallback gTTS.")
        return tts_gtts(text, lang=langue)
|
| 149 |
|
| 150 |
|
| 151 |
def tts_gtts(text: str, lang: str = "fr") -> str:
    """Synthesize *text* with Google Text-to-Speech (simple fallback engine).

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        Path of the MP3 file saved under TMP_DIR.
    """
    from gtts import gTTS

    mp3_name = f"gtts_{uuid.uuid4().hex}.mp3"
    mp3_path = os.path.join(TMP_DIR, mp3_name)
    speech = gTTS(text=text, lang=lang)
    speech.save(mp3_path)
    return mp3_path
|
| 157 |
|
| 158 |
+
|
| 159 |
def _normalize_audio_to_wav(in_path: str) -> str:
|
| 160 |
# Convertit n'importe quel format (mp3/wav) en WAV standard (44.1kHz stéréo)
|
| 161 |
from pydub import AudioSegment
|
|
|
|
| 500 |
def maj_voix(lang):
    """Refresh the voice dropdown when the selected language changes.

    Returns a ``gr.update`` whose choices are the voices for *lang* and whose
    value is the first of them (or None when the list is empty). On any error
    the message is printed and an empty dropdown update is returned.
    """
    try:
        available = get_kokoro_voices(lang)
        if available:
            default_voice = available[0]
        else:
            default_voice = None
        return gr.update(choices=available, value=default_voice)
    except Exception as err:
        print("[UI] Erreur lors du chargement des voix:", err)
        return gr.update(choices=[], value=None)
|
| 507 |
|
| 508 |
speaker_id = gr.Dropdown(
|
| 509 |
+
label="🎙 Voix Kokoro",
|
| 510 |
choices=get_kokoro_voices("fr"),
|
| 511 |
+
value="af_heart",
|
| 512 |
+
info="Choisissez la voix pour Kokoro selon la langue (FR/NL)."
|
| 513 |
)
|
| 514 |
+
|
| 515 |
langue.change(maj_voix, [langue], [speaker_id])
|
| 516 |
|
|
|
|
| 517 |
voix_type = gr.Radio(["Féminine","Masculine"], label="Voix IA", value="Féminine")
|
| 518 |
moteur_voix = gr.Radio(
|
| 519 |
["Kokoro (HuggingFace, offline)", "gTTS (en ligne)"],
|