ChatCraft / backend /scripts /generate_unit_sounds.py
gabraken's picture
Add Map
5c0862e
#!/usr/bin/env python3
"""
Generate unit sounds through the ElevenLabs API:
- move_ack: TTS with a different voice per unit and an emotion (anger, calm, mystery…)
- death / fire: Sound Effects API (sound effects, not voices)
Only creates the files that do not exist yet. ELEVENLABS_API_KEY is expected in .env.
Usage: cd backend && python -m scripts.generate_unit_sounds
"""
from __future__ import annotations
import sys
from pathlib import Path
_backend = Path(__file__).resolve().parent.parent
if str(_backend) not in sys.path:
sys.path.insert(0, str(_backend))
import asyncio
import logging
from typing import Optional
import httpx
from config import ELEVENLABS_API_KEY
from game.units import UnitType
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
# Root folder of the generated audio; each file lands in
# <SOUNDS_DIR>/<unit>/<kind>.mp3 (see _out_path).
SOUNDS_DIR = _backend / "static" / "sounds" / "units"
# ElevenLabs REST endpoints. TTS_URL is a template completed with the voice id
# before each text-to-speech request.
TTS_URL = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"
VOICES_URL = "https://api.elevenlabs.io/v1/voices"
# Sent as the `output_format` query parameter on both TTS and SFX requests.
OUTPUT_FORMAT = "mp3_22050_32"
# Model ids sent as `model_id` in the TTS and SFX request bodies respectively.
TTS_MODEL = "eleven_multilingual_v2"
SFX_MODEL = "eleven_text_to_sound_v2"
# Unit → (short phrase, stability controlling how much emotion, style)
# Lower stability = more emotion; style is optional.
# main() unpacks each tuple as (text, stability, style) and forwards it to generate_tts.
MOVE_ACK: dict[str, tuple[str, float, float]] = {
    UnitType.SCV.value: ("Affirmative.", 0.65, 0.0),  # lazy, neutral
    UnitType.MARINE.value: ("Yes sir!", 0.35, 0.4),  # anger/energy
    UnitType.MEDIC.value: ("Moving out.", 0.7, 0.0),  # calm
    UnitType.GOLIATH.value: ("Roger.", 0.5, 0.3),  # determined
    UnitType.TANK.value: ("Copy.", 0.75, 0.0),  # heavy, impassive
    UnitType.WRAITH.value: ("On my way.", 0.4, 0.5),  # mystery, stealthy
}
# Sound effects (text prompts for the Sound Effects API) — roughly 1–2 s each.
# Unit → prompt used for the "death" sound; main() passes it to generate_sfx.
DEATH_SFX: dict[str, str] = {
    UnitType.SCV.value: "Short mechanical explosion, worker unit destroyed, metal crunch, 1 second",
    UnitType.MARINE.value: "Soldier death cry, short impact, 1 second",
    UnitType.MEDIC.value: "Short piercing female scream, death cry, heartbreaking, under 1 second",
    UnitType.GOLIATH.value: "Heavy mech explosion, metal wreckage, 1.5 seconds",
    UnitType.TANK.value: "Large tank explosion, heavy armor destroyed, 1.5 seconds",
    UnitType.WRAITH.value: "Starfighter explosion, distant burst, 1 second",
}
# Unit → text prompt used for the "fire" (attack) sound; main() passes it to generate_sfx.
FIRE_SFX: dict[str, str] = {
    UnitType.SCV.value: "Small welding tool, repair sound, short",
    UnitType.MARINE.value: "Assault rifle burst, gunfire, 1 second",
    UnitType.MEDIC.value: "Healing beam, soft sci-fi zap, 1 second",
    UnitType.GOLIATH.value: "Dual heavy cannons firing, mechanical, 1 second",
    UnitType.TANK.value: "Tank cannon firing, heavy thump, 1 second",
    UnitType.WRAITH.value: "Laser burst, starfighter weapon, 1 second",
}
def _out_path(unit: str, kind: str) -> Path:
    """Return the MP3 path for one sound of one unit.

    The layout is ``SOUNDS_DIR/<unit>/<kind>.mp3``.
    """
    filename = f"{kind}.mp3"
    return SOUNDS_DIR.joinpath(unit, filename)
def _gender_of_voice(voice: dict) -> Optional[str]:
labels = voice.get("labels") or {}
g = (labels.get("gender") or "").lower()
if g in ("male", "female"):
return g
return None
async def fetch_voice_ids_by_gender(client: httpx.AsyncClient) -> tuple[list[str], list[str]]:
    """Fetch the account's voice ids split by gender label.

    Voices without an id or without a male/female gender label are ignored.
    If the request fails for any reason, or yields no labelled voice at all,
    a hard-coded fallback list is returned instead.

    Args:
        client: Async HTTP client used for the GET request.

    Returns:
        ``(male_ids, female_ids)``.
    """
    by_gender: dict[str, list[str]] = {"male": [], "female": []}
    try:
        response = await client.get(
            VOICES_URL,
            headers={"xi-api-key": ELEVENLABS_API_KEY},
            timeout=15,
        )
        response.raise_for_status()
        for entry in response.json().get("voices") or []:
            voice_id = entry.get("voice_id")
            if not voice_id:
                continue
            # Unlabelled genders map to None, which has no bucket.
            bucket = by_gender.get(_gender_of_voice(entry))
            if bucket is not None:
                bucket.append(voice_id)
        males, females = by_gender["male"], by_gender["female"]
        if males or females:
            log.info("Voix: %d male, %d female", len(males), len(females))
            return (males, females)
    except Exception as exc:
        # Best effort: any failure (network, HTTP status, JSON shape) falls
        # through to the defaults below.
        log.warning("Impossible de lister les voix: %s. Fallback par défaut.", exc)
    # Fallback: well-known ElevenLabs voices, as named by the original author
    # (male: Adam, Sam, etc.; female: Rachel).
    fallback_male = ["pNInz6obpgDQGcFmaJgB", "VR6AewLTigWG4xSOukaG", "onwK4e9ZLuTAKqWW03F9"]
    fallback_female = ["21m00Tcm4TlvDq8ikWAM"]
    return (fallback_male, fallback_female)
# Units voiced by a male vs. a female voice (move_ack only).
# main() only tests membership in MALE_UNITS; every other unit gets a female voice.
MALE_UNITS = {UnitType.SCV.value, UnitType.MARINE.value, UnitType.GOLIATH.value, UnitType.TANK.value}
FEMALE_UNITS = {UnitType.MEDIC.value, UnitType.WRAITH.value}
async def generate_tts(
    client: httpx.AsyncClient,
    voice_id: str,
    unit: str,
    text: str,
    stability: float,
    style: float,
) -> Optional[bytes]:
    """Synthesize ``text`` with the given voice and return the audio bytes.

    Args:
        client: Async HTTP client used for the POST request.
        voice_id: Voice id inserted into the TTS endpoint URL.
        unit: Unit name, used only in log messages.
        text: Phrase to speak.
        stability: Value sent as ``voice_settings.stability``.
        style: Value sent as ``voice_settings.style``.

    Returns:
        The raw response body on success, or ``None`` when the request fails
        (the error is logged, never raised).
    """
    voice_settings = {
        "stability": stability,
        "similarity_boost": 0.8,
        "style": style,
    }
    payload = {"text": text, "model_id": TTS_MODEL, "voice_settings": voice_settings}
    request_headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    endpoint = TTS_URL.format(voice_id=voice_id)
    try:
        response = await client.post(
            endpoint,
            params={"output_format": OUTPUT_FORMAT},
            headers=request_headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
        return response.content
    except httpx.HTTPStatusError as http_err:
        # Log only the first 200 characters of the error body.
        failed = http_err.response
        log.error("TTS %s: HTTP %s %s", unit, failed.status_code, failed.text[:200])
    except Exception as exc:
        log.exception("TTS %s: %s", unit, exc)
    return None
async def generate_sfx(
    client: httpx.AsyncClient,
    unit: str,
    kind: str,
    text: str,
    *,
    duration_seconds: float = 1.2,
    prompt_influence: float = 0.4,
) -> Optional[bytes]:
    """Generate a sound effect from a text prompt and return the audio bytes.

    Args:
        client: Async HTTP client used for the POST request.
        unit: Unit name, used only in log messages.
        kind: Sound kind (e.g. "death", "fire"), used only in log messages.
        text: Prompt describing the sound effect.
        duration_seconds: Requested clip length, sent as ``duration_seconds``.
            Defaults to the previously hard-coded 1.2 s; override it when the
            prompt asks for a different length. Accepted range is defined by
            the ElevenLabs API (TODO confirm against the API reference).
        prompt_influence: Value sent as ``prompt_influence``. Defaults to the
            previously hard-coded 0.4.

    Returns:
        The raw response body on success, or ``None`` when the request fails
        (the error is logged, never raised).
    """
    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    body = {
        "text": text,
        "model_id": SFX_MODEL,
        "duration_seconds": duration_seconds,
        "prompt_influence": prompt_influence,
    }
    try:
        r = await client.post(
            SFX_URL,
            params={"output_format": OUTPUT_FORMAT},
            headers=headers,
            json=body,
            timeout=45,
        )
        r.raise_for_status()
        return r.content
    except httpx.HTTPStatusError as e:
        # Log only the first 200 characters of the error body.
        log.error("SFX %s/%s: HTTP %s %s", unit, kind, e.response.status_code, e.response.text[:200])
        return None
    except Exception as e:
        log.exception("SFX %s/%s: %s", unit, kind, e)
        return None
# Sound kinds produced for every unit, in generation order.
_SOUND_KINDS = ("move_ack", "death", "fire")


async def _generate_sound(
    client: httpx.AsyncClient,
    unit: str,
    kind: str,
    voice_id: str,
) -> Optional[bytes]:
    """Request the audio for one (unit, kind) pair; return None if unavailable.

    "move_ack" goes through TTS using the MOVE_ACK entry of the unit; any other
    kind goes through the Sound Effects API using DEATH_SFX ("death") or
    FIRE_SFX. A unit missing from the relevant table is logged and skipped
    instead of raising KeyError.
    """
    if kind == "move_ack":
        spec = MOVE_ACK.get(unit)
        if spec is None:
            log.warning("Pas de config %s pour l'unité %s, ignoré.", kind, unit)
            return None
        text, stability, style = spec
        log.info("TTS %s move_ack — %r (voix %s)", unit, text, voice_id[:8])
        return await generate_tts(client, voice_id, unit, text, stability, style)

    prompts = DEATH_SFX if kind == "death" else FIRE_SFX
    prompt = prompts.get(unit)
    if prompt is None:
        log.warning("Pas de config %s pour l'unité %s, ignoré.", kind, unit)
        return None
    log.info("SFX %s %s — %r", unit, kind, prompt[:50])
    return await generate_sfx(client, unit, kind, prompt)


async def main() -> None:
    """Generate every missing unit sound file under SOUNDS_DIR.

    For each UnitType member, a voice is picked round-robin from the male or
    female pool (units outside MALE_UNITS get a female voice), then the
    move_ack, death and fire files are created unless they already exist.
    Existing files are never overwritten.

    Exits the process with status 1 when ELEVENLABS_API_KEY is not set or when
    no voice is available.
    """
    if not ELEVENLABS_API_KEY:
        log.error("ELEVENLABS_API_KEY non défini (fichier .env dans backend/)")
        sys.exit(1)
    SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
    created = skipped = failed = 0

    async with httpx.AsyncClient() as client:
        male_ids, female_ids = await fetch_voice_ids_by_gender(client)
        if not male_ids and not female_ids:
            log.error("Aucune voix disponible. Vérifie ta clé API.")
            sys.exit(1)
        # If one pool is empty, borrow the other so every unit still gets a voice.
        male_ids = male_ids or female_ids
        female_ids = female_ids or male_ids

        male_idx = female_idx = 0
        for unit in [u.value for u in UnitType]:
            (SOUNDS_DIR / unit).mkdir(parents=True, exist_ok=True)
            # The index advances even when move_ack already exists, so the
            # unit → voice assignment does not depend on which files are present.
            if unit in MALE_UNITS:
                voice_id = male_ids[male_idx % len(male_ids)]
                male_idx += 1
            else:
                voice_id = female_ids[female_idx % len(female_ids)]
                female_idx += 1

            for kind in _SOUND_KINDS:
                out_file = _out_path(unit, kind)
                if out_file.exists():
                    log.info("Skip (existe): %s/%s", unit, kind)
                    skipped += 1
                    continue
                data = await _generate_sound(client, unit, kind, voice_id)
                if data:
                    out_file.write_bytes(data)
                    created += 1
                else:
                    failed += 1

    log.info("Terminé: %d créés, %d ignorés (déjà présents).", created, skipped)
    if failed:
        log.warning("%d sons non générés (erreur ou config manquante).", failed)
# Script entry point: drive the async main() with asyncio.run when executed directly.
if __name__ == "__main__":
    asyncio.run(main())