#!/usr/bin/env python3
"""Generate unit sounds through the ElevenLabs API.

- move_ack: text-to-speech, with a voice picked per unit and per-unit
  stability/style settings to vary the emotion (anger, calm, mystery...).
- death / fire: Sound Effects API (sound effects, not voices).

Only files that do not exist yet are created.
ELEVENLABS_API_KEY is read from .env (through ``config``).

Usage: cd backend && python -m scripts.generate_unit_sounds
"""
from __future__ import annotations

import sys
from pathlib import Path

# Make the backend package root importable when the script is run directly.
_backend = Path(__file__).resolve().parent.parent
if str(_backend) not in sys.path:
    sys.path.insert(0, str(_backend))

import asyncio
import logging
from typing import Optional

import httpx

from config import ELEVENLABS_API_KEY
from game.units import UnitType

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

SOUNDS_DIR = _backend / "static" / "sounds" / "units"
TTS_URL = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"
VOICES_URL = "https://api.elevenlabs.io/v1/voices"
OUTPUT_FORMAT = "mp3_22050_32"
TTS_MODEL = "eleven_multilingual_v2"
SFX_MODEL = "eleven_text_to_sound_v2"

# Unit -> (short phrase, stability, style).
# Lower stability = more emotion; style is optional (0.0 disables it).
MOVE_ACK: dict[str, tuple[str, float, float]] = {
    UnitType.SCV.value: ("Affirmative.", 0.65, 0.0),  # lazy, neutral
    UnitType.MARINE.value: ("Yes sir!", 0.35, 0.4),  # anger / energy
    UnitType.MEDIC.value: ("Moving out.", 0.7, 0.0),  # calm
    UnitType.GOLIATH.value: ("Roger.", 0.5, 0.3),  # determined
    UnitType.TANK.value: ("Copy.", 0.75, 0.0),  # heavy, impassive
    UnitType.WRAITH.value: ("On my way.", 0.4, 0.5),  # mystery, stealthy
}

# Sound effect prompts (text sent to the Sound Effects API), roughly 1-2 s each.
DEATH_SFX: dict[str, str] = {
    UnitType.SCV.value: "Short mechanical explosion, worker unit destroyed, metal crunch, 1 second",
    UnitType.MARINE.value: "Soldier death cry, short impact, 1 second",
    UnitType.MEDIC.value: "Short piercing female scream, death cry, heartbreaking, under 1 second",
    UnitType.GOLIATH.value: "Heavy mech explosion, metal wreckage, 1.5 seconds",
    UnitType.TANK.value: "Large tank explosion, heavy armor destroyed, 1.5 seconds",
    UnitType.WRAITH.value: "Starfighter explosion, distant burst, 1 second",
}

FIRE_SFX: dict[str, str] = {
    UnitType.SCV.value: "Small welding tool, repair sound, short",
    UnitType.MARINE.value: "Assault rifle burst, gunfire, 1 second",
    UnitType.MEDIC.value: "Healing beam, soft sci-fi zap, 1 second",
    UnitType.GOLIATH.value: "Dual heavy cannons firing, mechanical, 1 second",
    UnitType.TANK.value: "Tank cannon firing, heavy thump, 1 second",
    UnitType.WRAITH.value: "Laser burst, starfighter weapon, 1 second",
}

# Outcome labels returned by the per-file helpers and tallied by main().
_CREATED = "created"
_SKIPPED = "skipped"
_FAILED = "failed"


def _out_path(unit: str, kind: str) -> Path:
    """Return the target mp3 path for sound *kind* of *unit*."""
    return SOUNDS_DIR / unit / f"{kind}.mp3"


def _gender_of_voice(voice: dict) -> Optional[str]:
    """Return "male" or "female" from the voice's ``labels.gender``, else None."""
    labels = voice.get("labels") or {}
    gender = (labels.get("gender") or "").lower()
    if gender in ("male", "female"):
        return gender
    return None


async def fetch_voice_ids_by_gender(client: httpx.AsyncClient) -> tuple[list[str], list[str]]:
    """Fetch the account's voice ids split by gender label.

    Returns:
        A ``(male_ids, female_ids)`` tuple. When the request fails, or when no
        voice carries a usable gender label, a hard-coded list of fallback
        voice ids is returned instead, so the result is never two empty lists.
    """
    male: list[str] = []
    female: list[str] = []
    try:
        r = await client.get(VOICES_URL, headers={"xi-api-key": ELEVENLABS_API_KEY}, timeout=15)
        r.raise_for_status()
        voices = r.json().get("voices") or []
        for voice in voices:
            voice_id = voice.get("voice_id")
            if not voice_id:
                continue
            gender = _gender_of_voice(voice)
            if gender == "male":
                male.append(voice_id)
            elif gender == "female":
                female.append(voice_id)
        if male or female:
            log.info("Voix: %d male, %d female", len(male), len(female))
            return (male, female)
    except Exception as e:
        # Deliberately broad: listing voices is best-effort and any failure
        # (network, HTTP status, unexpected JSON shape) falls back to defaults.
        log.warning("Impossible de lister les voix: %s. Fallback par défaut.", e)

    # Fallback: hard-coded ElevenLabs voice ids (Rachel = female, Adam = male).
    female_fb = ["21m00Tcm4TlvDq8ikWAM"]  # Rachel
    male_fb = ["pNInz6obpgDQGcFmaJgB", "VR6AewLTigWG4xSOukaG", "onwK4e9ZLuTAKqWW03F9"]  # Adam, Sam, etc.
    return (male_fb, female_fb)


# Units voiced by a male vs. a female voice (move_ack only).
MALE_UNITS = {UnitType.SCV.value, UnitType.MARINE.value, UnitType.GOLIATH.value, UnitType.TANK.value}
FEMALE_UNITS = {UnitType.MEDIC.value, UnitType.WRAITH.value}


async def generate_tts(
    client: httpx.AsyncClient,
    voice_id: str,
    unit: str,
    text: str,
    stability: float,
    style: float,
) -> Optional[bytes]:
    """Request speech audio for *text* with the given voice.

    *unit* is only used to label log messages.

    Returns:
        The response body on success, or None when the request fails (the
        error is logged, never raised).
    """
    url = TTS_URL.format(voice_id=voice_id)
    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    body = {
        "text": text,
        "model_id": TTS_MODEL,
        "voice_settings": {
            "stability": stability,
            "similarity_boost": 0.8,
            "style": style,
        },
    }
    try:
        r = await client.post(url, params={"output_format": OUTPUT_FORMAT}, headers=headers, json=body, timeout=30)
        r.raise_for_status()
        return r.content
    except httpx.HTTPStatusError as e:
        log.error("TTS %s: HTTP %s %s", unit, e.response.status_code, e.response.text[:200])
        return None
    except Exception as e:
        log.exception("TTS %s: %s", unit, e)
        return None


async def generate_sfx(
    client: httpx.AsyncClient,
    unit: str,
    kind: str,
    text: str,
) -> Optional[bytes]:
    """Request a sound effect described by the prompt *text*.

    *unit* and *kind* are only used to label log messages.

    Returns:
        The response body on success, or None when the request fails (the
        error is logged, never raised).
    """
    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    body = {
        "text": text,
        "model_id": SFX_MODEL,
        "duration_seconds": 1.2,
        "prompt_influence": 0.4,
    }
    try:
        r = await client.post(SFX_URL, params={"output_format": OUTPUT_FORMAT}, headers=headers, json=body, timeout=45)
        r.raise_for_status()
        return r.content
    except httpx.HTTPStatusError as e:
        log.error("SFX %s/%s: HTTP %s %s", unit, kind, e.response.status_code, e.response.text[:200])
        return None
    except Exception as e:
        log.exception("SFX %s/%s: %s", unit, kind, e)
        return None


def _store(path: Path, data: Optional[bytes]) -> str:
    """Write *data* to *path* and return the outcome label.

    The bytes go to a sibling ``.part`` file first and are then renamed into
    place, so an interrupted run cannot leave a truncated mp3 that later runs
    would skip as "already present".
    """
    if not data:
        return _FAILED
    tmp = path.with_name(path.name + ".part")
    tmp.write_bytes(data)
    tmp.replace(path)
    return _CREATED


async def _ensure_move_ack(client: httpx.AsyncClient, unit: str, voice_id: str) -> str:
    """Create the move_ack voice line of *unit* unless the file already exists."""
    out = _out_path(unit, "move_ack")
    if out.exists():
        log.info("Skip (existe): %s/move_ack", unit)
        return _SKIPPED
    spec = MOVE_ACK.get(unit)
    if spec is None:
        # A unit type without an entry must not abort the whole run.
        log.warning("Aucune phrase move_ack définie pour %s, ignoré.", unit)
        return _FAILED
    text, stability, style = spec
    log.info("TTS %s move_ack — %r (voix %s)", unit, text, voice_id[:8])
    data = await generate_tts(client, voice_id, unit, text, stability, style)
    return _store(out, data)


async def _ensure_sfx(client: httpx.AsyncClient, unit: str, kind: str, prompts: dict[str, str]) -> str:
    """Create sound effect *kind* of *unit* unless the file already exists."""
    out = _out_path(unit, kind)
    if out.exists():
        log.info("Skip (existe): %s/%s", unit, kind)
        return _SKIPPED
    text = prompts.get(unit)
    if text is None:
        # A unit type without an entry must not abort the whole run.
        log.warning("Aucun effet %s défini pour %s, ignoré.", kind, unit)
        return _FAILED
    log.info("SFX %s %s — %r", unit, kind, text[:50])
    data = await generate_sfx(client, unit, kind, text)
    return _store(out, data)


async def main() -> None:
    """Generate every missing move_ack / death / fire sound for all unit types.

    Exits with status 1 when the API key is missing or no voice is available.
    Individual generation failures are logged and counted but do not stop the
    run.
    """
    if not ELEVENLABS_API_KEY:
        log.error("ELEVENLABS_API_KEY non défini (fichier .env dans backend/)")
        sys.exit(1)

    SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
    tally = {_CREATED: 0, _SKIPPED: 0, _FAILED: 0}

    async with httpx.AsyncClient() as client:
        male_ids, female_ids = await fetch_voice_ids_by_gender(client)
        if not male_ids and not female_ids:
            log.error("Aucune voix disponible. Vérifie ta clé API.")
            sys.exit(1)
        # If one gender has no voice, reuse the other gender's voices.
        male_ids = male_ids or female_ids
        female_ids = female_ids or male_ids

        male_idx = 0
        female_idx = 0
        for unit in [u.value for u in UnitType]:
            (SOUNDS_DIR / unit).mkdir(parents=True, exist_ok=True)

            # Rotate through the voices of the unit's gender. The index advances
            # even when the file is skipped, so a unit's voice does not depend
            # on which files already exist.
            if unit in MALE_UNITS:
                voice_id = male_ids[male_idx % len(male_ids)]
                male_idx += 1
            else:
                voice_id = female_ids[female_idx % len(female_ids)]
                female_idx += 1

            # Sequential on purpose: one API request at a time, in a fixed order.
            tally[await _ensure_move_ack(client, unit, voice_id)] += 1
            tally[await _ensure_sfx(client, unit, "death", DEATH_SFX)] += 1
            tally[await _ensure_sfx(client, unit, "fire", FIRE_SFX)] += 1

    log.info("Terminé: %d créés, %d ignorés (déjà présents).", tally[_CREATED], tally[_SKIPPED])
    if tally[_FAILED]:
        log.warning("%d son(s) non généré(s) (voir les erreurs ci-dessus).", tally[_FAILED])


if __name__ == "__main__":
    asyncio.run(main())