Spaces:

mistral-hackaton-2026
/

ChatCraft

Running

App Files Files Community

ChatCraft / backend /scripts /generate_unit_sounds.py

gabraken

Add Map

5c0862e about 1 month ago

raw

history blame contribute delete

9.1 kB

	#!/usr/bin/env python3
	"""
	Génère les sons d'unités via l'API ElevenLabs :
	- move_ack : TTS avec une voix différente par unité et émotion (colère, calme, mystère…)
	- death / fire : Sound Effects API (effets sonores, pas voix)
	Ne crée que les fichiers qui n'existent pas. ELEVENLABS_API_KEY dans .env.

	Usage : cd backend && python -m scripts.generate_unit_sounds
	"""
	from __future__ import annotations

	import sys
	from pathlib import Path

	# Put the backend root (two levels above this script) on sys.path so the
	# project-local imports below resolve when the script is run directly.
	_backend = Path(__file__).resolve().parents[1]
	if str(_backend) not in sys.path:
	    sys.path.insert(0, str(_backend))

	import asyncio
	import logging
	from typing import Optional

	import httpx

	from config import ELEVENLABS_API_KEY
	from game.units import UnitType

	# Configure logging at INFO level and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	log = logging.getLogger(__name__)

	# Root output directory; individual files live at <unit>/<kind>.mp3 below it
	# (see _out_path).
	SOUNDS_DIR = _backend / "static" / "sounds" / "units"
	# ElevenLabs endpoints used by this script: text-to-speech (per voice),
	# sound-effect generation, and the voice listing.
	TTS_URL = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
	SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"
	VOICES_URL = "https://api.elevenlabs.io/v1/voices"
	# Sent as the `output_format` query parameter on both TTS and SFX requests.
	# NOTE(review): presumably MP3 at 22.05 kHz / 32 kbps — confirm against the API docs.
	OUTPUT_FORMAT = "mp3_22050_32"
	# Sent as `model_id` in the TTS and SFX request bodies respectively.
	TTS_MODEL = "eleven_multilingual_v2"
	SFX_MODEL = "eleven_text_to_sound_v2"

	# Unit -> (short phrase, stability, style) used for the move_ack TTS line.
	# Per the original author's note: lower stability = more emotion; style is optional.
	# The trailing comments describe the intended mood of each voice line.
	MOVE_ACK: dict[str, tuple[str, float, float]] = {
	    UnitType.SCV.value: ("Affirmative.", 0.65, 0.0), # lazy, neutral
	    UnitType.MARINE.value: ("Yes sir!", 0.35, 0.4), # anger/energy
	    UnitType.MEDIC.value: ("Moving out.", 0.7, 0.0), # calm
	    UnitType.GOLIATH.value: ("Roger.", 0.5, 0.3), # determined
	    UnitType.TANK.value: ("Copy.", 0.75, 0.0), # heavy, impassive
	    UnitType.WRAITH.value: ("On my way.", 0.4, 0.5), # mysterious, stealthy
	}

	# Sound effects (text prompts for the Sound Effects API) — about 1–2 s each.
	# Unit -> prompt for the unit's death sound.
	DEATH_SFX: dict[str, str] = {
	    UnitType.SCV.value: "Short mechanical explosion, worker unit destroyed, metal crunch, 1 second",
	    UnitType.MARINE.value: "Soldier death cry, short impact, 1 second",
	    UnitType.MEDIC.value: "Short piercing female scream, death cry, heartbreaking, under 1 second",
	    UnitType.GOLIATH.value: "Heavy mech explosion, metal wreckage, 1.5 seconds",
	    UnitType.TANK.value: "Large tank explosion, heavy armor destroyed, 1.5 seconds",
	    UnitType.WRAITH.value: "Starfighter explosion, distant burst, 1 second",
	}

	# Unit -> text prompt for the unit's weapon-fire sound effect.
	FIRE_SFX: dict[str, str] = {
	    UnitType.SCV.value: "Small welding tool, repair sound, short",
	    UnitType.MARINE.value: "Assault rifle burst, gunfire, 1 second",
	    UnitType.MEDIC.value: "Healing beam, soft sci-fi zap, 1 second",
	    UnitType.GOLIATH.value: "Dual heavy cannons firing, mechanical, 1 second",
	    UnitType.TANK.value: "Tank cannon firing, heavy thump, 1 second",
	    UnitType.WRAITH.value: "Laser burst, starfighter weapon, 1 second",
	}


	def _out_path(unit: str, kind: str) -> Path:
	    """Return the MP3 path for one sound of one unit: ``SOUNDS_DIR/<unit>/<kind>.mp3``."""
	    filename = f"{kind}.mp3"
	    return SOUNDS_DIR.joinpath(unit, filename)


	def _gender_of_voice(voice: dict) -> Optional[str]:
	    """Return ``"male"`` or ``"female"`` from a voice's ``labels.gender``, else ``None``.

	    The comparison ignores case and surrounding whitespace. Malformed entries
	    (``labels`` missing or not a mapping, ``gender`` missing or not a string)
	    yield ``None`` instead of raising, so a single odd voice record cannot
	    abort the caller's whole voice listing.
	    """
	    labels = voice.get("labels")
	    if not isinstance(labels, dict):
	        return None
	    raw_gender = labels.get("gender")
	    if not isinstance(raw_gender, str):
	        return None
	    gender = raw_gender.strip().lower()
	    return gender if gender in ("male", "female") else None


	async def fetch_voice_ids_by_gender(client: httpx.AsyncClient) -> tuple[list[str], list[str]]:
	    """Return ``(male_voice_ids, female_voice_ids)`` from the ElevenLabs voice list.

	    Voices are classified through ``_gender_of_voice``; entries without a
	    ``voice_id`` or without a recognised gender are ignored. If the request
	    fails, or no voice could be classified, a hard-coded set of default voice
	    ids is returned instead.
	    """
	    # NOTE(review): the fallback is assumed to apply both on error and when no
	    # gender-labelled voice is found — confirm against the intended behaviour.
	    male_voices: list[str] = []
	    female_voices: list[str] = []
	    by_gender = {"male": male_voices, "female": female_voices}
	    try:
	        response = await client.get(
	            VOICES_URL,
	            headers={"xi-api-key": ELEVENLABS_API_KEY},
	            timeout=15,
	        )
	        response.raise_for_status()
	        listed = response.json().get("voices") or []
	        for entry in listed:
	            voice_id = entry.get("voice_id")
	            if voice_id:
	                bucket = by_gender.get(_gender_of_voice(entry))
	                if bucket is not None:
	                    bucket.append(voice_id)
	        if male_voices or female_voices:
	            log.info("Voix: %d male, %d female", len(male_voices), len(female_voices))
	            return (male_voices, female_voices)
	    except Exception as exc:
	        log.warning("Impossible de lister les voix: %s. Fallback par défaut.", exc)
	    # Fallback: well-known ElevenLabs voices (Rachel = female; Adam, Sam, etc. = male).
	    default_male = ["pNInz6obpgDQGcFmaJgB", "VR6AewLTigWG4xSOukaG", "onwK4e9ZLuTAKqWW03F9"]
	    default_female = ["21m00Tcm4TlvDq8ikWAM"]
	    return (default_male, default_female)


	# Units voiced with a male vs. female voice (only relevant to move_ack).
	# main() tests membership in MALE_UNITS only: any unit not listed there gets a
	# voice from the female pool.
	MALE_UNITS = {UnitType.SCV.value, UnitType.MARINE.value, UnitType.GOLIATH.value, UnitType.TANK.value}
	FEMALE_UNITS = {UnitType.MEDIC.value, UnitType.WRAITH.value}


	async def generate_tts(
	    client: httpx.AsyncClient,
	    voice_id: str,
	    unit: str,
	    text: str,
	    stability: float,
	    style: float,
	) -> Optional[bytes]:
	    """Request a spoken rendition of ``text`` for ``voice_id`` and return the audio bytes.

	    ``stability`` and ``style`` are forwarded in ``voice_settings`` together with
	    a fixed ``similarity_boost`` of 0.8. ``unit`` is only used in log messages.

	    Returns the response body on success, or ``None`` after logging when the
	    API answers with an error status or any other exception occurs.
	    """
	    endpoint = TTS_URL.format(voice_id=voice_id)
	    request_headers = {
	        "xi-api-key": ELEVENLABS_API_KEY,
	        "Content-Type": "application/json",
	    }
	    voice_settings = {
	        "stability": stability,
	        "similarity_boost": 0.8,
	        "style": style,
	    }
	    payload = {"text": text, "model_id": TTS_MODEL, "voice_settings": voice_settings}
	    try:
	        response = await client.post(
	            endpoint,
	            params={"output_format": OUTPUT_FORMAT},
	            headers=request_headers,
	            json=payload,
	            timeout=30,
	        )
	        response.raise_for_status()
	        return response.content
	    except httpx.HTTPStatusError as exc:
	        # Error status: log the code and the first 200 characters of the body.
	        log.error("TTS %s: HTTP %s %s", unit, exc.response.status_code, exc.response.text[:200])
	        return None
	    except Exception as exc:
	        # Anything else (network failure, timeout, ...): log with traceback.
	        log.exception("TTS %s: %s", unit, exc)
	        return None


	async def generate_sfx(
	    client: httpx.AsyncClient,
	    unit: str,
	    kind: str,
	    text: str,
	    *,
	    duration_seconds: Optional[float] = 1.2,
	    prompt_influence: float = 0.4,
	) -> Optional[bytes]:
	    """Generate a sound effect from the prompt ``text`` and return the audio bytes.

	    Args:
	        client: HTTP client used to send the request.
	        unit: Unit name, used only in log messages.
	        kind: Sound kind (e.g. ``"death"``, ``"fire"``), used only in log messages.
	        text: Text prompt describing the sound effect.
	        duration_seconds: Requested clip length. Defaults to the previously
	            hard-coded 1.2 s. Pass ``None`` to omit the field from the request
	            (presumably the API then chooses a length itself — confirm
	            against the ElevenLabs reference).
	        prompt_influence: Forwarded as ``prompt_influence``. Defaults to the
	            previously hard-coded 0.4.

	    Returns:
	        The response body on success, or ``None`` after logging when the API
	        answers with an error status or any other exception occurs.
	    """
	    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
	    body: dict = {"text": text, "model_id": SFX_MODEL}
	    if duration_seconds is not None:
	        body["duration_seconds"] = duration_seconds
	    body["prompt_influence"] = prompt_influence
	    try:
	        r = await client.post(
	            SFX_URL,
	            params={"output_format": OUTPUT_FORMAT},
	            headers=headers,
	            json=body,
	            timeout=45,
	        )
	        r.raise_for_status()
	        return r.content
	    except httpx.HTTPStatusError as e:
	        # Error status: log the code and the first 200 characters of the body.
	        log.error("SFX %s/%s: HTTP %s %s", unit, kind, e.response.status_code, e.response.text[:200])
	        return None
	    except Exception as e:
	        # Anything else (network failure, timeout, ...): log with traceback.
	        log.exception("SFX %s/%s: %s", unit, kind, e)
	        return None


	async def _ensure_sfx(
	    client: httpx.AsyncClient,
	    unit: str,
	    kind: str,
	    prompts: dict[str, str],
	) -> tuple[int, int]:
	    """Generate the ``kind`` sound effect of ``unit`` unless its file already exists.

	    Returns a ``(created, skipped)`` pair of 0/1 increments for the caller's
	    counters. A unit without a prompt in ``prompts``, or a failed generation,
	    yields ``(0, 0)``.
	    """
	    target = _out_path(unit, kind)
	    if target.exists():
	        log.info("Skip (existe): %s/%s", unit, kind)
	        return (0, 1)
	    prompt = prompts.get(unit)
	    if prompt is None:
	        # Unit type without a configured prompt: warn instead of raising KeyError.
	        log.warning("Pas de prompt %s configuré pour %s : ignoré.", kind, unit)
	        return (0, 0)
	    log.info("SFX %s %s — %r", unit, kind, prompt[:50])
	    audio = await generate_sfx(client, unit, kind, prompt)
	    if audio:
	        target.write_bytes(audio)
	        return (1, 0)
	    return (0, 0)


	async def main() -> None:
	    """Generate every missing unit sound file under ``SOUNDS_DIR``.

	    For each ``UnitType`` value this creates, if absent, ``move_ack.mp3`` (TTS)
	    plus ``death.mp3`` and ``fire.mp3`` (sound effects). Existing files are
	    never regenerated. Unit types that have no entry in the phrase/prompt
	    tables are reported with a warning and skipped rather than aborting the run.

	    Exits with status 1 when ``ELEVENLABS_API_KEY`` is unset or no voice is
	    available.
	    """
	    if not ELEVENLABS_API_KEY:
	        log.error("ELEVENLABS_API_KEY non défini (fichier .env dans backend/)")
	        sys.exit(1)

	    SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
	    created = 0
	    skipped = 0

	    async with httpx.AsyncClient() as client:
	        male_ids, female_ids = await fetch_voice_ids_by_gender(client)
	        if not male_ids and not female_ids:
	            log.error("Aucune voix disponible. Vérifie ta clé API.")
	            sys.exit(1)
	        # At least one pool is non-empty here; let an empty pool borrow the other.
	        male_ids = male_ids or female_ids
	        female_ids = female_ids or male_ids

	        male_idx, female_idx = 0, 0
	        for unit in [u.value for u in UnitType]:
	            (SOUNDS_DIR / unit).mkdir(parents=True, exist_ok=True)

	            # Round-robin through the pool matching the unit's voice gender. The
	            # index advances even when move_ack already exists, so a unit's
	            # position in its pool does not depend on which files are present.
	            if unit in MALE_UNITS:
	                voice_id = male_ids[male_idx % len(male_ids)]
	                male_idx += 1
	            else:
	                voice_id = female_ids[female_idx % len(female_ids)]
	                female_idx += 1

	            # move_ack: spoken acknowledgement via TTS.
	            out_move = _out_path(unit, "move_ack")
	            if out_move.exists():
	                log.info("Skip (existe): %s/move_ack", unit)
	                skipped += 1
	            elif unit not in MOVE_ACK:
	                # Unit type without a configured phrase: warn instead of raising KeyError.
	                log.warning("Pas de phrase move_ack configurée pour %s : ignoré.", unit)
	            else:
	                text, stability, style = MOVE_ACK[unit]
	                log.info("TTS %s move_ack — %r (voix %s)", unit, text, voice_id[:8])
	                data = await generate_tts(client, voice_id, unit, text, stability, style)
	                if data:
	                    out_move.write_bytes(data)
	                    created += 1

	            # death / fire: generated sound effects.
	            for kind, prompts in (("death", DEATH_SFX), ("fire", FIRE_SFX)):
	                made, already_there = await _ensure_sfx(client, unit, kind, prompts)
	                created += made
	                skipped += already_there

	    log.info("Terminé: %d créés, %d ignorés (déjà présents).", created, skipped)


	# Script entry point: run the async main() to completion when executed directly.
	if __name__ == "__main__":
	    asyncio.run(main())