"""Voice clip generation with mock, local and remote (Modal) backends.

Behaviour is selected through environment variables that are read on every
call:

* ``TINYWORLD_MOCK=1``       -- synthesize a short placeholder tone locally.
* ``TINYWORLD_INFER=local``  -- call ``inference.synthesize_voice`` in-process.
* anything else              -- POST to ``MODAL_VOICE_URL``.

``MODAL_VOICE_URL`` and ``TINYWORLD_VOICE_TIMEOUT`` are read once at import.
"""
import os
import tempfile
import wave
import zlib
from typing import Optional

import numpy as np

MODAL_VOICE_URL = os.environ.get(
    "MODAL_VOICE_URL",
    "https://mitvho09--tinyworld-inference-voice-endpoint.modal.run",
)
# Timeout (seconds) handed to the HTTP client used for the remote backend.
VOICE_TIMEOUT = float(os.environ.get("TINYWORLD_VOICE_TIMEOUT", "25"))


def _is_mock() -> bool:
    """Return True when the ``TINYWORLD_MOCK`` environment variable is ``"1"``."""
    return os.environ.get("TINYWORLD_MOCK", "0") == "1"


def _backend() -> str:
    """Return the lower-cased ``TINYWORLD_INFER`` value (default ``"modal"``)."""
    return os.environ.get("TINYWORLD_INFER", "modal").lower()


def build_voice_description(character) -> str:
    """Return ``character["voice_description"]`` or a neutral default.

    Args:
        character: Mapping-like object exposing ``.get``.
    """
    return character.get("voice_description", "(a neutral voice)")


def _new_wav_path() -> str:
    """Create an empty, uniquely named ``.wav`` file in the temp dir.

    A unique name per clip keeps an earlier clip from being overwritten while
    it may still be in use, and avoids races between concurrent calls.
    """
    fd, path = tempfile.mkstemp(
        prefix=f"tinyworld_voice_{os.getpid()}_", suffix=".wav"
    )
    os.close(fd)
    return path


def _discard(path: str) -> None:
    """Best-effort removal of a partially written clip."""
    try:
        os.remove(path)
    except OSError:
        pass


def generate_voice(text: str, voice_desc: str) -> Optional[str]:
    """Generate a voice clip for *text* and return the path of the WAV file.

    Args:
        text: The line to speak.
        voice_desc: Free-form voice description forwarded to the backend
            (ignored by the mock backend).

    Returns:
        Path to a newly written ``.wav`` file, or ``None`` when generation
        failed. This function does not raise for backend errors; the error is
        printed instead. The caller owns the returned file.
    """
    try:
        if _is_mock():
            return _mock_generate(text)
        if _backend() == "local":
            import inference  # ZeroGPU VoxCPM2, imported lazily

            return inference.synthesize_voice(text, voice_desc)
        return _real_generate(text, voice_desc)
    except Exception as e:
        print(f"[voice] generation failed: {e}")
        # Do not retry the mock here: in mock mode the mock itself is what
        # failed, and a second unguarded attempt would raise to the caller.
        return None


def _mock_generate(text: str) -> str:
    """Write a short placeholder tone for *text* and return its path.

    Audible placeholder so the voice/hear features are verifiable without a
    GPU. Duration grows with the word count (capped at 1.6 s) and the pitch
    is derived from the text.

    Raises:
        OSError: If the clip cannot be written.
    """
    sample_rate = 24000
    duration = min(1.6, 0.5 + 0.03 * len(text.split()))
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    # Per-line pitch. crc32 is used instead of hash() because str hashes are
    # salted per process, which made the same line sound different each run.
    base = 150 + (zlib.crc32(text.encode("utf-8", errors="replace")) % 120)
    wobble = 1 + 0.04 * np.sin(2 * np.pi * 5 * t)  # gentle speech-like wobble
    tone = 0.22 * np.sin(2 * np.pi * base * wobble * t)
    tone += 0.08 * np.sin(2 * np.pi * base * 2 * t)
    env = np.minimum(1.0, np.minimum(t * 12, (duration - t) * 8))  # fade in/out
    audio = (tone * env).astype(np.float32)

    try:
        import soundfile as sf
    except (ImportError, OSError):
        # NOTE(review): soundfile is believed to raise OSError when the
        # libsndfile shared library is missing -- fall back in that case too.
        sf = None

    path = _new_wav_path()
    try:
        if sf is not None:
            sf.write(path, audio, sample_rate)
        else:
            with wave.open(path, "w") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit PCM
                wf.setframerate(sample_rate)
                wf.writeframes((audio * 32767).astype(np.int16).tobytes())
    except Exception:
        _discard(path)
        raise
    return path


def _real_generate(text: str, voice_desc: str) -> Optional[str]:
    """POST *text* to the Modal voice endpoint and save the response body.

    Returns:
        Path to the written ``.wav`` file, or ``None`` if the request, the
        response validation or the file write failed (the error is printed).
    """
    path = None
    try:
        import httpx

        payload = {"text": text, "voice_desc": voice_desc}
        with httpx.Client(timeout=VOICE_TIMEOUT, follow_redirects=True) as client:
            resp = client.post(MODAL_VOICE_URL, json=payload)
            resp.raise_for_status()
            audio_bytes = resp.content

        # A 2xx response with no body would otherwise become a 0-byte "wav".
        if not audio_bytes:
            raise ValueError("voice endpoint returned an empty body")

        path = _new_wav_path()
        with open(path, "wb") as f:
            f.write(audio_bytes)
        return path
    except Exception as e:
        print(f"[voice] Modal call failed: {e}")
        if path is not None:
            _discard(path)
        return None


if __name__ == "__main__":
    import characters as c

    path = generate_voice("Hello there!", c.CHARACTERS[0]["voice_description"])
    print(path)
    # generate_voice returns None on failure; guard before touching the path.
    assert path is not None and os.path.exists(path), f"File not found: {path}"
    print("OK")