# tinyworld / voice.py
# TinyWorld + Crisis Mode, ZeroGPU in-process inference
# (Hugging Face file-viewer residue: "sush0401's picture"; commit d3a7a1c, verified;
#  page controls Raw / History / Blame / Contribute / Delete; file size 2.93 kB)
import os
import tempfile
import numpy as np
# URL that voice requests are POSTed to by the remote ("Modal") backend.
# Overridable through the MODAL_VOICE_URL environment variable.
MODAL_VOICE_URL = os.getenv(
    "MODAL_VOICE_URL",
    "https://mitvho09--tinyworld-inference-voice-endpoint.modal.run",
)

# Timeout handed to the HTTP client for a voice request; read from
# TINYWORLD_VOICE_TIMEOUT and defaulting to 25.
VOICE_TIMEOUT = float(os.getenv("TINYWORLD_VOICE_TIMEOUT", "25"))
def _is_mock() -> bool:
    """Return True when the TINYWORLD_MOCK environment variable is exactly "1".

    The variable is read on every call, so it can be toggled at runtime.
    """
    flag = os.getenv("TINYWORLD_MOCK", "0")
    return flag == "1"
def _backend() -> str:
    """Return the lower-cased value of TINYWORLD_INFER ("modal" when unset).

    The variable is read on every call, so it can be changed at runtime.
    """
    selected = os.getenv("TINYWORLD_INFER", "modal")
    return selected.lower()
def build_voice_description(character) -> str:
    """Return the voice description stored on *character*, or a neutral default.

    Args:
        character: Mapping-like object exposing ``.get``; its
            ``"voice_description"`` entry is used when it is a non-blank string.
            ``None`` is accepted and treated as "no description".

    Returns:
        The stored description unchanged, or ``"(a neutral voice)"`` when the
        entry is missing, ``None``, not a string, or only whitespace, so that
        the declared ``str`` return type always holds.
    """
    fallback = "(a neutral voice)"
    if character is None:
        return fallback

    description = character.get("voice_description")
    # A present-but-unusable value (None, "", "   ", non-string) must not
    # leak through to the synthesis backends.
    if not isinstance(description, str) or not description.strip():
        return fallback
    return description
def generate_voice(text: str, voice_desc: str) -> "str | None":
    """Synthesize *text* and return the result of the selected backend.

    Backend selection happens on every call:

    * mock mode (``_is_mock()``): placeholder tone from ``_mock_generate``;
    * ``_backend() == "local"``: in-process ``inference.synthesize_voice``;
    * otherwise: remote HTTP call via ``_real_generate``.

    The function is best-effort and never raises for ordinary errors: any
    ``Exception`` is logged to stdout and reported as ``None``.

    Args:
        text: The line to speak.
        voice_desc: Voice description forwarded to the local/remote backend
            (ignored by the mock backend).

    Returns:
        The backend's result (a file path for the mock and remote backends;
        presumably also a path for the local one -- confirm against
        ``inference.synthesize_voice``), or ``None`` on failure.
    """
    try:
        if _is_mock():
            return _mock_generate(text)
        if _backend() == "local":
            # ZeroGPU VoxCPM2; imported lazily so the other backends never
            # pay for (or depend on) loading it.
            import inference

            return inference.synthesize_voice(text, voice_desc)
        return _real_generate(text, voice_desc)
    except Exception as e:
        # No retry here: in mock mode the only code that can have failed is
        # the mock generator itself, so calling it again from the handler
        # would just raise a second time and escape this function.
        print(f"[voice] generation failed: {e}")
        return None
def _mock_generate(text: str) -> str:
    """Write a short synthetic tone for *text* to a WAV file and return its path.

    Audible placeholder so the voice/hear features are verifiable without a
    GPU. The tone's length grows with the word count and its pitch is derived
    from the text, so different lines sound different while the same line
    always sounds the same, even across processes.

    Args:
        text: The line being "spoken"; only its word count and checksum are used.

    Returns:
        Path of a newly created mono 24 kHz ``.wav`` file in the system temp
        directory. Each call gets its own file, so an earlier result is never
        overwritten; the caller owns the file and may delete it when done.

    Raises:
        OSError: If the temp file cannot be created or written.
    """
    import zlib  # local import: only this helper needs it

    sample_rate = 24000
    # 0.5 s base plus 30 ms per word, capped at 1.6 s.
    duration = min(1.6, 0.5 + 0.03 * len(text.split()))
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    # Per-line pitch in [150, 270) Hz. crc32 rather than hash(): str hashes
    # are salted per interpreter run, which made the pitch change every run.
    base = 150 + (zlib.crc32(text.encode("utf-8", "replace")) % 120)
    wobble = 1 + 0.04 * np.sin(2 * np.pi * 5 * t)  # gentle speech-like wobble
    tone = 0.22 * np.sin(2 * np.pi * base * wobble * t)
    tone += 0.08 * np.sin(2 * np.pi * base * 2 * t)  # add the octave above
    # Fade in over the first 1/12 s and out over the last 1/8 s.
    env = np.minimum(1.0, np.minimum(t * 12, (duration - t) * 8))
    audio = (tone * env).astype(np.float32)

    # soundfile is optional. Importing it raises ImportError when the package
    # is absent and OSError when its native libsndfile cannot be loaded; in
    # both cases fall back to the stdlib wave writer.
    try:
        import soundfile as sf
    except (ImportError, OSError):
        sf = None

    # Unique file per call: a fixed per-process name would let a later call
    # overwrite audio that an earlier caller is still using.
    fd, path = tempfile.mkstemp(prefix="tinyworld_voice_", suffix=".wav")
    os.close(fd)
    try:
        if sf is not None:
            sf.write(path, audio, sample_rate)
        else:
            import wave

            with wave.open(path, "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit PCM
                wf.setframerate(sample_rate)
                wf.writeframes((audio * 32767).astype(np.int16).tobytes())
    except Exception:
        # Do not leave a truncated file behind when writing fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
def _real_generate(text: str, voice_desc: str) -> "str | None":
    """Request speech for *text* from the remote endpoint and save it to disk.

    POSTs ``{"text": ..., "voice_desc": ...}`` as JSON to ``MODAL_VOICE_URL``
    (following redirects, with ``VOICE_TIMEOUT`` as the client timeout) and
    writes the raw response body to a new temp file.

    Args:
        text: The line to speak.
        voice_desc: Voice description sent alongside the text.

    Returns:
        Path of a newly created ``.wav`` temp file holding the response body
        (the body is assumed to be WAV data -- it is stored as received), or
        ``None`` if anything fails: missing ``httpx``, network/HTTP error,
        empty body, or a write error. Failures are logged to stdout.
    """
    try:
        import httpx  # lazy: only the remote backend needs it

        payload = {"text": text, "voice_desc": voice_desc}
        with httpx.Client(timeout=VOICE_TIMEOUT, follow_redirects=True) as client:
            resp = client.post(MODAL_VOICE_URL, json=payload)
            resp.raise_for_status()
            audio_bytes = resp.content

        # A 2xx reply with no body would otherwise become a zero-byte "wav"
        # that the caller cannot play.
        if not audio_bytes:
            raise ValueError("empty audio response")

        # Unique file per call: a fixed per-process name would let a later
        # request overwrite audio that an earlier caller is still using.
        fd, path = tempfile.mkstemp(prefix="tinyworld_voice_", suffix=".wav")
        with os.fdopen(fd, "wb") as f:
            f.write(audio_bytes)
        return path
    except Exception as e:
        print(f"[voice] Modal call failed: {e}")
        return None
if __name__ == "__main__":
    # Manual smoke test: synthesize one line with the first character's voice
    # and check that an audio file was produced.
    import characters as c  # project-local module providing CHARACTERS

    path = generate_voice("Hello there!", c.CHARACTERS[0]["voice_description"])
    print(path)
    # generate_voice returns None on failure, and os.path.exists(None) raises
    # TypeError rather than returning False. An explicit check (instead of
    # assert, which is stripped under -O) keeps the failure message intact.
    if path is None or not os.path.exists(path):
        raise SystemExit(f"File not found: {path}")
    print("OK")