Spaces:

build-small-hackathon
/

tinyworld

Sleeping

App Files Files Community

tinyworld / voice.py

sush0401

TinyWorld + Crisis Mode, ZeroGPU in-process inference

d3a7a1c verified 14 days ago

Raw

History Blame Contribute Delete

2.93 kB

	import os
	import tempfile
	import numpy as np

	# HTTP endpoint used for remote voice synthesis. The MODAL_VOICE_URL
	# environment variable overrides the built-in default below.
	MODAL_VOICE_URL = os.getenv(
	    "MODAL_VOICE_URL",
	    "https://mitvho09--tinyworld-inference-voice-endpoint.modal.run",
	)

	# Timeout value handed to the HTTP client for voice requests. Read from
	# TINYWORLD_VOICE_TIMEOUT (default "25") and converted to float at import.
	VOICE_TIMEOUT = float(os.getenv("TINYWORLD_VOICE_TIMEOUT", "25"))


	def _is_mock() -> bool:
	    """Report whether mock mode is on, i.e. TINYWORLD_MOCK is exactly "1".

	    The variable is read on every call; when it is unset the value "0" is
	    assumed, so mock mode is off by default.
	    """
	    flag = os.environ.get("TINYWORLD_MOCK", "0")
	    return flag == "1"


	def _backend() -> str:
	    """Return the lower-cased TINYWORLD_INFER setting ("modal" when unset).

	    The environment is consulted on every call rather than cached.
	    """
	    selected = os.environ.get("TINYWORLD_INFER", "modal")
	    return selected.lower()


	def build_voice_description(character) -> str:
	    """Return the character's "voice_description" entry, or a neutral fallback.

	    ``character`` only needs a dict-style ``get``. The fallback string is
	    returned when the key is absent; a stored value is returned as-is.
	    """
	    fallback = "(a neutral voice)"
	    return character.get("voice_description", fallback)


	def generate_voice(text: str, voice_desc: str) -> "str | None":
	    """Synthesize speech for ``text`` and return the result of the backend.

	    Backend selection, checked in this order:
	      * mock mode (``_is_mock()``): ``_mock_generate(text)``;
	      * ``_backend() == "local"``: ``inference.synthesize_voice``;
	      * anything else: ``_real_generate`` (HTTP call).

	    Args:
	        text: The line to speak.
	        voice_desc: Voice description forwarded to the non-mock backends.

	    Returns:
	        Whatever the chosen backend returns (a file path on success), or
	        ``None`` when the backend raised. Exceptions are logged with
	        ``print`` and are not propagated to the caller.
	    """
	    try:
	        if _is_mock():
	            return _mock_generate(text)
	        if _backend() == "local":
	            import inference  # ZeroGPU VoxCPM2, imported lazily
	            return inference.synthesize_voice(text, voice_desc)
	        return _real_generate(text, voice_desc)
	    except Exception as e:
	        print(f"[voice] generation failed: {e}")
	        # Do not retry the mock generator here: in mock mode it is the only
	        # call that can have raised, and a second failure would escape this
	        # handler and break the "returns None on failure" contract.
	        return None


	def _mock_generate(text: str) -> str:
	    """Write a short synthetic tone to a new WAV file and return its path.

	    Audible placeholder so the voice/hear features are verifiable without a
	    GPU. The clip is sampled at 24 kHz and lasts 0.5 s plus 0.03 s per
	    whitespace-separated word, capped at 1.6 s. The pitch is derived from
	    ``text`` so different lines sound different.

	    Each call writes to its own temporary file, so concurrent or successive
	    calls never overwrite each other's audio. The caller owns the returned
	    file and is responsible for deleting it.

	    Args:
	        text: The line being "spoken"; only its word count and checksum are
	            used.

	    Returns:
	        Path of the WAV file that was written.

	    Raises:
	        OSError: If the temporary file cannot be created or written.
	    """
	    import zlib

	    sample_rate = 24000
	    duration = min(1.6, 0.5 + 0.03 * len(text.split()))
	    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
	    # crc32 instead of hash(): str hashes are salted per process, which made
	    # the same line come out at a different pitch after every restart.
	    checksum = zlib.crc32(text.encode("utf-8", "replace"))
	    base = 150 + (checksum % 120)  # per-line pitch
	    wobble = 1 + 0.04 * np.sin(2 * np.pi * 5 * t)  # gentle speech-like wobble
	    tone = 0.22 * np.sin(2 * np.pi * base * wobble * t)
	    tone += 0.08 * np.sin(2 * np.pi * base * 2 * t)
	    env = np.minimum(1.0, np.minimum(t * 12, (duration - t) * 8))  # fade in/out
	    audio = (tone * env).astype(np.float32)

	    # Reserve a unique name; a PID-only name was shared by every call made
	    # from the same process.
	    fd, path = tempfile.mkstemp(prefix="tinyworld_voice_", suffix=".wav")
	    os.close(fd)  # the writers below reopen the file by name

	    # Keep the try narrow: only a missing soundfile package should select
	    # the stdlib fallback, not an error raised while writing.
	    try:
	        import soundfile as sf
	    except ImportError:
	        sf = None

	    try:
	        if sf is not None:
	            sf.write(path, audio, sample_rate)
	        else:
	            import wave
	            with wave.open(path, "wb") as wf:
	                wf.setnchannels(1)
	                wf.setsampwidth(2)  # 16-bit PCM
	                wf.setframerate(sample_rate)
	                wf.writeframes((audio * 32767).astype(np.int16).tobytes())
	    except Exception:
	        # Do not leave an empty placeholder file behind on failure.
	        try:
	            os.remove(path)
	        except OSError:
	            pass
	        raise
	    return path


	def _real_generate(text: str, voice_desc: str) -> "str | None":
	    """POST the text to the voice endpoint and save the returned bytes.

	    Sends ``{"text": ..., "voice_desc": ...}`` as JSON to ``MODAL_VOICE_URL``
	    (following redirects, with ``VOICE_TIMEOUT`` as the client timeout) and
	    writes the response body to a new temporary ``.wav`` file. The body is
	    not validated beyond being non-empty.

	    Each call writes to its own temporary file, so results from separate
	    calls never overwrite one another. The caller owns the returned file.

	    Args:
	        text: The line to speak.
	        voice_desc: Voice description sent alongside the text.

	    Returns:
	        Path of the written file, or ``None`` if anything failed (missing
	        ``httpx``, network/HTTP error, empty response, write error). The
	        failure is logged with ``print``.
	    """
	    try:
	        import httpx

	        payload = {"text": text, "voice_desc": voice_desc}

	        with httpx.Client(timeout=VOICE_TIMEOUT, follow_redirects=True) as client:
	            resp = client.post(MODAL_VOICE_URL, json=payload)
	            resp.raise_for_status()
	            audio_bytes = resp.content

	        # An empty body would otherwise be returned as a "valid" audio file.
	        if not audio_bytes:
	            raise ValueError("empty audio response")

	        # Unique name per call; a PID-only name was shared by every request
	        # served from the same process.
	        with tempfile.NamedTemporaryFile(
	            prefix="tinyworld_voice_", suffix=".wav", delete=False
	        ) as f:
	            f.write(audio_bytes)
	            path = f.name

	        return path

	    except Exception as e:
	        print(f"[voice] Modal call failed: {e}")
	        return None


	if __name__ == "__main__":
	    # Manual smoke test: synthesize one line with the first character's
	    # voice and confirm that an audio file was actually produced.
	    import characters as c

	    path = generate_voice("Hello there!", c.CHARACTERS[0]["voice_description"])
	    print(path)
	    # generate_voice() returns None on failure, and os.path.exists(None)
	    # raises TypeError, so report that case explicitly first.
	    if path is None:
	        raise SystemExit("Voice generation failed: no file was produced")
	    # Explicit check rather than assert so it still runs under `python -O`.
	    if not os.path.exists(path):
	        raise SystemExit(f"File not found: {path}")
	    print("OK")