Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import numpy as np | |
# URL that remote voice requests are POSTed to. The MODAL_VOICE_URL
# environment variable overrides the built-in default.
MODAL_VOICE_URL = os.getenv(
    "MODAL_VOICE_URL",
    "https://mitvho09--tinyworld-inference-voice-endpoint.modal.run",
)

# Timeout handed to the HTTP client for voice requests, read from
# TINYWORLD_VOICE_TIMEOUT (default "25"). It is parsed once at import time,
# so a non-numeric value raises ValueError when the module is loaded.
VOICE_TIMEOUT = float(os.getenv("TINYWORLD_VOICE_TIMEOUT", "25"))
def _is_mock() -> bool:
    """Report whether mock mode is switched on.

    Mock mode is active only when the TINYWORLD_MOCK environment variable is
    exactly "1". The variable is re-read on every call, so it can be toggled
    while the process is running.
    """
    flag = os.environ.get("TINYWORLD_MOCK", "0")
    return flag == "1"
def _backend() -> str:
    """Return the inference backend named by TINYWORLD_INFER, lower-cased.

    Falls back to "modal" when the variable is unset. The environment is
    consulted on every call.
    """
    selected = os.environ.get("TINYWORLD_INFER", "modal")
    return selected.lower()
def build_voice_description(character) -> str:
    """Return the character's "voice_description" entry, or a neutral fallback.

    ``character`` must offer a dict-style ``get``. When the key is absent the
    literal "(a neutral voice)" is returned instead.
    """
    fallback = "(a neutral voice)"
    return character.get("voice_description", fallback)
def generate_voice(text: str, voice_desc: str) -> "str | None":
    """Synthesize ``text`` to an audio file and return the file's path.

    The backend is chosen in this order:

    1. mock mode (``_is_mock()`` is true): a synthetic placeholder tone;
    2. ``_backend() == "local"``: the project's ``inference`` module;
    3. anything else: the remote endpoint via ``_real_generate``.

    Args:
        text: The line to speak.
        voice_desc: Voice description forwarded to the local or remote
            backend. It is ignored in mock mode.

    Returns:
        Path to the generated audio file, or ``None`` when generation failed.
        This function is best-effort: any ``Exception`` is reported on stdout
        and converted to ``None``, so callers must handle a missing path.
    """
    try:
        if _is_mock():
            return _mock_generate(text)
        if _backend() == "local":
            import inference  # ZeroGPU VoxCPM2, imported lazily

            return inference.synthesize_voice(text, voice_desc)
        return _real_generate(text, voice_desc)
    except Exception as e:
        print(f"[voice] generation failed: {e}")
        # Deliberately no second attempt at the mock generator here: in mock
        # mode the exception came from that generator, so retrying it would
        # just raise again and escape this best-effort wrapper.
        return None
def _mock_generate(text: str) -> str:
    """Write a short placeholder tone for ``text`` to a WAV file.

    Audible placeholder so the voice/hear features are verifiable without a
    GPU. The clip is mono at 24 kHz. Its length grows with the word count
    (0.5 s plus 0.03 s per whitespace-separated word, capped at 1.6 s), and
    its pitch is derived from a CRC of the text, so the same line gets the
    same pitch in every process.

    Every call writes to its own fresh temporary file, so a later call never
    overwrites audio that an earlier caller may still be reading. The caller
    owns the returned file and is responsible for deleting it.

    Args:
        text: The line being "spoken"; only its word count and checksum are
            used.

    Returns:
        Path to the newly written ``.wav`` file.

    Raises:
        Exception: Whatever the audio writer raises (for example ``OSError``).
            The partially written file is removed before re-raising.
    """
    import zlib

    sample_rate = 24000
    duration = min(1.6, 0.5 + 0.03 * len(text.split()))
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    # crc32 instead of hash(): str hashes are salted per interpreter process,
    # which made the "per-line" pitch differ from one run to the next.
    checksum = zlib.crc32(text.encode("utf-8", "replace"))
    base = 150 + (checksum % 120)  # per-line pitch
    wobble = 1 + 0.04 * np.sin(2 * np.pi * 5 * t)  # gentle speech-like wobble
    tone = 0.22 * np.sin(2 * np.pi * base * wobble * t)
    tone += 0.08 * np.sin(2 * np.pi * base * 2 * t)
    env = np.minimum(1.0, np.minimum(t * 12, (duration - t) * 8))  # fade in/out
    audio = (tone * env).astype(np.float32)

    # mkstemp yields a unique name per call; the descriptor is closed at once
    # because both writers below reopen the file by path.
    fd, path = tempfile.mkstemp(prefix="tinyworld_voice_", suffix=".wav")
    os.close(fd)
    try:
        try:
            import soundfile as sf
        except ImportError:
            sf = None

        if sf is not None:
            sf.write(path, audio, sample_rate)
        else:
            # Standard-library fallback: 16-bit PCM via the wave module.
            import wave

            with wave.open(path, "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(sample_rate)
                wf.writeframes((audio * 32767).astype(np.int16).tobytes())
    except Exception:
        # Do not leave an empty or truncated clip behind on failure.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
def _real_generate(text: str, voice_desc: str) -> "str | None":
    """Request speech from the remote voice endpoint and save it to disk.

    Sends ``{"text": ..., "voice_desc": ...}`` as JSON in a POST to
    ``MODAL_VOICE_URL`` (timeout ``VOICE_TIMEOUT``, redirects followed) and
    writes the response body verbatim to a fresh temporary ``.wav`` file. The
    body is not inspected beyond checking that it is non-empty.

    Every call gets its own file, so a later call never overwrites audio that
    an earlier caller may still be reading. The caller owns the returned file
    and is responsible for deleting it.

    Args:
        text: The line to speak.
        voice_desc: Voice description forwarded to the endpoint.

    Returns:
        Path to the saved audio, or ``None`` on any failure (missing
        ``httpx``, network or HTTP error, empty body, write error). Failures
        are reported on stdout rather than raised.
    """
    try:
        import httpx

        payload = {"text": text, "voice_desc": voice_desc}
        with httpx.Client(timeout=VOICE_TIMEOUT, follow_redirects=True) as client:
            resp = client.post(MODAL_VOICE_URL, json=payload)
            resp.raise_for_status()
            audio_bytes = resp.content

        # A successful status with no body is not playable audio; treat it as
        # a failure instead of handing back a zero-byte file.
        if not audio_bytes:
            raise ValueError("empty response body")

        fd, path = tempfile.mkstemp(prefix="tinyworld_voice_", suffix=".wav")
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(audio_bytes)
        except Exception:
            # Do not leave a truncated file behind if the write fails.
            try:
                os.remove(path)
            except OSError:
                pass
            raise
        return path
    except Exception as e:
        print(f"[voice] Modal call failed: {e}")
        return None
if __name__ == "__main__":
    # Manual smoke test: synthesize one line in the first character's voice
    # and confirm that an audio file was produced.
    import characters as c

    path = generate_voice("Hello there!", build_voice_description(c.CHARACTERS[0]))
    print(path)
    # generate_voice returns None on failure, and os.path.exists(None) raises
    # TypeError, so check for None first. An explicit exit is used instead of
    # `assert` because assertions are stripped when Python runs with -O.
    if path is None or not os.path.exists(path):
        raise SystemExit(f"File not found: {path}")
    print("OK")