import os
import tempfile
from typing import Optional

import numpy as np

# Remote voice endpoint; the MODAL_VOICE_URL environment variable overrides
# the built-in default.
MODAL_VOICE_URL = os.getenv(
    "MODAL_VOICE_URL",
    "https://mitvho09--tinyworld-inference-voice-endpoint.modal.run",
)

# Timeout handed to the HTTP client for a voice request. Read from
# TINYWORLD_VOICE_TIMEOUT (default "25") and parsed as a float at import time,
# so a non-numeric value raises ValueError when this module is imported.
VOICE_TIMEOUT = float(os.getenv("TINYWORLD_VOICE_TIMEOUT", "25"))


def _is_mock() -> bool:
    """Report whether mock mode is switched on.

    Mock mode is on only when the ``TINYWORLD_MOCK`` environment variable is
    exactly ``"1"``; an unset variable is treated as ``"0"``. The variable is
    re-read on every call.
    """
    flag = os.getenv("TINYWORLD_MOCK", "0")
    return flag == "1"


def _backend() -> str:
    """Return the lower-cased inference backend name.

    The name comes from the ``TINYWORLD_INFER`` environment variable and
    falls back to ``"modal"`` when the variable is unset. It is re-read on
    every call.
    """
    choice = os.getenv("TINYWORLD_INFER", "modal")
    return choice.lower()


def build_voice_description(character) -> str:
    """Look up the voice description stored on a character.

    Args:
        character: Mapping-like object exposing ``.get``.

    Returns:
        The value stored under ``"voice_description"``, or
        ``"(a neutral voice)"`` when that key is absent. A key that is
        present is returned as stored, without validation.
    """
    fallback = "(a neutral voice)"
    description = character.get("voice_description", fallback)
    return description


def generate_voice(text: str, voice_desc: str) -> Optional[str]:
    """Synthesize ``text`` to an audio file and return its path.

    The backend is chosen on every call: the placeholder tone when
    ``_is_mock()`` is true, the in-process ``inference`` module when
    ``_backend()`` is ``"local"``, and the remote endpoint through
    ``_real_generate`` otherwise.

    Args:
        text: Line to speak.
        voice_desc: Voice description forwarded to the local and remote
            backends; the mock backend does not use it.

    Returns:
        Path of the generated audio file, or ``None`` when generation
        failed. Failures raised as ``Exception`` are printed and converted
        to ``None`` rather than propagated.
    """
    try:
        if _is_mock():
            return _mock_generate(text)
        if _backend() == "local":
            import inference  # ZeroGPU VoxCPM2, imported lazily
            return inference.synthesize_voice(text, voice_desc)
        return _real_generate(text, voice_desc)
    except Exception as e:
        print(f"[voice] generation failed: {e}")
        if not _is_mock():
            return None
        # Mock mode gets one more attempt at the placeholder tone. The retry
        # runs inside an ``except`` handler, so it needs its own guard:
        # without it a second failure would escape this otherwise
        # non-raising function.
        try:
            return _mock_generate(text)
        except Exception as retry_error:
            print(f"[voice] mock fallback failed: {retry_error}")
            return None


def _mock_generate(text: str) -> str:
    """Write a short placeholder tone for ``text`` to a WAV file.

    The clip is written with ``soundfile`` when that package can be imported
    and with the standard-library ``wave`` module (16-bit PCM, mono)
    otherwise.

    Args:
        text: Line being "spoken"; its word count sets the clip length and
            its hash sets the pitch.

    Returns:
        Path of the WAV file. The name depends only on the process id, so
        each call in the same process overwrites the previous clip.
    """
    # Audible placeholder so the voice/hear features are verifiable without a GPU.
    sample_rate = 24000
    # 0.5 s plus 30 ms per whitespace-separated word, capped at 1.6 s.
    duration = min(1.6, 0.5 + 0.03 * len(text.split()))
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
    # Per-line pitch in [150, 270) Hz. str hashes are salted per process
    # unless PYTHONHASHSEED is set, so the pitch of a given line is only
    # stable within one run.
    base = 150 + (hash(text) % 120)
    wobble = 1 + 0.04 * np.sin(2 * np.pi * 5 * t)  # gentle speech-like wobble
    tone = 0.22 * np.sin(2 * np.pi * base * wobble * t)
    tone += 0.08 * np.sin(2 * np.pi * base * 2 * t)
    # Linear fade-in over 1/12 s and fade-out over 1/8 s.
    env = np.minimum(1.0, np.minimum(t * 12, (duration - t) * 8))  # fade in/out
    audio = (tone * env).astype(np.float32)
    path = os.path.join(tempfile.gettempdir(), f"tinyworld_voice_{os.getpid()}.wav")

    # Probe for soundfile separately from the write. Importing soundfile
    # raises OSError (not ImportError) when the libsndfile shared library is
    # missing, and that case must also reach the wave fallback.
    try:
        import soundfile as sf
    except (ImportError, OSError):
        sf = None

    if sf is not None:
        sf.write(path, audio, sample_rate)
    else:
        import wave
        with wave.open(path, "w") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            # Peak amplitude is at most 0.22 + 0.08 = 0.30, so scaling to
            # int16 cannot overflow.
            wf.writeframes((audio * 32767).astype(np.int16).tobytes())
    return path


def _real_generate(text: str, voice_desc: str) -> Optional[str]:
    """Request speech from the remote voice endpoint and save it as a WAV file.

    Sends ``{"text": ..., "voice_desc": ...}`` as JSON in a POST to
    ``MODAL_VOICE_URL`` (timeout ``VOICE_TIMEOUT``, redirects followed) and
    writes the raw response body to a ``.wav`` file in the temp directory.

    Args:
        text: Line to speak.
        voice_desc: Voice description forwarded to the endpoint.

    Returns:
        Path of the written file, or ``None`` on any failure (``httpx``
        missing, transport error, non-success status, empty body, write
        error). Failures are printed, not raised. The file name depends only
        on the process id, so each call in the same process overwrites the
        previous clip.
    """
    try:
        import httpx

        payload = {"text": text, "voice_desc": voice_desc}

        with httpx.Client(timeout=VOICE_TIMEOUT, follow_redirects=True) as client:
            resp = client.post(MODAL_VOICE_URL, json=payload)
            resp.raise_for_status()

        audio_bytes = resp.content
        if not audio_bytes:
            # raise_for_status() only rejects 4xx/5xx responses. Without this
            # check, an empty success body would be written as a zero-byte
            # .wav and reported as a usable clip.
            raise ValueError("empty audio response")

        path = os.path.join(tempfile.gettempdir(), f"tinyworld_voice_{os.getpid()}.wav")
        with open(path, "wb") as f:
            f.write(audio_bytes)

        return path

    except Exception as e:
        print(f"[voice] Modal call failed: {e}")
        return None


if __name__ == "__main__":
    # Manual smoke test: synthesize one line with the first character's voice
    # and check that an audio file was produced.
    import characters as c

    path = generate_voice("Hello there!", c.CHARACTERS[0]["voice_description"])
    print(path)
    # generate_voice() returns None on failure, and os.path.exists(None)
    # raises TypeError, which would hide the real problem. This is an
    # explicit check rather than an assert so it still runs under ``python -O``.
    if path is None or not os.path.exists(path):
        raise SystemExit(f"File not found: {path}")
    print("OK")