"""
Wire the voice engine to any local LLM behind an HTTP API.

Works with anything that takes text and returns text — llama.cpp server,
Ollama, vLLM, or your own FastAPI wrapper. Edit `ask()` to match your API.

Usage (LLM_URL and LLM_MODEL are optional; see the defaults below):
    LLM_URL=http://localhost:8000/chat LLM_MODEL=llama3.2 python examples/http_llm.py
"""
import os
import signal

import httpx

from voice2 import VoiceEngine, VoiceConfig

# Endpoint that `ask()` POSTs to; override with the LLM_URL environment variable.
LLM_URL = os.environ.get("LLM_URL", "http://localhost:11434/api/generate")
# Model name sent in the request payload; override with LLM_MODEL.
MODEL = os.environ.get("LLM_MODEL", "llama3.2")


def ask(text: str) -> str:
    """Send `text` to the LLM endpoint and return its reply as plain text.

    The request body follows the Ollama generate format (`model`, `prompt`,
    `stream`); adapt the payload and the response parsing to your server.

    Returns the stripped `response` field of the JSON reply, `"..."` when
    that field is missing or blank, or a `"Backend error: ..."` message if
    anything goes wrong while calling or parsing the backend.
    """
    payload = {"model": MODEL, "prompt": text, "stream": False}
    try:
        reply = httpx.post(LLM_URL, json=payload, timeout=120.0)
        # Turn 4xx/5xx statuses into exceptions so they are reported below.
        reply.raise_for_status()
        answer = reply.json().get("response", "").strip()
    except Exception as exc:
        # Best-effort: report the failure as the reply instead of raising.
        return f"Backend error: {exc}"
    # Never hand back an empty string; fall back to a placeholder.
    return answer if answer else "..."


def main() -> None:
    """Start the voice engine and keep it running until Ctrl+C.

    Builds a `VoiceEngine` from a default `VoiceConfig` and `ask`, loads its
    models, starts it, then idles until the user interrupts the process.
    Once the engine has been started, `engine.stop()` is always called on
    the way out, including when an unexpected exception propagates.
    """
    import time  # local import: only needed for the idle loop below

    engine = VoiceEngine(VoiceConfig(), ask)
    engine.load_models()
    engine.start()
    try:
        print("Online. Talk naturally. Space = interrupt, Ctrl+C = quit.")
        # Sleep in a loop rather than calling signal.pause(): pause() is
        # Unix-only and returns after *any* handled signal, which would shut
        # the engine down without the user having pressed Ctrl+C.
        while True:
            time.sleep(1.0)
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to quit; exit quietly.
        pass
    finally:
        engine.stop()


# Run the voice loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()