voice2 / examples /http_llm.py
AIIT-Threshold's picture
voice2 1.0.0 — full-duplex interruptible voice engine for local AI
050e596 verified
Raw
History Blame Contribute Delete
1.23 kB
"""
Wire the voice engine to any local LLM behind an HTTP API.
Works with anything that takes text and returns text — llama.cpp server,
Ollama, vLLM, or your own FastAPI wrapper. Edit `ask()` to match your API.
Usage:
    LLM_URL=http://localhost:8000/chat python examples/http_llm.py

Set LLM_MODEL to choose the model name sent with each request
(default: llama3.2).
"""
import os
import signal
import httpx
from voice2 import VoiceEngine, VoiceConfig
# Backend endpoint and model name; both can be overridden through the
# environment (LLM_URL, LLM_MODEL) without editing this file.
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434/api/generate")
MODEL = os.getenv("LLM_MODEL", "llama3.2")
def ask(text: str) -> str:
    """Send *text* to the HTTP backend and return its reply as plain text.

    The request body follows the Ollama ``/api/generate`` shape
    (``model``, ``prompt``, ``stream``) and the reply is read from the
    ``response`` key of the returned JSON. Adapt both to your own server.

    Returns:
        The stripped reply, ``"..."`` when the reply is missing or blank,
        or a ``"Backend error: ..."`` message if anything goes wrong while
        talking to the backend. This function never raises.
    """
    payload = {"model": MODEL, "prompt": text, "stream": False}
    try:
        reply = httpx.post(LLM_URL, json=payload, timeout=120.0)
        reply.raise_for_status()
        answer = reply.json().get("response", "").strip()
    except Exception as err:
        # Best-effort boundary: report the failure as the reply text
        # instead of propagating the exception to the caller.
        return f"Backend error: {err}"
    return answer if answer else "..."
def main() -> None:
    """Start the voice engine and block until the user presses Ctrl+C.

    Builds a ``VoiceEngine`` from a default ``VoiceConfig`` and ``ask``,
    loads its models, starts it, then waits. The engine is always stopped
    on the way out, whether the wait ends normally, by Ctrl+C, or with an
    unexpected exception.
    """
    engine = VoiceEngine(VoiceConfig(), ask)
    engine.load_models()
    engine.start()
    print("Online. Talk naturally. Space = interrupt, Ctrl+C = quit.")
    try:
        if hasattr(signal, "pause"):
            signal.pause()
        else:
            # signal.pause() is Unix-only; on other platforms (e.g. Windows)
            # idle in a sleep loop, which Ctrl+C can still interrupt.
            import time

            while True:
                time.sleep(1.0)
    except KeyboardInterrupt:
        pass
    finally:
        # Runs even if something other than Ctrl+C ends the wait, so the
        # engine is not left running.
        engine.stop()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()