| """ |
| Wire the voice engine to any local LLM behind an HTTP API. |
| |
| Works with anything that takes text and returns text — llama.cpp server, |
| Ollama, vLLM, or your own FastAPI wrapper. Edit `ask()` to match your API. |
| |
| Usage: |
| LLM_URL=http://localhost:8000/chat python examples/http_llm.py |
| """ |
| import os |
| import signal |
|
|
| import httpx |
|
|
| from voice2 import VoiceEngine, VoiceConfig |
|
|
# Endpoint and model name are read from the environment once at import time,
# falling back to an Ollama-style local default when the variable is unset.
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434/api/generate")
MODEL = os.getenv("LLM_MODEL", "llama3.2")
|
|
|
|
def ask(text: str) -> str:
    """Send *text* to the LLM endpoint and return its reply as plain text.

    The request is an Ollama-style JSON body (``model``, ``prompt``,
    ``stream``) POSTed to ``LLM_URL``; adapt the payload and the response
    key to whatever your own server expects.

    Args:
        text: The prompt to forward to the model.

    Returns:
        The stripped ``"response"`` field of the JSON reply, ``"..."`` when
        that field is missing or blank, or a ``"Backend error: ..."``
        message when the request or response handling raises.
    """
    payload = {"model": MODEL, "prompt": text, "stream": False}
    try:
        reply = httpx.post(LLM_URL, json=payload, timeout=120.0)
        reply.raise_for_status()
        answer = reply.json().get("response", "").strip()
    except Exception as exc:
        # Best-effort on purpose: any failure is returned as the reply text
        # instead of propagating to the caller.
        return f"Backend error: {exc}"
    return answer if answer else "..."
|
|
|
|
def main() -> None:
    """Start the voice engine and block until the user presses Ctrl+C.

    Builds a ``VoiceEngine`` from a default ``VoiceConfig`` with ``ask`` as
    its second argument (presumably the text-in/text-out callback -- confirm
    against ``voice2``), loads its models, starts it, and then parks the
    main thread. ``engine.stop()`` runs on every exit path once the engine
    has been started, not only after a clean Ctrl+C.
    """
    engine = VoiceEngine(VoiceConfig(), ask)
    engine.load_models()
    engine.start()
    try:
        print("Online. Talk naturally. Space = interrupt, Ctrl+C = quit.")
        if hasattr(signal, "pause"):
            signal.pause()
        else:
            # signal.pause() exists only on Unix; elsewhere, idle in a sleep
            # loop that Ctrl+C can still interrupt.
            import time

            while True:
                time.sleep(1.0)
    except KeyboardInterrupt:
        pass
    finally:
        # Shut the engine down even if waiting failed for another reason.
        engine.stop()
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|