voice2 1.0.0 — full-duplex interruptible voice engine for local AI

050e596 verified 2 days ago

1.23 kB

	"""
	Wire the voice engine to any local LLM behind an HTTP API.

	Works with anything that takes text and returns text — llama.cpp server,
	Ollama, vLLM, or your own FastAPI wrapper. Edit `ask()` to match your API.

	Usage:
	LLM_URL=http://localhost:8000/chat python examples/http_llm.py
	"""
	import os
	import signal

	import httpx

	from voice2 import VoiceEngine, VoiceConfig

	# Backend endpoint and model name. Both can be overridden through the
	# environment (LLM_URL / LLM_MODEL); the defaults are the ones used below
	# by the Ollama-style request in `ask()`.
	LLM_URL = os.getenv("LLM_URL", "http://localhost:11434/api/generate")
	MODEL = os.getenv("LLM_MODEL", "llama3.2")


	def ask(text: str) -> str:
	    """Send ``text`` to the LLM backend and return its reply as plain text.

	    The request is Ollama-style: a non-streaming JSON POST to ``LLM_URL``
	    carrying ``MODEL`` and the prompt. Adapt the payload and the response
	    field to whatever your own server expects.

	    Args:
	        text: The prompt to forward to the backend.

	    Returns:
	        The stripped ``"response"`` field of the JSON reply, or ``"..."``
	        when that field is missing or empty. Any failure (connection
	        error, non-2xx status, malformed reply) is reported as a
	        ``"Backend error: ..."`` string instead of being raised.
	    """
	    payload = {"model": MODEL, "prompt": text, "stream": False}
	    try:
	        reply = httpx.post(LLM_URL, json=payload, timeout=120.0)
	        reply.raise_for_status()
	        answer = reply.json().get("response", "").strip()
	    except Exception as err:
	        # Deliberately broad: every failure is turned into reply text.
	        return f"Backend error: {err}"
	    # Never hand back an empty string; use a placeholder instead.
	    return answer if answer else "..."


	def main() -> None:
	    """Start the voice engine and keep it running until Ctrl+C.

	    Builds a ``VoiceEngine`` from a default ``VoiceConfig`` and the
	    ``ask`` function, loads its models, starts it, and then blocks the
	    main thread. The engine is always stopped on the way out, whether the
	    wait ends normally, via Ctrl+C, or through an unexpected exception.
	    """
	    engine = VoiceEngine(VoiceConfig(), ask)
	    engine.load_models()
	    engine.start()
	    print("Online. Talk naturally. Space = interrupt, Ctrl+C = quit.")
	    try:
	        if hasattr(signal, "pause"):
	            # Unix: sleep until a signal handler runs (Ctrl+C -> SIGINT).
	            signal.pause()
	        else:
	            # signal.pause() does not exist on Windows; fall back to a
	            # short sleep loop, which stays interruptible by Ctrl+C.
	            import time

	            while True:
	                time.sleep(1.0)
	    except KeyboardInterrupt:
	        pass
	    finally:
	        # Runs on every exit path so the engine is not left running.
	        engine.stop()


	# Script entry point: run the voice loop only when executed directly,
	# not when this module is imported.
	if __name__ == "__main__":
	    main()