Spaces:
Running
Running
| # LLM Provider Selection | |
| LLM_PROVIDER=ollama # Options: nvidia, ollama | |
| MCP_ENABLED=true | |
| MCP_SERVER_URL=https://huggingface.co/mcp | |
| MCP_EXTRA_SERVER_URLS=https://docs.livekit.io/mcp # Comma-separated extra MCP servers (set empty to disable) | |
| # STT Provider Selection | |
| STT_PROVIDER=moonshine # Options: moonshine, nvidia, deepgram | |
| # Moonshine STT Settings (local speech-to-text) | |
| MOONSHINE_MODEL_ID=usefulsensors/moonshine-streaming-medium | |
| MOONSHINE_LANGUAGE=en | |
| # Deepgram STT Settings (cloud speech-to-text) | |
| DEEPGRAM_STT_MODEL=nova-3 | |
| DEEPGRAM_STT_LANGUAGE=en-US | |
| # NVIDIA STT Settings (cloud speech-to-text) | |
| NVIDIA_STT_API_KEY= # Optional: uses NVIDIA_API_KEY if not set | |
| NVIDIA_STT_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer | |
| NVIDIA_STT_LANGUAGE_CODE=en-US | |
| # NVIDIA API Key (shared by LLM and STT unless NVIDIA_STT_API_KEY is set) | |
| NVIDIA_API_KEY=your_nvidia_api_key_here | |
| # NVIDIA LLM Settings | |
| NVIDIA_MODEL=meta/llama-3.1-8b-instruct | |
| # Ollama LLM Settings | |
| OLLAMA_CLOUD_MODE=true | |
| OLLAMA_MODEL=qwen3-next:80b | |
| OLLAMA_API_KEY=your_ollama_api_key_here | |
| # Set OLLAMA_CLOUD_MODE=false to use local Ollama at http://localhost:11434/v1. | |
| # Local mode can keep OLLAMA_API_KEY=ollama if your server ignores auth. | |
| # Note: do not use ":cloud" aliases with the /v1 endpoint. | |
| # Langfuse Tracing Settings (optional) | |
| LANGFUSE_ENABLED=false | |
| LANGFUSE_HOST=https://cloud.langfuse.com | |
| LANGFUSE_BASE_URL= # Optional alternative to LANGFUSE_HOST | |
| LANGFUSE_PROJECT_ID= # Required for frontend deep links: project/<project_id>/... | |
| LANGFUSE_PUBLIC_KEY= | |
| LANGFUSE_SECRET_KEY= | |
| LANGFUSE_PUBLIC_TRACES=false # Mark traces public so non-members can open shared links | |
| LANGFUSE_ASSISTANT_TEXT_GRACE_TIMEOUT_MS=500 # Short wait for assistant text on normal turns | |
| LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS=8000 # Legacy fallback retained for compatibility | |
| LANGFUSE_POST_TOOL_RESPONSE_TIMEOUT_MS=30000 | |
| LANGFUSE_MAX_PENDING_TRACE_TASKS=200 | |
| LANGFUSE_TRACE_FLUSH_TIMEOUT_MS=1000 | |
| LANGFUSE_CONTINUATION_COALESCE_WINDOW_MS=1500 # Merge immediate continuation turns into one trace; 0 disables it | |
| # Common LLM Parameters | |
| LLM_TEMPERATURE=0.7 | |
| LLM_MAX_TOKENS=1024 | |
| # LLM/MCP API timeout/retry tuning. | |
| LLM_CONN_TIMEOUT_SEC=20.0 | |
| MCP_CONN_TIMEOUT_SEC=20.0 # Timeout for one MCP tool request/response cycle | |
| LLM_CONN_MAX_RETRY=1 | |
| LLM_CONN_RETRY_INTERVAL_SEC=1.0 | |
| TURN_LLM_STALL_TIMEOUT_SEC=12.0 | |
| MCP_STARTUP_GREETING_TIMEOUT_SEC=0.0 # Set >0 to force-interrupt slow startup greetings; 0 disables the cutoff | |
| # TTS Provider Selection | |
| TTS_PROVIDER=pocket # Options: pocket, deepgram, nvidia | |
| DEEPGRAM_API_KEY= # Required when STT_PROVIDER=deepgram or TTS_PROVIDER=deepgram | |
| NVIDIA_TTS_API_KEY= # Optional: uses NVIDIA_API_KEY if not set | |
| # NVIDIA TTS Settings (cloud or self-hosted Riva) | |
| NVIDIA_TTS_VOICE=Magpie-Multilingual.EN-US.Leo | |
| NVIDIA_TTS_LANGUAGE_CODE=en-US | |
| NVIDIA_TTS_SERVER=grpc.nvcf.nvidia.com:443 | |
| NVIDIA_TTS_FUNCTION_ID=877104f7-e885-42b9-8de8-f6e4c6303969 | |
| NVIDIA_TTS_USE_SSL=true # Set false for self-hosted Riva without TLS | |
| # Pocket TTS Settings (local text-to-speech) | |
| POCKET_TTS_VOICE=alba | |
| POCKET_TTS_TEMPERATURE=0.7 | |
| POCKET_TTS_LSD_DECODE_STEPS=1 | |
| POCKET_TTS_CONN_TIMEOUT_SEC=45.0 # Timeout for one PocketTTS synthesis attempt | |
| # PocketTTS output sample rate is fixed at its native 24 kHz. | |
| # LiveKit Settings | |
| LIVEKIT_URL=wss://your-livekit-server.example.com | |
| LIVEKIT_API_KEY=your_livekit_api_key_here | |
| LIVEKIT_API_SECRET=your_livekit_api_secret_here | |
| LIVEKIT_AGENT_NAME=open-voice-agent-local # Use a unique name per environment to avoid worker collisions | |
| LIVEKIT_NUM_IDLE_PROCESSES=1 # Use 0-1 locally to reduce memory pressure | |
| LIVEKIT_INITIALIZE_PROCESS_TIMEOUT_SEC=20.0 # Idle worker bootstrap timeout in seconds; increase if workers are slow to initialize | |
| LIVEKIT_JOB_MEMORY_WARN_MB=6144 # Per-job memory warning threshold (6 GB) | |
| # LiveKit audio input configuration | |
| LIVEKIT_SAMPLE_RATE=24000 | |
| LIVEKIT_NUM_CHANNELS=1 | |
| LIVEKIT_FRAME_SIZE_MS=60 # Larger frames slightly reduce responsiveness but avoid over-eager VAD transitions | |
| LIVEKIT_PRE_CONNECT_AUDIO=true | |
| LIVEKIT_PRE_CONNECT_TIMEOUT=3.0 | |
| # Voice Activity Detection (VAD) configuration | |
| VAD_MIN_SPEECH_DURATION=0.18 # Require 180ms of speech before activation | |
| VAD_MIN_SILENCE_DURATION=0.55 # Wait longer before treating a pause as end of speech | |
| VAD_THRESHOLD=0.5 # Silero default; keep balanced sensitivity for speech vs background noise | |
| # Turn endpointing tuning | |
| MIN_ENDPOINTING_DELAY=0.5 # Default turn commit delay before endpointing | |
| MAX_ENDPOINTING_DELAY=3.0 # Let the detector wait longer when phrasing suggests continuation | |
| PREEMPTIVE_GENERATION=false # Wait for the committed turn before generating a reply | |