Spaces:

dvalle08
/

open-voice-agent

Running

App Files Files Community

open-voice-agent / .env.example

dvalle08

feat: Enhance turn tracing and metrics collection

04178a2 about 1 month ago

raw

history blame contribute delete

4.61 kB

	# LLM Provider Selection
	# NOTE: comments live on their own lines; some .env loaders (e.g. `docker run --env-file`)
	# treat trailing "# ..." text as part of the value.
	# Options: nvidia, ollama
	LLM_PROVIDER=ollama
	MCP_ENABLED=true
	MCP_SERVER_URL=https://huggingface.co/mcp
	# Comma-separated extra MCP servers (set empty to disable)
	MCP_EXTRA_SERVER_URLS=https://docs.livekit.io/mcp

	# STT Provider Selection
	# Options: moonshine, nvidia, deepgram
	STT_PROVIDER=moonshine

	# Moonshine STT Settings (local speech-to-text)
	# Relevant when STT_PROVIDER=moonshine.
	MOONSHINE_MODEL_ID=usefulsensors/moonshine-streaming-medium
	MOONSHINE_LANGUAGE=en

	# Deepgram STT Settings (cloud speech-to-text)
	# Relevant when STT_PROVIDER=deepgram; that provider also requires DEEPGRAM_API_KEY
	# (declared in the TTS provider section of this file).
	DEEPGRAM_STT_MODEL=nova-3
	DEEPGRAM_STT_LANGUAGE=en-US

	# NVIDIA STT Settings (cloud speech-to-text)
	# Optional: uses NVIDIA_API_KEY if not set.
	# Keep this comment off the value line: with an empty value, several loaders would
	# otherwise read the comment text itself as the key.
	NVIDIA_STT_API_KEY=
	NVIDIA_STT_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer
	NVIDIA_STT_LANGUAGE_CODE=en-US

	# NVIDIA API Key (shared by LLM, STT, and TTS; STT/TTS use it unless
	# NVIDIA_STT_API_KEY / NVIDIA_TTS_API_KEY is set)
	# Replace the placeholder with a real key; never commit real credentials.
	NVIDIA_API_KEY=your_nvidia_api_key_here

	# NVIDIA LLM Settings
	# Relevant when LLM_PROVIDER=nvidia.
	NVIDIA_MODEL=meta/llama-3.1-8b-instruct

	# Ollama LLM Settings
	# Relevant when LLM_PROVIDER=ollama. See the notes below the keys for local vs. cloud mode.
	OLLAMA_CLOUD_MODE=true
	OLLAMA_MODEL=qwen3-next:80b
	OLLAMA_API_KEY=your_ollama_api_key_here
	# Set OLLAMA_CLOUD_MODE=false to use a local Ollama server at http://localhost:11434/v1.
	# In local mode OLLAMA_API_KEY can stay set to "ollama" if your server ignores auth.
	# Note: do not use ":cloud" model aliases with the /v1 endpoint.

	# Langfuse Tracing Settings (optional)
	# Comments are kept on their own lines so that empty values stay empty in every loader.
	LANGFUSE_ENABLED=false
	LANGFUSE_HOST=https://cloud.langfuse.com
	# Optional alternative to LANGFUSE_HOST
	LANGFUSE_BASE_URL=
	# Required for frontend deep links: project/<project_id>/...
	LANGFUSE_PROJECT_ID=
	LANGFUSE_PUBLIC_KEY=
	LANGFUSE_SECRET_KEY=
	# Mark traces public so non-members can open shared links
	LANGFUSE_PUBLIC_TRACES=false
	# Short wait for assistant text on normal turns
	LANGFUSE_ASSISTANT_TEXT_GRACE_TIMEOUT_MS=500
	# Legacy fallback retained for compatibility
	LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS=8000
	LANGFUSE_POST_TOOL_RESPONSE_TIMEOUT_MS=30000
	LANGFUSE_MAX_PENDING_TRACE_TASKS=200
	LANGFUSE_TRACE_FLUSH_TIMEOUT_MS=1000
	# Merge immediate continuation turns into one trace; 0 disables it
	LANGFUSE_CONTINUATION_COALESCE_WINDOW_MS=1500

	# Common LLM Parameters
	LLM_TEMPERATURE=0.7
	LLM_MAX_TOKENS=1024
	# LLM/MCP API timeout/retry tuning.
	LLM_CONN_TIMEOUT_SEC=20.0
	# Timeout for one MCP tool request/response cycle
	MCP_CONN_TIMEOUT_SEC=20.0
	LLM_CONN_MAX_RETRY=1
	LLM_CONN_RETRY_INTERVAL_SEC=1.0
	TURN_LLM_STALL_TIMEOUT_SEC=12.0
	# Set >0 to force-interrupt slow startup greetings; 0 disables the cutoff
	MCP_STARTUP_GREETING_TIMEOUT_SEC=0.0

	# TTS Provider Selection
	# Options: pocket, deepgram, nvidia
	TTS_PROVIDER=pocket
	# Required when STT_PROVIDER=deepgram or TTS_PROVIDER=deepgram
	DEEPGRAM_API_KEY=
	# Optional: uses NVIDIA_API_KEY if not set
	NVIDIA_TTS_API_KEY=

	# NVIDIA TTS Settings (cloud or self-hosted Riva)
	NVIDIA_TTS_VOICE=Magpie-Multilingual.EN-US.Leo
	NVIDIA_TTS_LANGUAGE_CODE=en-US
	NVIDIA_TTS_SERVER=grpc.nvcf.nvidia.com:443
	NVIDIA_TTS_FUNCTION_ID=877104f7-e885-42b9-8de8-f6e4c6303969
	# Set false for self-hosted Riva without TLS
	NVIDIA_TTS_USE_SSL=true

	# Pocket TTS Settings (local text-to-speech)
	POCKET_TTS_VOICE=alba
	POCKET_TTS_TEMPERATURE=0.7
	POCKET_TTS_LSD_DECODE_STEPS=1
	# Timeout for one PocketTTS synthesis attempt
	POCKET_TTS_CONN_TIMEOUT_SEC=45.0
	# PocketTTS output sample rate is fixed to native 24kHz.

	# LiveKit Settings
	LIVEKIT_URL=wss://your-livekit-server.example.com
	LIVEKIT_API_KEY=your_livekit_api_key_here
	LIVEKIT_API_SECRET=your_livekit_api_secret_here
	# Use a unique name per environment to avoid worker collisions
	LIVEKIT_AGENT_NAME=open-voice-agent-local
	# Use 0-1 locally to reduce memory pressure
	LIVEKIT_NUM_IDLE_PROCESSES=1
	# Increase idle worker bootstrap timeout
	LIVEKIT_INITIALIZE_PROCESS_TIMEOUT_SEC=20.0
	# Per-job memory warning threshold (6 GB)
	LIVEKIT_JOB_MEMORY_WARN_MB=6144

	# LiveKit audio input configuration
	LIVEKIT_SAMPLE_RATE=24000
	LIVEKIT_NUM_CHANNELS=1
	# Larger frames slightly reduce responsiveness but avoid over-eager VAD transitions
	LIVEKIT_FRAME_SIZE_MS=60
	LIVEKIT_PRE_CONNECT_AUDIO=true
	# NOTE(review): unit is not stated here; presumably seconds - confirm against the loader
	LIVEKIT_PRE_CONNECT_TIMEOUT=3.0

	# Voice Activity Detection (VAD) configuration
	# Require 180ms of speech before activation (value is in seconds)
	VAD_MIN_SPEECH_DURATION=0.18
	# Wait longer before treating a pause as end of speech
	VAD_MIN_SILENCE_DURATION=0.55
	# Silero default; keep balanced sensitivity for speech vs background noise
	VAD_THRESHOLD=0.5

	# Turn endpointing tuning
	# Default turn commit delay before endpointing
	MIN_ENDPOINTING_DELAY=0.5
	# Let the detector wait longer when phrasing suggests continuation
	MAX_ENDPOINTING_DELAY=3.0
	# Wait for the committed turn before generating a reply
	PREEMPTIVE_GENERATION=false