# Environment configuration template.
# One KEY=value assignment per line; comments go on their own lines.

# STT Provider Configuration
# Options: "speechmatics", "deepgram", or "deepgram-flux"
STT_PROVIDER=speechmatics

# Speechmatics API Key
# Get your API key from: https://portal.speechmatics.com/
SPEECHMATICS_API_KEY=your_speechmatics_api_key_here

# Deepgram API Key (only needed if STT_PROVIDER=deepgram or deepgram-flux)
# Get your API key from: https://console.deepgram.com/
DEEPGRAM_API_KEY=your_deepgram_api_key_here

# ElevenLabs API Key
# Get your API key from: https://elevenlabs.io/app/settings/api-keys
ELEVENLABS_API_KEY=your_elevenlabs_api_key_here

# ElevenLabs Voice ID (optional, defaults to custom voice)
# Find voice IDs at: https://elevenlabs.io/app/voices
ELEVENLABS_VOICE_ID=ry8mpwRw6nugb2qjP0tu

# DeepInfra API Key (for Qwen LLM and Gating Layer)
# Get your API key from: https://deepinfra.com/
DEEPINFRA_API_KEY=your_deepinfra_api_key_here

# Optional: Override default models.
# Uncomment a line below to override; the values shown are the defaults.
# Main LLM (default)
# DEEPINFRA_MODEL=Qwen/Qwen3-235B-A22B-Instruct-2507
# Gating Layer (default)
# DEEPINFRA_GATING_MODEL=meta-llama/Llama-3.2-3B-Instruct

# Pipecat FastAPI service URL (for frontend to connect)
NEXT_PUBLIC_PIPECAT_URL=http://localhost:7860

# Pipecat FastAPI service configuration
# NOTE(review): presumably these should match the host/port in
# NEXT_PUBLIC_PIPECAT_URL above - confirm when changing either.
PIPECAT_HOST=localhost
PIPECAT_PORT=7860

# Mem0 API Key (optional, enables long-term memory)
# Get one from: https://docs.mem0.ai/
MEM0_API_KEY=your_mem0_api_key_here

# TTS Provider Configuration
# Options: "elevenlabs" (cloud, requires API key) or "qwen3" (local, free)
TTS_PROVIDER=qwen3

# Qwen3-TTS Configuration (only needed if TTS_PROVIDER=qwen3)
# Model: 0.6B (faster, less memory) or 1.7B (better quality)
QWEN3_TTS_MODEL=Qwen/Qwen3-TTS-12Hz-0.6B-Base
# Device: "mps" for Mac, "cuda" for NVIDIA GPU, "cpu" for CPU
QWEN3_TTS_DEVICE=mps
# Reference audio file for voice cloning (relative to project root)
QWEN3_TTS_REF_AUDIO=assets/audio/tars-clean-compressed.mp3

# Emotional State Monitoring
# Continuously analyzes video for confusion/hesitation/frustration
# Triggers TARS to offer help proactively
# Enables emotional state monitoring (true/false)
EMOTIONAL_MONITORING_ENABLED=true

# How often to sample video frames (in seconds)
EMOTIONAL_SAMPLING_INTERVAL=3.0

# How many consecutive negative states before intervention
EMOTIONAL_INTERVENTION_THRESHOLD=2