File size: 2,222 Bytes
e8ed0e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# STT Provider Configuration
# Options: "speechmatics", "deepgram", or "deepgram-flux"
STT_PROVIDER=speechmatics

# Speechmatics API Key (only needed if STT_PROVIDER=speechmatics)
# Get your API key from: https://portal.speechmatics.com/
SPEECHMATICS_API_KEY=your_speechmatics_api_key_here

# Deepgram API Key (only needed if STT_PROVIDER=deepgram or deepgram-flux)
# Get your API key from: https://console.deepgram.com/
DEEPGRAM_API_KEY=your_deepgram_api_key_here

# ElevenLabs API Key (only needed if TTS_PROVIDER=elevenlabs)
# Get your API key from: https://elevenlabs.io/app/settings/api-keys
ELEVENLABS_API_KEY=your_elevenlabs_api_key_here

# ElevenLabs Voice ID (optional, defaults to custom voice)
# Find voice IDs at: https://elevenlabs.io/app/voices
ELEVENLABS_VOICE_ID=ry8mpwRw6nugb2qjP0tu

# DeepInfra API Key (for Qwen LLM and Gating Layer)
# Get your API key from: https://deepinfra.com/
DEEPINFRA_API_KEY=your_deepinfra_api_key_here
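# Optional: override the default models by uncommenting a line below.
# The values shown are the defaults. Keep notes on their own lines: some
# env-file loaders treat a trailing "# ..." as part of the value.
# Main LLM:
# DEEPINFRA_MODEL=Qwen/Qwen3-235B-A22B-Instruct-2507
# Gating Layer:
# DEEPINFRA_GATING_MODEL=meta-llama/Llama-3.2-3B-Instruct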

# Pipecat FastAPI service URL (used by the frontend to connect;
# keep in sync with PIPECAT_HOST and PIPECAT_PORT below)
NEXT_PUBLIC_PIPECAT_URL=http://localhost:7860

# Pipecat FastAPI service configuration
PIPECAT_HOST=localhost
PIPECAT_PORT=7860

# Mem0 API Key (optional, enables long-term memory)
# Get your API key from: https://docs.mem0.ai/
MEM0_API_KEY=your_mem0_api_key_here

# TTS Provider Configuration
# Options: "elevenlabs" (cloud, requires ELEVENLABS_API_KEY) or "qwen3" (local, free)
TTS_PROVIDER=qwen3

# Qwen3-TTS Configuration (only needed if TTS_PROVIDER=qwen3)
# Model: 0.6B (faster, less memory) or 1.7B (better quality)
QWEN3_TTS_MODEL=Qwen/Qwen3-TTS-12Hz-0.6B-Base
# Device: "mps" for Mac, "cuda" for NVIDIA GPU, "cpu" for CPU
QWEN3_TTS_DEVICE=mps
# Reference audio file for voice cloning (relative to project root)
QWEN3_TTS_REF_AUDIO=assets/audio/tars-clean-compressed.mp3

# Emotional State Monitoring
# Continuously analyzes video for confusion/hesitation/frustration
# Triggers TARS to offer help proactively
EMOTIONAL_MONITORING_ENABLED=true
# How often to sample video frames (in seconds)
EMOTIONAL_SAMPLING_INTERVAL=3.0
# Number of consecutive negative states required before an intervention is triggered
EMOTIONAL_INTERVENTION_THRESHOLD=2