File size: 8,947 Bytes
65b597c
affeafa
9af190b
affeafa
a16f712
affeafa
 
 
 
 
 
65b597c
affeafa
65b597c
 
affeafa
3a5c3c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
affeafa
9af190b
65b597c
9af190b
65b597c
3a5c3c8
 
 
 
65b597c
 
 
 
9af190b
65b597c
affeafa
 
 
 
65b597c
affeafa
 
 
 
 
 
 
 
11c8a27
 
9af190b
11c8a27
 
 
 
 
9af190b
11c8a27
 
 
 
9af190b
11c8a27
 
f70205c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef8254e
f70205c
 
 
 
 
ef8254e
f70205c
 
 
 
 
 
 
 
 
 
4730cee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f70205c
affeafa
0bfc688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
affeafa
090f2a4
 
1ec6fec
090f2a4
 
 
 
81dd251
1ec6fec
090f2a4
 
 
 
 
 
 
 
 
 
a16f712
090f2a4
81dd251
 
3d758c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
affeafa
 
5ec72cd
 
 
 
1bf5975
090f2a4
4730cee
3d758c5
4730cee
 
 
5ec72cd
 
63d722c
 
 
 
 
 
 
3a97ef8
63d722c
 
 
 
 
 
 
 
0e98d54
5e589a1
0e98d54
0146898
0e98d54
 
 
 
 
 
 
 
 
 
 
 
 
 
63d722c
 
affeafa
 
0bfc688
affeafa
5ec72cd
63d722c
affeafa
 
 
 
9af190b
65b597c
 
 
9af190b
affeafa
3a5c3c8
 
 
 
 
 
 
 
 
affeafa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
import json
from pathlib import Path
from typing import Optional

from pydantic import Field, ValidationError
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv

from src.core.logger import logger

# Directory reached by three ``.parent`` hops from this file (presumably the
# repository root — confirm against the project layout).
BASE_DIR = Path(__file__).parent.parent.parent
ENV_FILE = BASE_DIR / ".env"

# Runs at import time. override=True asks python-dotenv to let values from the
# file replace variables that are already set in the process environment.
load_dotenv(ENV_FILE, override=True)
# NOTE(review): logged unconditionally — load_dotenv's return value is not
# checked, so this line is emitted even when the file does not exist.
logger.info(f"Loaded environment from: {ENV_FILE}")

SENSITIVE_KEY_MARKERS = ("key", "token", "secret", "password")


def _is_sensitive_key(key: str) -> bool:
    key_lower = key.lower()
    return any(marker in key_lower for marker in SENSITIVE_KEY_MARKERS)


def _redact_sensitive_value(value: object) -> str:
    if value is None:
        return "<not set>"
    if isinstance(value, str) and not value:
        return "<not set>"
    return "<redacted>"


def mask_sensitive_data(data: dict) -> dict:
    """Return a copy of *data* with sensitive values replaced by placeholders.

    A key is sensitive when ``_is_sensitive_key`` says so; its value (of any
    type, including a nested dict) is replaced by the placeholder from
    ``_redact_sensitive_value``. Values under non-sensitive keys are kept,
    except that nested containers are walked so sensitive keys deeper in the
    structure are masked as well.

    Args:
        data: Mapping to sanitize. It is not modified.

    Returns:
        A new dict with the same keys in the same order.
    """

    def _mask_nested(value: object) -> object:
        # Dicts are masked key by key. Lists and tuples are walked too, so a
        # dict buried inside a sequence does not slip through unmasked.
        if isinstance(value, dict):
            return mask_sensitive_data(value)
        if isinstance(value, list):
            return [_mask_nested(item) for item in value]
        if isinstance(value, tuple):
            return tuple(_mask_nested(item) for item in value)
        return value

    return {
        key: (
            _redact_sensitive_value(value)
            if _is_sensitive_key(key)
            else _mask_nested(value)
        )
        for key, value in data.items()
    }


class CoreSettings(BaseSettings):
    """Common base for every settings group declared in this module.

    Declares no fields of its own; it only carries the shared
    ``model_config`` that the subclasses inherit.
    """

    model_config = SettingsConfigDict(
        # Hand the .env path to pydantic-settings only when the file exists.
        env_file=str(ENV_FILE) if ENV_FILE.exists() else None,
        env_file_encoding="utf-8",
        # Environment variable names must match the field names exactly.
        case_sensitive=True,
        # Inputs that match no declared field are ignored rather than rejected.
        extra="ignore",
        # Empty tuple: no field-name prefixes are treated as protected.
        protected_namespaces=(),
    )


class VoiceSettings(CoreSettings):
    """Voice pipeline settings: Pocket TTS, LiveKit audio input, VAD and endpointing.

    Each field name is also the name of the environment variable it is
    populated from (see the ``CoreSettings`` config); the ``ge``/``le``
    bounds are enforced when the class is instantiated.
    """

    # Pocket TTS Settings
    POCKET_TTS_VOICE: str = Field(
        default="alba",
        description="Default voice (alba, marius, javert, jean, fantine, cosette, eponine, azelma) or path to audio file",
    )
    POCKET_TTS_TEMPERATURE: float = Field(
        default=0.7,
        ge=0.0,
        le=2.0,
        description="Sampling temperature for generation",
    )
    POCKET_TTS_LSD_DECODE_STEPS: int = Field(
        default=1,
        ge=1,
        description="LSD decoding steps (higher = better quality, slower)",
    )

    # LiveKit Audio Input Settings
    # NOTE(review): sample rate and channel count carry no bounds, so zero or
    # negative values pass validation here — confirm consumers guard them.
    LIVEKIT_SAMPLE_RATE: int = Field(
        default=24000,
        description="Audio input sample rate (Hz)",
    )
    LIVEKIT_NUM_CHANNELS: int = Field(
        default=1,
        description="Number of audio input channels (1=mono)",
    )
    LIVEKIT_FRAME_SIZE_MS: int = Field(
        default=20,
        ge=10,
        le=100,
        description="Audio frame size in milliseconds (smaller = faster VAD response)",
    )
    LIVEKIT_PRE_CONNECT_AUDIO: bool = Field(
        default=True,
        description="Pre-connect audio before room join",
    )
    LIVEKIT_PRE_CONNECT_TIMEOUT: float = Field(
        default=3.0,
        ge=1.0,
        le=10.0,
        description="Timeout for pre-connect audio (seconds)",
    )

    # Voice Activity Detection Settings
    VAD_MIN_SPEECH_DURATION: float = Field(
        default=0.18,
        ge=0.1,
        le=1.0,
        description="Minimum speech duration (seconds) before VAD activation",
    )
    VAD_MIN_SILENCE_DURATION: float = Field(
        default=0.30,
        ge=0.1,
        le=2.0,
        description="Minimum silence duration (seconds) before VAD deactivation",
    )
    VAD_THRESHOLD: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="VAD activation threshold (higher = less sensitive, 0.5 is Silero default)",
    )

    # Turn Endpointing Settings
    # NOTE(review): the two delays are bounded independently; nothing in this
    # class checks that MIN_ENDPOINTING_DELAY <= MAX_ENDPOINTING_DELAY.
    MIN_ENDPOINTING_DELAY: float = Field(
        default=0.15,
        ge=0.0,
        le=10.0,
        description="Minimum endpointing delay (seconds) before committing user turn",
    )
    MAX_ENDPOINTING_DELAY: float = Field(
        default=1.0,
        ge=0.1,
        le=10.0,
        description="Maximum endpointing delay (seconds) when turn detector expects continuation",
    )
    PREEMPTIVE_GENERATION: bool = Field(
        default=True,
        description="Enable speculative LLM/TTS generation before final turn commit",
    )


class STTSettings(CoreSettings):
    """Speech-to-text settings: provider choice plus Moonshine and NVIDIA options."""

    # Provider selection
    # NOTE(review): declared as a free-form str; the two values named in the
    # description are not enforced by this class.
    STT_PROVIDER: str = Field(
        default="moonshine",
        description="STT provider: 'nvidia' or 'moonshine'"
    )

    # Moonshine STT settings
    # NOTE(review): the description lists bare size names, while the default is
    # a full repository-style ID — confirm which form the consumer expects.
    MOONSHINE_MODEL_ID: str = Field(
        default="usefulsensors/moonshine-streaming-medium",
        description="Moonshine model size: tiny, base, small, or medium"
    )
    MOONSHINE_LANGUAGE: str = Field(
        default="en",
        description="Language code for Moonshine STT"
    )

    # NVIDIA STT settings
    # The NVIDIA_API_KEY fallback mentioned in the description is not
    # implemented in this class; presumably the consumer applies it — verify.
    NVIDIA_STT_API_KEY: Optional[str] = Field(
        default=None,
        description="NVIDIA API key for STT (falls back to NVIDIA_API_KEY if not set)"
    )
    NVIDIA_STT_MODEL: str = Field(
        default="parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer",
        description="NVIDIA STT model ID"
    )
    NVIDIA_STT_LANGUAGE_CODE: str = Field(
        default="en-US",
        description="Language code for NVIDIA STT"
    )


class LLMSettings(CoreSettings):
    """LLM settings: provider choice, per-provider model options, and shared
    sampling, timeout and retry parameters."""

    # Provider selection
    # NOTE(review): declared as a free-form str; the two values named in the
    # description are not enforced by this class.
    LLM_PROVIDER: str = Field(
        default="huggingface",
        description="LLM provider: 'nvidia' or 'huggingface'"
    )

    # NVIDIA settings
    NVIDIA_API_KEY: Optional[str] = Field(default=None)
    NVIDIA_MODEL: str = Field(default="qwen/qwen2.5-7b-instruct")

    # HuggingFace settings
    HUGGINGFACE_MODEL_ID: str = Field(
        default="Qwen/Qwen2.5-3B-Instruct",
        description="HuggingFace model repository ID"
    )
    HUGGINGFACE_DEVICE: Optional[str] = Field(
        default=None,
        description="Device for inference: 'cuda', 'cpu', or None for auto-detect"
    )

    # Common LLM parameters
    LLM_TEMPERATURE: float = Field(default=0.7, ge=0.0, le=2.0)
    LLM_MAX_TOKENS: int = Field(default=1024, gt=0)

    # Connection timeout / retry policy
    LLM_CONN_TIMEOUT_SEC: float = Field(
        default=12.0,
        gt=0.0,
        le=120.0,
        description="LLM API timeout in seconds for one request attempt",
    )
    LLM_CONN_MAX_RETRY: int = Field(
        default=1,
        ge=0,
        le=10,
        description="Maximum LLM retry attempts on transient failures",
    )
    LLM_CONN_RETRY_INTERVAL_SEC: float = Field(
        default=1.0,
        ge=0.0,
        le=30.0,
        description="Delay in seconds between LLM retries",
    )

    # Stall detection for finalized user turns
    TURN_LLM_STALL_TIMEOUT_SEC: float = Field(
        default=8.0,
        gt=0.0,
        le=120.0,
        description="Warn when a finalized user turn does not reach LLM stage within this timeout",
    )


class LiveKitSettings(CoreSettings):
    """LiveKit connection credentials and agent worker options."""

    # Connection details; all three default to None when not provided.
    LIVEKIT_URL: Optional[str] = Field(default=None)
    LIVEKIT_API_KEY: Optional[str] = Field(default=None)
    LIVEKIT_API_SECRET: Optional[str] = Field(default=None)

    # Agent worker options
    LIVEKIT_AGENT_NAME: str = Field(default="open-voice-agent")
    LIVEKIT_NUM_IDLE_PROCESSES: int = Field(default=1, ge=0)
    LIVEKIT_JOB_MEMORY_WARN_MB: float = Field(
        default=6144,
        gt=0,
        description="Per-job memory warning threshold in MB",
    )


class LangfuseSettings(CoreSettings):
    """Langfuse tracing settings: enable flag, credentials, host and trace limits."""

    # Tracing is off unless explicitly enabled.
    LANGFUSE_ENABLED: bool = Field(
        default=False,
        description="Enable Langfuse tracing via OTEL exporter",
    )
    LANGFUSE_PUBLIC_KEY: Optional[str] = Field(default=None)
    LANGFUSE_SECRET_KEY: Optional[str] = Field(default=None)
    LANGFUSE_ENVIRONMENT: str = Field(default="development")

    # Two alternative ways to name the Langfuse endpoint; which one wins when
    # both are set is decided by the consumer, not by this class.
    LANGFUSE_HOST: Optional[str] = Field(
        default=None,
        description="Langfuse host URL, e.g. https://cloud.langfuse.com",
    )
    LANGFUSE_BASE_URL: Optional[str] = Field(
        default=None,
        description="Alternative to LANGFUSE_HOST",
    )

    # Trace emission limits (timeouts are in milliseconds)
    LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS: float = Field(
        default=8000.0,
        ge=0.0,
        le=10000.0,
        description="Timeout to wait for assistant text before force-finalizing trace",
    )
    LANGFUSE_MAX_PENDING_TRACE_TASKS: int = Field(
        default=200,
        ge=1,
        le=5000,
        description="Maximum queued background trace emission tasks",
    )
    LANGFUSE_TRACE_FLUSH_TIMEOUT_MS: float = Field(
        default=1000.0,
        ge=0.0,
        le=10000.0,
        description="Best-effort tracer flush timeout in milliseconds",
    )


class Settings(CoreSettings):
    """Aggregate settings object grouping every settings section.

    Each attribute defaults to a fresh instance of its section class, created
    with no arguments through ``default_factory``.
    """

    voice: VoiceSettings = Field(default_factory=VoiceSettings)
    stt: STTSettings = Field(default_factory=STTSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    livekit: LiveKitSettings = Field(default_factory=LiveKitSettings)
    langfuse: LangfuseSettings = Field(default_factory=LangfuseSettings)


# Build the module-level ``settings`` object at import time. A validation
# failure is logged in sanitized form and then re-raised, so importing this
# module fails when the configuration is invalid.
try:
    settings = Settings()
except ValidationError as e:
    # Drop documentation URLs, error context and raw input values so the
    # logged summary carries only each error's type, location and message.
    safe_errors = e.errors(
        include_url=False,
        include_context=False,
        include_input=False,
    )
    # Interpolate with an f-string, like the other log calls in this module,
    # so the rendered message does not depend on whether the project logger
    # applies %-style or brace-style formatting to positional arguments.
    # NOTE(review): logger.exception normally attaches the active traceback,
    # and the exception text itself may still include input values — confirm
    # that is acceptable, or log without the traceback.
    logger.exception(f"Error validating settings: {json.dumps(safe_errors)}")
    raise
else:
    # Success path kept outside the ``try`` so only Settings() is guarded.
    settings_dict = settings.model_dump()
    masked_settings = mask_sensitive_data(settings_dict)
    logger.info(f"Settings loaded: {json.dumps(masked_settings, indent=2)}")