Spaces:
Running
Running
File size: 6,943 Bytes
f26de06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
"""Application-wide configuration settings."""
import os
from dataclasses import dataclass, field
from typing import Optional
from pathlib import Path
from dotenv import load_dotenv
# Populate os.environ from a .env file (if any) *before* the Settings class
# body below runs: every field default there is computed with os.getenv()
# at class-definition time, so this call must come first.
load_dotenv()
# Spellings accepted as "enabled" for boolean environment variables.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_flag(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Returns ``default`` when the variable is unset. Otherwise the value is
    stripped, lower-cased and compared against the accepted truthy spellings
    (``1``, ``true``, ``yes``, ``on``); anything else, including an empty
    string, is False.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_ENV_VALUES


@dataclass
class Settings:
    """Application-wide configuration settings.

    Every field default is read from the process environment once, when this
    class body is executed (i.e. at import time). Values can still be
    overridden per instance through the generated ``__init__``.
    """

    # ============================================
    # LLM Provider Settings
    # ============================================
    llm_provider: str = os.getenv("LLM_PROVIDER", "auto")

    # Hugging Face settings
    hf_token: str = os.getenv("HF_TOKEN", "")
    hf_chat_model: str = os.getenv("HF_CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
    hf_temperature: float = float(os.getenv("HF_TEMPERATURE", "0.001"))
    hf_max_new_tokens: int = int(os.getenv("HF_MAX_NEW_TOKENS", "512"))

    # Model settings
    model_name: str = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")

    # ============================================
    # Audio Provider Settings
    # ============================================
    audio_provider: str = os.getenv("AUDIO_PROVIDER", "auto")
    tts_model: str = os.getenv("TTS_MODEL", "hexgrad/Kokoro-82M")
    stt_model: str = os.getenv("STT_MODEL", "openai/whisper-large-v3")

    # ============================================
    # VAD (Voice Activity Detection) Settings
    # ============================================
    vad_enabled: bool = _env_flag("VAD_ENABLED", True)
    vad_sample_rate: int = int(os.getenv("VAD_SAMPLE_RATE", "16000"))
    vad_frame_duration_ms: int = int(os.getenv("VAD_FRAME_DURATION_MS", "30"))
    vad_aggressiveness: int = int(os.getenv("VAD_AGGRESSIVENESS", "2"))
    vad_speech_threshold: float = float(os.getenv("VAD_SPEECH_THRESHOLD", "0.5"))
    vad_silence_threshold: float = float(os.getenv("VAD_SILENCE_THRESHOLD", "0.3"))
    vad_min_speech_ms: int = int(os.getenv("VAD_MIN_SPEECH_MS", "300"))
    vad_max_speech_s: float = float(os.getenv("VAD_MAX_SPEECH_S", "30.0"))
    vad_post_speech_silence_ms: int = int(os.getenv("VAD_POST_SPEECH_SILENCE_MS", "800"))

    # ============================================
    # Screen/Vision Settings
    # ============================================
    screen_capture_interval: float = float(os.getenv("SCREEN_CAPTURE_INTERVAL", "1.0"))
    screen_compression_quality: int = int(os.getenv("SCREEN_COMPRESSION_QUALITY", "50"))
    max_width: int = int(os.getenv("SCREEN_MAX_WIDTH", "3440"))
    max_height: int = int(os.getenv("SCREEN_MAX_HEIGHT", "1440"))

    # Vision model (Nebius)
    NEBIUS_MODEL: str = os.getenv("NEBIUS_MODEL", "google/gemma-3-27b-it-fast")
    NEBIUS_API_KEY: str = os.getenv("NEBIUS_API_KEY", "")
    NEBIUS_BASE_URL: str = os.getenv("NEBIUS_BASE_URL", "https://api.studio.nebius.com/v1/")

    # Auto-enable vision when screen context is needed
    vision_auto_enabled: bool = _env_flag("VISION_AUTO_ENABLED", True)
    vision_fps: float = float(os.getenv("VISION_FPS", "0.05"))  # Frames per second

    # ============================================
    # MCP Server Settings
    # ============================================
    mcp_server_url: str = os.getenv("MCP_SERVER_URL", "http://localhost:8000")
    mcp_auto_start: bool = _env_flag("MCP_AUTO_START", True)

    # ============================================
    # CRM Data Settings
    # ============================================
    crm_data_dir: str = os.getenv("CRM_DATA_DIR", "./data")

    # ============================================
    # Hyper-V Settings (Legacy)
    # ============================================
    hyperv_enabled: bool = _env_flag("HYPERV_ENABLED", False)
    hyperv_host: str = os.getenv("HYPERV_HOST", "localhost")
    hyperv_username: Optional[str] = os.getenv("HYPERV_USERNAME")
    hyperv_password: Optional[str] = os.getenv("HYPERV_PASSWORD")

    # ============================================
    # Application Settings
    # ============================================
    max_conversation_history: int = int(os.getenv("MAX_CONVERSATION_HISTORY", "50"))
    temp_dir: str = os.getenv("TEMP_DIR", "./temp")
    log_level: str = os.getenv("LOG_LEVEL", "INFO")

    # Feature flags
    enable_screen_sharing_button: bool = _env_flag("ENABLE_SCREEN_SHARING_BUTTON", True)
    enable_voice_input: bool = _env_flag("ENABLE_VOICE_INPUT", True)

    def __post_init__(self):
        """Create the working directories and refresh env-backed secrets."""
        # Ensure necessary directories exist
        Path(self.temp_dir).mkdir(exist_ok=True, parents=True)
        Path("./config").mkdir(exist_ok=True, parents=True)
        Path("./logs").mkdir(exist_ok=True, parents=True)
        Path(self.crm_data_dir).mkdir(exist_ok=True, parents=True)

        # Refresh env-backed secrets: the class-level defaults were captured
        # at import time, so re-read the environment here to pick up values
        # set afterwards. When the variable is set it takes precedence over
        # whatever was passed to the constructor.
        self.hf_token = os.getenv("HF_TOKEN", self.hf_token)
        self.NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY", self.NEBIUS_API_KEY)

    def _current_hf_token(self) -> str:
        """Return the HF token to use right now.

        Prefers the live ``HF_TOKEN`` environment variable so runtime changes
        are honoured, and falls back to the token stored on this instance
        when the variable is unset or empty.
        """
        return os.getenv("HF_TOKEN") or self.hf_token or ""

    def is_hf_token_valid(self) -> bool:
        """Check if the HuggingFace token is set and looks like a real HF token.

        This is a shape check only (``hf_`` prefix and more than 20
        characters); the token is not verified against any service.
        """
        token = self._current_hf_token()
        return bool(token and token.startswith("hf_") and len(token) > 20)

    @property
    def effective_llm_provider(self) -> str:
        """Resolve ``llm_provider``; "auto" becomes "huggingface" or "openai"."""
        if self.llm_provider == "auto":
            return "huggingface" if self.is_hf_token_valid() else "openai"
        return self.llm_provider

    @property
    def effective_audio_provider(self) -> str:
        """Resolve ``audio_provider``; "auto" becomes "huggingface" or "openai"."""
        if self.audio_provider == "auto":
            return "huggingface" if self.is_hf_token_valid() else "openai"
        return self.audio_provider

    @property
    def llm_endpoint(self) -> str:
        """Return the endpoint URL for the effective LLM provider.

        For "huggingface" this is the inference URL built from
        ``hf_chat_model``. Otherwise it is the instance's ``openai_endpoint``
        attribute if one has been set, else an empty string (this class does
        not declare such a field itself).
        """
        if self.effective_llm_provider == "huggingface":
            return f"https://api-inference.huggingface.co/models/{self.hf_chat_model}"
        return getattr(self, 'openai_endpoint', '')

    @property
    def llm_api_key(self) -> str:
        """Return the API key for the effective LLM provider.

        For "huggingface" this is the current HF token. Otherwise it is the
        instance's ``openai_api_key`` attribute if one has been set, else an
        empty string (this class does not declare such a field itself).
        """
        if self.effective_llm_provider == "huggingface":
            return self._current_hf_token()
        return getattr(self, "openai_api_key", "")

    @property
    def effective_model_name(self) -> str:
        """Return ``hf_chat_model`` for "huggingface", else ``model_name``."""
        return self.hf_chat_model if self.effective_llm_provider == "huggingface" else self.model_name

    def get_vad_config(self) -> dict:
        """Get VAD configuration as a dictionary."""
        return {
            "sample_rate": self.vad_sample_rate,
            "frame_duration_ms": self.vad_frame_duration_ms,
            "aggressiveness": self.vad_aggressiveness,
            "speech_threshold": self.vad_speech_threshold,
            "silence_threshold": self.vad_silence_threshold,
            "min_speech_duration_ms": self.vad_min_speech_ms,
            "max_speech_duration_s": self.vad_max_speech_s,
            "post_speech_silence_ms": self.vad_post_speech_silence_ms,
        }
|