File size: 6,943 Bytes
f26de06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""Application-wide configuration settings."""

import os
from dataclasses import dataclass, field
from typing import Optional
from pathlib import Path
from dotenv import load_dotenv

# Load variables from a .env file into the process environment. This must run
# before the Settings class body below is executed, because every field default
# there is computed with os.getenv(...) at class-definition time.
load_dotenv()


@dataclass
class Settings:
    """Application-wide configuration settings.

    Each field default is computed from an environment variable when this
    class body is executed (once, at import time), falling back to the literal
    given next to it. Changing the environment afterwards therefore does not
    change these defaults; only the values explicitly re-read in
    ``__post_init__`` and in the Hugging Face token accessors follow the live
    environment.

    Boolean flags are true only when the variable, lower-cased, equals
    ``"true"``; any other value (including ``"1"`` or ``"yes"``) is false.

    Instantiating the class creates the temp, config, logs and CRM data
    directories as a side effect (see ``__post_init__``).
    """

    # ============================================
    # LLM Provider Settings
    # ============================================
    # "auto" defers the choice to ``effective_llm_provider``.
    llm_provider: str = os.getenv("LLM_PROVIDER", "auto")

    # Hugging Face settings
    hf_token: str = os.getenv("HF_TOKEN", "")
    hf_chat_model: str = os.getenv("HF_CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
    hf_temperature: float = float(os.getenv("HF_TEMPERATURE", "0.001"))
    hf_max_new_tokens: int = int(os.getenv("HF_MAX_NEW_TOKENS", "512"))

    # Model settings (used when the effective LLM provider is not Hugging Face)
    model_name: str = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")

    # ============================================
    # Audio Provider Settings
    # ============================================
    # "auto" defers the choice to ``effective_audio_provider``.
    audio_provider: str = os.getenv("AUDIO_PROVIDER", "auto")
    tts_model: str = os.getenv("TTS_MODEL", "hexgrad/Kokoro-82M")
    stt_model: str = os.getenv("STT_MODEL", "openai/whisper-large-v3")

    # ============================================
    # VAD (Voice Activity Detection) Settings
    # ============================================
    vad_enabled: bool = os.getenv("VAD_ENABLED", "true").lower() == "true"
    vad_sample_rate: int = int(os.getenv("VAD_SAMPLE_RATE", "16000"))
    vad_frame_duration_ms: int = int(os.getenv("VAD_FRAME_DURATION_MS", "30"))
    vad_aggressiveness: int = int(os.getenv("VAD_AGGRESSIVENESS", "2"))
    vad_speech_threshold: float = float(os.getenv("VAD_SPEECH_THRESHOLD", "0.5"))
    vad_silence_threshold: float = float(os.getenv("VAD_SILENCE_THRESHOLD", "0.3"))
    vad_min_speech_ms: int = int(os.getenv("VAD_MIN_SPEECH_MS", "300"))
    vad_max_speech_s: float = float(os.getenv("VAD_MAX_SPEECH_S", "30.0"))
    vad_post_speech_silence_ms: int = int(os.getenv("VAD_POST_SPEECH_SILENCE_MS", "800"))

    # ============================================
    # Screen/Vision Settings
    # ============================================
    screen_capture_interval: float = float(os.getenv("SCREEN_CAPTURE_INTERVAL", "1.0"))
    screen_compression_quality: int = int(os.getenv("SCREEN_COMPRESSION_QUALITY", "50"))
    max_width: int = int(os.getenv("SCREEN_MAX_WIDTH", "3440"))
    max_height: int = int(os.getenv("SCREEN_MAX_HEIGHT", "1440"))

    # Vision model (Nebius). These field names are upper-case in the public
    # interface and are kept as-is for callers.
    NEBIUS_MODEL: str = os.getenv("NEBIUS_MODEL", "google/gemma-3-27b-it-fast")
    NEBIUS_API_KEY: str = os.getenv("NEBIUS_API_KEY", "")
    NEBIUS_BASE_URL: str = os.getenv("NEBIUS_BASE_URL", "https://api.studio.nebius.com/v1/")

    # Auto-enable vision when screen context is needed
    vision_auto_enabled: bool = os.getenv("VISION_AUTO_ENABLED", "true").lower() == "true"
    vision_fps: float = float(os.getenv("VISION_FPS", "0.05"))  # Frames per second

    # ============================================
    # MCP Server Settings
    # ============================================
    mcp_server_url: str = os.getenv("MCP_SERVER_URL", "http://localhost:8000")
    mcp_auto_start: bool = os.getenv("MCP_AUTO_START", "true").lower() == "true"

    # ============================================
    # CRM Data Settings
    # ============================================
    crm_data_dir: str = os.getenv("CRM_DATA_DIR", "./data")

    # ============================================
    # Hyper-V Settings (Legacy)
    # ============================================
    hyperv_enabled: bool = os.getenv("HYPERV_ENABLED", "false").lower() == "true"
    hyperv_host: str = os.getenv("HYPERV_HOST", "localhost")
    # No fallback literal: these are None when the variable is unset.
    hyperv_username: Optional[str] = os.getenv("HYPERV_USERNAME")
    hyperv_password: Optional[str] = os.getenv("HYPERV_PASSWORD")

    # ============================================
    # Application Settings
    # ============================================
    max_conversation_history: int = int(os.getenv("MAX_CONVERSATION_HISTORY", "50"))
    temp_dir: str = os.getenv("TEMP_DIR", "./temp")
    log_level: str = os.getenv("LOG_LEVEL", "INFO")

    # Feature flags
    enable_screen_sharing_button: bool = os.getenv("ENABLE_SCREEN_SHARING_BUTTON", "true").lower() == "true"
    enable_voice_input: bool = os.getenv("ENABLE_VOICE_INPUT", "true").lower() == "true"

    def __post_init__(self):
        """Create required directories and refresh env-backed secrets.

        Side effects: creates ``temp_dir``, ``./config``, ``./logs`` and
        ``crm_data_dir`` (including parents) if they do not already exist.
        """
        # Ensure necessary directories exist
        Path(self.temp_dir).mkdir(exist_ok=True, parents=True)
        Path("./config").mkdir(exist_ok=True, parents=True)
        Path("./logs").mkdir(exist_ok=True, parents=True)
        Path(self.crm_data_dir).mkdir(exist_ok=True, parents=True)

        # Refresh dynamic, env-backed values so they pick up changes made to
        # the environment after import. The environment wins when the variable
        # is set; otherwise the value already on the instance is kept.
        self.hf_token = os.getenv("HF_TOKEN", self.hf_token)
        self.NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY", self.NEBIUS_API_KEY)

    def _current_hf_token(self) -> str:
        """Return the Hugging Face token currently in effect.

        The live ``HF_TOKEN`` environment variable takes precedence so runtime
        changes are honoured; when it is unset, the token stored on this
        instance is used. This is the same precedence ``__post_init__``
        applies, so a token passed to the constructor is not silently ignored.
        """
        return os.getenv("HF_TOKEN", self.hf_token)

    def is_hf_token_valid(self) -> bool:
        """Check if the effective HuggingFace token looks like a real HF token.

        Returns:
            True when the token is non-empty, starts with ``"hf_"`` and is
            longer than 20 characters. This is a shape check only; the token
            is not verified against any service.
        """
        token = self._current_hf_token()
        return bool(token and token.startswith("hf_") and len(token) > 20)

    @property
    def effective_llm_provider(self) -> str:
        """Resolve ``llm_provider``, mapping ``"auto"`` to a concrete provider.

        ``"auto"`` becomes ``"huggingface"`` when the HF token passes
        ``is_hf_token_valid`` and ``"openai"`` otherwise; any other value is
        returned unchanged.
        """
        if self.llm_provider == "auto":
            return "huggingface" if self.is_hf_token_valid() else "openai"
        return self.llm_provider

    @property
    def effective_audio_provider(self) -> str:
        """Resolve ``audio_provider`` with the same rule as the LLM provider."""
        if self.audio_provider == "auto":
            return "huggingface" if self.is_hf_token_valid() else "openai"
        return self.audio_provider

    @property
    def llm_endpoint(self) -> str:
        """Return the inference URL for the effective LLM provider.

        For Hugging Face this is built from ``hf_chat_model``. For any other
        provider it is the optional ``openai_endpoint`` attribute, or ``''``
        when that attribute is absent (this class does not declare it).
        """
        if self.effective_llm_provider == "huggingface":
            return f"https://api-inference.huggingface.co/models/{self.hf_chat_model}"
        return getattr(self, 'openai_endpoint', '')

    @property
    def llm_api_key(self) -> str:
        """Return the API key matching the effective LLM provider.

        For Hugging Face this is the effective HF token. For any other
        provider it is the optional ``openai_api_key`` attribute, or ``""``
        when that attribute is absent (this class does not declare it).
        """
        if self.effective_llm_provider == "huggingface":
            return self._current_hf_token()
        return getattr(self, "openai_api_key", "")

    @property
    def effective_model_name(self) -> str:
        """Return ``hf_chat_model`` for Hugging Face, else ``model_name``."""
        return self.hf_chat_model if self.effective_llm_provider == "huggingface" else self.model_name

    def get_vad_config(self) -> dict:
        """Get VAD configuration as a dictionary.

        Note that three keys are named differently from their source fields:
        ``min_speech_duration_ms`` (from ``vad_min_speech_ms``),
        ``max_speech_duration_s`` (from ``vad_max_speech_s``), and the rest
        simply drop the ``vad_`` prefix. ``vad_enabled`` is not included.
        """
        return {
            "sample_rate": self.vad_sample_rate,
            "frame_duration_ms": self.vad_frame_duration_ms,
            "aggressiveness": self.vad_aggressiveness,
            "speech_threshold": self.vad_speech_threshold,
            "silence_threshold": self.vad_silence_threshold,
            "min_speech_duration_ms": self.vad_min_speech_ms,
            "max_speech_duration_s": self.vad_max_speech_s,
            "post_speech_silence_ms": self.vad_post_speech_silence_ms,
        }