from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Central configuration model; every field is a tunable with a built-in default.

    Values are loaded by pydantic-settings according to ``model_config`` at the
    bottom of the class: environment variables carrying the ``VOICE_LAB_``
    prefix and entries in a ``.env`` file override the defaults declared here,
    and unrecognised keys are ignored.  The one exception to the prefix is
    ``openrouter_api_key``, which declares an explicit alias.

    The section comments below group fields by their name prefix only.  What
    each value actually controls is decided by the code that consumes this
    object, which is not visible in this file.  Fields ending in ``_ms`` are
    presumably durations in milliseconds -- confirm against the consumers.
    """

    # --- Speech-to-text: backend selection and Whisper decoding options ---
    stt_backend: str = "whisper"
    whisper_model: str = "small"
    whisper_device: str = "cuda"
    whisper_compute_type: str = "int8"
    whisper_beam_size: int = 5
    whisper_best_of: int = 5
    whisper_temperature: float = 0.0
    whisper_condition_on_previous_text: bool = False
    stt_language: str = "auto"
    stt_multilingual_enabled: bool = True

    # --- Speech-to-text: Parakeet model options ---
    parakeet_model_id: str = "nvidia/parakeet-tdt-0.6b-v3"
    parakeet_device: str = "cpu"

    # --- Text-to-speech: backend selection and Chatterbox (ONNX) options ---
    tts_backend: str = "chatterbox-onnx"
    chatterbox_device: str = "cuda"
    chatterbox_onnx_provider: str = "cuda"
    chatterbox_onnx_dtype: str = "fp16"
    tts_gpu_resident_preferred: bool = True
    chatterbox_onnx_site_packages_path: str = ""
    chatterbox_onnx_model_id: str = "ResembleAI/chatterbox-turbo-ONNX"
    # NOTE(review): this default is an absolute path inside one developer's
    # virtualenv and will not exist on other machines; it should be overridden
    # (VOICE_LAB_CHATTERBOX_ONNX_VOICE_PATH) wherever this backend is used.
    chatterbox_onnx_voice_path: str = (
        "/media/rapheal/New WD m2 Udemba Boys/home-overflow/Projects/voice-latency-lab/"
        ".venv/lib/python3.11/site-packages/s3tokenizer/assets/BAC009S0764W0121.wav"
    )
    chatterbox_onnx_max_new_tokens: int = 96
    chatterbox_onnx_repetition_penalty: float = 1.15

    # --- Text-to-speech: Kokoro options ---
    kokoro_repo_id: str = "hexgrad/Kokoro-82M"
    kokoro_lang_code: str = "a"
    kokoro_voice: str = "af_heart"
    kokoro_device: str = "cuda"
    kokoro_speed: float = 1.0

    # --- Server and audio basics ---
    # NOTE(review): presumably the server bind address; if so, "0.0.0.0"
    # listens on all interfaces -- confirm this is intended outside local dev.
    host: str = "0.0.0.0"
    port: int = 8000
    static_dir: str = "static"
    sample_rate: int = 16000

    # --- Voice activity detection (provider choice, per-provider thresholds) ---
    vad_provider: str = "silero"
    parakeet_bypass_silero_vad: bool = False
    parakeet_vad_rms_threshold: float = 0.01
    parakeet_vad_start_ms: int = 80
    parakeet_vad_stop_ms: int = 480
    parakeet_barge_in_rms_threshold: float = 0.02
    parakeet_barge_in_start_ms: int = 180
    silero_vad_threshold: float = 0.14
    silero_vad_start_ms: int = 16
    silero_vad_min_rms: float = 0.00025
    silero_vad_strong_threshold: float = 0.32
    vad_frontend_enabled: bool = True
    vad_frontend_preemphasis: float = 0.97
    vad_frontend_noise_floor_alpha: float = 0.96
    vad_frontend_gate_multiplier: float = 1.08
    vad_frontend_target_rms: float = 0.01
    vad_frontend_max_gain: float = 2.5
    vad_stop_ms: int = 560

    # --- Dynamic endpointing (bounds plus per-condition biases/discounts) ---
    dynamic_endpointing_enabled: bool = True
    dynamic_endpointing_min_ms: int = 180
    dynamic_endpointing_max_ms: int = 2600
    dynamic_endpointing_incomplete_bias_ms: int = 1500
    dynamic_endpointing_complete_discount_ms: int = 320
    dynamic_endpointing_question_discount_ms: int = 180
    dynamic_endpointing_short_utterance_bias_ms: int = 220
    dynamic_endpointing_no_partial_short_discount_ms: int = 180
    dynamic_endpointing_no_partial_medium_discount_ms: int = 100
    dynamic_endpointing_stale_partial_ms: int = 3200
    dynamic_endpointing_max_turn_ms: int = 30000

    # --- Utterance capture ---
    preroll_ms: int = 450
    min_utterance_ms: int = 220

    # --- Assistant turn-taking and barge-in ---
    assistant_holdoff_ms: int = 320
    assistant_response_watchdog_ms: int = 3500
    assistant_barge_in_grace_ms: int = 90
    assistant_barge_in_start_ms: int = 60
    assistant_barge_in_min_rms: float = 0.01
    assistant_barge_in_prob_threshold: float = 0.45

    # --- Transcription input gating ---
    transcription_trim_threshold: float = 0.005
    transcription_min_rms: float = 0.002
    transcription_min_peak: float = 0.01

    # --- Speaker focus ---
    speaker_focus_enabled: bool = True
    speaker_focus_min_utterance_ms: int = 900
    speaker_focus_min_rms: float = 0.015
    speaker_focus_similarity_threshold: float = 0.82
    speaker_focus_profile_alpha: float = 0.18
    speaker_focus_multi_speaker_threshold: float = 0.16
    speaker_focus_reject_mixed: bool = True
    # NOTE(review): a debug flag that defaults to on -- confirm this is wanted
    # for non-development deployments.
    speaker_focus_debug: bool = True

    # --- Conversation memory ---
    conversation_memory_enabled: bool = True
    conversation_memory_turns: int = 6
    conversation_memory_max_chars: int = 1400

    # --- Short-utterance handling ---
    short_utterance_ms: int = 700
    short_utterance_keep_edge_ms: int = 140
    short_utterance_min_rms: float = 0.0015
    short_utterance_min_peak: float = 0.008
    short_utterance_pad_ms: int = 220
    short_utterance_min_transcription_ms: int = 650

    # --- Whisper acceptance thresholds and fallback decoding pass ---
    whisper_log_prob_threshold: float = -2.0
    whisper_no_speech_threshold: float = 1.0
    whisper_fallback_beam_size: int = 8
    whisper_fallback_best_of: int = 8
    whisper_fallback_log_prob_threshold: float = -3.0
    whisper_fallback_no_speech_threshold: float = 1.0

    # --- Partial transcripts and early (partial) responses ---
    partial_transcripts_enabled: bool = True
    partial_transcript_min_ms: int = 250
    partial_transcript_interval_ms: int = 180
    partial_response_enabled: bool = True
    partial_response_stable_ms: int = 180
    partial_response_min_words: int = 4
    partial_response_complete_min_words: int = 2
    partial_response_complete_max_words: int = 6
    partial_response_incomplete_min_words: int = 7
    partial_response_complete_stable_ms: int = 120
    partial_response_incomplete_stable_ms: int = 520
    partial_response_min_silence_ms: int = 120

    # --- Hallucination filter ---
    hallucination_max_rms: float = 0.02
    hallucination_max_words: int = 3
    # Stored as one comma-separated string; presumably split on "," by the
    # consumer -- verify before adding phrases that themselves contain commas.
    hallucination_phrases: str = (
        "thank you,thank you very much,all right,alright,thanks,"
        "okay,ok,okay you,ok you,you,yeah,right,got it,mm hmm,uh huh"
    )

    # --- Startup and assistant backend selection ---
    preload_models: bool = True
    assistant_backend: str = "my-agent-cli"

    # --- Assistant backend: local Hugging Face model ---
    hf_local_model_id: str = "Qwen/Qwen2.5-1.5B-Instruct"
    hf_local_device: str = "cpu"
    hf_local_dtype: str = "float32"
    hf_local_max_new_tokens: int = 40
    hf_local_temperature: float = 0.6
    hf_local_top_p: float = 0.95
    hf_local_do_sample: bool = False
    hf_local_hide_thinking: bool = True
    hf_local_requires_hf_access: bool = False
    hf_local_prompt_prefix: str = (
        "You are a warm, natural voice assistant. Reply in spoken language, usually one or two sentences and sometimes a short third sentence when it helps. "
        "Answer directly, but sound like a person instead of a bot. Keep the conversation flowing. "
        "When it feels natural, add a brief follow-up question or gentle prompt to keep the exchange going. "
        "Do not show your reasoning. Do not output <think> tags. "
        "By default, reply in the same language as the user's latest message. "
        "If the user explicitly asks for translation, comparison, or output in two or more languages, include all requested languages. "
        "Do not mention Human, User, Assistant, Thought, or explain your answer style. "
        "Do not add emojis, roleplay markers, or meta commentary."
    )

    # --- Assistant backend: external "my-agent" CLI ---
    # NOTE(review): both defaults below are absolute paths under one user's
    # home directory; other machines must override them
    # (VOICE_LAB_MY_AGENT_COMMAND / VOICE_LAB_MY_AGENT_CWD).
    my_agent_command: str = "/home/rapheal/.cargo/bin/my-agent"
    my_agent_cwd: str = "/home/rapheal"
    my_agent_active_tty_file: str | None = None
    my_agent_force: bool = True
    my_agent_model: str | None = None
    my_agent_voice_preamble: str = (
        "Voice mode. Reply in spoken language, usually one or two sentences and sometimes a short third sentence when it helps. "
        "By default, reply in the same language as the user's latest message. "
        "If the user explicitly asks for translation, comparison, or output in two or more languages, include all requested languages. "
        "Sound warm, conversational, and human, not robotic or overly formal. "
        "When it fits, ask one short follow-up question or make one brief conversational observation to keep the exchange going. "
        "Do not use bullet points, numbered lists, headings, or dense datapoints unless the user explicitly asks. "
        "Sound natural, direct, and human. "
        "If the task involves a web page, browser state, or checking something online in a browser, "
        "prefer browser_use_task first, then browser_use_script, and only fall back to older browser_navigate/browser_snapshot/browser_act tools if browser-use fails or low-level control is explicitly needed. "
        "Keep browser-use vision off unless the task clearly requires visual reasoning."
    )

    # --- Spoken progress updates and notifications ---
    agent_progress_speak_min_interval_ms: int = 9000
    notification_max_wait_ms: int = 15000

    # --- Backchannel (disabled by default) ---
    backchannel_enabled: bool = False
    backchannel_min_speech_ms: int = 1100
    backchannel_stable_ms: int = 420
    backchannel_min_interval_ms: int = 6000
    backchannel_recent_limit: int = 3

    # --- TTS chunking of streamed assistant text, and optional prefill ---
    tts_first_chunk_max_chars: int = 72
    tts_first_chunk_max_words: int = 14
    tts_auto_ack_prefix_enabled: bool = False
    assistant_stream_chunk_min_chars: int = 32
    assistant_stream_chunk_min_words: int = 6
    assistant_stream_chunk_max_chars: int = 56
    tts_prefill_enabled: bool = False
    tts_prefill_min_chars: int = 48
    # Comma-separated string, same storage convention as hallucination_phrases.
    tts_prefill_choices: str = "okay,yeah,right,got it"

    # --- Assistant backend: OpenRouter ---
    # The explicit alias means this field is read from OPENROUTER_API_KEY; per
    # the pydantic-settings docs, env_prefix is not applied to aliased fields.
    # populate_by_name (see model_config) still allows passing it to the
    # constructor as `openrouter_api_key=...`.
    openrouter_api_key: str | None = Field(default=None, alias="OPENROUTER_API_KEY")
    openrouter_model: str = "liquid/lfm-2.5-1.2b-instruct:free"
    openrouter_base_url: str = "https://openrouter.ai/api/v1"
    openrouter_site_url: str = "https://voice.agent-assistant.org"
    openrouter_app_name: str = "Voice Latency Lab"
    openrouter_system_prompt: str = (
        "You are a warm, natural voice assistant. Reply in spoken language, usually one or two sentences and sometimes a short third sentence when it helps. "
        "By default, reply in the same language as the user's latest message. "
        "If the user explicitly asks for translation, comparison, or output in two or more languages, include all requested languages. "
        "Use brief conversational acknowledgements when natural, such as 'yeah', 'right', 'mm-hmm', or 'got it', "
        "but do not overuse them. You may occasionally include a single inline paralinguistic tag like [chuckle], "
        "[laugh], [sigh], or [cough] when it clearly fits the moment. Keep answers useful, human, and easy to speak aloud. "
        "When appropriate, add one short follow-up question or gentle prompt so the conversation keeps moving."
    )
    openrouter_max_tokens: int = 72
    openrouter_temperature: float = 0.55

    # --- Assistant backend: llama server (OpenAI-style /v1 base URL) ---
    llama_base_url: str = "http://127.0.0.1:8081/v1"
    llama_model: str = "gemma-4-e2b-it"
    llama_api_key: str | None = None
    # NOTE(review): this default text is identical to openrouter_system_prompt;
    # the two are separate literals, so an edit to one does not affect the other.
    llama_system_prompt: str = (
        "You are a warm, natural voice assistant. Reply in spoken language, usually one or two sentences and sometimes a short third sentence when it helps. "
        "By default, reply in the same language as the user's latest message. "
        "If the user explicitly asks for translation, comparison, or output in two or more languages, include all requested languages. "
        "Use brief conversational acknowledgements when natural, such as 'yeah', 'right', 'mm-hmm', or 'got it', "
        "but do not overuse them. You may occasionally include a single inline paralinguistic tag like [chuckle], "
        "[laugh], [sigh], or [cough] when it clearly fits the moment. Keep answers useful, human, and easy to speak aloud. "
        "When appropriate, add one short follow-up question or gentle prompt so the conversation keeps moving."
    )
    llama_max_tokens: int = 56
    llama_temperature: float = 1.0
    llama_top_p: float = 0.95
    llama_top_k: int = 64
    llama_repetition_penalty: float = 1.0
    llama_stop_tokens: str = "<turn|>"
    llama_context_window: int = 8192
    llama_threads: int = 8

    # How values are sourced:
    #   env_prefix       -- fields are read from VOICE_LAB_<FIELD_NAME> variables.
    #   env_file         -- a ".env" file is also read; the path is relative, so
    #                       it is resolved against the process working directory.
    #   extra="ignore"   -- unknown keys are dropped instead of raising.
    #   populate_by_name -- aliased fields may also be set by their field name.
    model_config = SettingsConfigDict(
        env_prefix="VOICE_LAB_",
        env_file=".env",
        extra="ignore",
        populate_by_name=True,
    )


# Shared module-level instance. It is built at import time, so importing this
# module reads the environment and ".env" and raises a validation error right
# away if any supplied value cannot be coerced to its declared type.
settings = Settings()