# Top-level runtime flags.
# SAY_SENTENCE_SEPARATELY must stay at the top level (outside every section)
# so that ConversationManager can recognize it.
SAY_SENTENCE_SEPARATELY: true
# NOTE(review): presumably toggles verbose logging — confirm against the consumer.
VERBOSE: false

# MCP (Model Context Protocol) settings: an on/off switch plus the server list.
mcp_config:
  enabled: true
  servers: ['local-tools']

# System settings: options related to the initialization of the server.
system_config:
  conf_version: 'v1.2.1'
  # Use '0.0.0.0' if other devices should be able to access this page;
  # use 'localhost' for local-only access.
  host: '0.0.0.0'
  port: 7860
  # Setting for alternative configurations.
  config_alts_dir: 'characters'
  # Tool prompts that will be appended to the persona prompt.
  tool_prompts:
    # Appended to the end of the system prompt so that the LLM includes
    # keywords to control facial expressions. Supported keywords are loaded
    # automatically into the location of `[<insert_emomap_keys>]`.
    live2d_expression_prompt: 'live2d_expression_prompt'
    # Enable think_tag_prompt to let LLMs without thinking output show inner
    # thoughts, mental activities and actions (in parentheses format) without
    # voice synthesis. Disabled by default (commented out):
    # think_tag_prompt: 'think_tag_prompt'
    # live_prompt: 'live_prompt'
    # In a group conversation, this prompt is added to the memory of each AI
    # participant.
    group_conversation_prompt: 'group_conversation_prompt'
    # Enable mcp_prompt to let LLMs with MCP (Model Context Protocol) interact
    # with tools.
    mcp_prompt: 'mcp_prompt'
    # Prompt used when the AI is asked to speak proactively.
    proactive_speak_prompt: 'proactive_speak_prompt'
    # Prompt to enhance the LLM's ability to output speakable text.
    # Disabled by default (commented out):
    # speakable_prompt: 'speakable_prompt'
    # Additional guidance for the LLM on how to use tools.
    tool_guidance_prompt: 'tool_guidance_prompt'

# Configuration for the default character.
character_config:
  conf_name: 'en_Yue_Pro'  # English default (previously vi_Yue_Pro)
  conf_uid: 'en_Yue_Pro_01'  # English default (previously vi_Yue__01)
  # Same value as live2d_config.default_model further down in this file.
  live2d_model_name: 'Kamiyahakuk_pro'
  character_name: 'Yue'
  avatar: 'Yue_001.png'
  human_name: 'Human'

  # ============== Prompts ==============
  # Persona prompt for the character. Written as a literal block scalar (`|`),
  # so line breaks inside the text are preserved exactly as written.
  # The entries of system_config.tool_prompts are appended to this prompt.
  persona_prompt: |
    You are Yue, an AI assistant created by the Open-LLM project.
    Always respond in the same language the user speaks to you.
    If the user speaks English, reply in English.
    If the user speaks Tamil, Hindi, Telugu, Kannada, Malayalam, or any other regional language, reply in that language.
    Your personality is helpful but wittily sarcastic. You enjoy teasing users about obvious things they miss,
    while always delivering deep technical knowledge. You make conversations both useful and entertaining.
    You challenge assumptions, provoke better thinking, and leave users smarter than before.

    MUSIC PLAYBACK RULES:
    1. Keep your intro short (e.g. "Here you go...").
    2. The SING_COMMAND must appear at the VERY END — no characters after it.
    3. Syntax: [[SING_COMMAND]]filename (do NOT write .mp3 in the command).

    AVAILABLE TRACKS:
    - golden
    - Catch_Me_If_You_Can
    - ecstacy
    - eve
    - ode_to_the_nameless_martyr
    - running_up_that_hill
    - throttle_up
    - what_it_sounds_like
    - worry_slowed

  # =================== LLM Backend Settings ===================
  agent_config:
    # Name of the agent to use; the current value matches the
    # `basic_memory_agent` entry under `agent_settings`.
    conversation_agent_choice: 'basic_memory_agent'

    # Per-agent settings. Booleans are written in canonical lowercase form
    # (`true`/`false`) so every YAML 1.1/1.2 loader reads them as booleans.
    agent_settings:
      basic_memory_agent:
        # The current value matches the `openai_llm` entry under `llm_configs`.
        llm_provider: 'openai_llm'
        faster_first_response: true
        segment_method: 'pysbd'
        use_mcpp: true
        mcp_enabled_servers: ['local-tools']

      letta_agent:
        host: 'localhost'
        port: 8283
        id: 'xxx'  # placeholder value; quoted so it is unambiguously a string
        faster_first_response: true
        segment_method: 'pysbd'

      hume_ai_agent:
        api_key: ''
        host: 'api.hume.ai'
        config_id: ''
        idle_timeout: 15

    # Per-provider LLM settings, keyed by provider name.
    #
    # SECURITY: keep real API keys out of version control. Every `llm_api_key`
    # below is a placeholder to be filled in locally. Any key that was ever
    # committed in this file must be treated as compromised and revoked/rotated
    # with the provider.
    llm_configs:
      stateless_llm_with_template:
        base_url: 'http://localhost:8080/v1'
        llm_api_key: 'somethingelse'
        organization_id: null
        project_id: null
        model: 'qwen2.5:latest'
        template: 'CHATML'
        temperature: 1.0
        interrupt_method: 'user'

      openai_compatible_llm:
        base_url: 'http://localhost:11434/v1'
        llm_api_key: 'somethingelse'
        organization_id: null
        project_id: null
        model: 'mistral:latest'
        temperature: 1.0
        interrupt_method: 'user'

      claude_llm:
        base_url: 'https://api.anthropic.com'
        llm_api_key: 'YOUR API KEY HERE'
        model: 'claude-3-haiku-20240307'

      llama_cpp_llm:
        model_path: '<path-to-gguf-model-file>'
        verbose: false

      ollama_llm:
        base_url: 'http://localhost:11434/v1'
        model: 'qwen3.5:4b'
        temperature: 0.7
        keep_alive: -1
        unload_at_exit: true

      lmstudio_llm:
        base_url: 'http://localhost:1234/v1'
        model: 'qwen2.5:latest'
        temperature: 1.0

      # base_url points at OpenRouter, so the key here is an OpenRouter key.
      openai_llm:
        llm_api_key: 'Your OpenRouter API key'
        base_url: 'https://openrouter.ai/api/v1'
        model: 'google/gemini-2.0-flash-001'
        temperature: 0.8
        max_tokens: 500

      gemini_llm:
        llm_api_key: 'Your Gemini API key'
        model: 'gemini-1.5-flash'
        # Written out in full: the former `0.` parsed as 0.0.
        # NOTE(review): `0.` looked truncated — confirm the intended value.
        temperature: 0.0

      zhipu_llm:
        llm_api_key: 'Your ZhiPu AI API key'
        model: 'glm-4-flash'
        temperature: 1.0

      deepseek_llm:
        llm_api_key: 'Your DeepSeek API key'
        # NOTE(review): this id has the same `vendor/model` shape as the
        # OpenRouter model under openai_llm — confirm it is valid for this
        # provider.
        model: 'deepseek/deepseek-chat:free'
        temperature: 0.7

      mistral_llm:
        llm_api_key: 'Your Mistral API key'
        model: 'pixtral-large-latest'
        temperature: 1.0

      groq_llm:
        llm_api_key: 'Your Groq API key'
        model: 'llama-3.3-70b-versatile'
        temperature: 0.5

  # =================== Automatic Speech Recognition ===================
  #
  # SECURITY: keep real API keys out of version control. Every `api_key` below
  # is a placeholder to be filled in locally. Any key that was ever committed
  # in this file must be treated as compromised and revoked/rotated.
  asr_config:
    # The current value matches the `groq_whisper_asr` entry below.
    asr_model: 'groq_whisper_asr'

    azure_asr:
      api_key: 'azure_api_key'
      region: 'eastus'
      # English + Indian regional languages
      languages: ['en-IN', 'en-US', 'ta-IN', 'hi-IN', 'te-IN', 'kn-IN', 'ml-IN']

    faster_whisper:
      model_path: 'large-v3-turbo'
      download_root: 'models/whisper'
      language: ''  # leave blank for auto-detect (supports all languages)
      device: 'auto'
      compute_type: 'int8'
      prompt: ''

    whisper_cpp:
      model_name: 'small'
      model_dir: 'models/whisper'
      print_realtime: false
      print_progress: false
      language: 'auto'  # auto-detect: English + all regional languages
      prompt: ''

    whisper:
      name: 'medium'
      download_root: 'models/whisper'
      device: 'cpu'
      prompt: ''

    fun_asr:
      model_name: 'iic/SenseVoiceSmall'
      vad_model: 'fsmn-vad'
      punc_model: 'ct-punc'
      device: 'cpu'
      disable_update: true
      ncpu: 4
      hub: 'ms'
      use_itn: false
      language: 'auto'  # auto-detect English + regional languages

    sherpa_onnx_asr:
      model_type: 'sense_voice'
      sense_voice: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'
      tokens: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'
      num_threads: 4
      use_itn: true
      provider: 'cpu'

    groq_whisper_asr:
      api_key: 'Your Groq API key'
      model: 'whisper-large-v3-turbo'
      # Empty = auto-detect all languages (previously 'vi', Vietnamese only).
      lang: ''

  # =================== Text to Speech ===================
  # One entry per TTS backend; `tts_model` names the entry in use.
  # Booleans are written in canonical lowercase form and string values use
  # single quotes throughout for consistency.
  tts_config:
    # The current value matches the `edge_tts` entry below.
    tts_model: 'edge_tts'

    azure_tts:
      api_key: 'azure-api-key'
      region: 'eastus'
      voice: 'en-IN-NeerjaNeural'  # English (India) — change as needed
      pitch: '26'
      rate: '1'

    bark_tts:
      voice: 'v2/en_speaker_1'

    edge_tts:
      # Use `edge-tts --list-voices` to list all available voices.
      # English voices (default): en-US-AvaMultilingualNeural, en-IN-NeerjaNeural
      # Tamil:     ta-IN-PallaviNeural
      # Hindi:     hi-IN-SwaraNeural
      # Telugu:    te-IN-ShrutiNeural
      # Kannada:   kn-IN-GaganNeural
      # Malayalam: ml-IN-SobhanaNeural
      # Bengali:   bn-IN-TanishaaNeural
      # English multilingual default (previously vi-VN-HoaiMyNeural).
      voice: 'en-US-AvaMultilingualNeural'

    piper_tts:
      model_path: 'models/piper/en_US-lessac-medium.onnx'
      speaker_id: 0
      length_scale: 1.0
      noise_scale: 0.667
      noise_w: 0.8
      volume: 1.0
      normalize_audio: true
      use_cuda: false

    cosyvoice_tts:
      client_url: 'http://127.0.0.1:50000/'
      mode_checkbox_group: '预训练音色'
      sft_dropdown: '中文女'
      prompt_text: ''
      prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      instruct_text: ''
      seed: 0
      api_name: '/generate_audio'

    cosyvoice2_tts:
      client_url: 'http://127.0.0.1:50000/'
      mode_checkbox_group: '3s极速复刻'
      sft_dropdown: ''
      prompt_text: ''
      prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      instruct_text: ''
      stream: false
      seed: 0
      speed: 1.0
      api_name: '/generate_audio'

    melo_tts:
      speaker: 'EN-Default'
      language: 'EN'
      device: 'auto'
      speed: 1.0

    x_tts:
      api_url: 'http://127.0.0.1:8020/tts_to_audio'
      speaker_wav: 'female'
      language: 'en'

    gpt_sovits_tts:
      api_url: 'http://127.0.0.1:9880/tts'
      text_lang: 'en'
      ref_audio_path: ''
      prompt_lang: 'en'
      prompt_text: ''
      text_split_method: 'cut5'
      # NOTE(review): batch_size and streaming_mode are strings, not an int and
      # a boolean — left unchanged; confirm what the consumer expects.
      batch_size: '1'
      media_type: 'wav'
      streaming_mode: 'false'

    fish_api_tts:
      api_key: ''
      reference_id: ''
      latency: 'balanced'
      base_url: 'https://api.fish.audio'

    coqui_tts:
      model_name: 'tts_models/en/ljspeech/tacotron2-DDC'
      speaker_wav: ''
      language: 'en'
      device: ''

    siliconflow_tts:
      api_url: 'https://api.siliconflow.cn/v1/audio/speech'
      api_key: 'your key'
      default_model: 'FunAudioLLM/CosyVoice2-0.5B'
      default_voice: 'speech:Dreamflowers:5bdstvc39i:xkqldnpasqmoqbakubom your voice name'
      sample_rate: 32000
      response_format: 'mp3'
      stream: true
      speed: 1
      gain: 0

    sherpa_onnx_tts:
      vits_model: '/path/to/tts-models/vits-melo-tts-zh_en/model.onnx'
      vits_lexicon: '/path/to/tts-models/vits-melo-tts-zh_en/lexicon.txt'
      vits_tokens: '/path/to/tts-models/vits-melo-tts-zh_en/tokens.txt'
      vits_data_dir: ''
      vits_dict_dir: '/path/to/tts-models/vits-melo-tts-zh_en/dict'
      # Comma-separated list of rule FST paths.
      tts_rule_fsts: '/path/to/tts-models/vits-melo-tts-zh_en/number.fst,/path/to/tts-models/vits-melo-tts-zh_en/phone.fst,/path/to/tts-models/vits-melo-tts-zh_en/date.fst,/path/to/tts-models/vits-melo-tts-zh_en/new_heteronym.fst'
      max_num_sentences: 2
      sid: 1
      provider: 'cpu'
      num_threads: 1
      speed: 1.0
      debug: false

    spark_tts:
      api_url: 'http://127.0.0.1:6006/'
      api_name: 'voice_clone'
      prompt_wav_upload: 'https://uploadstatic.mihoyo.com/ys-obc/2022/11/02/16576950/4d9feb71760c5e8eb5f6c700df12fa0c_6824265537002152805.mp3'
      gender: 'female'
      pitch: 3
      speed: 3

    openai_tts:
      model: 'kokoro'
      voice: 'af_sky+af_bella'
      api_key: 'not-needed'
      base_url: 'http://localhost:8880/v1'
      file_extension: 'mp3'

    minimax_tts:
      group_id: ''
      api_key: ''
      model: 'speech-02-turbo'
      voice_id: 'female-shaonv'
      pronunciation_dict: ''

    elevenlabs_tts:
      api_key: ''
      voice_id: ''
      model_id: 'eleven_multilingual_v2'
      output_format: 'mp3_44100_128'
      stability: 0.5
      similarity_boost: 0.5
      style: 0.0
      use_speaker_boost: true

    cartesia_tts:
      api_key: ''
      voice_id: ''
      model_id: 'sonic-3'
      output_format: 'wav'
      language: 'en'
      emotion: 'neutral'
      volume: 1.0
      speed: 1.0

  # =================== Voice Activity Detection ===================
  vad_config:
    # No VAD backend is selected here (explicit null).
    # NOTE(review): presumably 'silero_vad' would select the entry below — confirm.
    vad_model: null

    silero_vad:
      # Input and target sample rates are both 16000.
      orig_sr: 16000
      target_sr: 16000
      prob_threshold: 0.4
      db_threshold: 60
      required_hits: 3
      required_misses: 24
      smoothing_window: 5

  # Text preprocessing applied before speech synthesis, plus optional
  # translation. Booleans are written in canonical lowercase form
  # (`true`/`false`) so every YAML 1.1/1.2 loader reads them as booleans.
  tts_preprocessor_config:
    remove_special_char: true
    ignore_brackets: false
    ignore_parentheses: true
    ignore_asterisks: true
    ignore_angle_brackets: true

    translator_config:
      translate_audio: false
      # The current value matches the `deeplx` entry below.
      translate_provider: 'deeplx'

      deeplx:
        deeplx_target_lang: 'EN'
        deeplx_api_endpoint: 'http://localhost:1188/v2/translate'

      tencent:
        secret_id: ''
        secret_key: ''
        region: 'ap-guangzhou'
        source_lang: 'auto'
        target_lang: 'en'

# --- ASSETS ---
# Live2D models: directory path and default model name.
# default_model has the same value as character_config.live2d_model_name.
live2d_config:
  live2d_path: 'live2d-models'
  default_model: 'Kamiyahakuk_pro'

# Backgrounds: directory path and default background image file.
background_config:
  background_path: 'backgrounds'
  default_background: 'ceiling-window-room-night.jpeg'

# Live streaming integration.
live_config:
  bilibili_live:
    # List of room IDs (integers).
    room_ids: [1991478060]
    # Empty by default; this is a credential, so never commit a real value.
    sessdata: ''