---
# NOTE(review): this file previously had its line breaks and indentation
# collapsed, which left it unparseable (most keys sat behind a leading `#`).
# The nesting below was reconstructed from key order and the section banners;
# confirm it against the schema the loader expects before deploying.
#
# SECURITY: the previous revision embedded live-looking API keys (OpenRouter,
# Gemini, DeepSeek, Groq). They are replaced with placeholders below. Treat the
# old values as compromised: revoke them with each provider and issue new ones.
# Do not commit real keys to version control.

# This setting should be at the top level so ConversationManager can
# recognize it.
SAY_SENTENCE_SEPARATELY: true
VERBOSE: false

# MCP configuration
mcp_config:
  enabled: true
  servers:
    - local-tools

# System settings: settings related to the initialization of the server
system_config:
  conf_version: 'v1.2.1'
  # Use 0.0.0.0 if you want other devices to access this page; use localhost
  # for local-only access.
  host: '0.0.0.0'
  port: 7860
  # Setting for alternative configurations
  config_alts_dir: 'characters'

  # Tool prompts that will be appended to the persona prompt
  tool_prompts:
    # Appended to the end of the system prompt to let the LLM include keywords
    # that control facial expressions. Supported keywords will be automatically
    # loaded into the location of `[]`.
    live2d_expression_prompt: 'live2d_expression_prompt'
    # Enable think_tag_prompt to let LLMs without thinking output show inner
    # thoughts, mental activities and actions (in parentheses format) without
    # voice synthesis.
    # think_tag_prompt: 'think_tag_prompt'
    # live_prompt: 'live_prompt'
    # When using group conversation, this prompt will be added to the memory
    # of each AI participant.
    group_conversation_prompt: 'group_conversation_prompt'
    # Enable mcp_prompt to let LLMs with MCP (Model Context Protocol) support
    # interact with tools.
    mcp_prompt: 'mcp_prompt'
    # Prompt used when the AI is asked to speak proactively
    proactive_speak_prompt: 'proactive_speak_prompt'
    # Prompt to enhance the LLM's ability to output speakable text
    # speakable_prompt: 'speakable_prompt'
    # Additional guidance for the LLM on how to use tools
    tool_guidance_prompt: 'tool_guidance_prompt'

# Configuration for the default character
character_config:
  conf_name: 'en_Yue_Pro'  # Changed from vi_Yue_Pro → English default
  conf_uid: 'en_Yue_Pro_01'  # Changed from vi_Yue__01 → English default
  live2d_model_name: 'Kamiyahakuk_pro'
  character_name: 'Yue'
  avatar: 'Yue_001.png'
  human_name: 'Human'

  # ============== Prompts ==============
  # NOTE(review): the wording of the prompt is unchanged, but its original
  # line breaks were lost; the breaks below are a reconstruction — adjust if
  # the model is sensitive to the exact layout.
  persona_prompt: |
    You are Yue, an AI assistant created by the Open-LLM project.
    Always respond in the same language the user speaks to you.
    If the user speaks English, reply in English.
    If the user speaks Tamil, Hindi, Telugu, Kannada, Malayalam, or any other regional language, reply in that language.

    Your personality is helpful but wittily sarcastic.
    You enjoy teasing users about obvious things they miss, while always delivering deep technical knowledge.
    You make conversations both useful and entertaining.
    You challenge assumptions, provoke better thinking, and leave users smarter than before.

    MUSIC PLAYBACK RULES:
    1. Keep your intro short (e.g. "Here you go...").
    2. The SING_COMMAND must appear at the VERY END — no characters after it.
    3. Syntax: [[SING_COMMAND]]filename (do NOT write .mp3 in the command).

    AVAILABLE TRACKS:
    - golden
    - Catch_Me_If_You_Can
    - ecstacy
    - eve
    - ode_to_the_nameless_martyr
    - running_up_that_hill
    - throttle_up
    - what_it_sounds_like
    - worry_slowed

  # =================== LLM Backend Settings ===================
  agent_config:
    conversation_agent_choice: 'basic_memory_agent'

    agent_settings:
      basic_memory_agent:
        # Must name one of the entries under `llm_configs` below.
        llm_provider: 'openai_llm'
        faster_first_response: true
        segment_method: 'pysbd'
        use_mcpp: true
        mcp_enabled_servers: ['local-tools']

      letta_agent:
        host: 'localhost'
        port: 8283
        id: 'xxx'
        faster_first_response: true
        segment_method: 'pysbd'

      hume_ai_agent:
        api_key: ''
        host: 'api.hume.ai'
        config_id: ''
        idle_timeout: 15

    llm_configs:
      stateless_llm_with_template:
        base_url: 'http://localhost:8080/v1'
        llm_api_key: 'somethingelse'
        organization_id: null
        project_id: null
        model: 'qwen2.5:latest'
        template: 'CHATML'
        temperature: 1.0
        interrupt_method: 'user'

      openai_compatible_llm:
        base_url: 'http://localhost:11434/v1'
        llm_api_key: 'somethingelse'
        organization_id: null
        project_id: null
        model: 'mistral:latest'
        temperature: 1.0
        interrupt_method: 'user'

      claude_llm:
        base_url: 'https://api.anthropic.com'
        llm_api_key: 'YOUR API KEY HERE'
        model: 'claude-3-haiku-20240307'

      llama_cpp_llm:
        model_path: ''
        verbose: false

      ollama_llm:
        base_url: 'http://localhost:11434/v1'
        model: 'qwen3.5:4b'
        temperature: 0.7
        keep_alive: -1
        unload_at_exit: true

      lmstudio_llm:
        base_url: 'http://localhost:1234/v1'
        model: 'qwen2.5:latest'
        temperature: 1.0

      # Currently selected provider (see `llm_provider` above); points at
      # OpenRouter's OpenAI-compatible endpoint.
      openai_llm:
        # Secret removed — supply a freshly issued key locally.
        llm_api_key: 'Your OpenRouter API key'
        base_url: 'https://openrouter.ai/api/v1'
        model: 'google/gemini-2.0-flash-001'
        temperature: 0.8
        max_tokens: 500

      gemini_llm:
        # Secret removed — supply a freshly issued key locally.
        llm_api_key: 'Your Gemini API key'
        model: 'gemini-1.5-flash'
        # Was written as `0.`, which parses to the same float value.
        temperature: 0.0

      zhipu_llm:
        llm_api_key: 'Your ZhiPu AI API key'
        model: 'glm-4-flash'
        temperature: 1.0

      deepseek_llm:
        # Secret removed — supply a freshly issued key locally.
        llm_api_key: 'Your DeepSeek API key'
        model: 'deepseek/deepseek-chat:free'
        temperature: 0.7

      mistral_llm:
        llm_api_key: 'Your Mistral API key'
        model: 'pixtral-large-latest'
        temperature: 1.0

      groq_llm:
        # Secret removed — supply a freshly issued key locally.
        llm_api_key: 'Your Groq API key'
        model: 'llama-3.3-70b-versatile'
        temperature: 0.5

  # === Automatic Speech Recognition ===
  asr_config:
    # Must name one of the ASR entries below.
    asr_model: 'groq_whisper_asr'

    azure_asr:
      api_key: 'azure_api_key'
      region: 'eastus'
      # English + Indian regional languages
      languages: ['en-IN', 'en-US', 'ta-IN', 'hi-IN', 'te-IN', 'kn-IN', 'ml-IN']

    faster_whisper:
      model_path: 'large-v3-turbo'
      download_root: 'models/whisper'
      language: ''  # Leave blank for auto-detect (supports all languages)
      device: 'auto'
      compute_type: 'int8'
      prompt: ''

    whisper_cpp:
      model_name: 'small'
      model_dir: 'models/whisper'
      print_realtime: false
      print_progress: false
      language: 'auto'  # auto-detect: English + all regional languages
      prompt: ''

    whisper:
      name: 'medium'
      download_root: 'models/whisper'
      device: 'cpu'
      prompt: ''

    fun_asr:
      model_name: 'iic/SenseVoiceSmall'
      vad_model: 'fsmn-vad'
      punc_model: 'ct-punc'
      device: 'cpu'
      disable_update: true
      ncpu: 4
      hub: 'ms'
      use_itn: false
      language: 'auto'  # auto-detect English + regional languages

    sherpa_onnx_asr:
      model_type: 'sense_voice'
      sense_voice: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'
      tokens: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'
      num_threads: 4
      use_itn: true
      provider: 'cpu'

    groq_whisper_asr:
      # Secret removed — supply a freshly issued key locally.
      api_key: 'Your Groq API key'
      model: 'whisper-large-v3-turbo'
      # CHANGED: was 'vi' (Vietnamese only) → now '' (auto-detect ALL languages)
      lang: ''

  # =================== Text to Speech ===================
  tts_config:
    # Must name one of the TTS entries below.
    tts_model: 'edge_tts'

    azure_tts:
      api_key: 'azure-api-key'
      region: 'eastus'
      voice: 'en-IN-NeerjaNeural'  # English (India) — change as needed
      pitch: '26'
      rate: '1'

    bark_tts:
      voice: 'v2/en_speaker_1'

    edge_tts:
      # Use `edge-tts --list-voices` to list all available voices
      # English voices (default): en-US-AvaMultilingualNeural, en-IN-NeerjaNeural
      # Tamil: ta-IN-PallaviNeural
      # Hindi: hi-IN-SwaraNeural
      # Telugu: te-IN-ShrutiNeural
      # Kannada: kn-IN-GaganNeural
      # Malayalam: ml-IN-SobhanaNeural
      # Bengali: bn-IN-TanishaaNeural
      # CHANGED: was vi-VN-HoaiMyNeural → English multilingual default
      voice: 'en-US-AvaMultilingualNeural'

    piper_tts:
      model_path: 'models/piper/en_US-lessac-medium.onnx'
      speaker_id: 0
      length_scale: 1.0
      noise_scale: 0.667
      noise_w: 0.8
      volume: 1.0
      normalize_audio: true
      use_cuda: false

    cosyvoice_tts:
      client_url: 'http://127.0.0.1:50000/'
      mode_checkbox_group: '预训练音色'
      sft_dropdown: '中文女'
      prompt_text: ''
      prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      instruct_text: ''
      seed: 0
      api_name: '/generate_audio'

    cosyvoice2_tts:
      client_url: 'http://127.0.0.1:50000/'
      mode_checkbox_group: '3s极速复刻'
      sft_dropdown: ''
      prompt_text: ''
      prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
      instruct_text: ''
      stream: false
      seed: 0
      speed: 1.0
      api_name: '/generate_audio'

    melo_tts:
      speaker: 'EN-Default'
      language: 'EN'
      device: 'auto'
      speed: 1.0

    x_tts:
      api_url: 'http://127.0.0.1:8020/tts_to_audio'
      speaker_wav: 'female'
      language: 'en'

    gpt_sovits_tts:
      api_url: 'http://127.0.0.1:9880/tts'
      text_lang: 'en'
      ref_audio_path: ''
      prompt_lang: 'en'
      prompt_text: ''
      text_split_method: 'cut5'
      # The next three values are intentionally strings, as in the original.
      batch_size: '1'
      media_type: 'wav'
      streaming_mode: 'false'

    fish_api_tts:
      api_key: ''
      reference_id: ''
      latency: 'balanced'
      base_url: 'https://api.fish.audio'

    coqui_tts:
      model_name: 'tts_models/en/ljspeech/tacotron2-DDC'
      speaker_wav: ''
      language: 'en'
      device: ''

    siliconflow_tts:
      api_url: 'https://api.siliconflow.cn/v1/audio/speech'
      api_key: 'your key'
      default_model: 'FunAudioLLM/CosyVoice2-0.5B'
      default_voice: 'speech:Dreamflowers:5bdstvc39i:xkqldnpasqmoqbakubom your voice name'
      sample_rate: 32000
      response_format: 'mp3'
      stream: true
      speed: 1
      gain: 0

    sherpa_onnx_tts:
      vits_model: '/path/to/tts-models/vits-melo-tts-zh_en/model.onnx'
      vits_lexicon: '/path/to/tts-models/vits-melo-tts-zh_en/lexicon.txt'
      vits_tokens: '/path/to/tts-models/vits-melo-tts-zh_en/tokens.txt'
      vits_data_dir: ''
      vits_dict_dir: '/path/to/tts-models/vits-melo-tts-zh_en/dict'
      tts_rule_fsts: '/path/to/tts-models/vits-melo-tts-zh_en/number.fst,/path/to/tts-models/vits-melo-tts-zh_en/phone.fst,/path/to/tts-models/vits-melo-tts-zh_en/date.fst,/path/to/tts-models/vits-melo-tts-zh_en/new_heteronym.fst'
      max_num_sentences: 2
      sid: 1
      provider: 'cpu'
      num_threads: 1
      speed: 1.0
      debug: false

    spark_tts:
      api_url: 'http://127.0.0.1:6006/'
      api_name: 'voice_clone'
      prompt_wav_upload: 'https://uploadstatic.mihoyo.com/ys-obc/2022/11/02/16576950/4d9feb71760c5e8eb5f6c700df12fa0c_6824265537002152805.mp3'
      gender: 'female'
      pitch: 3
      speed: 3

    openai_tts:
      model: 'kokoro'
      voice: 'af_sky+af_bella'
      api_key: 'not-needed'
      base_url: 'http://localhost:8880/v1'
      file_extension: 'mp3'

    minimax_tts:
      group_id: ''
      api_key: ''
      model: 'speech-02-turbo'
      voice_id: 'female-shaonv'
      pronunciation_dict: ''

    elevenlabs_tts:
      api_key: ''
      voice_id: ''
      model_id: 'eleven_multilingual_v2'
      output_format: 'mp3_44100_128'
      stability: 0.5
      similarity_boost: 0.5
      style: 0.0
      use_speaker_boost: true

    cartesia_tts:
      api_key: ''
      voice_id: ''
      model_id: 'sonic-3'
      output_format: 'wav'
      language: 'en'
      emotion: 'neutral'
      volume: 1.0
      speed: 1.0

  # =================== Voice Activity Detection ===================
  vad_config:
    vad_model: null

    silero_vad:
      orig_sr: 16000
      target_sr: 16000
      prob_threshold: 0.4
      db_threshold: 60
      required_hits: 3
      required_misses: 24
      smoothing_window: 5

  tts_preprocessor_config:
    remove_special_char: true
    ignore_brackets: false
    ignore_parentheses: true
    ignore_asterisks: true
    ignore_angle_brackets: true

    translator_config:
      translate_audio: false
      translate_provider: 'deeplx'

      deeplx:
        deeplx_target_lang: 'EN'
        deeplx_api_endpoint: 'http://localhost:1188/v2/translate'

      tencent:
        secret_id: ''
        secret_key: ''
        region: 'ap-guangzhou'
        source_lang: 'auto'
        target_lang: 'en'

# --- ASSETS ---
# NOTE(review): these two sections are assumed to be top-level; confirm where
# the loader looks for them.
live2d_config:
  live2d_path: 'live2d-models'
  default_model: 'Kamiyahakuk_pro'

background_config:
  background_path: 'backgrounds'
  default_background: 'ceiling-window-room-night.jpeg'

# Live Streaming Integration
live_config:
  bilibili_live:
    room_ids: [1991478060]
    sessdata: ''