test_ui / conf.yaml
britto224's picture
Upload 17 files
79592ba verified
# Root-level switches. SAY_SENTENCE_SEPARATELY is kept at the top level of the
# document so that ConversationManager can recognize it.
SAY_SENTENCE_SEPARATELY: true
VERBOSE: false

# MCP configuration
mcp_config:
  enabled: true
  servers:
    - local-tools
# System settings: options related to the initialization of the server
system_config:
  conf_version: 'v1.2.1'
  # 0.0.0.0 lets other devices access this page; use localhost for local-only access
  host: '0.0.0.0'
  port: 7860
  # Setting for alternative configurations
  config_alts_dir: 'characters'
# Tool prompts that get appended to the persona prompt
tool_prompts:
  # Appended to the end of the system prompt so the LLM includes keywords that
  # control facial expressions. Supported keywords are loaded automatically
  # into the location of `[<insert_emomap_keys>]`.
  live2d_expression_prompt: 'live2d_expression_prompt'

  # Enable think_tag_prompt to let LLMs without thinking output show inner
  # thoughts, mental activities and actions (in parentheses format) without
  # voice synthesis.
  # think_tag_prompt: 'think_tag_prompt'
  # live_prompt: 'live_prompt'

  # In group conversation, this prompt is added to the memory of each AI participant.
  group_conversation_prompt: 'group_conversation_prompt'

  # Enable mcp_prompt to let LLMs with MCP (Model Context Protocol) interact with tools.
  mcp_prompt: 'mcp_prompt'

  # Used when the AI is asked to speak proactively
  proactive_speak_prompt: 'proactive_speak_prompt'

  # Enhances the LLM's ability to output speakable text
  # speakable_prompt: 'speakable_prompt'

  # Additional guidance for the LLM on how to use tools
  tool_guidance_prompt: 'tool_guidance_prompt'
# Default character configuration
character_config:
  conf_name: 'en_Yue_Pro'  # English default (previously vi_Yue_Pro)
  conf_uid: 'en_Yue_Pro_01'  # English default (previously vi_Yue__01)
  live2d_model_name: 'Kamiyahakuk_pro'
  character_name: 'Yue'
  avatar: 'Yue_001.png'
  human_name: 'Human'

  # ---------- Prompts ----------
  # Literal block scalar: every line below is part of the persona prompt text.
  persona_prompt: |
    You are Yue, an AI assistant created by the Open-LLM project.
    Always respond in the same language the user speaks to you.
    If the user speaks English, reply in English.
    If the user speaks Tamil, Hindi, Telugu, Kannada, Malayalam, or any other regional language, reply in that language.
    Your personality is helpful but wittily sarcastic. You enjoy teasing users about obvious things they miss,
    while always delivering deep technical knowledge. You make conversations both useful and entertaining.
    You challenge assumptions, provoke better thinking, and leave users smarter than before.
    MUSIC PLAYBACK RULES:
    1. Keep your intro short (e.g. "Here you go...").
    2. The SING_COMMAND must appear at the VERY END no characters after it.
    3. Syntax: [[SING_COMMAND]]filename (do NOT write .mp3 in the command).
    AVAILABLE TRACKS:
    - golden
    - Catch_Me_If_You_Can
    - ecstacy
    - eve
    - ode_to_the_nameless_martyr
    - running_up_that_hill
    - throttle_up
    - what_it_sounds_like
    - worry_slowed
# ---------- LLM backend settings ----------
agent_config:
  # Names one of the entries under agent_settings
  conversation_agent_choice: 'basic_memory_agent'

  agent_settings:
    basic_memory_agent:
      # Names one of the provider entries in llm_configs
      llm_provider: 'openai_llm'
      faster_first_response: true
      segment_method: 'pysbd'
      use_mcpp: true
      mcp_enabled_servers:
        - 'local-tools'

    letta_agent:
      host: 'localhost'
      port: 8283
      id: 'xxx'
      faster_first_response: true
      segment_method: 'pysbd'

    hume_ai_agent:
      api_key: ''
      host: 'api.hume.ai'
      config_id: ''
      idle_timeout: 15
# LLM provider settings, one mapping per provider.
#
# SECURITY: this section previously held live API keys in plain text for
# openai_llm (OpenRouter), gemini_llm, deepseek_llm and groq_llm. They are
# replaced with placeholders below. Treat the exposed keys as compromised:
# revoke them with each provider and supply new ones outside of version
# control (e.g. through the deployment's secret store).
llm_configs:
  stateless_llm_with_template:
    base_url: 'http://localhost:8080/v1'
    llm_api_key: 'somethingelse'
    organization_id: null
    project_id: null
    model: 'qwen2.5:latest'
    template: 'CHATML'
    temperature: 1.0
    interrupt_method: 'user'

  openai_compatible_llm:
    base_url: 'http://localhost:11434/v1'
    llm_api_key: 'somethingelse'
    organization_id: null
    project_id: null
    model: 'mistral:latest'
    temperature: 1.0
    interrupt_method: 'user'

  claude_llm:
    base_url: 'https://api.anthropic.com'
    llm_api_key: 'YOUR API KEY HERE'
    model: 'claude-3-haiku-20240307'

  llama_cpp_llm:
    model_path: '<path-to-gguf-model-file>'
    verbose: false

  ollama_llm:
    base_url: 'http://localhost:11434/v1'
    model: 'qwen3.5:4b'
    temperature: 0.7
    keep_alive: -1
    unload_at_exit: true

  lmstudio_llm:
    base_url: 'http://localhost:1234/v1'
    model: 'qwen2.5:latest'
    temperature: 1.0

  # Points at OpenRouter's OpenAI-compatible endpoint.
  openai_llm:
    llm_api_key: 'Your OpenRouter API key'  # leaked key removed; do not commit the real value
    base_url: 'https://openrouter.ai/api/v1'
    model: 'google/gemini-2.0-flash-001'
    temperature: 0.8
    max_tokens: 500

  gemini_llm:
    llm_api_key: 'Your Gemini API key'  # leaked key removed; do not commit the real value
    model: 'gemini-1.5-flash'
    # NOTE(review): the original read `0.` (parses as 0.0); confirm that a
    # temperature of zero is intended and the value was not truncated.
    temperature: 0.0

  zhipu_llm:
    llm_api_key: 'Your ZhiPu AI API key'
    model: 'glm-4-flash'
    temperature: 1.0

  deepseek_llm:
    llm_api_key: 'Your DeepSeek API key'  # leaked key removed; do not commit the real value
    model: 'deepseek/deepseek-chat:free'
    temperature: 0.7

  mistral_llm:
    llm_api_key: 'Your Mistral API key'
    model: 'pixtral-large-latest'
    temperature: 1.0

  groq_llm:
    llm_api_key: 'Your Groq API key'  # leaked key removed; do not commit the real value
    model: 'llama-3.3-70b-versatile'
    temperature: 0.5
# ---------- Automatic speech recognition ----------
#
# SECURITY: groq_whisper_asr.api_key previously held a live Groq API key in
# plain text. It is replaced with a placeholder below. Treat the exposed key
# as compromised: revoke it and supply a new one outside of version control.
asr_config:
  # Names one of the engine entries below
  asr_model: 'groq_whisper_asr'

  azure_asr:
    api_key: 'azure_api_key'
    region: 'eastus'
    # English plus Indian regional languages
    languages: ['en-IN', 'en-US', 'ta-IN', 'hi-IN', 'te-IN', 'kn-IN', 'ml-IN']

  faster_whisper:
    model_path: 'large-v3-turbo'
    download_root: 'models/whisper'
    language: ''  # leave blank for auto-detect (supports all languages)
    device: 'auto'
    compute_type: 'int8'
    prompt: ''

  whisper_cpp:
    model_name: 'small'
    model_dir: 'models/whisper'
    print_realtime: false
    print_progress: false
    language: 'auto'  # auto-detect: English plus all regional languages
    prompt: ''

  whisper:
    name: 'medium'
    download_root: 'models/whisper'
    device: 'cpu'
    prompt: ''

  fun_asr:
    model_name: 'iic/SenseVoiceSmall'
    vad_model: 'fsmn-vad'
    punc_model: 'ct-punc'
    device: 'cpu'
    disable_update: true
    ncpu: 4
    hub: 'ms'
    use_itn: false
    language: 'auto'  # auto-detect: English plus regional languages

  sherpa_onnx_asr:
    model_type: 'sense_voice'
    sense_voice: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'
    tokens: './models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'
    num_threads: 4
    use_itn: true
    provider: 'cpu'

  groq_whisper_asr:
    api_key: 'Your Groq API key'  # leaked key removed; do not commit the real value
    model: 'whisper-large-v3-turbo'
    lang: ''  # empty = auto-detect all languages (was 'vi', Vietnamese only)
# ---------- Text to speech ----------
tts_config:
  # Names one of the engine entries below
  tts_model: 'edge_tts'

  azure_tts:
    api_key: 'azure-api-key'
    region: 'eastus'
    voice: 'en-IN-NeerjaNeural'  # English (India); change as needed
    pitch: '26'
    rate: '1'

  bark_tts:
    voice: 'v2/en_speaker_1'

  edge_tts:
    # Run `edge-tts --list-voices` to list all available voices.
    #   English (default): en-US-AvaMultilingualNeural, en-IN-NeerjaNeural
    #   Tamil:     ta-IN-PallaviNeural
    #   Hindi:     hi-IN-SwaraNeural
    #   Telugu:    te-IN-ShrutiNeural
    #   Kannada:   kn-IN-GaganNeural
    #   Malayalam: ml-IN-SobhanaNeural
    #   Bengali:   bn-IN-TanishaaNeural
    # English multilingual default (was vi-VN-HoaiMyNeural)
    voice: 'en-US-AvaMultilingualNeural'

  piper_tts:
    model_path: 'models/piper/en_US-lessac-medium.onnx'
    speaker_id: 0
    length_scale: 1.0
    noise_scale: 0.667
    noise_w: 0.8
    volume: 1.0
    normalize_audio: true
    use_cuda: false

  cosyvoice_tts:
    client_url: 'http://127.0.0.1:50000/'
    mode_checkbox_group: '预训练音色'
    sft_dropdown: '中文女'
    prompt_text: ''
    prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
    prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
    instruct_text: ''
    seed: 0
    api_name: '/generate_audio'

  cosyvoice2_tts:
    client_url: 'http://127.0.0.1:50000/'
    mode_checkbox_group: '3s极速复刻'
    sft_dropdown: ''
    prompt_text: ''
    prompt_wav_upload_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
    prompt_wav_record_url: 'https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'
    instruct_text: ''
    stream: false
    seed: 0
    speed: 1.0
    api_name: '/generate_audio'

  melo_tts:
    speaker: 'EN-Default'
    language: 'EN'
    device: 'auto'
    speed: 1.0

  x_tts:
    api_url: 'http://127.0.0.1:8020/tts_to_audio'
    speaker_wav: 'female'
    language: 'en'

  gpt_sovits_tts:
    api_url: 'http://127.0.0.1:9880/tts'
    text_lang: 'en'
    ref_audio_path: ''
    prompt_lang: 'en'
    prompt_text: ''
    text_split_method: 'cut5'
    # The next three values are quoted strings in the original and stay strings.
    batch_size: '1'
    media_type: 'wav'
    streaming_mode: 'false'

  fish_api_tts:
    api_key: ''
    reference_id: ''
    latency: 'balanced'
    base_url: 'https://api.fish.audio'

  coqui_tts:
    model_name: 'tts_models/en/ljspeech/tacotron2-DDC'
    speaker_wav: ''
    language: 'en'
    device: ''

  siliconflow_tts:
    api_url: 'https://api.siliconflow.cn/v1/audio/speech'
    api_key: 'your key'
    default_model: 'FunAudioLLM/CosyVoice2-0.5B'
    default_voice: 'speech:Dreamflowers:5bdstvc39i:xkqldnpasqmoqbakubom your voice name'
    sample_rate: 32000
    response_format: 'mp3'
    stream: true
    speed: 1
    gain: 0

  sherpa_onnx_tts:
    vits_model: '/path/to/tts-models/vits-melo-tts-zh_en/model.onnx'
    vits_lexicon: '/path/to/tts-models/vits-melo-tts-zh_en/lexicon.txt'
    vits_tokens: '/path/to/tts-models/vits-melo-tts-zh_en/tokens.txt'
    vits_data_dir: ''
    vits_dict_dir: '/path/to/tts-models/vits-melo-tts-zh_en/dict'
    # Comma-separated list of paths kept as one string, exactly as in the original.
    tts_rule_fsts: '/path/to/tts-models/vits-melo-tts-zh_en/number.fst,/path/to/tts-models/vits-melo-tts-zh_en/phone.fst,/path/to/tts-models/vits-melo-tts-zh_en/date.fst,/path/to/tts-models/vits-melo-tts-zh_en/new_heteronym.fst'
    max_num_sentences: 2
    sid: 1
    provider: 'cpu'
    num_threads: 1
    speed: 1.0
    debug: false

  spark_tts:
    api_url: 'http://127.0.0.1:6006/'
    api_name: 'voice_clone'
    prompt_wav_upload: 'https://uploadstatic.mihoyo.com/ys-obc/2022/11/02/16576950/4d9feb71760c5e8eb5f6c700df12fa0c_6824265537002152805.mp3'
    gender: 'female'
    pitch: 3
    speed: 3

  openai_tts:
    model: 'kokoro'
    voice: 'af_sky+af_bella'
    api_key: 'not-needed'
    base_url: 'http://localhost:8880/v1'
    file_extension: 'mp3'

  minimax_tts:
    group_id: ''
    api_key: ''
    model: 'speech-02-turbo'
    voice_id: 'female-shaonv'
    pronunciation_dict: ''

  elevenlabs_tts:
    api_key: ''
    voice_id: ''
    model_id: 'eleven_multilingual_v2'
    output_format: 'mp3_44100_128'
    stability: 0.5
    similarity_boost: 0.5
    style: 0.0
    use_speaker_boost: true

  cartesia_tts:
    api_key: ''
    voice_id: ''
    model_id: 'sonic-3'
    output_format: 'wav'
    language: 'en'
    emotion: 'neutral'
    volume: 1.0
    speed: 1.0
# ---------- Voice activity detection ----------
vad_config:
  # No VAD engine is selected (explicit null)
  vad_model: null

  silero_vad:
    orig_sr: 16000
    target_sr: 16000
    prob_threshold: 0.4
    db_threshold: 60
    required_hits: 3
    required_misses: 24
    smoothing_window: 5
# Text preprocessing flags applied before TTS
tts_preprocessor_config:
  remove_special_char: true
  ignore_brackets: false
  ignore_parentheses: true
  ignore_asterisks: true
  ignore_angle_brackets: true
# Translation settings; audio translation is switched off here
translator_config:
  translate_audio: false
  # Names one of the provider entries below
  translate_provider: 'deeplx'

  deeplx:
    deeplx_target_lang: 'EN'
    deeplx_api_endpoint: 'http://localhost:1188/v2/translate'

  tencent:
    secret_id: ''
    secret_key: ''
    region: 'ap-guangzhou'
    source_lang: 'auto'
    target_lang: 'en'
# ---------- Assets ----------
live2d_config:
  live2d_path: 'live2d-models'
  default_model: 'Kamiyahakuk_pro'

background_config:
  background_path: 'backgrounds'
  default_background: 'ceiling-window-room-night.jpeg'
# Live streaming integration
live_config:
  bilibili_live:
    room_ids:
      - 1991478060
    sessdata: ''