src/voice_dialogue/config/llm_config.py · MoYoYoTech/VoiceDialogue at main

VoiceDialogue / src /voice_dialogue /config /llm_config.py

liumaolin

简化LLM模型路径管理。

2baeba2 9 months ago

4.08 kB

	"""LLM模型配置管理"""

	from typing import Dict, Any

	from voice_dialogue.utils.apple_silicon import get_optimal_llama_cpp_config, get_apple_silicon_info
	from .paths import LLM_MODELS_PATH

	__all__ = (
	'get_llm_model_params',
	'get_apple_silicon_summary',
	'CHINESE_SYSTEM_PROMPT',
	'ENGLISH_SYSTEM_PROMPT',
	'BUILTIN_LLM_MODEL_PATH',
	)

	BUILTIN_LLM_MODEL_PATH = LLM_MODELS_PATH / 'qwen' / 'Qwen3-8B-Q6_K.gguf'

	CHINESE_SYSTEM_PROMPT = (
	"你是AI助手。请以自然流畅的中文口语化表达直接回答问题，避免冗余的思考过程。"
	"你的回答第一句话必须少于十个字。每段回答控制在二到三句话，既不要过短也不要过长，以适应对话语境。"
	"回答应准确、精炼且有依据。"
	)

	ENGLISH_SYSTEM_PROMPT = (
	"You are an AI assistant. "
	"Please answer directly and naturally, using conversational English, without showing your thinking process. "
	"Your first sentence must be less than 10 words. "
	"Your responses should be accurate, concise, and well-supported, ideally around 2-3 sentences long to ensure a good conversational flow."
	)


	def get_llm_model_params() -> Dict[str, Any]:
	"""
	获取LLM模型参数，基于Apple Silicon芯片信息动态配置

	Returns:
	Dict[str, Any]: LLM模型参数配置
	"""
	# 获取Apple Silicon优化配置
	optimal_config = get_optimal_llama_cpp_config()

	# 基础模型参数
	model_params = {
	'streaming': True,
	'n_gpu_layers': -1,
	'n_batch': 1024,
	'temperature': 0.7,
	'top_p': 0.8,
	'top_k': 20,
	'max_tokens': 32768,
	'model_kwargs': {
	'mini_p': 0,
	'presence_penalty': 1.5
	},
	'verbose': False,
	}

	# 应用Apple Silicon优化配置
	model_params.update(optimal_config)

	return model_params


	def get_apple_silicon_summary() -> Dict[str, Any]:
	"""
	获取Apple Silicon芯片信息摘要

	Returns:
	Dict[str, Any]: 芯片信息摘要
	"""
	chip_info = get_apple_silicon_info()
	optimal_config = get_optimal_llama_cpp_config()

	return {
	'chip_name': chip_info.chip_name,
	'is_apple_silicon': chip_info.is_apple_silicon,
	'total_cores': chip_info.total_cores,
	'performance_cores': chip_info.performance_cores,
	'efficiency_cores': chip_info.efficiency_cores,
	'memory_gb': chip_info.memory_gb,
	'gpu_cores': chip_info.gpu_cores,
	'optimal_n_threads': optimal_config['n_threads'],
	'optimal_n_ctx': optimal_config['n_ctx'],
	'config_note': '仅使用性能核心(P-cores)以获得最佳性能' if chip_info.is_apple_silicon else '标准配置'
	}


	# 预设配置模板
	LLAMA_CPP_CONFIG_PRESETS = {
	'high_performance': {
	'description': '高性能配置 - 适合Apple M1 Pro/Max及以上',
	'n_ctx': 8192,
	'temperature': 0.7,
	'top_p': 0.9,
	'top_k': 20,
	},
	'balanced': {
	'description': '平衡配置 - 适合Apple M1/M2基础版',
	'n_ctx': 4096,
	'temperature': 0.7,
	'top_p': 0.9,
	'top_k': 20,
	},
	'memory_efficient': {
	'description': '内存优化配置 - 适合内存较小的设备',
	'n_ctx': 2048,
	'temperature': 0.7,
	'top_p': 0.9,
	'top_k': 20,
	}
	}


	def get_config_preset(preset_name: str) -> Dict[str, Any]:
	"""
	获取预设配置

	Args:
	preset_name: 预设名称 ('high_performance', 'balanced', 'memory_efficient')

	Returns:
	Dict[str, Any]: 预设配置
	"""
	if preset_name not in LLAMA_CPP_CONFIG_PRESETS:
	raise ValueError(f"未知的预设配置: {preset_name}")

	preset = LLAMA_CPP_CONFIG_PRESETS[preset_name].copy()

	# 移除描述字段
	preset.pop('description', None)

	# 添加n_threads配置（基于当前硬件）
	optimal_config = get_optimal_llama_cpp_config()
	preset['n_threads'] = optimal_config['n_threads']

	return preset