import hashlib
import json
import re
from typing import Dict, List, Optional

from tqdm import tqdm

from config import Config
from utils.cache_manager import CacheManager
from core.openai_client import OpenAIClient

# User prompt sent to the model. Filled in with ``str.format``; the doubled
# braces render as the literal braces of the JSON skeleton.
_ANALYSIS_PROMPT_TEMPLATE = """
请深度分析小说中"{character_name}"这个角色的性格特征。

基于以下文本片段进行分析:
{excerpt}

请从以下维度分析,并以JSON格式返回:
{{
    "name": "{character_name}",
    "core_traits": ["特质1", "特质2", "特质3"],
    "speaking_style": "说话风格描述",
    "behavior_patterns": "行为模式描述",
    "values": "核心价值观",
    "emotional_style": "情感表达方式",
    "relationship_style": "人际关系风格",
    "background": "背景信息",
    "key_quotes": ["典型语句1", "典型语句2"],
    "personality_summary": "性格总结(100字以内)"
}}

注意:
1. 只基于文本内容分析,不要添加原著之外的信息
2. 提取该角色的典型对话和行为
3. 关注语言风格、用词习惯、口头禅等
"""

# Greedy match from the first "{" to the last "}" in the model reply.
_JSON_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)
# A single quoted item inside a bracketed list.
_QUOTED_ITEM_RE = re.compile(r'["\']([^"\']+)["\']')
# Quoted speech of 10-100 characters. Accepts ASCII quotes as well as the
# CJK quotation marks commonly used in Chinese prose.
_DIALOGUE_RE = re.compile(
    r'["\'“‘「『]([^"\'“”‘’「」『』]{10,100})["\'”’」』]'
)


class CharacterAnalyzer:
    """Character personality analyzer, tuned for large texts."""

    # Maximum number of characters of source text embedded in the prompt.
    MAX_PROMPT_CHARS = 8000

    # Profile fields holding a list of strings / a single string. Used by the
    # plain-text fallback parser.
    _LIST_FIELDS = ('core_traits', 'key_quotes')
    _TEXT_FIELDS = (
        'speaking_style',
        'behavior_patterns',
        'values',
        'emotional_style',
        'relationship_style',
        'background',
        'personality_summary',
    )

    def __init__(self):
        self.client = OpenAIClient.get_client()
        self.cache = CacheManager()

    def select_representative_chunks(self, chunks: List[Dict],
                                     character_chunks: List[int],
                                     max_chunks: Optional[int] = None) -> List[Dict]:
        """Pick an evenly spread sample of the chunks a character appears in.

        Args:
            chunks: All text chunks; ``character_chunks`` holds indices into
                this list.
            character_chunks: Indices of the chunks mentioning the character.
            max_chunks: Upper bound on the number of chunks returned. A falsy
                value (``None`` or ``0``) falls back to
                ``Config.MAX_ANALYSIS_CHUNKS``.

        Returns:
            The selected chunk dicts, in the order of ``character_chunks``.
            Indices outside ``chunks`` are skipped.
        """
        max_chunks = max_chunks or Config.MAX_ANALYSIS_CHUNKS
        total = len(character_chunks)

        if total <= max_chunks:
            selected_ids = character_chunks
        else:
            # Scale each sample position over the whole list. An integer step
            # (total // max_chunks) would only cover a prefix whenever total
            # is not a multiple of max_chunks.
            selected_ids = [
                character_chunks[i * total // max_chunks]
                for i in range(max_chunks)
            ]

        # Reject negative ids as well: they would silently index from the end.
        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

    def analyze_character_batch(self, character_name: str,
                                text_chunks: List[Dict]) -> Dict:
        """Analyze a character's personality from a batch of text chunks.

        The result is looked up in / stored to ``self.cache`` under a key
        derived from the character name and the chunk ids.

        Args:
            character_name: Name of the character to analyze.
            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
                ``'text'``.

        Returns:
            The profile dict. On any failure while calling the model or
            processing its reply, the default profile is returned instead
            (and is not cached).
        """
        cache_key = self._build_cache_key(character_name, text_chunks)
        cached = self.cache.get(cache_key)
        if cached:
            print(f"从缓存加载 {character_name} 的分析结果")
            return cached

        combined_text = "\n\n---\n\n".join([c['text'] for c in text_chunks])
        analysis_prompt = _ANALYSIS_PROMPT_TEMPLATE.format(
            character_name=character_name,
            # Cap the amount of source text embedded in the prompt.
            excerpt=combined_text[:self.MAX_PROMPT_CHARS],
        )

        # Deliberately broad: any failure degrades to the default profile
        # rather than aborting the caller.
        try:
            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
                    {"role": "user", "content": analysis_prompt}
                ]
            )
            analysis_text = response.choices[0].message.content.strip()

            profile = self._extract_profile(analysis_text, character_name)
            profile['raw_analysis'] = analysis_text

            self.cache.set(cache_key, profile)
            return profile
        except Exception as e:
            print(f"分析失败: {e}")
            return self._default_profile(character_name)

    @staticmethod
    def _build_cache_key(character_name: str, text_chunks: List[Dict]) -> str:
        """Return a cache key that is identical across interpreter runs."""
        chunk_ids = [c['chunk_id'] for c in text_chunks]
        # The built-in hash() of a str is salted per process, so it cannot be
        # used for a key that has to match again in a later run.
        digest = hashlib.sha256(str(chunk_ids).encode('utf-8')).hexdigest()[:16]
        return f"analysis_{character_name}_{digest}"

    def _extract_profile(self, analysis_text: str, character_name: str) -> Dict:
        """Turn the model reply into a profile dict.

        Tries to parse the outermost ``{...}`` span as JSON first; if there is
        none, or it is not a valid JSON object, falls back to the regex-based
        text parser.
        """
        json_match = _JSON_OBJECT_RE.search(analysis_text)
        if json_match:
            try:
                parsed = json.loads(json_match.group())
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
        return self._parse_text_analysis(analysis_text, character_name)

    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
        """Best-effort extraction of profile fields from non-JSON text.

        Looks for ``field: [..]`` / ``field: "..."`` fragments (the field name
        may be followed by a quote, matching is case-insensitive). Fields that
        are not found keep their empty default.

        Args:
            text: Raw analysis text returned by the model.
            character_name: Stored as the profile's ``'name'``.

        Returns:
            A profile dict containing every standard field.
        """
        profile = {
            'name': character_name,
            'core_traits': [],
            'speaking_style': '',
            'behavior_patterns': '',
            'values': '',
            'emotional_style': '',
            'relationship_style': '',
            'background': '',
            'key_quotes': [],
            'personality_summary': ''
        }
        flags = re.DOTALL | re.IGNORECASE

        for key in self._LIST_FIELDS:
            match = re.search(rf'{key}["\']?\s*:\s*\[(.*?)\]', text, flags)
            if match:
                profile[key] = _QUOTED_ITEM_RE.findall(match.group(1))

        for key in self._TEXT_FIELDS:
            match = re.search(rf'{key}["\']?\s*:\s*["\']([^"\']+)["\']', text, flags)
            if match:
                profile[key] = match.group(1)

        return profile

    def _default_profile(self, character_name: str) -> Dict:
        """Return the placeholder profile used when analysis fails."""
        return {
            'name': character_name,
            'core_traits': ['复杂', '多面'],
            'speaking_style': '根据情境变化',
            'behavior_patterns': '待观察',
            'values': '待分析',
            'emotional_style': '情感丰富',
            'relationship_style': '因人而异',
            'background': '小说角色',
            'key_quotes': [],
            'personality_summary': f'{character_name}是一个复杂的角色',
            'raw_analysis': '使用默认配置'
        }

    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
                                      character_chunks: List[int]) -> Dict:
        """Attach example dialogue lines to a character profile.

        Scans the first five entries of ``character_chunks``, takes up to
        three quoted passages (10-100 characters each) from every chunk, and
        stores at most five of them under ``profile['example_dialogues']``.

        Args:
            profile: Profile dict; modified in place.
            chunks: All text chunks; each must provide ``'text'``.
            character_chunks: Indices into ``chunks`` for this character.

        Returns:
            The same ``profile`` object. The key is only added when at least
            one quoted passage was found.
        """
        dialogues = []
        for chunk_id in character_chunks[:5]:  # only look at the first few chunks
            if 0 <= chunk_id < len(chunks):
                quotes = _DIALOGUE_RE.findall(chunks[chunk_id]['text'])
                dialogues.extend(quotes[:3])

        if dialogues:
            profile['example_dialogues'] = dialogues[:5]

        return profile