# File size: 6,595 Bytes
# a226682
import hashlib
import json
import re
from typing import Dict, List, Optional

from tqdm import tqdm

from config import Config
from utils.cache_manager import CacheManager
from core.openai_client import OpenAIClient
class CharacterAnalyzer:
    """Character personality analyzer geared towards large texts.

    Picks a bounded sample of text chunks for a character, asks the chat
    model for a JSON personality profile, and caches the resulting profile.
    """

    # Maximum number of excerpt characters embedded in a single prompt.
    MAX_PROMPT_CHARS = 8000

    # List-valued profile fields recognised by the text fallback parser.
    _LIST_FIELDS = ('core_traits', 'key_quotes')

    # String-valued profile fields recognised by the text fallback parser.
    _STRING_FIELDS = (
        'speaking_style',
        'behavior_patterns',
        'values',
        'emotional_style',
        'relationship_style',
        'background',
        'personality_summary',
    )

    def __init__(self):
        """Obtain the client from ``OpenAIClient`` and create a ``CacheManager``."""
        self.client = OpenAIClient.get_client()
        self.cache = CacheManager()

    def select_representative_chunks(self, chunks: List[Dict],
                                     character_chunks: List[int],
                                     max_chunks: Optional[int] = None) -> List[Dict]:
        """Select an evenly spread sample of the chunks that mention a character.

        Args:
            chunks: All text chunks; ``character_chunks`` holds indexes into it.
            character_chunks: Indexes of the chunks in which the character appears.
            max_chunks: Upper bound on the sample size. A falsy value
                (``None`` or ``0``) falls back to ``Config.MAX_ANALYSIS_CHUNKS``.

        Returns:
            The selected chunk dicts, in the order of ``character_chunks``.
            Indexes outside ``chunks`` are skipped.
        """
        max_chunks = max_chunks or Config.MAX_ANALYSIS_CHUNKS
        total = len(character_chunks)

        if total <= max_chunks:
            selected_ids = character_chunks
        else:
            # Scale each sample position across the whole list. Using a
            # truncated integer step would only ever sample the head of the
            # list whenever total is not a multiple of max_chunks.
            selected_ids = [
                character_chunks[i * total // max_chunks]
                for i in range(max_chunks)
            ]

        # Negative ids are rejected too, so they cannot wrap around to the end.
        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

    def analyze_character_batch(self, character_name: str,
                                text_chunks: List[Dict]) -> Dict:
        """Analyze a character's personality from a batch of text chunks.

        Args:
            character_name: Name of the character to analyze.
            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
                ``'text'``.

        Returns:
            A profile dict. On any failure while calling the model the
            default profile is returned instead (best effort, never raises
            for model errors).

        Raises:
            KeyError: If a chunk lacks ``'chunk_id'`` or ``'text'``.
        """
        # Check the cache first.
        cache_key = self._make_cache_key(character_name, text_chunks)
        cached = self.cache.get(cache_key)
        if cached:
            print(f"从缓存加载 {character_name} 的分析结果")
            return cached

        # Merge the chunks into one excerpt.
        combined_text = "\n\n---\n\n".join(c['text'] for c in text_chunks)
        analysis_prompt = self._build_analysis_prompt(character_name, combined_text)

        try:
            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
                    {"role": "user", "content": analysis_prompt}
                ]
            )
            # content may be None; treat that as an empty answer.
            analysis_text = (response.choices[0].message.content or "").strip()
            profile = self._profile_from_response(analysis_text, character_name)

            # Cache the result.
            self.cache.set(cache_key, profile)
            return profile
        except Exception as e:
            # Deliberate best-effort boundary: any client/cache failure
            # degrades to the default profile instead of aborting the run.
            print(f"分析失败: {e}")
            return self._default_profile(character_name)

    @staticmethod
    def _make_cache_key(character_name: str, text_chunks: List[Dict]) -> str:
        """Build a cache key that is stable across interpreter runs.

        The builtin ``hash()`` of a string is salted per process, so a key
        derived from it could never be found again by a later process.
        """
        chunk_ids = str([c['chunk_id'] for c in text_chunks])
        digest = hashlib.sha256(chunk_ids.encode('utf-8')).hexdigest()[:16]
        return f"analysis_{character_name}_{digest}"

    def _build_analysis_prompt(self, character_name: str, combined_text: str) -> str:
        """Render the user prompt, embedding at most ``MAX_PROMPT_CHARS`` of text."""
        excerpt = combined_text[:self.MAX_PROMPT_CHARS]
        # The literal is flush-left so no indentation leaks into the prompt.
        return f"""
请深度分析小说中"{character_name}"这个角色的性格特征。
基于以下文本片段进行分析:
{excerpt}
请从以下维度分析,并以JSON格式返回:
{{
"name": "{character_name}",
"core_traits": ["特质1", "特质2", "特质3"],
"speaking_style": "说话风格描述",
"behavior_patterns": "行为模式描述",
"values": "核心价值观",
"emotional_style": "情感表达方式",
"relationship_style": "人际关系风格",
"background": "背景信息",
"key_quotes": ["典型语句1", "典型语句2"],
"personality_summary": "性格总结(100字以内)"
}}
注意:
1. 只基于文本内容分析,不要添加原著之外的信息
2. 提取该角色的典型对话和行为
3. 关注语言风格、用词习惯、口头禅等
"""

    def _profile_from_response(self, analysis_text: str, character_name: str) -> Dict:
        """Turn the model's reply into a profile dict.

        Tries the outermost ``{...}`` span as JSON first; if there is none or
        it is not valid JSON, falls back to the regex-based text parser. The
        raw reply is always stored under ``'raw_analysis'``.
        """
        profile = None
        json_match = re.search(r'\{.*\}', analysis_text, re.DOTALL)
        if json_match:
            try:
                profile = json.loads(json_match.group())
            except json.JSONDecodeError:
                # Malformed JSON (e.g. trailing comma): salvage what we can
                # with the text parser rather than discarding the reply.
                profile = None

        if profile is None:
            profile = self._parse_text_analysis(analysis_text, character_name)

        profile['raw_analysis'] = analysis_text
        return profile

    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
        """Parse a loosely formatted analysis into a profile dict.

        Looks for ``key: [...]`` (list fields) and ``key: "..."`` (string
        fields) fragments; fields that are not found keep their empty
        default.
        """
        profile = {
            'name': character_name,
            'core_traits': [],
            'speaking_style': '',
            'behavior_patterns': '',
            'values': '',
            'emotional_style': '',
            'relationship_style': '',
            'background': '',
            'key_quotes': [],
            'personality_summary': ''
        }
        flags = re.DOTALL | re.IGNORECASE

        for key in self._LIST_FIELDS:
            match = re.search(rf'{key}["\']?\s*:\s*\[(.*?)\]', text, flags)
            if match:
                # Each quoted item inside the brackets becomes one entry.
                profile[key] = re.findall(r'["\']([^"\']+)["\']', match.group(1))

        for key in self._STRING_FIELDS:
            match = re.search(rf'{key}["\']?\s*:\s*["\']([^"\']+)["\']', text, flags)
            if match:
                profile[key] = match.group(1)

        return profile

    def _default_profile(self, character_name: str) -> Dict:
        """Return the placeholder profile used when analysis fails."""
        return {
            'name': character_name,
            'core_traits': ['复杂', '多面'],
            'speaking_style': '根据情境变化',
            'behavior_patterns': '待观察',
            'values': '待分析',
            'emotional_style': '情感丰富',
            'relationship_style': '因人而异',
            'background': '小说角色',
            'key_quotes': [],
            'personality_summary': f'{character_name}是一个复杂的角色',
            'raw_analysis': '使用默认配置'
        }

    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
                                      character_chunks: List[int]) -> Dict:
        """Add example dialogues taken from the character's first chunks.

        Scans the first five entries of ``character_chunks``, takes up to
        three quoted spans (10-100 characters, ASCII quotes) from each, and
        stores at most five of them under ``'example_dialogues'``.

        Args:
            profile: Profile dict; it is updated in place.
            chunks: All text chunks; each used chunk must provide ``'text'``.
            character_chunks: Indexes into ``chunks`` for this character.

        Returns:
            The same ``profile`` object, with ``'example_dialogues'`` set only
            when at least one quote was found.
        """
        dialogues = []
        for chunk_id in character_chunks[:5]:  # only look at the first few chunks
            if not 0 <= chunk_id < len(chunks):
                continue
            # Simple extraction of quoted content.
            # NOTE(review): only ASCII quote marks are matched; full-width
            # quotation marks would be missed - confirm this is intended.
            quotes = re.findall(r'["\']([^"\']{10,100})["\']', chunks[chunk_id]['text'])
            dialogues.extend(quotes[:3])

        if dialogues:
            profile['example_dialogues'] = dialogues[:5]
        return profile