# FictionAgent / core / character_analyzer.py
# Uploaded by gdwind using huggingface_hub (commit a226682, verified).
import hashlib
import json
import re
from typing import Dict, List, Optional

from tqdm import tqdm

from config import Config
from core.openai_client import OpenAIClient
from utils.cache_manager import CacheManager
class CharacterAnalyzer:
    """Character personality analyzer, tuned for large bodies of text.

    Picks a sample of the text chunks in which a character appears, asks the
    chat-completion client for a structured personality profile, and caches
    the result through ``CacheManager``.
    """

    # Maximum number of characters of source text embedded in the prompt.
    MAX_PROMPT_CHARS = 8000
    # Limits used by enhance_profile_with_examples.
    EXAMPLE_CHUNK_LIMIT = 5      # how many of the character's chunks to scan
    QUOTES_PER_CHUNK = 3         # quotes kept from each scanned chunk
    MAX_EXAMPLE_DIALOGUES = 5    # quotes stored on the profile

    # Profile fields, split by the shape of their value.
    _LIST_FIELDS = ('core_traits', 'key_quotes')
    _TEXT_FIELDS = (
        'speaking_style',
        'behavior_patterns',
        'values',
        'emotional_style',
        'relationship_style',
        'background',
        'personality_summary',
    )

    _JSON_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)
    _QUOTED_ITEM_RE = re.compile(r'["\']([^"\']+)["\']')
    # ASCII quotes plus the quotation marks commonly used in Chinese prose.
    _DIALOGUE_RE = re.compile(
        r'["\'“”‘’「」『』]([^"\'“”‘’「」『』]{10,100})["\'“”‘’「」『』]'
    )

    def __init__(self):
        self.client = OpenAIClient.get_client()
        self.cache = CacheManager()

    def select_representative_chunks(self, chunks: List[Dict],
                                     character_chunks: List[int],
                                     max_chunks: Optional[int] = None) -> List[Dict]:
        """Pick an evenly spread sample of the chunks a character appears in.

        Args:
            chunks: All text chunks; ``character_chunks`` holds indices into it.
            character_chunks: Indices of the chunks mentioning the character.
            max_chunks: Upper bound on the sample size. Falls back to
                ``Config.MAX_ANALYSIS_CHUNKS`` when omitted (or 0).

        Returns:
            The selected chunk dicts, in the order of ``character_chunks``.
            Indices outside ``chunks`` (including negative ones) are skipped.
        """
        limit = max_chunks or Config.MAX_ANALYSIS_CHUNKS
        total = len(character_chunks)

        if total <= limit:
            selected_ids = character_chunks
        else:
            # Scale positions across the whole list so the tail is sampled
            # too; an integer step of total // limit would stop early.
            selected_ids = [character_chunks[i * total // limit]
                            for i in range(limit)]

        # Reject negative ids explicitly: they would otherwise wrap around
        # and silently pick chunks from the end of the list.
        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

    @staticmethod
    def _cache_key(character_name: str, text_chunks: List[Dict]) -> str:
        """Build a cache key that is stable across interpreter runs."""
        # The built-in hash() of a str is salted per process, so it cannot be
        # used for keys that should be found again by a later run.
        chunk_ids = str([c['chunk_id'] for c in text_chunks])
        digest = hashlib.sha256(chunk_ids.encode('utf-8')).hexdigest()[:16]
        return f"analysis_{character_name}_{digest}"

    def _build_analysis_prompt(self, character_name: str,
                               combined_text: str) -> str:
        """Render the user prompt sent to the model for one character."""
        # Cap the embedded text to keep the request size bounded.
        excerpt = combined_text[:self.MAX_PROMPT_CHARS]
        lines = [
            '',
            f'请深度分析小说中"{character_name}"这个角色的性格特征。',
            '基于以下文本片段进行分析:',
            excerpt,
            '请从以下维度分析,并以JSON格式返回:',
            '{',
            f'"name": "{character_name}",',
            '"core_traits": ["特质1", "特质2", "特质3"],',
            '"speaking_style": "说话风格描述",',
            '"behavior_patterns": "行为模式描述",',
            '"values": "核心价值观",',
            '"emotional_style": "情感表达方式",',
            '"relationship_style": "人际关系风格",',
            '"background": "背景信息",',
            '"key_quotes": ["典型语句1", "典型语句2"],',
            '"personality_summary": "性格总结(100字以内)"',
            '}',
            '注意:',
            '1. 只基于文本内容分析,不要添加原著之外的信息',
            '2. 提取该角色的典型对话和行为',
            '3. 关注语言风格、用词习惯、口头禅等',
            '',
        ]
        return '\n'.join(lines)

    def _extract_profile(self, analysis_text: str, character_name: str) -> Dict:
        """Turn the model reply into a profile dict.

        Tries strict JSON first; if the reply has no JSON object or the JSON
        is malformed, falls back to the regex-based text parser.
        """
        json_match = self._JSON_OBJECT_RE.search(analysis_text)
        if json_match:
            try:
                parsed = json.loads(json_match.group())
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
        return self._parse_text_analysis(analysis_text, character_name)

    def analyze_character_batch(self, character_name: str,
                                text_chunks: List[Dict]) -> Dict:
        """Analyze a character's personality from a batch of text chunks.

        Args:
            character_name: Name of the character to analyze.
            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
                ``'text'``.

        Returns:
            The profile dict, including the raw model reply under
            ``'raw_analysis'``. A cached profile is returned when available.
            If the request or the processing of the reply fails, the default
            profile is returned and nothing is cached.
        """
        cache_key = self._cache_key(character_name, text_chunks)
        cached = self.cache.get(cache_key)
        if cached:
            print(f"从缓存加载 {character_name} 的分析结果")
            return cached

        combined_text = "\n\n---\n\n".join(c['text'] for c in text_chunks)
        analysis_prompt = self._build_analysis_prompt(character_name,
                                                      combined_text)

        # Best-effort boundary: any failure degrades to the default profile.
        try:
            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
                    {"role": "user", "content": analysis_prompt}
                ]
            )
            content = response.choices[0].message.content
            if not content or not content.strip():
                # Do not cache an all-empty profile for an empty reply.
                raise ValueError("empty response from model")
            analysis_text = content.strip()

            profile = self._extract_profile(analysis_text, character_name)
            profile['raw_analysis'] = analysis_text

            self.cache.set(cache_key, profile)
            return profile
        except Exception as e:
            print(f"分析失败: {e}")
            return self._default_profile(character_name)

    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
        """Extract profile fields from a reply that is not valid JSON.

        Looks for ``key: [..]`` (list fields) and ``key: ".."`` (text fields)
        fragments, case-insensitively. Fields that are not found keep their
        empty default.
        """
        profile = {
            'name': character_name,
            'core_traits': [],
            'speaking_style': '',
            'behavior_patterns': '',
            'values': '',
            'emotional_style': '',
            'relationship_style': '',
            'background': '',
            'key_quotes': [],
            'personality_summary': ''
        }
        flags = re.DOTALL | re.IGNORECASE

        for key in self._LIST_FIELDS:
            match = re.search(key + r'["\']?\s*:\s*\[(.*?)\]', text, flags)
            if match:
                profile[key] = self._QUOTED_ITEM_RE.findall(match.group(1))

        for key in self._TEXT_FIELDS:
            match = re.search(key + r'["\']?\s*:\s*["\']([^"\']+)["\']',
                              text, flags)
            if match:
                profile[key] = match.group(1)

        return profile

    def _default_profile(self, character_name: str) -> Dict:
        """Return the placeholder profile used when analysis fails."""
        return {
            'name': character_name,
            'core_traits': ['复杂', '多面'],
            'speaking_style': '根据情境变化',
            'behavior_patterns': '待观察',
            'values': '待分析',
            'emotional_style': '情感丰富',
            'relationship_style': '因人而异',
            'background': '小说角色',
            'key_quotes': [],
            'personality_summary': f'{character_name}是一个复杂的角色',
            'raw_analysis': '使用默认配置'
        }

    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
                                      character_chunks: List[int]) -> Dict:
        """Attach a few quoted passages from the character's chunks.

        Scans the first ``EXAMPLE_CHUNK_LIMIT`` chunk ids, takes up to
        ``QUOTES_PER_CHUNK`` quoted spans (10-100 characters) from each, and
        stores at most ``MAX_EXAMPLE_DIALOGUES`` of them under
        ``'example_dialogues'``. The key is only set when a quote was found.

        Args:
            profile: Profile dict; updated in place.
            chunks: All text chunks, each providing ``'text'``.
            character_chunks: Indices into ``chunks`` for this character.

        Returns:
            The same ``profile`` object.
        """
        dialogues = []
        for chunk_id in character_chunks[:self.EXAMPLE_CHUNK_LIMIT]:
            # Skip ids outside the list; negative ids would wrap around.
            if not 0 <= chunk_id < len(chunks):
                continue
            quotes = self._DIALOGUE_RE.findall(chunks[chunk_id]['text'])
            dialogues.extend(quotes[:self.QUOTES_PER_CHUNK])

        if dialogues:
            profile['example_dialogues'] = dialogues[:self.MAX_EXAMPLE_DIALOGUES]
        return profile