|
|
import hashlib
import json
import re
from typing import Dict, List, Optional

from tqdm import tqdm

from config import Config
from core.openai_client import OpenAIClient
from utils.cache_manager import CacheManager
|
|
|
|
|
class CharacterAnalyzer:
    """Character personality analyzer, tuned for large volumes of text.

    Builds a structured personality profile (a plain ``dict``) for a named
    character by sending selected text chunks to a chat-completion client,
    and stores successful results through ``self.cache``.
    """

    # Maximum number of characters of source text embedded in one prompt.
    _MAX_PROMPT_TEXT_CHARS = 8000

    def __init__(self):
        """Obtain the client from ``OpenAIClient.get_client()`` and create a ``CacheManager``."""
        self.client = OpenAIClient.get_client()
        self.cache = CacheManager()

    def select_representative_chunks(self, chunks: List[Dict],
                                     character_chunks: List[int],
                                     max_chunks: Optional[int] = None) -> List[Dict]:
        """Pick at most ``max_chunks`` chunks in which the character appears.

        Args:
            chunks: All text chunks; ``character_chunks`` holds indices into it.
            character_chunks: Indices of the chunks that mention the character.
            max_chunks: Upper bound on the number of chunks returned. ``None``
                (or ``0``) falls back to ``Config.MAX_ANALYSIS_CHUNKS``.

        Returns:
            The selected chunk dicts, in the order of ``character_chunks``.
            Indices outside ``range(len(chunks))`` are skipped.
        """
        max_chunks = max_chunks or Config.MAX_ANALYSIS_CHUNKS

        if len(character_chunks) <= max_chunks:
            selected_ids = character_chunks
        else:
            # Sample at a fixed stride so the picks are spread over the text
            # instead of all coming from the beginning.
            step = len(character_chunks) // max_chunks
            selected_ids = [character_chunks[i * step] for i in range(max_chunks)]

        # Reject negative ids as well: they would silently index from the end.
        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

    def analyze_character_batch(self, character_name: str,
                                text_chunks: List[Dict]) -> Dict:
        """Analyze a character's personality from a batch of text chunks.

        Args:
            character_name: Name of the character to analyze.
            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
                ``'text'``.

        Returns:
            A profile dict. On a cache hit the cached value is returned as is.
            If the request or the response handling raises, the error is
            printed and the default profile is returned (and not cached).

        Raises:
            KeyError: If a chunk lacks ``'chunk_id'`` or ``'text'`` (raised
                before the request is attempted).
        """
        cache_key = self._build_cache_key(character_name, text_chunks)
        cached = self.cache.get(cache_key)
        if cached:
            print(f"从缓存加载 {character_name} 的分析结果")
            return cached

        combined_text = "\n\n---\n\n".join(c['text'] for c in text_chunks)
        analysis_prompt = self._build_analysis_prompt(character_name, combined_text)

        # Deliberately broad: analysis is best-effort, and any failure
        # degrades to the default profile instead of aborting the caller.
        try:
            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
                    {"role": "user", "content": analysis_prompt}
                ]
            )

            analysis_text = response.choices[0].message.content.strip()

            profile = self._extract_profile(analysis_text, character_name)
            profile['raw_analysis'] = analysis_text

            self.cache.set(cache_key, profile)
            return profile

        except Exception as e:
            print(f"分析失败: {e}")
            return self._default_profile(character_name)

    @staticmethod
    def _build_cache_key(character_name: str, text_chunks: List[Dict]) -> str:
        """Return a cache key derived from the character name and chunk ids.

        A SHA-256 digest is used instead of the built-in ``hash()``, whose
        value for strings is salted per interpreter process and therefore
        cannot identify an entry written by an earlier run.
        """
        chunk_ids = str([c['chunk_id'] for c in text_chunks])
        digest = hashlib.sha256(chunk_ids.encode('utf-8')).hexdigest()[:16]
        return f"analysis_{character_name}_{digest}"

    def _build_analysis_prompt(self, character_name: str, combined_text: str) -> str:
        """Render the user prompt asking for a JSON personality profile."""
        # Cap the embedded text so the prompt size stays bounded.
        excerpt = combined_text[:self._MAX_PROMPT_TEXT_CHARS]
        return f"""
请深度分析小说中"{character_name}"这个角色的性格特征。

基于以下文本片段进行分析:

{excerpt}

请从以下维度分析,并以JSON格式返回:

{{
"name": "{character_name}",
"core_traits": ["特质1", "特质2", "特质3"],
"speaking_style": "说话风格描述",
"behavior_patterns": "行为模式描述",
"values": "核心价值观",
"emotional_style": "情感表达方式",
"relationship_style": "人际关系风格",
"background": "背景信息",
"key_quotes": ["典型语句1", "典型语句2"],
"personality_summary": "性格总结(100字以内)"
}}

注意:
1. 只基于文本内容分析,不要添加原著之外的信息
2. 提取该角色的典型对话和行为
3. 关注语言风格、用词习惯、口头禅等
"""

    def _extract_profile(self, analysis_text: str, character_name: str) -> Dict:
        """Turn the model's reply into a profile dict.

        The span from the first ``{`` to the last ``}`` is parsed as JSON.
        If there is no such span, or it is not valid JSON, the regex-based
        ``_parse_text_analysis`` is used instead.
        """
        json_match = re.search(r'\{.*\}', analysis_text, re.DOTALL)
        if json_match:
            try:
                parsed = json.loads(json_match.group())
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
        return self._parse_text_analysis(analysis_text, character_name)

    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
        """Extract a partial profile from free-form (non-JSON) analysis text.

        Only ``core_traits``, ``speaking_style`` and ``key_quotes`` are
        searched for; every other field keeps its empty default.
        """
        profile = {
            'name': character_name,
            'core_traits': [],
            'speaking_style': '',
            'behavior_patterns': '',
            'values': '',
            'emotional_style': '',
            'relationship_style': '',
            'background': '',
            'key_quotes': [],
            'personality_summary': ''
        }

        patterns = {
            'core_traits': r'core_traits["\']?\s*:\s*\[(.*?)\]',
            'speaking_style': r'speaking_style["\']?\s*:\s*["\']([^"\']+)["\']',
            'key_quotes': r'key_quotes["\']?\s*:\s*\[(.*?)\]',
        }

        for key, pattern in patterns.items():
            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
            if not match:
                continue
            content = match.group(1)
            if key in ('core_traits', 'key_quotes'):
                # List-valued fields: collect each quoted item inside the brackets.
                profile[key] = re.findall(r'["\']([^"\']+)["\']', content)
            else:
                profile[key] = content

        return profile

    def _default_profile(self, character_name: str) -> Dict:
        """Return the placeholder profile used when analysis fails."""
        return {
            'name': character_name,
            'core_traits': ['复杂', '多面'],
            'speaking_style': '根据情境变化',
            'behavior_patterns': '待观察',
            'values': '待分析',
            'emotional_style': '情感丰富',
            'relationship_style': '因人而异',
            'background': '小说角色',
            'key_quotes': [],
            'personality_summary': f'{character_name}是一个复杂的角色',
            'raw_analysis': '使用默认配置'
        }

    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
                                      character_chunks: List[int]) -> Dict:
        """Attach example dialogue lines to ``profile``.

        Scans the first five entries of ``character_chunks`` and takes up to
        three quoted passages (10-100 characters) from each chunk's text.
        If any are found, at most five are stored under
        ``'example_dialogues'``.

        Args:
            profile: Profile dict; modified in place.
            chunks: All text chunks; each must provide ``'text'``.
            character_chunks: Indices into ``chunks``; out-of-range indices
                are skipped.

        Returns:
            The same ``profile`` object.
        """
        # NOTE(review): only ASCII quote characters are matched, so text that
        # marks dialogue with curly or CJK quotation marks yields no
        # examples - confirm whether that is intended.
        dialogues: List[str] = []
        for chunk_id in character_chunks[:5]:
            # Reject negative ids as well: they would index from the end.
            if not 0 <= chunk_id < len(chunks):
                continue
            chunk_text = chunks[chunk_id]['text']
            quotes = re.findall(r'["\']([^"\']{10,100})["\']', chunk_text)
            dialogues.extend(quotes[:3])

        if dialogues:
            profile['example_dialogues'] = dialogues[:5]

        return profile