FictionAgent / core /character_analyzer.py

Upload folder using huggingface_hub

a226682 verified about 2 months ago

6.6 kB

	import hashlib
	import json
	import re
	from typing import Dict, List, Optional

	from tqdm import tqdm

	from config import Config
	from core.openai_client import OpenAIClient
	from utils.cache_manager import CacheManager

	class CharacterAnalyzer:
	    """Character personality analyzer, tuned for large-scale texts.

	    Samples text chunks that mention a character, asks a chat model for a
	    structured personality profile, and caches the parsed result.
	    """

	    # Upper bound on the number of excerpt characters placed in the prompt.
	    MAX_PROMPT_CHARS = 8000

	    # Outermost ``{...}`` span of the model reply (greedy, spans newlines).
	    _JSON_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)
	    # One quoted item inside a bracketed list such as ``["a", "b"]``.
	    _QUOTED_ITEM_RE = re.compile(r'["\']([^"\']+)["\']')
	    # Quoted span of 10 to 100 characters, treated as a line of dialogue.
	    # NOTE(review): only ASCII quote marks are matched; confirm whether
	    # full-width / CJK quotation marks should be recognised as well.
	    _DIALOGUE_RE = re.compile(r'["\']([^"\']{10,100})["\']')
	    # Loose ``key: value`` patterns used when the reply is not valid JSON.
	    _FIELD_PATTERNS = {
	        'core_traits': re.compile(
	            r'core_traits["\']?\s*:\s*\[(.*?)\]', re.DOTALL | re.IGNORECASE),
	        'speaking_style': re.compile(
	            r'speaking_style["\']?\s*:\s*["\']([^"\']+)["\']',
	            re.DOTALL | re.IGNORECASE),
	        'key_quotes': re.compile(
	            r'key_quotes["\']?\s*:\s*\[(.*?)\]', re.DOTALL | re.IGNORECASE),
	    }
	    # Fields whose captured text is a list of quoted items.
	    _LIST_FIELDS = ('core_traits', 'key_quotes')

	    def __init__(self):
	        """Create the chat client and the result cache."""
	        self.client = OpenAIClient.get_client()
	        self.cache = CacheManager()

	    def select_representative_chunks(self, chunks: List[Dict],
	                                     character_chunks: List[int],
	                                     max_chunks: Optional[int] = None) -> List[Dict]:
	        """Pick an evenly spread sample of the chunks that mention a character.

	        Args:
	            chunks: All text chunks; ``character_chunks`` indexes into it.
	            character_chunks: Indices of the chunks mentioning the character.
	            max_chunks: Maximum number of chunks to return. A falsy value
	                (``None`` or ``0``) selects ``Config.MAX_ANALYSIS_CHUNKS``.

	        Returns:
	            The selected chunk dicts, in the order of ``character_chunks``.
	            Indices outside ``chunks`` are skipped.
	        """
	        max_chunks = max_chunks or Config.MAX_ANALYSIS_CHUNKS

	        total = len(character_chunks)
	        if total <= max_chunks:
	            selected_ids = character_chunks
	        else:
	            # Proportional indexing spreads the picks over the whole list.
	            # A fixed integer step would only cover the front of the list
	            # whenever ``total`` is not a multiple of ``max_chunks``.
	            selected_ids = [character_chunks[i * total // max_chunks]
	                            for i in range(max_chunks)]

	        # The lower bound stops negative ids from wrapping around to the end.
	        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

	    @staticmethod
	    def _cache_key(character_name: str, text_chunks: List[Dict]) -> str:
	        """Build a cache key that stays the same across interpreter runs."""
	        chunk_ids = str([c['chunk_id'] for c in text_chunks])
	        # The built-in hash() of a str is salted per process, so it cannot
	        # identify the same request in a later run; use a real digest.
	        digest = hashlib.sha256(chunk_ids.encode('utf-8')).hexdigest()[:16]
	        return f"analysis_{character_name}_{digest}"

	    @classmethod
	    def _build_analysis_prompt(cls, character_name: str, combined_text: str) -> str:
	        """Assemble the user prompt sent to the model for one character."""
	        # Truncate the excerpt so the prompt stays within a bounded size.
	        excerpt = combined_text[:cls.MAX_PROMPT_CHARS]
	        prompt_lines = [
	            '',
	            f'请深度分析小说中"{character_name}"这个角色的性格特征。',
	            '',
	            '基于以下文本片段进行分析：',
	            '',
	            excerpt,
	            '',
	            '请从以下维度分析，并以JSON格式返回：',
	            '',
	            '{',
	            f'"name": "{character_name}",',
	            '"core_traits": ["特质1", "特质2", "特质3"],',
	            '"speaking_style": "说话风格描述",',
	            '"behavior_patterns": "行为模式描述",',
	            '"values": "核心价值观",',
	            '"emotional_style": "情感表达方式",',
	            '"relationship_style": "人际关系风格",',
	            '"background": "背景信息",',
	            '"key_quotes": ["典型语句1", "典型语句2"],',
	            '"personality_summary": "性格总结（100字以内）"',
	            '}',
	            '',
	            '注意：',
	            '1. 只基于文本内容分析，不要添加原著之外的信息',
	            '2. 提取该角色的典型对话和行为',
	            '3. 关注语言风格、用词习惯、口头禅等',
	            '',
	        ]
	        return '\n'.join(prompt_lines)

	    def _extract_profile(self, analysis_text: str, character_name: str) -> Dict:
	        """Turn the raw model reply into a profile dict.

	        Tries strict JSON first. If the reply holds no JSON object, or the
	        object does not parse, falls back to the regex-based text parser.
	        """
	        json_match = self._JSON_OBJECT_RE.search(analysis_text)
	        if json_match:
	            try:
	                parsed = json.loads(json_match.group())
	            except json.JSONDecodeError:
	                parsed = None
	            if isinstance(parsed, dict):
	                return parsed
	        return self._parse_text_analysis(analysis_text, character_name)

	    def analyze_character_batch(self, character_name: str,
	                                text_chunks: List[Dict]) -> Dict:
	        """Analyze a character's personality from a batch of text chunks.

	        Args:
	            character_name: Name of the character to analyze.
	            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
	                ``'text'``.

	        Returns:
	            The profile dict, with the raw model reply stored under
	            ``'raw_analysis'``. A truthy cached value is returned as-is.
	            If the request or parsing fails, the default profile is
	            returned and nothing is cached.
	        """
	        cache_key = self._cache_key(character_name, text_chunks)
	        cached = self.cache.get(cache_key)
	        if cached:
	            print(f"从缓存加载 {character_name} 的分析结果")
	            return cached

	        combined_text = "\n\n---\n\n".join(c['text'] for c in text_chunks)
	        analysis_prompt = self._build_analysis_prompt(character_name, combined_text)

	        try:
	            response = self.client.chat.completions.create(
	                model=Config.MODEL_NAME,
	                messages=[
	                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
	                    {"role": "user", "content": analysis_prompt},
	                ],
	            )

	            analysis_text = (response.choices[0].message.content or "").strip()
	            if not analysis_text:
	                # Treat an empty reply as a failure so that an empty
	                # profile is never written to the cache.
	                raise ValueError("empty response from model")

	            profile = self._extract_profile(analysis_text, character_name)
	            profile['raw_analysis'] = analysis_text

	            self.cache.set(cache_key, profile)
	            return profile

	        except Exception as e:
	            # Deliberate best-effort boundary: any failure degrades to the
	            # default profile instead of aborting the caller.
	            print(f"分析失败: {e}")
	            return self._default_profile(character_name)

	    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
	        """Extract a partial profile from a reply that is not valid JSON.

	        Only ``core_traits``, ``speaking_style`` and ``key_quotes`` are
	        searched for; every other field keeps its empty default.
	        """
	        profile = {
	            'name': character_name,
	            'core_traits': [],
	            'speaking_style': '',
	            'behavior_patterns': '',
	            'values': '',
	            'emotional_style': '',
	            'relationship_style': '',
	            'background': '',
	            'key_quotes': [],
	            'personality_summary': '',
	        }

	        for key, pattern in self._FIELD_PATTERNS.items():
	            match = pattern.search(text)
	            if not match:
	                continue
	            content = match.group(1)
	            if key in self._LIST_FIELDS:
	                # Split the bracketed list into its quoted items.
	                profile[key] = self._QUOTED_ITEM_RE.findall(content)
	            else:
	                profile[key] = content

	        return profile

	    def _default_profile(self, character_name: str) -> Dict:
	        """Return the placeholder profile used when analysis fails."""
	        return {
	            'name': character_name,
	            'core_traits': ['复杂', '多面'],
	            'speaking_style': '根据情境变化',
	            'behavior_patterns': '待观察',
	            'values': '待分析',
	            'emotional_style': '情感丰富',
	            'relationship_style': '因人而异',
	            'background': '小说角色',
	            'key_quotes': [],
	            'personality_summary': f'{character_name}是一个复杂的角色',
	            'raw_analysis': '使用默认配置',
	        }

	    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
	                                      character_chunks: List[int]) -> Dict:
	        """Attach sample dialogue lines to a profile.

	        Scans the first five entries of ``character_chunks``, takes up to
	        three quoted spans from each chunk, and stores at most five of them
	        under ``'example_dialogues'``. ``profile`` is modified in place and
	        also returned; it is left untouched when no quote is found.
	        """
	        dialogues = []
	        for chunk_id in character_chunks[:5]:  # only look at the first few chunks
	            if not 0 <= chunk_id < len(chunks):
	                continue
	            quotes = self._DIALOGUE_RE.findall(chunks[chunk_id]['text'])
	            dialogues.extend(quotes[:3])

	        if dialogues:
	            profile['example_dialogues'] = dialogues[:5]

	        return profile