File size: 5,296 Bytes
a226682 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
"""批量分析多个角色的工具脚本"""
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core import TextProcessor, CharacterExtractor, CharacterAnalyzer
from utils import CacheManager
import json
from tqdm import tqdm
def _analyze_single_character(analyzer, char, chunks, output_dir):
    """Analyze one extracted character and persist its profile as JSON.

    Args:
        analyzer: CharacterAnalyzer instance shared across the batch.
        char: Extracted-character dict; reads ``char['name']`` and
            ``char['info']['chunks']`` (chunk indices the character appears in).
        chunks: Full list of text chunks of the novel.
        output_dir: Directory the per-character JSON file is written to.

    Returns:
        The enhanced profile dict.

    Raises:
        Propagates whatever the analyzer raises so the caller can
        report the failure and continue with the remaining characters.
    """
    # Pick the chunks that best represent this character.
    representative_chunks = analyzer.select_representative_chunks(
        chunks,
        char['info']['chunks']
    )
    # Core analysis over the selected chunks.
    profile = analyzer.analyze_character_batch(
        char['name'],
        representative_chunks
    )
    # Enrich the profile with concrete examples from the text.
    profile = analyzer.enhance_profile_with_examples(
        profile,
        chunks,
        char['info']['chunks']
    )
    # Save the single-character profile; sanitize the filename so
    # spaces and path separators in the name cannot escape output_dir.
    char_filename = f"{profile['name'].replace(' ', '_').replace('/', '_')}.json"
    char_file = os.path.join(output_dir, char_filename)
    with open(char_file, 'w', encoding='utf-8') as f:
        json.dump(profile, f, ensure_ascii=False, indent=2)
    return profile


def _write_report(report_file, novel_path, novel_length, all_profiles):
    """Write the human-readable plain-text summary report.

    Args:
        report_file: Destination path of the report.
        novel_path: Path of the analyzed novel (echoed into the report).
        novel_length: Character count of the novel text.
        all_profiles: Profiles of all successfully analyzed characters.
    """
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("="*70 + "\n")
        f.write("角色分析报告\n")
        f.write("="*70 + "\n\n")
        f.write(f"小说文件: {novel_path}\n")
        f.write(f"文本长度: {novel_length:,} 字符\n")
        f.write(f"分析角色数: {len(all_profiles)}\n\n")
        f.write("-"*70 + "\n\n")
        for i, profile in enumerate(all_profiles, 1):
            f.write(f"{i}. {profile['name']}\n")
            f.write(f"   核心特质: {', '.join(profile.get('core_traits', []))}\n")
            f.write(f"   性格总结: {profile.get('personality_summary', 'N/A')}\n")
            f.write("\n")


def batch_analyze(novel_path: str, output_dir: str = "character_profiles",
                  max_characters: int = 10):
    """批量分析小说中的所有主要角色 (batch-analyze a novel's main characters).

    Pipeline: load the text, chunk it, extract the main characters,
    analyze up to ``max_characters`` of them, then write per-character
    JSON profiles, a combined ``all_characters.json``, and a text
    report into ``output_dir`` (created if missing).

    Args:
        novel_path: 小说文件路径 — path to the novel text file (UTF-8).
        output_dir: 输出目录 — directory receiving all output files.
        max_characters: 最多分析的角色数 — upper bound on characters analyzed.
    """
    print("="*70)
    print("📚 批量角色分析工具")
    print("="*70)

    # 1. Load the novel text.
    print(f"\n📖 加载小说: {novel_path}")
    try:
        with open(novel_path, 'r', encoding='utf-8') as f:
            novel = f.read()
    # Fix: the original bare `except:` also swallowed KeyboardInterrupt
    # and SystemExit; only file-access/decoding errors should abort here.
    except (OSError, UnicodeDecodeError):
        print(f"❌ 无法加载文件: {novel_path}")
        return
    print(f"✓ 已加载 {len(novel):,} 个字符")

    # 2. Chunk the text and gather statistics (incl. detected language).
    print("\n📄 处理文本...")
    processor = TextProcessor()
    chunks = processor.chunk_text(novel)
    stats = processor.get_statistics(novel)
    print(f"✓ 文本已分为 {len(chunks)} 个块")
    print(f"✓ 检测语言: {stats['language']}")

    # 3. Extract the main characters from the chunks.
    print("\n👥 提取角色...")
    extractor = CharacterExtractor()
    characters = extractor.extract_main_characters(
        chunks,
        text_sample=novel[:3000],
        language=stats['language']
    )
    if not characters:
        print("❌ 未找到角色")
        return
    print(f"✓ 找到 {len(characters)} 个主要角色")

    os.makedirs(output_dir, exist_ok=True)

    # 4. Analyze each character; a failure skips only that character.
    print(f"\n🧠 开始分析角色 (最多 {max_characters} 个)...")
    analyzer = CharacterAnalyzer()
    all_profiles = []
    analyze_count = min(max_characters, len(characters))
    # Fix: dropped the unused `enumerate` index from the original loop.
    for char in tqdm(characters[:analyze_count], desc="分析进度"):
        try:
            profile = _analyze_single_character(
                analyzer, char, chunks, output_dir
            )
            all_profiles.append(profile)
        except Exception as e:
            # Best-effort batch: report and continue with the rest.
            print(f"\n❌ 分析 {char['name']} 失败: {e}")
            continue

    # 5. Combined JSON with every successfully analyzed profile.
    all_file = os.path.join(output_dir, "all_characters.json")
    with open(all_file, 'w', encoding='utf-8') as f:
        json.dump(all_profiles, f, ensure_ascii=False, indent=2)

    # 6. Human-readable summary report.
    report_file = os.path.join(output_dir, "analysis_report.txt")
    _write_report(report_file, novel_path, len(novel), all_profiles)

    # Final summary for the operator.
    print("\n" + "="*70)
    print("✅ 分析完成!")
    print("="*70)
    print(f"📁 输出目录: {output_dir}")
    print(f"📊 分析角色数: {len(all_profiles)}")
    print(f"📄 汇总文件: {all_file}")
    print(f"📋 报告文件: {report_file}")
    print("="*70)
def main():
    """Command-line entry point: parse CLI arguments, then run the batch analysis."""
    import argparse

    arg_parser = argparse.ArgumentParser(
        description="批量分析小说角色",
        # Raw formatter keeps the epilog's example lines exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "示例:\n"
            "  python batch_analyze.py novel.txt\n"
            "  python batch_analyze.py novel.txt -o my_characters -n 15\n"
        ),
    )
    arg_parser.add_argument("novel_path", help="小说文件路径")
    arg_parser.add_argument(
        "-o", "--output",
        default="character_profiles",
        help="输出目录 (默认: character_profiles)",
    )
    arg_parser.add_argument(
        "-n", "--num",
        type=int,
        default=10,
        help="最多分析的角色数 (默认: 10)",
    )

    cli_args = arg_parser.parse_args()
    batch_analyze(cli_args.novel_path, cli_args.output, cli_args.num)
# Script entry guard; fixes the stray trailing "|" artifact that broke the
# original last line ("main() |" is a syntax error).
if __name__ == "__main__":
    main()