"""批量分析多个角色的工具脚本"""

import sys
import os
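# Make the project root (the parent of this script's directory) importable
# when the script is run directly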
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from core import TextProcessor, CharacterExtractor, CharacterAnalyzer
from utils import CacheManager
import json
from tqdm import tqdm

def batch_analyze(novel_path: str, output_dir: str = "character_profiles",
                  max_characters: int = 10):
    """Analyze all main characters in a novel in one batch.

    Args:
        novel_path: Path to the novel file.
        output_dir: Directory to write the results to.
        max_characters: Maximum number of characters to analyze.
    """
    
    print("="*70)
    print("📚 批量角色分析工具")
    print("="*70)
    
    # 1. Load the novel
    print(f"\n📖 Loading novel: {novel_path}")
    try:
        with open(novel_path, 'r', encoding='utf-8') as f:
            novel = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Failed to load file: {novel_path} ({e})")
        return

    print(f"✓ Loaded {len(novel):,} characters")
    
    # 2. Process the text
    print("\n📄 Processing text...")
    processor = TextProcessor()
    chunks = processor.chunk_text(novel)
    stats = processor.get_statistics(novel)

    print(f"✓ Split text into {len(chunks)} chunks")
    print(f"✓ Detected language: {stats['language']}")
    
    # 3. Extract characters
    print("\n👥 Extracting characters...")
    extractor = CharacterExtractor()
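    # novel[:3000] gives the extractor a sample of the opening text;
    # the 3,000-character cutoff is a heuristic, not a hard requirement.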
    characters = extractor.extract_main_characters(
        chunks,
        text_sample=novel[:3000],
        language=stats['language']
    )
    
    if not characters:
        print("❌ 未找到角色")
        return
    
    print(f"✓ 找到 {len(characters)} 个主要角色")
    
    # Create the output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # 4. Analyze each character
    print(f"\n🧠 Analyzing characters (up to {max_characters})...")
    analyzer = CharacterAnalyzer()
    
    all_profiles = []
    analyze_count = min(max_characters, len(characters))
    
    for char in tqdm(characters[:analyze_count], desc="Analyzing"):
        char_name = char['name']
        
        try:
            # Select representative excerpts
            representative_chunks = analyzer.select_representative_chunks(
                chunks,
                char['info']['chunks']
            )
            
            # Analyze the character
            profile = analyzer.analyze_character_batch(
                char_name,
                representative_chunks
            )
            
            # Enrich the profile with examples
            profile = analyzer.enhance_profile_with_examples(
                profile,
                chunks,
                char['info']['chunks']
            )
            
            all_profiles.append(profile)
            
            # Save the individual character profile
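            # Replace spaces and slashes so the name is a safe filename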
            char_filename = f"{profile['name'].replace(' ', '_').replace('/', '_')}.json"
            char_file = os.path.join(output_dir, char_filename)
            
            with open(char_file, 'w', encoding='utf-8') as f:
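                # ensure_ascii=False keeps non-ASCII (e.g. Chinese) text readable in the JSON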
                json.dump(profile, f, ensure_ascii=False, indent=2)
            
        except Exception as e:
            print(f"\n❌ 分析 {char_name} 失败: {e}")
            continue
    
    # 5. Save the combined profiles
    all_file = os.path.join(output_dir, "all_characters.json")
    with open(all_file, 'w', encoding='utf-8') as f:
        json.dump(all_profiles, f, ensure_ascii=False, indent=2)
    
    # 6. Generate a report
    report_file = os.path.join(output_dir, "analysis_report.txt")
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("="*70 + "\n")
        f.write("角色分析报告\n")
        f.write("="*70 + "\n\n")
        f.write(f"小说文件: {novel_path}\n")
        f.write(f"文本长度: {len(novel):,} 字符\n")
        f.write(f"分析角色数: {len(all_profiles)}\n\n")
        f.write("-"*70 + "\n\n")
        
        for i, profile in enumerate(all_profiles, 1):
            f.write(f"{i}. {profile['name']}\n")
            f.write(f"   核心特质: {', '.join(profile.get('core_traits', []))}\n")
            f.write(f"   性格总结: {profile.get('personality_summary', 'N/A')}\n")
            f.write("\n")
    
    # Done
    print("\n" + "="*70)
    print("✅ Analysis complete!")
    print("="*70)
    print(f"📁 Output directory: {output_dir}")
    print(f"📊 Characters analyzed: {len(all_profiles)}")
    print(f"📄 Summary file: {all_file}")
    print(f"📋 Report file: {report_file}")
    print("="*70)

def main():
    import argparse
    
    parser = argparse.ArgumentParser(
        description="批量分析小说角色",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python batch_analyze.py novel.txt
  python batch_analyze.py novel.txt -o my_characters -n 15
        """
    )
    
    parser.add_argument("novel_path", help="小说文件路径")
    parser.add_argument("-o", "--output", default="character_profiles", 
                       help="输出目录 (默认: character_profiles)")
    parser.add_argument("-n", "--num", type=int, default=10,
                       help="最多分析的角色数 (默认: 10)")
    
    args = parser.parse_args()
    
    batch_analyze(args.novel_path, args.output, args.num)

if __name__ == "__main__":
    main()