"""批量分析多个角色的工具脚本"""

import sys
import os
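# Make the project root (the parent of this script's directory) importable
# when the script is run directly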
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from core import TextProcessor, CharacterExtractor, CharacterAnalyzer
from utils import CacheManager
import json
from tqdm import tqdm

def batch_analyze(novel_path: str, output_dir: str = "character_profiles",
                  max_characters: int = 10):
    """Analyze all main characters in a novel in one batch.

    Args:
        novel_path: Path to the novel file.
        output_dir: Directory to write the results to.
        max_characters: Maximum number of characters to analyze.
    """
    
    print("="*70)
    print("📚 批量角色分析工具")
    print("="*70)
    
    # 1. Load the novel
    print(f"\n📖 Loading novel: {novel_path}")
    try:
        with open(novel_path, 'r', encoding='utf-8') as f:
            novel = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Failed to load file: {novel_path} ({e})")
        return

    print(f"✓ Loaded {len(novel):,} characters")
    
    # 2. Process the text
    print("\n📄 Processing text...")
    processor = TextProcessor()
    chunks = processor.chunk_text(novel)
    stats = processor.get_statistics(novel)

    print(f"✓ Split text into {len(chunks)} chunks")
    print(f"✓ Detected language: {stats['language']}")
    
    # 3. Extract characters
    print("\n👥 Extracting characters...")
    extractor = CharacterExtractor()
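    # novel[:3000] gives the extractor a sample of the opening text;
    # the 3,000-character cutoff is a heuristic, not a hard requirement.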
    characters = extractor.extract_main_characters(
        chunks,
        text_sample=novel[:3000],
        language=stats['language']
    )
    
    if not characters:
        print("❌ 未找到角色")
        return
    
    print(f"✓ 找到 {len(characters)} 个主要角色")
    
    # Create the output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # 4. Analyze each character
    print(f"\n🧠 Analyzing characters (up to {max_characters})...")
    analyzer = CharacterAnalyzer()
    
    all_profiles = []
    analyze_count = min(max_characters, len(characters))
    
    for char in tqdm(characters[:analyze_count], desc="Analyzing"):
        char_name = char['name']
        
        try:
            # Select representative excerpts
            representative_chunks = analyzer.select_representative_chunks(
                chunks,
                char['info']['chunks']
            )
            
            # Analyze the character
            profile = analyzer.analyze_character_batch(
                char_name,
                representative_chunks
            )
            
            # Enrich the profile with examples
            profile = analyzer.enhance_profile_with_examples(
                profile,
                chunks,
                char['info']['chunks']
            )
            
            all_profiles.append(profile)
            
            # Save the individual character profile
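            # Replace spaces and slashes so the name is a safe filename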
            char_filename = f"{profile['name'].replace(' ', '_').replace('/', '_')}.json"
            char_file = os.path.join(output_dir, char_filename)
            
            with open(char_file, 'w', encoding='utf-8') as f:
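                # ensure_ascii=False keeps non-ASCII (e.g. Chinese) text readable in the JSON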
                json.dump(profile, f, ensure_ascii=False, indent=2)
            
        except Exception as e:
            print(f"\n❌ 分析 {char_name} 失败: {e}")
            continue
    
    # 5. Save the combined profiles
    all_file = os.path.join(output_dir, "all_characters.json")
    with open(all_file, 'w', encoding='utf-8') as f:
        json.dump(all_profiles, f, ensure_ascii=False, indent=2)
    
    # 6. Generate a report
    report_file = os.path.join(output_dir, "analysis_report.txt")
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("="*70 + "\n")
        f.write("角色分析报告\n")
        f.write("="*70 + "\n\n")
        f.write(f"小说文件: {novel_path}\n")
        f.write(f"文本长度: {len(novel):,} 字符\n")
        f.write(f"分析角色数: {len(all_profiles)}\n\n")
        f.write("-"*70 + "\n\n")
        
        for i, profile in enumerate(all_profiles, 1):
            f.write(f"{i}. {profile['name']}\n")
            f.write(f"   核心特质: {', '.join(profile.get('core_traits', []))}\n")
            f.write(f"   性格总结: {profile.get('personality_summary', 'N/A')}\n")
            f.write("\n")
    
    # Done
    print("\n" + "="*70)
    print("✅ Analysis complete!")
    print("="*70)
    print(f"📁 Output directory: {output_dir}")
    print(f"📊 Characters analyzed: {len(all_profiles)}")
    print(f"📄 Summary file: {all_file}")
    print(f"📋 Report file: {report_file}")
    print("="*70)

def main():
    import argparse
    
    parser = argparse.ArgumentParser(
        description="批量分析小说角色",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python batch_analyze.py novel.txt
  python batch_analyze.py novel.txt -o my_characters -n 15
        """
    )
    
    parser.add_argument("novel_path", help="小说文件路径")
    parser.add_argument("-o", "--output", default="character_profiles", 
                       help="输出目录 (默认: character_profiles)")
    parser.add_argument("-n", "--num", type=int, default=10,
                       help="最多分析的角色数 (默认: 10)")
    
    args = parser.parse_args()
    
    batch_analyze(args.novel_path, args.output, args.num)

if __name__ == "__main__":
    main()