#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Read the ``Summary`` entry of every profile in a user-profile JSON file.

The input file is expected to hold a JSON object whose keys are profile ids
and whose values are objects; a ``"metadata"`` key is skipped. Only entries
that are objects containing a ``"Summary"`` key are collected.
"""

import json
import os
from typing import Any, Dict, List, Optional

# Default locations used when the script is run without arguments to main().
DEFAULT_INPUT_FILE = "/home/zhou/deeppersona/generate_user_profile_test/output/profile_world.json"
DEFAULT_OUTPUT_FILE = "/home/zhou/deeppersona/generate_user_profile_final/output/summary_world.json"


def read_profile_summaries(json_file_path: str, max_profiles: Optional[int] = None) -> List[Dict[str, Any]]:
    """Collect the ``Summary`` value of each profile stored in a JSON file.

    Args:
        json_file_path: Path of the JSON file to read (decoded as UTF-8).
        max_profiles: Maximum number of summaries to collect. ``None`` (the
            default) collects all of them; ``0`` or a negative value yields
            an empty list.

    Returns:
        A list of ``{"profile_id": <key>, "summary": <value of "Summary">}``
        dicts in file order. Any failure (missing file, invalid JSON,
        unexpected top-level type, other I/O errors) is reported on stdout
        and results in an empty list; no exception is propagated.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"错误: 文件 {json_file_path} 不存在")
        return []
    except json.JSONDecodeError as e:
        print(f"错误: JSON解析失败 - {e}")
        return []
    except Exception as e:
        # Deliberately broad: this function is best-effort and callers rely
        # on getting [] instead of an exception.
        print(f"错误: {e}")
        return []

    # Profiles are looked up by key, so the top level must be a JSON object.
    if not isinstance(data, dict):
        print(f"错误: JSON顶层结构应为对象(dict),实际为 {type(data).__name__}")
        return []

    summaries: List[Dict[str, Any]] = []
    for key, value in data.items():
        # The "metadata" entry describes the file, not a profile.
        if key == "metadata":
            continue

        # Stop as soon as the requested number of summaries is collected.
        if max_profiles is not None and len(summaries) >= max_profiles:
            break

        # Only object entries that carry a "Summary" key count as profiles.
        if isinstance(value, dict) and "Summary" in value:
            summaries.append({
                "profile_id": key,
                "summary": value["Summary"],
            })

    if max_profiles is not None:
        print(f"成功读取 {len(summaries)} 个profile的summary (限制: {max_profiles})")
    else:
        print(f"成功读取 {len(summaries)} 个profile的summary (读取所有)")

    return summaries


def save_summaries_to_file(summaries: List[Dict[str, Any]], output_file: str) -> None:
    """Write the summaries to ``output_file`` as pretty-printed UTF-8 JSON.

    The parent directory is created when it does not exist. Failures are
    reported on stdout instead of being raised.

    Args:
        summaries: Summary records to serialise.
        output_file: Destination file path.
    """
    try:
        # Serialise before opening the file so that a serialisation error
        # cannot leave a truncated file behind.
        payload = json.dumps(summaries, ensure_ascii=False, indent=2)

        parent_dir = os.path.dirname(output_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"summaries已保存到: {output_file}")
    except Exception as e:
        # Best-effort by design: report the problem and carry on.
        print(f"保存文件时出错: {e}")


def _summary_text(summary: Any) -> str:
    """Return ``summary`` as text so it can be measured and previewed."""
    if isinstance(summary, str):
        return summary
    try:
        return json.dumps(summary, ensure_ascii=False)
    except (TypeError, ValueError):
        # Not JSON-serialisable (e.g. passed in by another module).
        return str(summary)


def print_summary_stats(summaries: List[Dict[str, Any]]) -> None:
    """Print count, average/total length and up to three previews.

    Lengths are measured in characters. Non-string summaries are measured
    and previewed through their JSON text representation.

    Args:
        summaries: Records with ``"profile_id"`` and ``"summary"`` keys.
    """
    if not summaries:
        print("没有找到任何summary数据")
        return

    texts = [_summary_text(item["summary"]) for item in summaries]
    total_count = len(texts)
    total_length = sum(len(text) for text in texts)
    # total_count is at least 1 here because of the early return above.
    avg_length = total_length / total_count

    print("\n=== Summary统计信息 ===")
    print(f"总数量: {total_count}")
    print(f"平均长度: {avg_length:.1f} 字符")
    print(f"总长度: {total_length} 字符")

    # Show the first three profiles, truncating long summaries to 200 chars.
    print("\n=== 前3个Profile的Summary示例 ===")
    for index, (item, text) in enumerate(zip(summaries[:3], texts), start=1):
        summary_preview = text[:200] + "..." if len(text) > 200 else text
        print(f"\n{index}. Profile ID: {item['profile_id']}")
        print(f" Summary: {summary_preview}")


def main(input_file: str = DEFAULT_INPUT_FILE, output_file: str = DEFAULT_OUTPUT_FILE) -> List[Dict[str, Any]]:
    """Read the summaries, print statistics and save them to a file.

    Args:
        input_file: Profile JSON file to read.
        output_file: Destination of the extracted summaries.

    Returns:
        The extracted summaries, or an empty list when none could be read
        (in which case nothing is written).
    """
    print(f"开始读取文件: {input_file}")

    summaries = read_profile_summaries(input_file)
    if not summaries:
        print("未能读取到任何summary数据")
        return []

    print_summary_stats(summaries)
    save_summaries_to_file(summaries, output_file)

    # Returned so other modules can reuse the result.
    return summaries


if __name__ == "__main__":
    summaries = main()