Yufan_Zhou
Fix imports, add generate_user_profile_final, and clean up old directories
0701598
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
读取用户档案JSON文件中每个profile的summary项
"""
import json
import os
from typing import List, Dict, Any
def read_profile_summaries(json_file_path: str, max_profiles: int = None) -> List[Dict[str, Any]]:
    """
    Collect the "Summary" entry of each profile stored in a JSON file.

    The loaded top-level JSON value is iterated as a mapping. The "metadata"
    key is skipped, and only entries that are dicts containing a "Summary"
    key are collected.

    Args:
        json_file_path: Path of the JSON file to read.
        max_profiles: Maximum number of profiles to collect. ``None`` (the
            default) collects every matching profile.

    Returns:
        A list of ``{"profile_id": <key>, "summary": <Summary value>}`` dicts
        in the iteration order of the loaded mapping. An empty list is
        returned, after printing a message, when the file does not exist, is
        not valid JSON, or any other exception is raised.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)

        unlimited = max_profiles is None
        collected = []
        for profile_id, entry in payload.items():
            # The metadata entry is never treated as a profile.
            if profile_id == "metadata":
                continue
            # The cap is checked before the entry is inspected, so iteration
            # stops as soon as enough profiles have been gathered.
            if not unlimited and len(collected) >= max_profiles:
                break
            # Only dict entries carrying a "Summary" key count as profiles.
            if not isinstance(entry, dict) or "Summary" not in entry:
                continue
            collected.append({"profile_id": profile_id, "summary": entry["Summary"]})

        if unlimited:
            print(f"成功读取 {len(collected)} 个profile的summary (读取所有)")
        else:
            print(f"成功读取 {len(collected)} 个profile的summary (限制: {max_profiles})")
        return collected
    except FileNotFoundError:
        print(f"错误: 文件 {json_file_path} 不存在")
        return []
    except json.JSONDecodeError as err:
        print(f"错误: JSON解析失败 - {err}")
        return []
    except Exception as err:
        # Catch-all keeps the function best-effort (e.g. a top-level JSON
        # value that is not a mapping ends up here).
        print(f"错误: {err}")
        return []
def save_summaries_to_file(summaries: List[Dict[str, Any]], output_file: str):
    """
    Write the summary list to a JSON file (UTF-8, indented, non-ASCII kept).

    Missing parent directories of ``output_file`` are created first, so that
    writing into an output folder that does not exist yet no longer fails.

    Args:
        summaries: List of summary dicts to serialize.
        output_file: Destination file path.

    Returns:
        None. Failures are reported with a printed message instead of being
        raised, so callers are never interrupted by a save error.
    """
    try:
        # A bare file name has an empty dirname; makedirs("") would raise.
        parent_dir = os.path.dirname(output_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(summaries, f, ensure_ascii=False, indent=2)
        print(f"summaries已保存到: {output_file}")
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print(f"保存文件时出错: {e}")
def print_summary_stats(summaries: List[Dict[str, Any]]):
    """
    Print count and length statistics plus a preview of the first summaries.

    Args:
        summaries: List of dicts, each read via its "profile_id" and
            "summary" keys.

    Returns:
        None. When ``summaries`` is empty a single notice is printed and
        nothing else happens.
    """
    if not summaries:
        print("没有找到任何summary数据")
        return

    lengths = [len(entry["summary"]) for entry in summaries]
    total_count = len(summaries)
    total_length = sum(lengths)
    # total_count is non-zero here because empty input returned above.
    avg_length = total_length / total_count

    print(f"\n=== Summary统计信息 ===")
    print(f"总数量: {total_count}")
    print(f"平均长度: {avg_length:.1f} 字符")
    print(f"总长度: {total_length} 字符")

    # Show at most the first three profiles, truncating long summaries to
    # 200 characters followed by an ellipsis.
    print(f"\n=== 前3个Profile的Summary示例 ===")
    for position, entry in enumerate(summaries[:3], start=1):
        print(f"\n{position}. Profile ID: {entry['profile_id']}")
        text = entry['summary']
        if len(text) > 200:
            preview = text[:200] + "..."
        else:
            preview = text
        print(f" Summary: {preview}")
def main(
    input_file: str = "/home/zhou/deeppersona/generate_user_profile_test/output/profile_world.json",
    output_file: str = "/home/zhou/deeppersona/generate_user_profile_final/output/summary_world.json",
) -> List[Dict[str, Any]]:
    """
    Read profile summaries from a JSON file, print stats, and save them.

    Args:
        input_file: Path of the profile JSON file to read. Defaults to the
            path this script was originally hard-coded to use.
        output_file: Path the extracted summaries are written to. Defaults to
            the path this script was originally hard-coded to use.

    Returns:
        The list of extracted summaries, or an empty list when nothing could
        be read (in which case no statistics are printed and no file is
        written).
    """
    print(f"开始读取文件: {input_file}")

    summaries = read_profile_summaries(input_file)
    if not summaries:
        print("未能读取到任何summary数据")
        return []

    print_summary_stats(summaries)
    save_summaries_to_file(summaries, output_file)
    # Returned so other modules can reuse the result.
    return summaries
# Script entry point: run the extraction only when this file is executed
# directly; the result is kept in the module-level name `summaries`.
if __name__ == "__main__":
    summaries = main()