import json
import os
from collections import defaultdict


def _format_subsets(values_by_diff):
    """Return comma-joined ``subset_<diff>(<min>~<max>)`` labels, keys ascending."""
    labels = []
    for diff in sorted(values_by_diff):
        values = values_by_diff[diff]
        labels.append(f"subset_{diff}({min(values)}~{max(values)})")
    return ",".join(labels)


def analyze_json_file(file_path):
    """Print the value range of each ``line_diff`` / ``token_diff`` group.

    The file is read as UTF-8 JSON and iterated as a sequence of entries.
    Each entry is indexed by the keys ``'line_diff'``, ``'token_diff'``,
    ``'line'`` and ``'token'``.  The ``line`` values are grouped by
    ``line_diff`` and the ``token`` values by ``token_diff``.

    Two lines are written to stdout, the first for the ``line`` groups and
    the second for the ``token`` groups, each of the form::

        Models, subset_<diff>(<min>~<max>),subset_<diff>(<min>~<max>),...

    Groups appear in ascending key order.  When the file holds no entries,
    each line is just ``"Models, "``.

    Args:
        file_path: Path of the JSON file to analyze.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks one of the four keys listed above.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Group the raw values under their diff key in a single pass.
    line_diff_stats = defaultdict(list)
    token_diff_stats = defaultdict(list)
    for entry in data:
        # Read all four fields before mutating either mapping, so a malformed
        # entry raises KeyError without leaving a half-recorded entry behind.
        line_diff = entry["line_diff"]
        token_diff = entry["token_diff"]
        line = entry["line"]
        token = entry["token"]
        line_diff_stats[line_diff].append(line)
        token_diff_stats[token_diff].append(token)

    # Both report lines share the "Models, " prefix; the first covers the
    # line groups and the second covers the token groups.
    print("Models, ", end="")
    print(_format_subsets(line_diff_stats))

    print("Models, ", end="")
    print(_format_subsets(token_diff_stats))


# Example usage
if __name__ == "__main__":
    # Prefer EI.json when it exists in the working directory; otherwise fall
    # back to QS.json.
    file_path = "EI.json" if os.path.exists("EI.json") else "QS.json"
    analyze_json_file(file_path)