Spaces:
Sleeping
Sleeping
| import json | |
| from collections import defaultdict | |
| import os | |
def analyze_json_file(file_path):
    """Read a JSON file of diff entries and print subset-range summaries.

    The file must contain a JSON array of objects, each with the keys
    ``line_diff``, ``token_diff``, ``line``, and ``token``. Entries are
    grouped by their diff value, and for each distinct diff a token of
    the form ``subset_<diff>(<min>~<max>)`` is printed, covering the
    observed value range. Two rows are emitted, each prefixed with
    ``"Models, "``: first the line-based stats, then the token-based
    stats.

    Args:
        file_path: Path to the JSON file to analyze.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry is missing one of the required keys.
    """
    with open(file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Group observed line/token values by their diff bucket.
    line_diff_stats = defaultdict(list)
    token_diff_stats = defaultdict(list)
    for entry in data:
        line_diff_stats[entry['line_diff']].append(entry['line'])
        token_diff_stats[entry['token_diff']].append(entry['token'])

    # The line and token summaries follow the same format, so one
    # helper renders both (the original duplicated this logic inline).
    _print_subsets(line_diff_stats)
    _print_subsets(token_diff_stats)


def _print_subsets(stats):
    """Print one "Models, ..." row with subset_<diff>(<min>~<max>) per key, sorted by diff."""
    subsets = [
        f"subset_{diff}({min(values)}~{max(values)})"
        for diff, values in sorted(stats.items())
    ]
    print("Models, " + ",".join(subsets))
# Script entry point: prefer "EI.json" when present in the working
# directory, otherwise fall back to "QS.json".
if __name__ == "__main__":
    if os.path.exists("EI.json"):
        target = "EI.json"
    else:
        target = "QS.json"
    analyze_json_file(target)