# Page-extraction residue (not part of the program): Spaces: Sleeping; File size: 1,605 Bytes; d2efd73
import json
from collections import defaultdict
import os
def _update_range(ranges, group, value):
    """Widen the (min, max) pair stored for *group* so that it covers *value*."""
    if group not in ranges:
        ranges[group] = (value, value)
        return
    low, high = ranges[group]
    ranges[group] = (min(low, value), max(high, value))


def _format_subset_row(ranges):
    """Join the ranges as 'subset_<group>(<min>~<max>)' cells, ordered by group."""
    cells = []
    for group in sorted(ranges):
        low, high = ranges[group]
        cells.append(f"subset_{group}({low}~{high})")
    return ",".join(cells)


def analyze_json_file(file_path):
    """Print the value range of each difficulty subset found in a JSON file.

    The file is read as UTF-8 and must contain an iterable of entries, each
    providing the keys ``line_diff``, ``token_diff``, ``line`` and ``token``.
    Entries are grouped by ``line_diff`` (collecting ``line``) and by
    ``token_diff`` (collecting ``token``). Two rows are written to stdout,
    each starting with ``"Models, "`` and followed by comma-separated cells of
    the form ``subset_<group>(<min>~<max>)``, sorted by group key.

    Args:
        file_path: Path of the JSON file to analyze.

    Returns:
        None. The result is printed.

    Raises:
        KeyError: If an entry lacks one of the four required keys.
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Load the JSON document.
    with open(file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Only the extremes are reported, so keep a running (min, max) per group
    # instead of accumulating every value.
    line_ranges = {}
    token_ranges = {}
    for entry in data:
        # Read all four fields up front so a malformed entry fails before any
        # partial update is recorded for it.
        line_diff = entry['line_diff']
        token_diff = entry['token_diff']
        line = entry['line']
        token = entry['token']
        _update_range(line_ranges, line_diff, line)
        _update_range(token_ranges, token_diff, token)

    # Build each row completely before printing so a failure while sorting
    # cannot leave a dangling "Models, " prefix on stdout.
    line_row = _format_subset_row(line_ranges)
    token_row = _format_subset_row(token_ranges)
    print("Models, " + line_row)
    print("Models, " + token_row)
# Example usage
if __name__ == "__main__":
    # Analyze EI.json when it exists in the working directory; otherwise
    # fall back to QS.json.
    file_path = "EI.json" if os.path.exists("EI.json") else "QS.json"
    analyze_json_file(file_path)