File size: 1,605 Bytes
d2efd73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import json
from collections import defaultdict
import os

def analyze_json_file(file_path):
    """Read a JSON array of records and print two summary rows.

    Each record must contain the keys ``line_diff``, ``token_diff``,
    ``line`` and ``token``.  Records are grouped by each diff value and,
    per group, the min~max range of the grouped values is reported as
    ``subset_<diff>(<min>~<max>)`` — one row for the line metric and one
    for the token metric, each prefixed with ``"Models, "``.

    Args:
        file_path: Path to a UTF-8 encoded JSON file holding a list of dicts.

    Raises:
        KeyError: If a record lacks one of the expected keys.
        json.JSONDecodeError: If the file is not valid JSON.
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Read the JSON file once; records are processed fully in memory.
    with open(file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Group observed values by their diff key.
    line_diff_stats = defaultdict(list)
    token_diff_stats = defaultdict(list)
    for entry in data:
        line_diff_stats[entry['line_diff']].append(entry['line'])
        token_diff_stats[entry['token_diff']].append(entry['token'])

    # The two reporting loops were duplicated verbatim; emit each row
    # through one shared helper instead.
    _print_subset_row(line_diff_stats)
    _print_subset_row(token_diff_stats)


def _print_subset_row(stats):
    """Print one ``Models, subset_...`` row for a diff -> values mapping.

    Groups are emitted in ascending diff order; each shows the min~max
    range of its collected values.
    """
    print("Models, ", end="")
    subsets = [
        f"subset_{diff}({min(values)}~{max(values)})"
        for diff, values in sorted(stats.items())
    ]
    print(",".join(subsets))

# Example usage: prefer EI.json, falling back to QS.json when it is absent.
if __name__ == "__main__":
    default_path = "EI.json"
    if not os.path.exists(default_path):
        default_path = "QS.json"
    analyze_json_file(default_path)