Chen42 committed on
Commit
beb1105
·
verified ·
1 Parent(s): 6b1ac2e

Create winrate.py

Browse files
Files changed (1) hide show
  1. winrate.py +147 -0
winrate.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from collections import defaultdict
4
+
5
def calculate_avg_comet(root_dir, output_path='comet_avg_scores.csv'):
    """Average the COMET sentence score of every report found under *root_dir*.

    The directory is scanned two levels deep (``<root_dir>/<category>/
    <subcategory>/``). For each subcategory that contains
    ``report/full_image_comet.csv`` the mean of that file's
    ``avg_comet_sentence`` column is recorded.

    Args:
        root_dir: Directory whose immediate sub-directories are categories.
        output_path: Where the summary CSV is written. Defaults to
            ``comet_avg_scores.csv`` in the current working directory.

    Returns:
        A ``pandas.DataFrame`` with the columns ``category``, ``subcategory``
        and ``avg_comet``, one row per report that could be read. The frame
        (and the CSV header) keeps these columns even when no report is found.

    Raises:
        OSError: If *root_dir* or one of its category directories cannot be
            listed. Errors while reading an individual report are printed
            and that report is skipped.
    """
    columns = ['category', 'subcategory', 'avg_comet']
    results = []

    # os.listdir returns entries in arbitrary order; sort so the output rows
    # are reproducible from run to run.
    for category in sorted(os.listdir(root_dir)):
        category_path = os.path.join(root_dir, category)
        if not os.path.isdir(category_path):
            continue

        for subcategory in sorted(os.listdir(category_path)):
            subcategory_path = os.path.join(category_path, subcategory)
            if not os.path.isdir(subcategory_path):
                continue

            report_path = os.path.join(subcategory_path, 'report', 'full_image_comet.csv')
            if not os.path.exists(report_path):
                continue

            try:
                df = pd.read_csv(report_path)
                avg_score = df['avg_comet_sentence'].mean()
            except Exception as e:
                # Best effort: one unreadable or malformed report must not
                # abort the whole scan, so report it and move on.
                print(f"Error processing {report_path}: {e}")
                continue

            results.append({
                'category': category,
                'subcategory': subcategory,
                'avg_comet': avg_score,
            })

    # Passing the columns explicitly keeps the schema (and a CSV header) even
    # when no report was found.
    result_df = pd.DataFrame(results, columns=columns)
    result_df.to_csv(output_path, index=False)
    return result_df
34
+
35
def _load_comet_scores(root_dir):
    """Return ``{(category, subcategory): {file_name: score}}`` for *root_dir*.

    Reads ``<root_dir>/<category>/<subcategory>/report/full_image_comet.csv``
    wherever it exists and maps each ``file_name`` to its
    ``avg_comet_sentence`` value. Reports that cannot be read are printed and
    skipped.
    """
    scores = {}
    for category in os.listdir(root_dir):
        category_path = os.path.join(root_dir, category)
        if not os.path.isdir(category_path):
            continue

        for subcategory in os.listdir(category_path):
            subcategory_path = os.path.join(category_path, subcategory)
            if not os.path.isdir(subcategory_path):
                continue

            report_path = os.path.join(subcategory_path, 'report', 'full_image_comet.csv')
            if not os.path.exists(report_path):
                continue

            try:
                df = pd.read_csv(report_path)
                # If a file name is repeated, the last row wins.
                per_file = dict(zip(df['file_name'], df['avg_comet_sentence']))
            except Exception as e:
                # Best effort: a broken report is reported and skipped.
                print(f"Error processing {report_path}: {e}")
                continue

            scores[(category, subcategory)] = per_file
    return scores


def compare_win_rate(dir1, dir2, output_path='win_rate_comparison.csv'):
    """Compute how often *dir1* scores higher than *dir2* on the same file.

    Both directories are scanned for
    ``<category>/<subcategory>/report/full_image_comet.csv``. For every
    (category, subcategory) present in both, the ``avg_comet_sentence`` of
    each ``file_name`` found in both reports is compared. A comparison is a
    win only when the *dir1* score is strictly greater; ties are not wins.
    Pairs in which either score is missing (NaN) are left out of the
    comparison instead of being counted as losses.

    After the per-subcategory rows, one row per category with subcategory
    ``'ALL'`` aggregates the wins and comparisons of that category. The
    per-category summary is also printed.

    Args:
        dir1: Root directory of the system whose wins are counted.
        dir2: Root directory of the system being compared against.
        output_path: Where the result CSV is written. Defaults to
            ``win_rate_comparison.csv`` in the current working directory.

    Returns:
        A ``pandas.DataFrame`` with the columns ``category``,
        ``subcategory``, ``win_rate``, ``wins`` and ``total_comparisons``.
        The columns are present even when nothing could be compared.

    Raises:
        OSError: If either root directory or one of its category
            directories cannot be listed.
    """
    columns = ['category', 'subcategory', 'win_rate', 'wins', 'total_comparisons']

    # Load the per-file scores of both directories.
    data1 = _load_comet_scores(dir1)
    data2 = _load_comet_scores(dir2)

    # Compute the win rate of each subcategory.
    win_rate_results = []
    category_stats = defaultdict(lambda: {'wins': 0, 'total': 0})

    # Only keys present on both sides can share files. Sorting makes the row
    # order independent of set iteration order.
    for key in sorted(set(data1) & set(data2)):
        category, subcategory = key
        scores1 = data1[key]
        scores2 = data2[key]

        wins = 0
        total = 0
        for file in scores1.keys() & scores2.keys():
            score1 = scores1[file]
            score2 = scores2[file]
            # NaN compares False against everything, which would silently
            # book a missing score as a loss for dir1.
            if pd.isna(score1) or pd.isna(score2):
                continue
            total += 1
            if score1 > score2:
                wins += 1

        if total == 0:
            continue

        win_rate_results.append({
            'category': category,
            'subcategory': subcategory,
            'win_rate': wins / total,
            'wins': wins,
            'total_comparisons': total,
        })

        # Update the per-category totals.
        category_stats[category]['wins'] += wins
        category_stats[category]['total'] += total

    # Append the per-category aggregate rows.
    category_rows = []
    for category, stats in category_stats.items():
        cat_win_rate = stats['wins'] / stats['total'] if stats['total'] > 0 else 0
        category_rows.append({
            'category': category,
            'subcategory': 'ALL',
            'win_rate': cat_win_rate,
            'wins': stats['wins'],
            'total_comparisons': stats['total'],
        })
    win_rate_results.extend(category_rows)

    # Build the DataFrame and save it; explicit columns keep the CSV header
    # even when there is nothing to compare.
    win_rate_df = pd.DataFrame(win_rate_results, columns=columns)
    win_rate_df.to_csv(output_path, index=False)

    # Print the per-category statistics.
    print("\n=== 大类统计结果 ===")
    for row in category_rows:
        print(f"大类: {row['category']}")
        print(f"总比较次数: {row['total_comparisons']}")
        print(f"获胜次数: {row['wins']}")
        print(f"Win Rate: {row['win_rate']:.2%}")
        print("-" * 30)

    return win_rate_df
139
+
140
if __name__ == "__main__":
    # Script entry point: summarise the first data set, then compare it
    # against the second one. Both functions write their CSV into the
    # current working directory.
    primary_root = 'organized_data_1'
    reference_root = 'organized_data_2'

    # Task 1: mean COMET score per (category, subcategory).
    print("Processing task 1...")
    avg_scores = calculate_avg_comet(primary_root)
    print("Average comet scores saved to comet_avg_scores.csv")

    # Task 2: win rate of the first data set over the second.
    print("\nProcessing task 2...")
    win_rate_df = compare_win_rate(primary_root, reference_root)
    print("\nWin rate comparison saved to win_rate_comparison.csv")