Chen42 committed on
Commit
df6860d
·
verified ·
1 Parent(s): beb1105

Update winrate.py

Browse files
Files changed (1) hide show
  1. winrate.py +126 -2
winrate.py CHANGED
@@ -136,7 +136,131 @@ def compare_win_rate(dir1, dir2):
136
  print("-" * 30)
137
 
138
  return win_rate_df
139
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  if __name__ == "__main__":
141
  print("Processing task 1...")
142
  avg_scores = calculate_avg_comet('organized_data_1')
@@ -144,4 +268,4 @@ if __name__ == "__main__":
144
 
145
  print("\nProcessing task 2...")
146
  win_rate_df = compare_win_rate('organized_data_1', 'organized_data_2')
147
- print("\nWin rate comparison saved to win_rate_comparison.csv")
 
136
  print("-" * 30)
137
 
138
  return win_rate_df
139
def compare_category_scores(dir1, dir2):
    """
    Compare per-category and per-subcategory average scores of two directories.

    Each directory is walked as ``<dir>/<category>/<subcategory>/`` and, where
    it exists, ``report/full_image_comet.csv`` is read. A subcategory score is
    the mean of that file's ``avg_comet_sentence`` column; a category score is
    the mean of the scores of its subcategories that had a readable report.

    A category or subcategory that is present in only one directory is still
    listed, with the missing side scored as 0.

    Side effects: prints a win/loss/tie summary to stdout and writes the full
    comparison table to ``category_score_comparison.csv`` in the current
    working directory.

    Args:
        dir1: Path of the first directory.
        dir2: Path of the second directory.

    Returns:
        DataFrame: One row per category (``type == 'category'``) followed by
        one row per subcategory (``type == 'subcategory'``), each sorted by
        name, with the rounded averages of both directories and a
        ``comparison`` verdict string.

    Raises:
        OSError: If ``dir1`` or ``dir2`` cannot be listed (e.g. it does not
            exist).
    """

    def collect_scores(directory):
        """Return ({category: avg}, {(category, subcategory): avg}) for one tree."""
        per_category = defaultdict(list)
        subcategory_avgs = {}

        for category in os.listdir(directory):
            category_path = os.path.join(directory, category)
            if not os.path.isdir(category_path):
                continue

            for subcategory in os.listdir(category_path):
                subcategory_path = os.path.join(category_path, subcategory)
                if not os.path.isdir(subcategory_path):
                    continue

                report_path = os.path.join(
                    subcategory_path, 'report', 'full_image_comet.csv'
                )
                if not os.path.exists(report_path):
                    continue

                # Best effort: one unreadable or malformed report is reported
                # and skipped so it does not abort the whole comparison.
                try:
                    df = pd.read_csv(report_path)
                    avg_score = df['avg_comet_sentence'].mean()
                except Exception as e:
                    print(f"Error processing {report_path}: {e}")
                    continue

                per_category[category].append(avg_score)
                subcategory_avgs[(category, subcategory)] = avg_score

        # Every list holds at least one score, so the division is safe.
        category_avgs = {
            category: sum(values) / len(values)
            for category, values in per_category.items()
        }
        return category_avgs, subcategory_avgs

    def verdict(avg1, avg2):
        """Return the comparison label for a pair of averages."""
        if avg1 > avg2:
            return "dir1 > dir2"
        if avg1 < avg2:
            return "dir1 < dir2"
        return "dir1 == dir2"

    def share(wins, total):
        """Return wins / total, or 0.0 when there is nothing to compare."""
        return wins / total if total else 0.0

    # Collect the scores of both directories.
    scores1, sub_scores1 = collect_scores(dir1)
    scores2, sub_scores2 = collect_scores(dir2)

    # Compare categories. Sorting keeps the output order stable between runs
    # instead of depending on set iteration order.
    all_categories = sorted(set(scores1) | set(scores2))
    category_comparison = []
    for category in all_categories:
        avg1 = scores1.get(category, 0)
        avg2 = scores2.get(category, 0)
        category_comparison.append({
            'category': category,
            'type': 'category',
            'dir1_avg': round(avg1, 3),
            'dir2_avg': round(avg2, 3),
            'comparison': verdict(avg1, avg2),
        })

    # Compare subcategories.
    all_subcategories = sorted(set(sub_scores1) | set(sub_scores2))
    subcategory_comparison = []
    for key in all_subcategories:
        category, subcategory = key
        avg1 = sub_scores1.get(key, 0)
        avg2 = sub_scores2.get(key, 0)
        subcategory_comparison.append({
            'category': category,
            'type': 'subcategory',
            'subcategory': subcategory,
            'dir1_avg': round(avg1, 3),
            'dir2_avg': round(avg2, 3),
            'comparison': verdict(avg1, avg2),
        })

    # Merge both result sets into a single table.
    comparison_df = pd.DataFrame(category_comparison + subcategory_comparison)

    # Summary counts.
    total_categories = len(all_categories)
    dir1_win_categories = sum(
        1 for c in category_comparison if c['comparison'] == 'dir1 > dir2'
    )
    dir2_win_categories = sum(
        1 for c in category_comparison if c['comparison'] == 'dir1 < dir2'
    )

    total_subcategories = len(all_subcategories)
    dir1_win_subcategories = sum(
        1 for c in subcategory_comparison if c['comparison'] == 'dir1 > dir2'
    )
    dir2_win_subcategories = sum(
        1 for c in subcategory_comparison if c['comparison'] == 'dir1 < dir2'
    )

    # Print the summary. Percentages go through share() so that an empty
    # comparison prints 0.0% instead of raising ZeroDivisionError.
    print("\n=== 总结性比较 ===")
    print(f"大类总数: {total_categories}")
    print(f"dir1获胜的大类数: {dir1_win_categories} ({share(dir1_win_categories, total_categories):.1%})")
    print(f"dir2获胜的大类数: {dir2_win_categories} ({share(dir2_win_categories, total_categories):.1%})")
    print(f"平局的大类数: {total_categories - dir1_win_categories - dir2_win_categories}")

    print(f"\n子类总数: {total_subcategories}")
    print(f"dir1获胜的子类数: {dir1_win_subcategories} ({share(dir1_win_subcategories, total_subcategories):.1%})")
    print(f"dir2获胜的子类数: {dir2_win_subcategories} ({share(dir2_win_subcategories, total_subcategories):.1%})")
    print(f"平局的子类数: {total_subcategories - dir1_win_subcategories - dir2_win_subcategories}")

    # Save the results.
    comparison_df.to_csv('category_score_comparison.csv', index=False)
    print("\n比较结果已保存到 category_score_comparison.csv")

    return comparison_df
264
  if __name__ == "__main__":
265
  print("Processing task 1...")
266
  avg_scores = calculate_avg_comet('organized_data_1')
 
268
 
269
  print("\nProcessing task 2...")
270
  win_rate_df = compare_win_rate('organized_data_1', 'organized_data_2')
271
+ print("\nWin rate comparison saved to win_rate_comparison.csv")