Update winrate.py
Browse files- winrate.py +126 -2
winrate.py
CHANGED
|
@@ -136,7 +136,131 @@ def compare_win_rate(dir1, dir2):
|
|
| 136 |
print("-" * 30)
|
| 137 |
|
| 138 |
return win_rate_df
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
if __name__ == "__main__":
|
| 141 |
print("Processing task 1...")
|
| 142 |
avg_scores = calculate_avg_comet('organized_data_1')
|
|
@@ -144,4 +268,4 @@ if __name__ == "__main__":
|
|
| 144 |
|
| 145 |
print("\nProcessing task 2...")
|
| 146 |
win_rate_df = compare_win_rate('organized_data_1', 'organized_data_2')
|
| 147 |
-
print("\nWin rate comparison saved to win_rate_comparison.csv")
|
|
|
|
| 136 |
print("-" * 30)
|
| 137 |
|
| 138 |
return win_rate_df
|
| 139 |
+
def _collect_comet_scores(directory):
    """Collect mean COMET scores found under *directory*.

    The expected layout is
    ``<directory>/<category>/<subcategory>/report/full_image_comet.csv``,
    where each CSV has an ``avg_comet_sentence`` column.

    Args:
        directory: Root directory to scan.

    Returns:
        A ``(category_avgs, subcategory_avgs)`` tuple. ``category_avgs`` maps
        a category name to the mean of its subcategory scores;
        ``subcategory_avgs`` maps ``(category, subcategory)`` to the mean of
        the ``avg_comet_sentence`` column of that subcategory's report.

    Raises:
        OSError: If *directory* (or a category directory) cannot be listed.
    """
    per_category = defaultdict(list)
    subcategory_avgs = {}

    # Sorted traversal keeps the summation order (and therefore the floating
    # point result) independent of the file system's listing order.
    for category in sorted(os.listdir(directory)):
        category_path = os.path.join(directory, category)
        if not os.path.isdir(category_path):
            continue

        for subcategory in sorted(os.listdir(category_path)):
            subcategory_path = os.path.join(category_path, subcategory)
            if not os.path.isdir(subcategory_path):
                continue

            report_path = os.path.join(subcategory_path, 'report', 'full_image_comet.csv')
            if not os.path.exists(report_path):
                continue

            # Best effort: one unreadable report must not abort the whole run.
            try:
                df = pd.read_csv(report_path)
                avg_score = df['avg_comet_sentence'].mean()
            except (OSError, KeyError, ValueError, TypeError) as e:
                print(f"Error processing {report_path}: {e}")
                continue

            # An empty or all-null column yields NaN, which would poison the
            # category average and compare as a tie; treat it as missing.
            if pd.isna(avg_score):
                print(f"Skipping {report_path}: no valid 'avg_comet_sentence' values")
                continue

            per_category[category].append(avg_score)
            subcategory_avgs[(category, subcategory)] = avg_score

    category_avgs = {
        category: sum(values) / len(values)
        for category, values in per_category.items()
    }
    return category_avgs, subcategory_avgs


def _comparison_label(avg1, avg2):
    """Return the textual verdict for a pair of average scores."""
    if avg1 > avg2:
        return "dir1 > dir2"
    if avg1 < avg2:
        return "dir1 < dir2"
    return "dir1 == dir2"


def _share(part, total):
    """Return ``part / total``, or 0.0 when *total* is zero."""
    return part / total if total else 0.0


def compare_category_scores(dir1, dir2):
    """Compare average scores per category and subcategory of two directories.

    Both directories are scanned for
    ``<category>/<subcategory>/report/full_image_comet.csv`` reports. A
    category or subcategory that has no usable report in one directory is
    given an average of 0 for that directory. A summary is printed and the
    full table is written to ``category_score_comparison.csv`` in the current
    working directory.

    Args:
        dir1: Path of the first directory.
        dir2: Path of the second directory.

    Returns:
        DataFrame: One row per category (``type == 'category'``) followed by
        one row per subcategory (``type == 'subcategory'``), with the rounded
        averages of both directories and a ``comparison`` verdict. Rows are
        sorted by name so the output is reproducible. When no report is found
        at all, the frame is empty but still carries the column headers.

    Raises:
        OSError: If either directory cannot be listed.
    """
    # Collect the score data of both directories.
    scores1, sub_scores1 = _collect_comet_scores(dir1)
    scores2, sub_scores2 = _collect_comet_scores(dir2)

    # Compare categories.
    all_categories = sorted(set(scores1) | set(scores2))
    category_comparison = []
    for category in all_categories:
        avg1 = scores1.get(category, 0)
        avg2 = scores2.get(category, 0)
        category_comparison.append({
            'category': category,
            'type': 'category',
            'dir1_avg': round(avg1, 3),
            'dir2_avg': round(avg2, 3),
            'comparison': _comparison_label(avg1, avg2),
        })

    # Compare subcategories.
    all_subcategories = sorted(set(sub_scores1) | set(sub_scores2))
    subcategory_comparison = []
    for key in all_subcategories:
        category, subcategory = key
        avg1 = sub_scores1.get(key, 0)
        avg2 = sub_scores2.get(key, 0)
        subcategory_comparison.append({
            'category': category,
            'type': 'subcategory',
            'subcategory': subcategory,
            'dir1_avg': round(avg1, 3),
            'dir2_avg': round(avg2, 3),
            'comparison': _comparison_label(avg1, avg2),
        })

    # Merge the results and build the DataFrame.
    results = category_comparison + subcategory_comparison
    comparison_df = pd.DataFrame(results)
    if not results:
        # Keep a usable header even when there is nothing to compare.
        comparison_df = pd.DataFrame(columns=[
            'category', 'type', 'dir1_avg', 'dir2_avg', 'comparison', 'subcategory',
        ])

    # Summary statistics.
    total_categories = len(all_categories)
    dir1_win_categories = sum(1 for c in category_comparison if c['comparison'] == 'dir1 > dir2')
    dir2_win_categories = sum(1 for c in category_comparison if c['comparison'] == 'dir1 < dir2')

    total_subcategories = len(all_subcategories)
    dir1_win_subcategories = sum(1 for c in subcategory_comparison if c['comparison'] == 'dir1 > dir2')
    dir2_win_subcategories = sum(1 for c in subcategory_comparison if c['comparison'] == 'dir1 < dir2')

    # Print the summary; _share() guards the percentages against empty input.
    print("\n=== 总结性比较 ===")
    print(f"大类总数: {total_categories}")
    print(f"dir1获胜的大类数: {dir1_win_categories} ({_share(dir1_win_categories, total_categories):.1%})")
    print(f"dir2获胜的大类数: {dir2_win_categories} ({_share(dir2_win_categories, total_categories):.1%})")
    print(f"平局的大类数: {total_categories - dir1_win_categories - dir2_win_categories}")

    print(f"\n子类总数: {total_subcategories}")
    print(f"dir1获胜的子类数: {dir1_win_subcategories} ({_share(dir1_win_subcategories, total_subcategories):.1%})")
    print(f"dir2获胜的子类数: {dir2_win_subcategories} ({_share(dir2_win_subcategories, total_subcategories):.1%})")
    print(f"平局的子类数: {total_subcategories - dir1_win_subcategories - dir2_win_subcategories}")

    # Save the results.
    comparison_df.to_csv('category_score_comparison.csv', index=False)
    print("\n比较结果已保存到 category_score_comparison.csv")

    return comparison_df
|
| 264 |
if __name__ == "__main__":
|
| 265 |
print("Processing task 1...")
|
| 266 |
avg_scores = calculate_avg_comet('organized_data_1')
|
|
|
|
| 268 |
|
| 269 |
print("\nProcessing task 2...")
|
| 270 |
win_rate_df = compare_win_rate('organized_data_1', 'organized_data_2')
|
| 271 |
+
print("\nWin rate comparison saved to win_rate_comparison.csv")
|