zhimin-z committed on
Commit ·
7f2e001
1
Parent(s): c931f22
add
Browse files
msr.py
CHANGED
|
@@ -901,7 +901,7 @@ def calculate_issue_stats_from_metadata(metadata_list):
|
|
| 901 |
}
|
| 902 |
|
| 903 |
|
| 904 |
-
def
|
| 905 |
"""Calculate monthly metrics for all assistants for visualization."""
|
| 906 |
identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}
|
| 907 |
|
|
@@ -985,7 +985,7 @@ def calculate_discussion_stats_from_metadata(metadata_list):
|
|
| 985 |
}
|
| 986 |
|
| 987 |
|
| 988 |
-
def
|
| 989 |
"""Calculate monthly metrics for discussions for all assistants for visualization."""
|
| 990 |
identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}
|
| 991 |
|
|
@@ -1050,6 +1050,94 @@ def calculate_monthly_metrics_by_agent_discussions(all_discussions_dict, assista
|
|
| 1050 |
}
|
| 1051 |
|
| 1052 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, wanted_in_progress_dict=None, discussions_dict=None):
|
| 1054 |
"""Construct leaderboard from in-memory issue metadata and discussion metadata.
|
| 1055 |
|
|
@@ -1106,7 +1194,7 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
|
|
| 1106 |
return cache_dict
|
| 1107 |
|
| 1108 |
|
| 1109 |
-
def save_leaderboard_data_to_hf(leaderboard_dict, issue_monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None):
|
| 1110 |
"""Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
|
| 1111 |
try:
|
| 1112 |
token = get_hf_token()
|
|
@@ -1129,6 +1217,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, issue_monthly_metrics, wanted_
|
|
| 1129 |
'leaderboard': leaderboard_dict,
|
| 1130 |
'issue_monthly_metrics': issue_monthly_metrics,
|
| 1131 |
'wanted_issues': wanted_issues,
|
|
|
|
| 1132 |
'discussion_monthly_metrics': discussion_monthly_metrics
|
| 1133 |
}
|
| 1134 |
|
|
@@ -1216,12 +1305,15 @@ def mine_all_agents():
|
|
| 1216 |
leaderboard_dict = construct_leaderboard_from_metadata(
|
| 1217 |
agent_issues, assistants, wanted_resolved, wanted_in_progress, agent_discussions
|
| 1218 |
)
|
| 1219 |
-
issue_monthly_metrics =
|
| 1220 |
-
discussion_monthly_metrics =
|
| 1221 |
agent_discussions, assistants
|
| 1222 |
)
|
|
|
|
|
|
|
|
|
|
| 1223 |
save_leaderboard_data_to_hf(
|
| 1224 |
-
leaderboard_dict, issue_monthly_metrics, wanted_open, discussion_monthly_metrics
|
| 1225 |
)
|
| 1226 |
except Exception as e:
|
| 1227 |
print(f"Error saving leaderboard: {str(e)}")
|
|
|
|
| 901 |
}
|
| 902 |
|
| 903 |
|
| 904 |
+
def calculate_monthly_metrics_by_issues(all_metadata_dict, assistants):
|
| 905 |
"""Calculate monthly metrics for all assistants for visualization."""
|
| 906 |
identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}
|
| 907 |
|
|
|
|
| 985 |
}
|
| 986 |
|
| 987 |
|
| 988 |
+
def calculate_monthly_metrics_by_discussions(all_discussions_dict, assistants):
|
| 989 |
"""Calculate monthly metrics for discussions for all assistants for visualization."""
|
| 990 |
identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}
|
| 991 |
|
|
|
|
| 1050 |
}
|
| 1051 |
|
| 1052 |
|
| 1053 |
+
def calculate_monthly_metrics_by_wanted_issues(wanted_resolved_dict, wanted_in_progress_dict, assistants):
    """Calculate monthly metrics for wanted issues for all assistants for visualization.

    Resolved wanted issues are bucketed by the month of their ``closed_at``
    timestamp; in-progress wanted issues are bucketed by the month of their
    ``created_at`` timestamp. Entries whose timestamp is missing, equal to
    ``'N/A'``, or unparsable are skipped (unparsable ones print a warning).

    Args:
        wanted_resolved_dict: Mapping of agent identifier -> list of issue
            metadata dicts for resolved wanted issues. May be ``None`` or empty.
        wanted_in_progress_dict: Mapping of agent identifier -> list of issue
            metadata dicts for in-progress wanted issues. May be ``None`` or empty.
        assistants: Iterable of assistant dicts; ``github_identifier`` and
            ``name`` are used to translate identifiers into display names.

    Returns:
        A dict with three keys:

        * ``'assistants'``: sorted list of agent display names that have data.
        * ``'months'``: sorted list of ``'YYYY-MM'`` keys covering every agent.
        * ``'data'``: per-agent dict of lists aligned with ``'months'``:
          ``'resolved_rates'`` (percentage, or ``None`` when the month has no
          wanted issues), ``'total_wanted'``, ``'resolved_wanted'`` and
          ``'in_progress_wanted'``.
    """
    identifier_to_name = {
        assistant.get('github_identifier'): assistant.get('name')
        for assistant in (assistants or [])
        if assistant.get('github_identifier')
    }

    if not wanted_resolved_dict and not wanted_in_progress_dict:
        return {'assistants': [], 'months': [], 'data': {}}

    agent_month_data = defaultdict(lambda: defaultdict(lambda: {'resolved': [], 'in_progress': []}))

    # Both inputs are bucketed the same way; only the timestamp field and the
    # destination bucket differ. Resolved issues are processed first.
    sources = (
        (wanted_resolved_dict, 'closed_at', 'resolved'),
        (wanted_in_progress_dict, 'created_at', 'in_progress'),
    )
    for source_dict, date_field, bucket in sources:
        for agent_identifier, metadata_list in (source_dict or {}).items():
            # Fall back to the identifier when the assistant is unknown or has
            # no name, so a None key can never reach sorted() further down.
            agent_name = identifier_to_name.get(agent_identifier) or agent_identifier

            for issue_meta in metadata_list or []:
                timestamp = issue_meta.get(date_field)
                if not timestamp or timestamp == 'N/A':
                    continue

                try:
                    # fromisoformat() on older Pythons rejects a trailing 'Z'.
                    dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
                except (AttributeError, TypeError, ValueError) as e:
                    print(f"Warning: Could not parse wanted issue date '{timestamp}': {e}")
                    continue

                month_key = f"{dt.year}-{dt.month:02d}"
                agent_month_data[agent_name][month_key][bucket].append(issue_meta)

    all_months = set()
    for agent_data in agent_month_data.values():
        all_months.update(agent_data)
    months = sorted(all_months)

    # Read-only stand-in for months in which an agent has no entries; using
    # .get() avoids growing the defaultdict with empty months.
    empty_month = {'resolved': (), 'in_progress': ()}

    result_data = {}
    for agent_name, month_dict in agent_month_data.items():
        resolved_rates = []
        total_wanted_list = []
        resolved_wanted_list = []
        in_progress_wanted_list = []

        for month in months:
            month_data = month_dict.get(month, empty_month)

            resolved_count = len(month_data['resolved'])
            in_progress_count = len(month_data['in_progress'])
            total_count = resolved_count + in_progress_count

            # Resolved rate = resolved / total * 100; None marks "no data".
            resolved_rate = (resolved_count / total_count * 100) if total_count > 0 else None

            resolved_rates.append(resolved_rate)
            total_wanted_list.append(total_count)
            resolved_wanted_list.append(resolved_count)
            in_progress_wanted_list.append(in_progress_count)

        result_data[agent_name] = {
            'resolved_rates': resolved_rates,
            'total_wanted': total_wanted_list,
            'resolved_wanted': resolved_wanted_list,
            'in_progress_wanted': in_progress_wanted_list
        }

    return {
        'assistants': sorted(agent_month_data),
        'months': months,
        'data': result_data
    }
|
| 1139 |
+
|
| 1140 |
+
|
| 1141 |
def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, wanted_in_progress_dict=None, discussions_dict=None):
|
| 1142 |
"""Construct leaderboard from in-memory issue metadata and discussion metadata.
|
| 1143 |
|
|
|
|
| 1194 |
return cache_dict
|
| 1195 |
|
| 1196 |
|
| 1197 |
+
def save_leaderboard_data_to_hf(leaderboard_dict, issue_monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None, wanted_monthly_metrics=None):
|
| 1198 |
"""Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
|
| 1199 |
try:
|
| 1200 |
token = get_hf_token()
|
|
|
|
| 1217 |
'leaderboard': leaderboard_dict,
|
| 1218 |
'issue_monthly_metrics': issue_monthly_metrics,
|
| 1219 |
'wanted_issues': wanted_issues,
|
| 1220 |
+
'wanted_monthly_metrics': wanted_monthly_metrics,
|
| 1221 |
'discussion_monthly_metrics': discussion_monthly_metrics
|
| 1222 |
}
|
| 1223 |
|
|
|
|
| 1305 |
leaderboard_dict = construct_leaderboard_from_metadata(
|
| 1306 |
agent_issues, assistants, wanted_resolved, wanted_in_progress, agent_discussions
|
| 1307 |
)
|
| 1308 |
+
issue_monthly_metrics = calculate_monthly_metrics_by_issues(agent_issues, assistants)
|
| 1309 |
+
discussion_monthly_metrics = calculate_monthly_metrics_by_discussions(
|
| 1310 |
agent_discussions, assistants
|
| 1311 |
)
|
| 1312 |
+
wanted_monthly_metrics = calculate_monthly_metrics_by_wanted_issues(
|
| 1313 |
+
wanted_resolved, wanted_in_progress, assistants
|
| 1314 |
+
)
|
| 1315 |
save_leaderboard_data_to_hf(
|
| 1316 |
+
leaderboard_dict, issue_monthly_metrics, wanted_open, discussion_monthly_metrics, wanted_monthly_metrics
|
| 1317 |
)
|
| 1318 |
except Exception as e:
|
| 1319 |
print(f"Error saving leaderboard: {str(e)}")
|