zhimin-z committed on
Commit ·
c0fff3a
1
Parent(s): aa557eb
add
Browse files- .gitignore +1 -0
- app.py +20 -21
- msr.py +74 -49
.gitignore
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
*.claude
|
| 2 |
*.env
|
| 3 |
*.venv
|
|
|
|
| 1 |
+
*.vscode
|
| 2 |
*.claude
|
| 3 |
*.env
|
| 4 |
*.venv
|
app.py
CHANGED
|
@@ -3,7 +3,6 @@ from gradio_leaderboard import Leaderboard, ColumnFilter
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
-
import subprocess
|
| 7 |
import requests
|
| 8 |
from huggingface_hub import HfApi, hf_hub_download
|
| 9 |
from huggingface_hub.errors import HfHubHTTPError
|
|
@@ -18,7 +17,7 @@ from apscheduler.triggers.cron import CronTrigger
|
|
| 18 |
from datetime import datetime, timezone
|
| 19 |
|
| 20 |
# Load environment variables
|
| 21 |
-
load_dotenv()
|
| 22 |
|
| 23 |
# =============================================================================
|
| 24 |
# CONFIGURATION
|
|
@@ -37,7 +36,9 @@ LEADERBOARD_COLUMNS = [
|
|
| 37 |
("Total Discussions", "number"),
|
| 38 |
("Issue Resolved Rate (%)", "number"),
|
| 39 |
("Discussion Resolved Rate (%)", "number"),
|
|
|
|
| 40 |
("Resolved Wanted Issues", "number"),
|
|
|
|
| 41 |
("Resolved Issues", "number"),
|
| 42 |
("Resolved Discussions", "number"),
|
| 43 |
]
|
|
@@ -495,11 +496,12 @@ def get_leaderboard_dataframe():
|
|
| 495 |
for identifier, data in cache_dict.items():
|
| 496 |
total_issues = data.get('total_issues', 0)
|
| 497 |
total_discussions = data.get('total_discussions', 0)
|
|
|
|
| 498 |
resolved_wanted_issues = data.get('resolved_wanted_issues', 0)
|
| 499 |
-
|
| 500 |
|
| 501 |
-
# Filter out assistants with no activity (all
|
| 502 |
-
if total_issues == 0 and total_discussions == 0 and
|
| 503 |
filtered_count += 1
|
| 504 |
continue
|
| 505 |
|
|
@@ -511,7 +513,9 @@ def get_leaderboard_dataframe():
|
|
| 511 |
total_discussions, # Total Discussions
|
| 512 |
data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
|
| 513 |
data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
|
|
|
|
| 514 |
resolved_wanted_issues, # Resolved Wanted Issues
|
|
|
|
| 515 |
data.get('resolved_issues', 0), # Resolved Issues
|
| 516 |
data.get('resolved_discussions', 0), # Resolved Discussions
|
| 517 |
])
|
|
@@ -527,15 +531,16 @@ def get_leaderboard_dataframe():
|
|
| 527 |
numeric_cols = [
|
| 528 |
"Total Issues", "Total Discussions",
|
| 529 |
"Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
|
| 530 |
-
"
|
|
|
|
| 531 |
]
|
| 532 |
for col in numeric_cols:
|
| 533 |
if col in df.columns:
|
| 534 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 535 |
|
| 536 |
-
# Sort by Total Issues descending
|
| 537 |
-
if "Total Issues" in df.columns and not df.empty:
|
| 538 |
-
df = df.sort_values(by="Total Issues", ascending=False).reset_index(drop=True)
|
| 539 |
|
| 540 |
print(f"Final DataFrame shape: {df.shape}")
|
| 541 |
print("="*60 + "\n")
|
|
@@ -706,22 +711,16 @@ with gr.Blocks(title="SWE Assistant Issue & Discussion Leaderboard", theme=gr.th
|
|
| 706 |
value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
|
| 707 |
datatype=LEADERBOARD_COLUMNS,
|
| 708 |
search_columns=["Assistant", "Website"],
|
|
|
|
|
|
|
| 709 |
filter_columns=[
|
| 710 |
ColumnFilter(
|
| 711 |
-
"Issue Resolved Rate (%)",
|
| 712 |
min=-1,
|
| 713 |
-
max=
|
| 714 |
-
default=[-1,
|
| 715 |
type="slider",
|
| 716 |
-
label="Issue Resolved Rate (%)"
|
| 717 |
-
),
|
| 718 |
-
ColumnFilter(
|
| 719 |
-
"Discussion Resolved Rate (%)",
|
| 720 |
-
min=-1,
|
| 721 |
-
max=101,
|
| 722 |
-
default=[-1, 101],
|
| 723 |
-
type="slider",
|
| 724 |
-
label="Discussion Resolved Rate (%)"
|
| 725 |
)
|
| 726 |
]
|
| 727 |
)
|
|
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import time
|
|
|
|
| 6 |
import requests
|
| 7 |
from huggingface_hub import HfApi, hf_hub_download
|
| 8 |
from huggingface_hub.errors import HfHubHTTPError
|
|
|
|
| 17 |
from datetime import datetime, timezone
|
| 18 |
|
| 19 |
# Load environment variables
|
| 20 |
+
load_dotenv(override=True)
|
| 21 |
|
| 22 |
# =============================================================================
|
| 23 |
# CONFIGURATION
|
|
|
|
| 36 |
("Total Discussions", "number"),
|
| 37 |
("Issue Resolved Rate (%)", "number"),
|
| 38 |
("Discussion Resolved Rate (%)", "number"),
|
| 39 |
+
("Total Wanted Issues", "number"),
|
| 40 |
("Resolved Wanted Issues", "number"),
|
| 41 |
+
("Wanted Issue Resolved Rate (%)", "number"),
|
| 42 |
("Resolved Issues", "number"),
|
| 43 |
("Resolved Discussions", "number"),
|
| 44 |
]
|
|
|
|
| 496 |
for identifier, data in cache_dict.items():
|
| 497 |
total_issues = data.get('total_issues', 0)
|
| 498 |
total_discussions = data.get('total_discussions', 0)
|
| 499 |
+
total_wanted_issues = data.get('total_wanted_issues', 0)
|
| 500 |
resolved_wanted_issues = data.get('resolved_wanted_issues', 0)
|
| 501 |
+
wanted_issue_resolved_rate = data.get('wanted_issue_resolved_rate', 0.0)
|
| 502 |
|
| 503 |
+
# Filter out assistants with no activity (total issues, total discussions, and total wanted issues are all zero)
|
| 504 |
+
if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
|
| 505 |
filtered_count += 1
|
| 506 |
continue
|
| 507 |
|
|
|
|
| 513 |
total_discussions, # Total Discussions
|
| 514 |
data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
|
| 515 |
data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
|
| 516 |
+
total_wanted_issues, # Total Wanted Issues
|
| 517 |
resolved_wanted_issues, # Resolved Wanted Issues
|
| 518 |
+
wanted_issue_resolved_rate, # Wanted Issue Resolved Rate (%)
|
| 519 |
data.get('resolved_issues', 0), # Resolved Issues
|
| 520 |
data.get('resolved_discussions', 0), # Resolved Discussions
|
| 521 |
])
|
|
|
|
| 531 |
numeric_cols = [
|
| 532 |
"Total Issues", "Total Discussions",
|
| 533 |
"Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
|
| 534 |
+
"Total Wanted Issues", "Resolved Wanted Issues", "Wanted Issue Resolved Rate (%)",
|
| 535 |
+
"Resolved Issues", "Resolved Discussions"
|
| 536 |
]
|
| 537 |
for col in numeric_cols:
|
| 538 |
if col in df.columns:
|
| 539 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 540 |
|
| 541 |
+
# Sort by Total Wanted Issues descending (primary metric for this leaderboard)
|
| 542 |
+
if "Total Wanted Issues" in df.columns and not df.empty:
|
| 543 |
+
df = df.sort_values(by="Total Wanted Issues", ascending=False).reset_index(drop=True)
|
| 544 |
|
| 545 |
print(f"Final DataFrame shape: {df.shape}")
|
| 546 |
print("="*60 + "\n")
|
|
|
|
| 711 |
value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
|
| 712 |
datatype=LEADERBOARD_COLUMNS,
|
| 713 |
search_columns=["Assistant", "Website"],
|
| 714 |
+
# Note: Slider filters with min=0 have boundary issues in gradio_leaderboard
|
| 715 |
+
# Using min=-1 as workaround to include records with 0% rates
|
| 716 |
filter_columns=[
|
| 717 |
ColumnFilter(
|
| 718 |
+
"Wanted Issue Resolved Rate (%)",
|
| 719 |
min=-1,
|
| 720 |
+
max=100,
|
| 721 |
+
default=[-1, 100],
|
| 722 |
type="slider",
|
| 723 |
+
label="Wanted Issue Resolved Rate (%)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
)
|
| 725 |
]
|
| 726 |
)
|
msr.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
-
import time
|
| 4 |
from datetime import datetime, timezone, timedelta
|
| 5 |
from collections import defaultdict
|
| 6 |
from huggingface_hub import HfApi, hf_hub_download
|
|
@@ -14,11 +13,10 @@ from apscheduler.schedulers.blocking import BlockingScheduler
|
|
| 14 |
from apscheduler.triggers.cron import CronTrigger
|
| 15 |
import logging
|
| 16 |
import traceback
|
| 17 |
-
import subprocess
|
| 18 |
import re
|
| 19 |
|
| 20 |
# Load environment variables
|
| 21 |
-
load_dotenv()
|
| 22 |
|
| 23 |
# =============================================================================
|
| 24 |
# CONFIGURATION
|
|
@@ -753,7 +751,8 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
|
|
| 753 |
print(f"\n Post-processing {len(all_issues)} wanted issues...")
|
| 754 |
|
| 755 |
wanted_open = []
|
| 756 |
-
wanted_resolved = defaultdict(list)
|
|
|
|
| 757 |
current_time = datetime.now(timezone.utc)
|
| 758 |
|
| 759 |
for issue_url, issue_meta in all_issues.items():
|
|
@@ -762,74 +761,90 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
|
|
| 762 |
if not linked_prs:
|
| 763 |
continue
|
| 764 |
|
| 765 |
-
#
|
| 766 |
-
|
|
|
|
|
|
|
| 767 |
for pr_url in linked_prs:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 768 |
merged_at = pr_merged_at.get(pr_url)
|
| 769 |
if merged_at: # PR was merged
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 774 |
|
| 775 |
-
|
|
|
|
|
|
|
| 776 |
continue
|
| 777 |
|
| 778 |
# Process based on issue state
|
| 779 |
if issue_meta['state'] == 'open':
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
has_patch_label = False
|
| 783 |
-
for issue_label in issue_labels:
|
| 784 |
-
for wanted_label in PATCH_WANTED_LABELS:
|
| 785 |
-
if wanted_label.lower() in issue_label:
|
| 786 |
-
has_patch_label = True
|
| 787 |
-
break
|
| 788 |
-
if has_patch_label:
|
| 789 |
-
break
|
| 790 |
-
|
| 791 |
-
if not has_patch_label:
|
| 792 |
continue
|
| 793 |
|
| 794 |
-
#
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
if days_open >= LONGSTANDING_GAP_DAYS:
|
| 801 |
-
wanted_open.append(issue_meta)
|
| 802 |
-
except:
|
| 803 |
-
pass
|
| 804 |
|
| 805 |
elif issue_meta['state'] == 'closed':
|
| 806 |
-
# For closed issues: must be closed within time frame AND open 30+ days
|
| 807 |
closed_at_str = issue_meta.get('closed_at')
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
if closed_at_str and closed_at_str != 'N/A' and created_at_str and created_at_str != 'N/A':
|
| 811 |
-
try:
|
| 812 |
-
closed_dt = datetime.fromisoformat(closed_at_str.replace('Z', '+00:00'))
|
| 813 |
-
created_dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
|
| 814 |
|
| 815 |
-
|
| 816 |
-
|
|
|
|
| 817 |
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
|
|
|
|
|
|
| 823 |
|
| 824 |
print(f" ✓ Found {sum(len(issues) for issues in agent_issues.values())} assistant-assigned issues across {len(agent_issues)} assistants")
|
| 825 |
print(f" ✓ Found {len(wanted_open)} long-standing open wanted issues")
|
| 826 |
print(f" ✓ Found {sum(len(issues) for issues in wanted_resolved.values())} resolved wanted issues across {len(wanted_resolved)} assistants")
|
|
|
|
| 827 |
print(f" ✓ Found {sum(len(discussions) for discussions in discussions_by_agent.values())} discussions across {len(discussions_by_agent)} assistants")
|
| 828 |
|
| 829 |
return {
|
| 830 |
'agent_issues': dict(agent_issues),
|
| 831 |
'wanted_open': wanted_open,
|
| 832 |
'wanted_resolved': dict(wanted_resolved),
|
|
|
|
| 833 |
'agent_discussions': dict(discussions_by_agent)
|
| 834 |
}
|
| 835 |
|
|
@@ -1049,13 +1064,14 @@ def calculate_monthly_metrics_by_agent_discussions(all_discussions_dict, assista
|
|
| 1049 |
}
|
| 1050 |
|
| 1051 |
|
| 1052 |
-
def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, discussions_dict=None):
|
| 1053 |
"""Construct leaderboard from in-memory issue metadata and discussion metadata.
|
| 1054 |
|
| 1055 |
Args:
|
| 1056 |
all_metadata_dict: Dictionary mapping assistant ID to list of issue metadata (assistant-assigned issues)
|
| 1057 |
assistants: List of assistant metadata
|
| 1058 |
wanted_resolved_dict: Optional dictionary mapping assistant ID to list of resolved wanted issues
|
|
|
|
| 1059 |
discussions_dict: Optional dictionary mapping assistant ID to list of discussion metadata
|
| 1060 |
"""
|
| 1061 |
if not assistants:
|
|
@@ -1065,6 +1081,9 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
|
|
| 1065 |
if wanted_resolved_dict is None:
|
| 1066 |
wanted_resolved_dict = {}
|
| 1067 |
|
|
|
|
|
|
|
|
|
|
| 1068 |
if discussions_dict is None:
|
| 1069 |
discussions_dict = {}
|
| 1070 |
|
|
@@ -1077,8 +1096,11 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
|
|
| 1077 |
bot_data = all_metadata_dict.get(identifier, [])
|
| 1078 |
stats = calculate_issue_stats_from_metadata(bot_data)
|
| 1079 |
|
| 1080 |
-
# Add wanted issues
|
| 1081 |
resolved_wanted = len(wanted_resolved_dict.get(identifier, []))
|
|
|
|
|
|
|
|
|
|
| 1082 |
|
| 1083 |
# Add discussion stats
|
| 1084 |
discussion_metadata = discussions_dict.get(identifier, [])
|
|
@@ -1089,7 +1111,9 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
|
|
| 1089 |
'website': assistant.get('website', 'N/A'),
|
| 1090 |
'github_identifier': identifier,
|
| 1091 |
**stats,
|
|
|
|
| 1092 |
'resolved_wanted_issues': resolved_wanted,
|
|
|
|
| 1093 |
**discussion_stats
|
| 1094 |
}
|
| 1095 |
|
|
@@ -1191,6 +1215,7 @@ def mine_all_agents():
|
|
| 1191 |
agent_issues = results['agent_issues']
|
| 1192 |
wanted_open = results['wanted_open']
|
| 1193 |
wanted_resolved = results['wanted_resolved']
|
|
|
|
| 1194 |
agent_discussions = results['agent_discussions']
|
| 1195 |
except Exception as e:
|
| 1196 |
print(f"Error during DuckDB fetch: {str(e)}")
|
|
@@ -1203,7 +1228,7 @@ def mine_all_agents():
|
|
| 1203 |
|
| 1204 |
try:
|
| 1205 |
leaderboard_dict = construct_leaderboard_from_metadata(
|
| 1206 |
-
agent_issues, assistants, wanted_resolved, agent_discussions
|
| 1207 |
)
|
| 1208 |
issue_monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
|
| 1209 |
discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
| 3 |
from datetime import datetime, timezone, timedelta
|
| 4 |
from collections import defaultdict
|
| 5 |
from huggingface_hub import HfApi, hf_hub_download
|
|
|
|
| 13 |
from apscheduler.triggers.cron import CronTrigger
|
| 14 |
import logging
|
| 15 |
import traceback
|
|
|
|
| 16 |
import re
|
| 17 |
|
| 18 |
# Load environment variables
|
| 19 |
+
load_dotenv(override=True)
|
| 20 |
|
| 21 |
# =============================================================================
|
| 22 |
# CONFIGURATION
|
|
|
|
| 751 |
print(f"\n Post-processing {len(all_issues)} wanted issues...")
|
| 752 |
|
| 753 |
wanted_open = []
|
| 754 |
+
wanted_resolved = defaultdict(list) # agent_id -> [resolved wanted issues]
|
| 755 |
+
wanted_in_progress = defaultdict(list) # agent_id -> [in-progress wanted issues (PR not merged)]
|
| 756 |
current_time = datetime.now(timezone.utc)
|
| 757 |
|
| 758 |
for issue_url, issue_meta in all_issues.items():
|
|
|
|
| 761 |
if not linked_prs:
|
| 762 |
continue
|
| 763 |
|
| 764 |
+
# Find all agents who have PRs for this issue and their merge status
|
| 765 |
+
agents_with_merged_pr = set()
|
| 766 |
+
agents_with_unmerged_pr = set()
|
| 767 |
+
|
| 768 |
for pr_url in linked_prs:
|
| 769 |
+
pr_creator = pr_creators.get(pr_url)
|
| 770 |
+
if pr_creator not in identifier_set:
|
| 771 |
+
continue
|
| 772 |
+
|
| 773 |
merged_at = pr_merged_at.get(pr_url)
|
| 774 |
if merged_at: # PR was merged
|
| 775 |
+
agents_with_merged_pr.add(pr_creator)
|
| 776 |
+
else: # PR not merged (in progress or rejected)
|
| 777 |
+
agents_with_unmerged_pr.add(pr_creator)
|
| 778 |
+
|
| 779 |
+
# Skip if no agent has a PR for this issue
|
| 780 |
+
if not agents_with_merged_pr and not agents_with_unmerged_pr:
|
| 781 |
+
continue
|
| 782 |
+
|
| 783 |
+
# Check if issue qualifies as a "wanted" issue (has patch label)
|
| 784 |
+
issue_labels = issue_meta.get('labels', [])
|
| 785 |
+
has_patch_label = False
|
| 786 |
+
for issue_label in issue_labels:
|
| 787 |
+
for wanted_label in PATCH_WANTED_LABELS:
|
| 788 |
+
if wanted_label.lower() in issue_label:
|
| 789 |
+
has_patch_label = True
|
| 790 |
break
|
| 791 |
+
if has_patch_label:
|
| 792 |
+
break
|
| 793 |
+
|
| 794 |
+
if not has_patch_label:
|
| 795 |
+
continue
|
| 796 |
+
|
| 797 |
+
# Check if long-standing (open 30+ days)
|
| 798 |
+
created_at_str = issue_meta.get('created_at')
|
| 799 |
+
if not created_at_str or created_at_str == 'N/A':
|
| 800 |
+
continue
|
| 801 |
|
| 802 |
+
try:
|
| 803 |
+
created_dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
|
| 804 |
+
except:
|
| 805 |
continue
|
| 806 |
|
| 807 |
# Process based on issue state
|
| 808 |
if issue_meta['state'] == 'open':
|
| 809 |
+
days_open = (current_time - created_dt).days
|
| 810 |
+
if days_open < LONGSTANDING_GAP_DAYS:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
continue
|
| 812 |
|
| 813 |
+
# Add to global wanted_open list
|
| 814 |
+
wanted_open.append(issue_meta)
|
| 815 |
+
|
| 816 |
+
# Track in-progress for agents with unmerged PRs (excluding those who already resolved it)
|
| 817 |
+
for agent_id in agents_with_unmerged_pr - agents_with_merged_pr:
|
| 818 |
+
wanted_in_progress[agent_id].append(issue_meta)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 819 |
|
| 820 |
elif issue_meta['state'] == 'closed':
|
|
|
|
| 821 |
closed_at_str = issue_meta.get('closed_at')
|
| 822 |
+
if not closed_at_str or closed_at_str == 'N/A':
|
| 823 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
| 824 |
|
| 825 |
+
try:
|
| 826 |
+
closed_dt = datetime.fromisoformat(closed_at_str.replace('Z', '+00:00'))
|
| 827 |
+
days_open = (closed_dt - created_dt).days
|
| 828 |
|
| 829 |
+
# Only include if closed within timeframe AND was open 30+ days
|
| 830 |
+
if start_date <= closed_dt <= end_date and days_open >= LONGSTANDING_GAP_DAYS:
|
| 831 |
+
# Track resolved for agents with merged PRs
|
| 832 |
+
for agent_id in agents_with_merged_pr:
|
| 833 |
+
wanted_resolved[agent_id].append(issue_meta)
|
| 834 |
+
except:
|
| 835 |
+
pass
|
| 836 |
|
| 837 |
print(f" ✓ Found {sum(len(issues) for issues in agent_issues.values())} assistant-assigned issues across {len(agent_issues)} assistants")
|
| 838 |
print(f" ✓ Found {len(wanted_open)} long-standing open wanted issues")
|
| 839 |
print(f" ✓ Found {sum(len(issues) for issues in wanted_resolved.values())} resolved wanted issues across {len(wanted_resolved)} assistants")
|
| 840 |
+
print(f" ✓ Found {sum(len(issues) for issues in wanted_in_progress.values())} in-progress wanted issues across {len(wanted_in_progress)} assistants")
|
| 841 |
print(f" ✓ Found {sum(len(discussions) for discussions in discussions_by_agent.values())} discussions across {len(discussions_by_agent)} assistants")
|
| 842 |
|
| 843 |
return {
|
| 844 |
'agent_issues': dict(agent_issues),
|
| 845 |
'wanted_open': wanted_open,
|
| 846 |
'wanted_resolved': dict(wanted_resolved),
|
| 847 |
+
'wanted_in_progress': dict(wanted_in_progress),
|
| 848 |
'agent_discussions': dict(discussions_by_agent)
|
| 849 |
}
|
| 850 |
|
|
|
|
| 1064 |
}
|
| 1065 |
|
| 1066 |
|
| 1067 |
+
def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, wanted_in_progress_dict=None, discussions_dict=None):
|
| 1068 |
"""Construct leaderboard from in-memory issue metadata and discussion metadata.
|
| 1069 |
|
| 1070 |
Args:
|
| 1071 |
all_metadata_dict: Dictionary mapping assistant ID to list of issue metadata (assistant-assigned issues)
|
| 1072 |
assistants: List of assistant metadata
|
| 1073 |
wanted_resolved_dict: Optional dictionary mapping assistant ID to list of resolved wanted issues
|
| 1074 |
+
wanted_in_progress_dict: Optional dictionary mapping assistant ID to list of in-progress wanted issues
|
| 1075 |
discussions_dict: Optional dictionary mapping assistant ID to list of discussion metadata
|
| 1076 |
"""
|
| 1077 |
if not assistants:
|
|
|
|
| 1081 |
if wanted_resolved_dict is None:
|
| 1082 |
wanted_resolved_dict = {}
|
| 1083 |
|
| 1084 |
+
if wanted_in_progress_dict is None:
|
| 1085 |
+
wanted_in_progress_dict = {}
|
| 1086 |
+
|
| 1087 |
if discussions_dict is None:
|
| 1088 |
discussions_dict = {}
|
| 1089 |
|
|
|
|
| 1096 |
bot_data = all_metadata_dict.get(identifier, [])
|
| 1097 |
stats = calculate_issue_stats_from_metadata(bot_data)
|
| 1098 |
|
| 1099 |
+
# Add wanted issues stats
|
| 1100 |
resolved_wanted = len(wanted_resolved_dict.get(identifier, []))
|
| 1101 |
+
in_progress_wanted = len(wanted_in_progress_dict.get(identifier, []))
|
| 1102 |
+
total_wanted = resolved_wanted + in_progress_wanted
|
| 1103 |
+
wanted_resolved_rate = (resolved_wanted / total_wanted * 100) if total_wanted > 0 else 0.0
|
| 1104 |
|
| 1105 |
# Add discussion stats
|
| 1106 |
discussion_metadata = discussions_dict.get(identifier, [])
|
|
|
|
| 1111 |
'website': assistant.get('website', 'N/A'),
|
| 1112 |
'github_identifier': identifier,
|
| 1113 |
**stats,
|
| 1114 |
+
'total_wanted_issues': total_wanted,
|
| 1115 |
'resolved_wanted_issues': resolved_wanted,
|
| 1116 |
+
'wanted_issue_resolved_rate': round(wanted_resolved_rate, 2),
|
| 1117 |
**discussion_stats
|
| 1118 |
}
|
| 1119 |
|
|
|
|
| 1215 |
agent_issues = results['agent_issues']
|
| 1216 |
wanted_open = results['wanted_open']
|
| 1217 |
wanted_resolved = results['wanted_resolved']
|
| 1218 |
+
wanted_in_progress = results['wanted_in_progress']
|
| 1219 |
agent_discussions = results['agent_discussions']
|
| 1220 |
except Exception as e:
|
| 1221 |
print(f"Error during DuckDB fetch: {str(e)}")
|
|
|
|
| 1228 |
|
| 1229 |
try:
|
| 1230 |
leaderboard_dict = construct_leaderboard_from_metadata(
|
| 1231 |
+
agent_issues, assistants, wanted_resolved, wanted_in_progress, agent_discussions
|
| 1232 |
)
|
| 1233 |
issue_monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
|
| 1234 |
discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
|