zhimin-z committed
Commit c0fff3a · 1 Parent(s): aa557eb
Files changed (3)
  1. .gitignore +1 -0
  2. app.py +20 -21
  3. msr.py +74 -49
.gitignore CHANGED
@@ -1,3 +1,4 @@
+ *.vscode
  *.claude
  *.env
  *.venv
app.py CHANGED
@@ -3,7 +3,6 @@ from gradio_leaderboard import Leaderboard, ColumnFilter
  import json
  import os
  import time
- import subprocess
  import requests
  from huggingface_hub import HfApi, hf_hub_download
  from huggingface_hub.errors import HfHubHTTPError
@@ -18,7 +17,7 @@ from apscheduler.triggers.cron import CronTrigger
  from datetime import datetime, timezone

  # Load environment variables
- load_dotenv()
+ load_dotenv(override=True)

  # =============================================================================
  # CONFIGURATION
@@ -37,7 +36,9 @@ LEADERBOARD_COLUMNS = [
  ("Total Discussions", "number"),
  ("Issue Resolved Rate (%)", "number"),
  ("Discussion Resolved Rate (%)", "number"),
+ ("Total Wanted Issues", "number"),
  ("Resolved Wanted Issues", "number"),
+ ("Wanted Issue Resolved Rate (%)", "number"),
  ("Resolved Issues", "number"),
  ("Resolved Discussions", "number"),
  ]
@@ -495,11 +496,12 @@ def get_leaderboard_dataframe():
  for identifier, data in cache_dict.items():
  total_issues = data.get('total_issues', 0)
  total_discussions = data.get('total_discussions', 0)
+ total_wanted_issues = data.get('total_wanted_issues', 0)
  resolved_wanted_issues = data.get('resolved_wanted_issues', 0)
- print(f" Assistant '{identifier}': {total_issues} issues, {total_discussions} discussions, {resolved_wanted_issues} wanted issues resolved")
+ wanted_issue_resolved_rate = data.get('wanted_issue_resolved_rate', 0.0)

- # Filter out assistants with no activity (all three metrics are zero)
- if total_issues == 0 and total_discussions == 0 and resolved_wanted_issues == 0:
+ # Filter out assistants with no activity (all four metrics are zero)
+ if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
  filtered_count += 1
  continue

@@ -511,7 +513,9 @@ def get_leaderboard_dataframe():
  total_discussions, # Total Discussions
  data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
  data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
+ total_wanted_issues, # Total Wanted Issues
  resolved_wanted_issues, # Resolved Wanted Issues
+ wanted_issue_resolved_rate, # Wanted Issue Resolved Rate (%)
  data.get('resolved_issues', 0), # Resolved Issues
  data.get('resolved_discussions', 0), # Resolved Discussions
  ])
@@ -527,15 +531,16 @@ def get_leaderboard_dataframe():
  numeric_cols = [
  "Total Issues", "Total Discussions",
  "Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
- "Resolved Issues", "Resolved Discussions", "Resolved Wanted Issues"
+ "Total Wanted Issues", "Resolved Wanted Issues", "Wanted Issue Resolved Rate (%)",
+ "Resolved Issues", "Resolved Discussions"
  ]
  for col in numeric_cols:
  if col in df.columns:
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

- # Sort by Total Issues descending
- if "Total Issues" in df.columns and not df.empty:
- df = df.sort_values(by="Total Issues", ascending=False).reset_index(drop=True)
+ # Sort by Total Wanted Issues descending (primary metric for this leaderboard)
+ if "Total Wanted Issues" in df.columns and not df.empty:
+ df = df.sort_values(by="Total Wanted Issues", ascending=False).reset_index(drop=True)

  print(f"Final DataFrame shape: {df.shape}")
  print("="*60 + "\n")
@@ -706,22 +711,16 @@ with gr.Blocks(title="SWE Assistant Issue & Discussion Leaderboard", theme=gr.th
  value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
  datatype=LEADERBOARD_COLUMNS,
  search_columns=["Assistant", "Website"],
+ # Note: Slider filters with min=0 have boundary issues in gradio_leaderboard
+ # Using min=-1 as workaround to include records with 0% rates
  filter_columns=[
  ColumnFilter(
- "Issue Resolved Rate (%)",
+ "Wanted Issue Resolved Rate (%)",
  min=-1,
- max=101,
- default=[-1, 101],
+ max=100,
+ default=[-1, 100],
  type="slider",
- label="Issue Resolved Rate (%)"
- ),
- ColumnFilter(
- "Discussion Resolved Rate (%)",
- min=-1,
- max=101,
- default=[-1, 101],
- type="slider",
- label="Discussion Resolved Rate (%)"
+ label="Wanted Issue Resolved Rate (%)"
  )
  ]
  )
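The new slider filter depends on the workaround noted in the added comments: in gradio_leaderboard, a slider whose minimum sits exactly at 0 can drop rows with a 0% rate, so the range opens slightly below zero. A minimal, self-contained sketch of the same pattern — assuming the Leaderboard/ColumnFilter usage shown above, with a hypothetical two-row DataFrame standing in for the real leaderboard data:

```python
# Sketch of the min=-1 slider workaround used in app.py (assumptions: the
# gradio_leaderboard API as used in the diff; the DataFrame is sample data).
import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard, ColumnFilter

df = pd.DataFrame({
    "Assistant": ["bot-a", "bot-b"],
    "Wanted Issue Resolved Rate (%)": [0.0, 42.5],  # the 0% row must survive filtering
})

with gr.Blocks() as demo:
    Leaderboard(
        value=df,
        # Same (column, type) convention as LEADERBOARD_COLUMNS above
        datatype=[("Assistant", "str"), ("Wanted Issue Resolved Rate (%)", "number")],
        search_columns=["Assistant"],
        filter_columns=[
            ColumnFilter(
                "Wanted Issue Resolved Rate (%)",
                min=-1,               # slightly below 0 so 0% records are not cut off
                max=100,
                default=[-1, 100],
                type="slider",
                label="Wanted Issue Resolved Rate (%)",
            )
        ],
    )

if __name__ == "__main__":
    demo.launch()
```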
msr.py CHANGED
@@ -1,6 +1,5 @@
  import json
  import os
- import time
  from datetime import datetime, timezone, timedelta
  from collections import defaultdict
  from huggingface_hub import HfApi, hf_hub_download
@@ -14,11 +13,10 @@ from apscheduler.schedulers.blocking import BlockingScheduler
  from apscheduler.triggers.cron import CronTrigger
  import logging
  import traceback
- import subprocess
  import re

  # Load environment variables
- load_dotenv()
+ load_dotenv(override=True)

  # =============================================================================
  # CONFIGURATION
@@ -753,7 +751,8 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
  print(f"\n Post-processing {len(all_issues)} wanted issues...")

  wanted_open = []
- wanted_resolved = defaultdict(list)
+ wanted_resolved = defaultdict(list) # agent_id -> [resolved wanted issues]
+ wanted_in_progress = defaultdict(list) # agent_id -> [in-progress wanted issues (PR not merged)]
  current_time = datetime.now(timezone.utc)

  for issue_url, issue_meta in all_issues.items():
@@ -762,74 +761,90 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
  if not linked_prs:
  continue

- # Check if any linked PR was merged AND created by an assistant
- resolved_by = None
+ # Find all agents who have PRs for this issue and their merge status
+ agents_with_merged_pr = set()
+ agents_with_unmerged_pr = set()
+
  for pr_url in linked_prs:
+ pr_creator = pr_creators.get(pr_url)
+ if pr_creator not in identifier_set:
+ continue
+
  merged_at = pr_merged_at.get(pr_url)
  if merged_at: # PR was merged
- pr_creator = pr_creators.get(pr_url)
- if pr_creator in identifier_set:
- resolved_by = pr_creator
+ agents_with_merged_pr.add(pr_creator)
+ else: # PR not merged (in progress or rejected)
+ agents_with_unmerged_pr.add(pr_creator)
+
+ # Skip if no agent has a PR for this issue
+ if not agents_with_merged_pr and not agents_with_unmerged_pr:
+ continue
+
+ # Check if issue qualifies as a "wanted" issue (has patch label)
+ issue_labels = issue_meta.get('labels', [])
+ has_patch_label = False
+ for issue_label in issue_labels:
+ for wanted_label in PATCH_WANTED_LABELS:
+ if wanted_label.lower() in issue_label:
+ has_patch_label = True
  break
+ if has_patch_label:
+ break
+
+ if not has_patch_label:
+ continue
+
+ # Check if long-standing (open 30+ days)
+ created_at_str = issue_meta.get('created_at')
+ if not created_at_str or created_at_str == 'N/A':
+ continue

- if not resolved_by:
+ try:
+ created_dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
+ except:
  continue

  # Process based on issue state
  if issue_meta['state'] == 'open':
- # For open issues: check if labels match PATCH_WANTED_LABELS
- issue_labels = issue_meta.get('labels', [])
- has_patch_label = False
- for issue_label in issue_labels:
- for wanted_label in PATCH_WANTED_LABELS:
- if wanted_label.lower() in issue_label:
- has_patch_label = True
- break
- if has_patch_label:
- break
-
- if not has_patch_label:
+ days_open = (current_time - created_dt).days
+ if days_open < LONGSTANDING_GAP_DAYS:
  continue

- # Check if long-standing
- created_at_str = issue_meta.get('created_at')
- if created_at_str and created_at_str != 'N/A':
- try:
- created_dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
- days_open = (current_time - created_dt).days
- if days_open >= LONGSTANDING_GAP_DAYS:
- wanted_open.append(issue_meta)
- except:
- pass
+ # Add to global wanted_open list
+ wanted_open.append(issue_meta)
+
+ # Track in-progress for agents with unmerged PRs (excluding those who already resolved it)
+ for agent_id in agents_with_unmerged_pr - agents_with_merged_pr:
+ wanted_in_progress[agent_id].append(issue_meta)

  elif issue_meta['state'] == 'closed':
- # For closed issues: must be closed within time frame AND open 30+ days
  closed_at_str = issue_meta.get('closed_at')
- created_at_str = issue_meta.get('created_at')
-
- if closed_at_str and closed_at_str != 'N/A' and created_at_str and created_at_str != 'N/A':
- try:
- closed_dt = datetime.fromisoformat(closed_at_str.replace('Z', '+00:00'))
- created_dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
+ if not closed_at_str or closed_at_str == 'N/A':
+ continue

- # Calculate how long the issue was open
- days_open = (closed_dt - created_dt).days
+ try:
+ closed_dt = datetime.fromisoformat(closed_at_str.replace('Z', '+00:00'))
+ days_open = (closed_dt - created_dt).days

- # Only include if closed within timeframe AND was open 30+ days
- if start_date <= closed_dt <= end_date and days_open >= LONGSTANDING_GAP_DAYS:
- wanted_resolved[resolved_by].append(issue_meta)
- except:
- pass
+ # Only include if closed within timeframe AND was open 30+ days
+ if start_date <= closed_dt <= end_date and days_open >= LONGSTANDING_GAP_DAYS:
+ # Track resolved for agents with merged PRs
+ for agent_id in agents_with_merged_pr:
+ wanted_resolved[agent_id].append(issue_meta)
+ except:
+ pass

  print(f" ✓ Found {sum(len(issues) for issues in agent_issues.values())} assistant-assigned issues across {len(agent_issues)} assistants")
  print(f" ✓ Found {len(wanted_open)} long-standing open wanted issues")
  print(f" ✓ Found {sum(len(issues) for issues in wanted_resolved.values())} resolved wanted issues across {len(wanted_resolved)} assistants")
+ print(f" ✓ Found {sum(len(issues) for issues in wanted_in_progress.values())} in-progress wanted issues across {len(wanted_in_progress)} assistants")
  print(f" ✓ Found {sum(len(discussions) for discussions in discussions_by_agent.values())} discussions across {len(discussions_by_agent)} assistants")

  return {
  'agent_issues': dict(agent_issues),
  'wanted_open': wanted_open,
  'wanted_resolved': dict(wanted_resolved),
+ 'wanted_in_progress': dict(wanted_in_progress),
  'agent_discussions': dict(discussions_by_agent)
  }

@@ -1049,13 +1064,14 @@ def calculate_monthly_metrics_by_agent_discussions(all_discussions_dict, assista
  }


- def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, discussions_dict=None):
+ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_resolved_dict=None, wanted_in_progress_dict=None, discussions_dict=None):
  """Construct leaderboard from in-memory issue metadata and discussion metadata.

  Args:
  all_metadata_dict: Dictionary mapping assistant ID to list of issue metadata (assistant-assigned issues)
  assistants: List of assistant metadata
  wanted_resolved_dict: Optional dictionary mapping assistant ID to list of resolved wanted issues
+ wanted_in_progress_dict: Optional dictionary mapping assistant ID to list of in-progress wanted issues
  discussions_dict: Optional dictionary mapping assistant ID to list of discussion metadata
  """
  if not assistants:
@@ -1065,6 +1081,9 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
  if wanted_resolved_dict is None:
  wanted_resolved_dict = {}

+ if wanted_in_progress_dict is None:
+ wanted_in_progress_dict = {}
+
  if discussions_dict is None:
  discussions_dict = {}

@@ -1077,8 +1096,11 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
  bot_data = all_metadata_dict.get(identifier, [])
  stats = calculate_issue_stats_from_metadata(bot_data)

- # Add wanted issues count
+ # Add wanted issues stats
  resolved_wanted = len(wanted_resolved_dict.get(identifier, []))
+ in_progress_wanted = len(wanted_in_progress_dict.get(identifier, []))
+ total_wanted = resolved_wanted + in_progress_wanted
+ wanted_resolved_rate = (resolved_wanted / total_wanted * 100) if total_wanted > 0 else 0.0

  # Add discussion stats
  discussion_metadata = discussions_dict.get(identifier, [])
@@ -1089,7 +1111,9 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
  'website': assistant.get('website', 'N/A'),
  'github_identifier': identifier,
  **stats,
+ 'total_wanted_issues': total_wanted,
  'resolved_wanted_issues': resolved_wanted,
+ 'wanted_issue_resolved_rate': round(wanted_resolved_rate, 2),
  **discussion_stats
  }

@@ -1191,6 +1215,7 @@ def mine_all_agents():
  agent_issues = results['agent_issues']
  wanted_open = results['wanted_open']
  wanted_resolved = results['wanted_resolved']
+ wanted_in_progress = results['wanted_in_progress']
  agent_discussions = results['agent_discussions']
  except Exception as e:
  print(f"Error during DuckDB fetch: {str(e)}")
@@ -1203,7 +1228,7 @@ def mine_all_agents():

  try:
  leaderboard_dict = construct_leaderboard_from_metadata(
- agent_issues, assistants, wanted_resolved, agent_discussions
+ agent_issues, assistants, wanted_resolved, wanted_in_progress, agent_discussions
  )
  issue_monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
  discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
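Taken together, the msr.py changes attribute each long-standing wanted issue to assistants by PR status (a merged linked PR counts as resolved; only unmerged linked PRs counts as in progress) and derive the rate that app.py now displays. A minimal sketch of that bookkeeping and the rate formula, using hypothetical sample data in place of the real issue metadata:

```python
# Sketch of the per-assistant wanted-issue bookkeeping introduced in msr.py.
# Assumptions: the dicts below are simplified stand-ins for the real issue
# metadata; only the resolved/in-progress split and the rate formula mirror the diff.
from collections import defaultdict

wanted_resolved = defaultdict(list)      # agent_id -> [resolved wanted issues]
wanted_in_progress = defaultdict(list)   # agent_id -> [in-progress wanted issues]

# Hypothetical sample: one issue fixed by a merged PR, one with only an unmerged PR.
wanted_resolved["assistant-a"].append({"url": "issue/1"})
wanted_in_progress["assistant-a"].append({"url": "issue/2"})

def wanted_stats(agent_id):
    """Compute the three fields added to each leaderboard entry."""
    resolved = len(wanted_resolved.get(agent_id, []))
    in_progress = len(wanted_in_progress.get(agent_id, []))
    total = resolved + in_progress
    rate = (resolved / total * 100) if total > 0 else 0.0
    return {
        "total_wanted_issues": total,
        "resolved_wanted_issues": resolved,
        "wanted_issue_resolved_rate": round(rate, 2),
    }

# {'total_wanted_issues': 2, 'resolved_wanted_issues': 1, 'wanted_issue_resolved_rate': 50.0}
print(wanted_stats("assistant-a"))
```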