zhiminy committed on
Commit
be5580b
·
1 Parent(s): 16d02f4
Files changed (2) hide show
  1. app.py +21 -8
  2. msr.py +7 -3
app.py CHANGED
@@ -1288,8 +1288,10 @@ def save_leaderboard_to_hf(cache_dict):
1288
  # Skip saving in debug mode - use in-memory cache instead
1289
  if DEBUG_MODE:
1290
  global DEBUG_LEADERBOARD_CACHE
1291
- DEBUG_LEADERBOARD_CACHE = cache_dict.copy()
1292
- data_list = dict_to_cache(cache_dict)
 
 
1293
  print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
1294
  return True
1295
 
@@ -1298,8 +1300,10 @@ def save_leaderboard_to_hf(cache_dict):
1298
  if not token:
1299
  raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
1300
 
 
 
1301
  # Convert to DataFrame
1302
- data_list = dict_to_cache(cache_dict)
1303
  df = pd.DataFrame(data_list)
1304
 
1305
  # Save to CSV with year as filename
@@ -1477,7 +1481,7 @@ def construct_leaderboard_from_metadata():
1477
  def initialize_data():
1478
  """
1479
  Initialize data on application startup.
1480
- Priority: 1) Leaderboard dataset, 2) Issue metadata (if available), 3) Full GitHub mining
1481
 
1482
  In DEBUG MODE:
1483
  - If no data available, automatically mine up to 10 issues per query per agent
@@ -1485,23 +1489,29 @@ def initialize_data():
1485
  """
1486
  print("🚀 Initializing leaderboard data...")
1487
 
1488
- # Try loading existing leaderboard
 
 
1489
  leaderboard_data = load_leaderboard_dataset()
1490
  if leaderboard_data:
1491
- print("✓ Initialized from leaderboard dataset")
1492
  return
1493
 
1494
- # Try constructing from issue metadata (fast, memory-efficient)
 
1495
  try:
1496
  cache_dict = construct_leaderboard_from_metadata()
1497
  # Check if there's actually meaningful data (at least one agent with issues)
1498
  has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
1499
  if cache_dict and has_data:
 
1500
  save_leaderboard_to_hf(cache_dict)
1501
  print("✓ Initialized from issue metadata")
1502
  return
 
 
1503
  except Exception as e:
1504
- print(f"Could not construct from metadata: {e}")
1505
 
1506
  # If in debug mode and no data available, mine immediately
1507
  if DEBUG_MODE:
@@ -1673,6 +1683,9 @@ def get_leaderboard_dataframe():
1673
 
1674
  rows = []
1675
  for data in leaderboard_data:
 
 
 
1676
  # Only include display-relevant fields
1677
  rows.append([
1678
  data.get('agent_name', 'Unknown'),
 
1288
  # Skip saving in debug mode - use in-memory cache instead
1289
  if DEBUG_MODE:
1290
  global DEBUG_LEADERBOARD_CACHE
1291
+ # Filter out agents with zero total issues
1292
+ filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
1293
+ DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
1294
+ data_list = dict_to_cache(filtered_cache_dict)
1295
  print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
1296
  return True
1297
 
 
1300
  if not token:
1301
  raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
1302
 
1303
+ # Filter out agents with zero total issues
1304
+ filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
1305
  # Convert to DataFrame
1306
+ data_list = dict_to_cache(filtered_cache_dict)
1307
  df = pd.DataFrame(data_list)
1308
 
1309
  # Save to CSV with year as filename
 
1481
  def initialize_data():
1482
  """
1483
  Initialize data on application startup.
1484
+ Priority: 1) Leaderboard dataset ({year}.csv), 2) Issue metadata (if available), 3) Full GitHub mining
1485
 
1486
  In DEBUG MODE:
1487
  - If no data available, automatically mine up to 10 issues per query per agent
 
1489
  """
1490
  print("🚀 Initializing leaderboard data...")
1491
 
1492
+ # STEP 1: Try loading existing leaderboard CSV file for current year
1493
+ current_year = datetime.now().year
1494
+ print(f"📂 Checking for {current_year}.csv in {LEADERBOARD_REPO}...")
1495
  leaderboard_data = load_leaderboard_dataset()
1496
  if leaderboard_data:
1497
+ print(f"✓ Initialized from leaderboard dataset ({current_year}.csv)")
1498
  return
1499
 
1500
+ # STEP 2: Try constructing from issue metadata (fast, memory-efficient)
1501
+ print(f"📂 {current_year}.csv not found. Checking {ISSUE_METADATA_REPO} for existing data...")
1502
  try:
1503
  cache_dict = construct_leaderboard_from_metadata()
1504
  # Check if there's actually meaningful data (at least one agent with issues)
1505
  has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
1506
  if cache_dict and has_data:
1507
+ print(f"✓ Found existing issue metadata. Building leaderboard from {ISSUE_METADATA_REPO}...")
1508
  save_leaderboard_to_hf(cache_dict)
1509
  print("✓ Initialized from issue metadata")
1510
  return
1511
+ else:
1512
+ print(f" No meaningful data found in {ISSUE_METADATA_REPO}")
1513
  except Exception as e:
1514
+ print(f" Could not construct from metadata: {e}")
1515
 
1516
  # If in debug mode and no data available, mine immediately
1517
  if DEBUG_MODE:
 
1683
 
1684
  rows = []
1685
  for data in leaderboard_data:
1686
+ # Filter out agents with zero total issues
1687
+ if data.get('total_issues', 0) == 0:
1688
+ continue
1689
  # Only include display-relevant fields
1690
  rows.append([
1691
  data.get('agent_name', 'Unknown'),
msr.py CHANGED
@@ -623,15 +623,19 @@ def get_already_mined_dates(agent_identifier, n_months=6):
623
  def save_leaderboard_to_hf(cache_dict):
624
  if DEBUG_MODE:
625
  global DEBUG_LEADERBOARD_CACHE
626
- DEBUG_LEADERBOARD_CACHE = cache_dict.copy()
627
- data_list = dict_to_cache(cache_dict)
 
 
628
  print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
629
  return True
630
  try:
631
  token = get_hf_token()
632
  if not token:
633
  raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
634
- data_list = dict_to_cache(cache_dict)
 
 
635
  df = pd.DataFrame(data_list)
636
  year = datetime.now().year
637
  filename = f"{year}.csv"
 
623
  def save_leaderboard_to_hf(cache_dict):
624
  if DEBUG_MODE:
625
  global DEBUG_LEADERBOARD_CACHE
626
+ # Filter out agents with zero total issues
627
+ filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
628
+ DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
629
+ data_list = dict_to_cache(filtered_cache_dict)
630
  print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
631
  return True
632
  try:
633
  token = get_hf_token()
634
  if not token:
635
  raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
636
+ # Filter out agents with zero total issues
637
+ filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
638
+ data_list = dict_to_cache(filtered_cache_dict)
639
  df = pd.DataFrame(data_list)
640
  year = datetime.now().year
641
  filename = f"{year}.csv"