refine
Browse files
app.py
CHANGED
|
@@ -1288,8 +1288,10 @@ def save_leaderboard_to_hf(cache_dict):
|
|
| 1288 |
# Skip saving in debug mode - use in-memory cache instead
|
| 1289 |
if DEBUG_MODE:
|
| 1290 |
global DEBUG_LEADERBOARD_CACHE
|
| 1291 |
-
DEBUG_LEADERBOARD_CACHE = cache_dict.copy()
|
| 1292 |
-
data_list = dict_to_cache(cache_dict)
|
|
|
|
|
|
|
| 1293 |
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 1294 |
return True
|
| 1295 |
|
|
@@ -1298,8 +1300,10 @@ def save_leaderboard_to_hf(cache_dict):
|
|
| 1298 |
if not token:
|
| 1299 |
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
|
| 1300 |
|
|
|
|
|
|
|
| 1301 |
# Convert to DataFrame
|
| 1302 |
-
data_list = dict_to_cache(cache_dict)
|
| 1303 |
df = pd.DataFrame(data_list)
|
| 1304 |
|
| 1305 |
# Save to CSV with year as filename
|
|
@@ -1477,7 +1481,7 @@ def construct_leaderboard_from_metadata():
|
|
| 1477 |
def initialize_data():
|
| 1478 |
"""
|
| 1479 |
Initialize data on application startup.
|
| 1480 |
-
Priority: 1) Leaderboard dataset, 2) Issue metadata (if available), 3) Full GitHub mining
|
| 1481 |
|
| 1482 |
In DEBUG MODE:
|
| 1483 |
- If no data available, automatically mine up to 10 issues per query per agent
|
|
@@ -1485,23 +1489,29 @@ def initialize_data():
|
|
| 1485 |
"""
|
| 1486 |
print("🚀 Initializing leaderboard data...")
|
| 1487 |
|
| 1488 |
-
# Try loading existing leaderboard
|
|
|
|
|
|
|
| 1489 |
leaderboard_data = load_leaderboard_dataset()
|
| 1490 |
if leaderboard_data:
|
| 1491 |
-
print("✓ Initialized from leaderboard dataset")
|
| 1492 |
return
|
| 1493 |
|
| 1494 |
-
# Try constructing from issue metadata (fast, memory-efficient)
|
|
|
|
| 1495 |
try:
|
| 1496 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1497 |
# Check if there's actually meaningful data (at least one agent with issues)
|
| 1498 |
has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
|
| 1499 |
if cache_dict and has_data:
|
|
|
|
| 1500 |
save_leaderboard_to_hf(cache_dict)
|
| 1501 |
print("✓ Initialized from issue metadata")
|
| 1502 |
return
|
|
|
|
|
|
|
| 1503 |
except Exception as e:
|
| 1504 |
-
print(f"Could not construct from metadata: {e}")
|
| 1505 |
|
| 1506 |
# If in debug mode and no data available, mine immediately
|
| 1507 |
if DEBUG_MODE:
|
|
@@ -1673,6 +1683,9 @@ def get_leaderboard_dataframe():
|
|
| 1673 |
|
| 1674 |
rows = []
|
| 1675 |
for data in leaderboard_data:
|
|
|
|
|
|
|
|
|
|
| 1676 |
# Only include display-relevant fields
|
| 1677 |
rows.append([
|
| 1678 |
data.get('agent_name', 'Unknown'),
|
|
|
|
| 1288 |
# Skip saving in debug mode - use in-memory cache instead
|
| 1289 |
if DEBUG_MODE:
|
| 1290 |
global DEBUG_LEADERBOARD_CACHE
|
| 1291 |
+
# Filter out agents with zero total issues
|
| 1292 |
+
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 1293 |
+
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 1294 |
+
data_list = dict_to_cache(filtered_cache_dict)
|
| 1295 |
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 1296 |
return True
|
| 1297 |
|
|
|
|
| 1300 |
if not token:
|
| 1301 |
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
|
| 1302 |
|
| 1303 |
+
# Filter out agents with zero total issues
|
| 1304 |
+
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 1305 |
# Convert to DataFrame
|
| 1306 |
+
data_list = dict_to_cache(filtered_cache_dict)
|
| 1307 |
df = pd.DataFrame(data_list)
|
| 1308 |
|
| 1309 |
# Save to CSV with year as filename
|
|
|
|
| 1481 |
def initialize_data():
|
| 1482 |
"""
|
| 1483 |
Initialize data on application startup.
|
| 1484 |
+
Priority: 1) Leaderboard dataset ({year}.csv), 2) Issue metadata (if available), 3) Full GitHub mining
|
| 1485 |
|
| 1486 |
In DEBUG MODE:
|
| 1487 |
- If no data available, automatically mine up to 10 issues per query per agent
|
|
|
|
| 1489 |
"""
|
| 1490 |
print("🚀 Initializing leaderboard data...")
|
| 1491 |
|
| 1492 |
+
# STEP 1: Try loading existing leaderboard CSV file for current year
|
| 1493 |
+
current_year = datetime.now().year
|
| 1494 |
+
print(f"📂 Checking for {current_year}.csv in {LEADERBOARD_REPO}...")
|
| 1495 |
leaderboard_data = load_leaderboard_dataset()
|
| 1496 |
if leaderboard_data:
|
| 1497 |
+
print(f"✓ Initialized from leaderboard dataset ({current_year}.csv)")
|
| 1498 |
return
|
| 1499 |
|
| 1500 |
+
# STEP 2: Try constructing from issue metadata (fast, memory-efficient)
|
| 1501 |
+
print(f"📂 {current_year}.csv not found. Checking {ISSUE_METADATA_REPO} for existing data...")
|
| 1502 |
try:
|
| 1503 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1504 |
# Check if there's actually meaningful data (at least one agent with issues)
|
| 1505 |
has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
|
| 1506 |
if cache_dict and has_data:
|
| 1507 |
+
print(f"✓ Found existing issue metadata. Building leaderboard from {ISSUE_METADATA_REPO}...")
|
| 1508 |
save_leaderboard_to_hf(cache_dict)
|
| 1509 |
print("✓ Initialized from issue metadata")
|
| 1510 |
return
|
| 1511 |
+
else:
|
| 1512 |
+
print(f" No meaningful data found in {ISSUE_METADATA_REPO}")
|
| 1513 |
except Exception as e:
|
| 1514 |
+
print(f" Could not construct from metadata: {e}")
|
| 1515 |
|
| 1516 |
# If in debug mode and no data available, mine immediately
|
| 1517 |
if DEBUG_MODE:
|
|
|
|
| 1683 |
|
| 1684 |
rows = []
|
| 1685 |
for data in leaderboard_data:
|
| 1686 |
+
# Filter out agents with zero total issues
|
| 1687 |
+
if data.get('total_issues', 0) == 0:
|
| 1688 |
+
continue
|
| 1689 |
# Only include display-relevant fields
|
| 1690 |
rows.append([
|
| 1691 |
data.get('agent_name', 'Unknown'),
|
msr.py
CHANGED
|
@@ -623,15 +623,19 @@ def get_already_mined_dates(agent_identifier, n_months=6):
|
|
| 623 |
def save_leaderboard_to_hf(cache_dict):
|
| 624 |
if DEBUG_MODE:
|
| 625 |
global DEBUG_LEADERBOARD_CACHE
|
| 626 |
-
DEBUG_LEADERBOARD_CACHE = cache_dict.copy()
|
| 627 |
-
data_list = dict_to_cache(cache_dict)
|
|
|
|
|
|
|
| 628 |
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 629 |
return True
|
| 630 |
try:
|
| 631 |
token = get_hf_token()
|
| 632 |
if not token:
|
| 633 |
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
|
| 634 |
-
data_list = dict_to_cache(cache_dict)
|
|
|
|
|
|
|
| 635 |
df = pd.DataFrame(data_list)
|
| 636 |
year = datetime.now().year
|
| 637 |
filename = f"{year}.csv"
|
|
|
|
| 623 |
def save_leaderboard_to_hf(cache_dict):
|
| 624 |
if DEBUG_MODE:
|
| 625 |
global DEBUG_LEADERBOARD_CACHE
|
| 626 |
+
# Filter out agents with zero total issues
|
| 627 |
+
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 628 |
+
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 629 |
+
data_list = dict_to_cache(filtered_cache_dict)
|
| 630 |
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 631 |
return True
|
| 632 |
try:
|
| 633 |
token = get_hf_token()
|
| 634 |
if not token:
|
| 635 |
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
|
| 636 |
+
# Filter out agents with zero total issues
|
| 637 |
+
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 638 |
+
data_list = dict_to_cache(filtered_cache_dict)
|
| 639 |
df = pd.DataFrame(data_list)
|
| 640 |
year = datetime.now().year
|
| 641 |
filename = f"{year}.csv"
|