add
Browse files
README.md
CHANGED
|
@@ -58,13 +58,13 @@ We search GitHub using multiple query patterns to catch all issues associated wi
|
|
| 58 |
The leaderboard refreshes automatically every day at 12:00 AM UTC.
|
| 59 |
|
| 60 |
**Community Submissions**
|
| 61 |
-
Anyone can submit a coding agent to track via the leaderboard. We store agent metadata in Hugging Face datasets (`SWE-Arena/swe_agents`) and
|
| 62 |
|
| 63 |
## Using the Leaderboard
|
| 64 |
|
| 65 |
### Just Browsing?
|
| 66 |
Head to the Leaderboard tab where you'll find:
|
| 67 |
-
- **Searchable table**: Search by agent name or
|
| 68 |
- **Filterable columns**: Filter by resolution rate to find top performers
|
| 69 |
- **Monthly charts**: Scroll down to see resolution rate trends and issue activity over time
|
| 70 |
|
|
@@ -74,8 +74,8 @@ The charts use color-coded lines and bars so you can easily track individual age
|
|
| 74 |
In the Submit Agent tab, provide:
|
| 75 |
- **GitHub identifier*** (required): Your agent's GitHub username or bot account
|
| 76 |
- **Agent name*** (required): Display name for the leaderboard
|
| 77 |
-
- **Organization*** (required): Your organization or team name
|
| 78 |
-
- **Website*** (required): Link to your agent's homepage or documentation
|
| 79 |
- **Description** (optional): Brief explanation of what your agent does
|
| 80 |
|
| 81 |
Click Submit. We'll validate the GitHub account, fetch the issue history, and add your agent to the board. Initial data loading takes a few seconds.
|
|
|
|
| 58 |
The leaderboard refreshes automatically every day at 12:00 AM UTC.
|
| 59 |
|
| 60 |
**Community Submissions**
|
| 61 |
+
Anyone can submit a coding agent to be tracked on the leaderboard. We store agent metadata in the Hugging Face dataset `SWE-Arena/swe_agents` and issue metadata in `SWE-Arena/issue_metadata`. The leaderboard is dynamically constructed from the issue metadata. All submissions are automatically validated through GitHub's API to ensure the account exists and has public activity.
|
| 62 |
|
| 63 |
## Using the Leaderboard
|
| 64 |
|
| 65 |
### Just Browsing?
|
| 66 |
Head to the Leaderboard tab where you'll find:
|
| 67 |
+
- **Searchable table**: Search by agent name or website
|
| 68 |
- **Filterable columns**: Filter by resolution rate to find top performers
|
| 69 |
- **Monthly charts**: Scroll down to see resolution rate trends and issue activity over time
|
| 70 |
|
|
|
|
| 74 |
In the Submit Agent tab, provide:
|
| 75 |
- **GitHub identifier*** (required): Your agent's GitHub username or bot account
|
| 76 |
- **Agent name*** (required): Display name for the leaderboard
|
| 77 |
+
- **Organization*** (required): Your organization or team name (stored in the agent metadata)
|
| 78 |
+
- **Website*** (required): Link to your agent's homepage or documentation (displayed on the leaderboard)
|
| 79 |
- **Description** (optional): Brief explanation of what your agent does
|
| 80 |
|
| 81 |
Click Submit. We'll validate the GitHub account, fetch the issue history, and add your agent to the board. Initial data loading takes a few seconds.
|
app.py
CHANGED
|
@@ -44,16 +44,14 @@ else:
|
|
| 44 |
DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
|
| 45 |
|
| 46 |
# In-memory cache for debug mode (data persists during session but NOT saved to HF)
|
| 47 |
-
DEBUG_LEADERBOARD_CACHE = {}
|
| 48 |
DEBUG_ISSUE_METADATA_CACHE = defaultdict(list)
|
| 49 |
|
| 50 |
AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
|
| 51 |
-
LEADERBOARD_REPO = "SWE-Arena/issue_leaderboard"
|
| 52 |
ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata" # HuggingFace dataset for issue metadata
|
| 53 |
|
| 54 |
LEADERBOARD_COLUMNS = [
|
| 55 |
("Agent Name", "string"),
|
| 56 |
-
("
|
| 57 |
("Total Issues", "number"),
|
| 58 |
("Resolved Issues", "number"),
|
| 59 |
("Resolved Rate (%)", "number"),
|
|
@@ -1163,34 +1161,6 @@ def load_agents_from_hf():
|
|
| 1163 |
return None
|
| 1164 |
|
| 1165 |
|
| 1166 |
-
def load_leaderboard_dataset():
|
| 1167 |
-
"""Load leaderboard data from HuggingFace dataset for current year.
|
| 1168 |
-
In debug mode, loads from in-memory cache if available."""
|
| 1169 |
-
# In debug mode, check in-memory cache first
|
| 1170 |
-
if DEBUG_MODE and DEBUG_LEADERBOARD_CACHE:
|
| 1171 |
-
print(f"🐛 DEBUG MODE: Loading leaderboard from in-memory cache ({len(DEBUG_LEADERBOARD_CACHE)} entries)")
|
| 1172 |
-
return list(DEBUG_LEADERBOARD_CACHE.values())
|
| 1173 |
-
|
| 1174 |
-
try:
|
| 1175 |
-
year = datetime.now().year
|
| 1176 |
-
filename = f"{year}.csv"
|
| 1177 |
-
|
| 1178 |
-
# Try to download the CSV file for current year
|
| 1179 |
-
file_path = hf_hub_download(
|
| 1180 |
-
repo_id=LEADERBOARD_REPO,
|
| 1181 |
-
filename=filename,
|
| 1182 |
-
repo_type="dataset"
|
| 1183 |
-
)
|
| 1184 |
-
|
| 1185 |
-
# Load CSV into list of dicts
|
| 1186 |
-
df = pd.read_csv(file_path)
|
| 1187 |
-
data = df.to_dict('records')
|
| 1188 |
-
print(f"✓ Loaded {len(data)} entries from {filename}")
|
| 1189 |
-
return data
|
| 1190 |
-
|
| 1191 |
-
except Exception as e:
|
| 1192 |
-
print(f"Could not load leaderboard dataset for year {datetime.now().year}: {str(e)}")
|
| 1193 |
-
return None
|
| 1194 |
|
| 1195 |
|
| 1196 |
def get_hf_token():
|
|
@@ -1282,56 +1252,6 @@ def save_agent_to_hf(data):
|
|
| 1282 |
return False
|
| 1283 |
|
| 1284 |
|
| 1285 |
-
def save_leaderboard_to_hf(cache_dict):
|
| 1286 |
-
"""Save complete leaderboard to HuggingFace dataset as CSV.
|
| 1287 |
-
In debug mode, saves to in-memory cache only."""
|
| 1288 |
-
# Skip saving in debug mode - use in-memory cache instead
|
| 1289 |
-
if DEBUG_MODE:
|
| 1290 |
-
global DEBUG_LEADERBOARD_CACHE
|
| 1291 |
-
# Filter out agents with zero total issues
|
| 1292 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 1293 |
-
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 1294 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 1295 |
-
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 1296 |
-
return True
|
| 1297 |
-
|
| 1298 |
-
try:
|
| 1299 |
-
token = get_hf_token()
|
| 1300 |
-
if not token:
|
| 1301 |
-
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
|
| 1302 |
-
|
| 1303 |
-
# Filter out agents with zero total issues
|
| 1304 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 1305 |
-
# Convert to DataFrame
|
| 1306 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 1307 |
-
df = pd.DataFrame(data_list)
|
| 1308 |
-
|
| 1309 |
-
# Save to CSV with year as filename
|
| 1310 |
-
year = datetime.now().year
|
| 1311 |
-
filename = f"{year}.csv"
|
| 1312 |
-
df.to_csv(filename, index=False)
|
| 1313 |
-
|
| 1314 |
-
try:
|
| 1315 |
-
# Upload to HuggingFace
|
| 1316 |
-
api = HfApi()
|
| 1317 |
-
upload_with_retry(
|
| 1318 |
-
api=api,
|
| 1319 |
-
path_or_fileobj=filename,
|
| 1320 |
-
path_in_repo=filename,
|
| 1321 |
-
repo_id=LEADERBOARD_REPO,
|
| 1322 |
-
repo_type="dataset",
|
| 1323 |
-
token=token
|
| 1324 |
-
)
|
| 1325 |
-
print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
|
| 1326 |
-
return True
|
| 1327 |
-
finally:
|
| 1328 |
-
# Always clean up local file, even if upload fails
|
| 1329 |
-
if os.path.exists(filename):
|
| 1330 |
-
os.remove(filename)
|
| 1331 |
-
|
| 1332 |
-
except Exception as e:
|
| 1333 |
-
print(f"✗ Error saving leaderboard: {str(e)}")
|
| 1334 |
-
return False
|
| 1335 |
|
| 1336 |
|
| 1337 |
# =============================================================================
|
|
@@ -1421,7 +1341,7 @@ def update_all_agents_incremental():
|
|
| 1421 |
# Merge metadata with stats
|
| 1422 |
cache_dict[identifier] = {
|
| 1423 |
'agent_name': agent_name,
|
| 1424 |
-
'
|
| 1425 |
'github_identifier': identifier,
|
| 1426 |
**stats
|
| 1427 |
}
|
|
@@ -1470,7 +1390,7 @@ def construct_leaderboard_from_metadata():
|
|
| 1470 |
|
| 1471 |
cache_dict[identifier] = {
|
| 1472 |
'agent_name': agent_name,
|
| 1473 |
-
'
|
| 1474 |
'github_identifier': identifier,
|
| 1475 |
**stats
|
| 1476 |
}
|
|
@@ -1481,7 +1401,7 @@ def construct_leaderboard_from_metadata():
|
|
| 1481 |
def initialize_data():
|
| 1482 |
"""
|
| 1483 |
Initialize data on application startup.
|
| 1484 |
-
|
| 1485 |
|
| 1486 |
In DEBUG MODE:
|
| 1487 |
- If no data available, automatically mine up to 10 issues per query per agent
|
|
@@ -1489,24 +1409,14 @@ def initialize_data():
|
|
| 1489 |
"""
|
| 1490 |
print("🚀 Initializing leaderboard data...")
|
| 1491 |
|
| 1492 |
-
#
|
| 1493 |
-
|
| 1494 |
-
print(f"📂 Checking for {current_year}.csv in {LEADERBOARD_REPO}...")
|
| 1495 |
-
leaderboard_data = load_leaderboard_dataset()
|
| 1496 |
-
if leaderboard_data:
|
| 1497 |
-
print(f"✓ Initialized from leaderboard dataset ({current_year}.csv)")
|
| 1498 |
-
return
|
| 1499 |
-
|
| 1500 |
-
# STEP 2: Try constructing from issue metadata (fast, memory-efficient)
|
| 1501 |
-
print(f"📂 {current_year}.csv not found. Checking {ISSUE_METADATA_REPO} for existing data...")
|
| 1502 |
try:
|
| 1503 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1504 |
# Check if there's actually meaningful data (at least one agent with issues)
|
| 1505 |
has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
|
| 1506 |
if cache_dict and has_data:
|
| 1507 |
-
print(f"✓ Found existing issue metadata.
|
| 1508 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1509 |
-
print("✓ Initialized from issue metadata")
|
| 1510 |
return
|
| 1511 |
else:
|
| 1512 |
print(f" No meaningful data found in {ISSUE_METADATA_REPO}")
|
|
@@ -1521,10 +1431,7 @@ def initialize_data():
|
|
| 1521 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1522 |
print("⛏️ Mining GitHub data in debug mode (limited to 10 issues per query)...")
|
| 1523 |
cache_dict = update_all_agents_incremental()
|
| 1524 |
-
|
| 1525 |
-
# In debug mode, this won't actually save to HF
|
| 1526 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1527 |
-
print("✓ Debug mining complete (data NOT saved to HuggingFace)")
|
| 1528 |
return
|
| 1529 |
else:
|
| 1530 |
print("⚠️ No agents found. Waiting for first submission...")
|
|
@@ -1536,8 +1443,6 @@ def initialize_data():
|
|
| 1536 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1537 |
print("⛏️ Mining GitHub data (this may take a while)...")
|
| 1538 |
cache_dict = update_all_agents_incremental()
|
| 1539 |
-
if cache_dict:
|
| 1540 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1541 |
return
|
| 1542 |
|
| 1543 |
# No data available
|
|
@@ -1670,26 +1575,26 @@ def create_monthly_metrics_plot():
|
|
| 1670 |
|
| 1671 |
def get_leaderboard_dataframe():
|
| 1672 |
"""
|
| 1673 |
-
|
| 1674 |
-
Returns formatted DataFrame sorted by
|
| 1675 |
"""
|
| 1676 |
-
#
|
| 1677 |
-
|
| 1678 |
|
| 1679 |
-
if not
|
| 1680 |
# Return empty DataFrame with correct columns if no data
|
| 1681 |
column_names = [col[0] for col in LEADERBOARD_COLUMNS]
|
| 1682 |
return pd.DataFrame(columns=column_names)
|
| 1683 |
|
| 1684 |
rows = []
|
| 1685 |
-
for data in
|
| 1686 |
# Filter out agents with zero total issues
|
| 1687 |
if data.get('total_issues', 0) == 0:
|
| 1688 |
continue
|
| 1689 |
# Only include display-relevant fields
|
| 1690 |
rows.append([
|
| 1691 |
data.get('agent_name', 'Unknown'),
|
| 1692 |
-
data.get('
|
| 1693 |
data.get('total_issues', 0),
|
| 1694 |
data.get('resolved', 0),
|
| 1695 |
data.get('resolved_rate', 0.0),
|
|
@@ -1774,19 +1679,7 @@ def submit_agent(identifier, agent_name, organization, description, website):
|
|
| 1774 |
# Calculate stats from metadata
|
| 1775 |
stats = calculate_issue_stats_from_metadata(metadata_list)
|
| 1776 |
|
| 1777 |
-
|
| 1778 |
-
leaderboard_data = load_leaderboard_dataset()
|
| 1779 |
-
if not leaderboard_data:
|
| 1780 |
-
leaderboard_data = []
|
| 1781 |
-
|
| 1782 |
-
# Convert to dict for easy updating
|
| 1783 |
-
cache_dict = {entry['github_identifier']: entry for entry in leaderboard_data}
|
| 1784 |
-
cache_dict[identifier] = {**submission, **stats}
|
| 1785 |
-
|
| 1786 |
-
# Save to HuggingFace
|
| 1787 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1788 |
-
|
| 1789 |
-
return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
|
| 1790 |
|
| 1791 |
except Exception as e:
|
| 1792 |
error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch issue data: {str(e)}"
|
|
@@ -1853,15 +1746,6 @@ def daily_update_task():
|
|
| 1853 |
print(f" Issues updated (closed/resolved): {total_updated}")
|
| 1854 |
print(f"{'='*80}")
|
| 1855 |
|
| 1856 |
-
# Reconstruct leaderboard from all stored metadata
|
| 1857 |
-
print(f"\n📈 Rebuilding leaderboard from refreshed data...")
|
| 1858 |
-
cache_dict = construct_leaderboard_from_metadata()
|
| 1859 |
-
|
| 1860 |
-
if cache_dict:
|
| 1861 |
-
# Save leaderboard
|
| 1862 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1863 |
-
print("✓ Leaderboard updated successfully")
|
| 1864 |
-
|
| 1865 |
print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
|
| 1866 |
|
| 1867 |
except Exception as e:
|
|
@@ -1923,7 +1807,7 @@ with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as a
|
|
| 1923 |
leaderboard_table = Leaderboard(
|
| 1924 |
value=get_leaderboard_dataframe(),
|
| 1925 |
datatype=LEADERBOARD_COLUMNS,
|
| 1926 |
-
search_columns=["Agent Name", "
|
| 1927 |
filter_columns=["Resolved Rate (%)"]
|
| 1928 |
)
|
| 1929 |
|
|
|
|
| 44 |
DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
|
| 45 |
|
| 46 |
# In-memory cache for debug mode (data persists during session but NOT saved to HF)
|
|
|
|
| 47 |
DEBUG_ISSUE_METADATA_CACHE = defaultdict(list)
|
| 48 |
|
| 49 |
AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
|
|
|
|
| 50 |
ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata" # HuggingFace dataset for issue metadata
|
| 51 |
|
| 52 |
LEADERBOARD_COLUMNS = [
|
| 53 |
("Agent Name", "string"),
|
| 54 |
+
("Website", "string"),
|
| 55 |
("Total Issues", "number"),
|
| 56 |
("Resolved Issues", "number"),
|
| 57 |
("Resolved Rate (%)", "number"),
|
|
|
|
| 1161 |
return None
|
| 1162 |
|
| 1163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1164 |
|
| 1165 |
|
| 1166 |
def get_hf_token():
|
|
|
|
| 1252 |
return False
|
| 1253 |
|
| 1254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1255 |
|
| 1256 |
|
| 1257 |
# =============================================================================
|
|
|
|
| 1341 |
# Merge metadata with stats
|
| 1342 |
cache_dict[identifier] = {
|
| 1343 |
'agent_name': agent_name,
|
| 1344 |
+
'website': agent.get('website', 'N/A'),
|
| 1345 |
'github_identifier': identifier,
|
| 1346 |
**stats
|
| 1347 |
}
|
|
|
|
| 1390 |
|
| 1391 |
cache_dict[identifier] = {
|
| 1392 |
'agent_name': agent_name,
|
| 1393 |
+
'website': agent.get('website', 'N/A'),
|
| 1394 |
'github_identifier': identifier,
|
| 1395 |
**stats
|
| 1396 |
}
|
|
|
|
| 1401 |
def initialize_data():
|
| 1402 |
"""
|
| 1403 |
Initialize data on application startup.
|
| 1404 |
+
Constructs leaderboard from issue metadata.
|
| 1405 |
|
| 1406 |
In DEBUG MODE:
|
| 1407 |
- If no data available, automatically mine up to 10 issues per query per agent
|
|
|
|
| 1409 |
"""
|
| 1410 |
print("🚀 Initializing leaderboard data...")
|
| 1411 |
|
| 1412 |
+
# Try constructing from issue metadata (fast, memory-efficient)
|
| 1413 |
+
print(f"📂 Checking {ISSUE_METADATA_REPO} for existing data...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1414 |
try:
|
| 1415 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1416 |
# Check if there's actually meaningful data (at least one agent with issues)
|
| 1417 |
has_data = any(entry.get('total_issues', 0) > 0 for entry in cache_dict.values())
|
| 1418 |
if cache_dict and has_data:
|
| 1419 |
+
print(f"✓ Found existing issue metadata. Leaderboard constructed from {ISSUE_METADATA_REPO}")
|
|
|
|
|
|
|
| 1420 |
return
|
| 1421 |
else:
|
| 1422 |
print(f" No meaningful data found in {ISSUE_METADATA_REPO}")
|
|
|
|
| 1431 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1432 |
print("⛏️ Mining GitHub data in debug mode (limited to 10 issues per query)...")
|
| 1433 |
cache_dict = update_all_agents_incremental()
|
| 1434 |
+
print("✓ Debug mining complete (data NOT saved to HuggingFace)")
|
|
|
|
|
|
|
|
|
|
| 1435 |
return
|
| 1436 |
else:
|
| 1437 |
print("⚠️ No agents found. Waiting for first submission...")
|
|
|
|
| 1443 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1444 |
print("⛏️ Mining GitHub data (this may take a while)...")
|
| 1445 |
cache_dict = update_all_agents_incremental()
|
|
|
|
|
|
|
| 1446 |
return
|
| 1447 |
|
| 1448 |
# No data available
|
|
|
|
| 1575 |
|
| 1576 |
def get_leaderboard_dataframe():
|
| 1577 |
"""
|
| 1578 |
+
Construct leaderboard from issue metadata and convert to pandas DataFrame for display.
|
| 1579 |
+
Returns formatted DataFrame sorted by resolved rate.
|
| 1580 |
"""
|
| 1581 |
+
# Construct leaderboard from metadata
|
| 1582 |
+
cache_dict = construct_leaderboard_from_metadata()
|
| 1583 |
|
| 1584 |
+
if not cache_dict:
|
| 1585 |
# Return empty DataFrame with correct columns if no data
|
| 1586 |
column_names = [col[0] for col in LEADERBOARD_COLUMNS]
|
| 1587 |
return pd.DataFrame(columns=column_names)
|
| 1588 |
|
| 1589 |
rows = []
|
| 1590 |
+
for data in cache_dict.values():
|
| 1591 |
# Filter out agents with zero total issues
|
| 1592 |
if data.get('total_issues', 0) == 0:
|
| 1593 |
continue
|
| 1594 |
# Only include display-relevant fields
|
| 1595 |
rows.append([
|
| 1596 |
data.get('agent_name', 'Unknown'),
|
| 1597 |
+
data.get('website', 'N/A'),
|
| 1598 |
data.get('total_issues', 0),
|
| 1599 |
data.get('resolved', 0),
|
| 1600 |
data.get('resolved_rate', 0.0),
|
|
|
|
| 1679 |
# Calculate stats from metadata
|
| 1680 |
stats = calculate_issue_stats_from_metadata(metadata_list)
|
| 1681 |
|
| 1682 |
+
return f"✅ Successfully submitted {agent_name}! Stats: {stats['total_issues']} issues, {stats['resolved_rate']}% resolved", get_leaderboard_dataframe(), create_monthly_metrics_plot()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1683 |
|
| 1684 |
except Exception as e:
|
| 1685 |
error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch issue data: {str(e)}"
|
|
|
|
| 1746 |
print(f" Issues updated (closed/resolved): {total_updated}")
|
| 1747 |
print(f"{'='*80}")
|
| 1748 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1749 |
print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
|
| 1750 |
|
| 1751 |
except Exception as e:
|
|
|
|
| 1807 |
leaderboard_table = Leaderboard(
|
| 1808 |
value=get_leaderboard_dataframe(),
|
| 1809 |
datatype=LEADERBOARD_COLUMNS,
|
| 1810 |
+
search_columns=["Agent Name", "Website"],
|
| 1811 |
filter_columns=["Resolved Rate (%)"]
|
| 1812 |
)
|
| 1813 |
|
msr.py
CHANGED
|
@@ -64,11 +64,9 @@ else:
|
|
| 64 |
# Constants (match app.py)
|
| 65 |
# =============================================================================
|
| 66 |
|
| 67 |
-
DEBUG_LEADERBOARD_CACHE = {}
|
| 68 |
DEBUG_ISSUE_METADATA_CACHE = defaultdict(list)
|
| 69 |
|
| 70 |
AGENTS_REPO = "SWE-Arena/swe_agents"
|
| 71 |
-
LEADERBOARD_REPO = "SWE-Arena/issue_leaderboard"
|
| 72 |
ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata"
|
| 73 |
|
| 74 |
|
|
@@ -620,45 +618,6 @@ def get_already_mined_dates(agent_identifier, n_months=6):
|
|
| 620 |
return set()
|
| 621 |
|
| 622 |
|
| 623 |
-
def save_leaderboard_to_hf(cache_dict):
|
| 624 |
-
if DEBUG_MODE:
|
| 625 |
-
global DEBUG_LEADERBOARD_CACHE
|
| 626 |
-
# Filter out agents with zero total issues
|
| 627 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 628 |
-
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 629 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 630 |
-
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 631 |
-
return True
|
| 632 |
-
try:
|
| 633 |
-
token = get_hf_token()
|
| 634 |
-
if not token:
|
| 635 |
-
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
|
| 636 |
-
# Filter out agents with zero total issues
|
| 637 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_issues', 0) > 0}
|
| 638 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 639 |
-
df = pd.DataFrame(data_list)
|
| 640 |
-
year = datetime.now().year
|
| 641 |
-
filename = f"{year}.csv"
|
| 642 |
-
df.to_csv(filename, index=False)
|
| 643 |
-
api = HfApi()
|
| 644 |
-
try:
|
| 645 |
-
upload_with_retry(
|
| 646 |
-
api=api,
|
| 647 |
-
path_or_fileobj=filename,
|
| 648 |
-
path_in_repo=filename,
|
| 649 |
-
repo_id=LEADERBOARD_REPO,
|
| 650 |
-
repo_type="dataset",
|
| 651 |
-
token=token
|
| 652 |
-
)
|
| 653 |
-
print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
|
| 654 |
-
return True
|
| 655 |
-
finally:
|
| 656 |
-
# Always clean up local file, even if upload fails
|
| 657 |
-
if os.path.exists(filename):
|
| 658 |
-
os.remove(filename)
|
| 659 |
-
except Exception as e:
|
| 660 |
-
print(f"✗ Error saving leaderboard: {str(e)}")
|
| 661 |
-
return False
|
| 662 |
|
| 663 |
|
| 664 |
def calculate_issue_stats_from_metadata(metadata_list):
|
|
@@ -741,7 +700,7 @@ def update_all_agents_incremental():
|
|
| 741 |
stats = calculate_issue_stats_from_metadata(agent_metadata)
|
| 742 |
cache_dict[identifier] = {
|
| 743 |
'agent_name': agent_name,
|
| 744 |
-
'
|
| 745 |
'github_identifier': identifier,
|
| 746 |
**stats
|
| 747 |
}
|
|
@@ -758,7 +717,7 @@ def run_once():
|
|
| 758 |
print("\n🚀 Immediate mining run started")
|
| 759 |
cache_dict = update_all_agents_incremental()
|
| 760 |
if cache_dict:
|
| 761 |
-
|
| 762 |
print("✅ Immediate mining run completed\n")
|
| 763 |
|
| 764 |
|
|
|
|
| 64 |
# Constants (match app.py)
|
| 65 |
# =============================================================================
|
| 66 |
|
|
|
|
| 67 |
DEBUG_ISSUE_METADATA_CACHE = defaultdict(list)
|
| 68 |
|
| 69 |
AGENTS_REPO = "SWE-Arena/swe_agents"
|
|
|
|
| 70 |
ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata"
|
| 71 |
|
| 72 |
|
|
|
|
| 618 |
return set()
|
| 619 |
|
| 620 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
|
| 623 |
def calculate_issue_stats_from_metadata(metadata_list):
|
|
|
|
| 700 |
stats = calculate_issue_stats_from_metadata(agent_metadata)
|
| 701 |
cache_dict[identifier] = {
|
| 702 |
'agent_name': agent_name,
|
| 703 |
+
'website': agent.get('website', 'N/A'),
|
| 704 |
'github_identifier': identifier,
|
| 705 |
**stats
|
| 706 |
}
|
|
|
|
| 717 |
print("\n🚀 Immediate mining run started")
|
| 718 |
cache_dict = update_all_agents_incremental()
|
| 719 |
if cache_dict:
|
| 720 |
+
print(f"✓ Updated {len(cache_dict)} agents")
|
| 721 |
print("✅ Immediate mining run completed\n")
|
| 722 |
|
| 723 |
|