zhimin-z
committed on
Commit
·
e4c408e
1
Parent(s):
c0fff3a
add
Browse files
app.py
CHANGED
|
@@ -34,11 +34,11 @@ LEADERBOARD_COLUMNS = [
|
|
| 34 |
("Website", "string"),
|
| 35 |
("Total Issues", "number"),
|
| 36 |
("Total Discussions", "number"),
|
|
|
|
| 37 |
("Issue Resolved Rate (%)", "number"),
|
| 38 |
("Discussion Resolved Rate (%)", "number"),
|
| 39 |
-
("Total Wanted Issues", "number"),
|
| 40 |
-
("Resolved Wanted Issues", "number"),
|
| 41 |
("Wanted Issue Resolved Rate (%)", "number"),
|
|
|
|
| 42 |
("Resolved Issues", "number"),
|
| 43 |
("Resolved Discussions", "number"),
|
| 44 |
]
|
|
@@ -497,8 +497,6 @@ def get_leaderboard_dataframe():
|
|
| 497 |
total_issues = data.get('total_issues', 0)
|
| 498 |
total_discussions = data.get('total_discussions', 0)
|
| 499 |
total_wanted_issues = data.get('total_wanted_issues', 0)
|
| 500 |
-
resolved_wanted_issues = data.get('resolved_wanted_issues', 0)
|
| 501 |
-
wanted_issue_resolved_rate = data.get('wanted_issue_resolved_rate', 0.0)
|
| 502 |
|
| 503 |
# Filter out assistants with no activity (all three totals are zero)
|
| 504 |
if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
|
|
@@ -511,11 +509,11 @@ def get_leaderboard_dataframe():
|
|
| 511 |
data.get('website', 'N/A'),
|
| 512 |
total_issues, # Total Issues
|
| 513 |
total_discussions, # Total Discussions
|
|
|
|
| 514 |
data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
|
| 515 |
data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
|
| 516 |
-
|
| 517 |
-
resolved_wanted_issues, # Resolved Wanted Issues
|
| 518 |
-
wanted_issue_resolved_rate, # Wanted Issue Resolved Rate (%)
|
| 519 |
data.get('resolved_issues', 0), # Resolved Issues
|
| 520 |
data.get('resolved_discussions', 0), # Resolved Discussions
|
| 521 |
])
|
|
@@ -529,9 +527,9 @@ def get_leaderboard_dataframe():
|
|
| 529 |
|
| 530 |
# Ensure numeric types
|
| 531 |
numeric_cols = [
|
| 532 |
-
"Total Issues", "Total Discussions",
|
| 533 |
"Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
|
| 534 |
-
"
|
| 535 |
"Resolved Issues", "Resolved Discussions"
|
| 536 |
]
|
| 537 |
for col in numeric_cols:
|
|
@@ -539,8 +537,8 @@ def get_leaderboard_dataframe():
|
|
| 539 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 540 |
|
| 541 |
# Sort by Total Wanted Issues descending (primary metric for this leaderboard)
|
| 542 |
-
if "
|
| 543 |
-
df = df.sort_values(by="
|
| 544 |
|
| 545 |
print(f"Final DataFrame shape: {df.shape}")
|
| 546 |
print("="*60 + "\n")
|
|
|
|
| 34 |
("Website", "string"),
|
| 35 |
("Total Issues", "number"),
|
| 36 |
("Total Discussions", "number"),
|
| 37 |
+
("Total Wanted Issues", "number"),
|
| 38 |
("Issue Resolved Rate (%)", "number"),
|
| 39 |
("Discussion Resolved Rate (%)", "number"),
|
|
|
|
|
|
|
| 40 |
("Wanted Issue Resolved Rate (%)", "number"),
|
| 41 |
+
("Resolved Wanted Issues", "number"),
|
| 42 |
("Resolved Issues", "number"),
|
| 43 |
("Resolved Discussions", "number"),
|
| 44 |
]
|
|
|
|
| 497 |
total_issues = data.get('total_issues', 0)
|
| 498 |
total_discussions = data.get('total_discussions', 0)
|
| 499 |
total_wanted_issues = data.get('total_wanted_issues', 0)
|
|
|
|
|
|
|
| 500 |
|
| 501 |
# Filter out assistants with no activity (all three totals are zero)
|
| 502 |
if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
|
|
|
|
| 509 |
data.get('website', 'N/A'),
|
| 510 |
total_issues, # Total Issues
|
| 511 |
total_discussions, # Total Discussions
|
| 512 |
+
total_wanted_issues, # Total Wanted Issues
|
| 513 |
data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
|
| 514 |
data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
|
| 515 |
+
data.get('wanted_issue_resolved_rate', 0.0), # Wanted Issue Resolved Rate (%)
|
| 516 |
+
data.get('resolved_wanted_issues', 0), # Resolved Wanted Issues
|
|
|
|
| 517 |
data.get('resolved_issues', 0), # Resolved Issues
|
| 518 |
data.get('resolved_discussions', 0), # Resolved Discussions
|
| 519 |
])
|
|
|
|
| 527 |
|
| 528 |
# Ensure numeric types
|
| 529 |
numeric_cols = [
|
| 530 |
+
"Total Issues", "Total Discussions", "Total Wanted Issues",
|
| 531 |
"Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
|
| 532 |
+
"Wanted Issue Resolved Rate (%)", "Resolved Wanted Issues",
|
| 533 |
"Resolved Issues", "Resolved Discussions"
|
| 534 |
]
|
| 535 |
for col in numeric_cols:
|
|
|
|
| 537 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 538 |
|
| 539 |
# Sort by Issue Resolved Rate (%) descending (primary sort metric for this leaderboard)
|
| 540 |
+
if "Issue Resolved Rate (%)" in df.columns and not df.empty:
|
| 541 |
+
df = df.sort_values(by="Issue Resolved Rate (%)", ascending=False).reset_index(drop=True)
|
| 542 |
|
| 543 |
print(f"Final DataFrame shape: {df.shape}")
|
| 544 |
print("="*60 + "\n")
|
msr.py
CHANGED
|
@@ -9,9 +9,6 @@ import duckdb
|
|
| 9 |
import backoff
|
| 10 |
import requests
|
| 11 |
import requests.exceptions
|
| 12 |
-
from apscheduler.schedulers.blocking import BlockingScheduler
|
| 13 |
-
from apscheduler.triggers.cron import CronTrigger
|
| 14 |
-
import logging
|
| 15 |
import traceback
|
| 16 |
import re
|
| 17 |
|
|
@@ -57,17 +54,6 @@ BATCH_SIZE_DAYS = 1 # Process 1 day at a time (~24 hourly files)
|
|
| 57 |
# Retry configuration
|
| 58 |
MAX_RETRIES = 5
|
| 59 |
|
| 60 |
-
# Upload configuration
|
| 61 |
-
UPLOAD_DELAY_SECONDS = 5
|
| 62 |
-
UPLOAD_MAX_BACKOFF = 3600
|
| 63 |
-
|
| 64 |
-
# Scheduler configuration
|
| 65 |
-
SCHEDULE_ENABLED = False
|
| 66 |
-
SCHEDULE_DAY_OF_WEEK = 'fri' # Friday
|
| 67 |
-
SCHEDULE_HOUR = 0
|
| 68 |
-
SCHEDULE_MINUTE = 0
|
| 69 |
-
SCHEDULE_TIMEZONE = 'UTC'
|
| 70 |
-
|
| 71 |
# =============================================================================
|
| 72 |
# UTILITY FUNCTIONS
|
| 73 |
# =============================================================================
|
|
@@ -1249,51 +1235,9 @@ def mine_all_agents():
|
|
| 1249 |
except Exception as e:
|
| 1250 |
print(f" ⚠ Failed to remove cache file: {str(e)}")
|
| 1251 |
|
| 1252 |
-
|
| 1253 |
-
# =============================================================================
|
| 1254 |
-
# SCHEDULER SETUP
|
| 1255 |
-
# =============================================================================
|
| 1256 |
-
|
| 1257 |
-
def setup_scheduler():
|
| 1258 |
-
"""Set up APScheduler to run mining jobs periodically."""
|
| 1259 |
-
logging.basicConfig(
|
| 1260 |
-
level=logging.INFO,
|
| 1261 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 1262 |
-
)
|
| 1263 |
-
|
| 1264 |
-
logging.getLogger('httpx').setLevel(logging.WARNING)
|
| 1265 |
-
|
| 1266 |
-
scheduler = BlockingScheduler(timezone=SCHEDULE_TIMEZONE)
|
| 1267 |
-
|
| 1268 |
-
trigger = CronTrigger(
|
| 1269 |
-
day_of_week=SCHEDULE_DAY_OF_WEEK,
|
| 1270 |
-
hour=SCHEDULE_HOUR,
|
| 1271 |
-
minute=SCHEDULE_MINUTE,
|
| 1272 |
-
timezone=SCHEDULE_TIMEZONE
|
| 1273 |
-
)
|
| 1274 |
-
|
| 1275 |
-
scheduler.add_job(
|
| 1276 |
-
mine_all_agents,
|
| 1277 |
-
trigger=trigger,
|
| 1278 |
-
id='mine_all_agents',
|
| 1279 |
-
name='Mine GHArchive data for all assistants',
|
| 1280 |
-
replace_existing=True
|
| 1281 |
-
)
|
| 1282 |
-
|
| 1283 |
-
next_run = trigger.get_next_fire_time(None, datetime.now(trigger.timezone))
|
| 1284 |
-
print(f"Scheduler: Weekly on {SCHEDULE_DAY_OF_WEEK} at {SCHEDULE_HOUR:02d}:{SCHEDULE_MINUTE:02d} {SCHEDULE_TIMEZONE}")
|
| 1285 |
-
print(f"Next run: {next_run}\n")
|
| 1286 |
-
|
| 1287 |
-
print(f"\nScheduler started")
|
| 1288 |
-
scheduler.start()
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
# =============================================================================
|
| 1292 |
# ENTRY POINT
|
| 1293 |
# =============================================================================
|
| 1294 |
|
| 1295 |
if __name__ == "__main__":
|
| 1296 |
-
|
| 1297 |
-
setup_scheduler()
|
| 1298 |
-
else:
|
| 1299 |
-
mine_all_agents()
|
|
|
|
| 9 |
import backoff
|
| 10 |
import requests
|
| 11 |
import requests.exceptions
|
|
|
|
|
|
|
|
|
|
| 12 |
import traceback
|
| 13 |
import re
|
| 14 |
|
|
|
|
| 54 |
# Retry configuration
|
| 55 |
MAX_RETRIES = 5
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# =============================================================================
|
| 58 |
# UTILITY FUNCTIONS
|
| 59 |
# =============================================================================
|
|
|
|
| 1235 |
except Exception as e:
|
| 1236 |
print(f" ⚠ Failed to remove cache file: {str(e)}")
|
| 1237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1238 |
# =============================================================================
|
| 1239 |
# ENTRY POINT
|
| 1240 |
# =============================================================================
|
| 1241 |
|
| 1242 |
if __name__ == "__main__":
|
| 1243 |
+
mine_all_agents()
|
|
|
|
|
|
|
|
|