zhimin-z committed on
Commit
e4c408e
·
1 Parent(s): c0fff3a
Files changed (2) hide show
  1. app.py +9 -11
  2. msr.py +1 -57
app.py CHANGED
@@ -34,11 +34,11 @@ LEADERBOARD_COLUMNS = [
34
  ("Website", "string"),
35
  ("Total Issues", "number"),
36
  ("Total Discussions", "number"),
 
37
  ("Issue Resolved Rate (%)", "number"),
38
  ("Discussion Resolved Rate (%)", "number"),
39
- ("Total Wanted Issues", "number"),
40
- ("Resolved Wanted Issues", "number"),
41
  ("Wanted Issue Resolved Rate (%)", "number"),
 
42
  ("Resolved Issues", "number"),
43
  ("Resolved Discussions", "number"),
44
  ]
@@ -497,8 +497,6 @@ def get_leaderboard_dataframe():
497
  total_issues = data.get('total_issues', 0)
498
  total_discussions = data.get('total_discussions', 0)
499
  total_wanted_issues = data.get('total_wanted_issues', 0)
500
- resolved_wanted_issues = data.get('resolved_wanted_issues', 0)
501
- wanted_issue_resolved_rate = data.get('wanted_issue_resolved_rate', 0.0)
502
 
503
  # Filter out assistants with no activity (all four metrics are zero)
504
  if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
@@ -511,11 +509,11 @@ def get_leaderboard_dataframe():
511
  data.get('website', 'N/A'),
512
  total_issues, # Total Issues
513
  total_discussions, # Total Discussions
 
514
  data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
515
  data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
516
- total_wanted_issues, # Total Wanted Issues
517
- resolved_wanted_issues, # Resolved Wanted Issues
518
- wanted_issue_resolved_rate, # Wanted Issue Resolved Rate (%)
519
  data.get('resolved_issues', 0), # Resolved Issues
520
  data.get('resolved_discussions', 0), # Resolved Discussions
521
  ])
@@ -529,9 +527,9 @@ def get_leaderboard_dataframe():
529
 
530
  # Ensure numeric types
531
  numeric_cols = [
532
- "Total Issues", "Total Discussions",
533
  "Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
534
- "Total Wanted Issues", "Resolved Wanted Issues", "Wanted Issue Resolved Rate (%)",
535
  "Resolved Issues", "Resolved Discussions"
536
  ]
537
  for col in numeric_cols:
@@ -539,8 +537,8 @@ def get_leaderboard_dataframe():
539
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
540
 
541
  # Sort by Total Wanted Issues descending (primary metric for this leaderboard)
542
- if "Total Wanted Issues" in df.columns and not df.empty:
543
- df = df.sort_values(by="Total Wanted Issues", ascending=False).reset_index(drop=True)
544
 
545
  print(f"Final DataFrame shape: {df.shape}")
546
  print("="*60 + "\n")
 
34
  ("Website", "string"),
35
  ("Total Issues", "number"),
36
  ("Total Discussions", "number"),
37
+ ("Total Wanted Issues", "number"),
38
  ("Issue Resolved Rate (%)", "number"),
39
  ("Discussion Resolved Rate (%)", "number"),
 
 
40
  ("Wanted Issue Resolved Rate (%)", "number"),
41
+ ("Resolved Wanted Issues", "number"),
42
  ("Resolved Issues", "number"),
43
  ("Resolved Discussions", "number"),
44
  ]
 
497
  total_issues = data.get('total_issues', 0)
498
  total_discussions = data.get('total_discussions', 0)
499
  total_wanted_issues = data.get('total_wanted_issues', 0)
 
 
500
 
501
  # Filter out assistants with no activity (all three totals are zero)
502
  if total_issues == 0 and total_discussions == 0 and total_wanted_issues == 0:
 
509
  data.get('website', 'N/A'),
510
  total_issues, # Total Issues
511
  total_discussions, # Total Discussions
512
+ total_wanted_issues, # Total Wanted Issues
513
  data.get('resolved_rate', 0.0), # Issue Resolved Rate (%)
514
  data.get('discussion_resolved_rate', 0.0), # Discussion Resolved Rate (%)
515
+ data.get('wanted_issue_resolved_rate', 0.0), # Wanted Issue Resolved Rate (%)
516
+ data.get('resolved_wanted_issues', 0), # Resolved Wanted Issues
 
517
  data.get('resolved_issues', 0), # Resolved Issues
518
  data.get('resolved_discussions', 0), # Resolved Discussions
519
  ])
 
527
 
528
  # Ensure numeric types
529
  numeric_cols = [
530
+ "Total Issues", "Total Discussions", "Total Wanted Issues",
531
  "Issue Resolved Rate (%)", "Discussion Resolved Rate (%)",
532
+ "Wanted Issue Resolved Rate (%)", "Resolved Wanted Issues",
533
  "Resolved Issues", "Resolved Discussions"
534
  ]
535
  for col in numeric_cols:
 
537
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
538
 
539
  # Sort by Issue Resolved Rate (%) descending (primary metric for this leaderboard)
540
+ if "Issue Resolved Rate (%)" in df.columns and not df.empty:
541
+ df = df.sort_values(by="Issue Resolved Rate (%)", ascending=False).reset_index(drop=True)
542
 
543
  print(f"Final DataFrame shape: {df.shape}")
544
  print("="*60 + "\n")
msr.py CHANGED
@@ -9,9 +9,6 @@ import duckdb
9
  import backoff
10
  import requests
11
  import requests.exceptions
12
- from apscheduler.schedulers.blocking import BlockingScheduler
13
- from apscheduler.triggers.cron import CronTrigger
14
- import logging
15
  import traceback
16
  import re
17
 
@@ -57,17 +54,6 @@ BATCH_SIZE_DAYS = 1 # Process 1 day at a time (~24 hourly files)
57
  # Retry configuration
58
  MAX_RETRIES = 5
59
 
60
- # Upload configuration
61
- UPLOAD_DELAY_SECONDS = 5
62
- UPLOAD_MAX_BACKOFF = 3600
63
-
64
- # Scheduler configuration
65
- SCHEDULE_ENABLED = False
66
- SCHEDULE_DAY_OF_WEEK = 'fri' # Friday
67
- SCHEDULE_HOUR = 0
68
- SCHEDULE_MINUTE = 0
69
- SCHEDULE_TIMEZONE = 'UTC'
70
-
71
  # =============================================================================
72
  # UTILITY FUNCTIONS
73
  # =============================================================================
@@ -1249,51 +1235,9 @@ def mine_all_agents():
1249
  except Exception as e:
1250
  print(f" ⚠ Failed to remove cache file: {str(e)}")
1251
 
1252
-
1253
- # =============================================================================
1254
- # SCHEDULER SETUP
1255
- # =============================================================================
1256
-
1257
- def setup_scheduler():
1258
- """Set up APScheduler to run mining jobs periodically."""
1259
- logging.basicConfig(
1260
- level=logging.INFO,
1261
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
1262
- )
1263
-
1264
- logging.getLogger('httpx').setLevel(logging.WARNING)
1265
-
1266
- scheduler = BlockingScheduler(timezone=SCHEDULE_TIMEZONE)
1267
-
1268
- trigger = CronTrigger(
1269
- day_of_week=SCHEDULE_DAY_OF_WEEK,
1270
- hour=SCHEDULE_HOUR,
1271
- minute=SCHEDULE_MINUTE,
1272
- timezone=SCHEDULE_TIMEZONE
1273
- )
1274
-
1275
- scheduler.add_job(
1276
- mine_all_agents,
1277
- trigger=trigger,
1278
- id='mine_all_agents',
1279
- name='Mine GHArchive data for all assistants',
1280
- replace_existing=True
1281
- )
1282
-
1283
- next_run = trigger.get_next_fire_time(None, datetime.now(trigger.timezone))
1284
- print(f"Scheduler: Weekly on {SCHEDULE_DAY_OF_WEEK} at {SCHEDULE_HOUR:02d}:{SCHEDULE_MINUTE:02d} {SCHEDULE_TIMEZONE}")
1285
- print(f"Next run: {next_run}\n")
1286
-
1287
- print(f"\nScheduler started")
1288
- scheduler.start()
1289
-
1290
-
1291
  # =============================================================================
1292
  # ENTRY POINT
1293
  # =============================================================================
1294
 
1295
  if __name__ == "__main__":
1296
- if SCHEDULE_ENABLED:
1297
- setup_scheduler()
1298
- else:
1299
- mine_all_agents()
 
9
  import backoff
10
  import requests
11
  import requests.exceptions
 
 
 
12
  import traceback
13
  import re
14
 
 
54
  # Retry configuration
55
  MAX_RETRIES = 5
56
 
 
 
 
 
 
 
 
 
 
 
 
57
  # =============================================================================
58
  # UTILITY FUNCTIONS
59
  # =============================================================================
 
1235
  except Exception as e:
1236
  print(f" ⚠ Failed to remove cache file: {str(e)}")
1237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1238
  # =============================================================================
1239
  # ENTRY POINT
1240
  # =============================================================================
1241
 
1242
  if __name__ == "__main__":
1243
+ mine_all_agents()