zhiminy committed on
Commit
d8e431a
·
1 Parent(s): 439a9c4

fix remine

Browse files
Files changed (1) hide show
  1. app.py +17 -51
app.py CHANGED
@@ -1456,14 +1456,14 @@ def update_all_agents_incremental():
1456
 
1457
  if already_mined_dates:
1458
  print(f"📅 Found {len(already_mined_dates)} already-mined dates")
1459
- print(f" Skipping these dates and fetching only new data...")
1460
- # Fetch only issues from dates not yet mined
1461
  new_metadata = fetch_all_issues_metadata(
1462
  identifier,
1463
  agent_name,
1464
  token,
1465
  start_from_date=None, # Use full 6-month range
1466
- exclude_dates=already_mined_dates # But exclude already-mined dates
1467
  )
1468
  else:
1469
  print(f"📅 No existing data found. Mining everything from scratch...")
@@ -1847,61 +1847,27 @@ def submit_agent(identifier, agent_name, organization, description, website):
1847
 
1848
  def daily_update_task():
1849
  """
1850
- Daily scheduled task (runs at 12:00 AM UTC) for smart issue updates.
1851
 
1852
  Strategy:
1853
- 1. For each agent, refresh open issues from last 6 months
1854
- 2. Skip issues that are already closed/resolved (no API calls)
1855
- 3. Only fetch status for open issues to check if they've been closed/resolved
1856
- 4. Update leaderboard with refreshed data
1857
 
1858
- This is much more efficient than fetching all issues every time.
1859
  """
1860
  print(f"\n{'='*80}")
1861
- print(f"🕛 Daily update started at {datetime.now(timezone.utc).isoformat()}")
1862
  print(f"{'='*80}")
1863
 
1864
  try:
1865
- token = get_github_token()
1866
-
1867
- # Load all agents
1868
- agents = load_agents_from_hf()
1869
- if not agents:
1870
- print("No agents found")
1871
- return
1872
-
1873
- print(f"📋 Processing {len(agents)} agents...")
1874
-
1875
- total_checked = 0
1876
- total_updated = 0
1877
-
1878
- # Refresh open issues for each agent (last 6 months)
1879
- for agent in agents:
1880
- identifier = agent.get('github_identifier')
1881
- agent_name = agent.get('agent_name', 'Unknown')
1882
-
1883
- if not identifier:
1884
- continue
1885
-
1886
- print(f"\n{'='*60}")
1887
- print(f"Processing: {agent_name} ({identifier})")
1888
- print(f"{'='*60}")
1889
-
1890
- # Refresh open issues from last 6 months
1891
- checked, updated = refresh_open_issues_for_agent(identifier, token)
1892
- total_checked += checked
1893
- total_updated += updated
1894
-
1895
- print(f"\n{'='*80}")
1896
- print(f"📊 Refresh Summary:")
1897
- print(f" Total open issues checked: {total_checked}")
1898
- print(f" Issues updated (closed/resolved): {total_updated}")
1899
- print(f"{'='*80}")
1900
 
1901
- print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
1902
 
1903
  except Exception as e:
1904
- print(f"✗ Daily update failed: {str(e)}")
1905
  import traceback
1906
  traceback.print_exc()
1907
 
@@ -1934,17 +1900,17 @@ else:
1934
 
1935
  initialize_data()
1936
 
1937
- # Start APScheduler for daily updates at 12:00 AM UTC
1938
  scheduler = BackgroundScheduler(timezone="UTC")
1939
  scheduler.add_job(
1940
  daily_update_task,
1941
  trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
1942
- id='daily_issue_refresh',
1943
- name='Daily Issue Status Refresh',
1944
  replace_existing=True
1945
  )
1946
  scheduler.start()
1947
- print("✓ Scheduler started: Daily updates at 12:00 AM UTC")
1948
 
1949
  # Create Gradio interface
1950
  with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as app:
 
1456
 
1457
  if already_mined_dates:
1458
  print(f"📅 Found {len(already_mined_dates)} already-mined dates")
1459
+ print(f" Re-mining ALL dates to ensure metadata is up-to-date...")
1460
+ # ALWAYS re-mine all dates to catch metadata changes (e.g., resolved → unresolved)
1461
  new_metadata = fetch_all_issues_metadata(
1462
  identifier,
1463
  agent_name,
1464
  token,
1465
  start_from_date=None, # Use full 6-month range
1466
+ exclude_dates=None # Re-mine everything, don't exclude any dates
1467
  )
1468
  else:
1469
  print(f"📅 No existing data found. Mining everything from scratch...")
 
1847
 
1848
  def daily_update_task():
1849
  """
1850
+ Daily scheduled task (runs at 12:00 AM UTC) for regular issue mining.
1851
 
1852
  Strategy:
1853
+ 1. Re-mine ALL issues within the 6-month window for each agent
1854
+ 2. This ensures metadata is always fresh (catches resolved → unresolved changes)
1855
+ 3. Update leaderboard with completely refreshed data
 
1856
 
1857
+ This ensures no stale metadata exists in the system.
1858
  """
1859
  print(f"\n{'='*80}")
1860
+ print(f"🕛 Daily regular mining started at {datetime.now(timezone.utc).isoformat()}")
1861
  print(f"{'='*80}")
1862
 
1863
  try:
1864
+ # Use the incremental update function which now re-mines everything
1865
+ update_all_agents_incremental()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1866
 
1867
+ print(f"\n✅ Daily regular mining completed at {datetime.now(timezone.utc).isoformat()}")
1868
 
1869
  except Exception as e:
1870
+ print(f"✗ Daily regular mining failed: {str(e)}")
1871
  import traceback
1872
  traceback.print_exc()
1873
 
 
1900
 
1901
  initialize_data()
1902
 
1903
+ # Start APScheduler for daily regular issue mining at 12:00 AM UTC
1904
  scheduler = BackgroundScheduler(timezone="UTC")
1905
  scheduler.add_job(
1906
  daily_update_task,
1907
  trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
1908
+ id='daily_regular_mining',
1909
+ name='Daily Regular Issue Mining',
1910
  replace_existing=True
1911
  )
1912
  scheduler.start()
1913
+ print("✓ Scheduler started: Daily regular issue mining at 12:00 AM UTC")
1914
 
1915
  # Create Gradio interface
1916
  with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as app: