fix remine
Browse files
app.py
CHANGED
|
@@ -1456,14 +1456,14 @@ def update_all_agents_incremental():
|
|
| 1456 |
|
| 1457 |
if already_mined_dates:
|
| 1458 |
print(f"📅 Found {len(already_mined_dates)} already-mined dates")
|
| 1459 |
-
print(f"
|
| 1460 |
-
#
|
| 1461 |
new_metadata = fetch_all_issues_metadata(
|
| 1462 |
identifier,
|
| 1463 |
agent_name,
|
| 1464 |
token,
|
| 1465 |
start_from_date=None, # Use full 6-month range
|
| 1466 |
-
exclude_dates=
|
| 1467 |
)
|
| 1468 |
else:
|
| 1469 |
print(f"📅 No existing data found. Mining everything from scratch...")
|
|
@@ -1847,61 +1847,27 @@ def submit_agent(identifier, agent_name, organization, description, website):
|
|
| 1847 |
|
| 1848 |
def daily_update_task():
|
| 1849 |
"""
|
| 1850 |
-
Daily scheduled task (runs at 12:00 AM UTC) for
|
| 1851 |
|
| 1852 |
Strategy:
|
| 1853 |
-
1.
|
| 1854 |
-
2.
|
| 1855 |
-
3.
|
| 1856 |
-
4. Update leaderboard with refreshed data
|
| 1857 |
|
| 1858 |
-
This
|
| 1859 |
"""
|
| 1860 |
print(f"\n{'='*80}")
|
| 1861 |
-
print(f"🕛 Daily
|
| 1862 |
print(f"{'='*80}")
|
| 1863 |
|
| 1864 |
try:
|
| 1865 |
-
|
| 1866 |
-
|
| 1867 |
-
# Load all agents
|
| 1868 |
-
agents = load_agents_from_hf()
|
| 1869 |
-
if not agents:
|
| 1870 |
-
print("No agents found")
|
| 1871 |
-
return
|
| 1872 |
-
|
| 1873 |
-
print(f"📋 Processing {len(agents)} agents...")
|
| 1874 |
-
|
| 1875 |
-
total_checked = 0
|
| 1876 |
-
total_updated = 0
|
| 1877 |
-
|
| 1878 |
-
# Refresh open issues for each agent (last 6 months)
|
| 1879 |
-
for agent in agents:
|
| 1880 |
-
identifier = agent.get('github_identifier')
|
| 1881 |
-
agent_name = agent.get('agent_name', 'Unknown')
|
| 1882 |
-
|
| 1883 |
-
if not identifier:
|
| 1884 |
-
continue
|
| 1885 |
-
|
| 1886 |
-
print(f"\n{'='*60}")
|
| 1887 |
-
print(f"Processing: {agent_name} ({identifier})")
|
| 1888 |
-
print(f"{'='*60}")
|
| 1889 |
-
|
| 1890 |
-
# Refresh open issues from last 6 months
|
| 1891 |
-
checked, updated = refresh_open_issues_for_agent(identifier, token)
|
| 1892 |
-
total_checked += checked
|
| 1893 |
-
total_updated += updated
|
| 1894 |
-
|
| 1895 |
-
print(f"\n{'='*80}")
|
| 1896 |
-
print(f"📊 Refresh Summary:")
|
| 1897 |
-
print(f" Total open issues checked: {total_checked}")
|
| 1898 |
-
print(f" Issues updated (closed/resolved): {total_updated}")
|
| 1899 |
-
print(f"{'='*80}")
|
| 1900 |
|
| 1901 |
-
print(f"\n✅ Daily
|
| 1902 |
|
| 1903 |
except Exception as e:
|
| 1904 |
-
print(f"✗ Daily
|
| 1905 |
import traceback
|
| 1906 |
traceback.print_exc()
|
| 1907 |
|
|
@@ -1934,17 +1900,17 @@ else:
|
|
| 1934 |
|
| 1935 |
initialize_data()
|
| 1936 |
|
| 1937 |
-
# Start APScheduler for daily
|
| 1938 |
scheduler = BackgroundScheduler(timezone="UTC")
|
| 1939 |
scheduler.add_job(
|
| 1940 |
daily_update_task,
|
| 1941 |
trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
|
| 1942 |
-
id='
|
| 1943 |
-
name='Daily Issue
|
| 1944 |
replace_existing=True
|
| 1945 |
)
|
| 1946 |
scheduler.start()
|
| 1947 |
-
print("✓ Scheduler started: Daily
|
| 1948 |
|
| 1949 |
# Create Gradio interface
|
| 1950 |
with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as app:
|
|
|
|
| 1456 |
|
| 1457 |
if already_mined_dates:
|
| 1458 |
print(f"📅 Found {len(already_mined_dates)} already-mined dates")
|
| 1459 |
+
print(f" Re-mining ALL dates to ensure metadata is up-to-date...")
|
| 1460 |
+
# ALWAYS re-mine all dates to catch metadata changes (e.g., resolved → unresolved)
|
| 1461 |
new_metadata = fetch_all_issues_metadata(
|
| 1462 |
identifier,
|
| 1463 |
agent_name,
|
| 1464 |
token,
|
| 1465 |
start_from_date=None, # Use full 6-month range
|
| 1466 |
+
exclude_dates=None # Re-mine everything, don't exclude any dates
|
| 1467 |
)
|
| 1468 |
else:
|
| 1469 |
print(f"📅 No existing data found. Mining everything from scratch...")
|
|
|
|
| 1847 |
|
| 1848 |
def daily_update_task():
|
| 1849 |
"""
|
| 1850 |
+
Daily scheduled task (runs at 12:00 AM UTC) for regular issue mining.
|
| 1851 |
|
| 1852 |
Strategy:
|
| 1853 |
+
1. Re-mine ALL issues within the 6-month window for each agent
|
| 1854 |
+
2. This ensures metadata is always fresh (catches resolved → unresolved changes)
|
| 1855 |
+
3. Update leaderboard with completely refreshed data
|
|
|
|
| 1856 |
|
| 1857 |
+
This ensures no stale metadata exists in the system.
|
| 1858 |
"""
|
| 1859 |
print(f"\n{'='*80}")
|
| 1860 |
+
print(f"🕛 Daily regular mining started at {datetime.now(timezone.utc).isoformat()}")
|
| 1861 |
print(f"{'='*80}")
|
| 1862 |
|
| 1863 |
try:
|
| 1864 |
+
# Use the incremental update function which now re-mines everything
|
| 1865 |
+
update_all_agents_incremental()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1866 |
|
| 1867 |
+
print(f"\n✅ Daily regular mining completed at {datetime.now(timezone.utc).isoformat()}")
|
| 1868 |
|
| 1869 |
except Exception as e:
|
| 1870 |
+
print(f"✗ Daily regular mining failed: {str(e)}")
|
| 1871 |
import traceback
|
| 1872 |
traceback.print_exc()
|
| 1873 |
|
|
|
|
| 1900 |
|
| 1901 |
initialize_data()
|
| 1902 |
|
| 1903 |
+
# Start APScheduler for daily regular issue mining at 12:00 AM UTC
|
| 1904 |
scheduler = BackgroundScheduler(timezone="UTC")
|
| 1905 |
scheduler.add_job(
|
| 1906 |
daily_update_task,
|
| 1907 |
trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
|
| 1908 |
+
id='daily_regular_mining',
|
| 1909 |
+
name='Daily Regular Issue Mining',
|
| 1910 |
replace_existing=True
|
| 1911 |
)
|
| 1912 |
scheduler.start()
|
| 1913 |
+
print("✓ Scheduler started: Daily regular issue mining at 12:00 AM UTC")
|
| 1914 |
|
| 1915 |
# Create Gradio interface
|
| 1916 |
with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as app:
|