Zhu Jiajun (jz28583) Claude Opus 4.7 (1M context) committed on
Commit
1a157a1
·
1 Parent(s): 9cb903d

Fix kaggle async: use sentinel -1.0 for pending (NOT NULL) + correct col names

Browse files
Files changed (1) hide show
  1. server/api.py +19 -12
server/api.py CHANGED
@@ -51,6 +51,11 @@ MANIFEST_PATH = Path(os.environ.get(
51
  ))
52
  QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
53
  BYPASS_KEY = os.environ.get("GT_BYPASS_KEY", "").strip() or None
 
 
 
 
 
54
  MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
55
 
56
 
@@ -199,13 +204,13 @@ def _kaggle_poll_loop(competition: str, description: str, run_id: str,
199
  final = (float("nan"), {"error": f"kaggle scoring failed: {err}"})
200
  break # found our row; if still pending the inner loop falls through
201
  if final is None:
202
- final = (float("nan"), {"error": f"polled {timeout_s}s without complete"})
203
  primary, secondary = final
204
- # NaN can't go through SQLite NUMERIC; persist as NULL when scoring failed.
205
- primary_db = None if primary != primary else primary # NaN check
206
  conn = _db()
207
  conn.execute(
208
- "UPDATE submissions SET primary_metric = ?, secondary = ? "
209
  "WHERE run_id = ?",
210
  (primary_db, _json.dumps(secondary), run_id),
211
  )
@@ -312,7 +317,8 @@ def submit():
312
  # the client never has to hold open a long-running connection
313
  # (HF Space's reverse proxy kills these around the 5-min mark).
314
  description = _kaggle_submit(comp, raw, run_id)
315
- scored = {"primary": None, "secondary": {"status": "pending"},
 
316
  "n_rows": -1}
317
  pending = True
318
  else:
@@ -389,7 +395,7 @@ def leaderboard(task: str):
389
  SELECT agent, MAX(primary_metric) as best, COUNT(*) as n_subs,
390
  MIN(submitted_at) as first_seen
391
  FROM submissions
392
- WHERE task = ? AND primary_metric IS NOT NULL
393
  GROUP BY agent
394
  ORDER BY best DESC
395
  """, (task,)).fetchall()
@@ -411,7 +417,7 @@ def leaderboard_all():
411
  rows = conn.execute("""
412
  SELECT task, agent, MAX(primary_metric) as best
413
  FROM submissions
414
- WHERE primary_metric IS NOT NULL
415
  GROUP BY task, agent
416
  """).fetchall()
417
  conn.close()
@@ -474,8 +480,8 @@ def run_status(run_id: str):
474
  """
475
  conn = _db()
476
  row = conn.execute("""
477
- SELECT run_id, task, agent, primary_metric, secondary, sha256,
478
- n_rows, ts
479
  FROM submissions WHERE run_id = ?
480
  """, (run_id,)).fetchone()
481
  conn.close()
@@ -483,11 +489,12 @@ def run_status(run_id: str):
483
  return jsonify({"error": f"no run '{run_id}'"}), 404
484
  rid, task, agent, primary, secondary, sha, n_rows, ts = row
485
  sec = json.loads(secondary) if secondary else {}
486
- if primary is None:
487
- # Kaggle backend, still polling
488
  status = "pending"
 
489
  elif sec.get("error"):
490
  status = "failed"
 
491
  else:
492
  status = "complete"
493
  return jsonify({
@@ -1267,7 +1274,7 @@ def landing():
1267
  SELECT agent, MAX(primary_metric) AS p, COUNT(*) AS n,
1268
  MIN(submitted_at) AS f
1269
  FROM submissions
1270
- WHERE task = ? AND primary_metric IS NOT NULL
1271
  GROUP BY agent ORDER BY p DESC
1272
  """, (name,)).fetchall()
1273
  n_rows_cfg = s.get("n_rows")
 
51
  ))
52
  QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
53
  BYPASS_KEY = os.environ.get("GT_BYPASS_KEY", "").strip() or None
54
+
55
+ # Sentinel for kaggle-backend rows whose score is still being polled. The
56
+ # submissions table has primary_metric NOT NULL so we can't store NULL —
57
+ # leaderboard queries filter `primary_metric > -1`.
58
+ _PENDING_SENTINEL = -1.0
59
  MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
60
 
61
 
 
204
  final = (float("nan"), {"error": f"kaggle scoring failed: {err}"})
205
  break # found our row; if still pending the inner loop falls through
206
  if final is None:
207
+ final = (-1.0, {"error": f"polled {timeout_s}s without complete"})
208
  primary, secondary = final
209
+ # On failure leave the sentinel so it stays out of the leaderboard.
210
+ primary_db = -1.0 if primary != primary else primary # NaN check
211
  conn = _db()
212
  conn.execute(
213
+ "UPDATE submissions SET primary_metric = ?, secondary_json = ? "
214
  "WHERE run_id = ?",
215
  (primary_db, _json.dumps(secondary), run_id),
216
  )
 
317
  # the client never has to hold open a long-running connection
318
  # (HF Space's reverse proxy kills these around the 5-min mark).
319
  description = _kaggle_submit(comp, raw, run_id)
320
+ scored = {"primary": _PENDING_SENTINEL,
321
+ "secondary": {"status": "pending"},
322
  "n_rows": -1}
323
  pending = True
324
  else:
 
395
  SELECT agent, MAX(primary_metric) as best, COUNT(*) as n_subs,
396
  MIN(submitted_at) as first_seen
397
  FROM submissions
398
+ WHERE task = ? AND primary_metric > -1
399
  GROUP BY agent
400
  ORDER BY best DESC
401
  """, (task,)).fetchall()
 
417
  rows = conn.execute("""
418
  SELECT task, agent, MAX(primary_metric) as best
419
  FROM submissions
420
+ WHERE primary_metric > -1
421
  GROUP BY task, agent
422
  """).fetchall()
423
  conn.close()
 
480
  """
481
  conn = _db()
482
  row = conn.execute("""
483
+ SELECT run_id, task, agent, primary_metric, secondary_json,
484
+ submission_sha256, n_rows, submitted_at
485
  FROM submissions WHERE run_id = ?
486
  """, (run_id,)).fetchone()
487
  conn.close()
 
489
  return jsonify({"error": f"no run '{run_id}'"}), 404
490
  rid, task, agent, primary, secondary, sha, n_rows, ts = row
491
  sec = json.loads(secondary) if secondary else {}
492
+ if primary == _PENDING_SENTINEL:
 
493
  status = "pending"
494
+ primary = None
495
  elif sec.get("error"):
496
  status = "failed"
497
+ primary = None
498
  else:
499
  status = "complete"
500
  return jsonify({
 
1274
  SELECT agent, MAX(primary_metric) AS p, COUNT(*) AS n,
1275
  MIN(submitted_at) AS f
1276
  FROM submissions
1277
+ WHERE task = ? AND primary_metric > -1
1278
  GROUP BY agent ORDER BY p DESC
1279
  """, (name,)).fetchall()
1280
  n_rows_cfg = s.get("n_rows")