Spaces:
Running
Running
Zhu Jiajun (jz28583) Claude Opus 4.7 (1M context) committed on
Commit ·
1a157a1
1
Parent(s): 9cb903d
Fix kaggle async: use sentinel -1.0 for pending (NOT NULL) + correct col names
Browse files — server/api.py +19 -12
server/api.py
CHANGED
|
@@ -51,6 +51,11 @@ MANIFEST_PATH = Path(os.environ.get(
|
|
| 51 |
))
|
| 52 |
QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
|
| 53 |
BYPASS_KEY = os.environ.get("GT_BYPASS_KEY", "").strip() or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
|
| 55 |
|
| 56 |
|
|
@@ -199,13 +204,13 @@ def _kaggle_poll_loop(competition: str, description: str, run_id: str,
|
|
| 199 |
final = (float("nan"), {"error": f"kaggle scoring failed: {err}"})
|
| 200 |
break # found our row; if still pending the inner loop falls through
|
| 201 |
if final is None:
|
| 202 |
-
final = (
|
| 203 |
primary, secondary = final
|
| 204 |
-
#
|
| 205 |
-
primary_db =
|
| 206 |
conn = _db()
|
| 207 |
conn.execute(
|
| 208 |
-
"UPDATE submissions SET primary_metric = ?,
|
| 209 |
"WHERE run_id = ?",
|
| 210 |
(primary_db, _json.dumps(secondary), run_id),
|
| 211 |
)
|
|
@@ -312,7 +317,8 @@ def submit():
|
|
| 312 |
# the client never has to hold open a long-running connection
|
| 313 |
# (HF Space's reverse proxy kills these around the 5-min mark).
|
| 314 |
description = _kaggle_submit(comp, raw, run_id)
|
| 315 |
-
scored = {"primary":
|
|
|
|
| 316 |
"n_rows": -1}
|
| 317 |
pending = True
|
| 318 |
else:
|
|
@@ -389,7 +395,7 @@ def leaderboard(task: str):
|
|
| 389 |
SELECT agent, MAX(primary_metric) as best, COUNT(*) as n_subs,
|
| 390 |
MIN(submitted_at) as first_seen
|
| 391 |
FROM submissions
|
| 392 |
-
WHERE task = ? AND primary_metric
|
| 393 |
GROUP BY agent
|
| 394 |
ORDER BY best DESC
|
| 395 |
""", (task,)).fetchall()
|
|
@@ -411,7 +417,7 @@ def leaderboard_all():
|
|
| 411 |
rows = conn.execute("""
|
| 412 |
SELECT task, agent, MAX(primary_metric) as best
|
| 413 |
FROM submissions
|
| 414 |
-
WHERE primary_metric
|
| 415 |
GROUP BY task, agent
|
| 416 |
""").fetchall()
|
| 417 |
conn.close()
|
|
@@ -474,8 +480,8 @@ def run_status(run_id: str):
|
|
| 474 |
"""
|
| 475 |
conn = _db()
|
| 476 |
row = conn.execute("""
|
| 477 |
-
SELECT run_id, task, agent, primary_metric,
|
| 478 |
-
n_rows,
|
| 479 |
FROM submissions WHERE run_id = ?
|
| 480 |
""", (run_id,)).fetchone()
|
| 481 |
conn.close()
|
|
@@ -483,11 +489,12 @@ def run_status(run_id: str):
|
|
| 483 |
return jsonify({"error": f"no run '{run_id}'"}), 404
|
| 484 |
rid, task, agent, primary, secondary, sha, n_rows, ts = row
|
| 485 |
sec = json.loads(secondary) if secondary else {}
|
| 486 |
-
if primary
|
| 487 |
-
# Kaggle backend, still polling
|
| 488 |
status = "pending"
|
|
|
|
| 489 |
elif sec.get("error"):
|
| 490 |
status = "failed"
|
|
|
|
| 491 |
else:
|
| 492 |
status = "complete"
|
| 493 |
return jsonify({
|
|
@@ -1267,7 +1274,7 @@ def landing():
|
|
| 1267 |
SELECT agent, MAX(primary_metric) AS p, COUNT(*) AS n,
|
| 1268 |
MIN(submitted_at) AS f
|
| 1269 |
FROM submissions
|
| 1270 |
-
WHERE task = ? AND primary_metric
|
| 1271 |
GROUP BY agent ORDER BY p DESC
|
| 1272 |
""", (name,)).fetchall()
|
| 1273 |
n_rows_cfg = s.get("n_rows")
|
|
|
|
| 51 |
))
|
| 52 |
QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
|
| 53 |
BYPASS_KEY = os.environ.get("GT_BYPASS_KEY", "").strip() or None
|
| 54 |
+
|
| 55 |
+
# Sentinel for kaggle-backend rows whose score is still being polled. The
|
| 56 |
+
# submissions table has primary_metric NOT NULL so we can't store NULL —
|
| 57 |
+
# leaderboard queries filter `primary_metric > -1`.
|
| 58 |
+
_PENDING_SENTINEL = -1.0
|
| 59 |
MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
|
| 60 |
|
| 61 |
|
|
|
|
| 204 |
final = (float("nan"), {"error": f"kaggle scoring failed: {err}"})
|
| 205 |
break # found our row; if still pending the inner loop falls through
|
| 206 |
if final is None:
|
| 207 |
+
final = (-1.0, {"error": f"polled {timeout_s}s without complete"})
|
| 208 |
primary, secondary = final
|
| 209 |
+
# On failure leave the sentinel so it stays out of the leaderboard.
|
| 210 |
+
primary_db = -1.0 if primary != primary else primary # NaN check
|
| 211 |
conn = _db()
|
| 212 |
conn.execute(
|
| 213 |
+
"UPDATE submissions SET primary_metric = ?, secondary_json = ? "
|
| 214 |
"WHERE run_id = ?",
|
| 215 |
(primary_db, _json.dumps(secondary), run_id),
|
| 216 |
)
|
|
|
|
| 317 |
# the client never has to hold open a long-running connection
|
| 318 |
# (HF Space's reverse proxy kills these around the 5-min mark).
|
| 319 |
description = _kaggle_submit(comp, raw, run_id)
|
| 320 |
+
scored = {"primary": _PENDING_SENTINEL,
|
| 321 |
+
"secondary": {"status": "pending"},
|
| 322 |
"n_rows": -1}
|
| 323 |
pending = True
|
| 324 |
else:
|
|
|
|
| 395 |
SELECT agent, MAX(primary_metric) as best, COUNT(*) as n_subs,
|
| 396 |
MIN(submitted_at) as first_seen
|
| 397 |
FROM submissions
|
| 398 |
+
WHERE task = ? AND primary_metric > -1
|
| 399 |
GROUP BY agent
|
| 400 |
ORDER BY best DESC
|
| 401 |
""", (task,)).fetchall()
|
|
|
|
| 417 |
rows = conn.execute("""
|
| 418 |
SELECT task, agent, MAX(primary_metric) as best
|
| 419 |
FROM submissions
|
| 420 |
+
WHERE primary_metric > -1
|
| 421 |
GROUP BY task, agent
|
| 422 |
""").fetchall()
|
| 423 |
conn.close()
|
|
|
|
| 480 |
"""
|
| 481 |
conn = _db()
|
| 482 |
row = conn.execute("""
|
| 483 |
+
SELECT run_id, task, agent, primary_metric, secondary_json,
|
| 484 |
+
submission_sha256, n_rows, submitted_at
|
| 485 |
FROM submissions WHERE run_id = ?
|
| 486 |
""", (run_id,)).fetchone()
|
| 487 |
conn.close()
|
|
|
|
| 489 |
return jsonify({"error": f"no run '{run_id}'"}), 404
|
| 490 |
rid, task, agent, primary, secondary, sha, n_rows, ts = row
|
| 491 |
sec = json.loads(secondary) if secondary else {}
|
| 492 |
+
if primary == _PENDING_SENTINEL:
|
|
|
|
| 493 |
status = "pending"
|
| 494 |
+
primary = None
|
| 495 |
elif sec.get("error"):
|
| 496 |
status = "failed"
|
| 497 |
+
primary = None
|
| 498 |
else:
|
| 499 |
status = "complete"
|
| 500 |
return jsonify({
|
|
|
|
| 1274 |
SELECT agent, MAX(primary_metric) AS p, COUNT(*) AS n,
|
| 1275 |
MIN(submitted_at) AS f
|
| 1276 |
FROM submissions
|
| 1277 |
+
WHERE task = ? AND primary_metric > -1
|
| 1278 |
GROUP BY agent ORDER BY p DESC
|
| 1279 |
""", (name,)).fetchall()
|
| 1280 |
n_rows_cfg = s.get("n_rows")
|