KSvend Claude Happy committed on
Commit ·
6ccfecb
1
Parent(s): 5bf6898
fix: exit poll loop 10 min after first batch job finishes
Browse files
Instead of waiting 90 min for all 3 jobs, exit once any job
finishes + 10 min grace period for the others. Harvest with
whatever completed — _SkippedJob wraps the rest.
Typical flow: current finishes at ~10 min, grace gives baseline/
true-color until ~20 min, then harvest with partial results.
Much better than blocking 90 min.
Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
- app/worker.py +14 -1
app/worker.py
CHANGED
|
@@ -96,9 +96,11 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
|
|
| 96 |
logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
|
| 97 |
jobs = None
|
| 98 |
|
| 99 |
-
# Poll
|
|
|
|
| 100 |
if jobs is not None:
|
| 101 |
poll_start = time.monotonic()
|
|
|
|
| 102 |
finished = False
|
| 103 |
while True:
|
| 104 |
elapsed = time.monotonic() - poll_start
|
|
@@ -114,6 +116,17 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
|
|
| 114 |
logger.warning("Batch job failed for %s: %s", indicator_id, statuses)
|
| 115 |
break
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
if elapsed >= BATCH_TIMEOUT:
|
| 118 |
logger.warning("Batch poll timeout after %.0fs for %s", elapsed, indicator_id)
|
| 119 |
break
|
|
|
|
| 96 |
logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
|
| 97 |
jobs = None
|
| 98 |
|
| 99 |
+
# Poll — exit early once first job finishes + grace period for others
|
| 100 |
+
GRACE_PERIOD = 600 # 10 min grace after first job finishes
|
| 101 |
if jobs is not None:
|
| 102 |
poll_start = time.monotonic()
|
| 103 |
+
first_finished_at = None
|
| 104 |
finished = False
|
| 105 |
while True:
|
| 106 |
elapsed = time.monotonic() - poll_start
|
|
|
|
| 116 |
logger.warning("Batch job failed for %s: %s", indicator_id, statuses)
|
| 117 |
break
|
| 118 |
|
| 119 |
+
# Track when first job finishes
|
| 120 |
+
if first_finished_at is None and any(s == "finished" for s in statuses):
|
| 121 |
+
first_finished_at = time.monotonic()
|
| 122 |
+
print(f"[Aperture] {indicator_id}: first job finished, {GRACE_PERIOD}s grace for remaining")
|
| 123 |
+
|
| 124 |
+
# Grace period: once any job finished, give others 10 min then harvest partial
|
| 125 |
+
if first_finished_at and (time.monotonic() - first_finished_at) >= GRACE_PERIOD:
|
| 126 |
+
logger.info("Grace period expired for %s, harvesting partial results", indicator_id)
|
| 127 |
+
print(f"[Aperture] {indicator_id}: grace period expired, proceeding with partial results")
|
| 128 |
+
break
|
| 129 |
+
|
| 130 |
if elapsed >= BATCH_TIMEOUT:
|
| 131 |
logger.warning("Batch poll timeout after %.0fs for %s", elapsed, indicator_id)
|
| 132 |
break
|