KSvend Claude Happy committed on
Commit
6ccfecb
·
1 Parent(s): 5bf6898

fix: exit poll loop 10 min after first batch job finishes

Browse files

Instead of waiting up to 90 min for all 3 jobs, exit the poll loop
once any job has finished and a 10 min grace period for the others
has elapsed. Harvest whatever completed — _SkippedJob wraps the rest.

Typical flow: the current job finishes at ~10 min, the grace period
gives the baseline/true-color jobs until ~20 min, then we harvest the
partial results. Much better than blocking for 90 min.

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>

Files changed (1) hide show
  1. app/worker.py +14 -1
app/worker.py CHANGED
@@ -96,9 +96,11 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
96
  logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
97
  jobs = None
98
 
99
- # Poll
 
100
  if jobs is not None:
101
  poll_start = time.monotonic()
 
102
  finished = False
103
  while True:
104
  elapsed = time.monotonic() - poll_start
@@ -114,6 +116,17 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
114
  logger.warning("Batch job failed for %s: %s", indicator_id, statuses)
115
  break
116
 
 
 
 
 
 
 
 
 
 
 
 
117
  if elapsed >= BATCH_TIMEOUT:
118
  logger.warning("Batch poll timeout after %.0fs for %s", elapsed, indicator_id)
119
  break
 
96
  logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
97
  jobs = None
98
 
99
+ # Poll — exit early once first job finishes + grace period for others
100
+ GRACE_PERIOD = 600 # 10 min grace after first job finishes
101
  if jobs is not None:
102
  poll_start = time.monotonic()
103
+ first_finished_at = None
104
  finished = False
105
  while True:
106
  elapsed = time.monotonic() - poll_start
 
116
  logger.warning("Batch job failed for %s: %s", indicator_id, statuses)
117
  break
118
 
119
+ # Track when first job finishes
120
+ if first_finished_at is None and any(s == "finished" for s in statuses):
121
+ first_finished_at = time.monotonic()
122
+ print(f"[Aperture] {indicator_id}: first job finished, {GRACE_PERIOD}s grace for remaining")
123
+
124
+ # Grace period: once any job finished, give others 10 min then harvest partial
125
+ if first_finished_at and (time.monotonic() - first_finished_at) >= GRACE_PERIOD:
126
+ logger.info("Grace period expired for %s, harvesting partial results", indicator_id)
127
+ print(f"[Aperture] {indicator_id}: grace period expired, proceeding with partial results")
128
+ break
129
+
130
  if elapsed >= BATCH_TIMEOUT:
131
  logger.warning("Batch poll timeout after %.0fs for %s", elapsed, indicator_id)
132
  break