KSvend Claude Happy commited on
Commit ·
cb6d0e1
1
Parent(s): 7946db4
debug: log batch job IDs and poll statuses to diagnose CDSE timeouts
Browse filesGenerated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
- app/worker.py +5 -1
app/worker.py
CHANGED
|
@@ -79,6 +79,8 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
|
|
| 79 |
season_months=job.request.season_months(),
|
| 80 |
)
|
| 81 |
batch_submissions[indicator_id] = jobs
|
|
|
|
|
|
|
| 82 |
await db.update_job_progress(job_id, indicator_id, "processing on CDSE")
|
| 83 |
except Exception as exc:
|
| 84 |
logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
|
|
@@ -90,9 +92,12 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
|
|
| 90 |
|
| 91 |
while pending:
|
| 92 |
# Check current statuses before sleeping
|
|
|
|
| 93 |
for indicator_id in list(pending.keys()):
|
| 94 |
jobs = pending[indicator_id]
|
| 95 |
statuses = [j.status() for j in jobs]
|
|
|
|
|
|
|
| 96 |
if all(s == "finished" for s in statuses):
|
| 97 |
logger.info("Batch jobs finished for %s", indicator_id)
|
| 98 |
del pending[indicator_id]
|
|
@@ -103,7 +108,6 @@ async def process_job(job_id: str, db: Database, registry: IndicatorRegistry) ->
|
|
| 103 |
if not pending:
|
| 104 |
break
|
| 105 |
|
| 106 |
-
elapsed = time.monotonic() - poll_start
|
| 107 |
if elapsed >= BATCH_TIMEOUT:
|
| 108 |
logger.warning("Batch poll timeout after %.0fs, remaining: %s", elapsed, list(pending.keys()))
|
| 109 |
fallback_ids.update(pending.keys())
|
|
|
|
| 79 |
season_months=job.request.season_months(),
|
| 80 |
)
|
| 81 |
batch_submissions[indicator_id] = jobs
|
| 82 |
+
job_ids = [getattr(j, 'job_id', '?') for j in jobs]
|
| 83 |
+
print(f"[Aperture] Submitted {indicator_id} batch jobs: {job_ids}")
|
| 84 |
await db.update_job_progress(job_id, indicator_id, "processing on CDSE")
|
| 85 |
except Exception as exc:
|
| 86 |
logger.warning("Batch submit failed for %s, will use fallback: %s", indicator_id, exc)
|
|
|
|
| 92 |
|
| 93 |
while pending:
|
| 94 |
# Check current statuses before sleeping
|
| 95 |
+
elapsed = time.monotonic() - poll_start
|
| 96 |
for indicator_id in list(pending.keys()):
|
| 97 |
jobs = pending[indicator_id]
|
| 98 |
statuses = [j.status() for j in jobs]
|
| 99 |
+
job_ids = [getattr(j, 'job_id', '?') for j in jobs]
|
| 100 |
+
print(f"[Aperture] Poll {indicator_id} ({elapsed:.0f}s): {list(zip(job_ids, statuses))}")
|
| 101 |
if all(s == "finished" for s in statuses):
|
| 102 |
logger.info("Batch jobs finished for %s", indicator_id)
|
| 103 |
del pending[indicator_id]
|
|
|
|
| 108 |
if not pending:
|
| 109 |
break
|
| 110 |
|
|
|
|
| 111 |
if elapsed >= BATCH_TIMEOUT:
|
| 112 |
logger.warning("Batch poll timeout after %.0fs, remaining: %s", elapsed, list(pending.keys()))
|
| 113 |
fallback_ids.update(pending.keys())
|