somratpro Claude Haiku 4.5 committed on
Commit
df96d5e
·
1 Parent(s): e77bcc1

Patch Paperclip source to cap recovery chain depth at 500

Browse files

Found the actual bug. Confirmed via the @paperclipai/server npm tarball:
firstBlockedChainFinding does proper cycle detection via Set.has,
but the chain in user data is genuinely 1000+ deep (created by a
runaway recovery-issue loop). Each recursive call also clones the
Set into new Set(seen), so memory blows up too.

Fix: sed-patch the if-check to also bail when seen.size > 500.
This short-circuits deep linear chains without affecting normal
graphs. Patch runs in the builder stage before pnpm build.

Reverted SQL purge and --unhandled-rejections=none from start.sh —
neither addressed the root cause.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. Dockerfile +7 -0
  2. start.sh +1 -32
Dockerfile CHANGED
@@ -11,6 +11,13 @@ RUN apt-get update && apt-get install -y \
11
  # Clone Paperclip (depth=1 for speed, uses repo's default branch)
12
  RUN git clone --depth=1 https://github.com/paperclipai/paperclip.git .
13
 
 
 
 
 
 
 
 
14
  # Install dependencies (corepack picks correct pnpm version from packageManager field)
15
  RUN pnpm install
16
 
 
11
  # Clone Paperclip (depth=1 for speed, uses repo's default branch)
12
  RUN git clone --depth=1 https://github.com/paperclipai/paperclip.git .
13
 
14
+ # Patch firstBlockedChainFinding to cap chain depth at 500.
15
+ # Node's default ~1MB call stack overflows on deep recovery-issue chains created by
16
+ # runaway agents. Cycle detection is already in place via the Set; we
17
+ # add a size cap so deep linear chains short-circuit instead of crashing.
18
+ RUN sed -i 's|if (seen.has(current.id))|if (seen.size > 500 || seen.has(current.id))|' \
19
+ server/src/services/recovery/issue-graph-liveness.ts
20
+
21
  # Install dependencies (corepack picks correct pnpm version from packageManager field)
22
  RUN pnpm install
23
 
start.sh CHANGED
@@ -212,40 +212,9 @@ cleanup() {
212
  }
213
  trap cleanup SIGTERM SIGINT
214
 
215
- # ── Purge orphaned heartbeat run that triggers recovery stack overflow ───────
216
- ORPHAN_RUN_ID="1fc2bf0e-f983-4ec7-b941-2df338d53ab4"
217
- echo "Purging orphaned run ${ORPHAN_RUN_ID} from all UUID columns..."
218
- su - postgres -c "psql paperclip" <<SQLEOF >/dev/null 2>&1 || true
219
- \echo 'Heartbeat/run tables:'
220
- SELECT tablename FROM pg_tables
221
- WHERE schemaname='public'
222
- AND (tablename LIKE '%heartbeat%' OR tablename LIKE '%run%');
223
-
224
- DO \$\$
225
- DECLARE
226
- rec RECORD;
227
- rid uuid := '${ORPHAN_RUN_ID}';
228
- BEGIN
229
- FOR rec IN
230
- SELECT n.nspname AS s, t.relname AS tn, a.attname AS cn
231
- FROM pg_attribute a
232
- JOIN pg_class t ON a.attrelid = t.oid
233
- JOIN pg_namespace n ON t.relnamespace = n.oid
234
- JOIN pg_type ty ON a.atttypid = ty.oid
235
- WHERE n.nspname='public' AND t.relkind='r'
236
- AND ty.typname='uuid' AND NOT a.attisdropped
237
- LOOP
238
- BEGIN
239
- EXECUTE format('DELETE FROM %I.%I WHERE %I = \$1', rec.s, rec.tn, rec.cn) USING rid;
240
- EXCEPTION WHEN OTHERS THEN NULL;
241
- END;
242
- END LOOP;
243
- END \$\$;
244
- SQLEOF
245
-
246
  # ── Launch Paperclip ──────────────────────────────────────────────────────────
247
  echo "Starting Paperclip..."
248
- node --unhandled-rejections=none --import ./server/node_modules/tsx/dist/loader.mjs server/dist/index.js &
249
  PAPERCLIP_PID=$!
250
 
251
  # Wait for API ready (max 90s)
 
212
  }
213
  trap cleanup SIGTERM SIGINT
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  # ── Launch Paperclip ──────────────────────────────────────────────────────────
216
  echo "Starting Paperclip..."
217
+ node --import ./server/node_modules/tsx/dist/loader.mjs server/dist/index.js &
218
  PAPERCLIP_PID=$!
219
 
220
  # Wait for API ready (max 90s)