File size: 2,856 Bytes
9c12e58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env bash
# Boot guard for the api-server workflow. Solves the recurring
# `EADDRINUSE :::8080` flap where the Replit workflow manager
# double-fires a restart or a previous node process orphans on the
# port. Mirrors scripts/start-research-engine.sh.
#
# Behaviour, in order:
#   1. If a healthy api-server is already serving on $PORT, this
#      wrapper does not start a second one. Instead it parks itself
#      with `exec sleep infinity` so the workflow stays in RUNNING
#      (Replit treats an immediate exit as a crash and would restart
#      again, re-triggering the loop).
#   2. Otherwise, attempt to evict any squatting node process bound
#      to the built bundle path, sleep briefly, and exec the real
#      start command. The in-process EADDRINUSE FATAL in
#      src/index.ts:126 stays as a last-line backstop.
#
# Bypass for one-off debugging:  set START_API_SERVER_BYPASS=1 to
# skip the health probe + squatter eviction and exec node directly.
#
# This script must be run from the api-server package cwd (which is
# what `pnpm --filter @workspace/api-server run dev` does for us);
# it relies on ./dist/index.mjs being relative to that cwd.

set -euo pipefail

# Port the api-server is probed on; taken from the environment, 8080
# otherwise.
PORT="${PORT:-8080}"
# Endpoint probed to decide whether a working instance is already up.
readonly HEALTH_URL="http://127.0.0.1:${PORT}/api/healthz"
# Built bundle, resolved against the current working directory.
readonly BUNDLE_REL="./dist/index.mjs"
# Regex handed to `pgrep -f` / `pkill -f`, i.e. matched against the full
# command line. The dot is escaped so it only matches a literal ".".
readonly BUNDLE_PATTERN='dist/index\.mjs'
# Polling budgets, in 0.1 s ticks, granted to stale processes after
# SIGTERM and after SIGKILL respectively.
readonly TERM_GRACE_TICKS=3
readonly KILL_GRACE_TICKS=10

#######################################
# Write a prefixed diagnostic line to stderr.
# Arguments: message words
#######################################
log() {
  printf 'start-api-server: %s\n' "$*" >&2
}

#######################################
# Print the PIDs (one per line) of processes whose command line matches
# BUNDLE_PATTERN; prints nothing when there are none.
# Globals: BUNDLE_PATTERN (read)
#######################################
stale_pids() {
  # pgrep exits non-zero when nothing matches; that is the normal case
  # here, so it must not trip `set -e`.
  pgrep -f "${BUNDLE_PATTERN}" 2>/dev/null || true
}

#######################################
# Poll until no process matches BUNDLE_PATTERN any more.
# Sleeps before each probe, so at least one tick always elapses after
# the signal that preceded this call.
# Arguments: $1 - maximum number of 0.1 s ticks to wait
# Returns:   0 once no match remains, 1 if the budget ran out first
#######################################
wait_until_gone() {
  local ticks=$1
  while (( ticks > 0 )); do
    sleep 0.1
    if [[ -z "$(stale_pids)" ]]; then
      return 0
    fi
    ticks=$(( ticks - 1 ))
  done
  return 1
}

#######################################
# Terminate lingering processes running the built bundle: SIGTERM
# first, SIGKILL for whatever survives the grace period.
#
# Matching is by command line only. This environment does not provide
# `fuser`, `lsof`, `ss`, or `netstat`, so a strict port-scoped kill is
# not available; the bundle-path pattern is unique enough in practice.
# Globals: BUNDLE_PATTERN, PORT, TERM_GRACE_TICKS, KILL_GRACE_TICKS (read)
# Returns: 0 always; a surviving process is reported, not fatal
#######################################
evict_stale_bundle() {
  local pids
  pids="$(stale_pids)"
  [[ -n "${pids}" ]] || return 0

  log "evicting stale node bundle on :${PORT} (pids: ${pids//$'\n'/ })"
  # pkill exits non-zero if the processes vanished in the meantime.
  pkill -TERM -f "${BUNDLE_PATTERN}" || true
  if wait_until_gone "${TERM_GRACE_TICKS}"; then
    return 0
  fi

  # Re-query so the log names only what actually survived SIGTERM.
  pids="$(stale_pids)"
  [[ -n "${pids}" ]] || return 0

  log "SIGTERM did not clear, escalating to SIGKILL (pids: ${pids//$'\n'/ })"
  pkill -KILL -f "${BUNDLE_PATTERN}" || true
  if ! wait_until_gone "${KILL_GRACE_TICKS}"; then
    # Not fatal: the server start is still attempted and will report
    # the port conflict itself if the squatter really holds the port.
    log "warning: stale process still present after SIGKILL, starting anyway"
  fi
}

#######################################
# Replace this shell with the node server process.
# Globals: BUNDLE_REL (read)
# Returns: does not return; exits 1 when the bundle is missing
#######################################
launch_server() {
  # The bundle path is relative, so a wrong cwd or a missing build
  # would otherwise only surface as a node module-resolution error.
  if [[ ! -f "${BUNDLE_REL}" ]]; then
    log "bundle ${BUNDLE_REL} not found (cwd: ${PWD}); build it and run from the api-server package directory"
    exit 1
  fi
  exec node --enable-source-maps "${BUNDLE_REL}"
}

#######################################
# Entry point: bypass switch, health probe, eviction, launch.
# Globals: START_API_SERVER_BYPASS, HEALTH_URL, PORT (read)
#######################################
main() {
  if [[ "${START_API_SERVER_BYPASS:-}" == "1" ]]; then
    log "BYPASS=1, skipping guard"
    launch_server
  fi

  # The probe and the eviction below discard tool errors, so a missing
  # tool would silently disable part of the guard (without curl every
  # instance looks unhealthy). Say so instead of failing quietly.
  local tool
  for tool in curl pgrep pkill; do
    if ! command -v "${tool}" >/dev/null 2>&1; then
      log "warning: ${tool} not found on PATH; boot guard is degraded"
    fi
  done

  # -f turns HTTP error statuses into a non-zero exit; --max-time keeps
  # a hung instance from stalling the boot.
  if curl -fsS --max-time 2 "${HEALTH_URL}" >/dev/null 2>&1; then
    log "healthy instance already on :${PORT}, skipping start"
    # Block forever so the workflow stays in RUNNING; otherwise Replit
    # treats the immediate exit as a crash and restarts again.
    exec sleep infinity
  fi

  # No healthy instance, but a squatter may still hold the port (e.g.
  # a previous boot crashed mid-startup).
  evict_stale_bundle
  launch_server
}

main "$@"