#!/usr/bin/env bash
# Boot guard for the api-server workflow.
#
# Solves the recurring `EADDRINUSE :::8080` flap where the Replit workflow
# manager double-fires a restart or a previous node process orphans on the
# port. Mirrors scripts/start-research-engine.sh.
#
# Behaviour, in order:
#   1. If a healthy api-server is already serving on $PORT, this wrapper
#      replaces itself with `sleep infinity` so the workflow stays in
#      RUNNING (Replit treats an immediate exit as a crash and would
#      restart again, re-triggering the loop).
#   2. Otherwise, evict any squatting process whose command line references
#      the built bundle AND whose working directory is this package, pause
#      briefly, and exec the real start command. The in-process EADDRINUSE
#      FATAL in src/index.ts:126 stays as a last-line backstop.
#
# Environment:
#   PORT                     Port probed for a healthy instance (default 8080).
#   START_API_SERVER_BYPASS  Set to 1 to skip the health probe and squatter
#                            eviction and exec node directly (debugging).
#
# This script must be run from the api-server package cwd (which is what
# `pnpm --filter @workspace/api-server run dev` does for us); it relies on
# ./dist/index.mjs being relative to that cwd, and uses that same cwd to
# decide which stale processes belong to this package.

set -euo pipefail

PORT="${PORT:-8080}"
readonly HEALTH_URL="http://127.0.0.1:${PORT}/api/healthz"
readonly BUNDLE_REL="./dist/index.mjs"
# Extended regex handed to `pgrep -f`; the dot is escaped so it is literal.
readonly BUNDLE_PATTERN='dist/index\.mjs'

#######################################
# Write a prefixed diagnostic line to stderr.
# Arguments: message words
#######################################
log() {
  printf '%s\n' "start-api-server: $*" >&2
}

#######################################
# Replace this shell with the real server process.
# Globals: BUNDLE_REL (read)
#######################################
start_server() {
  exec node --enable-source-maps "${BUNDLE_REL}"
}

#######################################
# Print, one per line, the PIDs of processes that look like a stale copy of
# our bundle.
#
# A process qualifies when its command line matches BUNDLE_PATTERN and its
# working directory is not positively known to differ from ours. The cwd
# check keeps us from signalling another package's `node ./dist/index.mjs`,
# which the bare pattern cannot tell apart. When /proc/<pid>/cwd cannot be
# read, the PID is still reported, so eviction degrades to pattern-only
# matching rather than silently skipping a squatter.
#
# Globals: BUNDLE_PATTERN (read)
# Outputs: matching PIDs on stdout
#######################################
find_stale_pids() {
  local here pid pid_cwd
  here="$(pwd -P)"
  while IFS= read -r pid; do
    [[ -n "${pid}" ]] || continue
    if pid_cwd="$(readlink "/proc/${pid}/cwd" 2>/dev/null)" \
      && [[ "${pid_cwd}" != "${here}" ]]; then
      continue
    fi
    printf '%s\n' "${pid}"
  # pgrep exits non-zero when nothing matches; that is not an error here.
  done < <(pgrep -f "${BUNDLE_PATTERN}" 2>/dev/null || true)
  return 0
}

#######################################
# Terminate stale bundle processes: SIGTERM first, then SIGKILL for any PID
# from the same list that is still signalable after a short grace period.
#
# Signals are sent to the exact PIDs that were collected and logged, never
# by re-matching the pattern, so a process that appears in between is not
# hit by accident.
#
# Globals: PORT (read, for log text only)
#######################################
evict_stale_bundles() {
  local -a pids=() survivors=()
  local pid

  mapfile -t pids < <(find_stale_pids)
  (( ${#pids[@]} > 0 )) || return 0

  log "evicting stale node bundle on :${PORT} (pids: ${pids[*]})"
  # A target may exit between discovery and signal; that is fine.
  kill -TERM "${pids[@]}" 2>/dev/null || true
  # Give the kernel a moment to release the listen socket.
  sleep 0.3

  for pid in "${pids[@]}"; do
    if kill -0 "${pid}" 2>/dev/null; then
      survivors+=("${pid}")
    fi
  done
  (( ${#survivors[@]} > 0 )) || return 0

  log "SIGTERM did not clear, escalating to SIGKILL (pids: ${survivors[*]})"
  kill -KILL "${survivors[@]}" 2>/dev/null || true
  sleep 0.2
}

main() {
  if [[ "${START_API_SERVER_BYPASS:-}" == "1" ]]; then
    log "BYPASS=1, skipping guard"
    start_server
  fi

  if curl -fsS --max-time 2 "${HEALTH_URL}" >/dev/null 2>&1; then
    log "healthy instance already on :${PORT}, skipping start"
    # Block forever so the workflow stays in RUNNING; otherwise Replit
    # treats the immediate exit as a crash and restarts again.
    exec sleep infinity
  fi

  # No healthy instance, but a squatter may still hold the port (e.g. a
  # previous boot crashed mid-startup).
  #
  # Note: this NixOS environment does not provide `fuser`, `lsof`, `ss`, or
  # `netstat`, so a strict port-scoped kill is not available; command-line
  # pattern plus working directory is the narrowest match we can make.
  evict_stale_bundles

  start_server
}

main "$@"