CausalGrok / code /scripts /list_runs.sh
nileshsarkar-ai's picture
Upload code/scripts
42c0d23 verified
#!/usr/bin/env bash
# scripts/list_runs.sh
# Quick health-check across every experiments/runs/<run_id>/.
# Prints one table row per run directory, most recently modified first:
#   - STATE:
#       RUNNING  run.pid exists and names a live process
#       DONE     process not alive (or no run.pid) and results/summary.json exists
#       DEAD     run.pid exists, process not alive, no results/summary.json
#       unknown  neither run.pid nor results/summary.json exists
#   - PID:      first line of run.pid when the file exists (shown even if the
#               process is gone), otherwise "—"
#   - LAST_LOG: last line of logs/train.log, runs of spaces squeezed,
#               truncated to 90 characters
set -euo pipefail

readonly RUNS_DIR="experiments/runs"

# print_row <run_id> <state> <pid> <last_log>
# Emits one fixed-width table row on stdout.
print_row() {
  printf '%-46s %-8s %-9s %s\n' "$@"
}

# read_pid <run_dir>
# Prints the first line of <run_dir>/run.pid with all whitespace removed.
# Prints nothing when the file is missing, empty, or unreadable.
read_pid() {
  local pid_file="$1/run.pid" pid=""
  [[ -f "$pid_file" ]] || return 0
  # `read` returns non-zero when the file has no trailing newline (the value
  # is still assigned) or cannot be opened; neither should abort the listing.
  { IFS= read -r pid < "$pid_file"; } 2>/dev/null || true
  printf '%s' "${pid//[[:space:]]/}"
}

# run_state <run_dir> <pid>
# Prints RUNNING, DONE, DEAD, or unknown for the given run directory.
run_state() {
  local run_dir="$1" pid="${2:-}"
  local summary="$run_dir/results/summary.json"
  if [[ -f "$run_dir/run.pid" ]]; then
    # Only probe strictly positive integers: `kill -0 0` and negative PIDs
    # address process groups and would succeed for a run that is long gone.
    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
      printf 'RUNNING'
    elif [[ -f "$summary" ]]; then
      printf 'DONE'
    else
      printf 'DEAD'
    fi
  elif [[ -f "$summary" ]]; then
    printf 'DONE'
  else
    printf 'unknown'
  fi
}

# last_log_line <run_dir>
# Prints the last line of <run_dir>/logs/train.log with runs of spaces
# squeezed and cut to 90 characters. Prints nothing when there is no log.
last_log_line() {
  local log_file="$1/logs/train.log"
  [[ -f "$log_file" ]] || return 0
  # Best effort: an unreadable log must not kill the script under pipefail.
  { tail -n 1 -- "$log_file" | tr -s ' ' | cut -c1-90; } 2>/dev/null || true
}

# list_runs <runs_dir>
# Prints the table header and one row per sub-directory of <runs_dir>.
# Prints a notice and returns 0 when <runs_dir> does not exist.
list_runs() {
  local runs_dir="$1"
  if [[ ! -d "${runs_dir}" ]]; then
    echo "No ${runs_dir}/ directory yet."
    return 0
  fi

  print_row "RUN_ID" "STATE" "PID" "LAST_LOG"
  print_row "------" "-----" "---" "--------"

  local name run_dir pid state last
  # `ls -t` supplies the newest-first ordering; reading it line by line
  # (instead of word-splitting it) keeps names with spaces or globs intact.
  while IFS= read -r name; do
    run_dir="${runs_dir}/${name}"
    [[ -d "$run_dir" ]] || continue
    pid="$(read_pid "$run_dir")"
    state="$(run_state "$run_dir" "$pid")"
    last="$(last_log_line "$run_dir")"
    print_row "$name" "$state" "${pid:-—}" "$last"
  done < <(ls -1t -- "${runs_dir}" 2>/dev/null)
}

# Entry point: resolve the repository root (parent of this script's
# directory), move there, and list the runs.
main() {
  local root
  root="$(cd "$(dirname "$0")/.." && pwd)"
  cd "${root}"
  list_runs "${RUNS_DIR}"
}

main "$@"