File size: 1,314 Bytes
42c0d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env bash
# scripts/list_runs.sh
# Quick health-check across every experiments/runs/<run_id>/.
#   - state:    RUNNING / DONE / DEAD / unknown (no run.pid and no summary.json)
#   - PID:      from run.pid (shown even after the process has exited; "—" if none)
#   - LAST_LOG: most recent line of train.log

set -euo pipefail

# Resolve the repository root (parent of this script's directory) so the
# relative RUNS_DIR below works no matter where the script is invoked from.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${ROOT}"

RUNS_DIR="experiments/runs"

#######################################
# Print one table row with the fixed column layout used by the report.
# Arguments: $1 run id, $2 state, $3 pid, $4 last log line
# Outputs:   one formatted row on stdout
#######################################
print_row() {
    printf '%-46s %-8s %-9s %s\n' "$@"
}

#######################################
# Read the PID recorded for a run.
# Arguments: $1 - run directory
# Outputs:   first whitespace-delimited token of <run>/run.pid on stdout,
#            or an empty line if the file is missing, empty or unreadable
#######################################
read_pid() {
    local pid_file="$1/run.pid"
    local pid=""
    if [[ -f "${pid_file}" ]]; then
        # `read` returns non-zero on a missing trailing newline or an
        # unreadable file; neither should abort the listing under `set -e`.
        { read -r pid _ < "${pid_file}"; } 2>/dev/null || true
        pid="${pid%$'\r'}"   # tolerate CRLF-terminated pid files
    fi
    printf '%s\n' "${pid}"
}

#######################################
# Classify a run.
# Arguments: $1 - run directory
#            $2 - PID string as returned by read_pid (may be empty)
# Outputs:   RUNNING | DONE | DEAD | unknown on stdout
#######################################
run_state() {
    local run_dir=$1
    local pid=${2-}
    local summary="${run_dir}/results/summary.json"

    if [[ -f "${run_dir}/run.pid" ]]; then
        # Only probe strictly positive integers: `kill -0 0` and negative
        # values address process groups and would wrongly report RUNNING.
        if [[ "${pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${pid}" 2>/dev/null; then
            printf 'RUNNING\n'
        elif [[ -f "${summary}" ]]; then
            printf 'DONE\n'
        else
            printf 'DEAD\n'
        fi
    elif [[ -f "${summary}" ]]; then
        printf 'DONE\n'
    else
        printf 'unknown\n'
    fi
}

#######################################
# Fetch the most recent line of a run's training log.
# Arguments: $1 - run directory
# Outputs:   last line of <run>/logs/train.log with runs of spaces squeezed
#            and truncated to 90 characters; empty line if there is no log
#######################################
last_log_line() {
    local log_file="$1/logs/train.log"
    local line=""
    if [[ -f "${log_file}" ]]; then
        # Best effort: an unreadable log yields an empty column instead of
        # killing the whole report via `set -e` / `pipefail`.
        line="$(tail -n 1 "${log_file}" 2>/dev/null | tr -s ' ')" || line=""
    fi
    # Substring expansion truncates by character (in a UTF-8 locale), so a
    # multibyte character is never cut in half.
    printf '%s\n' "${line:0:90}"
}

#######################################
# Print the health table for every run directory, newest first.
# Arguments: $1 - directory containing one sub-directory per run
# Outputs:   header plus one row per run on stdout
#######################################
list_runs() {
    local runs_dir=$1
    local run_id run_dir pid state last

    print_row "RUN_ID" "STATE" "PID" "LAST_LOG"
    print_row "------" "-----" "---" "--------"

    # `ls -t` provides the newest-first ordering. Reading it line by line
    # (instead of word-splitting `$(ls ...)`) keeps run ids that contain
    # spaces or glob characters intact. Ids containing newlines are not
    # supported.
    while IFS= read -r run_id; do
        run_dir="${runs_dir}/${run_id}"
        [[ -d "${run_dir}" ]] || continue

        pid="$(read_pid "${run_dir}")"
        state="$(run_state "${run_dir}" "${pid}")"
        last="$(last_log_line "${run_dir}")"

        print_row "${run_id}" "${state}" "${pid:-—}" "${last}"
    done < <(ls -1t -- "${runs_dir}" 2>/dev/null)
}

if [[ ! -d "${RUNS_DIR}" ]]; then
    echo "No ${RUNS_DIR}/ directory yet."
else
    list_runs "${RUNS_DIR}"
fi