File size: 997 Bytes
e2bfccc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env bash
#
# Print a status report for a single job directory.
#
# Environment:
#   RUN_ID      (required) run identifier. Every character outside
#               A-Za-z0-9_.- is replaced with '_' to form the name of the
#               job directory looked up under JOB_ROOT.
#   JOB_ROOT    (optional) directory that holds the per-run job directories.
#   TAIL_LINES  (optional) how many trailing lines of train.log to show
#               (default: 80).
#
# Exit status:
#   0  report printed
#   1  the job directory does not exist
#   2  RUN_ID is missing or cannot name a job directory
set -euo pipefail

RUN_ID="${RUN_ID:-}"
JOB_ROOT="${JOB_ROOT:-/home/student/YouZheng/jobs/taotern}"

if [[ -z "$RUN_ID" ]]; then
  echo "RUN_ID is required" >&2
  exit 2
fi

# Collapse the id into a single path component: '/' and every other character
# outside the allow-list becomes '_'. printf (not echo) avoids feeding a
# trailing newline to tr, which would otherwise be turned into a '_'.
safe_run_id="$(printf '%s' "$RUN_ID" | tr -c 'A-Za-z0-9_.-' '_')"

# '.' and '..' consist solely of allowed characters, so they pass the filter
# unchanged, yet they resolve to JOB_ROOT itself or to its parent rather than
# to a per-run directory. Treat them as a usage error.
if [[ "$safe_run_id" == "." || "$safe_run_id" == ".." ]]; then
  echo "RUN_ID must not be '.' or '..'" >&2
  exit 2
fi

# Strip one trailing slash from JOB_ROOT so the joined path has no '//'.
job_dir="${JOB_ROOT%/}/${safe_run_id}"

if [[ ! -d "$job_dir" ]]; then
  echo "Job directory not found: $job_dir" >&2
  exit 1
fi

# Report body. Every probe below is best-effort: a missing file, a missing
# directory, or no matching tmux session leaves its section empty instead of
# aborting the script under `set -e` / `pipefail`.

# Print a "== <title> ==" section heading on stdout.
section() {
  printf '== %s ==\n' "$1"
}

# List regular files under directory $1, descending at most $2 levels, and
# print the last $3 paths of the sorted listing. Errors from find (such as a
# missing directory) are discarded.
last_files() {
  find "$1" -maxdepth "$2" -type f 2>/dev/null | sort | tail -n "$3" || :
}

log_lines="${TAIL_LINES:-80}"

section "status.json"
cat "${job_dir}/status.json" 2>/dev/null || :
printf '\n'

section "markers"
ls -1 "${job_dir}/DONE" "${job_dir}/FAILED" 2>/dev/null || :
printf '\n'

section "tmux"
# Fixed-string match on the session listing; no match is not an error.
tmux ls 2>/dev/null | grep -F "taotern_${safe_run_id}" || :
printf '\n'

section "recent log"
tail -n "$log_lines" "${job_dir}/train.log" 2>/dev/null || :
printf '\n'

section "outputs"
last_files "${job_dir}/outputs" 2 40
printf '\n'

section "checkpoints"
last_files "${job_dir}/checkpoints" 1 20