File size: 753 Bytes
9477b5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/usr/bin/env bash
# Tail summary of the pilot run.
#
# Usage: <this-script> [WORK_DIR]
#   WORK_DIR  directory holding run.pid, run.log and ckpt-* entries
#             (default: /home/ubuntu/work/blt_pilot1)
#
# Prints, in order: the recorded PID, a `ps` row for that PID, GPU memory and
# utilization from nvidia-smi, the last 10 log lines containing "step=", the
# last 5 log lines containing "eval", the last 5 log lines that look like
# errors, and the last 5 lines of a long listing of ckpt-* entries.
#
# Every section is best-effort: a missing pid file, log file or checkpoint
# simply leaves that section empty. Strict mode (set -e / pipefail) is
# intentionally not enabled, because grep exits 1 when nothing matches and
# that is a normal outcome here, not a failure.

# Print a section banner in the form "==== TITLE ====".
section() {
  printf '==== %s ====\n' "$1"
}

main() {
  local work=${1:-/home/ubuntu/work/blt_pilot1}
  local pid_file="${work}/run.pid"
  local log_file="${work}/run.log"
  local pid=""

  # Read the pid file once so the PID printed and the PID inspected by ps are
  # guaranteed to be the same value. `read` keeps only the first
  # whitespace-delimited word; `|| true` tolerates a missing final newline.
  if [[ -r "$pid_file" ]]; then
    read -r pid _ <"$pid_file" || true
  fi

  section "PID"
  if [[ -n "$pid" ]]; then
    printf '%s\n' "$pid"
  fi
  echo ""

  section "PROC"
  if [[ -n "$pid" ]]; then
    # stderr is discarded on purpose: a stale PID is not worth reporting.
    ps -p "$pid" -o pid,etime,pcpu,pmem 2>/dev/null | head -n 3
  fi
  echo ""

  section "GPU"
  if command -v nvidia-smi >/dev/null 2>&1; then
    nvidia-smi --query-gpu=memory.used,memory.free,utilization.gpu --format=csv,noheader
  else
    echo "nvidia-smi not found" >&2
  fi
  echo ""

  # For the grep sections below, stderr is discarded on purpose so that a log
  # file that does not exist yet yields an empty section instead of an error.
  section "LAST 10 STEP LINES"
  grep -- "step=" "$log_file" 2>/dev/null | tail -n 10
  echo ""

  section "LAST EVAL"
  grep -- "eval" "$log_file" 2>/dev/null | tail -n 5
  echo ""

  section "ANY ERRORS?"
  grep -E -- "OutOfMemory|Traceback|Error" "$log_file" 2>/dev/null | tail -n 5
  echo ""

  section "CKPTS"
  # The glob stays outside the quotes so it still expands; when nothing
  # matches, ls fails on the literal pattern and its error is discarded.
  ls -la -- "$work"/ckpt-* 2>/dev/null | tail -n 5
}

main "$@"