#!/usr/bin/env bash
# Tail summary of the pilot run.
#
# Usage: <this script> [WORK_DIR]
#   WORK_DIR  run directory holding run.pid, run.log and ckpt-* entries
#             (default: /home/ubuntu/work/blt_pilot1).
#
# This is a best-effort status report: a missing file or tool must never abort
# the remaining sections, so strict mode (set -e) is deliberately not enabled.

#######################################
# Print a status report for one run directory.
# Arguments: $1 - run directory
# Outputs:   report sections on stdout
# Returns:   status of the last pipeline (not meaningful to callers)
#######################################
pilot_summary() {
  local work=$1
  local pid_file="$work/run.pid"
  local log_file="$work/run.log"
  local pid="" proc_info=""

  echo "==== PID ===="
  # Show the raw file content; a missing pid file is not an error here.
  cat -- "$pid_file" 2>/dev/null
  echo ""

  echo "==== PROC ===="
  # Read the pid file once and keep only its first whitespace-separated field,
  # so a trailing space or newline cannot leak into the ps argument.
  if [[ -r "$pid_file" ]]; then
    # read returns non-zero when the file has no trailing newline; pid is
    # still assigned in that case.
    read -r pid _ < "$pid_file" || true
  fi
  if [[ -z "$pid" ]]; then
    echo "(no PID recorded in $pid_file)"
  elif [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    echo "($pid_file does not contain a numeric PID)"
  elif proc_info=$(ps -p "$pid" -o pid,etime,pcpu,pmem 2>/dev/null); then
    printf '%s\n' "$proc_info" | head -3
  else
    # Say so explicitly instead of printing a bare ps header for a stale PID.
    echo "(no live process with PID $pid)"
  fi
  echo ""

  echo "==== GPU ===="
  if command -v nvidia-smi >/dev/null 2>&1; then
    nvidia-smi --query-gpu=memory.used,memory.free,utilization.gpu --format=csv,noheader
  else
    echo "(nvidia-smi not found)"
  fi
  echo ""

  echo "==== LAST 10 STEP LINES ===="
  grep "step=" "$log_file" 2>/dev/null | tail -10
  echo ""

  echo "==== LAST EVAL ===="
  grep "eval" "$log_file" 2>/dev/null | tail -5
  echo ""

  echo "==== ANY ERRORS? ===="
  grep -E "OutOfMemory|Traceback|Error" "$log_file" 2>/dev/null | tail -5
  echo ""

  echo "==== CKPTS ===="
  # The glob stays outside the quotes so it still expands; when nothing
  # matches, ls receives the literal pattern and its error is discarded.
  # NOTE(review): if ckpt-* are directories this lists their contents, as the
  # original did - confirm whether `ls -lad` was intended.
  ls -la -- "$work"/ckpt-* 2>/dev/null | tail -5
}

WORK=${1:-/home/ubuntu/work/blt_pilot1}
pilot_summary "$WORK"