#!/usr/bin/env bash
#
# Tail summary of the pilot run.
#
# Prints a one-shot, human-readable status report built from the files under
# $WORK:
#   PID    - first token stored in $WORK/run.pid
#   PROC   - `ps` line (pid, elapsed time, %cpu, %mem) for that PID
#   GPU    - memory used/free and utilisation as reported by nvidia-smi
#   STEP   - last 10 lines of $WORK/run.log containing "step="
#   EVAL   - last 5 lines of $WORK/run.log containing "eval"
#   ERRORS - last 5 lines matching OutOfMemory|Traceback|Error
#   CKPTS  - last 5 lines of `ls -la` over $WORK/ckpt-*
#
# The report is best-effort: a missing file, an empty grep result or a dead
# process must not abort the remaining sections, so `set -e` and `pipefail`
# are deliberately NOT enabled. Only unset-variable detection is turned on.
set -u

readonly WORK=/home/ubuntu/work/blt_pilot1

#######################################
# Print the status report for one run directory.
# Arguments: $1 - run directory holding run.pid, run.log and ckpt-* entries
# Outputs:   report on stdout; a note on stderr if nvidia-smi is unavailable
# Returns:   always 0 (every section is best-effort)
#######################################
print_summary() {
  local work=$1
  local pid_file="${work}/run.pid"
  local log_file="${work}/run.log"
  local pid=""

  # Read the PID file once and reuse the value for both the PID and PROC
  # sections. `read` without IFS= trims surrounding whitespace; `|| true`
  # covers an empty file or a missing trailing newline.
  if [[ -r "$pid_file" ]]; then
    read -r pid _ < "$pid_file" || true
  fi

  printf '%s\n' "==== PID ===="
  if [[ -n "$pid" ]]; then
    printf '%s\n' "$pid"
  fi
  printf '\n'

  printf '%s\n' "==== PROC ===="
  # Only hand a purely numeric value to ps; anything else is not a PID.
  if [[ "$pid" =~ ^[0-9]+$ ]]; then
    # stderr is silenced on purpose: a finished process is a normal state.
    ps -p "$pid" -o pid,etime,pcpu,pmem 2>/dev/null | head -n 3
  fi
  printf '\n'

  printf '%s\n' "==== GPU ===="
  if command -v nvidia-smi >/dev/null 2>&1; then
    nvidia-smi --query-gpu=memory.used,memory.free,utilization.gpu \
      --format=csv,noheader
  else
    printf '%s\n' "nvidia-smi not found" >&2
  fi
  printf '\n'

  # For the log sections stderr is silenced on purpose: the log may not
  # exist yet, and an empty section is the intended output in that case.
  printf '%s\n' "==== LAST 10 STEP LINES ===="
  grep -- "step=" "$log_file" 2>/dev/null | tail -n 10
  printf '\n'

  printf '%s\n' "==== LAST EVAL ===="
  grep -- "eval" "$log_file" 2>/dev/null | tail -n 5
  printf '\n'

  printf '%s\n' "==== ANY ERRORS? ===="
  grep -E -- "OutOfMemory|Traceback|Error" "$log_file" 2>/dev/null | tail -n 5
  printf '\n'

  printf '%s\n' "==== CKPTS ===="
  # ls is used for display only (its output is never parsed). When nothing
  # matches, the glob stays literal and the resulting ls error is discarded.
  ls -la -- "$work"/ckpt-* 2>/dev/null | tail -n 5

  return 0
}

print_summary "$WORK"