File size: 5,931 Bytes
b4b2877
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
# Aggregate 135 SLURM job results (265051-265185).
# Writes a markdown summary to ${PULSE_ROOT}/results/run_<UTC-tag>_summary.md
#
# Required environment:
#   PULSE_ROOT  directory that contains the table*/row*/seeds/seed* trees
#               and receives the results/ output directory.
#
# Note: -e is not enabled, so the commands whose failure would make the rest
# of the run meaningless are checked explicitly below.
set -uo pipefail

# ':?' rejects an empty value as well as an unset one; with a plain expansion
# an empty PULSE_ROOT would silently resolve every path against '/'.
ROOT=${PULSE_ROOT:?PULSE_ROOT must be set to the project root directory}
JID_LO=265051
JID_HI=265185
TS=$(date -u +%Y%m%d_%H%M)   # UTC tag used in the output file name
OUT="${ROOT}/results/run_${TS}_summary.md"
mkdir -p "${ROOT}/results" || {
  echo "error: cannot create ${ROOT}/results" >&2
  exit 1
}

# tmp scratch, removed on every exit path
TMP=$(mktemp -d) || {
  echo "error: mktemp -d failed" >&2
  exit 1
}
trap 'rm -rf -- "$TMP"' EXIT

# 1. Walk all seed dirs in submission order; classify each.
#    For each seed dir, pick the slurm_<jid>.out whose jid lies inside
#    [JID_LO, JID_HI]; logs left behind by other submissions are ignored.
#    Status written per seed:
#      OK              "[done] best" line present
#      TIMEOUT         SLURM cancelled the job for time
#      FAIL            traceback / known error marker in the log
#      RUNNING         none of the above and squeue still lists the jid
#      EXITED_NO_DONE  none of the above and the jid is no longer queued
#      NO_LOG          seed dir exists but holds no in-range log
#      MISSING_DIR     seed dir does not exist
ORDER_FILE="$TMP/order.tsv"   # tabletag\trow\tseed\tjid\tstatus\tacc\tepochs\tepoch_best
: > "$ORDER_FILE"

#######################################
# Print the path of the SLURM log in a directory that belongs to a jid range.
# Arguments: $1 - directory to search for slurm_<jid>.out files
#            $2 - lowest accepted job id (inclusive)
#            $3 - highest accepted job id (inclusive)
# Outputs:   path of the matching log with the highest jid, if any
# Returns:   0 when a log was found, 1 otherwise
#######################################
pick_job_log() {
  local dir=$1 lo=$2 hi=$3
  local f jid best="" best_jid=-1
  for f in "$dir"/slurm_*.out; do
    [ -f "$f" ] || continue          # also skips the unexpanded glob
    jid=${f##*/slurm_}
    jid=${jid%.out}
    case $jid in
      '' | *[!0-9]*) continue ;;     # not a plain numeric job id
    esac
    if [ "$jid" -ge "$lo" ] && [ "$jid" -le "$hi" ] && [ "$jid" -gt "$best_jid" ]; then
      best=$f
      best_jid=$jid
    fi
  done
  [ -n "$best" ] || return 1
  printf '%s\n' "$best"
}

for tt in table1_main_comparison table3_horizon_curve table4_modality_ablation table5_component_ablation table7_missing_modality; do
  for row_dir in "${ROOT}/${tt}"/row*; do
    [ -d "$row_dir" ] || continue
    row=$(basename "$row_dir")
    for seed in 42 123 456 789 1024; do
      sd="${row_dir}/seeds/seed${seed}"
      [ -d "$sd" ] || { printf "%s\t%s\t%d\t-\tMISSING_DIR\t-\t-\t-\n" "$tt" "$row" "$seed" >> "$ORDER_FILE"; continue; }
      log=$(pick_job_log "$sd" "$JID_LO" "$JID_HI")
      if [ -z "$log" ]; then
        printf "%s\t%s\t%d\t-\tNO_LOG\t-\t-\t-\n" "$tt" "$row" "$seed" >> "$ORDER_FILE"
        continue
      fi
      jid=$(basename "$log" | sed 's/^slurm_//; s/\.out$//')
      # Determine status
      if grep -q "^\[done\] best" "$log"; then
        line=$(grep "^\[done\] best" "$log" | head -1)
        # head -1 keeps the field single-line even if the pattern repeats
        acc=$(printf '%s\n' "$line" | grep -oE "action@1 = [0-9.]+" | head -1 | awk '{print $3}')
        epoch_best=$(printf '%s\n' "$line" | grep -oE "epoch [0-9]+" | head -1 | awk '{print $2}')
        # last reported epoch number
        last_e=$(grep -E "^  E +[0-9]+" "$log" | tail -1 | awk '{print $2}')
        printf "%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n" "$tt" "$row" "$seed" "$jid" "OK" "${acc}" "${last_e:-?}" "${epoch_best:-?}" >> "$ORDER_FILE"
      elif grep -qE "DUE TO TIME LIMIT|CANCELLED.*TIME" "$log"; then
        printf "%s\t%s\t%d\t%s\tTIMEOUT\t-\t-\t-\n" "$tt" "$row" "$seed" "$jid" >> "$ORDER_FILE"
      elif grep -qE "Traceback|RuntimeError|invalid choice|CUDA error" "$log"; then
        # tabs are flattened so the snippet cannot shift the TSV columns
        err=$(grep -E "Traceback|RuntimeError|invalid choice|CUDA error" "$log" | tail -1 | tr '\t' ' ' | head -c 120)
        printf "%s\t%s\t%d\t%s\tFAIL\t-\t-\t-\t%s\n" "$tt" "$row" "$seed" "$jid" "$err" >> "$ORDER_FILE"
      elif squeue -j "$jid" -h 2>/dev/null | grep -q .; then
        printf "%s\t%s\t%d\t%s\tRUNNING\t-\t-\t-\n" "$tt" "$row" "$seed" "$jid" >> "$ORDER_FILE"
      else
        # fell off queue without [done] and without typical error markers
        printf "%s\t%s\t%d\t%s\tEXITED_NO_DONE\t-\t-\t-\n" "$tt" "$row" "$seed" "$jid" >> "$ORDER_FILE"
      fi
    done
  done
done

# 2. Build markdown

#######################################
# Emit one markdown table row per (table, row) pair that has at least one
# OK seed: counts, mean and population std of action@1, best seed, and the
# lower medians of the last-epoch and best-epoch columns.
# Arguments: $1 - TSV with the columns
#                 table, row, seed, jid, status, acc, epochs, epoch_best
# Outputs:   sorted markdown rows on stdout
#######################################
per_row_stats() {
  awk -F'\t' '
    # Lower median of the numeric entries in a space separated list.
    # parts and vals are function-local, so nothing leaks between calls.
    # Entries that do not start with a digit (the "?" placeholder) are skipped.
    function median(list,    parts, vals, np, cnt, i, j, tmp) {
      np = split(list, parts, " ")
      cnt = 0
      for (i = 1; i <= np; i++)
        if (parts[i] ~ /^[0-9]/) vals[++cnt] = parts[i] + 0
      if (cnt == 0) return "-"
      # insertion sort: the lists hold a handful of seeds, and this avoids
      # the gawk-only asort()
      for (i = 2; i <= cnt; i++) {
        tmp = vals[i]
        for (j = i - 1; j >= 1 && vals[j] > tmp; j--) vals[j + 1] = vals[j]
        vals[j + 1] = tmp
      }
      return vals[int((cnt + 1) / 2)]
    }
    {
      key = $1 "\t" $2
      if ($5 == "OK") {
        acc = $6 + 0
        n[key]++
        sum[key] += acc
        ss[key] += acc * acc
        # the first OK seed always seeds the maximum, so an accuracy of 0
        # still yields a best seed
        if (n[key] == 1 || acc > maxa[key]) { maxa[key] = acc; bestseed[key] = $3 }
        le[key] = le[key] " " $7
        be[key] = be[key] " " $8
      } else {
        fail[key]++
      }
    }
    END {
      for (k in n) {
        tt = k; sub(/\t.*/, "", tt)
        rr = k; sub(/.*\t/, "", rr)
        m = sum[k] / n[k]
        v = ss[k] / n[k] - m * m      # population variance
        if (v < 0) v = 0              # guard against rounding below zero
        printf "| %s | %s | %d | %d | %.4f | %.4f | seed%s | %.4f | %s | %s |\n",
          tt, rr, n[k], fail[k] + 0, m, sqrt(v), bestseed[k], maxa[k], median(le[k]), median(be[k])
      }
    }
  ' "$1" | sort
}

{
  echo "# Run summary — $(date '+%Y-%m-%d %H:%M %Z')"
  echo
  # expected count is derived from the range so the two cannot drift apart
  echo "Job range: \`${JID_LO}-${JID_HI}\` ($((JID_HI - JID_LO + 1)) expected)"
  echo
  echo "## Overall status"
  echo
  echo "| status | count |"
  echo "|---|---|"
  awk -F'\t' '{print $5}' "$ORDER_FILE" | sort | uniq -c | awk '{printf "| %s | %d |\n", $2, $1}'
  echo
  echo "## Per-row mean ± std (action@1)"
  echo
  echo "| table | row | n_ok | n_fail | mean | std | best_seed | best_acc | epochs (median) | best_epoch (median) |"
  echo "|---|---|---:|---:|---:|---:|---|---:|---:|---:|"
  per_row_stats "$ORDER_FILE"
  echo
  echo "## Failed / non-OK jobs"
  echo
  # single pass: list every non-OK row, or the placeholder when there is none
  awk -F'\t' '
    $5 != "OK" { printf "- **%s/%s seed%s** jid=%s status=%s %s\n", $1, $2, $3, $4, $5, $9; bad++ }
    END { if (!bad) print "_None._" }
  ' "$ORDER_FILE"
  echo
  echo "## Notes / known operational concerns"
  echo
  echo "- These are operational results only. Most jobs trigger early-stop (patience=12) at epoch 1–18 instead of running the full 40 epochs, because validation metric saturates very early."
  echo "- \`best action@1\` observed in spot-check ranged 0.6%–3.4% (17 verb × 34 noun = 578 action classes; random ≈ 0.17%). This is a model/hyperparameter issue, not an infra issue."
  echo "- If you want to revisit hparams: try larger patience, lower lr, or warmup. The data loader and GPU stack are confirmed working (cu121 / A800)."
  echo
  echo "## Per-table seed-level details"
  echo
  for tt in table1_main_comparison table3_horizon_curve table4_modality_ablation table5_component_ablation table7_missing_modality; do
    echo "### ${tt}"
    echo
    echo "| row | seed42 | seed123 | seed456 | seed789 | seed1024 |"
    echo "|---|---|---|---|---|---|"
    # one cell per (row, seed): the accuracy for OK jobs, otherwise the status
    awk -F'\t' -v tt="$tt" '
      $1 == tt {
        key = $2
        cell = ($5 == "OK" ? sprintf("%.4f", $6) : "·" $5)
        arr[key, $3] = cell
        rows[key] = 1
      }
      END {
        for (r in rows) {
          printf "| %s | %s | %s | %s | %s | %s |\n", r,
            (arr[r, 42] != "" ? arr[r, 42] : "-"),
            (arr[r, 123] != "" ? arr[r, 123] : "-"),
            (arr[r, 456] != "" ? arr[r, 456] : "-"),
            (arr[r, 789] != "" ? arr[r, 789] : "-"),
            (arr[r, 1024] != "" ? arr[r, 1024] : "-")
        }
      }
    ' "$ORDER_FILE" | sort
    echo
  done
} > "$OUT"

echo "Wrote $OUT"
ls -la "$OUT"