#!/bin/bash
#
# Evaluate the newest TempSampR1 single-span forensics checkpoint.
#
# Steps:
#   1. Pick the checkpoint-<N> entry with the largest numeric suffix under
#      CKPT_ROOT.
#   2. Launch NUM_RANKS evaluate_forensics.py processes in parallel, one per
#      rank, each restricted to a single GPU through CUDA_VISIBLE_DEVICES.
#   3. Run evaluate_grounding_metrics.py on the output directory and save its
#      stdout to grounding_metrics.txt.
#
# The script exits non-zero if no checkpoint is found, if any rank fails, or
# if the metrics step fails.

# -e: stop on errors; -u: unset variables are errors;
# pipefail: "python ... | tee" reports python's failure instead of tee's status.
set -euo pipefail

# Print a message on stderr and abort.
die() {
  echo "$*" >&2
  exit 1
}

readonly WORK_DIR=/mnt/local-fast/zhangt/forensics_grpo
readonly CKPT_ROOT=/mnt/local-fast/zhangt/baselines/tempsamp_r1/logs/TempSampR1_single_span_forensics_7B_8gpu_4ep
# Number of parallel evaluation processes (one GPU each).
readonly NUM_RANKS=8

cd "$WORK_DIR" || die "cannot cd to ${WORK_DIR}"

# ---------------------------------------------------------------------------
# Select the latest checkpoint.
# Glob directly instead of parsing `ls`, so paths with whitespace survive.
# nullglob makes an unmatched pattern expand to nothing.
# ---------------------------------------------------------------------------
shopt -s nullglob
candidates=("${CKPT_ROOT}"/checkpoint-*)
shopt -u nullglob

MODEL=
best_step=-1
# Length check first: expanding an empty array under `set -u` is an error on
# bash versions older than 4.4.
if ((${#candidates[@]} > 0)); then
  for candidate in "${candidates[@]}"; do
    step=${candidate##*-}
    # Only purely numeric suffixes take part in the ranking.
    [[ "$step" =~ ^[0-9]+$ ]] || continue
    # 10# forces base 10 so a suffix with leading zeros is not read as octal.
    if ((10#$step > best_step)); then
      best_step=$((10#$step))
      MODEL=$candidate
    fi
  done
fi

if [[ -z "$MODEL" ]]; then
  die "No checkpoint found under ${CKPT_ROOT}/checkpoint-*"
fi

# checkpoint-1234 -> ckpt1234
model_name=${MODEL##*/}
CKPT_TAG=${model_name/checkpoint-/ckpt}
OUT=eval_tempsamp_single_span_${CKPT_TAG}
mkdir -p "$OUT/logs"
echo "Evaluating $MODEL -> $OUT"

# ---------------------------------------------------------------------------
# Environment.
# ${VAR:+:${VAR}} appends the previous value only when it is non-empty; a bare
# trailing ':' would add an empty entry, which is treated as the current
# directory.
# ---------------------------------------------------------------------------
export PATH="/mnt/local-fast/zhangt/torch_env/bin:$PATH"
export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTHONPATH=".${PYTHONPATH:+:${PYTHONPATH}}"

# ---------------------------------------------------------------------------
# Launch one evaluation process per rank in the background.
# Each process has CUDA_VISIBLE_DEVICES set to its own rank index and is
# always passed --device 0.
# ---------------------------------------------------------------------------
pids=()
for ((rank = 0; rank < NUM_RANKS; rank++)); do
  CUDA_VISIBLE_DEVICES=$rank python evaluate_forensics.py \
    --model_path "$MODEL" \
    --rank "$rank" --world_size "$NUM_RANKS" --device 0 \
    --out_dir "$OUT" \
    --cot false --max_new_tokens 64 --temperature 0.0 \
    > "$OUT/logs/rank_${rank}.log" 2>&1 &
  pids+=("$!")
done

# A bare `wait` returns 0 even when jobs failed, and `set -e` does not apply to
# background jobs, so wait on every PID and check each status explicitly.
failed=0
for rank in "${!pids[@]}"; do
  if ! wait "${pids[rank]}"; then
    echo "rank ${rank} failed; see $OUT/logs/rank_${rank}.log" >&2
    failed=1
  fi
done
if ((failed)); then
  die "evaluation incomplete; skipping grounding metrics"
fi
echo "all ${NUM_RANKS} ranks done"

# ---------------------------------------------------------------------------
# Aggregate metrics; tee keeps a copy next to the per-rank outputs.
# ---------------------------------------------------------------------------
python evaluate_grounding_metrics.py --out_dir "$OUT" | tee "$OUT/grounding_metrics.txt"