ITFormer / scripts /run_compare_adapters_4gpu.sh
a12354's picture
Add files using upload-large-folder tool
aabdb98 verified
Raw
History Blame Contribute Delete
2.75 kB
#!/bin/bash
# NOTE: -e is not enabled. Experiment failures are captured explicitly further
# down so that the remaining experiment is still attempted; steps that must not
# fail are guarded individually with `|| exit`.
set -uo pipefail

# Run adapter comparison experiments sequentially:
#   1. Original ITFormer adapter
#   2. Plain Q-Former adapter
#
# Each experiment writes its own log. A failure is recorded and the next
# experiment still runs.
#
# Optional environment overrides (default in parentheses):
#   COMPARE_RUN_ID  id embedded in log and output names (current timestamp)
#   LOG_DIR         log directory; relative paths resolve against the project
#                   root because the script changes into it ("logs")
#   SUMMARY_LOG     file that receives a copy of everything this script prints
#   BASE_SCRIPT     script executed once per experiment
#                   (<project>/scripts/run_full_train_4gpu.sh)
#   RUN_STAGE_A     forwarded to BASE_SCRIPT ("auto")

# Resolve the project root from this file's location so the script can be
# started from any working directory. Abort if that is impossible: continuing
# would write logs/ and save/ relative to whatever directory the caller was in.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "ERROR: cannot resolve the directory containing this script" >&2
  exit 1
}
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" || {
  echo "ERROR: cannot resolve the project directory above ${SCRIPT_DIR}" >&2
  exit 1
}
cd "$PROJECT_DIR" || {
  echo "ERROR: cannot change into project directory: ${PROJECT_DIR}" >&2
  exit 1
}

COMPARE_RUN_ID="${COMPARE_RUN_ID:-$(date +%Y%m%d_%H%M%S)}"
LOG_DIR="${LOG_DIR:-logs}"
SUMMARY_LOG="${SUMMARY_LOG:-${LOG_DIR}/compare_adapters_4gpu_${COMPARE_RUN_ID}.log}"
BASE_SCRIPT="${BASE_SCRIPT:-${PROJECT_DIR}/scripts/run_full_train_4gpu.sh}"

# SUMMARY_LOG can be overridden to live outside LOG_DIR, so create its parent
# directory as well; otherwise tee could not open the file.
mkdir -p -- "$LOG_DIR" "$(dirname -- "$SUMMARY_LOG")"
touch "$SUMMARY_LOG"

# From here on, stdout and stderr of this script (and of every child process
# that inherits them) go to the terminal and are appended to SUMMARY_LOG.
exec > >(tee -a "$SUMMARY_LOG") 2>&1

echo "Compare run id: $COMPARE_RUN_ID"
echo "Summary log: $SUMMARY_LOG"
echo "Base script: $BASE_SCRIPT"
#######################################
# Run one experiment through BASE_SCRIPT and print start/result banners.
# Globals:
#   COMPARE_RUN_ID (read) - prefix of the per-experiment run id
#   BASE_SCRIPT    (read) - script executed with bash
#   RUN_STAGE_A    (read) - forwarded to the child; "auto" when unset or empty
# Arguments:
#   $1 - experiment name (used in banners and in the run id)
#   $2 - adapter type, passed to the child as ADAPTER_TYPE
#   $3 - output directory, passed to the child as SFT_OUTPUT_DIR
#   $4 - experiment log path, passed to the child as LOG_FILE
# Outputs:
#   Banner and status lines on stdout, plus whatever the child prints.
# Returns:
#   0 when the child succeeds, otherwise the child's exit status.
#######################################
run_experiment() {
  local label="$1"
  local adapter="$2"
  local out_dir="$3"
  local log_path="$4"
  local tagged_run_id="${COMPARE_RUN_ID}_${label}"
  local rc=0

  printf '%s\n' \
    "" \
    "========== START ${label} ==========" \
    "Time: $(date '+%Y-%m-%d %H:%M:%S')" \
    "Adapter: ${adapter}" \
    "Run id: ${tagged_run_id}" \
    "Output dir: ${out_dir}" \
    "Experiment log: ${log_path}"

  # The assignments prefix the command, so they are placed in the child's
  # environment only and do not persist in this shell.
  RUN_ID="$tagged_run_id" \
    ADAPTER_TYPE="$adapter" \
    RUN_STAGE_A="${RUN_STAGE_A:-auto}" \
    SFT_OUTPUT_DIR="$out_dir" \
    LOG_FILE="$log_path" \
    bash "$BASE_SCRIPT" || rc=$?

  # Failure path first; the timestamp is taken after the child has finished.
  if (( rc != 0 )); then
    printf '%s\n' \
      "========== FAILED ${label} ==========" \
      "Time: $(date '+%Y-%m-%d %H:%M:%S')" \
      "Exit code: ${rc}" \
      "See experiment log: ${log_path}"
    return "$rc"
  fi

  printf '%s\n' \
    "========== SUCCESS ${label} ==========" \
    "Time: $(date '+%Y-%m-%d %H:%M:%S')"
  return 0
}
# Exit status of each experiment. 0 (success) is kept unless the matching
# run_experiment call returns non-zero.
ITFORMER_STATUS=0
QFORMER_STATUS=0

# Output directories are named once and reused by the run and by the summary.
itformer_out="save/sft_itformer_qwen2.5_3B_${COMPARE_RUN_ID}"
qformer_out="save/sft_qformer_qwen2.5_3B_${COMPARE_RUN_ID}"

# `|| STATUS=$?` records a failure without stopping the script, so the second
# experiment is attempted even if the first one fails.
run_experiment "itformer" "itformer" "$itformer_out" \
  "${LOG_DIR}/compare_itformer_${COMPARE_RUN_ID}.log" || ITFORMER_STATUS="$?"

run_experiment "qformer" "qformer" "$qformer_out" \
  "${LOG_DIR}/compare_qformer_${COMPARE_RUN_ID}.log" || QFORMER_STATUS="$?"

printf '%s\n' \
  "" \
  "========== COMPARISON SUMMARY ==========" \
  "ITFormer status: $ITFORMER_STATUS" \
  "Q-Former status: $QFORMER_STATUS" \
  "Summary log: $SUMMARY_LOG" \
  "ITFormer output: $itformer_out" \
  "Q-Former output: $qformer_out"

# The script exits non-zero when at least one experiment failed.
if (( ITFORMER_STATUS == 0 && QFORMER_STATUS == 0 )); then
  printf '%s\n' "All experiments completed successfully."
else
  printf '%s\n' "One or more experiments failed, but all scheduled experiments were attempted."
  exit 1
fi