File size: 2,754 Bytes
aabdb98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
set -uo pipefail

# Run adapter comparison experiments sequentially:
#   1. Original ITFormer adapter
#   2. Plain Q-Former adapter
#
# Each experiment writes its own log. A failure is recorded and the next
# experiment still runs.

# Resolve this script's directory and its parent (the project root), then run
# from the project root so the relative log and save paths used below resolve
# the same way regardless of the caller's working directory.
# Each step is checked explicitly because the script does not enable `set -e`;
# continuing after a failed `cd` would write logs and outputs to the wrong place.
SCRIPT_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || {
    echo "ERROR: cannot resolve the script directory" >&2
    exit 1
}
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" || {
    echo "ERROR: cannot resolve the project directory from ${SCRIPT_DIR}" >&2
    exit 1
}
cd "$PROJECT_DIR" || {
    echo "ERROR: cannot cd to ${PROJECT_DIR}" >&2
    exit 1
}

# Settings; each one can be overridden from the environment.
#   COMPARE_RUN_ID - id shared by both experiments (default: current timestamp)
#   LOG_DIR        - directory for the logs (default: logs)
#   SUMMARY_LOG    - combined log of this script's own output
#   BASE_SCRIPT    - training script executed once per experiment
COMPARE_RUN_ID="${COMPARE_RUN_ID:-$(date +%Y%m%d_%H%M%S)}"
LOG_DIR="${LOG_DIR:-logs}"
SUMMARY_LOG="${SUMMARY_LOG:-${LOG_DIR}/compare_adapters_4gpu_${COMPARE_RUN_ID}.log}"
BASE_SCRIPT="${BASE_SCRIPT:-${PROJECT_DIR}/scripts/run_full_train_4gpu.sh}"

# SUMMARY_LOG may be overridden to live outside LOG_DIR, so create its parent
# directory as well before touching the file.
mkdir -p -- "$LOG_DIR" "$(dirname -- "$SUMMARY_LOG")" || {
    echo "ERROR: cannot create log directories for ${SUMMARY_LOG}" >&2
    exit 1
}
touch -- "$SUMMARY_LOG" || {
    echo "ERROR: cannot write summary log ${SUMMARY_LOG}" >&2
    exit 1
}

# From here on, mirror stdout and stderr to both the terminal and the summary log.
exec > >(tee -a "$SUMMARY_LOG") 2>&1

echo "Compare run id: $COMPARE_RUN_ID"
echo "Summary log: $SUMMARY_LOG"
echo "Base script: $BASE_SCRIPT"

#######################################
# Run one adapter experiment through the base training script and report the
# outcome on stdout with START / SUCCESS / FAILED banners.
# Globals:
#   COMPARE_RUN_ID (read) - prefix of the per-experiment run id
#   BASE_SCRIPT    (read) - script executed with `bash`
#   RUN_STAGE_A    (read) - forwarded to the base script; "auto" if unset/empty
# Arguments:
#   $1 - experiment name (shown in banners, appended to the run id)
#   $2 - adapter type, passed to the base script as ADAPTER_TYPE
#   $3 - output directory, passed as SFT_OUTPUT_DIR
#   $4 - experiment log path, passed as LOG_FILE
# Returns:
#   0 when the base script succeeds, otherwise the base script's exit code.
#######################################
run_experiment() {
    local label="$1"
    local adapter="$2"
    local out_dir="$3"
    local log_path="$4"
    local tagged_id="${COMPARE_RUN_ID}_${label}"
    local rc=0

    printf '\n'
    printf '========== START %s ==========\n' "$label"
    printf 'Time: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
    printf 'Adapter: %s\n' "$adapter"
    printf 'Run id: %s\n' "$tagged_id"
    printf 'Output dir: %s\n' "$out_dir"
    printf 'Experiment log: %s\n' "$log_path"

    # The settings are handed over as environment variables scoped to this one
    # invocation; a non-zero status is captured instead of aborting the caller.
    RUN_ID="$tagged_id" \
    ADAPTER_TYPE="$adapter" \
    RUN_STAGE_A="${RUN_STAGE_A:-auto}" \
    SFT_OUTPUT_DIR="$out_dir" \
    LOG_FILE="$log_path" \
        bash "$BASE_SCRIPT" || rc="$?"

    if (( rc != 0 )); then
        printf '========== FAILED %s ==========\n' "$label"
        printf 'Time: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
        printf 'Exit code: %s\n' "$rc"
        printf 'See experiment log: %s\n' "$log_path"
        return "$rc"
    fi

    printf '========== SUCCESS %s ==========\n' "$label"
    printf 'Time: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
    return 0
}

# Exit status of each experiment; stays 0 unless that experiment fails.
ITFORMER_STATUS=0
QFORMER_STATUS=0

# Output directories, named once so the runs and the summary cannot drift apart.
itformer_out="save/sft_itformer_qwen2.5_3B_${COMPARE_RUN_ID}"
qformer_out="save/sft_qformer_qwen2.5_3B_${COMPARE_RUN_ID}"

# Run both experiments back to back. A failure is only recorded, so the second
# experiment is attempted even when the first one fails.
if run_experiment "itformer" "itformer" "$itformer_out" \
    "${LOG_DIR}/compare_itformer_${COMPARE_RUN_ID}.log"; then
    :
else
    ITFORMER_STATUS="$?"
fi

if run_experiment "qformer" "qformer" "$qformer_out" \
    "${LOG_DIR}/compare_qformer_${COMPARE_RUN_ID}.log"; then
    :
else
    QFORMER_STATUS="$?"
fi

printf '\n'
printf '%s\n' "========== COMPARISON SUMMARY =========="
printf 'ITFormer status: %s\n' "$ITFORMER_STATUS"
printf 'Q-Former status: %s\n' "$QFORMER_STATUS"
printf 'Summary log: %s\n' "$SUMMARY_LOG"
printf 'ITFormer output: %s\n' "$itformer_out"
printf 'Q-Former output: %s\n' "$qformer_out"

# Exit non-zero when either experiment failed.
if (( ITFORMER_STATUS == 0 && QFORMER_STATUS == 0 )); then
    printf '%s\n' "All experiments completed successfully."
else
    printf '%s\n' "One or more experiments failed, but all scheduled experiments were attempted."
    exit 1
fi