File size: 4,929 Bytes
80d8c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env bash
# ReplicaLab training entrypoint for Northflank GPU jobs.
#
# Usage:
#   MODE=train ./scripts/train.sh          # full pipeline (scientist + lab manager training, then baseline eval)
#   MODE=scientist ./scripts/train.sh      # scientist GRPO only
#   MODE=lab-manager ./scripts/train.sh    # lab manager SFT only
#   MODE=eval ./scripts/train.sh           # baseline evaluation only
#   MODE=server ./scripts/train.sh         # just run server (default)
#
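# In train and eval modes the script starts the ReplicaLab server in the
# background (needed for rollout evaluation) before running the requested
# flow. The scientist and lab-manager modes run without the server, and
# server mode runs it in the foreground.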

# Strict mode: stop on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Runtime configuration. Each value keeps what the environment provides and
# otherwise falls back to the default shown here.
: "${MODE:=server}"
: "${SEED_COUNT:=8}"
: "${MAX_STEPS:=300}"
: "${MODEL_NAME:=Qwen/Qwen3.5-9B}"
# PERSIST_ROOT is always derived from REPLICALAB_PERSIST_ROOT (or the default).
PERSIST_ROOT="${REPLICALAB_PERSIST_ROOT:-/app/outputs/training}"
# Fixed address of the locally started server; not overridable.
BASE_URL="http://localhost:7860"

# Startup banner summarising the effective configuration.
printf '%s\n' \
    "==========================================" \
    " ReplicaLab Training Pipeline" \
    "==========================================" \
    " Mode:        $MODE" \
    " Model:       $MODEL_NAME" \
    " Seeds:       $SEED_COUNT" \
    " Max steps:   $MAX_STEPS" \
    " Persist:     $PERSIST_ROOT" \
    " Server URL:  $BASE_URL" \
    "=========================================="

# ── Start server in background (needed for eval rollouts) ──────────────
#######################################
# Launch uvicorn in the background and poll its /health endpoint.
# Globals:
#   SERVER_PID (written) - PID of the background uvicorn process.
# Outputs:
#   Progress messages on stdout; warnings and errors on stderr.
# Returns:
#   0 once /health answers, or after 30 unanswered probes (best effort:
#     a slow server is tolerated and the caller continues);
#   1 if the server process exits before it ever answers, because it can
#     no longer become ready and waiting would only delay the failure.
#######################################
start_server() {
    local -r health_url="http://localhost:7860/health"
    local attempt

    echo "[train.sh] Starting ReplicaLab server on port 7860..."
    uvicorn server.app:app --host 0.0.0.0 --port 7860 &
    SERVER_PID=$!
    echo "[train.sh] Server PID: $SERVER_PID"

    # Probe once per second, up to 30 times.
    for ((attempt = 1; attempt <= 30; attempt++)); do
        if curl -sf "$health_url" > /dev/null 2>&1; then
            echo "[train.sh] Server is ready."
            return 0
        fi
        # Signal 0 delivers nothing; it only tests whether the PID still exists.
        if ! kill -0 "$SERVER_PID" 2> /dev/null; then
            echo "[train.sh] ERROR: Server process $SERVER_PID exited during startup." >&2
            return 1
        fi
        sleep 1
    done
    echo "[train.sh] WARNING: Server did not become ready in 30s, continuing anyway." >&2
}

# ── Scientist GRPO training ───────────────────────────────────────────
# Runs the scientist-preview sub-command followed by scientist-train.
# Reads PERSIST_ROOT, MODEL_NAME, SEED_COUNT and MAX_STEPS.
run_scientist_train() {
    local -a cli shared_flags

    printf '\n%s\n\n' "=== Phase 1: Scientist GRPO Training ==="

    cli=(python -m replicalab.training.cli)
    shared_flags=(
        --persist-root "$PERSIST_ROOT"
        --model-name "$MODEL_NAME"
        --seed-count "$SEED_COUNT"
    )

    # Preview pass comes first (noted upstream as not needing a GPU).
    "${cli[@]}" scientist-preview "${shared_flags[@]}"

    # The training run takes the same flags plus the step budget.
    "${cli[@]}" scientist-train "${shared_flags[@]}" --max-steps "$MAX_STEPS"

    printf '%s\n' "[train.sh] Scientist GRPO training complete."
}

# ── Lab Manager SFT training ─────────────────────────────────────────
# Runs lab-manager-preview and then lab-manager-train with identical flags.
# Reads PERSIST_ROOT, MODEL_NAME and SEED_COUNT.
run_lab_manager_train() {
    local stage

    printf '\n%s\n\n' "=== Phase 2: Lab Manager SFT Training ==="

    # Preview first, then the full training run; both take the same flags.
    for stage in lab-manager-preview lab-manager-train; do
        python -m replicalab.training.cli "$stage" \
            --persist-root "$PERSIST_ROOT" \
            --model-name "$MODEL_NAME" \
            --seed-count "$SEED_COUNT"
    done

    printf '%s\n' "[train.sh] Lab Manager SFT training complete."
}

# ── Baseline evaluation ──────────────────────────────────────────────
# Runs the baseline-eval sub-command against the server at BASE_URL.
# Reads PERSIST_ROOT, BASE_URL and SEED_COUNT.
run_eval() {
    local -a eval_cmd

    printf '\n%s\n\n' "=== Baseline Evaluation ==="

    eval_cmd=(
        python -m replicalab.training.cli baseline-eval
        --persist-root "$PERSIST_ROOT"
        --base-url "$BASE_URL"
        --seed-count "$SEED_COUNT"
    )
    "${eval_cmd[@]}"

    printf '%s\n' "[train.sh] Evaluation complete."
}

# ── Mode dispatch ────────────────────────────────────────────────────
# Select the flow named by MODE. Modes that start the background server
# (train, eval) finish by waiting on it, so the script keeps running until
# the server process exits. An unrecognised MODE is reported on stderr and
# the script exits with status 1.

case "$MODE" in
    server)
        echo "[train.sh] Server-only mode."
        # exec replaces this shell with uvicorn; nothing after it runs.
        exec uvicorn server.app:app --host 0.0.0.0 --port 7860
        ;;

    train)
        start_server
        run_scientist_train
        run_lab_manager_train
        run_eval
        echo ""
        echo "=========================================="
        echo " All training complete!"
        echo " Artifacts saved to: $PERSIST_ROOT"
        echo "=========================================="
        # Keep container alive so artifacts can be retrieved
        echo "[train.sh] Training done. Keeping container alive..."
        wait "$SERVER_PID"
        ;;

    scientist)
        # No server is started for this mode.
        run_scientist_train
        ;;

    lab-manager)
        # No server is started for this mode.
        run_lab_manager_train
        ;;

    eval)
        start_server
        run_eval
        # Block until the background server exits.
        wait "$SERVER_PID"
        ;;

    *)
        # Diagnostics go to stderr so they are not mixed into captured stdout.
        echo "Unknown MODE: $MODE" >&2
        echo "Valid modes: server, train, scientist, lab-manager, eval" >&2
        exit 1
        ;;
esac