Spaces:
Running
Running
File size: 4,929 Bytes
80d8c84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | #!/usr/bin/env bash
# ReplicaLab training entrypoint for Northflank GPU jobs.
#
# Usage:
# MODE=train ./scripts/train.sh # full training (scientist + lab manager)
# MODE=scientist ./scripts/train.sh # scientist GRPO only
# MODE=lab-manager ./scripts/train.sh # lab manager SFT only
# MODE=eval ./scripts/train.sh # baseline evaluation only
# MODE=server ./scripts/train.sh # just run server (default)
#
# The script starts the ReplicaLab server in the background (needed for
# rollout evaluation), then runs the requested training flow.
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Settings taken from the environment, with fallbacks when unset or empty.
: "${MODE:=server}"
: "${SEED_COUNT:=8}"
: "${MAX_STEPS:=300}"
: "${MODEL_NAME:=Qwen/Qwen3.5-9B}"
# The persist root is read from REPLICALAB_PERSIST_ROOT, not PERSIST_ROOT.
PERSIST_ROOT="${REPLICALAB_PERSIST_ROOT:-/app/outputs/training}"
# Fixed address handed to the evaluation CLI via --base-url.
BASE_URL="http://localhost:7860"

# Print the effective configuration once at startup.
cat <<EOF
==========================================
 ReplicaLab Training Pipeline
==========================================
 Mode: $MODE
 Model: $MODEL_NAME
 Seeds: $SEED_COUNT
 Max steps: $MAX_STEPS
 Persist: $PERSIST_ROOT
 Server URL: $BASE_URL
==========================================
EOF
# ── Start server in background (needed for eval rollouts) ─────────────
#######################################
# Launch the ReplicaLab server in the background and poll its /health
# endpoint until it answers.
# Globals:
#   SERVER_PID (written) - PID of the background uvicorn process; the
#                          mode dispatch later waits on it.
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; warnings on stderr.
# Returns:
#   0 in every case. A server that is still not ready after the last poll,
#   or whose process has already exited, only produces a warning so the
#   caller can continue.
#######################################
start_server() {
  local -r port=7860
  local -r max_attempts=30
  local attempt

  echo "[train.sh] Starting ReplicaLab server on port ${port}..."
  uvicorn server.app:app --host 0.0.0.0 --port "$port" &
  SERVER_PID=$!
  echo "[train.sh] Server PID: $SERVER_PID"

  # Poll once per second until the health endpoint responds.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if curl -sf "http://localhost:${port}/health" > /dev/null 2>&1; then
      echo "[train.sh] Server is ready."
      return 0
    fi
    # A process that has already exited can never become healthy, so stop
    # polling instead of burning the remaining attempts.
    if ! kill -0 "$SERVER_PID" 2> /dev/null; then
      echo "[train.sh] WARNING: Server process $SERVER_PID exited before becoming ready, continuing anyway." >&2
      return 0
    fi
    sleep 1
  done
  echo "[train.sh] WARNING: Server did not become ready in ${max_attempts}s, continuing anyway." >&2
}
# ── Scientist GRPO training ───────────────────────────────────────────
#######################################
# Phase 1: run the scientist preview, then the scientist GRPO training,
# through the replicalab.training.cli module.
# Globals:
#   PERSIST_ROOT, MODEL_NAME, SEED_COUNT, MAX_STEPS (read)
# Arguments:
#   None
# Outputs:
#   Phase header and completion message on stdout, plus whatever the CLI
#   prints.
#######################################
run_scientist_train() {
  printf '\n%s\n\n' "=== Phase 1: Scientist GRPO Training ==="

  # Flags common to the preview and the training invocation.
  local -a shared_flags=(
    --persist-root "$PERSIST_ROOT"
    --model-name "$MODEL_NAME"
    --seed-count "$SEED_COUNT"
  )

  # Preview step (no GPU needed).
  python -m replicalab.training.cli scientist-preview "${shared_flags[@]}"

  # Full training run; only this step takes the step limit.
  python -m replicalab.training.cli scientist-train "${shared_flags[@]}" \
    --max-steps "$MAX_STEPS"

  printf '%s\n' "[train.sh] Scientist GRPO training complete."
}
# ── Lab Manager SFT training ──────────────────────────────────────────
#######################################
# Phase 2: run the lab manager preview, then the lab manager SFT
# training, through the replicalab.training.cli module.
# Globals:
#   PERSIST_ROOT, MODEL_NAME, SEED_COUNT (read)
# Arguments:
#   None
# Outputs:
#   Phase header and completion message on stdout, plus whatever the CLI
#   prints.
#######################################
run_lab_manager_train() {
  printf '\n%s\n\n' "=== Phase 2: Lab Manager SFT Training ==="

  # Both subcommands receive exactly the same flags.
  local -a cli_flags=(
    --persist-root "$PERSIST_ROOT"
    --model-name "$MODEL_NAME"
    --seed-count "$SEED_COUNT"
  )

  # Preview first, then the full training run, in that order.
  local subcommand
  for subcommand in lab-manager-preview lab-manager-train; do
    python -m replicalab.training.cli "$subcommand" "${cli_flags[@]}"
  done

  printf '%s\n' "[train.sh] Lab Manager SFT training complete."
}
# ── Baseline evaluation ───────────────────────────────────────────────
#######################################
# Run the baseline evaluation through the replicalab.training.cli module,
# pointing it at the server address in BASE_URL.
# Globals:
#   PERSIST_ROOT, BASE_URL, SEED_COUNT (read)
# Arguments:
#   None
# Outputs:
#   Section header and completion message on stdout, plus whatever the
#   CLI prints.
#######################################
run_eval() {
  printf '\n%s\n\n' "=== Baseline Evaluation ==="

  # Assemble the full command as an array so every argument stays one word.
  local -a eval_cmd=(
    python -m replicalab.training.cli baseline-eval
    --persist-root "$PERSIST_ROOT"
    --base-url "$BASE_URL"
    --seed-count "$SEED_COUNT"
  )
  "${eval_cmd[@]}"

  printf '%s\n' "[train.sh] Evaluation complete."
}
# ── Mode dispatch ─────────────────────────────────────────────────────
# Select the workflow named by $MODE. Modes that start the background
# server finish by waiting on it; an unrecognised mode is reported on
# stderr and exits with status 1.
case "$MODE" in
  server)
    echo "[train.sh] Server-only mode."
    # exec replaces this shell process with uvicorn; nothing after it runs.
    exec uvicorn server.app:app --host 0.0.0.0 --port 7860
    ;;
  train)
    start_server
    run_scientist_train
    run_lab_manager_train
    run_eval
    echo ""
    echo "=========================================="
    echo " All training complete!"
    echo " Artifacts saved to: $PERSIST_ROOT"
    echo "=========================================="
    # Keep container alive so artifacts can be retrieved
    echo "[train.sh] Training done. Keeping container alive..."
    wait "$SERVER_PID"
    ;;
  scientist)
    run_scientist_train
    ;;
  lab-manager)
    run_lab_manager_train
    ;;
  eval)
    start_server
    run_eval
    # Block until the background server exits.
    wait "$SERVER_PID"
    ;;
  *)
    # Diagnostics belong on stderr so they are not mixed into normal output.
    echo "Unknown MODE: $MODE" >&2
    echo "Valid modes: server, train, scientist, lab-manager, eval" >&2
    exit 1
    ;;
esac
|