replicalab / scripts /train.sh
maxxie114's picture
Initial HF Spaces deployment
80d8c84
#!/usr/bin/env bash
# ReplicaLab training entrypoint for Northflank GPU jobs.
#
# Usage:
# MODE=train ./scripts/train.sh # full training (scientist + lab manager)
# MODE=scientist ./scripts/train.sh # scientist GRPO only
# MODE=lab-manager ./scripts/train.sh # lab manager SFT only
# MODE=eval ./scripts/train.sh # baseline evaluation only
# MODE=server ./scripts/train.sh # just run server (default)
#
# The script starts the ReplicaLab server in the background (needed for
# rollout evaluation), then runs the requested training flow.
set -euo pipefail
MODE="${MODE:-server}"
SEED_COUNT="${SEED_COUNT:-8}"
MAX_STEPS="${MAX_STEPS:-300}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen3.5-9B}"
PERSIST_ROOT="${REPLICALAB_PERSIST_ROOT:-/app/outputs/training}"
BASE_URL="http://localhost:7860"
echo "=========================================="
echo " ReplicaLab Training Pipeline"
echo "=========================================="
echo " Mode: $MODE"
echo " Model: $MODEL_NAME"
echo " Seeds: $SEED_COUNT"
echo " Max steps: $MAX_STEPS"
echo " Persist: $PERSIST_ROOT"
echo " Server URL: $BASE_URL"
echo "=========================================="
# ── Start server in background (needed for eval rollouts) ──────────────
start_server() {
echo "[train.sh] Starting ReplicaLab server on port 7860..."
uvicorn server.app:app --host 0.0.0.0 --port 7860 &
SERVER_PID=$!
echo "[train.sh] Server PID: $SERVER_PID"
# Wait for server to be ready
for i in $(seq 1 30); do
if curl -sf http://localhost:7860/health > /dev/null 2>&1; then
echo "[train.sh] Server is ready."
return 0
fi
sleep 1
done
echo "[train.sh] WARNING: Server did not become ready in 30s, continuing anyway."
}
# ── Scientist GRPO training ───────────────────────────────────────────
run_scientist_train() {
echo ""
echo "=== Phase 1: Scientist GRPO Training ==="
echo ""
# Preview first (no GPU needed)
python -m replicalab.training.cli scientist-preview \
--persist-root "$PERSIST_ROOT" \
--model-name "$MODEL_NAME" \
--seed-count "$SEED_COUNT"
# Full training
python -m replicalab.training.cli scientist-train \
--persist-root "$PERSIST_ROOT" \
--model-name "$MODEL_NAME" \
--seed-count "$SEED_COUNT" \
--max-steps "$MAX_STEPS"
echo "[train.sh] Scientist GRPO training complete."
}
# ── Lab Manager SFT training ─────────────────────────────────────────
run_lab_manager_train() {
echo ""
echo "=== Phase 2: Lab Manager SFT Training ==="
echo ""
# Preview first
python -m replicalab.training.cli lab-manager-preview \
--persist-root "$PERSIST_ROOT" \
--model-name "$MODEL_NAME" \
--seed-count "$SEED_COUNT"
# Full training
python -m replicalab.training.cli lab-manager-train \
--persist-root "$PERSIST_ROOT" \
--model-name "$MODEL_NAME" \
--seed-count "$SEED_COUNT"
echo "[train.sh] Lab Manager SFT training complete."
}
# ── Baseline evaluation ──────────────────────────────────────────────
run_eval() {
echo ""
echo "=== Baseline Evaluation ==="
echo ""
python -m replicalab.training.cli baseline-eval \
--persist-root "$PERSIST_ROOT" \
--base-url "$BASE_URL" \
--seed-count "$SEED_COUNT"
echo "[train.sh] Evaluation complete."
}
# ── Mode dispatch ────────────────────────────────────────────────────
case "$MODE" in
server)
echo "[train.sh] Server-only mode."
exec uvicorn server.app:app --host 0.0.0.0 --port 7860
;;
train)
start_server
run_scientist_train
run_lab_manager_train
run_eval
echo ""
echo "=========================================="
echo " All training complete!"
echo " Artifacts saved to: $PERSIST_ROOT"
echo "=========================================="
# Keep container alive so artifacts can be retrieved
echo "[train.sh] Training done. Keeping container alive..."
wait $SERVER_PID
;;
scientist)
run_scientist_train
;;
lab-manager)
run_lab_manager_train
;;
eval)
start_server
run_eval
wait $SERVER_PID
;;
*)
echo "Unknown MODE: $MODE"
echo "Valid modes: server, train, scientist, lab-manager, eval"
exit 1
;;
esac