#!/usr/bin/env bash
# Launch a vLLM-powered eval as a HF Job for a trained ClarifyRL checkpoint.
#
# Usage:
# HF_TOKEN=hf_xxx ./scripts/launch_eval_job.sh \
# --model agarwalanu3103/clarify-rl-grpo-qwen3-0-6b \
# --flavor a10g-small \
# --limit 50
#
# Or as positional shortcuts:
# HF_TOKEN=hf_xxx ./scripts/launch_eval_job.sh agarwalanu3103/clarify-rl-grpo-qwen3-0-6b a10g-small 50
#
# This works around the fact that the HF Inference Router does not auto-warm
# fine-tuned community uploads, so we have to host vLLM ourselves. We use the
# cheapest GPU that fits the model: a10g-small (24 GB) for ≤4B, a10g-large
# for 7-8B.
#
# Environment:
# HF_TOKEN (required) write token of the account hosting the eval.
# ENV_BASE_URL env Space URL (default: https://agarwalanu3103-clarify-rl.hf.space).
# PUSH_TO_REPO override push target (default = MODEL).
# EVAL_LABEL suffix for output filename (default n${LIMIT}).
# GPU_MEM_UTIL vLLM GPU mem util (default 0.85).
# TIMEOUT HF Jobs timeout (default 1h).
# IMAGE docker image override.
# DETACH 1 (default) adds -d to the `hf jobs uv run` command; any other value omits it.
# DRY_RUN set to 1 to print the assembled command and exit without running it.
#
# Example multi-checkpoint sweep:
# for m in clarify-rl-grpo-qwen3-0-6b clarify-rl-grpo-qwen3-1-7b; do
# HF_TOKEN=$HF_TOKEN ./scripts/launch_eval_job.sh agarwalanu3103/$m a10g-small 50
# done
set -euo pipefail

# Defaults; overridden by positional arguments or --flags below.
MODEL=""
FLAVOR="a10g-small"
LIMIT="50"

# Print a script's leading comment header (the usage text) to stdout.
# Arguments: $1 - file to read (default: this script; "-" reads stdin).
# The shebang line is skipped and output stops at the first non-comment line
# after the header, so comments further down in the code are not printed.
print_help() {
  awk '
    /^#!/ { next }
    /^#/  { seen = 1; sub(/^# ?/, ""); print; next }
    seen  { exit }
  ' "${1:-$0}"
}

# Parse the command line into MODEL, FLAVOR, LIMIT, IMAGE and TIMEOUT.
# Two forms are accepted:
#   positional: MODEL [FLAVOR [LIMIT]]
#   flags:      --model M --flavor F --limit N --image I --timeout T
# Exits 0 after printing help for -h/--help, and exits 1 on an unknown
# argument or on a flag that is missing its value.
parse_args() {
  # A first argument that does not start with "-" selects the positional
  # form. Testing a single "-" (rather than "--") keeps "-h" from being
  # mistaken for the model name.
  if [ "$#" -ge 1 ] && [ "${1:0:1}" != "-" ]; then
    MODEL="$1"
    # Plain `if` statements: a trailing `[ ... ] && x` would make the
    # function return non-zero and abort the script under `set -e`.
    if [ "$#" -ge 2 ]; then FLAVOR="$2"; fi
    if [ "$#" -ge 3 ]; then LIMIT="$3"; fi
    return 0
  fi

  while [ "$#" -gt 0 ]; do
    case "$1" in
      --model | --flavor | --limit | --image | --timeout)
        # Without this check a trailing flag would hit `set -u` and die
        # with an unhelpful "unbound variable" message.
        if [ "$#" -lt 2 ]; then
          echo "ERROR: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --model) MODEL="$2" ;;
          --flavor) FLAVOR="$2" ;;
          --limit) LIMIT="$2" ;;
          --image) IMAGE="$2" ;;
          --timeout) TIMEOUT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        print_help
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
  return 0
}

parse_args "$@"
# Mandatory inputs: abort with a message when either is unset or empty.
: "${MODEL:?MODEL is required (e.g. agarwalanu3103/clarify-rl-grpo-qwen3-0-6b)}"
: "${HF_TOKEN:?HF_TOKEN is required}"

# Optional settings: keep a non-empty caller-supplied value, otherwise fall
# back to the default shown on the right.
ENV_BASE_URL="${ENV_BASE_URL:-https://agarwalanu3103-clarify-rl.hf.space}"
PUSH_TO_REPO="${PUSH_TO_REPO:-$MODEL}"
EVAL_LABEL="${EVAL_LABEL:-n${LIMIT}}"
GPU_MEM_UTIL="${GPU_MEM_UTIL:-0.85}"
TIMEOUT="${TIMEOUT:-1h}"
IMAGE="${IMAGE:-}"
# SCRIPT_DIR is the parent of the directory holding this script; the paths
# below are resolved relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
EVAL_SCRIPT="$SCRIPT_DIR/scripts/eval_with_vllm.py"
RUN_EVAL="$SCRIPT_DIR/scripts/run_eval.py"
INFERENCE_PY="$SCRIPT_DIR/inference.py"
SCENARIOS="$SCRIPT_DIR/scenarios/eval_held_out.json"

# Stop at the first of these files that is not a regular file.
for required in "$EVAL_SCRIPT" "$RUN_EVAL" "$INFERENCE_PY" "$SCENARIOS"; do
  if [ ! -f "$required" ]; then
    echo "ERROR: missing $required" >&2
    exit 1
  fi
done
# Print a summary of the effective settings before anything is launched.
banner_rule="========================================================================="
printf '%s\n' \
  "$banner_rule" \
  "ClarifyRL vLLM eval HF Jobs launcher" \
  "$banner_rule" \
  "Model: $MODEL" \
  "Flavor: $FLAVOR" \
  "Limit: $LIMIT" \
  "Push target: $PUSH_TO_REPO" \
  "Eval label: $EVAL_LABEL" \
  "Env base URL: $ENV_BASE_URL" \
  "GPU mem util: $GPU_MEM_UTIL" \
  "Timeout: $TIMEOUT" \
  "Image: ${IMAGE:-<HF Jobs default uv-python>}" \
  "$banner_rule"
# Assemble the `hf jobs uv run` invocation as an array so that values
# containing spaces stay intact as single arguments.
#
# The token is passed by name only: `--secrets HF_TOKEN` tells the CLI to
# read the value from the environment, and the CLI authenticates from the
# same exported variable. This keeps the secret out of the argument list,
# where it would be visible in `ps` output and in any printed copy of the
# command.
export HF_TOKEN
CMD=(
  hf jobs uv run
  --flavor "$FLAVOR"
  --timeout "$TIMEOUT"
  --secrets HF_TOKEN
  -e "MODEL_NAME=$MODEL"
  -e "ENV_BASE_URL=$ENV_BASE_URL"
  -e "PUSH_TO_REPO=$PUSH_TO_REPO"
  -e "LIMIT=$LIMIT"
  -e "EVAL_LABEL=$EVAL_LABEL"
  -e "GPU_MEM_UTIL=$GPU_MEM_UTIL"
  -e "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
  -e "VLLM_USE_V1=1"
)

# Only pass --image when an override was requested.
if [ -n "$IMAGE" ]; then
  CMD+=(--image "$IMAGE")
fi

# DETACH defaults to 1, which adds -d; any other value leaves it out.
: "${DETACH:=1}"
if [ "$DETACH" = "1" ]; then
  CMD+=(-d)
fi

# vLLM + openai (HTTP client used by run_eval.py via inference.py) +
# websockets (env Space connection) + huggingface_hub (Hub upload).
# We DO NOT pull `trl` here — eval is purely inference + HTTP.
CMD+=(
  --with "vllm"
  --with "openai>=1.40.0"
  --with "websockets>=12.0"
  --with "jmespath"
  --with "huggingface_hub"
  --with "truststore"
  "$EVAL_SCRIPT"
)
# Pick the `hf` executable: the project's .venv copy wins when it exists and
# is executable (so the SSL truststore patch applies); otherwise use whatever
# `hf` the shell resolves. Abort when neither is available.
VENV_HF="$SCRIPT_DIR/.venv/bin/hf"
if [ -x "$VENV_HF" ]; then
  HF_BIN="$VENV_HF"
elif ! HF_BIN="$(command -v hf 2>/dev/null)"; then
  echo "ERROR: 'hf' CLI not found." >&2
  exit 1
fi

# Replace the placeholder program name with the resolved binary.
CMD[0]="$HF_BIN"
# Print a command one shell-quoted argument per line, masking the HF token.
# Arguments: the command words to print.
# Every occurrence of $HF_TOKEN inside an argument is replaced with REDACTED
# so that dry-run output can be shared or logged without leaking the secret.
print_redacted_cmd() {
  local word
  for word in "$@"; do
    if [ -n "${HF_TOKEN:-}" ]; then
      # The quoted pattern makes the token match literally, not as a glob.
      word="${word//"$HF_TOKEN"/REDACTED}"
    fi
    printf ' %q\n' "$word"
  done
}

# DRY_RUN=1 shows what would be executed and stops before launching.
if [ "${DRY_RUN:-0}" = "1" ]; then
  echo "DRY_RUN=1 — would run:"
  print_redacted_cmd "${CMD[@]}"
  exit 0
fi

echo "Launching with: $HF_BIN"
echo
"${CMD[@]}"