File size: 1,441 Bytes
6d7449a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | #!/usr/bin/env bash
set -euo pipefail

# Configuration. Every setting can be overridden from the environment:
#   KAIJU_VLLM_HOST            host used to build the endpoint URL (default 100.109.109.14)
#   KAIJU_VLLM_PORT            port used to build the endpoint URL (default 18084)
#   KAIJU_VLLM_MODEL_NAME      model name expected in /v1/models   (default kaiju-coder-7)
#   KAIJU_VLLM_CONTEXT         context length passed to vLLM start and the benchmark (default 16384)
#   KAIJU_VLLM_READY_TIMEOUT   seconds to wait for the endpoint    (default 900)
#   KAIJU_VLLM_KEEP_RUNNING    "1" skips the SGLang restore on exit (default 0)
#   KAIJU_VLLM_PROMPTS         whitespace-separated prompt names   (default "identity code_patch")
#   KAIJU_VLLM_MAX_TOKENS      value for --max-tokens              (default 128)
#   KAIJU_VLLM_PROMPT_TIMEOUT  value for --timeout                 (default 300)

# Parent directory of the directory that holds this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

VLLM_HOST="${KAIJU_VLLM_HOST:-100.109.109.14}"
PORT="${KAIJU_VLLM_PORT:-18084}"
MODEL="${KAIJU_VLLM_MODEL_NAME:-kaiju-coder-7}"
CONTEXT="${KAIJU_VLLM_CONTEXT:-16384}"
READY_TIMEOUT="${KAIJU_VLLM_READY_TIMEOUT:-900}"
KEEP_VLLM="${KAIJU_VLLM_KEEP_RUNNING:-0}"
PROMPTS="${KAIJU_VLLM_PROMPTS:-identity code_patch}"
MAX_TOKENS="${KAIJU_VLLM_MAX_TOKENS:-128}"
TIMEOUT="${KAIJU_VLLM_PROMPT_TIMEOUT:-300}"
BASE_URL="http://${VLLM_HOST}:${PORT}/v1"
#######################################
# Stop the vLLM server and start the SGLang server again (best effort).
# Installed as the EXIT trap of this script.
# Globals:
#   KEEP_VLLM (read)  - when "1", do nothing
#   ROOT (read)       - directory containing the scripts/ helpers
#   KAIJU_QWEN36_MERGED_CONTEXT (read) - passed to the SGLang start script;
#                       32768 is used when it is unset or empty
# Outputs:
#   Helper output is discarded; a warning is written to stderr when the
#   SGLang start script exits non-zero.
# Returns:
#   0 always, so a failed restore never turns into a new error.
#######################################
restore_sglang() {
  if [[ "${KEEP_VLLM}" == "1" ]]; then
    return 0
  fi

  # A failing stop is ignored on purpose; the start below is attempted anyway.
  "${ROOT}/scripts/stop-qwen36-merged-vllm.sh" >/dev/null 2>&1 || true

  if ! KAIJU_QWEN36_MERGED_CONTEXT="${KAIJU_QWEN36_MERGED_CONTEXT:-32768}" \
    "${ROOT}/scripts/start-qwen36-merged-sglang.sh" >/dev/null 2>&1; then
    # Still best effort, but do not fail silently: the operator needs to know
    # that SGLang was not brought back.
    printf 'warning: failed to restart SGLang via %s\n' \
      "${ROOT}/scripts/start-qwen36-merged-sglang.sh" >&2
  fi
  return 0
}
# Run restore_sglang on every exit path (success, failure, readiness timeout).
trap restore_sglang EXIT

# Stop SGLang and any running vLLM instance, then start vLLM with the
# requested context length.
"${ROOT}/scripts/stop-qwen36-merged-sglang.sh"
"${ROOT}/scripts/stop-qwen36-merged-vllm.sh"
KAIJU_VLLM_CONTEXT="${CONTEXT}" "${ROOT}/scripts/start-qwen36-merged-vllm.sh"

# Poll /models until the expected model name shows up or READY_TIMEOUT elapses.
deadline=$((SECONDS + READY_TIMEOUT))
while true; do
  # The body is captured rather than piped into `grep -q`: under `pipefail` an
  # early grep exit can make curl fail with SIGPIPE and hide a match, and grep
  # would treat MODEL as a regex. --max-time keeps a single hung request from
  # blocking past the deadline.
  models_response="$(curl -fsSL --max-time 10 "${BASE_URL}/models" || true)"
  if [[ "${models_response}" == *"\"${MODEL}\""* ]]; then
    break
  fi
  if (( SECONDS >= deadline )); then
    echo "vLLM endpoint did not become ready at ${BASE_URL}" >&2
    exit 1
  fi
  sleep 10
done

# Split PROMPTS on whitespace into separate arguments without exposing the
# words to pathname expansion. `read -d ''` returns non-zero at end of input,
# hence the `|| true`.
prompt_args=()
IFS=$' \t\n' read -r -d '' -a prompt_args <<< "${PROMPTS}" || true

# The ${arr[@]+...} form keeps `set -u` happy on older bash when the array is empty.
python3 "${ROOT}/scripts/benchmark_kaiju_serving.py" \
  --base-url "${BASE_URL}" \
  --model "${MODEL}" \
  --contexts "${CONTEXT}" \
  --prompts ${prompt_args[@]+"${prompt_args[@]}"} \
  --max-tokens "${MAX_TOKENS}" \
  --timeout "${TIMEOUT}"
|