#!/usr/bin/env bash
#
# Benchmark the merged Qwen model served by vLLM, then restore SGLang.
#
# Flow:
#   1. Stop the SGLang and vLLM servers via the project stop scripts.
#   2. Start vLLM with the requested context length.
#   3. Poll the OpenAI-compatible /models endpoint until MODEL is listed.
#   4. Run scripts/benchmark_kaiju_serving.py against the endpoint.
#   5. On exit (success or failure) stop vLLM and start SGLang again,
#      unless KAIJU_VLLM_KEEP_RUNNING=1.
#
# Environment overrides (all optional):
#   KAIJU_VLLM_HOST              endpoint host          (default 100.109.109.14)
#   KAIJU_VLLM_PORT              endpoint port          (default 18084)
#   KAIJU_VLLM_MODEL_NAME        served model name      (default kaiju-coder-7)
#   KAIJU_VLLM_CONTEXT           context length         (default 16384)
#   KAIJU_VLLM_READY_TIMEOUT     seconds to wait for vLLM (default 900)
#   KAIJU_VLLM_KEEP_RUNNING      1 = leave vLLM up on exit (default 0)
#   KAIJU_VLLM_PROMPTS           whitespace-separated prompt names
#                                (default "identity code_patch")
#   KAIJU_VLLM_MAX_TOKENS        max tokens per prompt  (default 128)
#   KAIJU_VLLM_PROMPT_TIMEOUT    per-prompt timeout, s  (default 300)
#   KAIJU_QWEN36_MERGED_CONTEXT  context used when SGLang is restored
#                                (default 32768)

set -euo pipefail

# Repository root: the parent of the directory holding this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

HOST="${KAIJU_VLLM_HOST:-100.109.109.14}"
PORT="${KAIJU_VLLM_PORT:-18084}"
MODEL="${KAIJU_VLLM_MODEL_NAME:-kaiju-coder-7}"
CONTEXT="${KAIJU_VLLM_CONTEXT:-16384}"
READY_TIMEOUT="${KAIJU_VLLM_READY_TIMEOUT:-900}"
KEEP_VLLM="${KAIJU_VLLM_KEEP_RUNNING:-0}"
PROMPTS="${KAIJU_VLLM_PROMPTS:-identity code_patch}"
MAX_TOKENS="${KAIJU_VLLM_MAX_TOKENS:-128}"
TIMEOUT="${KAIJU_VLLM_PROMPT_TIMEOUT:-300}"
BASE_URL="http://${HOST}:${PORT}/v1"

#######################################
# EXIT handler: stop vLLM and bring SGLang back up.
# Globals:   KEEP_VLLM, ROOT, KAIJU_QWEN36_MERGED_CONTEXT (read)
# Arguments: none
# Returns:   0 always; both steps are deliberately best-effort so that a
#            failed restore never masks the script's real exit status.
#######################################
restore_sglang() {
  if [[ "${KEEP_VLLM}" == "1" ]]; then
    return
  fi
  "${ROOT}/scripts/stop-qwen36-merged-vllm.sh" >/dev/null 2>&1 || true
  KAIJU_QWEN36_MERGED_CONTEXT="${KAIJU_QWEN36_MERGED_CONTEXT:-32768}" \
    "${ROOT}/scripts/start-qwen36-merged-sglang.sh" >/dev/null 2>&1 || true
}

#######################################
# Poll BASE_URL/models every 10 seconds until MODEL appears in the response.
# Globals:   BASE_URL, MODEL, READY_TIMEOUT (read)
# Arguments: none
# Outputs:   diagnostic on stderr when the deadline passes
# Returns:   0 once the model is listed, 1 on timeout
#######################################
wait_for_model() {
  local deadline=$((SECONDS + READY_TIMEOUT))
  local body
  while true; do
    # Capture the response before matching. Piping curl straight into
    # `grep -q` under pipefail can report failure even on a match, because
    # grep exits early and curl then fails writing to the closed pipe.
    # The timeouts keep a single hung request from outliving READY_TIMEOUT.
    if body="$(curl -fsSL --connect-timeout 5 --max-time 30 \
        "${BASE_URL}/models")" \
      && grep -qF -- "\"${MODEL}\"" <<<"${body}"; then
      return 0
    fi
    if ((SECONDS >= deadline)); then
      echo "vLLM endpoint did not become ready at ${BASE_URL}" >&2
      return 1
    fi
    sleep 10
  done
}

trap restore_sglang EXIT

# Both servers are stopped before vLLM is started.
"${ROOT}/scripts/stop-qwen36-merged-sglang.sh"
"${ROOT}/scripts/stop-qwen36-merged-vllm.sh"
KAIJU_VLLM_CONTEXT="${CONTEXT}" "${ROOT}/scripts/start-qwen36-merged-vllm.sh"

wait_for_model || exit 1

# Split the prompt list on whitespace into an array: one argument per prompt
# name, without the pathname globbing an unquoted expansion would perform.
prompt_args=()
read -r -a prompt_args <<<"${PROMPTS}"

# ${arr[@]+...} keeps an empty list from tripping `set -u` on older bash.
python3 "${ROOT}/scripts/benchmark_kaiju_serving.py" \
  --base-url "${BASE_URL}" \
  --model "${MODEL}" \
  --contexts "${CONTEXT}" \
  --prompts ${prompt_args[@]+"${prompt_args[@]}"} \
  --max-tokens "${MAX_TOKENS}" \
  --timeout "${TIMEOUT}"