#!/usr/bin/env bash
#
# Benchmark the merged Qwen model served through vLLM.
#
# Flow: stop the sglang and vLLM servers, start vLLM, poll
# "${BASE_URL}/models" until the model name is listed, run
# scripts/benchmark_kaiju_serving.py against the endpoint, and on exit stop
# vLLM and start sglang again (unless KAIJU_VLLM_KEEP_RUNNING=1).
#
# Environment overrides (defaults in parentheses):
#   KAIJU_VLLM_HOST           endpoint host (100.109.109.14)
#   KAIJU_VLLM_PORT           endpoint port (18084)
#   KAIJU_VLLM_MODEL_NAME     model name expected in /models (kaiju-coder-7)
#   KAIJU_VLLM_CONTEXT        value passed to the vLLM start script and to
#                             the benchmark's --contexts (16384)
#   KAIJU_VLLM_READY_TIMEOUT  seconds to wait for readiness (900)
#   KAIJU_VLLM_KEEP_RUNNING   1 = skip the sglang restore on exit (0)
#   KAIJU_VLLM_PROMPTS        whitespace-separated prompt names
#                             ("identity code_patch")
#   KAIJU_VLLM_MAX_TOKENS     benchmark --max-tokens (128)
#   KAIJU_VLLM_PROMPT_TIMEOUT benchmark --timeout (300)
set -euo pipefail

# Repository root: the parent of the directory containing this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VLLM_HOST="${KAIJU_VLLM_HOST:-100.109.109.14}"
PORT="${KAIJU_VLLM_PORT:-18084}"
MODEL="${KAIJU_VLLM_MODEL_NAME:-kaiju-coder-7}"
CONTEXT="${KAIJU_VLLM_CONTEXT:-16384}"
READY_TIMEOUT="${KAIJU_VLLM_READY_TIMEOUT:-900}"
KEEP_VLLM="${KAIJU_VLLM_KEEP_RUNNING:-0}"
PROMPTS="${KAIJU_VLLM_PROMPTS:-identity code_patch}"
MAX_TOKENS="${KAIJU_VLLM_MAX_TOKENS:-128}"
TIMEOUT="${KAIJU_VLLM_PROMPT_TIMEOUT:-300}"
BASE_URL="http://${VLLM_HOST}:${PORT}/v1"

#######################################
# EXIT handler: stop vLLM and start the sglang server again.
# Globals:
#   KEEP_VLLM (read) - "1" skips the restore entirely.
#   ROOT (read)      - directory that contains scripts/.
#   KAIJU_QWEN36_MERGED_CONTEXT (read) - passed to the sglang start script;
#     32768 is used when it is unset or empty.
# Outputs:
#   A warning on stderr when the sglang start script fails. Output of the
#   helper scripts themselves is discarded.
# Returns:
#   0 always; the restore is best-effort and must not mask the script's
#   own exit status.
#######################################
restore_sglang() {
  if [[ "${KEEP_VLLM}" == "1" ]]; then
    return 0
  fi
  # Best-effort stop: a failure here is deliberately ignored.
  "${ROOT}/scripts/stop-qwen36-merged-vllm.sh" >/dev/null 2>&1 || true
  if ! KAIJU_QWEN36_MERGED_CONTEXT="${KAIJU_QWEN36_MERGED_CONTEXT:-32768}" \
    "${ROOT}/scripts/start-qwen36-merged-sglang.sh" >/dev/null 2>&1; then
    # Still non-fatal, but tell the operator that sglang did not come back.
    echo "warning: failed to restart sglang via ${ROOT}/scripts/start-qwen36-merged-sglang.sh" >&2
  fi
  return 0
}
trap restore_sglang EXIT

# Stop both servers first, then start vLLM with the requested context.
# These calls are not guarded: under `set -e` a failing helper script
# aborts the run (and the EXIT trap, if installed, then fires).
"${ROOT}/scripts/stop-qwen36-merged-sglang.sh"
"${ROOT}/scripts/stop-qwen36-merged-vllm.sh"
KAIJU_VLLM_CONTEXT="${CONTEXT}" "${ROOT}/scripts/start-qwen36-merged-vllm.sh"

# Poll "${BASE_URL}/models" every 10 seconds until the response contains the
# quoted model name, giving up once READY_TIMEOUT seconds have elapsed.
#
# The response is captured before matching rather than piped into `grep -q`:
# under `pipefail`, grep exiting on the first match can make curl fail on
# write and turn a successful match into a failed pipeline.
# --max-time bounds each probe so a hung connection cannot outlast the
# deadline, and -F matches the model name literally instead of as a regex.
probe_timeout=10
deadline=$((SECONDS + READY_TIMEOUT))
until models_json="$(curl -fsSL --max-time "${probe_timeout}" "${BASE_URL}/models")" \
  && grep -Fq -- "\"${MODEL}\"" <<<"${models_json}"; do
  if ((SECONDS >= deadline)); then
    echo "vLLM endpoint did not become ready at ${BASE_URL}" >&2
    exit 1
  fi
  sleep 10
done

# Split PROMPTS on whitespace into an array so each prompt name becomes its
# own argument without also undergoing glob expansion (an unquoted
# ${PROMPTS} would expand a value such as "*" against the current
# directory). `read -d ''` consumes the whole value, newlines included, and
# returns non-zero at end of input, hence the `|| true`.
prompt_names=()
read -r -d '' -a prompt_names <<<"${PROMPTS}" || true

python3 "${ROOT}/scripts/benchmark_kaiju_serving.py" \
  --base-url "${BASE_URL}" \
  --model "${MODEL}" \
  --contexts "${CONTEXT}" \
  --prompts "${prompt_names[@]}" \
  --max-tokens "${MAX_TOKENS}" \
  --timeout "${TIMEOUT}"