#!/usr/bin/env bash
#
# Persisted-quantization probe for the Kaiju Coder model.
#
# Feeds one bash script to kaiju_gojira_b_ssh on stdin (presumably an SSH
# wrapper; it is defined in gojira-b-ssh-lib.sh and not visible here). That
# script prints host and model facts, dumps selected fields of the model's
# config.json, runs checks inside the ${VLLM_IMAGE} container, and runs
# llama.cpp's convert_hf_to_gguf.py with --dry-run. Everything it prints is
# captured in a log, and a Markdown summary is then derived from that log by
# grepping for marker strings.
#
# Environment overrides (each falls back to a built-in default when unset or
# empty):
#   KAIJU_QUANT_MODEL_REMOTE  model directory, checked with `test -d` remotely
#   KAIJU_QUANT_VLLM_IMAGE    container image used for the docker-run probes
#   KAIJU_LLAMA_CPP_REMOTE    llama.cpp checkout path used by the remote script
#
# Outputs, under <parent of this script's dir>/runs/quantization-probes/<STAMP>/:
#   persisted-quantization-probe.log  stdout+stderr of the kaiju_gojira_b_ssh call
#   summary.md                        interpretation of the log
#
# Exit status: the exit status of the kaiju_gojira_b_ssh pipeline stage.
set -euo pipefail

# ROOT is the parent of the directory holding this script; SCRIPT_DIR is that
# directory itself. Both are resolved to absolute paths via cd + pwd.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck source=scripts/gojira-b-ssh-lib.sh
source "${SCRIPT_DIR}/gojira-b-ssh-lib.sh"
# Provided by the sourced library; what it sets up is not visible in this file.
kaiju_gojira_b_init

# UTC timestamp, so each invocation gets its own run directory.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
RUN_DIR="${ROOT}/runs/quantization-probes/${STAMP}"
LOG="${RUN_DIR}/persisted-quantization-probe.log"
SUMMARY="${RUN_DIR}/summary.md"

MODEL_REMOTE="${KAIJU_QUANT_MODEL_REMOTE:-/home/richardecholsai5/kaiju-coder/models/Kaiju-Coder-Qwen3.6-27B-v1.8-merged}"
VLLM_IMAGE="${KAIJU_QUANT_VLLM_IMAGE:-gojira/vllm-openai-ray:nightly}"
LLAMA_DIR="${KAIJU_LLAMA_CPP_REMOTE:-/home/richardecholsai5/tools/llama.cpp}"

mkdir -p "${RUN_DIR}"

# Shell-quote each value with %q before splicing it into the command string
# below, so spaces or metacharacters in a value are not re-interpreted when
# that string is parsed again.
printf -v MODEL_REMOTE_Q "%q" "${MODEL_REMOTE}"
printf -v VLLM_IMAGE_Q "%q" "${VLLM_IMAGE}"
printf -v LLAMA_DIR_Q "%q" "${LLAMA_DIR}"

# errexit is switched off so a failing probe does not abort this script before
# the summary is written; the status is captured from PIPESTATUS afterwards.
#
# The heredoc delimiter is quoted ('REMOTE'), so nothing in the body is
# expanded locally. ${MODEL_REMOTE}, ${VLLM_IMAGE} and ${LLAMA_DIR} inside it
# are resolved by the `bash -s` that reads the body on stdin, from the
# VAR=value assignments that prefix that command.
#
# NOTE(review): in the copy of this script that was reviewed, three stretches
# of the remote script look truncated: each `python3 - <` is followed directly
# by a redirect target, `then`/`done`/`else`/`fi` appear without a visible
# `if`/`for`, and the closing quote of the first `-lc` string is missing. Those
# stretches are reproduced as found; compare against version control before
# relying on this section.
set +e
kaiju_gojira_b_ssh "MODEL_REMOTE=${MODEL_REMOTE_Q} VLLM_IMAGE=${VLLM_IMAGE_Q} LLAMA_DIR=${LLAMA_DIR_Q} bash -s" <<'REMOTE' 2>&1 | tee "${LOG}"
set -euo pipefail

echo "== Host and model =="
test -d "${MODEL_REMOTE}" || { echo "missing model: ${MODEL_REMOTE}" >&2; exit 2; }
du -sh "${MODEL_REMOTE}"
df -h /home | tail -1
free -h | sed -n '1,3p'
nvidia-smi --query-gpu=name,memory.total,memory.used,memory.free --format=csv,noheader || true
docker ps --format "{{.Names}} {{.Status}} {{.Image}}" | grep -Ei "qwen|kaiju|sglang|vllm" || true

echo
echo "== Model config =="
MODEL_REMOTE="${MODEL_REMOTE}" python3 - <<'PY'
import json
import os
from pathlib import Path

config = json.loads((Path(os.environ["MODEL_REMOTE"]) / "config.json").read_text())
text = config.get("text_config") or {}
print("model_type:", config.get("model_type"))
print("architectures:", config.get("architectures"))
print("text_model_type:", text.get("model_type"))
print("layers:", text.get("num_hidden_layers"))
print("layer_types:", ",".join(sorted(set(text.get("layer_types") or []))))
PY

echo
echo "== vLLM/Qwen3.5-capable Python stack =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 - </tmp/kaiju-pip-${pkg}.log 2>&1; then
  echo "${pkg}: install ok"
else
  echo "${pkg}: install failed"
  sed -n "1,120p" "/tmp/kaiju-pip-${pkg}.log"
fi
done
python3 - </tmp/kaiju-pip-llmcompressor-nodeps.log 2>&1 || {
  echo "llmcompressor --no-deps install failed"
  sed -n "1,120p" /tmp/kaiju-pip-llmcompressor-nodeps.log
}
python3 - </dev/null 2>&1 || true
  git -C "${LLAMA_DIR}" checkout -q FETCH_HEAD >/dev/null 2>&1 || true
else
  rm -rf "${LLAMA_DIR}"
  git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}" >/dev/null
fi
docker run --rm --entrypoint bash \
  -v "${MODEL_REMOTE}":/models/kaiju:ro \
  -v "${LLAMA_DIR}":/llama.cpp:ro \
  "${VLLM_IMAGE}" -lc '
set -euo pipefail
cd /llama.cpp
python3 convert_hf_to_gguf.py --print-supported-models 2>&1 | grep -Ei "qwen3_5|qwen3.5|qwen35|qwen3" | head -40 || true
python3 convert_hf_to_gguf.py --help | grep -E -- "--dry-run|--outtype|--vocab-only" || true
set +e
python3 convert_hf_to_gguf.py \
  --dry-run \
  --outtype q8_0 \
  --outfile /tmp/kaiju-coder-7-q8_0-dry-run.gguf \
  /models/kaiju 2>&1 | sed -n "1,220p"
DRY_STATUS=${PIPESTATUS[0]}
set -e
echo "gguf_dry_run_exit: ${DRY_STATUS}"
exit 0
'
REMOTE
# Index 0 is the kaiju_gojira_b_ssh stage; tee's own status is ignored. This
# must stay the first command after the pipeline, because any other command
# would overwrite PIPESTATUS.
STATUS=${PIPESTATUS[0]}
set -e

# Build the Markdown summary. The "Interpretation" bullets are decided purely
# by whether marker strings occur anywhere in the captured log.
{
  echo "# Kaiju Coder 7 Persisted Quantization Probe"
  echo
  echo "- Timestamp: \`${STAMP}\`"
  echo "- Model: \`${MODEL_REMOTE}\`"
  echo "- vLLM image: \`${VLLM_IMAGE}\`"
  echo "- llama.cpp path: \`${LLAMA_DIR}\`"
  echo "- Exit code: \`${STATUS}\`"
  echo "- Log: \`${LOG}\`"
  echo
  echo "## Interpretation"
  echo
  # Any occurrence of "QWEN35" in the log, in any letter case, takes the first
  # branch; the second grep is case-insensitive.
  if grep -q "Model architecture: QWEN35" "${LOG}" || grep -qi "QWEN35" "${LOG}"; then
    echo "- GGUF conversion support probe found Qwen3.5/QWEN35 handling."
  else
    echo "- GGUF conversion support is not proven by this probe."
  fi
  # NOTE(review): the command that prints "AutoConfig: ..." is not visible in
  # this copy; presumably it is one of the in-container python3 probes.
  if grep -q "AutoConfig: Qwen3_5Config" "${LOG}"; then
    echo "- The pinned vLLM nightly stack recognizes Kaiju's Qwen3.5 config."
  else
    echo "- The pinned vLLM nightly stack did not recognize Kaiju's config."
  fi
  # Only checks that a "llmcompressor:" line was logged, not that the step
  # behind it succeeded.
  if grep -q "llmcompressor:" "${LOG}"; then
    echo "- LLM Compressor package import was probed."
  fi
  echo
  echo "Do not claim a persisted quantized artifact exists unless a later run writes"
  echo "and verifies the quantized weights."
} > "${SUMMARY}"

echo "Summary: ${SUMMARY}"
# Hand the probe's own status back to the caller.
exit "${STATUS}"