kaiju-coder-7-quantized-runtime / scripts /probe-gojira-b-persisted-quantization.sh
restokes92's picture
Upload Kaiju Coder 7 runtime quantization recipe
785f3d7 verified
#!/usr/bin/env bash
#
# Probe persisted-quantization options for the Kaiju Coder model on a remote
# host: host/model facts, the vLLM image's Python stack, quantization package
# installs, and llama.cpp GGUF conversion support. Output is captured to a log
# under runs/quantization-probes/<UTC timestamp>/ together with a summary.md.
#
# Optional environment overrides:
#   KAIJU_QUANT_MODEL_REMOTE  model directory on the remote host
#   KAIJU_QUANT_VLLM_IMAGE    docker image used for the Python probes
#   KAIJU_LLAMA_CPP_REMOTE    llama.cpp checkout location on the remote host
set -euo pipefail

# ROOT is the parent of the directory holding this script; SCRIPT_DIR is that
# directory itself. Both are resolved to absolute paths.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Helper library providing kaiju_gojira_b_init and kaiju_gojira_b_ssh.
# shellcheck source=scripts/gojira-b-ssh-lib.sh
source "${SCRIPT_DIR}/gojira-b-ssh-lib.sh"
kaiju_gojira_b_init

# Remote-side settings, each overridable through the environment.
MODEL_REMOTE="${KAIJU_QUANT_MODEL_REMOTE:-/home/richardecholsai5/kaiju-coder/models/Kaiju-Coder-Qwen3.6-27B-v1.8-merged}"
VLLM_IMAGE="${KAIJU_QUANT_VLLM_IMAGE:-gojira/vllm-openai-ray:nightly}"
LLAMA_DIR="${KAIJU_LLAMA_CPP_REMOTE:-/home/richardecholsai5/tools/llama.cpp}"

# Per-run output directory, keyed by a UTC timestamp.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
RUN_DIR="${ROOT}/runs/quantization-probes/${STAMP}"
LOG="${RUN_DIR}/persisted-quantization-probe.log"
SUMMARY="${RUN_DIR}/summary.md"
mkdir -p "${RUN_DIR}"

# Produce shell-quoted copies (<NAME>_Q) so the values can be embedded safely
# in the command string handed to the remote shell.
for _kaiju_setting in MODEL_REMOTE VLLM_IMAGE LLAMA_DIR; do
  printf -v "${_kaiju_setting}_Q" '%q' "${!_kaiju_setting}"
done
unset _kaiju_setting
# Run the probe script on the remote side and mirror its combined
# stdout/stderr into ${LOG}. errexit is suspended locally so a failing probe
# does not abort this script before STATUS is captured; PIPESTATUS[0] is the
# exit status of kaiju_gojira_b_ssh rather than of tee.
# NOTE(review): kaiju_gojira_b_ssh comes from the sourced library; presumably
# it runs the given command string over SSH with this heredoc as stdin.
set +e
kaiju_gojira_b_ssh "MODEL_REMOTE=${MODEL_REMOTE_Q} VLLM_IMAGE=${VLLM_IMAGE_Q} LLAMA_DIR=${LLAMA_DIR_Q} bash -s" <<'REMOTE' 2>&1 | tee "${LOG}"
# The heredoc delimiter is quoted, so nothing below is expanded locally;
# MODEL_REMOTE, VLLM_IMAGE and LLAMA_DIR arrive through the environment
# assignments placed in front of "bash -s".
set -euo pipefail

echo "== Host and model =="
# Exit status 2 signals that the model directory is missing.
test -d "${MODEL_REMOTE}" || { echo "missing model: ${MODEL_REMOTE}" >&2; exit 2; }
du -sh "${MODEL_REMOTE}"
df -h /home | tail -1
free -h | sed -n '1,3p'
# Best effort: a failing nvidia-smi, or grep finding no matching container,
# must not abort the probe under errexit/pipefail.
nvidia-smi --query-gpu=name,memory.total,memory.used,memory.free --format=csv,noheader || true
docker ps --format "{{.Names}} {{.Status}} {{.Image}}" | grep -Ei "qwen|kaiju|sglang|vllm" || true
echo

echo "== Model config =="
# Print selected fields of the model's config.json using the host python3.
MODEL_REMOTE="${MODEL_REMOTE}" python3 - <<'PY'
import json
import os
from pathlib import Path
config = json.loads((Path(os.environ["MODEL_REMOTE"]) / "config.json").read_text())
text = config.get("text_config") or {}
print("model_type:", config.get("model_type"))
print("architectures:", config.get("architectures"))
print("text_model_type:", text.get("model_type"))
print("layers:", text.get("num_hidden_layers"))
print("layer_types:", ",".join(sorted(set(text.get("layer_types") or []))))
PY
echo

echo "== vLLM/Qwen3.5-capable Python stack =="
# Inside the image as shipped: load the model config through transformers and
# print the versions of the core packages. The inner heredocs are unquoted
# because they sit inside a single-quoted string, so the embedded Python must
# not contain dollar signs, backticks or single quotes.
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 - <<PY
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
python3 - <<PY
for mod in ["torch", "transformers", "safetensors", "vllm", "huggingface_hub"]:
    m = __import__(mod)
    version = getattr(m, "__version__", "installed")
    print(mod + ": " + str(version))
PY
'
echo

echo "== Persistent quantization package import probe =="
# In a throwaway container: try installing each quantization package with its
# dependencies, report install/import results, then re-check that the model
# config still loads after the installs.
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
for pkg in llmcompressor autoawq auto-gptq; do
  echo "-- pip install ${pkg}"
  if python3 -m pip install -q --no-cache-dir "${pkg}" >"/tmp/kaiju-pip-${pkg}.log" 2>&1; then
    echo "${pkg}: install ok"
  else
    echo "${pkg}: install failed"
    sed -n "1,120p" "/tmp/kaiju-pip-${pkg}.log"
  fi
done
python3 - <<PY
mods = [("llmcompressor", "llmcompressor"), ("autoawq", "awq"), ("auto-gptq", "auto_gptq")]
for label, mod in mods:
    try:
        m = __import__(mod)
        version = getattr(m, "__version__", "installed")
        print(label + ": import ok: " + str(version))
    except Exception as exc:
        print(f"{label}: import failed: {type(exc).__name__}: {exc}")
PY
python3 - <<PY
from transformers import AutoConfig
try:
    cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
    print("post-install AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
except Exception as exc:
    print("post-install AutoConfig failed:", type(exc).__name__, exc)
PY
'
echo

echo "== LLM Compressor no-deps stack-preservation probe =="
# Install llmcompressor with --no-deps in a fresh container, then check both
# that it imports and that the model config still loads. An install failure
# is reported but does not stop the import/config checks.
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 -m pip install -q --no-cache-dir --no-deps llmcompressor >/tmp/kaiju-pip-llmcompressor-nodeps.log 2>&1 || {
  echo "llmcompressor --no-deps install failed"
  sed -n "1,120p" /tmp/kaiju-pip-llmcompressor-nodeps.log
}
python3 - <<PY
try:
    import llmcompressor
    print("llmcompressor no-deps import:", getattr(llmcompressor, "__version__", "installed"))
except Exception as exc:
    print("llmcompressor no-deps import failed:", type(exc).__name__, exc)
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("no-deps AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
'
echo

echo "== llama.cpp GGUF support probe =="
# Refresh an existing llama.cpp checkout (best effort: fetch/checkout failures
# are deliberately ignored so a stale checkout can still be probed) or make a
# fresh shallow clone.
mkdir -p "$(dirname "${LLAMA_DIR}")"
if [[ -d "${LLAMA_DIR}/.git" ]]; then
  git -C "${LLAMA_DIR}" fetch --depth 1 origin master >/dev/null 2>&1 || true
  git -C "${LLAMA_DIR}" checkout -q FETCH_HEAD >/dev/null 2>&1 || true
else
  # ":?" aborts instead of expanding to an empty path; "--" ends option parsing.
  rm -rf -- "${LLAMA_DIR:?}"
  git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}" >/dev/null
fi
# Run the converter from the read-only checkout inside the vLLM image. The
# dry run's exit status is printed instead of propagated, and the container
# always exits 0.
docker run --rm --entrypoint bash \
  -v "${MODEL_REMOTE}":/models/kaiju:ro \
  -v "${LLAMA_DIR}":/llama.cpp:ro \
  "${VLLM_IMAGE}" -lc '
set -euo pipefail
cd /llama.cpp
python3 convert_hf_to_gguf.py --print-supported-models 2>&1 | grep -Ei "qwen3_5|qwen3.5|qwen35|qwen3" | head -40 || true
python3 convert_hf_to_gguf.py --help | grep -E -- "--dry-run|--outtype|--vocab-only" || true
set +e
python3 convert_hf_to_gguf.py \
  --dry-run \
  --outtype q8_0 \
  --outfile /tmp/kaiju-coder-7-q8_0-dry-run.gguf \
  /models/kaiju 2>&1 | sed -n "1,220p"
DRY_STATUS=${PIPESTATUS[0]}
set -e
echo "gguf_dry_run_exit: ${DRY_STATUS}"
exit 0
'
REMOTE
STATUS=${PIPESTATUS[0]}
set -e
# Write a Markdown summary of the run to ${SUMMARY}, print its location, and
# exit with the status captured from the remote probe. The "Interpretation"
# bullets are derived purely from marker strings found in ${LOG}.
{
  echo "# Kaiju Coder 7 Persisted Quantization Probe"
  echo
  echo "- Timestamp: \`${STAMP}\`"
  echo "- Model: \`${MODEL_REMOTE}\`"
  echo "- vLLM image: \`${VLLM_IMAGE}\`"
  echo "- llama.cpp path: \`${LLAMA_DIR}\`"
  echo "- Exit code: \`${STATUS}\`"
  echo "- Log: \`${LOG}\`"
  echo
  echo "## Interpretation"
  echo
  # A single case-insensitive search suffices: it also matches the literal
  # "Model architecture: QWEN35" line.
  if grep -qi "QWEN35" "${LOG}"; then
    echo "- GGUF conversion support probe found Qwen3.5/QWEN35 handling."
  else
    echo "- GGUF conversion support is not proven by this probe."
  fi
  # Anchored at line start so that only the probe of the unmodified image
  # counts; the "post-install AutoConfig:" and "no-deps AutoConfig:" lines come
  # from containers where extra packages were installed and must not match.
  if grep -q "^AutoConfig: Qwen3_5Config" "${LOG}"; then
    echo "- The pinned vLLM nightly stack recognizes Kaiju's Qwen3.5 config."
  else
    echo "- The pinned vLLM nightly stack did not recognize Kaiju's config."
  fi
  # Matches the "llmcompressor: install ..." / "llmcompressor: import ..." lines.
  if grep -q "llmcompressor:" "${LOG}"; then
    echo "- LLM Compressor package import was probed."
  fi
  echo
  echo "Do not claim a persisted quantized artifact exists unless a later run writes"
  echo "and verifies the quantized weights."
} > "${SUMMARY}"
echo "Summary: ${SUMMARY}"
# Propagate the remote probe's exit status to the caller.
exit "${STATUS}"