#!/usr/bin/env bash
# Persisted-quantization probe driver.
#
# Prepares a timestamped run directory under <parent of this script's dir>/runs/
# and the settings that the remote probe stage consumes.
#
# Optional environment overrides:
#   KAIJU_QUANT_MODEL_REMOTE  remote path of the merged model directory
#   KAIJU_QUANT_VLLM_IMAGE    docker image used by the probe stages
#   KAIJU_LLAMA_CPP_REMOTE    remote path of the llama.cpp checkout
set -euo pipefail

# Absolute directory of this script, and its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# SSH helper library that lives next to this script. It presumably defines
# kaiju_gojira_b_init and kaiju_gojira_b_ssh -- confirm against the library.
source "${SCRIPT_DIR}/gojira-b-ssh-lib.sh"
kaiju_gojira_b_init

# Remote-side settings; each falls back to its default when the override is
# unset or empty.
MODEL_REMOTE="${KAIJU_QUANT_MODEL_REMOTE:-/home/richardecholsai5/kaiju-coder/models/Kaiju-Coder-Qwen3.6-27B-v1.8-merged}"
VLLM_IMAGE="${KAIJU_QUANT_VLLM_IMAGE:-gojira/vllm-openai-ray:nightly}"
LLAMA_DIR="${KAIJU_LLAMA_CPP_REMOTE:-/home/richardecholsai5/tools/llama.cpp}"

# Per-run output locations, keyed by a UTC timestamp.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
RUN_DIR="${ROOT}/runs/quantization-probes/${STAMP}"
LOG="${RUN_DIR}/persisted-quantization-probe.log"
SUMMARY="${RUN_DIR}/summary.md"
mkdir -p "${RUN_DIR}"

# Shell-escaped copies of the settings, safe to splice into a command string
# that another shell will parse.
printf -v MODEL_REMOTE_Q '%q' "${MODEL_REMOTE}"
printf -v VLLM_IMAGE_Q '%q' "${VLLM_IMAGE}"
printf -v LLAMA_DIR_Q '%q' "${LLAMA_DIR}"

# Run every probe stage on the remote side in a single kaiju_gojira_b_ssh call
# and mirror the combined stdout/stderr into ${LOG}.
#
# - The 'REMOTE' delimiter is quoted, so nothing in the script below is
#   expanded locally. MODEL_REMOTE, VLLM_IMAGE and LLAMA_DIR reach the remote
#   bash as environment assignments built from the *_Q variables, which are
#   expected to hold shell-escaped copies of the settings.
# - errexit is switched off around the pipeline so a failing probe still falls
#   through to the code after it. STATUS keeps the exit status of the SSH stage
#   (PIPESTATUS[0]), not of tee.
set +e
kaiju_gojira_b_ssh "MODEL_REMOTE=${MODEL_REMOTE_Q} VLLM_IMAGE=${VLLM_IMAGE_Q} LLAMA_DIR=${LLAMA_DIR_Q} bash -s" <<'REMOTE' 2>&1 | tee "${LOG}"
set -euo pipefail

echo "== Host and model =="
test -d "${MODEL_REMOTE}" || { echo "missing model: ${MODEL_REMOTE}" >&2; exit 2; }
du -sh "${MODEL_REMOTE}"
df -h /home | tail -1
free -h | sed -n '1,3p'
# Informational only: a missing GPU tool or no matching container is not fatal.
nvidia-smi --query-gpu=name,memory.total,memory.used,memory.free --format=csv,noheader || true
docker ps --format "{{.Names}} {{.Status}} {{.Image}}" | grep -Ei "qwen|kaiju|sglang|vllm" || true

echo
echo "== Model config =="
MODEL_REMOTE="${MODEL_REMOTE}" python3 - <<'PY'
import json
import os
from pathlib import Path

config = json.loads((Path(os.environ["MODEL_REMOTE"]) / "config.json").read_text())
text = config.get("text_config") or {}
print("model_type:", config.get("model_type"))
print("architectures:", config.get("architectures"))
print("text_model_type:", text.get("model_type"))
print("layers:", text.get("num_hidden_layers"))
print("layer_types:", ",".join(sorted(set(text.get("layer_types") or []))))
PY

echo
echo "== vLLM/Qwen3.5-capable Python stack =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
# Quoted heredoc delimiters: the Python source reaches python3 verbatim.
python3 - <<"PY"
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
python3 - <<"PY"
for mod in ["torch", "transformers", "safetensors", "vllm", "huggingface_hub"]:
    m = __import__(mod)
    version = getattr(m, "__version__", "installed")
    print(mod + ": " + str(version))
PY
'

echo
echo "== Persistent quantization package import probe =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
# An install failure is reported with the head of its pip log and the loop
# moves on to the next package.
for pkg in llmcompressor autoawq auto-gptq; do
  echo "-- pip install ${pkg}"
  if python3 -m pip install -q --no-cache-dir "${pkg}" >"/tmp/kaiju-pip-${pkg}.log" 2>&1; then
    echo "${pkg}: install ok"
  else
    echo "${pkg}: install failed"
    sed -n "1,120p" "/tmp/kaiju-pip-${pkg}.log"
  fi
done
python3 - <<"PY"
mods = [("llmcompressor", "llmcompressor"), ("autoawq", "awq"), ("auto-gptq", "auto_gptq")]
for label, mod in mods:
    try:
        m = __import__(mod)
        version = getattr(m, "__version__", "installed")
        print(label + ": import ok: " + str(version))
    except Exception as exc:
        print(f"{label}: import failed: {type(exc).__name__}: {exc}")
PY
# Re-check config loading after the installs above.
python3 - <<"PY"
from transformers import AutoConfig
try:
    cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
    print("post-install AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
except Exception as exc:
    print("post-install AutoConfig failed:", type(exc).__name__, exc)
PY
'

echo
echo "== LLM Compressor no-deps stack-preservation probe =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 -m pip install -q --no-cache-dir --no-deps llmcompressor >/tmp/kaiju-pip-llmcompressor-nodeps.log 2>&1 || {
  echo "llmcompressor --no-deps install failed"
  sed -n "1,120p" /tmp/kaiju-pip-llmcompressor-nodeps.log
}
python3 - <<"PY"
try:
    import llmcompressor
    print("llmcompressor no-deps import:", getattr(llmcompressor, "__version__", "installed"))
except Exception as exc:
    print("llmcompressor no-deps import failed:", type(exc).__name__, exc)
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("no-deps AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
'

echo
echo "== llama.cpp GGUF support probe =="
mkdir -p "$(dirname "${LLAMA_DIR}")"
if [[ -d "${LLAMA_DIR}/.git" ]]; then
  # Best-effort refresh of an existing checkout; failures are ignored and the
  # probe continues with whatever revision is already on disk.
  git -C "${LLAMA_DIR}" fetch --depth 1 origin master >/dev/null 2>&1 || true
  git -C "${LLAMA_DIR}" checkout -q FETCH_HEAD >/dev/null 2>&1 || true
else
  # ":?" aborts instead of running rm -rf with an empty path.
  rm -rf -- "${LLAMA_DIR:?}"
  git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}" >/dev/null
fi
docker run --rm --entrypoint bash \
  -v "${MODEL_REMOTE}":/models/kaiju:ro \
  -v "${LLAMA_DIR}":/llama.cpp:ro \
  "${VLLM_IMAGE}" -lc '
set -euo pipefail
cd /llama.cpp
python3 convert_hf_to_gguf.py --print-supported-models 2>&1 | grep -Ei "qwen3_5|qwen3.5|qwen35|qwen3" | head -40 || true
python3 convert_hf_to_gguf.py --help | grep -E -- "--dry-run|--outtype|--vocab-only" || true
# The dry-run status is reported in the log rather than propagated, so this
# stage always exits 0.
set +e
python3 convert_hf_to_gguf.py \
  --dry-run \
  --outtype q8_0 \
  --outfile /tmp/kaiju-coder-7-q8_0-dry-run.gguf \
  /models/kaiju 2>&1 | sed -n "1,220p"
DRY_STATUS=${PIPESTATUS[0]}
set -e
echo "gguf_dry_run_exit: ${DRY_STATUS}"
exit 0
'
REMOTE
STATUS=${PIPESTATUS[0]}
set -e

# Write a Markdown summary of the run to ${SUMMARY}, derived from fixed-string
# searches of ${LOG}, then exit with the probe status held in STATUS so callers
# see the probe result rather than the result of the summary step.
{
  printf '%s\n' \
    "# Kaiju Coder 7 Persisted Quantization Probe" \
    "" \
    "- Timestamp: \`${STAMP}\`" \
    "- Model: \`${MODEL_REMOTE}\`" \
    "- vLLM image: \`${VLLM_IMAGE}\`" \
    "- llama.cpp path: \`${LLAMA_DIR}\`" \
    "- Exit code: \`${STATUS}\`" \
    "- Log: \`${LOG}\`" \
    "" \
    "## Interpretation" \
    ""

  # The findings below are absence-of-match checks, so flag runs that did not
  # finish cleanly: a missing log line may only mean the stage never ran.
  if [[ "${STATUS}" -ne 0 ]]; then
    echo "- The probe exited with status ${STATUS}; some stages may not have run, so negative findings below may only mean a check was never reached."
  fi

  # One case-insensitive fixed-string search; it subsumes the narrower
  # "Model architecture: QWEN35" match.
  if grep -qiF -- "QWEN35" "${LOG}"; then
    echo "- GGUF conversion support probe found Qwen3.5/QWEN35 handling."
  else
    echo "- GGUF conversion support is not proven by this probe."
  fi

  if grep -qF -- "AutoConfig: Qwen3_5Config" "${LOG}"; then
    echo "- The pinned vLLM nightly stack recognizes Kaiju's Qwen3.5 config."
  else
    echo "- The pinned vLLM nightly stack did not recognize Kaiju's config."
  fi

  if grep -qF -- "llmcompressor:" "${LOG}"; then
    echo "- LLM Compressor package import was probed."
  fi

  printf '%s\n' \
    "" \
    "Do not claim a persisted quantized artifact exists unless a later run writes" \
    "and verifies the quantized weights."
} > "${SUMMARY}"

echo "Summary: ${SUMMARY}"
exit "${STATUS}"