File size: 6,695 Bytes
#!/usr/bin/env bash
# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Absolute directory containing this script, and the directory one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# The sourced library is expected to provide kaiju_gojira_b_init (called here)
# and kaiju_gojira_b_ssh (used further down) -- confirm against the library.
# shellcheck source=scripts/gojira-b-ssh-lib.sh
source "${SCRIPT_DIR}/gojira-b-ssh-lib.sh"
kaiju_gojira_b_init

# Each run writes into its own UTC-stamped directory under ROOT.
STAMP="$(date -u '+%Y%m%dT%H%M%SZ')"
RUN_DIR="${ROOT}/runs/quantization-probes/${STAMP}"
LOG="${RUN_DIR}/persisted-quantization-probe.log"
SUMMARY="${RUN_DIR}/summary.md"
mkdir -p "${RUN_DIR}"

# Remote-side settings; each may be overridden through its KAIJU_* variable.
MODEL_REMOTE="${KAIJU_QUANT_MODEL_REMOTE:-/home/richardecholsai5/kaiju-coder/models/Kaiju-Coder-Qwen3.6-27B-v1.8-merged}"
VLLM_IMAGE="${KAIJU_QUANT_VLLM_IMAGE:-gojira/vllm-openai-ray:nightly}"
LLAMA_DIR="${KAIJU_LLAMA_CPP_REMOTE:-/home/richardecholsai5/tools/llama.cpp}"

# Store a shell-escaped copy of each setting in <NAME>_Q (MODEL_REMOTE_Q,
# VLLM_IMAGE_Q, LLAMA_DIR_Q) so the values survive being embedded in the
# remote command line.
for setting in MODEL_REMOTE VLLM_IMAGE LLAMA_DIR; do
  printf -v "${setting}_Q" '%q' "${!setting}"
done
unset setting
# Run the probe on the remote host and mirror its combined stdout/stderr into
# LOG. The quoted 'REMOTE' delimiter stops the local shell from expanding
# anything inside the here-document; the three settings travel as
# shell-escaped environment assignments on the remote command line.
# errexit is switched off around the pipeline so a failing probe does not
# abort this script before STATUS is recorded; STATUS is the exit status of
# the ssh stage (PIPESTATUS[0]), not of tee.
set +e
kaiju_gojira_b_ssh "MODEL_REMOTE=${MODEL_REMOTE_Q} VLLM_IMAGE=${VLLM_IMAGE_Q} LLAMA_DIR=${LLAMA_DIR_Q} bash -s" <<'REMOTE' 2>&1 | tee "${LOG}"
set -euo pipefail

echo "== Host and model =="
test -d "${MODEL_REMOTE}" || { echo "missing model: ${MODEL_REMOTE}" >&2; exit 2; }
du -sh "${MODEL_REMOTE}"
df -h /home | tail -1
free -h | sed -n '1,3p'
# Informational only: a missing GPU tool or no matching container is not an error.
nvidia-smi --query-gpu=name,memory.total,memory.used,memory.free --format=csv,noheader || true
docker ps --format "{{.Names}} {{.Status}} {{.Image}}" | grep -Ei "qwen|kaiju|sglang|vllm" || true

echo
echo "== Model config =="
MODEL_REMOTE="${MODEL_REMOTE}" python3 - <<'PY'
import json
import os
from pathlib import Path
config = json.loads((Path(os.environ["MODEL_REMOTE"]) / "config.json").read_text())
text = config.get("text_config") or {}
print("model_type:", config.get("model_type"))
print("architectures:", config.get("architectures"))
print("text_model_type:", text.get("model_type"))
print("layers:", text.get("num_hidden_layers"))
print("layer_types:", ",".join(sorted(set(text.get("layer_types") or []))))
PY

# In the container scripts below the Python here-documents use a quoted
# delimiter so the container shell passes the Python text through verbatim.
echo
echo "== vLLM/Qwen3.5-capable Python stack =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 - <<"PY"
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
python3 - <<"PY"
for mod in ["torch", "transformers", "safetensors", "vllm", "huggingface_hub"]:
    m = __import__(mod)
    version = getattr(m, "__version__", "installed")
    print(mod + ": " + str(version))
PY
'

echo
echo "== Persistent quantization package import probe =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
for pkg in llmcompressor autoawq auto-gptq; do
  echo "-- pip install ${pkg}"
  if python3 -m pip install -q --no-cache-dir "${pkg}" >"/tmp/kaiju-pip-${pkg}.log" 2>&1; then
    echo "${pkg}: install ok"
  else
    echo "${pkg}: install failed"
    sed -n "1,120p" "/tmp/kaiju-pip-${pkg}.log"
  fi
done
python3 - <<"PY"
mods = [("llmcompressor", "llmcompressor"), ("autoawq", "awq"), ("auto-gptq", "auto_gptq")]
for label, mod in mods:
    try:
        m = __import__(mod)
        version = getattr(m, "__version__", "installed")
        print(label + ": import ok: " + str(version))
    except Exception as exc:
        print(f"{label}: import failed: {type(exc).__name__}: {exc}")
PY
python3 - <<"PY"
from transformers import AutoConfig
try:
    cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
    print("post-install AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
except Exception as exc:
    print("post-install AutoConfig failed:", type(exc).__name__, exc)
PY
'

echo
echo "== LLM Compressor no-deps stack-preservation probe =="
docker run --rm --entrypoint bash -v "${MODEL_REMOTE}":/models/kaiju:ro "${VLLM_IMAGE}" -lc '
set -euo pipefail
python3 -m pip install -q --no-cache-dir --no-deps llmcompressor >/tmp/kaiju-pip-llmcompressor-nodeps.log 2>&1 || {
  echo "llmcompressor --no-deps install failed"
  sed -n "1,120p" /tmp/kaiju-pip-llmcompressor-nodeps.log
}
python3 - <<"PY"
try:
    import llmcompressor
    print("llmcompressor no-deps import:", getattr(llmcompressor, "__version__", "installed"))
except Exception as exc:
    print("llmcompressor no-deps import failed:", type(exc).__name__, exc)
from transformers import AutoConfig
cfg = AutoConfig.from_pretrained("/models/kaiju", trust_remote_code=True)
print("no-deps AutoConfig:", type(cfg).__name__, getattr(cfg, "model_type", None))
PY
'

echo
echo "== llama.cpp GGUF support probe =="
mkdir -p "$(dirname "${LLAMA_DIR}")"
if [[ -d "${LLAMA_DIR}/.git" ]]; then
  # Best-effort refresh: if the fetch or checkout fails, the existing checkout is used as-is.
  git -C "${LLAMA_DIR}" fetch --depth 1 origin master >/dev/null 2>&1 || true
  git -C "${LLAMA_DIR}" checkout -q FETCH_HEAD >/dev/null 2>&1 || true
else
  # Anything at LLAMA_DIR that is not a git checkout is replaced by a fresh
  # clone; the :? guard refuses to run rm -rf on an empty path.
  rm -rf -- "${LLAMA_DIR:?}"
  git clone --depth 1 https://github.com/ggml-org/llama.cpp "${LLAMA_DIR}" >/dev/null
fi
docker run --rm --entrypoint bash \
  -v "${MODEL_REMOTE}":/models/kaiju:ro \
  -v "${LLAMA_DIR}":/llama.cpp:ro \
  "${VLLM_IMAGE}" -lc '
set -euo pipefail
cd /llama.cpp
python3 convert_hf_to_gguf.py --print-supported-models 2>&1 | grep -Ei "qwen3_5|qwen3.5|qwen35|qwen3" | head -40 || true
python3 convert_hf_to_gguf.py --help | grep -E -- "--dry-run|--outtype|--vocab-only" || true
set +e
python3 convert_hf_to_gguf.py \
  --dry-run \
  --outtype q8_0 \
  --outfile /tmp/kaiju-coder-7-q8_0-dry-run.gguf \
  /models/kaiju 2>&1 | sed -n "1,220p"
DRY_STATUS=${PIPESTATUS[0]}
set -e
echo "gguf_dry_run_exit: ${DRY_STATUS}"
exit 0
'
REMOTE
STATUS=${PIPESTATUS[0]}
set -e
#######################################
# Print the "Interpretation" bullets derived from a probe log.
# Arguments:
#   $1 - path to the probe log
#   $2 - exit status of the probe (optional, defaults to 0)
# Outputs:
#   Markdown bullet lines on stdout.
#######################################
kaiju_probe_interpretation() {
  local log_path=$1
  local exit_code=${2:-0}

  # A non-zero status means the probe stopped early or never ran, so the
  # negative bullets below may only reflect sections that were not reached.
  if [[ "${exit_code}" != "0" ]]; then
    echo "- The probe exited with status \`${exit_code}\`; later sections may not have run, so treat negative findings below as inconclusive."
  fi

  # One case-insensitive search also covers the "Model architecture: QWEN35" line.
  if grep -qi -- "QWEN35" "${log_path}"; then
    echo "- GGUF conversion support probe found Qwen3.5/QWEN35 handling."
  else
    echo "- GGUF conversion support is not proven by this probe."
  fi

  # Anchored at line start so the "post-install AutoConfig:" and
  # "no-deps AutoConfig:" lines are not credited to the pinned stack.
  if grep -q -- "^AutoConfig: Qwen3_5Config" "${log_path}"; then
    echo "- The pinned vLLM nightly stack recognizes Kaiju's Qwen3.5 config."
  else
    echo "- The pinned vLLM nightly stack did not recognize Kaiju's config."
  fi

  if grep -q -- "llmcompressor:" "${log_path}"; then
    echo "- LLM Compressor package import was probed."
  fi
}

# Write the Markdown summary for this run to SUMMARY.
{
  echo "# Kaiju Coder 7 Persisted Quantization Probe"
  echo
  echo "- Timestamp: \`${STAMP}\`"
  echo "- Model: \`${MODEL_REMOTE}\`"
  echo "- vLLM image: \`${VLLM_IMAGE}\`"
  echo "- llama.cpp path: \`${LLAMA_DIR}\`"
  echo "- Exit code: \`${STATUS}\`"
  echo "- Log: \`${LOG}\`"
  echo
  echo "## Interpretation"
  echo
  kaiju_probe_interpretation "${LOG}" "${STATUS}"
  echo
  echo "Do not claim a persisted quantized artifact exists unless a later run writes"
  echo "and verifies the quantized weights."
} > "${SUMMARY}"
# Tell the caller where the summary was written, then exit with the value
# recorded in STATUS.
printf 'Summary: %s\n' "${SUMMARY}"
exit "${STATUS}"
|