Thanatos-27B / scripts /smoke_test.sh
FoolDev's picture
Rename back: Thanatos-27B-Heretic → Thanatos-27B (HF repo also renamed)
7197abd
#!/usr/bin/env bash
# Thanatos-27B — smoke test against a running Ollama daemon.
#
# Verifies:
# 1. The Ollama server is reachable.
# 2. The target model is loaded / loadable.
# 3. The model exposes the `tools` capability (Modelfile TEMPLATE wired).
# 4. A single chat round-trip succeeds and produces non-empty output.
# 5. No chat-template control tokens leak into the response.
# 6. (TOOLS_TEST=1) An end-to-end tool-call round-trip emits a structured
# tool_calls array with the expected name and arguments. Off by default
# because it costs ~5-10 sec of inference; on for comprehensive runs.
#
# Usage:
# ./scripts/smoke_test.sh # fast checks only
# TOOLS_TEST=1 ./scripts/smoke_test.sh # add tool-call round-trip
# MODEL=hf.co/FoolDev/Thanatos-27B:Q4_K_M ./scripts/smoke_test.sh
# HOST=http://localhost:11434 ./scripts/smoke_test.sh
# Strict mode: abort on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Runtime configuration. Each variable keeps a non-empty value supplied by the
# caller's environment and otherwise falls back to the default written here.
: "${MODEL:=thanatos-27b}"
: "${HOST:=http://localhost:11434}"
: "${PROMPT:=Reply with the single word: OK}"
# _paint SGR_CODE [TEXT...]
# Writes TEXT (all remaining arguments joined by a single space) to stdout,
# wrapped in the given ANSI colour code and followed by a colour reset and a
# newline. TEXT is passed as a printf argument, never as the format string.
_paint() {
  local sgr=$1
  shift
  printf '\033[%sm%s\033[0m\n' "${sgr}" "$*"
}

# Coloured status lines: red = failure, green = success, blue = progress/info.
red() { _paint 31 "$@"; }
green() { _paint 32 "$@"; }
blue() { _paint 34 "$@"; }
# require TOOL
# Returns 0 when TOOL resolves via `command -v`. Otherwise prints a
# "missing dependency" line through red() and exits the script with status 1.
require() {
  local tool=$1
  if command -v "${tool}" >/dev/null 2>&1; then
    return 0
  fi
  red "[!] missing dependency: ${tool}"
  exit 1
}
# Fail fast if an external tool this script shells out to is missing, then
# show which server and model the remaining checks will target.
for dep in curl jq; do
  require "${dep}"
done
blue "[*] host: ${HOST}"
blue "[*] model: ${MODEL}"
# 1. Server up? Fetch the model list once and keep it: step 2 reads the same
# payload, so a second request is unnecessary, and a transient failure of a
# second request can no longer be misreported as "model not found".
if ! TAGS_JSON="$(curl -fsS "${HOST}/api/tags")"; then
  red "[!] Ollama not reachable at ${HOST}. Is 'ollama serve' running?"
  exit 1
fi
green "[+] server reachable"

# 2. Model present? Match case-insensitively: Ollama 0.24 normalizes
# model names at lookup but preserves whatever case was first registered
# on disk (e.g. `make load-bundle` may produce `Thanatos-27B:latest`
# even when invoked with TAG=thanatos-27b, if an earlier session left a
# Thanatos-27B manifest dir behind). The exact tag the user typed is
# still valid for `ollama run` — the comparison just needs to be
# case-folded to match.
#
# The comparison is an exact match on the full name, or on the name with the
# implicit ":latest" tag appended when MODEL carries no tag. A prefix match
# would also accept unrelated models such as `thanatos-27b-heretic:latest`,
# or a different tag of the same model, and the script would then fail later
# with a less helpful error.
if ! jq -e --arg m "${MODEL}" '
  ($m | ascii_downcase) as $want
  | any(.models[]?;
        ((.name // "") | ascii_downcase) as $have
        | $have == $want or $have == ($want + ":latest"))
' <<<"${TAGS_JSON}" >/dev/null; then
  red "[!] Model '${MODEL}' not found. Build it first:"
  red " ./scripts/build.sh # Q4_K_M"
  red " ./scripts/build.sh Q3_K_S # smaller quant"
  red " ./scripts/load_bundle.sh # load this repo's qwen36 bundle"
  # Show what the server does have, to make a tag or name mismatch obvious.
  # Best-effort only: a malformed payload must not mask the exit status below.
  echo "----- installed models -----"
  jq -r '.models[]?.name' <<<"${TAGS_JSON}" || true
  echo "----------------------------"
  exit 1
fi
green "[+] model present"
# 3. Capability guard. Ollama only lists `tools` under a model's capabilities
# when the Modelfile TEMPLATE exposes .Tools / .ToolCalls. If it is absent,
# /api/chat with a tools array returns 400 "does not support tools" even
# though plain chat works, so this check catches Modelfile regressions that
# strip or break the TEMPLATE.
CAPS="$(
  curl -fsS "${HOST}/api/show" \
    -H 'Content-Type: application/json' \
    -d "$(jq -n --arg m "${MODEL}" '{name: $m}')" \
    | jq -r '.capabilities[]?'
)"

# CAPS holds one capability per line; wrapping it in newlines lets a plain
# pattern match require `tools` to be an entire line, not a substring.
case $'\n'"${CAPS}"$'\n' in
  *$'\n'tools$'\n'*)
    green "[+] capabilities include: tools"
    ;;
  *)
    red "[!] model missing capability: tools"
    red " Modelfile likely missing TEMPLATE that references .Tools / .ToolCalls."
    echo "----- present capabilities -----"
    echo "${CAPS:-<none>}"
    echo "--------------------------------"
    exit 1
    ;;
esac
# 4. Round-trip: send PROMPT as a single non-streaming user message and keep
# only the assistant's text (.message.content).
blue "[*] sending test prompt..."
RESP="$(
  curl -fsS "${HOST}/api/chat" \
    -H 'Content-Type: application/json' \
    -d "$(jq -n --arg m "${MODEL}" --arg p "${PROMPT}" \
      '{model: $m, messages: [{role: "user", content: $p}], stream: false}')" \
    | jq -r '.message.content // empty'
)"
[[ -n "${RESP}" ]] || {
  red "[!] empty response from model"
  exit 1
}

# 5. Token-leakage guard: a chat-template control token appearing verbatim in
# the reply means the Modelfile stop-token list is broken and the model is
# running past EOS. This was seen in a real regression (commit 6672746): the
# model said OK, then emitted "<|endoftext|> <|im_start|>user ..." and Ollama
# kept generating.
LEAKED=()
for tok in '<|im_start|>' '<|im_end|>' '<|endoftext|>'; do
  # Quoted pattern => literal substring test, no glob interpretation.
  [[ "${RESP}" != *"${tok}"* ]] || LEAKED+=("${tok}")
done

if [[ ${#LEAKED[@]} -gt 0 ]]; then
  red "[!] response contains raw control tokens: ${LEAKED[*]}"
  red " Modelfile likely missing PARAMETER stop directives."
else
  green "[+] round-trip OK"
fi

# The transcript is printed on both outcomes; only the exit status differs.
echo "----- model said -----"
echo "${RESP}"
echo "----------------------"
[[ ${#LEAKED[@]} -eq 0 ]] || exit 1
# 6. Tool-call round-trip (opt-in via TOOLS_TEST=1)
#
# Capability advertisement (step 3) only checks the TEMPLATE references
# .Tools / .ToolCalls. It does NOT check the model actually emits a
# parseable tool call. A regression in the prompt scaffolding (e.g. the
# system-prompt instructions inside the TEMPLATE going stale) can leave
# capabilities reported correctly but tool calls failing — the assistant
# prose-describes the call instead of emitting <tool_call>{...}</tool_call>.
# This block sends a tools-array request, parses .message.tool_calls, and
# asserts the shape matches: at least one call, named get_weather, whose
# `city` argument equals "tokyo" ignoring case.
if [[ "${TOOLS_TEST:-0}" == "1" ]]; then
  blue "[*] tool-call round-trip..."
  TOOL_RESP="$(curl -fsS "${HOST}/api/chat" \
    -H 'Content-Type: application/json' \
    -d "$(jq -n --arg m "${MODEL}" '{
      model: $m,
      messages: [{role:"user", content:"Call get_weather for Tokyo. Respond ONLY with the tool call."}],
      tools: [{
        type: "function",
        function: {
          name: "get_weather",
          description: "Get the weather for a city",
          parameters: {
            type: "object",
            properties: {city: {type: "string"}},
            required: ["city"]
          }
        }
      }],
      stream: false,
      options: {num_predict: 1024, temperature: 0.3}
    }')")"

  # A missing or null tool_calls field counts as zero calls.
  TC_COUNT="$(jq -r '.message.tool_calls // [] | length' <<<"${TOOL_RESP}")"
  if [[ "${TC_COUNT}" -lt 1 ]]; then
    red "[!] model did not emit a tool call"
    echo "----- response -----"
    echo "${TOOL_RESP}" | jq .
    echo "--------------------"
    exit 1
  fi

  TC_NAME="$(jq -r '.message.tool_calls[0].function.name // empty' <<<"${TOOL_RESP}")"
  TC_CITY="$(jq -r '.message.tool_calls[0].function.arguments.city // empty' <<<"${TOOL_RESP}")"
  if [[ "${TC_NAME}" != "get_weather" ]]; then
    red "[!] unexpected tool name: '${TC_NAME}' (wanted 'get_weather')"
    exit 1
  fi

  # Case-fold with jq (already a required dependency) instead of the
  # ${var,,} expansion: that expansion needs Bash 4+, and on Bash 3.2 it
  # aborts with "bad substitution".
  TC_CITY_LC="$(jq -rn --arg c "${TC_CITY}" '$c | ascii_downcase')"
  if [[ "${TC_CITY_LC}" != "tokyo" ]]; then
    red "[!] unexpected city argument: '${TC_CITY}' (wanted 'Tokyo' case-insensitive)"
    exit 1
  fi
  green "[+] tool-call round-trip OK (name=${TC_NAME} city=${TC_CITY})"
fi