#!/usr/bin/env bash
# Thanatos-27B — smoke test against a running Ollama daemon.
#
# Verifies:
#   1. The Ollama server is reachable.
#   2. The target model is loaded / loadable.
#   3. The model exposes the `tools` capability (Modelfile TEMPLATE wired).
#   4. A single chat round-trip succeeds and produces non-empty output.
#   5. No chat-template control tokens leak into the response.
#   6. (TOOLS_TEST=1) An end-to-end tool-call round-trip emits a structured
#      tool_calls array with the expected name and arguments. Off by default
#      because it costs ~5-10 sec of inference; on for comprehensive runs.
#
# Usage:
#   ./scripts/smoke_test.sh                  # fast checks only
#   TOOLS_TEST=1 ./scripts/smoke_test.sh     # add tool-call round-trip
#   MODEL=hf.co/FoolDev/Thanatos-27B:Q4_K_M ./scripts/smoke_test.sh
#   HOST=http://localhost:11434 ./scripts/smoke_test.sh
#   PROMPT='Say hi' ./scripts/smoke_test.sh  # override the test prompt
#
# Exit status: 0 when every enabled check passes, 1 on the first failure.

set -euo pipefail

# Environment overrides (each falls back to the default shown).
MODEL="${MODEL:-thanatos-27b}"
HOST="${HOST:-http://localhost:11434}"
PROMPT="${PROMPT:-Reply with the single word: OK}"

# Coloured single-line loggers; all arguments are joined into one message.
red() { printf "\033[31m%s\033[0m\n" "$*"; }
green() { printf "\033[32m%s\033[0m\n" "$*"; }
blue() { printf "\033[34m%s\033[0m\n" "$*"; }

# Abort with exit status 1 unless the command named by $1 is on PATH.
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    red "[!] missing dependency: $1"
    exit 1
  fi
}

require curl
require jq

blue "[*] host: ${HOST}"
blue "[*] model: ${MODEL}"

# 1. Server up? The tag list is downloaded once here and reused by step 2,
#    so both checks look at the same snapshot and the daemon is hit once.
if ! TAGS_JSON="$(curl -fsS "${HOST}/api/tags")"; then
  red "[!] Ollama not reachable at ${HOST}. Is 'ollama serve' running?"
  exit 1
fi
green "[+] server reachable"

# 2. Model present? Match case-insensitively: Ollama 0.24 normalizes
#    model names at lookup but preserves whatever case was first registered
#    on disk (e.g. `make load-bundle` may produce `Thanatos-27B:latest`
#    even when invoked with TAG=thanatos-27b, if an earlier session left a
#    Thanatos-27B manifest dir behind). The exact tag the user typed is
#    still valid for `ollama run` — the comparison just needs to be
#    case-folded to match.
#
#    A registered name counts as a match when it equals MODEL, or when it is
#    MODEL followed by ":<tag>". Anchoring on the ":" keeps a sibling such as
#    "thanatos-27b-old:latest" from satisfying MODEL=thanatos-27b.
if ! jq -e --arg m "${MODEL}" '
    ($m | ascii_downcase) as $want
    | .models[]
    | (.name | ascii_downcase) as $have
    | select($have == $want or ($have | startswith($want + ":")))
  ' <<<"${TAGS_JSON}" >/dev/null; then
  red "[!] Model '${MODEL}' not found. Build it first:"
  red " ./scripts/build.sh # Q4_K_M"
  red " ./scripts/build.sh Q3_K_S # smaller quant"
  red " ./scripts/load_bundle.sh # load this repo's qwen36 bundle"
  exit 1
fi
green "[+] model present"

# 3. Capability guard: the Modelfile TEMPLATE must expose .Tools / .ToolCalls
#    so Ollama lists `tools` under capabilities. Without it, /api/chat with a
#    tools array returns 400 "does not support tools" even though plain chat
#    works. Catches Modelfile regressions that strip or break the TEMPLATE.
#
#    The request carries both `model` (current API field) and `name` (legacy
#    field) so the lookup works regardless of which one the daemon reads.
SHOW_BODY="$(jq -n --arg m "${MODEL}" '{model: $m, name: $m}')"
if ! SHOW_JSON="$(curl -fsS "${HOST}/api/show" \
    -H 'Content-Type: application/json' \
    -d "${SHOW_BODY}")"; then
  red "[!] could not query '${MODEL}' via ${HOST}/api/show"
  exit 1
fi
# One capability per line; `[]?` yields nothing when the field is absent.
CAPS="$(jq -r '.capabilities[]?' <<<"${SHOW_JSON}")"
if ! grep -qx -- 'tools' <<<"${CAPS}"; then
  red "[!] model missing capability: tools"
  red " Modelfile likely missing TEMPLATE that references .Tools / .ToolCalls."
  echo "----- present capabilities -----"
  echo "${CAPS:-}"
  echo "--------------------------------"
  exit 1
fi
green "[+] capabilities include: tools"

# 4. Round-trip
blue "[*] sending test prompt..."
CHAT_BODY="$(jq -n --arg m "${MODEL}" --arg p "${PROMPT}" '{
  model: $m,
  messages: [{role:"user", content:$p}],
  stream: false
}')"
# Report transport/HTTP failures explicitly instead of letting `set -e`
# terminate the script with nothing but curl's own message.
if ! CHAT_JSON="$(curl -fsS "${HOST}/api/chat" \
    -H 'Content-Type: application/json' \
    -d "${CHAT_BODY}")"; then
  red "[!] chat request to ${HOST}/api/chat failed"
  exit 1
fi
RESP="$(jq -r '.message.content // empty' <<<"${CHAT_JSON}")"
if [[ -z "${RESP}" ]]; then
  red "[!] empty response from model"
  exit 1
fi

# 5. Token-leakage guard: if any of the chat-template control tokens show up
#    verbatim in the response, the Modelfile stop-token list is broken and
#    the model is bleeding past EOS. We caught this in a real regression
#    (commit 6672746) — the model said OK then emitted "<|endoftext|>
#    <|im_start|>user ..." and Ollama kept generating.
LEAKED=()
for tok in '<|im_start|>' '<|im_end|>' '<|endoftext|>'; do
  # Literal substring test: the quoted "${tok}" is not treated as a glob.
  if [[ "${RESP}" == *"${tok}"* ]]; then
    LEAKED+=("${tok}")
  fi
done
if (( ${#LEAKED[@]} )); then
  red "[!] response contains raw control tokens: ${LEAKED[*]}"
  red " Modelfile likely missing PARAMETER stop directives."
  echo "----- model said -----"
  echo "${RESP}"
  echo "----------------------"
  exit 1
fi
green "[+] round-trip OK"
echo "----- model said -----"
echo "${RESP}"
echo "----------------------"

# Print $1 lowercased on stdout (no trailing newline).
# Implemented with tr instead of the ${var,,} expansion, which needs bash 4+
# and aborts with "bad substitution" on bash 3.2.
to_lower() {
  printf '%s' "$1" | tr '[:upper:]' '[:lower:]'
}

# Pretty-print the raw tool-call response ($1, JSON text) between rulers so
# every failure branch below shows what the model actually returned.
dump_tool_response() {
  echo "----- response -----"
  echo "$1" | jq .
  echo "--------------------"
}

# 6. Tool-call round-trip (opt-in via TOOLS_TEST=1)
#
# Capability advertisement (step 3) only checks the TEMPLATE references
# .Tools / .ToolCalls. It does NOT check the model actually emits a
# parseable tool call. A regression in the prompt scaffolding (e.g. the
# system-prompt instructions inside the TEMPLATE going stale) can leave
# capabilities reported correctly but tool calls failing — the assistant
# prose-describes the call instead of emitting {...}.
# This block sends a tools-array request, parses .message.tool_calls, and
# asserts the shape matches.
if [[ "${TOOLS_TEST:-0}" == "1" ]]; then
  blue "[*] tool-call round-trip..."
  TOOL_REQ="$(jq -n --arg m "${MODEL}" '{
    model: $m,
    messages: [{role:"user", content:"Call get_weather for Tokyo. Respond ONLY with the tool call."}],
    tools: [{
      type: "function",
      function: {
        name: "get_weather",
        description: "Get the weather for a city",
        parameters: {
          type: "object",
          properties: {city: {type: "string"}},
          required: ["city"]
        }
      }
    }],
    stream: false,
    options: {num_predict: 1024, temperature: 0.3}
  }')"
  TOOL_RESP="$(curl -fsS "${HOST}/api/chat" \
    -H 'Content-Type: application/json' \
    -d "${TOOL_REQ}")"

  # A missing or null tool_calls field counts as zero calls.
  TC_COUNT="$(jq -r '.message.tool_calls // [] | length' <<<"${TOOL_RESP}")"
  if [[ "${TC_COUNT}" -lt 1 ]]; then
    red "[!] model did not emit a tool call"
    dump_tool_response "${TOOL_RESP}"
    exit 1
  fi

  # Only the first tool call is inspected.
  TC_NAME="$(jq -r '.message.tool_calls[0].function.name // empty' <<<"${TOOL_RESP}")"
  TC_CITY="$(jq -r '.message.tool_calls[0].function.arguments.city // empty' <<<"${TOOL_RESP}")"
  if [[ "${TC_NAME}" != "get_weather" ]]; then
    red "[!] unexpected tool name: '${TC_NAME}' (wanted 'get_weather')"
    dump_tool_response "${TOOL_RESP}"
    exit 1
  fi
  # The city is compared case-insensitively ("Tokyo", "tokyo", "TOKYO").
  if [[ "$(to_lower "${TC_CITY}")" != "tokyo" ]]; then
    red "[!] unexpected city argument: '${TC_CITY}' (wanted 'Tokyo' case-insensitive)"
    dump_tool_response "${TOOL_RESP}"
    exit 1
  fi
  green "[+] tool-call round-trip OK (name=${TC_NAME} city=${TC_CITY})"
fi