File size: 6,267 Bytes
ef3c5d9
7197abd
ef3c5d9
5426482
 
 
 
ef3c5d9
 
 
5426482
 
 
 
 
 
ef3c5d9
7197abd
 
ac94e67
ef3c5d9
 
7197abd
 
ef3c5d9
 
 
 
 
 
 
 
7197abd
 
ef3c5d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env bash
# Thanatos-27B — load this repo's bundle into Ollama as a local tag.
#
# The bundled GGUF (Thanatos-27B.Q4_K_M.gguf) is qwen35-stamped and
# loads directly on stock llama.cpp / Ollama. This script is the
# one-shot path from "I just cloned this repo" to "I have a working
# local Ollama tag":
#
#   1. Resolve the bundle. If it's an LFS pointer (cloned without
#      `git lfs pull`), download the real ~17 GB blob via `hf download`.
#   2. Inspect `general.architecture`. If qwen35 / qwen35moe (current
#      bundle), skip straight to step 3. If qwen36 (legacy v0.6.0 or
#      3rd-round-trip-era checkout), rebadge to qwen35 via
#      scripts/rename_arch.py (metadata-only, byte-identical tensors).
#   3. Run `ollama create <tag> -f <temp Modelfile pointing at the
#      resolved bundle>`.
#
# Useful if you want a bare local tag (`thanatos-27b`) rather than
# the `hf.co/FoolDev/Thanatos-27B` path. The legacy qwen36 rebadge
# branch is kept for anyone working from a pre-e03e10e checkout.
#
# Usage:
#   ./scripts/load_bundle.sh                 # default tag: thanatos-27b
#   TAG=thanatos-27b-bundle ./scripts/load_bundle.sh
#   BUNDLE=/path/to/Thanatos-27B.Q4_K_M.gguf ./scripts/load_bundle.sh
#
# Requires: ollama, python3 with the `gguf` package, and either `hf` or
# `huggingface-cli` (only if the bundle needs to be downloaded).
set -euo pipefail

# Repository root: the parent of the directory that holds this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Caller-overridable settings; a non-empty value from the environment wins
# over the default given here.
: "${BUNDLE:=${ROOT}/Thanatos-27B.Q4_K_M.gguf}"
: "${TAG:=thanatos-27b}"
: "${REPO_ID:=FoolDev/Thanatos-27B}"

# Template Modelfile whose FROM line is later re-pointed at the bundle.
MODELFILE="${ROOT}/Modelfile"

# Colored status printers. Each joins its arguments into a single string
# ("$*") and prints it on one line wrapped in an ANSI color sequence.
#   red   - failures; written to stderr so diagnostics never end up in
#           captured or piped stdout
#   green - success messages (stdout)
#   blue  - progress messages (stdout)
red()    { printf '\033[31m%s\033[0m\n' "$*" >&2; }
green()  { printf '\033[32m%s\033[0m\n' "$*"; }
blue()   { printf '\033[34m%s\033[0m\n' "$*"; }

blue "[*] bundle:    ${BUNDLE}"
blue "[*] tag:       ${TAG}"

# ---- 1. Sanity ---------------------------------------------------------------

# Check every hard prerequisite up front so a missing tool is reported
# immediately instead of after a multi-gigabyte bundle download.
if ! command -v ollama >/dev/null 2>&1; then
    red "[!] ollama not found in PATH"; exit 1
fi
if [[ ! -f "${MODELFILE}" ]]; then
    red "[!] missing ${MODELFILE}"; exit 1
fi
# The architecture inspection step always runs python3 with the gguf
# package, so verify the exact import it relies on.
if ! command -v python3 >/dev/null 2>&1; then
    red "[!] python3 not found in PATH"; exit 1
fi
if ! python3 -c 'from gguf import GGUFReader, constants' >/dev/null 2>&1; then
    red "[!] python3 cannot import the 'gguf' package (needed to inspect the bundle)"
    red "    pip install -U gguf"
    exit 1
fi

# ---- 2. Resolve bundle (smudge LFS pointer if needed) ------------------------

# Decide whether a path holds a real bundle rather than nothing or a
# Git LFS pointer stub.
#
# Arguments: $1 - path to the candidate bundle file
# Returns:   0 if the path is a regular file that does not look like an
#              LFS pointer
#            1 if the file is missing, or is under 1024 bytes and its first
#              line mentions 'git-lfs'
resolve_bundle() {
    local file="$1"
    [[ -f "${file}" ]] || return 1

    # Use `wc -c` rather than `stat -c '%s'`: the latter is GNU-only and
    # errors out on macOS/BSD, leaving the size empty.
    local size
    size="$(wc -c < "${file}")" || return 1
    size="${size//[[:space:]]/}"   # some wc implementations pad the count

    # LFS pointer files are tiny (a couple hundred bytes) and start with
    # `version https://git-lfs.github.com/spec/v1`.
    if (( size < 1024 )) && head -n1 "${file}" | grep -q 'git-lfs'; then
        return 1
    fi
    return 0
}

if ! resolve_bundle "${BUNDLE}"; then
    # The real blob lives in a side path under .cache/ so the LFS pointer
    # in the working tree is never overwritten: without git-lfs installed
    # the pointer does not auto-smudge, and the file in the repo root is
    # expected to stay a 136-byte stub. Later steps read whatever path
    # BUNDLE holds, so re-pointing BUNDLE at the cache is enough.
    CACHE_DIR="${ROOT}/.cache"
    BUNDLE_NAME="$(basename "${BUNDLE}")"
    CACHED="${CACHE_DIR}/${BUNDLE_NAME}"

    if ! resolve_bundle "${CACHED}"; then
        blue "[*] bundle missing or LFS-pointer-only — downloading from ${REPO_ID} to ${CACHED} ..."

        # Pick the first available Hugging Face CLI, preferring `hf`.
        HF=""
        for candidate in hf huggingface-cli; do
            if command -v "${candidate}" >/dev/null 2>&1; then
                HF="${candidate}"
                break
            fi
        done
        if [[ -z "${HF}" ]]; then
            red "[!] neither 'hf' nor 'huggingface-cli' installed; can't fetch bundle"
            red "    pip install -U huggingface_hub"
            exit 1
        fi

        mkdir -p "${CACHE_DIR}"
        # Both CLIs are invoked with the same `download` arguments.
        "${HF}" download "${REPO_ID}" "${BUNDLE_NAME}" --local-dir "${CACHE_DIR}"
    else
        blue "[=] using previously downloaded bundle at ${CACHED}"
    fi
    BUNDLE="${CACHED}"

    # Re-validate: the download may have produced nothing usable.
    resolve_bundle "${BUNDLE}" || {
        red "[!] still no usable bundle at ${BUNDLE} after download"; exit 1
    }
fi

# ---- 3. Inspect arch + rebadge if needed -------------------------------------

# Print the `general.architecture` string stored in a GGUF file.
# Arguments: $1 - path to the GGUF file
# Outputs:   the architecture name on stdout
# Returns:   non-zero if python3 / the gguf package cannot read the file
gguf_arch() {
    python3 - "$1" <<'PY'
import sys
from gguf import GGUFReader, constants
r = GGUFReader(sys.argv[1], "r")
f = r.get_field(constants.Keys.General.ARCHITECTURE)
print(bytes(f.parts[f.data[0]]).decode())
PY
}

# Write a qwen35-stamped copy of a qwen36 bundle via scripts/rename_arch.py.
# Arguments: $1 - source GGUF, $2 - destination GGUF (must not exist yet)
# Returns:   non-zero if the rename script fails; any partial destination
#            is removed so a later run does not mistake it for a good copy
rebadge_to_qwen35() {
    if ! python3 "${ROOT}/scripts/rename_arch.py" \
            --from-arch qwen36 --to-arch qwen35 \
            "$1" "$2"; then
        rm -f -- "$2"
        return 1
    fi
}

ARCH="$(gguf_arch "${BUNDLE}")"
blue "[*] bundle arch: ${ARCH}"

LOAD_TARGET="${BUNDLE}"
if [[ "${ARCH}" == "qwen36" ]]; then
    REBADGED="${BUNDLE%.gguf}.qwen35.gguf"

    # An existing copy is reused only if it can be read and is stamped
    # qwen35. A truncated or corrupt copy (e.g. from an interrupted run)
    # reads as empty here and is regenerated rather than aborting the script.
    REBADGED_ARCH=""
    if [[ -f "${REBADGED}" ]]; then
        REBADGED_ARCH="$(gguf_arch "${REBADGED}" 2>/dev/null)" || REBADGED_ARCH=""
    fi

    if [[ "${REBADGED_ARCH}" == "qwen35" ]]; then
        blue "[=] rebadged copy already present at ${REBADGED} — reusing."
    else
        if [[ -f "${REBADGED}" ]]; then
            blue "[*] existing ${REBADGED} has arch=${REBADGED_ARCH:-unreadable}, regenerating ..."
            rm -f -- "${REBADGED}"
        else
            blue "[*] rebadging qwen36 -> qwen35 (metadata only, tensors byte-identical) ..."
        fi
        rebadge_to_qwen35 "${BUNDLE}" "${REBADGED}"
    fi
    LOAD_TARGET="${REBADGED}"
elif [[ "${ARCH}" != "qwen35" && "${ARCH}" != "qwen35moe" ]]; then
    red "[!] unexpected arch '${ARCH}' — refusing to load. Edit this script if intentional."
    exit 1
fi

# ---- 4. Build a Modelfile copy with FROM pointing at LOAD_TARGET -------------

# Temporary Modelfile copy; removed on any exit path by the EXIT trap.
TMP_MODELFILE="$(mktemp -t thanatos27b-loadbundle.XXXXXX)"
trap 'rm -f "${TMP_MODELFILE}"' EXIT

# Copy MODELFILE, replacing only the first line that starts with `FROM`
# followed by whitespace, so that it points at LOAD_TARGET.
# The path is handed to awk through the environment rather than `-v`,
# because `-v` assignments undergo backslash-escape processing and would
# mangle a path containing a backslash.
# awk exits non-zero when no FROM line was replaced, so an unpatched copy
# is never passed on to `ollama create`.
if ! LOAD_TARGET_PATH="${LOAD_TARGET}" awk '
    /^FROM[[:space:]]/ && !done { print "FROM " ENVIRON["LOAD_TARGET_PATH"]; done=1; next }
    { print }
    END { if (!done) exit 1 }
' "${MODELFILE}" > "${TMP_MODELFILE}"; then
    red "[!] could not rewrite FROM in ${MODELFILE} (unreadable, or no FROM line)"
    exit 1
fi

# ---- 5. Create the Ollama model ----------------------------------------------

# Register the tag with Ollama from the patched Modelfile copy.
blue "[*] ollama create ${TAG} -f <patched modelfile pointing at ${LOAD_TARGET}>"
ollama create "${TAG}" -f "${TMP_MODELFILE}"

# Closing hints: a blank line, a green banner, then two indented commands.
printf '\n'
green "[+] Done. Try it:"
printf '    %s\n' \
    "ollama run ${TAG}" \
    "MODEL=${TAG} make smoke"