Spaces:

ai-engineering-at
/

llama-cpp-turboquant-guide

Running

llama-cpp-turboquant-guide / scripts /download-model.sh

AI Engineering Lab

Initial release: TurboQuant practical guide for consumer hardware

87efc66 about 2 months ago

2.09 kB

	#!/usr/bin/env bash
	# Download Mistral-Small-3.2-24B-Instruct Q4_K_M GGUF model
	#
	# Usage: export HF_TOKEN=hf_... && bash scripts/download-model.sh
	#
	# Always verify the repo name via HF Search API before downloading.
	# HF repo names change and compressed context can reconstruct them incorrectly.

	set -e

	MODEL_REPO="bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF"
	MODEL_FILE="mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf"
	VOLUME_NAME="${VOLUME_NAME:-turboquant-models}"

	if [ -z "$HF_TOKEN" ]; then
	echo "ERROR: HF_TOKEN not set."
	echo "Get a free token at: https://huggingface.co/settings/tokens"
	echo "Then: export HF_TOKEN=hf_..."
	exit 1
	fi

	echo "=== Verifying repo exists ==="
	HF_CHECK=$(curl -s -o /dev/null -w "%{http_code}" \
	-H "Authorization: Bearer $HF_TOKEN" \
	"https://huggingface.co/api/models/${MODEL_REPO}")

	if [ "$HF_CHECK" != "200" ]; then
	echo "ERROR: Repo not found or unauthorized (HTTP $HF_CHECK)"
	echo "Search for available repos:"
	curl -s -H "Authorization: Bearer $HF_TOKEN" \
	"https://huggingface.co/api/models?search=bartowski+mistral+small+3.2&limit=5" \
	\| python3 -c "import sys,json; [print(m['modelId']) for m in json.load(sys.stdin)]"
	exit 1
	fi

	echo "Repo: ${MODEL_REPO} ✓"
	echo "File: ${MODEL_FILE}"
	echo "Volume: ${VOLUME_NAME}"
	echo ""

	docker volume create ${VOLUME_NAME} 2>/dev/null \|\| true

	echo "=== Downloading (~14 GB, may take 20-30 min) ==="
	docker run --rm \
	-v ${VOLUME_NAME}:/models \
	-e HF_TOKEN="${HF_TOKEN}" \
	python:3.11-slim \
	bash -c "
	pip install -q huggingface_hub && \
	python -c \"
	import os
	from huggingface_hub import hf_hub_download
	path = hf_hub_download(
	repo_id='${MODEL_REPO}',
	filename='${MODEL_FILE}',
	local_dir='/models',
	resume_download=True,
	token=os.environ.get('HF_TOKEN')
	)
	print('Downloaded to:', path)
	print('Size: {:.1f} GB'.format(os.path.getsize(path) / 1e9))
	\"
	"

	echo ""
	echo "=== Done ==="
	echo "Model ready at: /models/${MODEL_FILE} (in Docker volume '${VOLUME_NAME}')"
	echo "Run: bash scripts/run-baseline.sh"