prometheus04
/

qwen3-4b-thinking-microagent

Model card Files Files and versions

qwen3-4b-thinking-microagent / scripts /setup_a100.sh

prometheus04's picture

Upload folder using huggingface_hub

a15ae41 verified 11 days ago

history blame contribute delete

2.12 kB

	#!/usr/bin/env bash
	# One-shot installer for the A100 training box.
	# Tested combos that don't fight each other (May 2026):
	# torch 2.5.1 + cu124 + unsloth 2025.5 + flash-attn 2.7.x + bitsandbytes 0.43.x
	#
	# Run on a fresh A100 instance (Lambda/RunPod/Modal):
	# bash scripts/setup_a100.sh

	# Strict mode: stop on the first failing command, on any unset variable,
	# and on a failure in any stage of a pipeline.
	set -euo pipefail

	# Fail fast when the interpreter is missing. Later steps of this script
	# invoke `python` directly, so there is no point running the installs
	# without it.
	if ! command -v python >/dev/null 2>&1; then
	  echo "[setup] ERROR: 'python' not found on PATH" >&2
	  exit 1
	fi

	# Capture the version in its own assignment: a command substitution placed
	# inside an echo argument would hide a non-zero exit status from `set -e`.
	python_version="$(python --version)"
	echo "[setup] python: ${python_version}"

	# Report GPU name, total memory and driver version as CSV.
	nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv

	# Installer front-end: use uv when it is on PATH (the original note claims
	# it is ~10x faster than pip), otherwise fall back to pip with upgrades.
	# The command lives in an array so that "${PIP[@]}" expands to exactly one
	# argument per word, instead of relying on unquoted word splitting.
	# NOTE(review): `uv pip install` expects a virtual environment unless
	# --system is given — confirm one is active on the target box.
	if command -v uv >/dev/null 2>&1; then
	  PIP=(uv pip install)
	else
	  PIP=(pip install -U)
	fi

	echo "[setup] installing torch (CUDA 12.4 build, A100 sm_80 compatible)..."
	# torch/torchvision are pulled from the PyTorch cu124 wheel index rather
	# than the default package index.
	"${PIP[@]}" --index-url https://download.pytorch.org/whl/cu124 \
	  "torch==2.5.1" "torchvision==0.20.1"

	echo "[setup] installing core HF stack..."
	# Only transformers has an upper bound; the rest are lower-bounded.
	"${PIP[@]}" \
	  "transformers>=4.46,<4.50" \
	  "datasets>=3.0" \
	  "accelerate>=1.0" \
	  "peft>=0.13" \
	  "trl>=0.13" \
	  "huggingface_hub>=0.26"

	echo "[setup] installing Flash Attention 2 (prebuilt wheel for cu124 + torch2.5)..."
	# Intent: get a prebuilt wheel — building from source on a fresh box
	# takes 30+ min.
	# NOTE(review): nothing on this command line forces a prebuilt wheel; it
	# only pins the version and disables build isolation (presumably so a
	# build can see the torch installed above) — confirm.
	"${PIP[@]}" "flash-attn==2.7.4.post1" --no-build-isolation

	echo "[setup] installing bitsandbytes for 8-bit paged AdamW..."
	"${PIP[@]}" "bitsandbytes>=0.43"

	echo "[setup] installing Unsloth..."
	# Unsloth pins its own torch/cuda combos via extras.
	# NOTE(review): the extra is named for torch 2.5.0 while torch 2.5.1 is
	# pinned above, and the package comes from the repository's default
	# branch rather than a tagged release — confirm both are intended.
	"${PIP[@]}" "unsloth[cu124-torch250] @ git+https://github.com/unslothai/unsloth.git"
	"${PIP[@]}" "unsloth_zoo"

	echo "[setup] sanity checks..."
	# Run an inline Python smoke test. Any import error or exception makes
	# python exit non-zero, which aborts the script under `set -e`.
	# `<<-` (not `<<`) strips leading tabs from the here-document body and from
	# the closing delimiter, so the tab-indented block below terminates at PY
	# and Python receives the code starting at column 0. The delimiter is
	# quoted, so the shell performs no expansion inside the body.
	python - <<-'PY'
	import torch
	print(f"torch: {torch.__version__}")
	print(f"cuda available: {torch.cuda.is_available()}")
	# The next two calls query device index 0.
	print(f"device: {torch.cuda.get_device_name(0)}")
	print(f"compute capability: {torch.cuda.get_device_capability(0)}")

	import flash_attn
	print(f"flash_attn: {flash_attn.__version__}")

	import bitsandbytes
	print(f"bitsandbytes: {bitsandbytes.__version__}")

	# The import itself is the check; the imported name is not used further.
	from unsloth import FastLanguageModel
	print("unsloth: imported OK")

	# Probe TF32 / BF16: report bf16 support and the current value of the
	# matmul TF32 flag (this only reads the flag, it does not change it).
	print(f"bf16 supported: {torch.cuda.is_bf16_supported()}")
	print(f"tf32 enabled: {torch.backends.cuda.matmul.allow_tf32}")
	PY

	echo "[setup] DONE — ready to train"