Instructions to use prometheus04/qwen3-4b-thinking-microagent with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use prometheus04/qwen3-4b-thinking-microagent with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
# One-shot installer for the A100 training box.
#
# Tested combos that don't fight each other (May 2026):
#   torch 2.5.1 + cu124 + unsloth 2025.5 + flash-attn 2.7.x + bitsandbytes 0.43.x
#
# Run on a fresh A100 instance (Lambda/RunPod/Modal):
#   bash scripts/setup_a100.sh
#
# Steps: report the interpreter and GPU, pick an installer (uv if present,
# otherwise pip), install torch / the HF stack / flash-attn / bitsandbytes /
# Unsloth in that order, then import everything once as a sanity check.
# Any failing step aborts the script (set -e).
set -euo pipefail

# Fail before downloading anything if the interpreter is missing. A failing
# command substitution inside an `echo` argument does not trip `set -e`, so
# the version is captured in a plain assignment, whose status is honoured.
if ! command -v python >/dev/null 2>&1; then
  echo "[setup] ERROR: 'python' not found on PATH" >&2
  exit 1
fi
python_version=$(python --version)
echo "[setup] python: ${python_version}"
nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv

# Core wheel mirror (use UV if available — 10x faster than pip).
# The command is kept in an array so it expands to exact words without
# relying on unquoted word-splitting of a string.
if command -v uv >/dev/null 2>&1; then
  installer=(uv pip install)
else
  installer=(pip install -U)
fi

echo "[setup] installing torch (CUDA 12.4 build, A100 sm_80 compatible)..."
"${installer[@]}" --index-url https://download.pytorch.org/whl/cu124 \
  "torch==2.5.1" "torchvision==0.20.1"

echo "[setup] installing core HF stack..."
"${installer[@]}" \
  "transformers>=4.46,<4.50" \
  "datasets>=3.0" \
  "accelerate>=1.0" \
  "peft>=0.13" \
  "trl>=0.13" \
  "huggingface_hub>=0.26"

echo "[setup] installing Flash Attention 2 (prebuilt wheel for cu124 + torch2.5)..."
# Use the prebuilt wheel — building from source on a fresh box takes 30+ min.
# --no-build-isolation lets the build see the torch installed above.
"${installer[@]}" "flash-attn==2.7.4.post1" --no-build-isolation

echo "[setup] installing bitsandbytes for 8-bit paged AdamW..."
"${installer[@]}" "bitsandbytes>=0.43"

echo "[setup] installing Unsloth..."
# Unsloth pins its own torch/cuda combos via extras.
# NOTE(review): the extra is named cu124-torch250 while torch 2.5.1 is
# installed above — confirm against Unsloth's install docs that this is the
# intended extra for 2.5.1.
"${installer[@]}" "unsloth[cu124-torch250] @ git+https://github.com/unslothai/unsloth.git"
"${installer[@]}" "unsloth_zoo"

echo "[setup] sanity checks..."
# Quoted heredoc delimiter: the Python source is passed through literally,
# with no shell expansion. A non-zero exit from Python aborts the script.
python - <<'PY'
import sys

import torch

print(f"torch: {torch.__version__}")
print(f"cuda available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # Stop with a readable message instead of a traceback from the device
    # queries below.
    sys.exit("[setup] ERROR: torch cannot see a CUDA device")
print(f"device: {torch.cuda.get_device_name(0)}")
print(f"compute capability: {torch.cuda.get_device_capability(0)}")

import flash_attn
print(f"flash_attn: {flash_attn.__version__}")

import bitsandbytes
print(f"bitsandbytes: {bitsandbytes.__version__}")

from unsloth import FastLanguageModel
print("unsloth: imported OK")

# Probe TF32 / BF16 (reports the current settings; does not change them)
print(f"bf16 supported: {torch.cuda.is_bf16_supported()}")
print(f"tf32 enabled: {torch.backends.cuda.matmul.allow_tf32}")
PY

echo "[setup] DONE — ready to train"