nanogpt-tr-v5-code / thunder_setup.sh
musabc's picture
Upload thunder_setup.sh
4a57982 verified
Raw
History Blame Contribute Delete
1.89 kB
#!/bin/bash
# Thunder Compute H100 instance bootstrap script.
#
# Verifies the GPU is visible, installs PyTorch (CUDA 12.4 wheels) and the
# remaining Python dependencies, then fetches and runs hf_pull_v5.py.
#
# Usage:
#   chmod +x thunder_setup.sh
#   ./thunder_setup.sh
#   HF_TOKEN=<token> ./thunder_setup.sh musabc
#
# Arguments:
#   $1        Hugging Face user/namespace that owns the repos (default: musabc)
# Environment:
#   HF_TOKEN  Optional Hugging Face access token; only checked for presence
#             here so the user gets a hint when it is missing.

# -e: abort on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails when any stage fails, not only the last one.
set -euo pipefail

# Deliberately NOT named USER: that is the login-name environment variable and
# overwriting it would leak the HF namespace into every child process.
HF_USER="${1:-musabc}"

echo "=========================================="
echo "Thunder Compute V5 Bootstrap"
echo "HF user: $HF_USER"
echo "=========================================="

# GPU check
echo ""
echo "[1/6] GPU kontrol..."
# Capture the report first instead of piping nvidia-smi straight into head:
# the pipe would hide a missing/broken driver behind head's exit status.
if ! smi_report=$(nvidia-smi); then
  echo "HATA: nvidia-smi calistirilamadi (GPU/surucu yok mu?)" >&2
  exit 1
fi
head -n 25 <<<"$smi_report"
# One GPU name per line is printed; keep only the first line.
gpu_names=$(nvidia-smi --query-gpu=name --format=csv,noheader)
GPU_NAME=${gpu_names%%$'\n'*}
echo " GPU: $GPU_NAME"

# Python environment
echo ""
echo "[2/6] Python ortami..."
python --version
pip install --upgrade pip

# PyTorch (H100 needs CUDA 12.4+)
echo ""
echo "[3/6] PyTorch (CUDA 12.4) kurulumu..."
pip install --index-url https://download.pytorch.org/whl/cu124 \
  torch torchvision torchaudio

# Remaining dependencies
echo ""
echo "[4/6] Bagimliliklar..."
py_deps=(
  huggingface_hub
  tokenizers
  numpy
  tqdm
  pyarrow
  liger-kernel
)
pip install "${py_deps[@]}"

# HF token check (informational only; nothing is done with the token here)
echo ""
echo "[5/6] HuggingFace token kontrol..."
# ${HF_TOKEN:-} keeps `set -u` from aborting when the variable is unset.
if [[ -z "${HF_TOKEN:-}" ]]; then
  echo " HF_TOKEN env var yok — public repo varsayilir"
  echo " Private icin: export HF_TOKEN=hf_..."
fi

# Download data + checkpoint + code
echo ""
echo "[6/6] Veri + kod indiriliyor..."
if [[ ! -f "hf_pull_v5.py" ]]; then
  echo " hf_pull_v5.py yok, HF'ten cekiliyor..."
  # Try the Hugging Face CLI first; fall back to a plain HTTP download of the
  # same file if the CLI call fails. A failing wget aborts the script (set -e).
  if ! huggingface-cli download "$HF_USER/nanogpt-tr-v5-code" \
    hf_pull_v5.py --local-dir .; then
    wget "https://huggingface.co/$HF_USER/nanogpt-tr-v5-code/resolve/main/hf_pull_v5.py"
  fi
fi
python hf_pull_v5.py --user "$HF_USER"

echo ""
echo "=========================================="
echo "KURULUM TAMAM"
echo "=========================================="
echo ""
echo "GPU: $GPU_NAME"
echo ""
echo "Egitime devam:"
echo " python 05_train_v5.py --compile --resume"
echo ""
echo "Sample testi:"
echo " python 06_sample.py --latest"