nghyane/train-lama / scripts /train_server.sh
nghyane's picture
download
raw
2.39 kB
#!/bin/bash
set -euo pipefail
# ═══════════════════════════════════════════════════════════════
# LaMa Training — Run ON the GPU server
#
# Setup:
# pip install huggingface_hub
# huggingface-cli download nghyane/train-lama --repo-type bucket --local-dir /workspace
# bash scripts/train_server.sh
# ═══════════════════════════════════════════════════════════════
# Every relative path used below (data/, models/, runs/, scripts/) is resolved
# from the workspace root.
cd /workspace
# ── Install deps ──
# First attempt passes --break-system-packages with stderr silenced; if that
# invocation fails for any reason, retry as a plain install (errors visible).
PY_DEPS=(safetensors tensorboard tqdm Pillow)
if ! pip install --break-system-packages -q "${PY_DEPS[@]}" 2>/dev/null; then
  pip install -q "${PY_DEPS[@]}"
fi
# ── Preflight: CUDA check, GPU report, dataset size ──
# Aborts (AssertionError → non-zero exit) when no CUDA device is visible, so a
# misconfigured server is caught before any training work starts.
# The device-properties object exposes its memory size in bytes as
# `total_memory`; there is no `total_mem` attribute.
python3 -c "
import torch
assert torch.cuda.is_available(), 'CUDA not available!'
props = torch.cuda.get_device_properties(0)
print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')
print(f'GPU: {torch.cuda.get_device_name(0)} ({props.total_memory // 1024**3}GB)')
"
# Count candidate training images (webp/png/jpg) anywhere under data/training.
IMG_COUNT=$(find data/training -type f \( -name "*.webp" -o -name "*.png" -o -name "*.jpg" \) | wc -l)
echo "Training images: $IMG_COUNT"
# ── Auto batch size ──
#######################################
# Map GPU memory to a training batch size.
# Arguments: $1 - total GPU memory in whole GiB (bytes // 1024**3)
# Outputs:   batch size on stdout: 16 for >=40, 8 for >=20, otherwise 4
#######################################
batch_size_for_gpu_mem() {
  local mem_gb=$1
  if [ "$mem_gb" -ge 40 ]; then
    echo 16
  elif [ "$mem_gb" -ge 20 ]; then
    echo 8
  else
    echo 4
  fi
}

# Memory of device 0, floored to whole GiB.
GPU_MEM=$(python3 -c "import torch; print(torch.cuda.get_device_properties(0).total_memory // 1024**3)")
BS=$(batch_size_for_gpu_mem "$GPU_MEM")
echo "GPU ${GPU_MEM}GB → batch_size=$BS"
# ── Train: crop=512, perceptual=0.3 ──
# tee opens train.log the moment the pipeline starts. If the run directory is
# missing at that point, tee cannot open the log, exits non-zero, and (with
# pipefail + set -e) the script would abort right after training, before the
# export step, with nothing logged. Create the directory up front, regardless
# of whether finetune_lama.py also creates --output_dir itself.
RUN_DIR=runs/lama_finetune
mkdir -p "$RUN_DIR"
# stderr is merged into stdout so the log captures both streams.
python3 scripts/finetune_lama.py \
  --data_dir data/training \
  --pretrained models/lama-manga.safetensors \
  --output_dir "$RUN_DIR" \
  --epochs 80 --batch_size "$BS" --crop_size 512 \
  --lr 1e-4 --perceptual_weight 0.3 \
  --workers 8 --device cuda --save_every 5 \
  2>&1 | tee "$RUN_DIR/train.log"
# ── Export ──
# Invoke the trainer script in --export mode on runs/lama_finetune/best.pt,
# writing the result to FT_MODEL, then print where the file is and a one-liner
# for serving it over HTTP.
FT_MODEL=/workspace/lama-manga-ft.safetensors
python3 scripts/finetune_lama.py --export runs/lama_finetune/best.pt -o "$FT_MODEL"
BANNER_RULE="════════════════════════════════════════"
printf '%s\n' \
  "" \
  "$BANNER_RULE" \
  " Done! Model: $FT_MODEL" \
  " Download: python3 -m http.server 9998" \
  "$BANNER_RULE"

Xet Storage Details

Size:
2.39 kB
·
Xet hash:
8f0fc3d4acbe113726a4dcf06beb8832cb19364c594ce37b1ac383e50d166e25

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.