#!/bin/bash
# ───────────────────────────────────────────────────────────
# MonSub v3 — A40 Setup Script
# Runs on a RunPod A40 48GB pod
# ───────────────────────────────────────────────────────────
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ── HF token ──
# HF_TOKEN is expected to be configured in the RunPod environment. Default it
# to empty so that `set -u` does not abort the script when it is missing, and
# say so on stderr instead of continuing silently.
export HF_TOKEN="${HF_TOKEN:-}"
export HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
if [[ -z "$HF_TOKEN" ]]; then
  echo "WARNING: HF_TOKEN is not set" >&2
fi

# ── IMPORTANT: point caches and temp files at the workspace volume ──
# The container disk (50GB) fills up, so the workspace volume is used instead.
export HF_HOME=/workspace/.cache
export TMPDIR=/workspace/tmp
mkdir -p -- "$HF_HOME" "$TMPDIR"
# Startup banner: show which cache and temp directories are in effect.
printf '%s\n' \
  "==============================================" \
  "MonSub v3 โ A40 Setup" \
  "HF_HOME=$HF_HOME" \
  "TMPDIR=$TMPDIR" \
  "=============================================="
# ── Dependencies ──
printf '\n%s\n' "=== Installing dependencies ==="
# datasets is pinned to 2.21.0 on purpose (REQUIRED): per the original note,
# the latest release leads to a torchcodec ImportError.
pip_packages=(
  "transformers>=4.46.0"
  "datasets==2.21.0"
  accelerate
  evaluate
  jiwer
  soundfile
  librosa
)
pip install -q "${pip_packages[@]}"
# ── GPU check ──
# Print the name and total memory of CUDA device 0, or a warning when PyTorch
# reports that CUDA is unavailable. The heredoc delimiter is quoted so the
# shell passes the Python source through untouched.
printf '\n%s\n' "=== GPU Info ==="
python - <<'PY'
import torch

if not torch.cuda.is_available():
    print('WARNING: No GPU!')
else:
    device_name = torch.cuda.get_device_name(0)
    # total_memory is divided by 1e9 for display in GB.
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU: {device_name}')
    print(f'VRAM: {vram_gb:.1f}GB')
PY
# ── Download training script ──
# Best-effort fetch of run_finetune_v3.py from Hugging Face into /workspace
# (alternatively, paste the script there by hand). A failed download is
# reported with its cause but does not abort this script.
echo ""
echo "=== Downloading training script ==="
cd /workspace
python - <<'PY'
import os
import shutil
import sys

from huggingface_hub import hf_hub_download

try:
    path = hf_hub_download(
        'Tsedee/monsub-training-scripts',
        'run_finetune_v3.py',
        # Treat an unset or empty HF_TOKEN as "no explicit token" rather than
        # passing an empty string through.
        token=os.environ.get('HF_TOKEN') or None,
    )
    shutil.copy(path, '/workspace/run_finetune_v3.py')
    print('Downloaded from HF')
except Exception as exc:
    # Deliberately non-fatal, but show why it failed; previously a missing
    # `import os` raised NameError here and the bare except hid it.
    print(f'HF download failed ({exc!r}) - paste the script manually',
          file=sys.stderr)
PY
# ── Start training ──
# Run the fine-tuning script and mirror its combined stdout/stderr into the
# log file.
if [[ ! -f /workspace/run_finetune_v3.py ]]; then
  # Neither the download nor a manual paste produced the script; stop with a
  # clear message instead of letting python fail inside the pipeline.
  echo "ERROR: /workspace/run_finetune_v3.py not found" >&2
  exit 1
fi

echo ""
echo "=== Starting v3 training ==="
echo "Log: /workspace/train_v3.log"
echo ""

# Without pipefail the pipeline's exit status is tee's, so a crashed training
# run would still make this script exit 0.
set -o pipefail
python /workspace/run_finetune_v3.py 2>&1 | tee /workspace/train_v3.log