File size: 2,364 Bytes
da6917d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# MonSub v3 โ€” A40 Setup Script
# RunPod A40 48GB pod ะดััั€ ะฐะถะธะปะปัƒัƒะปะฝะฐ
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•

# Abort on the first failing command (-e) and make a pipeline fail when any
# of its stages fails (pipefail). Without pipefail, the final
# `python ... | tee ...` pipeline reports tee's status, so a crashed training
# run would still make this script exit 0.
set -eo pipefail

# ── HF Token ──
# HF_TOKEN is expected to be set in the RunPod environment; re-export it and
# mirror it under the HUGGINGFACE_HUB_TOKEN name.
export HF_TOKEN="$HF_TOKEN"
HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
export HUGGINGFACE_HUB_TOKEN

# ── IMPORTANT: point caches at the workspace volume ──
# The container disk (50GB) fills up, so the workspace volume is used instead.
HF_HOME=/workspace/.cache
TMPDIR=/workspace/tmp
export HF_HOME TMPDIR
mkdir -p "$HF_HOME" "$TMPDIR"

# Print a start-up banner showing the cache and temp locations in effect.
# The title uses a real em dash; the previous text contained mis-decoded bytes.
printf '%s\n' \
  "==============================================" \
  "MonSub v3 — A40 Setup" \
  "HF_HOME=$HF_HOME" \
  "TMPDIR=$TMPDIR" \
  "=============================================="

# ── Dependencies ──
# Python packages installed for the v3 run.
# datasets==2.21.0 is REQUIRED: the latest release fails with a torchcodec
# ImportError.
pip_packages=(
  "transformers>=4.46.0"
  "datasets==2.21.0"
  accelerate
  evaluate
  jiwer
  soundfile
  librosa
)

printf '\n%s\n' "=== Installing dependencies ==="
pip install -q "${pip_packages[@]}"

# ── GPU check ──
# Report the first CUDA device's name and total memory (in units of 1e9
# bytes), or print a warning when torch sees no CUDA device.
printf '\n%s\n' "=== GPU Info ==="
python - <<'PY'
import torch

if not torch.cuda.is_available():
    print('WARNING: No GPU!')
else:
    device_name = torch.cuda.get_device_name(0)
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU: {device_name}')
    print(f'VRAM: {vram_gb:.1f}GB')
PY

# ── Download training script ──
echo ""
echo "=== Downloading training script ==="
cd /workspace

# Fetch run_finetune_v3.py from the Hugging Face Hub (or paste it by hand).
# Best-effort: a failed download is reported but does not abort the setup.
python - <<'PY'
import os
import shutil
import sys

from huggingface_hub import hf_hub_download

try:
    # An unset or empty HF_TOKEN is passed as None instead of raising
    # KeyError, so no explicit token is forced on the request.
    cached_path = hf_hub_download(
        'Tsedee/monsub-training-scripts',
        'run_finetune_v3.py',
        token=os.environ.get('HF_TOKEN') or None,
    )
    shutil.copy(cached_path, '/workspace/run_finetune_v3.py')
    print('Downloaded from HF')
except Exception as exc:
    # Catch Exception (not a bare except) so Ctrl-C still interrupts, and
    # report the cause on stderr so the failure can be diagnosed.
    print(f'HF download error: {type(exc).__name__}: {exc}', file=sys.stderr)
    print('HF download failed - paste the script manually')
PY

# ── Start training ──
# Run the v3 fine-tuning script, sending stdout and stderr both to the
# terminal and to the log file.
train_log=/workspace/train_v3.log
printf '\n=== Starting v3 training ===\nLog: %s\n\n' "$train_log"
python /workspace/run_finetune_v3.py 2>&1 | tee "$train_log"