#!/bin/bash
# ═══════════════════════════════════════════════════════════
# MonSub v3 — A40 Setup Script
# Intended to be run on a RunPod A40 48GB pod.
#
# Steps:
#   1. Point the Hugging Face cache and TMPDIR at the workspace volume.
#   2. Install the Python dependencies.
#   3. Print GPU information.
#   4. Best-effort download of run_finetune_v3.py from the Hugging Face Hub.
#   5. Run the training script, mirroring its output to a log file.
#
# Environment:
#   HF_TOKEN  Hugging Face access token (set it in the RunPod environment).
#             Optional here: without it the download may fail, in which case
#             the training script has to be pasted into /workspace manually.
# ═══════════════════════════════════════════════════════════

# -e: stop on the first failing command; -u: unset variables are errors;
# pipefail: a failing training run must not be masked by `tee` succeeding.
set -euo pipefail

readonly WORKSPACE_DIR=/workspace
readonly TRAIN_SCRIPT="${WORKSPACE_DIR}/run_finetune_v3.py"
readonly TRAIN_LOG="${WORKSPACE_DIR}/train_v3.log"

# ── HF Token ──
# `:-` keeps `set -u` from aborting when the token was not provided.
export HF_TOKEN="${HF_TOKEN:-}"
export HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
if [[ -z "$HF_TOKEN" ]]; then
  echo "WARNING: HF_TOKEN is not set - the script download may fail" >&2
fi

# ── IMPORTANT: point caches at the workspace volume ──
# The container disk (50GB) fills up, so use the workspace volume instead.
export HF_HOME="${WORKSPACE_DIR}/.cache"
export TMPDIR="${WORKSPACE_DIR}/tmp"
mkdir -p "$HF_HOME" "$TMPDIR"

echo "=============================================="
echo "MonSub v3 — A40 Setup"
echo "HF_HOME=$HF_HOME"
echo "TMPDIR=$TMPDIR"
echo "=============================================="

# ── Dependencies ──
echo ""
echo "=== Installing dependencies ==="
# datasets==2.21.0 is REQUIRED (latest → torchcodec ImportError).
pip install -q \
  "transformers>=4.46.0" \
  "datasets==2.21.0" \
  accelerate \
  evaluate \
  jiwer \
  soundfile \
  librosa

# ── GPU check ──
echo ""
echo "=== GPU Info ==="
# Quoted heredoc delimiter: the shell performs no expansion inside the Python.
python - <<'PY'
import torch

if torch.cuda.is_available():
    name = torch.cuda.get_device_name(0)
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU: {name}')
    print(f'VRAM: {vram:.1f}GB')
else:
    print('WARNING: No GPU!')
PY

# ── Download training script ──
echo ""
echo "=== Downloading training script ==="
cd "$WORKSPACE_DIR"
# Fetch from the Hugging Face Hub (or paste the script manually).
# The download is deliberately best-effort: a failure is reported but does not
# abort here, so a manually pasted script can still be used.
python - <<'PY'
import os
import shutil
import sys

from huggingface_hub import hf_hub_download

try:
    path = hf_hub_download(
        'Tsedee/monsub-training-scripts',
        'run_finetune_v3.py',
        # An empty token is passed as None so the library's default applies.
        token=os.environ.get('HF_TOKEN') or None,
    )
    shutil.copy(path, '/workspace/run_finetune_v3.py')
    print('Downloaded from HF')
except Exception as exc:
    # Report the real cause instead of silently swallowing it.
    print(f'Download error: {exc!r}', file=sys.stderr)
    print('HF download failed - paste the script manually')
PY

# ── Start training ──
if [[ ! -f "$TRAIN_SCRIPT" ]]; then
  echo "ERROR: $TRAIN_SCRIPT not found - paste the script and re-run" >&2
  exit 1
fi

echo ""
echo "=== Starting v3 training ==="
echo "Log: $TRAIN_LOG"
echo ""
# With pipefail set above, a non-zero exit from python propagates through tee.
python "$TRAIN_SCRIPT" 2>&1 | tee "$TRAIN_LOG"