# SkyPilot Multi-GPU Configuration for Fast Fine-tuning
# Uses 8x GPUs for parallel training and dataset annotation
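# Launch with SkyPilot, e.g. (cluster name below reuses the task name;
# substitute the actual path to this file):
#   sky launch -c ensemble-multi-gpu <this-file>.yaml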

name: ensemble-multi-gpu

resources:
  use_spot: true
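  # Spot instances can be preempted mid-run; for automatic recovery,
  # consider SkyPilot managed jobs (`sky jobs launch`) in recent releases.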
  accelerators: A100:8  # 8x A100 GPUs
  # Cheaper alternatives:
  # accelerators: V100:8  # 8x V100
  # accelerators: L4:8    # 8x L4 (cheaper)
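  # Tip: `sky show-gpus A100:8` lists clouds/regions (with indicative prices)
  # offering a given accelerator, useful for choosing among the options above.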

  memory: 128+  # 128GB+ RAM for multi-GPU
  disk_size: 500  # 500GB for datasets

setup: |
  set -e

  echo "πŸ”§ Setting up multi-GPU environment..."

  # Install dependencies
  sudo apt-get update -qq
  pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
  pip install --quiet transformers datasets librosa soundfile accelerate
  pip install --quiet huggingface_hub pandas numpy tqdm scikit-learn
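
  # Hugging Face repos store large files via Git LFS; without it, a plain
  # clone fetches pointer files only. Assumption: git-lfs is preinstalled
  # (common on cloud deep-learning images); the guard keeps setup going if not.
  git lfs install --skip-repo || true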

  # Clone repo
  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone https://huggingface.co/marcosremar2/ensemble-tts-annotation
  fi

  cd ensemble-tts-annotation

  echo "βœ… Setup complete!"
  echo "GPUs available:"
  nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader
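
  # Optional sanity check: confirm the installed torch build sees every GPU
  # (catches CUDA/driver mismatches early).
  python -c "import torch; print('torch sees', torch.cuda.device_count(), 'GPU(s)')"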

run: |
  cd ensemble-tts-annotation

  # Check GPU count
  GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
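  # Alternatively, recent SkyPilot releases export the per-node GPU count as
  # $SKYPILOT_NUM_GPUS_PER_NODE, which avoids parsing nvidia-smi:
  # GPU_COUNT=${SKYPILOT_NUM_GPUS_PER_NODE:-$GPU_COUNT}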
  echo "πŸš€ Multi-GPU Training with $GPU_COUNT GPUs"
  echo "================================================"

  # Create synthetic data
  echo "πŸ“Š Creating synthetic dataset (larger for multi-GPU)..."
  python scripts/data/create_synthetic_test_data.py \
    --output data/raw/synthetic_large/ \
    --samples 200

  # Prepare dataset
  echo "πŸ“¦ Preparing dataset..."
  python scripts/data/download_ptbr_datasets.py \
    --prepare-local data/raw/synthetic_large/

  # Fine-tune with multi-GPU (using accelerate)
  echo "πŸ”₯ Fine-tuning with $GPU_COUNT GPUs..."
  accelerate launch --multi_gpu --num_processes=$GPU_COUNT \
    scripts/training/finetune_emotion2vec.py \
    --dataset data/prepared/synthetic_large_prepared \
    --epochs 20 \
    --batch-size 64 \
    --device cuda \
    --augment \
    --output models/emotion/emotion2vec_finetuned_multigpu/
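
  # Note on batch size: with data-parallel training each process typically
  # consumes its own batch, so if --batch-size above is per-device the
  # effective global batch is 64 * GPU_COUNT (512 on 8 GPUs) and the learning
  # rate may need scaling accordingly. (Assumption: the training script
  # interprets --batch-size per device, as is conventional with accelerate.)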

  echo "βœ… Fine-tuning complete!"

  # Benchmark
  echo "πŸ“Š Performance benchmark:"
  python scripts/test/test_quick.py --mode balanced

  echo "================================================"
  echo "πŸ’‘ Upload results with:"
  echo "sky storage upload models/emotion/emotion2vec_finetuned_multigpu/ s3://my-bucket/"

num_nodes: 1
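
# Alternative to manual upload: declare an output bucket so SkyPilot syncs it
# automatically (a sketch; the bucket name is a placeholder, and --output in
# the run section would then point under /outputs):
# file_mounts:
#   /outputs:
#     name: my-ensemble-outputs  # SkyPilot-managed bucket
#     mode: MOUNT                # files written under /outputs persist there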