File size: 4,354 Bytes
a1190da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/bin/bash
# Launch multiple AWS instances for parallel evaluation
# Instance 1: Basic quality + complexity evaluation
# Instance 2-3: Nguyen suite with RL (split 1-6 and 7-12)

# Abort on the first unhandled command failure.
set -e

# Credentials come from the two positional arguments; both are required.
HF_TOKEN="${1:-}"
WANDB_KEY="${2:-}"

if [ -z "$HF_TOKEN" ] || [ -z "$WANDB_KEY" ]; then
    echo "Usage: $0 <HF_TOKEN> <WANDB_KEY>" >&2
    exit 1
fi

# AMI IDs are region-specific. The AMI is looked up in this region, so every
# later aws call (run-instances, wait, describe-instances) must target the
# same one. Exporting both variables pins the region for the whole script
# regardless of the caller's profile or environment.
REGION="us-east-1"
export AWS_DEFAULT_REGION="$REGION"
export AWS_REGION="$REGION"

INSTANCE_TYPE="g5.xlarge"
KEY_NAME="chave-gpu"
SECURITY_GROUP="sg-0deaa73e23482e3f6"

# Auto-detect the most recently created Deep Learning AMI. No hard-coded
# fallback ID is kept: a stale ID would launch the wrong image silently.
echo "Auto-detecting Deep Learning AMI..."
AMI_ID=$(aws ec2 describe-images \
  --owners amazon \
  --filters "Name=name,Values=Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04)*" \
  "Name=state,Values=available" \
  --query 'reverse(sort_by(Images, &CreationDate))[:1].ImageId' \
  --output text \
  --region "$REGION") || {
    echo "ERROR: AMI lookup failed (aws ec2 describe-images)" >&2
    exit 1
}

# With --output text an empty match yields an empty string (or "None").
if [ -z "$AMI_ID" ] || [ "$AMI_ID" = "None" ]; then
    echo "ERROR: no matching Deep Learning AMI found in $REGION" >&2
    exit 1
fi

echo "Using AMI: $AMI_ID"

#######################################
# Write the EC2 user-data bootstrap script for one instance.
# The script is generated from a literal template and the two credential
# placeholders are then filled in from the caller's environment.
# Globals:   HF_TOKEN, WANDB_KEY (read)
# Arguments: $1 - instance name, used as the suffix of the generated file
# Outputs:   creates aws/temp/userdata_<name>.sh (relative to the cwd)
# Returns:   non-zero if the final substitution step fails
#######################################
create_userdata() {
    local instance_name=$1
    local userdata_file="aws/temp/userdata_${instance_name}.sh"
    local hf_escaped wandb_escaped

    # The output directory may not exist yet (e.g. on a fresh checkout).
    mkdir -p aws/temp

    # Quoted delimiter: the template is written verbatim, nothing is expanded.
    cat > "$userdata_file" << 'USERDATA_EOF'
#!/bin/bash
set -x
exec > >(tee -a /home/ubuntu/setup.log) 2>&1

echo "===== Starting Setup ====="
date

# Update system
apt-get update
apt-get install -y git python3-pip python3-venv htop

# Clone repo
cd /home/ubuntu
sudo -u ubuntu git clone https://github.com/augustocsc/seriguela.git || true
cd seriguela
sudo -u ubuntu git pull

# Setup venv
sudo -u ubuntu python3 -m venv venv
sudo -u ubuntu bash -c "source venv/bin/activate && pip install --upgrade pip"
sudo -u ubuntu bash -c "source venv/bin/activate && pip install torch --index-url https://download.pytorch.org/whl/cu121"
sudo -u ubuntu bash -c "source venv/bin/activate && pip install -r requirements.txt"

# Set credentials
echo "export HF_TOKEN=PLACEHOLDER_HF_TOKEN" >> /home/ubuntu/.bashrc
echo "export WANDB_API_KEY=PLACEHOLDER_WANDB_KEY" >> /home/ubuntu/.bashrc

# Create directories
sudo -u ubuntu mkdir -p /home/ubuntu/seriguela/output
sudo -u ubuntu mkdir -p /home/ubuntu/seriguela/results

echo "===== Setup Complete ====="
touch /home/ubuntu/.setup_complete
date
USERDATA_EOF

    # Restrict access before the credentials are written into the file.
    chmod 600 "$userdata_file"

    # Escape the characters that are special in a sed replacement
    # (backslash, the "/" delimiter and "&") so tokens are inserted literally.
    hf_escaped=$(printf '%s' "$HF_TOKEN" | sed -e 's#[\\/&]#\\&#g')
    wandb_escaped=$(printf '%s' "$WANDB_KEY" | sed -e 's#[\\/&]#\\&#g')

    # Replace placeholders
    sed -i \
      -e "s/PLACEHOLDER_HF_TOKEN/${hf_escaped}/g" \
      -e "s/PLACEHOLDER_WANDB_KEY/${wandb_escaped}/g" \
      "$userdata_file"
}

#######################################
# Launch one EC2 instance and record its ID.
# Globals:   AMI_ID, INSTANCE_TYPE, KEY_NAME, SECURITY_GROUP (read)
#            INSTANCE_ID (written)
# Arguments: $1 - short instance name; used for the user-data file name and
#                 the Name tag (seriguela-<name>)
# Outputs:   progress on stdout, errors on stderr; appends
#            "<name>=<instance-id>" to aws/temp/instance_ids.txt
# Returns:   0 on success, 1 if the launch fails or yields no instance ID
#######################################
launch_instance() {
    local name=$1
    echo ""
    echo "Launching instance: $name"

    create_userdata "$name"

    # Checked explicitly so a failed launch never records an empty ID,
    # whether or not errexit is in effect for the caller.
    INSTANCE_ID=$(aws ec2 run-instances \
      --image-id "$AMI_ID" \
      --instance-type "$INSTANCE_TYPE" \
      --key-name "$KEY_NAME" \
      --security-group-ids "$SECURITY_GROUP" \
      --user-data "file://aws/temp/userdata_${name}.sh" \
      --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=seriguela-${name}}]" \
      --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":100,"VolumeType":"gp3"}}]' \
      --query 'Instances[0].InstanceId' \
      --output text) || {
        echo "ERROR: failed to launch instance: $name" >&2
        return 1
    }

    # --output text renders a missing value as "None".
    if [ -z "$INSTANCE_ID" ] || [ "$INSTANCE_ID" = "None" ]; then
        echo "ERROR: no instance ID returned for: $name" >&2
        return 1
    fi

    echo "Instance launched: $INSTANCE_ID"
    mkdir -p aws/temp
    echo "$name=$INSTANCE_ID" >> aws/temp/instance_ids.txt
}

# Start from a clean slate. Both result files are append-only, so leftovers
# from a previous run would otherwise be mixed into this run's output.
mkdir -p aws/temp
rm -f aws/temp/instance_ids.txt aws/temp/instance_ips.txt

# Launch all instances in parallel
echo "=========================================="
echo "Launching 3 AWS instances in parallel"
echo "=========================================="

launch_pids=()
for instance_name in "eval-basic" "nguyen-1-6" "nguyen-7-12"; do
    launch_instance "$instance_name" &
    launch_pids+=("$!")
done

# A bare `wait` discards the jobs' exit statuses; wait on each PID so a
# failed launch is noticed instead of being silently ignored.
failed_launches=0
for pid in "${launch_pids[@]}"; do
    if ! wait "$pid"; then
        failed_launches=$((failed_launches + 1))
    fi
done

if [ ! -s aws/temp/instance_ids.txt ]; then
    echo "ERROR: no instances were launched" >&2
    exit 1
fi

echo ""
if [ "$failed_launches" -eq 0 ]; then
    echo "All instances launched!"
else
    echo "WARNING: $failed_launches instance launch(es) failed; continuing with the rest" >&2
fi
echo ""
cat aws/temp/instance_ids.txt
echo ""
echo "Waiting for instances to be running..."

# Read the "<name>=<id>" records into parallel arrays
instance_names=()
instance_ids=()
while IFS='=' read -r name id; do
    if [ -n "$id" ]; then
        instance_names+=("$name")
        instance_ids+=("$id")
    fi
done < aws/temp/instance_ids.txt

aws ec2 wait instance-running --instance-ids "${instance_ids[@]}"

echo ""
echo "All instances are running!"
echo ""
echo "Getting public IPs..."

for i in "${!instance_ids[@]}"; do
    name=${instance_names[$i]}
    id=${instance_ids[$i]}
    ip=$(aws ec2 describe-instances \
      --instance-ids "$id" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' \
      --output text)
    echo "$name: $ip (ID: $id)"
    echo "$name=$ip" >> aws/temp/instance_ips.txt
done

echo ""
echo "=========================================="
echo "Instances ready!"
echo "=========================================="
echo ""
echo "Next steps:"
echo "1. Wait ~3 minutes for setup to complete"
echo "2. Upload models to instances"
echo "3. Start evaluations"
echo ""

# Surface partial failure to the caller after reporting what did launch.
if [ "$failed_launches" -gt 0 ]; then
    exit 1
fi