File size: 7,430 Bytes
2c4ca2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/bin/bash
#
# Launch comprehensive evaluation on AWS EC2
# Runs all models on all Nguyen benchmarks with PPO and GRPO
#

# Strict mode: abort on command failure (-e), on unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Default values
INSTANCE_TYPE="g5.2xlarge"  # Larger instance for parallel execution
AMI_ID="ami-0e86e20dae9224db8"  # Ubuntu 24.04 in us-east-1
KEY_NAME="chave-gpu-nova"
SECURITY_GROUP="sg-0deaa73e23482e3f6"
INSTANCE_NAME="augusto-seriguela-comprehensive-eval"

# Pre-seed optional settings (flags may never be passed) so that 'set -u'
# does not abort later checks like [ -n "$MODELS" ]. Environment-provided
# values are preserved by the :- default.
WANDB_KEY="${WANDB_KEY:-}"
HF_TOKEN="${HF_TOKEN:-}"
MODELS="${MODELS:-}"
BENCHMARKS="${BENCHMARKS:-}"
QUICK_TEST="${QUICK_TEST:-}"

# Parse arguments
# Parse arguments.
# All value-taking flags consume two positions (flag + value); --quick-test
# is a boolean switch and consumes one.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --instance-type)
            INSTANCE_TYPE="$2"
            shift 2
            ;;
        --wandb-key)
            WANDB_KEY="$2"
            shift 2
            ;;
        --hf-token)
            HF_TOKEN="$2"
            shift 2
            ;;
        --models)
            MODELS="$2"
            shift 2
            ;;
        --benchmarks)
            BENCHMARKS="$2"
            shift 2
            ;;
        --algorithms)
            ALGORITHMS="$2"
            shift 2
            ;;
        --epochs)
            EPOCHS="$2"
            shift 2
            ;;
        --quick-test)
            QUICK_TEST="true"
            shift
            ;;
        *)
            # Diagnostics go to stderr so stdout stays clean for tooling.
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# Set defaults
EPOCHS=${EPOCHS:-20}
ALGORITHMS=${ALGORITHMS:-"ppo grpo"}

# Load credentials from ~/.tokens.txt if not provided on the command line.
# Expected file format: one "name = value" line per credential.
if [ -z "$WANDB_KEY" ] || [ -z "$HF_TOKEN" ]; then
    if [ -f ~/.tokens.txt ]; then
        if [ -z "$HF_TOKEN" ]; then
            # -f2- keeps everything after the FIRST '=' so tokens that
            # themselves contain '=' are not truncated.
            HF_TOKEN=$(grep "huggingface" ~/.tokens.txt | cut -d'=' -f2- | tr -d ' ')
        fi
        if [ -z "$WANDB_KEY" ]; then
            WANDB_KEY=$(grep "wandb" ~/.tokens.txt | cut -d'=' -f2- | tr -d ' ')
        fi
    fi
fi

# Validate credentials; without both tokens the remote setup cannot log in.
if [ -z "$WANDB_KEY" ] || [ -z "$HF_TOKEN" ]; then
    echo -e "${RED}Error: Missing credentials. Provide --wandb-key and --hf-token${NC}" >&2
    exit 1
fi

# Show the effective run configuration before doing anything expensive.
# %b interprets the \033 color escapes, matching echo -e output.
printf '%b\n' "${GREEN}Launching comprehensive evaluation on AWS${NC}"
printf '%s\n' \
    "Instance type: $INSTANCE_TYPE" \
    "Models: ${MODELS:-all}" \
    "Benchmarks: ${BENCHMARKS:-all}" \
    "Algorithms: $ALGORITHMS" \
    "Epochs: $EPOCHS"

# Pick a scratch directory for the generated user-data script, preferring
# the Git-Bash-visible Windows path when it exists.
if [ -d "/c/Users/madeinweb/temp" ]; then
    TEMP_DIR="/c/Users/madeinweb/temp"
else
    TEMP_DIR="${TMPDIR:-/tmp}"
fi
mkdir -p "$TEMP_DIR"

# ---------------------------------------------------------------------------
# Build the EC2 user-data script in pieces.
#
# Heredoc nesting note: the generated remote script itself contains a nested
# heredoc (su - ubuntu << 'EOFU' ... EOFU).  Those EOFU markers are plain
# text to THIS script — they only take effect when the user-data runs on the
# instance.  Locally, every heredoc here is delimited by EOF.
#
# Part 1: literal remote setup.  The quoted 'EOF' delimiter suppresses ALL
# local expansion — $(date), $HOME, etc. expand on the instance, not here.
# ---------------------------------------------------------------------------
cat > "$TEMP_DIR/userdata_eval.sh" << 'EOF'
#!/bin/bash
exec > >(tee -a /home/ubuntu/setup.log)
exec 2>&1

echo "Starting setup at $(date)"

# Wait for cloud-init to complete (with timeout)
timeout 300 cloud-init status --wait || echo "cloud-init wait timed out"

# Update system
apt-get update
apt-get install -y python3-pip python3-venv git htop nvtop

# Install NVIDIA drivers if not present
if ! nvidia-smi; then
    apt-get install -y nvidia-driver-535
fi

# Switch to ubuntu user for the rest
su - ubuntu << 'EOFU'
cd ~

# Create virtual environment
python3 -m venv seriguela_env
source seriguela_env/bin/activate

# Clone repository
if [ ! -d "seriguela" ]; then
    git clone https://github.com/Agentes-I-A/Seriguela.git seriguela
fi

cd seriguela
git pull origin main

# Install dependencies
pip install --upgrade pip
pip install -r requirements.txt
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
pip install matplotlib seaborn

# Set up credentials
EOF

# Part 2: credentials and launch commands.  The UNQUOTED EOF delimiter means
# $HF_TOKEN, $WANDB_KEY and $EPOCHS expand NOW (baked into the user-data),
# while escaped \$ variables expand later, on the instance.
cat >> "$TEMP_DIR/userdata_eval.sh" << EOF
export HUGGINGFACE_TOKEN="$HF_TOKEN"
export WANDB_API_KEY="$WANDB_KEY"

# Login to HuggingFace
huggingface-cli login --token \$HUGGINGFACE_TOKEN

# Login to Wandb
wandb login \$WANDB_API_KEY

# Create tokens file for scripts
echo "huggingface = \$HUGGINGFACE_TOKEN" > ~/.tokens.txt
echo "wandb = \$WANDB_API_KEY" >> ~/.tokens.txt

# Pull models from HuggingFace if needed
echo "Downloading models..."
python -c "
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Download infix model
print('Downloading infix model...')
model = AutoModelForCausalLM.from_pretrained('augustocsc/Se124M_700K_infix_v3_json',
                                            torch_dtype=torch.float16,
                                            trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained('augustocsc/Se124M_700K_infix_v3_json')
print('Infix model downloaded')
"

# Run evaluation
echo "Starting comprehensive evaluation at \$(date)"

# Build command
CMD="python scripts/run_comprehensive_evaluation.py --output_dir ./evaluation_results --epochs $EPOCHS"

# Add optional parameters
EOF

# Part 3 (optional): append model/benchmark/quick-test flags to the remote
# command only when the corresponding option was supplied to this script.
if [ -n "$MODELS" ]; then
    cat >> "$TEMP_DIR/userdata_eval.sh" << EOF
CMD="\$CMD --models $MODELS"
EOF
fi

if [ -n "$BENCHMARKS" ]; then
    cat >> "$TEMP_DIR/userdata_eval.sh" << EOF
CMD="\$CMD --benchmarks $BENCHMARKS"
EOF
fi

if [ "$QUICK_TEST" == "true" ]; then
    cat >> "$TEMP_DIR/userdata_eval.sh" << EOF
CMD="\$CMD --quick_test"
EOF
fi

# Part 4: launch the evaluation in the background, start the periodic
# analysis loop, close the nested EOFU heredoc (ends the remote 'su' block),
# and mark setup completion on the instance.
cat >> "$TEMP_DIR/userdata_eval.sh" << EOF
CMD="\$CMD --algorithms $ALGORITHMS"

echo "Running: \$CMD"
nohup \$CMD > evaluation.log 2>&1 &

echo "Evaluation started in background. Check evaluation.log for progress."

# Also run analysis periodically
(
    while true; do
        sleep 300  # Every 5 minutes
        if [ -d "./evaluation_results" ]; then
            python scripts/analyze_evaluation_results.py --results_dir ./evaluation_results > analysis.log 2>&1
        fi
    done
) &

EOFU

# Mark completion
touch /home/ubuntu/.setup_complete
echo "Setup complete at \$(date)"
EOF

# Launch instance
echo -e "${YELLOW}Launching EC2 instance...${NC}"

# Git-Bash on Windows exposes C:\ as /c/; the AWS CLI needs a native
# Windows path for the file:// user-data reference.  Pure parameter
# expansion avoids forking sed for a single-string rewrite.
USERDATA_PATH="$TEMP_DIR/userdata_eval.sh"
if [[ "$USERDATA_PATH" == /c/* ]]; then
    USERDATA_PATH="C:/${USERDATA_PATH#/c/}"
fi

# All expansions are quoted (ShellCheck SC2086) so IDs, names and paths
# cannot be word-split or glob-expanded.
INSTANCE_ID=$(aws ec2 run-instances \
    --image-id "$AMI_ID" \
    --instance-type "$INSTANCE_TYPE" \
    --key-name "$KEY_NAME" \
    --security-group-ids "$SECURITY_GROUP" \
    --user-data "file://$USERDATA_PATH" \
    --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=$INSTANCE_NAME}]" \
    --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":100,"VolumeType":"gp3"}}]' \
    --query 'Instances[0].InstanceId' \
    --output text)

echo -e "${GREEN}Instance launched: $INSTANCE_ID${NC}"

echo -e "${GREEN}Instance launched: $INSTANCE_ID${NC}"

# Wait for instance to be running
echo "Waiting for instance to be running..."
aws ec2 wait instance-running --instance-ids $INSTANCE_ID

# Get public IP
PUBLIC_IP=$(aws ec2 describe-instances \
    --instance-ids $INSTANCE_ID \
    --query 'Reservations[0].Instances[0].PublicIpAddress' \
    --output text)

echo -e "${GREEN}Instance is running!${NC}"
echo "Instance ID: $INSTANCE_ID"
echo "Public IP: $PUBLIC_IP"
echo ""
echo "SSH command:"
echo "  ssh -i ~/.ssh/${KEY_NAME}.pem ubuntu@$PUBLIC_IP"
echo ""
echo "Monitor setup:"
echo "  ssh -i ~/.ssh/${KEY_NAME}.pem ubuntu@$PUBLIC_IP 'tail -f setup.log'"
echo ""
echo "Monitor evaluation:"
echo "  ssh -i ~/.ssh/${KEY_NAME}.pem ubuntu@$PUBLIC_IP 'tail -f seriguela/evaluation.log'"
echo ""
echo "Check GPU:"
echo "  ssh -i ~/.ssh/${KEY_NAME}.pem ubuntu@$PUBLIC_IP 'nvidia-smi'"
echo ""
echo "Download results when complete:"
echo "  scp -r -i ~/.ssh/${KEY_NAME}.pem ubuntu@$PUBLIC_IP:~/seriguela/evaluation_results ./"
echo ""
echo -e "${YELLOW}IMPORTANT: Remember to stop the instance when done!${NC}"
echo "  aws ec2 stop-instances --instance-ids $INSTANCE_ID"