File size: 4,229 Bytes
aa65d00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
# Google Cloud Compute Engine deployment script (with GPU support)
# This creates a VM instance with GPU for running the router agent
#
# Environment variables read by this script:
#   GCP_PROJECT_ID  Target GCP project (falls back to the placeholder
#                   "your-project-id" when unset or empty).
#   GCP_ZONE        Compute Engine zone (falls back to us-central1-a).
#   HF_TOKEN        Stored as instance metadata when the VM is created.

# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Deployment parameters. They never change after this point, so they are
# marked readonly to catch accidental reassignment.
readonly PROJECT_ID=${GCP_PROJECT_ID:-"your-project-id"}
readonly ZONE=${GCP_ZONE:-"us-central1-a"}
readonly INSTANCE_NAME="router-agent-gpu"
readonly MACHINE_TYPE="n1-standard-4"
readonly GPU_TYPE="nvidia-tesla-t4"
readonly GPU_COUNT=1
readonly IMAGE_NAME="gcr.io/${PROJECT_ID}/router-agent:latest"
readonly BOOT_DISK_SIZE="100GB"

# Colors for output (ANSI escapes, interpreted by `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Announce the run, then verify the gcloud CLI is available before any
# Compute Engine call is attempted.
echo -e "${GREEN}πŸš€ Setting up Compute Engine VM with GPU for Router Agent${NC}"

# Check if gcloud is installed
if ! command -v gcloud > /dev/null 2>&1; then
    # Fatal diagnostics go to stderr so they are not mixed into stdout.
    echo -e "${RED}❌ gcloud CLI not found. Please install it: https://cloud.google.com/sdk/docs/install${NC}" >&2
    exit 1
fi

# Set project (quoted so the value is always passed as a single argument).
# Note: this changes the project of the active gcloud configuration, which
# persists after the script exits.
gcloud config set project "${PROJECT_ID}"

# Check if instance already exists. When it does, the operator chooses between
# deleting it (the script then continues and recreates it) and keeping it (the
# script prints its address and stops).
if gcloud compute instances describe "${INSTANCE_NAME}" --zone="${ZONE}" &> /dev/null; then
    echo -e "${YELLOW}⚠️  Instance ${INSTANCE_NAME} already exists.${NC}"
    # `read` returns non-zero at EOF (e.g. stdin closed in CI); under `set -e`
    # that would kill the script without any message. Treat a failed read as
    # the documented default answer, "N".
    REPLY=""
    read -p "Delete and recreate? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ "${REPLY}" =~ ^[Yy]$ ]]; then
        echo -e "${YELLOW}πŸ—‘οΈ  Deleting existing instance...${NC}"
        gcloud compute instances delete "${INSTANCE_NAME}" --zone="${ZONE}" --quiet
    else
        echo -e "${GREEN}βœ… Using existing instance.${NC}"
        # External (NAT) IP of the first access config on the first interface.
        INSTANCE_IP=$(gcloud compute instances describe "${INSTANCE_NAME}" --zone="${ZONE}" --format='get(networkInterfaces[0].accessConfigs[0].natIP)')
        echo -e "${GREEN}🌐 Instance IP: ${INSTANCE_IP}${NC}"
        echo -e "${YELLOW}   Access via: http://${INSTANCE_IP}:7860${NC}"
        exit 0
    fi
fi

# ---------------------------------------------------------------------------
# Provisioning: generate the VM startup script, create the GPU instance, open
# the UI port, and print where the application will be reachable.
# ---------------------------------------------------------------------------

# Boot image for the VM. The defaults keep the original image choice and can
# be overridden through the environment.
# NOTE(review): the startup script installs nvidia-container-toolkit with
# apt-get, while Container-Optimized OS (cos-stable) appears to ship no package
# manager, so provisioning likely stops at that step with the default image.
# Confirm, and point GCP_IMAGE_FAMILY / GCP_IMAGE_PROJECT at a Debian/Ubuntu
# based GPU image if so.
IMAGE_FAMILY=${GCP_IMAGE_FAMILY:-"cos-stable"}
IMAGE_PROJECT=${GCP_IMAGE_PROJECT:-"cos-cloud"}

# Network tag attached to the VM and targeted by the firewall rule, so the
# rule only opens the port on this instance.
NETWORK_TAG="router-agent"
FIREWALL_RULE="allow-router-agent"

# Create startup script in an unpredictable temp file; the EXIT trap removes
# it on every exit path, including failures of the gcloud calls below.
STARTUP_SCRIPT=$(mktemp "${TMPDIR:-/tmp}/router-agent-startup.XXXXXX")
trap 'rm -f -- "${STARTUP_SCRIPT}"' EXIT

# The heredoc delimiter is quoted, so nothing inside it is expanded locally.
# The only substitution is the __IMAGE__ placeholder, filled in by sed while
# the script is written out (avoids the non-portable `sed -i`).
sed "s|__IMAGE__|${IMAGE_NAME}|g" > "${STARTUP_SCRIPT}" << 'EOF'
#!/bin/bash
# VM startup script for the router agent; executed on the instance at boot.
set -euo pipefail

readonly IMAGE="__IMAGE__"
readonly METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes"

# Install Docker (skipped when the image already provides it)
if ! command -v docker > /dev/null 2>&1; then
    installer=$(mktemp)
    curl -fsSL https://get.docker.com -o "${installer}"
    sh "${installer}"
    rm -f -- "${installer}"
fi

# USER may be unset in the boot environment; only touch group membership when
# there is a user name to add, and never let it abort provisioning.
if [[ -n "${USER:-}" ]]; then
    usermod -aG docker "${USER}" || true
fi

# Install NVIDIA Container Toolkit
if ! command -v apt-get > /dev/null 2>&1; then
    echo "startup-script: apt-get not found; cannot install nvidia-container-toolkit on this image" >&2
    exit 1
fi
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID:-}")
curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" \
    > /etc/apt/sources.list.d/nvidia-docker.list

apt-get update
apt-get install -y nvidia-container-toolkit
systemctl restart docker

# The deploy script stores HF_TOKEN as instance metadata; read it back here.
# A missing attribute yields an empty token instead of aborting the boot.
HF_TOKEN=$(curl -fsS -H "Metadata-Flavor: Google" "${METADATA_URL}/HF_TOKEN" || true)

# Pull and run the container
# NOTE(review): pulling a private gcr.io image needs Docker to be authenticated
# on the VM; no credential helper is configured here -- confirm the image is
# pullable by the instance.
docker pull "${IMAGE}"
# A container left over from a previous boot would make `docker run --name`
# fail, so remove it first (ignored when there is none).
docker rm -f router-agent > /dev/null 2>&1 || true
docker run -d \
    --name router-agent \
    --gpus all \
    -p 7860:7860 \
    -e HF_TOKEN="${HF_TOKEN}" \
    -e GRADIO_SERVER_NAME=0.0.0.0 \
    -e GRADIO_SERVER_PORT=7860 \
    "${IMAGE}"
EOF

echo -e "${GREEN}πŸ–₯️  Creating VM instance with GPU...${NC}"
gcloud compute instances create "${INSTANCE_NAME}" \
    --zone="${ZONE}" \
    --machine-type="${MACHINE_TYPE}" \
    --accelerator="type=${GPU_TYPE},count=${GPU_COUNT}" \
    --maintenance-policy=TERMINATE \
    --provisioning-model=STANDARD \
    --image-family="${IMAGE_FAMILY}" \
    --image-project="${IMAGE_PROJECT}" \
    --boot-disk-size="${BOOT_DISK_SIZE}" \
    --boot-disk-type=pd-standard \
    --metadata-from-file "startup-script=${STARTUP_SCRIPT}" \
    --scopes=https://www.googleapis.com/auth/cloud-platform \
    --metadata="HF_TOKEN=${HF_TOKEN:-your-token-here}" \
    --tags="http-server,https-server,${NETWORK_TAG}"

echo -e "${GREEN}βœ… Instance created!${NC}"

# Open the Gradio port from the deploying machine, where gcloud is known to be
# installed, rather than from inside the VM. Best effort: a failure (e.g.
# missing permission) is reported on stderr but does not abort the deployment.
# Security note: 0.0.0.0/0 exposes the UI to the whole internet.
if ! gcloud compute firewall-rules describe "${FIREWALL_RULE}" &> /dev/null; then
    gcloud compute firewall-rules create "${FIREWALL_RULE}" \
        --allow tcp:7860 \
        --source-ranges 0.0.0.0/0 \
        --target-tags "${NETWORK_TAG}" \
        --description "Allow Router Agent Gradio UI" \
        --quiet \
        || echo -e "${YELLOW}Warning: could not create firewall rule ${FIREWALL_RULE}; port 7860 may not be reachable.${NC}" >&2
fi

echo -e "${YELLOW}⏳ Waiting for instance to start (this may take a few minutes)...${NC}"

# Wait for instance to be ready (fixed delay; the startup script may still be
# running after this, hence the log hint printed below)
sleep 30

# Get instance IP (external NAT address of the first network interface)
INSTANCE_IP=$(gcloud compute instances describe "${INSTANCE_NAME}" --zone="${ZONE}" --format='get(networkInterfaces[0].accessConfigs[0].natIP)')

echo -e "${GREEN}🌐 Instance IP: ${INSTANCE_IP}${NC}"
echo -e "${YELLOW}⏳ Waiting for application to start (check logs with: gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone=${ZONE})${NC}"
echo -e "${GREEN}πŸ“ Access the application at: http://${INSTANCE_IP}:7860${NC}"

# Cleanup of the temporary startup script is handled by the EXIT trap above.