File size: 13,664 Bytes
3259ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf2697b
 
 
 
3259ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf2697b
3259ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# AntiAtropos AWS Infrastructure Deploy Script (PowerShell)
#
# Deploys: EKS cluster, sample workloads, AMP workspace, Prometheus Agent,
#          self-hosted Grafana, Cluster Autoscaler, and generates kubeconfig for HF Spaces.
#
# The AntiAtropos FastAPI server runs on Hugging Face Spaces, NOT on AWS.
# This script only sets up the infrastructure that HF Spaces connects to.
#
# Prerequisites: aws cli, eksctl, kubectl, helm
#
# Usage:
#   .\deploy\aws\deploy.ps1
#
# Environment variables:
#   $env:AWS_REGION     - AWS region (default: ap-south-1)
#   $env:CLUSTER_NAME   - EKS cluster name (default: antiatropos)

# Promote PowerShell errors to terminating errors for the rest of the script.
$ErrorActionPreference = "Stop"

# Deployment settings: start from the defaults, let environment variables override.
$Region = "ap-south-1"
if ($env:AWS_REGION) { $Region = $env:AWS_REGION }
$ClusterName = "antiatropos"
if ($env:CLUSTER_NAME) { $ClusterName = $env:CLUSTER_NAME }
# Directory holding this script; manifests and values files are resolved against it.
$AwsDir = Split-Path -Parent $MyInvocation.MyCommand.Path

# Banner: one coloured title followed by the effective settings.
Write-Host ""
Write-Host "=== AntiAtropos AWS Infrastructure Deployment ===" -ForegroundColor Cyan
@(
    "Region:      $Region",
    "Cluster:     $ClusterName",
    "FastAPI:     Runs on HF Spaces (not deployed here)",
    ""
) | ForEach-Object { Write-Host $_ }

# --- Check prerequisites ---
# Collect every required CLI that cannot be resolved, then report them all at once.
$requiredTools = "aws", "eksctl", "kubectl", "helm"
$absentTools = @($requiredTools | Where-Object { -not (Get-Command $_ -ErrorAction SilentlyContinue) })
if ($absentTools.Count -gt 0) {
    Write-Host "ERROR: Missing prerequisites: $($absentTools -join ', ')" -ForegroundColor Red
    Write-Host "Install them first:" -ForegroundColor Yellow
    Write-Host "  choco install awscli eksctl kubernetes-cli kubernetes-helm -y" -ForegroundColor Yellow
    exit 1
}

# --- Phase 1: Create EKS Cluster ---
# Creates the cluster from eksctl-cluster.yaml with the managedNodeGroups section
# stripped (the nodegroup is created in a separate step), then updates the local
# kubeconfig. Exits with code 1 if cluster creation or the kubeconfig update fails.
Write-Host ""
Write-Host ">>> Phase 1: Creating EKS cluster (without nodegroup)..." -ForegroundColor Yellow

# Native commands report failure through their exit code, not a PowerShell
# exception, so existence is decided from $LASTEXITCODE. The error preference is
# relaxed only for the probe: under "Stop", Windows PowerShell can turn redirected
# stderr output of a native command into a terminating error.
$clusterExists = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    eksctl get cluster --name $ClusterName --region $Region 2>$null | Out-Null
    $clusterExists = ($LASTEXITCODE -eq 0)
} catch {
    $clusterExists = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

if ($clusterExists) {
    Write-Host "Cluster $ClusterName already exists, skipping creation."
} else {
    # Create cluster without nodegroup first (faster, avoids timeout)
    $TempClusterConfig = Join-Path $AwsDir "eksctl-cluster-only.yaml"
    $ClusterYaml = Get-Content (Join-Path $AwsDir "eksctl-cluster.yaml") -Raw
    # Drops everything from the first "managedNodeGroups:" to the end of the file,
    # so any YAML placed after that section is removed as well.
    $ClusterOnlyYaml = $ClusterYaml -replace '(?s)(managedNodeGroups:.*)', ''
    $ClusterOnlyYaml | Out-File -FilePath $TempClusterConfig -Encoding utf8
    $createExitCode = 1
    try {
        eksctl create cluster -f $TempClusterConfig
        $createExitCode = $LASTEXITCODE
    } finally {
        # Always remove the temporary config, even when creation fails.
        Remove-Item $TempClusterConfig -Force -ErrorAction SilentlyContinue
    }
    if ($createExitCode -ne 0) {
        Write-Host "ERROR: eksctl create cluster failed (exit code $createExitCode)." -ForegroundColor Red
        exit 1
    }
    Write-Host "Cluster created." -ForegroundColor Green
}

aws eks update-kubeconfig --name $ClusterName --region $Region
if ($LASTEXITCODE -ne 0) {
    Write-Host "ERROR: Could not update kubeconfig for cluster $ClusterName." -ForegroundColor Red
    exit 1
}
Write-Host "kubeconfig updated."

# --- Phase 1b: Create Nodegroup Separately ---
# Creates the nodegroup from eksctl-cluster.yaml unless the nodegroup listing
# already contains "linux-nodes", waits up to 30 x 10 seconds for a node to report
# Ready, then enables prefix delegation on the aws-node daemonset.
Write-Host ""
Write-Host ">>> Phase 1b: Creating nodegroup (separate step to avoid timeout)..." -ForegroundColor Yellow

# The nodegroup counts as existing only when the listing actually contains the
# expected name. NOTE(review): "linux-nodes" is presumably the nodegroup name
# declared in eksctl-cluster.yaml - confirm.
# The error preference is relaxed for the probe because redirected stderr of a
# native command can become a terminating error under "Stop" on Windows PowerShell.
$nodegroupExists = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    $nodegroupMatch = eksctl get nodegroup --cluster $ClusterName --region $Region 2>$null | Select-String "linux-nodes"
    $nodegroupExists = [bool]$nodegroupMatch
} catch {
    $nodegroupExists = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

if ($nodegroupExists) {
    Write-Host "Nodegroup already exists, skipping creation."
} else {
    # Create nodegroup separately (better error handling, can retry)
    eksctl create nodegroup --config-file (Join-Path $AwsDir "eksctl-cluster.yaml")
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: eksctl create nodegroup failed (exit code $LASTEXITCODE)." -ForegroundColor Red
        exit 1
    }
    Write-Host "Nodegroup created." -ForegroundColor Green
}

# Verify nodes are ready: poll until at least one node's STATUS column starts with
# "Ready" (this also accepts "Ready,SchedulingDisabled" but not "NotReady").
Write-Host "Waiting for nodes to be ready..."
$nodesReady = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    for ($i = 0; $i -lt 30; $i++) {
        $readyNodes = @(kubectl get nodes --no-headers 2>$null | Where-Object { $_ -match '^\S+\s+Ready\b' })
        if ($readyNodes.Count -gt 0) {
            Write-Host "Nodes ready:" -ForegroundColor Green
            kubectl get nodes
            $nodesReady = $true
            break
        }
        Start-Sleep -Seconds 10
    }
} finally {
    $ErrorActionPreference = $savedErrorPreference
}
if (-not $nodesReady) {
    # Non-fatal: the deployment continues and the operator is told how to check.
    Write-Host "WARNING: Nodes not ready yet. Check with: kubectl get nodes" -ForegroundColor Yellow
}

Write-Host "Enabling Prefix Delegation on VPC CNI..."
kubectl set env daemonset aws-node -n kube-system ENABLE_PREFIX_DELEGATION=true
Write-Host "Prefix Delegation enabled."

# --- Phase 2: Deploy Sample Workloads ---
# Applies the sample workload manifest that sits next to this script, then lists
# the pods in the prod-sre namespace.
Write-Host ""
Write-Host ">>> Phase 2: Deploying sample workloads (payments, checkout, catalog, cart, auth)..." -ForegroundColor Yellow
$workloadManifest = Join-Path $AwsDir "k8s-workloads.yaml"
$applyArgs = @("apply", "-f", $workloadManifest)
& kubectl @applyArgs
Write-Host "Workloads deployed." -ForegroundColor Green
$listPodsArgs = @("get", "pods", "-n", "prod-sre")
& kubectl @listPodsArgs

# --- Phase 3: Create AMP Workspace ---
# Reuses the first workspace returned for alias "antiatropos-metrics", or creates
# one. Sets $AmpWsId and $AmpUrl for later phases. Exits with code 1 when no
# workspace ID can be obtained, so a malformed URL never reaches later phases.
Write-Host ""
Write-Host ">>> Phase 3: Creating Amazon Managed Prometheus workspace..." -ForegroundColor Yellow

# Look up an existing workspace. A failed lookup, or the literal text "None"
# (how the aws CLI prints a null query result with --output text), is treated as
# "no workspace". The error preference is relaxed for the probe because redirected
# stderr of a native command can terminate the script under "Stop" on Windows
# PowerShell.
$AmpWsId = $null
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    $AmpWsId = aws amp list-workspaces --alias antiatropos-metrics --region $Region --query 'workspaces[0].workspaceId' --output text 2>$null
    if ($LASTEXITCODE -ne 0 -or $AmpWsId -eq "None") { $AmpWsId = $null }
} catch {
    $AmpWsId = $null
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

if ([string]::IsNullOrWhiteSpace($AmpWsId)) {
    $AmpWsId = aws amp create-workspace `
        --alias antiatropos-metrics `
        --region $Region `
        --query 'workspaceId' `
        --output text
    # Without a workspace ID the remote-write URL built below would be invalid.
    if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($AmpWsId) -or $AmpWsId -eq "None") {
        Write-Host "ERROR: Failed to create AMP workspace in region $Region." -ForegroundColor Red
        exit 1
    }
    Write-Host "AMP workspace created: $AmpWsId" -ForegroundColor Green
} else {
    Write-Host "AMP workspace already exists: $AmpWsId"
}

$AmpUrl = "https://aps-workspaces.$Region.amazonaws.com/workspaces/$AmpWsId"
Write-Host "AMP URL: $AmpUrl"

# --- Phase 4: Set up IRSA for Prometheus Agent ---
# Ensures the prometheus-sa service account exists in the monitoring namespace,
# creating it through eksctl with the AmazonPrometheusRemoteWriteAccess managed
# policy attached when it is missing. Exits with code 1 if creation fails.
Write-Host ""
Write-Host ">>> Phase 4: Setting up IRSA for Prometheus Agent..." -ForegroundColor Yellow

# kubectl signals "not found" through its exit code, not a PowerShell exception,
# so existence is decided from $LASTEXITCODE. The error preference is relaxed for
# the probe because redirected stderr of a native command can terminate the
# script under "Stop" on Windows PowerShell.
$saExists = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    kubectl get serviceaccount prometheus-sa -n monitoring 2>$null | Out-Null
    $saExists = ($LASTEXITCODE -eq 0)
} catch {
    $saExists = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

if ($saExists) {
    Write-Host "prometheus-sa already exists."
} else {
    # The region is passed explicitly, like every other eksctl call in this
    # script, instead of relying on the ambient AWS CLI default region.
    eksctl create iamserviceaccount `
        --cluster $ClusterName `
        --region $Region `
        --namespace monitoring `
        --name prometheus-sa `
        --attach-policy-arn "arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess" `
        --approve `
        --override-existing-serviceaccounts
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: Failed to create IAM service account prometheus-sa." -ForegroundColor Red
        exit 1
    }
    Write-Host "prometheus-sa created." -ForegroundColor Green
}

# --- Phase 5: Install Prometheus Agent ---
# Installs the prometheus-agent Helm release in the monitoring namespace, or
# upgrades it when it already exists, passing the AMP remote-write URL built from
# $AmpUrl. Exits with code 1 if the Helm install/upgrade fails.
Write-Host ""
Write-Host ">>> Phase 5: Installing Prometheus Agent (remote-writes to AMP)..." -ForegroundColor Yellow

helm repo add prometheus-community https://prometheus-community.github.io/helm-charts 2>$null
helm repo update

# "helm status" exits non-zero for an unknown release; it does not raise a
# PowerShell exception, so the decision is taken from $LASTEXITCODE. The error
# preference is relaxed for the probe because redirected stderr of a native
# command can terminate the script under "Stop" on Windows PowerShell.
$agentInstalled = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    helm status prometheus-agent -n monitoring 2>$null | Out-Null
    $agentInstalled = ($LASTEXITCODE -eq 0)
} catch {
    $agentInstalled = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

$promValuesYaml = Join-Path $AwsDir "prometheus-agent-values.yaml"
$remoteWriteUrl = "$AmpUrl/api/v1/remote_write"

# NOTE(review): "prometheus.prometheusSpec.remoteWrite" looks like the
# kube-prometheus-stack value path; the prometheus-community/prometheus chart
# appears to expect "server.remoteWrite". Confirm against the chart's values and
# prometheus-agent-values.yaml before changing the key.
if ($agentInstalled) {
    Write-Host "prometheus-agent already installed, upgrading..."
    helm upgrade prometheus-agent prometheus-community/prometheus `
        --namespace monitoring `
        -f $promValuesYaml `
        --set "prometheus.prometheusSpec.remoteWrite[0].url=$remoteWriteUrl"
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm upgrade of prometheus-agent failed." -ForegroundColor Red
        exit 1
    }
} else {
    helm install prometheus-agent prometheus-community/prometheus `
        --namespace monitoring --create-namespace `
        -f $promValuesYaml `
        --set "prometheus.prometheusSpec.remoteWrite[0].url=$remoteWriteUrl"
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm install of prometheus-agent failed." -ForegroundColor Red
        exit 1
    }
    Write-Host "prometheus-agent installed." -ForegroundColor Green
}

# --- Phase 6: Install Self-Hosted Grafana on EKS ---
# Publishes the dashboard JSON files as a secret (when the dashboards directory
# exists), installs or upgrades the grafana Helm release in the monitoring
# namespace, waits for the rollout, and prints access instructions.
# Exits with code 1 if the Helm install/upgrade fails.
Write-Host ""
Write-Host ">>> Phase 6: Installing self-hosted Grafana on EKS..." -ForegroundColor Yellow

# Add Grafana Helm repo
helm repo add grafana https://grafana.github.io/helm-charts 2>$null
helm repo update

# Create a secret with the dashboard JSON files for Grafana to import
$DashboardsDir = Join-Path $PSScriptRoot "..\..\grafana\provisioning\dashboards\json"
if (Test-Path $DashboardsDir) {
    Write-Host "Creating dashboard secret from $DashboardsDir..."
    # Render the secret client-side and pipe it to "kubectl apply" so re-runs
    # update the secret instead of failing because it already exists.
    kubectl create secret generic antiatropos-grafana-dashboards `
        --from-file=antiatropos-overview.json=$(Join-Path $DashboardsDir "antiatropos-overview.json") `
        --from-file=antiatropos-live.json=$(Join-Path $DashboardsDir "antiatropos-live.json") `
        --namespace monitoring `
        --dry-run=client -o yaml | kubectl apply -f -
    Write-Host "Dashboard secret created." -ForegroundColor Green
} else {
    Write-Host "Dashboard JSON directory not found at $DashboardsDir, skipping."
}

# Install Grafana
$GrafanaValuesYaml = Join-Path $AwsDir "grafana-values.yaml"

# Decide install vs. upgrade from the exit code of "helm status", the same way
# the other Helm phases do. The error preference is relaxed for the probe because
# redirected stderr of a native command can terminate the script under "Stop" on
# Windows PowerShell.
$grafanaInstalled = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    helm status grafana -n monitoring 2>$null | Out-Null
    $grafanaInstalled = ($LASTEXITCODE -eq 0)
} catch {
    $grafanaInstalled = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

if ($grafanaInstalled) {
    Write-Host "Grafana already installed, upgrading..."
    helm upgrade grafana grafana/grafana --namespace monitoring -f $GrafanaValuesYaml
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm upgrade of grafana failed." -ForegroundColor Red
        exit 1
    }
} else {
    helm install grafana grafana/grafana --namespace monitoring -f $GrafanaValuesYaml
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm install of grafana failed." -ForegroundColor Red
        exit 1
    }
    Write-Host "Grafana installed." -ForegroundColor Green
}

# Wait for Grafana pod to be ready. This stays best-effort: a rollout timeout
# does not abort the deployment, and the pod name is informational only.
Write-Host "Waiting for Grafana pod to be ready..."
$GrafanaPod = $null
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    kubectl rollout status deployment/grafana --namespace monitoring --timeout=120s 2>$null | Out-Null
    $GrafanaPod = kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}' 2>$null
} finally {
    $ErrorActionPreference = $savedErrorPreference
}
Write-Host "Grafana pod: $GrafanaPod"
Write-Host "To access Grafana: kubectl port-forward svc/grafana 3000 -n monitoring" -ForegroundColor Yellow
Write-Host "Login: admin / antiatropos"

# --- Phase 7: Install Cluster Autoscaler ---
# Installs the cluster-autoscaler Helm release in kube-system, or upgrades it when
# the release already exists. Exits with code 1 if the Helm operation fails.
Write-Host ""
Write-Host ">>> Phase 7: Installing Cluster Autoscaler..." -ForegroundColor Yellow

helm repo add autoscaler https://kubernetes.github.io/autoscaler 2>$null
helm repo update

# "helm status" exits non-zero for an unknown release; it does not raise a
# PowerShell exception, so the decision is taken from $LASTEXITCODE. The error
# preference is relaxed for the probe because redirected stderr of a native
# command can terminate the script under "Stop" on Windows PowerShell.
$autoscalerInstalled = $false
$savedErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    helm status cluster-autoscaler -n kube-system 2>$null | Out-Null
    $autoscalerInstalled = ($LASTEXITCODE -eq 0)
} catch {
    $autoscalerInstalled = $false
} finally {
    $ErrorActionPreference = $savedErrorPreference
}

$autoscalerValues = Join-Path $AwsDir "cluster-autoscaler-values.yaml"

if ($autoscalerInstalled) {
    Write-Host "cluster-autoscaler already installed, upgrading..."
    helm upgrade cluster-autoscaler autoscaler/cluster-autoscaler `
        --namespace kube-system `
        -f $autoscalerValues
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm upgrade of cluster-autoscaler failed." -ForegroundColor Red
        exit 1
    }
} else {
    helm install cluster-autoscaler autoscaler/cluster-autoscaler `
        --namespace kube-system `
        -f $autoscalerValues
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: helm install of cluster-autoscaler failed." -ForegroundColor Red
        exit 1
    }
    Write-Host "cluster-autoscaler installed." -ForegroundColor Green
}

# --- Phase 8: Generate Kubeconfig for HF Spaces ---
# Delegates to generate-kubeconfig.ps1 when that script exists next to this one.
# Otherwise writes kubeconfig-antiatropos.yaml into the script directory, built
# from the cluster endpoint and CA data returned by "aws eks describe-cluster".
# Exits with code 1 if the cluster cannot be found or its details cannot be read.
Write-Host ""
Write-Host ">>> Phase 8: Generating kubeconfig for HF Spaces..." -ForegroundColor Yellow

$generateScript = Join-Path $AwsDir "generate-kubeconfig.ps1"
if (Test-Path $generateScript) {
    & $generateScript
} else {
    # Inline kubeconfig generation if the .ps1 version doesn't exist yet
    $output = Join-Path $AwsDir "kubeconfig-antiatropos.yaml"

    # Verify cluster exists. eksctl reports a missing cluster through its exit
    # code, not a PowerShell exception, so the check is made on $LASTEXITCODE.
    # The error preference is relaxed for the probe because redirected stderr of
    # a native command can terminate the script under "Stop" on Windows PowerShell.
    $clusterCheck = $false
    $savedErrorPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        eksctl get cluster --name $ClusterName --region $Region 2>$null | Out-Null
        $clusterCheck = ($LASTEXITCODE -eq 0)
    } catch {
        $clusterCheck = $false
    } finally {
        $ErrorActionPreference = $savedErrorPreference
    }
    if (-not $clusterCheck) {
        Write-Host "ERROR: Cluster $ClusterName not found." -ForegroundColor Red
        exit 1
    }

    $ClusterEndpoint = aws eks describe-cluster --name $ClusterName --region $Region --query 'cluster.endpoint' --output text
    $endpointExitCode = $LASTEXITCODE
    $ClusterCa = aws eks describe-cluster --name $ClusterName --region $Region --query 'cluster.certificateAuthority.data' --output text
    $caExitCode = $LASTEXITCODE
    # Refuse to write a kubeconfig with an empty server or CA field.
    if ($endpointExitCode -ne 0 -or $caExitCode -ne 0 -or
        [string]::IsNullOrWhiteSpace($ClusterEndpoint) -or [string]::IsNullOrWhiteSpace($ClusterCa)) {
        Write-Host "ERROR: Could not read endpoint/CA data for cluster $ClusterName." -ForegroundColor Red
        exit 1
    }
    $Timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")

    # Expandable here-string: the $variables below are substituted when the text
    # is built, and the closing "@ must stay at the start of its line.
    $kubeconfig = @"
# Kubeconfig for AntiAtropos on Hugging Face Spaces
# Generated: $Timestamp
# Cluster:   $ClusterName
# Region:    $Region
#
# This kubeconfig uses AWS IAM authenticator.
# The HF Space container must have aws-cli available,
# OR the kubernetes Python client must be configured with AWS credentials.

apiVersion: v1
kind: Config
clusters:
  - cluster:
      certificate-authority-data: $ClusterCa
      server: $ClusterEndpoint
    name: $ClusterName

contexts:
  - context:
      cluster: $ClusterName
      user: antiatropos-hf-user
    name: $ClusterName

current-context: $ClusterName

preferences: {}

users:
  - name: antiatropos-hf-user
    user:
      exec:
        apiVersion: client.authentication.k8s.io/v1beta1
        command: aws
        args:
          - eks
          - get-token
          - --region
          - $Region
          - --cluster-name
          - $ClusterName
        env:
          - name: AWS_STS_REGIONAL_ENDPOINTS
            value: regional
          - name: AWS_DEFAULT_REGION
            value: $Region
        interactiveMode: IfAvailable
"@

    $kubeconfig | Out-File -FilePath $output -Encoding utf8 -Force
    Write-Host "Kubeconfig written to: $output" -ForegroundColor Green
    Write-Host ""
    Write-Host "To encode for HF Spaces secret:" -ForegroundColor Yellow
    Write-Host "  [Convert]::ToBase64String([System.IO.File]::ReadAllBytes('$output'))"
}

# --- Done ---
# Final report: a cyan banner, the AMP and Grafana access details, the kubeconfig
# location, and the follow-up configuration steps for the HF Space.
$bannerRule = "=========================================="
$kubeconfigPath = Join-Path $AwsDir 'kubeconfig-antiatropos.yaml'

Write-Host ""
foreach ($bannerLine in @($bannerRule, "   AntiAtropos AWS Infrastructure Ready!", $bannerRule)) {
    Write-Host $bannerLine -ForegroundColor Cyan
}

# Plain (uncoloured) summary lines, printed in order.
$summaryLines = @(
    "",
    "AMP Workspace ID:  $AmpWsId",
    "AMP URL:           $AmpUrl",
    "",
    "Grafana: Self-hosted on EKS (monitoring namespace)",
    "  Access: kubectl port-forward svc/grafana 3000 -n monitoring",
    "  Login: admin / antiatropos",
    "  URL: http://localhost:3000",
    "",
    "Kubeconfig saved:  $kubeconfigPath",
    ""
)
foreach ($summaryLine in $summaryLines) {
    Write-Host $summaryLine
}

Write-Host "Next steps - configure your HF Space:" -ForegroundColor Yellow
$nextSteps = @(
    "  1. Set secret KUBECONFIG_CONTENT = base64 of kubeconfig-antiatropos.yaml",
    "  2. Set env var PROMETHEUS_URL = $AmpUrl",
    "  3. Set env var KUBECONFIG = /app/kubeconfig.yaml",
    "  4. Set env var ANTIATROPOS_ENV_MODE = live",
    "  5. Set env var ANTIATROPOS_MAX_REPLICAS = 6",
    "  6. Set env var ANTIATROPOS_WORKLOAD_MAP = (see OPERATIONS.md)",
    "  7. Add kubeconfig decode to deploy/entrypoint.sh (see OPERATIONS.md)"
)
foreach ($step in $nextSteps) {
    Write-Host $step
}