fredmo commited on
Commit
4472750
·
verified ·
1 Parent(s): 13e6b23

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +391 -704
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>The MLOps Engineer's Cheatsheet for Model Serving</title>
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" rel="stylesheet">
@@ -14,6 +14,7 @@
14
  --primary-color: #1E88E5; /* Blue */
15
  --primary-dark: #1565C0;
16
  --secondary-color: #004d40; /* Dark Teal for contrast */
 
17
  --background-color: #f4f6f8;
18
  --card-bg-color: #ffffff;
19
  --text-color: #333;
@@ -36,750 +37,324 @@
36
  }
37
 
38
  /* --- Layout & Containers --- */
39
- .container {
40
- max-width: 1200px;
41
- margin: 0 auto;
42
- padding: 2rem;
43
- }
44
-
45
- header {
46
- text-align: center;
47
- margin-bottom: 2rem;
48
- }
49
-
50
- header h1 {
51
- color: var(--heading-color);
52
- font-weight: 700;
53
- font-size: 2.8rem;
54
- margin-bottom: 0.5rem;
55
- }
56
-
57
- header p {
58
- font-size: 1.1rem;
59
- color: var(--subtle-text-color);
60
- max-width: 800px;
61
- margin: 0 auto;
62
- }
63
 
64
  .main-section-title {
65
- font-size: 2.2rem;
66
- color: var(--heading-color);
67
- border-bottom: 3px solid var(--primary-color);
68
- padding-bottom: 0.75rem;
69
- margin-top: 3rem;
70
- margin-bottom: 2rem;
71
- display: flex;
72
- align-items: center;
73
- }
74
-
75
- .main-section-title .material-icons {
76
- font-size: 2.8rem;
77
- margin-right: 1rem;
78
- }
79
-
80
- /* --- Tile Navigation --- */
81
- .tile-container {
82
- display: grid;
83
- grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
84
- gap: 1.5rem;
85
- margin-bottom: 2.5rem;
86
  }
 
 
 
 
 
 
 
 
87
 
88
- .tile {
89
- background-color: var(--card-bg-color);
90
- border: 2px solid var(--border-color);
91
- border-radius: 8px;
92
- padding: 1.5rem;
93
- text-align: center;
94
- cursor: pointer;
95
- transition: transform 0.2s ease, box-shadow 0.2s ease, border-color 0.2s ease;
96
- display: flex;
97
- flex-direction: column;
98
- align-items: center;
99
- justify-content: center;
100
- min-height: 150px;
101
- }
102
-
103
- .tile:hover {
104
- transform: translateY(-5px);
105
- box-shadow: var(--tile-hover-shadow);
106
- border-color: var(--primary-color);
107
- }
108
-
109
- .tile.active {
110
- border-color: var(--primary-color);
111
- box-shadow: var(--tile-hover-shadow);
112
- background-color: #f0f7ff;
113
- }
114
 
115
- .tile .material-icons {
116
- font-size: 3rem;
117
- color: var(--primary-color);
118
- margin-bottom: 1rem;
 
 
 
119
  }
 
 
 
120
 
121
- .tile h4 {
122
- margin: 0;
123
- font-size: 1.2rem;
124
- color: var(--heading-color);
125
  }
 
126
 
127
- /* --- Content Panels --- */
128
- .content-panel {
129
- display: none; /* Hidden by default, shown by JS */
130
- background-color: var(--card-bg-color);
131
- border-radius: 8px;
132
- box-shadow: var(--shadow);
133
- padding: 2.5rem;
134
- margin-top: 1rem;
135
  }
136
-
137
- .content-panel.active {
138
- display: block;
 
 
139
  }
140
-
141
- .stack-layer {
142
- margin-bottom: 2.5rem;
143
- padding-bottom: 1.5rem;
144
- border-bottom: 1px solid var(--border-color);
 
 
 
 
 
 
145
  }
146
 
147
- .stack-layer:last-child {
148
- border-bottom: none;
149
- margin-bottom: 0;
150
- }
151
-
152
- .stack-layer h3 {
153
- font-size: 1.6rem;
154
- color: var(--secondary-color);
155
- margin-top: 0;
156
- display: flex;
157
- align-items: center;
158
- }
159
-
160
- .stack-layer h3 .material-icons {
161
- margin-right: 12px;
162
- font-size: 2rem;
163
- }
164
-
165
- /* --- Collapsible Sections & Code --- */
166
- details {
167
- border: 1px solid var(--border-color);
168
- border-radius: 6px;
169
  margin-bottom: 1rem;
170
- background-color: #f9fafb;
171
- transition: background-color 0.2s ease-in-out;
172
- }
173
-
174
- details[open] { background-color: var(--card-bg-color); }
175
- summary {
176
- cursor: pointer;
177
- padding: 1rem;
178
- font-weight: 500;
179
- font-size: 1.1rem;
180
- list-style: none;
181
- display: flex;
182
- align-items: center;
183
- justify-content: space-between;
184
- }
185
- summary::-webkit-details-marker { display: none; }
186
- summary::after {
187
- font-family: 'Material Icons';
188
- content: 'expand_more';
189
- transform: rotate(0deg);
190
- transition: transform 0.2s ease-in-out;
191
- }
192
- details[open] > summary::after { transform: rotate(180deg); }
193
- .details-content { padding: 0 1rem 1rem 1rem; border-top: 1px solid var(--border-color); }
194
-
195
- pre {
196
- background-color: var(--code-bg-color);
197
- color: var(--code-text-color);
198
- padding: 1.5rem 1rem 1rem 1rem;
199
- border-radius: 6px;
200
- overflow-x: auto;
201
- font-size: 0.9em;
202
- position: relative;
203
  }
 
 
 
 
 
 
 
 
 
 
204
  code { font-family: 'Courier New', Courier, monospace; }
205
- .code-block-header { font-weight: bold; color: var(--subtle-text-color); margin-bottom: -0.5rem; margin-top: 1rem; }
206
- .copy-btn { position: absolute; top: 10px; right: 10px; background-color: #4a505c; color: #fff; border: none; padding: 6px 10px; border-radius: 4px; cursor: pointer; opacity: 0.7; transition: opacity 0.2s, background-color 0.2s; }
207
  pre:hover .copy-btn { opacity: 1; }
208
- .copy-btn:hover { background-color: #6c7382; }
209
  .copy-btn.copied { background-color: var(--primary-dark); }
210
- .icon-placeholder { font-style: italic; color: #999; display: inline-block; margin-left: 8px; }
211
-
212
  </style>
213
  </head>
214
  <body>
215
 
216
  <div class="container">
217
  <header>
218
- <h1>The MLOps Engineer's Cheatsheet for Model Serving</h1>
219
- <p>Select a framework or model type to see a practical guide for serving it—from local code to a production-grade, auto-scaling Kubernetes deployment.</p>
220
  </header>
221
 
222
  <main>
223
- <!-- ======================= Classic ML Tiles ======================= -->
224
- <h2 class="main-section-title"><i class="material-icons">model_training</i>Classic Machine Learning</h2>
225
- <div class="tile-container">
226
- <div class="tile active" data-target="classic-pytorch">
227
- <span class="material-icons">whatshot</span>
228
- <h4><!-- ICON PLACEHOLDER: PyTorch -->PyTorch</h4>
229
- </div>
230
- <div class="tile" data-target="classic-tensorflow">
231
- <span class="material-icons">hub</span>
232
- <h4><!-- ICON PLACEHOLDER: TensorFlow -->TensorFlow</h4>
233
  </div>
234
- <div class="tile" data-target="classic-sklearn">
235
- <span class="material-icons">data_object</span>
236
- <h4><!-- ICON PLACEHOLDER: Scikit-learn -->Scikit-learn</h4>
237
- </div>
238
- <div class="tile" data-target="classic-xgboost">
239
- <span class="material-icons">trending_up</span>
240
- <h4><!-- ICON PLACEHOLDER: XGBoost -->XGBoost</h4>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  </div>
242
- <div class="tile" data-target="classic-jax">
243
- <span class="material-icons">functions</span>
244
- <h4><!-- ICON PLACEHOLDER: JAX -->JAX</h4>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  </div>
 
 
246
  </div>
 
 
247
 
248
- <!-- ======================= Generative AI Tiles ======================= -->
249
- <h2 class="main-section-title"><i class="material-icons">auto_awesome</i>Generative AI</h2>
 
250
  <div class="tile-container">
251
- <div class="tile" data-target="genai-llm">
252
- <span class="material-icons">chat</span>
253
- <h4>LLMs</h4>
254
- </div>
255
- <div class="tile" data-target="genai-vlm">
256
- <span class="material-icons">image_search</span>
257
- <h4>Multimodal (VLMs)</h4>
258
- </div>
259
- <div class="tile" data-target="genai-diffusion">
260
- <span class="material-icons">palette</span>
261
- <h4>Diffusion Models</h4>
262
- </div>
263
  </div>
264
-
265
- <!-- ======================= Content Panels Container ======================= -->
266
- <div class="content-container">
267
 
268
- <!-- === PyTorch Content Panel === -->
269
- <div id="classic-pytorch" class="content-panel active">
270
- <div class="stack-layer">
271
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
 
 
 
 
 
 
 
272
  <p>A simple feed-forward network defined in PyTorch. The model's `state_dict` is saved for deployment.</p>
273
  <p class="code-block-header">model_setup.py</p>
274
- <pre><code># model_setup.py
275
- import torch
276
  import torch.nn as nn
277
-
278
  class SimpleNet(nn.Module):
279
  def __init__(self):
280
  super(SimpleNet, self).__init__()
281
  self.linear = nn.Linear(10, 1)
282
-
283
- def forward(self, x):
284
- return self.linear(x)
285
-
286
  model = SimpleNet()
287
- torch.save(model.state_dict(), "pytorch_model.pth")
288
- print("Model saved to pytorch_model.pth")</code></pre>
289
  </div>
290
- <div class="stack-layer">
291
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
292
- <p>Use a high-performance web framework like FastAPI for the API. For managed serving, KServe and Ray Serve offer powerful abstractions.</p>
293
- <details>
294
- <summary>Serve with FastAPI</summary>
295
- <div class="details-content">
296
- <pre><code># app.py
297
- from fastapi import FastAPI
298
- from pydantic import BaseModel
299
- import torch
300
- # ... (include SimpleNet class definition here) ...
301
-
302
- app = FastAPI(title="PyTorch Model Server")
303
- model = SimpleNet()
304
- model.load_state_dict(torch.load("pytorch_model.pth"))
305
- model.eval()
306
-
307
- class PredReq(BaseModel): data: list[float]
308
-
309
- @app.post("/predict")
310
- def predict(req: PredReq):
311
- tensor = torch.tensor([req.data], dtype=torch.float32)
312
- with torch.no_grad():
313
- pred = model(tensor)
314
- return {"prediction": pred.item()}
315
- </code></pre>
316
- </div>
317
- </details>
318
- <details>
319
- <summary>Serve with Ray Serve <!-- ICON PLACEHOLDER: Ray --></summary>
320
- <div class="details-content">
321
- <pre><code># ray_serve_app.py
322
- from ray import serve
323
- # ... (include FastAPI app, model class, etc.) ...
324
-
325
- @serve.deployment
326
- @serve.ingress(app)
327
- class ModelServer:
328
- def __init__(self):
329
- self.model = SimpleNet()
330
- self.model.load_state_dict(torch.load("pytorch_model.pth"))
331
- self.model.eval()
332
-
333
- # FastAPI handles routing, this class just holds the model
334
- </code></pre>
335
- </div>
336
- </details>
337
- <details>
338
- <summary>Serve with KServe <!-- ICON PLACEHOLDER: Kubeflow --></summary>
339
- <div class="details-content">
340
- <pre><code># inferenceservice.yaml
341
- apiVersion: "serving.kserve.io/v1beta1"
342
- kind: "InferenceService"
343
- metadata:
344
- name: "pytorch-model"
345
- spec:
346
- predictor:
347
- pytorch:
348
- storageUri: "pvc://your-pvc/path/to/model-dir"
349
- </code></pre>
350
- </div>
351
- </details>
352
  </div>
353
- <div class="stack-layer">
354
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
355
- <p>Package the application with a multi-stage Dockerfile and define its runtime with Kubernetes Deployment, Service, and HPA objects.</p>
356
- <details>
357
- <summary>Dockerfile</summary>
358
- <div class="details-content">
359
- <pre><code># Multi-stage build for a lean final image
360
- FROM python:3.9-slim as builder
361
- WORKDIR /install
362
- RUN pip install --no-cache-dir --prefix="/install" torch fastapi "uvicorn[standard]"
363
-
364
- FROM python:3.9-slim
365
- WORKDIR /app
366
- COPY --from=builder /install /usr/local
367
- COPY ./app.py /app/
368
- COPY ./pytorch_model.pth /app/
369
- EXPOSE 8000
370
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
371
- </code></pre>
372
- </div>
373
- </details>
374
- <details>
375
- <summary>Deployment & Service YAML <!-- ICON PLACEHOLDER: Kubernetes --></summary>
376
- <div class="details-content">
377
- <pre><code># deployment.yaml
378
- apiVersion: apps/v1
379
- kind: Deployment
380
- metadata:
381
- name: pytorch-deployment
382
- spec:
383
- replicas: 2
384
- selector: { matchLabels: { app: pytorch } }
385
- template:
386
- metadata: { labels: { app: pytorch } }
387
- spec:
388
- containers:
389
- - name: server
390
- image: your-repo/pytorch-server:latest
391
- ports:
392
- - containerPort: 8000
393
- ---
394
- # service.yaml
395
- apiVersion: v1
396
- kind: Service
397
- metadata:
398
- name: pytorch-service
399
- spec:
400
- type: NodePort
401
- selector: { app: pytorch }
402
- ports:
403
- - port: 80
404
- targetPort: 8000
405
- </code></pre>
406
- </div>
407
- </details>
408
- <details>
409
- <summary>Autoscaling (HPA) YAML</summary>
410
- <div class="details-content">
411
- <pre><code>apiVersion: autoscaling/v2
412
- kind: HorizontalPodAutoscaler
413
- metadata:
414
- name: pytorch-hpa
415
- spec:
416
- scaleTargetRef:
417
- apiVersion: apps/v1
418
- kind: Deployment
419
- name: pytorch-deployment
420
- minReplicas: 1
421
- maxReplicas: 5
422
- metrics:
423
- - type: Resource
424
- resource: { name: cpu, target: { type: Utilization, averageUtilization: 80 } }
425
- </code></pre>
426
- </div>
427
- </details>
428
  </div>
429
- <div class="stack-layer">
430
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
431
- <p><strong>CPUs:</strong> Suitable for small networks or where latency is not critical.<br><strong>GPUs:</strong> Essential for deep learning models to achieve low-latency inference. Use NVIDIA GPUs (T4, A10G, A100) for best performance with CUDA.<br><strong>TPUs:</strong> Best for massive-scale inference on Google Cloud, especially for models trained on TPUs.</p>
432
  </div>
433
  </div>
434
-
435
- <!-- === TensorFlow Content Panel === -->
436
  <div id="classic-tensorflow" class="content-panel">
437
- <!-- Content for TensorFlow follows the same 4-layer structure -->
438
- <div class="stack-layer">
439
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
440
  <p>A simple Keras model saved in TensorFlow's `SavedModel` format, which bundles the architecture and weights.</p>
441
  <p class="code-block-header">model_setup.py</p>
442
- <pre><code># model_setup.py
443
- import tensorflow as tf
444
  model = tf.keras.Sequential([
445
  tf.keras.layers.Dense(10, activation='relu', input_shape=(10,)),
446
  tf.keras.layers.Dense(1)
447
  ])
448
- model.save("tf_saved_model")
449
- print("Model saved to tf_saved_model/")</code></pre>
450
  </div>
451
- <div class="stack-layer">
452
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
453
- <p>FastAPI is a great choice for a custom Python server. KServe has native, high-performance support for the `SavedModel` format.</p>
454
- <details>
455
- <summary>Serve with FastAPI</summary>
456
- <div class="details-content">
457
- <pre><code># app.py
458
- from fastapi import FastAPI
459
- from pydantic import BaseModel
460
- import tensorflow as tf
461
- import numpy as np
462
-
463
- app = FastAPI(title="TensorFlow Model Server")
464
- model = tf.keras.models.load_model("tf_saved_model")
465
-
466
- class PredReq(BaseModel): data: list[float]
467
-
468
- @app.post("/predict")
469
- def predict(req: PredReq):
470
- pred = model.predict(np.array([req.data]))
471
- return {"prediction": pred.flatten().tolist()}
472
- </code></pre>
473
- </div>
474
- </details>
475
- <details>
476
- <summary>Serve with KServe <!-- ICON PLACEHOLDER: Kubeflow --></summary>
477
- <div class="details-content">
478
- <pre><code># inferenceservice.yaml
479
- apiVersion: "serving.kserve.io/v1beta1"
480
- kind: "InferenceService"
481
- metadata:
482
- name: "tensorflow-model"
483
- spec:
484
- predictor:
485
- tensorflow:
486
- storageUri: "s3://my-bucket/path/to/tf_saved_model"
487
- </code></pre>
488
- </div>
489
- </details>
490
  </div>
491
- <div class="stack-layer">
492
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
493
- <p>The Kubernetes configuration is very similar to the PyTorch example. Ensure your Dockerfile copies the entire `tf_saved_model` directory and installs the `tensorflow` library.</p>
494
  </div>
495
- <div class="stack-layer">
496
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
497
- <p><strong>CPUs:</strong> Good for smaller Keras models. <br><strong>GPUs:</strong> Highly recommended for deep learning models. TensorFlow has excellent CUDA integration. <br><strong>TPUs:</strong> The premier choice for running TensorFlow models at scale, offering the best price/performance on GCP.</p>
498
  </div>
499
  </div>
500
-
501
- <!-- === Scikit-learn Content Panel === -->
502
  <div id="classic-sklearn" class="content-panel">
503
- <div class="stack-layer">
504
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
505
  <p>A classic logistic regression model. Serialization is typically done with `joblib` for efficiency with NumPy structures.</p>
506
  <p class="code-block-header">model_setup.py</p>
507
- <pre><code># model_setup.py
508
- import joblib
509
  from sklearn.linear_model import LogisticRegression
510
  from sklearn.datasets import make_classification
511
-
512
  X, y = make_classification(n_features=4)
513
  model = LogisticRegression().fit(X, y)
514
- joblib.dump(model, "sklearn_model.joblib")
515
- print("Model saved to sklearn_model.joblib")</code></pre>
516
  </div>
517
- <div class="stack-layer">
518
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
519
- <p>FastAPI provides a simple and fast web server. KServe and Ray Serve also have native support for scikit-learn models.</p>
520
- <details>
521
- <summary>Serve with FastAPI</summary>
522
- <div class="details-content">
523
- <pre><code># app.py
524
- from fastapi import FastAPI
525
- from pydantic import BaseModel
526
- import joblib, numpy as np
527
-
528
- app = FastAPI(title="Scikit-learn Server")
529
- model = joblib.load("sklearn_model.joblib")
530
-
531
- class PredReq(BaseModel): data: list[list[float]]
532
-
533
- @app.post("/predict")
534
- def predict(req: PredReq):
535
- pred = model.predict(np.array(req.data))
536
- return {"prediction": pred.tolist()}
537
- </code></pre>
538
- </div>
539
- </details>
540
- <details>
541
- <summary>Serve with KServe <!-- ICON PLACEHOLDER: Kubeflow --></summary>
542
- <div class="details-content">
543
- <pre><code># inferenceservice.yaml
544
- apiVersion: "serving.kserve.io/v1beta1"
545
- kind: "InferenceService"
546
- metadata:
547
- name: "sklearn-model"
548
- spec:
549
- predictor:
550
- sklearn:
551
- storageUri: "pvc://my-pvc/path/to/model-dir" # must contain model.joblib
552
- </code></pre>
553
- </div>
554
- </details>
555
  </div>
556
- <div class="stack-layer">
557
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
558
- <p>Standard Kubernetes setup. The Docker container will be lightweight as it only needs `scikit-learn`, `joblib`, and `fastapi`.</p>
559
  </div>
560
- <div class="stack-layer">
561
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
562
- <p><strong>CPUs:</strong> Almost always sufficient. Scikit-learn models are designed to run efficiently on CPUs.<br><strong>GPUs/TPUs:</strong> Not used. There is no GPU acceleration for standard scikit-learn algorithms.</p>
563
  </div>
564
  </div>
565
-
566
- <!-- === XGBoost Content Panel === -->
567
  <div id="classic-xgboost" class="content-panel">
568
- <div class="stack-layer">
569
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
570
- <p>An XGBoost model saved in its native JSON format, which is portable and human-readable.</p>
571
- <p class="code-block-header">model_setup.py</p>
572
- <pre><code># model_setup.py
573
- import xgboost as xgb
574
- from sklearn.datasets import make_classification
575
-
576
- X, y = make_classification(n_features=4)
577
- dtrain = xgb.DMatrix(X, label=y)
578
- model = xgb.train({'objective':'binary:logistic'}, dtrain, 10)
579
- model.save_model("xgboost_model.json")
580
- print("Model saved to xgboost_model.json")</code></pre>
581
- </div>
582
- <div class="stack-layer">
583
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
584
- <p>KServe and Ray Serve both support XGBoost. A custom FastAPI server is also a robust option.</p>
585
- <details>
586
- <summary>Serve with FastAPI</summary>
587
- <div class="details-content">
588
- <pre><code># app.py
589
- from fastapi import FastAPI
590
- from pydantic import BaseModel
591
- import xgboost as xgb, numpy as np
592
-
593
- app = FastAPI(title="XGBoost Server")
594
- model = xgb.Booster()
595
- model.load_model("xgboost_model.json")
596
-
597
- class PredReq(BaseModel): data: list[list[float]]
598
-
599
- @app.post("/predict")
600
- def predict(req: PredReq):
601
- dmatrix = xgb.DMatrix(np.array(req.data))
602
- pred = model.predict(dmatrix)
603
- return {"prediction": pred.tolist()}
604
- </code></pre>
605
- </div>
606
- </details>
607
- </div>
608
- <div class="stack-layer">
609
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
610
- <p>Standard Kubernetes setup. The Dockerfile should include the `xgboost` library.</p>
611
- </div>
612
- <div class="stack-layer">
613
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
614
- <p><strong>CPUs:</strong> Excellent performance for most use cases.<br><strong>GPUs:</strong> XGBoost has optional GPU acceleration which can provide a significant speedup for large datasets and complex trees during inference.</p>
615
- </div>
616
  </div>
617
-
618
- <!-- === JAX Content Panel === -->
619
  <div id="classic-jax" class="content-panel">
620
- <div class="stack-layer">
621
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
622
- <p>JAX models are often defined as pure functions with parameters handled separately. We save the parameters using NumPy.</p>
623
- <p class="code-block-header">model_setup.py</p>
624
- <pre><code># model_setup.py
625
- import jax
626
- import jax.numpy as jnp
627
- import numpy as np
628
-
629
- # A pure function for linear regression
630
- def predict_fn(params, inputs):
631
- return jnp.dot(inputs, params['w']) + params['b']
632
-
633
- # Initialize and save dummy parameters
634
- key = jax.random.PRNGKey(0)
635
- params = {
636
- 'w': jax.random.normal(key, (10,)),
637
- 'b': jnp.array(0.0)
638
- }
639
- np.savez("jax_params.npz", **params)
640
- print("Parameters saved to jax_params.npz")</code></pre>
641
- </div>
642
- <div class="stack-layer">
643
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
644
- <p>Ray Serve is an excellent fit for JAX's functional paradigm. A custom FastAPI server is also straightforward. KServe requires a custom container.</p>
645
- <details>
646
- <summary>Serve with FastAPI</summary>
647
- <div class="details-content">
648
- <pre><code># app.py
649
- from fastapi import FastAPI
650
- from pydantic import BaseModel
651
- import jax, jax.numpy as jnp, numpy as np
652
-
653
- # Define predict function and JIT-compile it
654
- @jax.jit
655
- def predict_fn(params, inputs):
656
- return jnp.dot(inputs, params['w']) + params['b']
657
-
658
- app = FastAPI(title="JAX Server")
659
- params = np.load("jax_params.npz")
660
-
661
- class PredReq(BaseModel): data: list[float]
662
-
663
- @app.post("/predict")
664
- def predict(req: PredReq):
665
- pred = predict_fn(params, jnp.array(req.data))
666
- return {"prediction": pred.tolist()}
667
- </code></pre>
668
- </div>
669
- </details>
670
- </div>
671
- <div class="stack-layer">
672
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
673
- <p>The Kubernetes configuration is standard. The Dockerfile needs to install `jax` and `jaxlib` corresponding to the target hardware (CPU or GPU).</p>
674
- </div>
675
- <div class="stack-layer">
676
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
677
- <p><strong>CPUs:</strong> JAX is very fast on CPU.<br><strong>GPUs/TPUs:</strong> JAX was designed for accelerators and excels on GPUs and TPUs, often outperforming other frameworks due to its XLA-based compilation.</p>
678
- </div>
679
  </div>
680
-
681
- <!-- === LLM Content Panel === -->
682
  <div id="genai-llm" class="content-panel">
683
- <div class="stack-layer">
684
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
685
- <p>Large Language Models (e.g., Llama, Mistral) are based on the Transformer architecture. The key inference challenge is managing the <strong>KV Cache</strong>, a stateful cache of attention keys and values that grows with every generated token and consumes massive amounts of VRAM.</p>
686
- </div>
687
- <div class="stack-layer">
688
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
689
- <p>Specialized serving toolkits are required for efficient LLM inference. These handle complex optimizations like continuous batching and KV cache management.</p>
690
- <ul>
691
- <li><strong>vLLM:</strong> A high-throughput serving engine using PagedAttention to optimize KV cache memory, drastically improving throughput.</li>
692
- <li><strong>Text Generation Inference (TGI):</strong> Hugging Face's production-ready solution with tensor parallelism and optimized kernels.</li>
693
- <li><strong>TensorRT-LLM:</strong> NVIDIA's library for compiling LLMs into highly optimized engines for NVIDIA GPUs.</li>
694
- </ul>
695
- </div>
696
- <div class="stack-layer">
697
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
698
- <p>Deployments must request large amounts of GPU resources (`nvidia.com/gpu: 1`) and memory. Node affinity and taints/tolerations are used to schedule pods onto specific GPU node pools (e.g., nodes with A100s).</p>
699
- </div>
700
- <div class="stack-layer">
701
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
702
- <p><strong>GPUs:</strong> Essential. High-VRAM GPUs like NVIDIA A100 (40GB/80GB) or H100 (80GB) are required to fit the model weights and KV cache. Multiple GPUs are often needed for larger models via tensor parallelism.</p>
703
- </div>
704
  </div>
705
-
706
- <!-- === VLM Content Panel === -->
707
  <div id="genai-vlm" class="content-panel">
708
- <div class="stack-layer">
709
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
710
- <p>Visual Large Models (e.g., LLaVA, CogVLM) combine a vision encoder (like ViT) with an LLM. They can process and reason about both images and text, making them powerful but complex to serve.</p>
711
- </div>
712
- <div class="stack-layer">
713
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
714
- <p>The serving stack must handle multi-modal inputs (e.g., base64-encoded images and text in a single JSON payload). Preprocessing the image into tensors is a key part of the serving logic. Frameworks like <strong>vLLM</strong> and <strong>SGLang</strong> are adding support for VLMs.</p>
715
- </div>
716
- <div class="stack-layer">
717
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
718
- <p>Similar to LLMs, VLM deployments require significant GPU and memory resources. The API server (e.g., FastAPI) must be configured to accept large request bodies to accommodate image data.</p>
719
- </div>
720
- <div class="stack-layer">
721
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
722
- <p><strong>GPUs:</strong> High-VRAM GPUs are mandatory. The VRAM must accommodate the vision encoder, the LLM, and the KV cache, making memory requirements even higher than for a text-only LLM of a similar size.</p>
723
- </div>
724
  </div>
725
-
726
- <!-- === Diffusion Content Panel === -->
727
  <div id="genai-diffusion" class="content-panel">
728
- <div class="stack-layer">
729
- <h3><i class="material-icons">psychology</i>Model Layer</h3>
730
- <p>Diffusion models (e.g., Stable Diffusion) generate images through an iterative denoising process. Each step is a full forward pass through a large UNet model, making inference latency a major challenge.</p>
731
- </div>
732
- <div class="stack-layer">
733
- <h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
734
- <p>Optimizing the serving stack focuses on reducing the number of inference steps and speeding up each step.</p>
735
- <ul>
736
- <li><strong>Model Compilation:</strong> Use tools like <strong>TensorRT</strong> or `torch.compile` to optimize the UNet and VAE components for the target GPU.</li>
737
- <li><strong>Latent Consistency Models (LCMs):</strong> A powerful distillation technique that allows for high-quality image generation in just 2-8 steps, drastically cutting latency.</li>
738
- <li><strong>Custom Pipelines:</strong> Tools like <strong>ComfyUI</strong> or <strong>Diffusers</strong> provide flexible pipelines that can be wrapped in a serving framework like FastAPI or Ray Serve.</li>
739
- </ul>
740
- </div>
741
- <div class="stack-layer">
742
- <h3><i class="material-icons">cloud_queue</i>Kubernetes Layer</h3>
743
- <p>Deployments must be stateful if caching compiled models or dealing with user-specific LoRAs. Persistent Volumes (PVCs) can be used to store these assets. Resource requests for GPU and VRAM are critical.</p>
744
- </div>
745
- <div class="stack-layer">
746
- <h3><i class="material-icons">memory</i>Hardware Layer</h3>
747
- <p><strong>GPUs:</strong> High-end consumer (e.g., RTX 4090) or datacenter GPUs (A10G, A100) are needed for acceptable generation speeds. VRAM is the most critical resource, as it limits the output resolution and batch size.</p>
748
- </div>
749
- </div>
750
- </div>
751
-
752
- <!-- ======================= Generic ML Optimization Section ======================= -->
753
- <div id="optimizations" style="margin-top: 3rem;">
754
- <h2 class="main-section-title"><i class="material-icons">speed</i>Generic ML Optimization</h2>
755
- <div class="content-panel active">
756
- <div class="stack-layer">
757
- <h3><i class="material-icons">dns</i>Optimize the Cluster</h3>
758
- <p>Tune the foundation for performance and cost.</p>
759
- <ul>
760
- <li><strong>Node Tuning:</strong> Use appropriate machine types (e.g., GPU nodes for DL, compute-optimized for CPU-bound tasks).</li>
761
- <li><strong>Cluster Autoscaling:</strong> Automatically add/remove nodes based on demand to save costs.</li>
762
- <li><strong>Network Policies:</strong> Secure inter-service communication within the cluster.</li>
763
- </ul>
764
- </div>
765
- <div class="stack-layer">
766
- <h3><i class="material-icons">web</i>Optimize the Container & Server</h3>
767
- <p>Make the serving application itself as efficient as possible.</p>
768
- <ul>
769
- <li><strong>Efficient Web Server:</strong> Use ASGI servers (Uvicorn, Hypercorn) with FastAPI over WSGI (Flask) for better async performance.</li>
770
- <li><strong>Dynamic Batching:</strong> Group incoming requests into a single batch to maximize hardware utilization, especially on GPUs.</li>
771
- <li><strong>Lean Containers:</strong> Use multi-stage Docker builds to create small, secure production images.</li>
772
- </ul>
773
- </div>
774
- <div class="stack-layer">
775
- <h3><i class="material-icons">compress</i>Optimize the Model</h3>
776
- <p>Reduce model size and increase inference speed.</p>
777
- <ul>
778
- <li><strong>Quantization:</strong> Reduce model precision (e.g., FP32 to INT8/FP8) to shrink size and accelerate inference.</li>
779
- <li><strong>Pruning:</strong> Remove unnecessary weights from the model to create a smaller, faster "sparse" version.</li>
780
- <li><strong>Compilation:</strong> Use tools like TensorRT, OpenVINO, or JAX's JIT to compile the model into highly optimized, hardware-specific code.</li>
781
- </ul>
782
- </div>
783
  </div>
784
  </div>
785
  </main>
@@ -787,63 +362,175 @@ def predict(req: PredReq):
787
 
788
  <script>
789
  document.addEventListener('DOMContentLoaded', function() {
790
- const tiles = document.querySelectorAll('.tile');
791
- const contentPanels = document.querySelectorAll('.content-panel');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792
 
793
- // Function to switch active panels
794
- function switchPanel(event) {
795
- const targetId = event.currentTarget.dataset.target;
796
-
797
- // Update tiles
798
- tiles.forEach(tile => {
799
- tile.classList.remove('active');
800
- });
801
- event.currentTarget.classList.add('active');
802
-
803
- // Update content panels
804
- contentPanels.forEach(panel => {
805
- if (panel.id === targetId) {
806
- panel.classList.add('active');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
807
  } else {
808
- // Only hide panels that are part of the tile system
809
- if (!panel.parentElement.id || panel.parentElement.id !== 'optimizations') {
810
- panel.classList.remove('active');
811
- }
812
  }
813
  });
814
- }
815
 
816
- // Attach click listeners
817
- tiles.forEach(tile => {
818
- tile.addEventListener('click', switchPanel);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820
 
821
- // Add copy buttons to all pre blocks
822
- const preBlocks = document.querySelectorAll('pre');
823
- preBlocks.forEach(pre => {
824
- const code = pre.querySelector('code');
825
- if (code) {
826
  const copyButton = document.createElement('button');
827
  copyButton.innerText = 'Copy';
828
  copyButton.className = 'copy-btn';
829
-
830
  copyButton.addEventListener('click', (e) => {
831
- e.stopPropagation(); // Prevent details/summary from toggling
832
- navigator.clipboard.writeText(code.innerText).then(() => {
833
  copyButton.innerText = 'Copied!';
834
  copyButton.classList.add('copied');
835
- setTimeout(() => {
836
- copyButton.innerText = 'Copy';
837
- copyButton.classList.remove('copied');
838
- }, 2000);
839
- }).catch(err => {
840
- console.error('Failed to copy text: ', err);
841
  });
842
  });
843
-
844
- pre.appendChild(copyButton);
845
  }
846
  });
 
847
  });
848
  </script>
849
  </body>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>The MLOps Engineer's Interactive Architecture Builder</title>
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" rel="stylesheet">
 
14
  --primary-color: #1E88E5; /* Blue */
15
  --primary-dark: #1565C0;
16
  --secondary-color: #004d40; /* Dark Teal for contrast */
17
+ --genai-color: #6A1B9A; /* Purple for Gen AI */
18
  --background-color: #f4f6f8;
19
  --card-bg-color: #ffffff;
20
  --text-color: #333;
 
37
  }
38
 
39
  /* --- Layout & Containers --- */
40
+ .container { max-width: 1200px; margin: 0 auto; padding: 2rem; }
41
+ header { text-align: center; margin-bottom: 2rem; }
42
+ header h1 { color: var(--heading-color); font-weight: 700; font-size: 2.8rem; margin-bottom: 0.5rem; }
43
+ header p { font-size: 1.1rem; color: var(--subtle-text-color); max-width: 800px; margin: 0 auto; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  .main-section-title {
46
+ font-size: 2.2rem; color: var(--heading-color); border-bottom: 3px solid var(--primary-color);
47
+ padding-bottom: 0.75rem; margin-top: 3rem; margin-bottom: 2rem; display: flex; align-items: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
+ .main-section-title .material-icons { font-size: 2.8rem; margin-right: 1rem; }
50
+
51
+ /* --- Architecture Builder --- */
52
+ #architecture-builder { background-color: var(--card-bg-color); padding: 2rem; border-radius: 8px; box-shadow: var(--shadow); }
53
+ .arch-type-selector { display: flex; gap: 1rem; margin-bottom: 2rem; border-bottom: 1px solid var(--border-color); padding-bottom: 1.5rem; }
54
+ .arch-type-chip { padding: 0.8rem 1.5rem; border-radius: 8px; cursor: pointer; font-weight: 500; font-size: 1.1rem; border: 2px solid transparent; transition: all 0.2s ease; }
55
+ .arch-type-chip.active.classic { background-color: #e3f2fd; border-color: var(--primary-color); color: var(--primary-dark); }
56
+ .arch-type-chip.active.gen-ai { background-color: #f3e5f5; border-color: var(--genai-color); color: var(--genai-color); }
57
 
58
+ .builder-fields { display: none; }
59
+ .builder-fields.active { display: block; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ .selection-group { margin-bottom: 1.5rem; transition: opacity 0.3s ease; }
62
+ .selection-group.disabled { opacity: 0.5; pointer-events: none; }
63
+ .selection-group h4 { margin-top: 0; margin-bottom: 1rem; font-size: 1.2rem; color: var(--secondary-color); }
64
+ .selection-chips { display: flex; flex-wrap: wrap; gap: 0.75rem; }
65
+ .chip {
66
+ padding: 0.6rem 1.2rem; border: 2px solid var(--border-color); border-radius: 20px;
67
+ cursor: pointer; transition: all 0.2s ease; font-weight: 500; background-color: #f9f9f9;
68
  }
69
+ .chip:not(.disabled):hover { border-color: var(--primary-dark); background-color: #e3f2fd; }
70
+ .chip.active { background-color: var(--primary-color); color: white; border-color: var(--primary-color); }
71
+ .chip.disabled { opacity: 0.6; cursor: not-allowed; background-color: #f0f0f0; border-color: var(--border-color); color: #999; }
72
 
73
+ #generate-btn {
74
+ background-color: var(--secondary-color); color: white; border: none; padding: 0.8rem 2rem; font-size: 1.1rem;
75
+ font-weight: 500; border-radius: 6px; cursor: pointer; transition: background-color 0.2s;
76
+ display: block; margin-top: 2rem; width: 100%;
77
  }
78
+ #generate-btn:hover { background-color: #00695C; }
79
 
80
+ /* --- Architecture Diagram Output --- */
81
+ #architecture-diagram-output {
82
+ display: none; margin-top: 2rem; background-color: #fdfdfd; border: 1px solid var(--border-color);
83
+ padding: 2rem; border-radius: 8px; text-align: center;
 
 
 
 
84
  }
85
+ .diagram-title { font-size: 1.5rem; font-weight: 500; margin-bottom: 2rem; }
86
+ .diagram-stack { display: flex; flex-direction: column; align-items: center; gap: 0.5rem; }
87
+ .diagram-layer {
88
+ background-color: var(--card-bg-color); border: 2px solid var(--primary-color); border-radius: 8px;
89
+ padding: 1.5rem 2.5rem; width: 80%; max-width: 500px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); text-align: center;
90
  }
91
+ .diagram-layer.gen-ai-layer { border-color: var(--genai-color); }
92
+ .diagram-layer.gen-ai-layer h5 { color: var(--genai-color); }
93
+ .diagram-layer h5 { margin: 0 0 0.5rem 0; color: var(--primary-dark); font-size: 1.2rem; font-weight: 700; }
94
+ .diagram-layer p { margin: 0; font-size: 1rem; color: var(--subtle-text-color); }
95
+ .diagram-arrow { font-family: 'Material Icons'; font-size: 2.5rem; color: var(--primary-color); line-height: 1; }
96
+ .diagram-arrow.gen-ai-arrow { color: var(--genai-color); }
97
+ .icon-img-placeholder {
98
+ height: 32px;
99
+ max-width: 120px;
100
+ width: auto;
101
+ margin-top: 10px;
102
  }
103
 
104
+ /* --- Reference Tiles and Panels, Code & Details --- */
105
+ .tile-container { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1.5rem; margin-bottom: 2.5rem; }
106
+ .tile { background-color: var(--card-bg-color); border: 2px solid var(--border-color); border-radius: 8px; padding: 1.5rem; text-align: center; cursor: pointer; transition: all 0.2s ease; display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 150px; }
107
+ .tile:hover { transform: translateY(-5px); box-shadow: var(--tile-hover-shadow); border-color: var(--primary-color); }
108
+ .tile.active { border-color: var(--primary-color); box-shadow: var(--tile-hover-shadow); background-color: #f0f7ff; }
109
+ .tile-icon-img {
110
+ height: 48px;
111
+ width: auto;
112
+ max-width: 100%;
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
+ .tile h4 { margin: 0; font-size: 1.2rem; color: var(--heading-color); }
116
+ .content-panel { display: none; background-color: var(--card-bg-color); border-radius: 8px; box-shadow: var(--shadow); padding: 2.5rem; margin-top: 1rem; }
117
+ .content-panel.active { display: block; }
118
+ .stack-layer { margin-bottom: 2.5rem; padding-bottom: 1.5rem; border-bottom: 1px solid var(--border-color); }
119
+ .stack-layer:last-child { border-bottom: none; margin-bottom: 0; }
120
+ .stack-layer h3 { font-size: 1.6rem; color: var(--secondary-color); margin-top: 0; display: flex; align-items: center; }
121
+ .stack-layer h3 .material-icons { margin-right: 12px; font-size: 2rem; }
122
+ details { border: 1px solid var(--border-color); border-radius: 6px; margin-bottom: 1rem; background-color: #f9fafb; }
123
+ summary { cursor: pointer; padding: 1rem; font-weight: 500; font-size: 1.1rem; list-style: none; display: flex; align-items: center; justify-content: space-between; }
124
+ pre { background-color: var(--code-bg-color); color: var(--code-text-color); padding: 1.5rem 1rem 1rem 1rem; border-radius: 6px; overflow-x: auto; font-size: 0.9em; position: relative; }
125
  code { font-family: 'Courier New', Courier, monospace; }
126
+ .copy-btn { position: absolute; top: 10px; right: 10px; background-color: #4a505c; color: #fff; border: none; padding: 6px 10px; border-radius: 4px; cursor: pointer; opacity: 0.7; }
 
127
  pre:hover .copy-btn { opacity: 1; }
 
128
  .copy-btn.copied { background-color: var(--primary-dark); }
129
+ .code-block-header { font-weight: bold; color: var(--subtle-text-color); margin-bottom: -0.5rem; margin-top: 1rem; }
 
130
  </style>
131
  </head>
132
  <body>
133
 
134
  <div class="container">
135
  <header>
136
+ <h1>MLOps Architecture Builder & Cheatsheet</h1>
137
+ <p>Design your custom model serving stack using the builder below, or explore detailed deployment guides for common frameworks.</p>
138
  </header>
139
 
140
  <main>
141
+ <!-- ======================= My Architecture Builder ======================= -->
142
+ <h2 class="main-section-title"><i class="material-icons">architecture</i>My Architecture</h2>
143
+ <div id="architecture-builder">
144
+ <div class="arch-type-selector">
145
+ <div class="arch-type-chip active classic" data-type="classic">Classic ML</div>
146
+ <div class="arch-type-chip gen-ai" data-type="gen-ai">Generative AI</div>
 
 
 
 
147
  </div>
148
+
149
+ <!-- Classic Builder Fields -->
150
+ <div id="classic-builder-fields" class="builder-fields active">
151
+ <div class="selection-group" data-group="framework">
152
+ <h4>1. ML Framework</h4>
153
+ <div class="selection-chips">
154
+ <div class="chip" data-id="scikit-learn">Scikit-learn</div>
155
+ <div class="chip" data-id="xgboost">XGBoost</div>
156
+ <div class="chip" data-id="pytorch">PyTorch</div>
157
+ <div class="chip" data-id="tensorflow">TensorFlow</div>
158
+ <div class="chip" data-id="jax">JAX</div>
159
+ <div class="chip" data-id="keras">Keras</div>
160
+ </div>
161
+ </div>
162
+ <div class="selection-group" data-group="serving">
163
+ <h4>2. Serving Container</h4>
164
+ <div class="selection-chips">
165
+ <div class="chip" data-id="kserve">Kubeflow KServe</div>
166
+ <div class="chip" data-id="ray-serve">Ray Serve</div>
167
+ <div class="chip" data-id="torchserve">TorchServe</div>
168
+ <div class="chip" data-id="tf-serving">TF Serving</div>
169
+ <div class="chip" data-id="triton">NVIDIA Triton</div>
170
+ <div class="chip" data-id="custom">Custom Container (FastAPI)</div>
171
+ </div>
172
+ </div>
173
+ <div class="selection-group" data-group="orchestration">
174
+ <h4>3. Orchestration / Platform</h4>
175
+ <div class="selection-chips">
176
+ <div class="chip active" data-id="kubernetes">Kubernetes</div>
177
+ <div class="chip" data-id="vertex-ai">Managed: Vertex AI</div>
178
+ <div class="chip" data-id="sagemaker">Managed: SageMaker</div>
179
+ </div>
180
+ </div>
181
+ <div class="selection-group" data-group="hardware">
182
+ <h4>4. Hardware</h4>
183
+ <div class="selection-chips">
184
+ <div class="chip" data-id="vm">VMs (CPU)</div>
185
+ <div class="chip" data-id="gpu">GPU</div>
186
+ <div class="chip" data-id="tpu">TPU</div>
187
+ </div>
188
+ </div>
189
  </div>
190
+
191
+ <!-- Gen AI Builder Fields -->
192
+ <div id="genai-builder-fields" class="builder-fields">
193
+ <div class="selection-group" data-group="model-type">
194
+ <h4>0. Model Type</h4>
195
+ <div class="selection-chips">
196
+ <div class="chip" data-id="llm">LLM</div>
197
+ <div class="chip" data-id="vlm">Multimodal LLM (VLM)</div>
198
+ <div class="chip" data-id="diffusion">Diffusion</div>
199
+ </div>
200
+ </div>
201
+ <div class="selection-group" data-group="framework">
202
+ <h4>1. ML Framework</h4>
203
+ <div class="selection-chips">
204
+ <div class="chip" data-id="pytorch">PyTorch</div>
205
+ <div class="chip" data-id="tensorflow">TensorFlow</div>
206
+ <div class="chip" data-id="jax">JAX</div>
207
+ <div class="chip" data-id="keras">Keras</div>
208
+ </div>
209
+ </div>
210
+ <div class="selection-group" data-group="serving">
211
+ <h4>2. Serving Container</h4>
212
+ <div class="selection-chips">
213
+ <div class="chip" data-id="vllm">vLLM</div>
214
+ <div class="chip" data-id="sglang">SGLang</div>
215
+ <div class="chip" data-id="triton-trt-llm">NVIDIA Triton (TensorRT-LLM)</div>
216
+ <div class="chip" data-id="custom">Custom Container (Diffusers, etc.)</div>
217
+ </div>
218
+ </div>
219
+ <div class="selection-group" data-group="orchestration">
220
+ <h4>3. Orchestration / Platform</h4>
221
+ <div class="selection-chips">
222
+ <div class="chip active" data-id="k8s-ray-kf">Kubernetes (KubeRay/Kubeflow)</div>
223
+ <div class="chip" data-id="vertex-ai">Managed: Vertex AI</div>
224
+ <div class="chip" data-id="sagemaker">Managed: SageMaker</div>
225
+ </div>
226
+ </div>
227
+ <div class="selection-group" data-group="hardware">
228
+ <h4>4. Hardware</h4>
229
+ <div class="selection-chips">
230
+ <div class="chip" data-id="gpu">GPU</div>
231
+ <div class="chip" data-id="tpu">TPU</div>
232
+ </div>
233
+ </div>
234
  </div>
235
+
236
+ <button id="generate-btn">Generate Architecture Diagram</button>
237
  </div>
238
+
239
+ <div id="architecture-diagram-output"></div>
240
 
241
+ <h2 class="main-section-title"><i class="material-icons">menu_book</i>Reference Guides</h2>
242
+
243
+ <h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--primary-color);"><i class="material-icons" style="color: var(--primary-color);">model_training</i>Classic ML</h3>
244
  <div class="tile-container">
245
+ <div class="tile" data-target="classic-pytorch"><img src="pytorch.png" class="tile-icon-img" alt="PyTorch Icon"><h4>PyTorch</h4></div>
246
+ <div class="tile" data-target="classic-tensorflow"><img src="tensorflow.png" class="tile-icon-img" alt="TensorFlow Icon"><h4>TensorFlow</h4></div>
247
+ <div class="tile" data-target="classic-sklearn"><img src="scikit-learn.png" class="tile-icon-img" alt="Scikit-learn Icon"><h4>Scikit-learn</h4></div>
248
+ <div class="tile" data-target="classic-xgboost"><img src="xgboost.png" class="tile-icon-img" alt="XGBoost Icon"><h4>XGBoost</h4></div>
249
+ <div class="tile" data-target="classic-jax"><img src="jax.png" class="tile-icon-img" alt="JAX Icon"><h4>JAX</h4></div>
 
 
 
 
 
 
 
250
  </div>
 
 
 
251
 
252
+ <h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--genai-color);"><i class="material-icons" style="color: var(--genai-color);">auto_awesome</i>Generative AI</h3>
253
+ <div class="tile-container">
254
+ <div class="tile" data-target="genai-llm"><img src="llm.png" class="tile-icon-img" alt="LLM Icon"><h4>LLMs</h4></div>
255
+ <div class="tile" data-target="genai-vlm"><img src="vlm.png" class="tile-icon-img" alt="VLM Icon"><h4>Multimodal (VLMs)</h4></div>
256
+ <div class="tile" data-target="genai-diffusion"><img src="diffusion.png" class="tile-icon-img" alt="Diffusion Icon"><h4>Diffusion Models</h4></div>
257
+ </div>
258
+
259
+ <div class="content-container">
260
+ <!-- Classic ML Panels -->
261
+ <div id="classic-pytorch" class="content-panel">
262
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3>
263
  <p>A simple feed-forward network defined in PyTorch. The model's `state_dict` is saved for deployment.</p>
264
  <p class="code-block-header">model_setup.py</p>
265
+ <pre><code>import torch
 
266
  import torch.nn as nn
 
267
  class SimpleNet(nn.Module):
268
  def __init__(self):
269
  super(SimpleNet, self).__init__()
270
  self.linear = nn.Linear(10, 1)
271
+ def forward(self, x): return self.linear(x)
 
 
 
272
  model = SimpleNet()
273
+ torch.save(model.state_dict(), "pytorch_model.pth")</code></pre>
 
274
  </div>
275
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
276
+ <p>Use a high-performance framework like FastAPI for a custom server. For dedicated solutions, TorchServe is the native choice, while Kubeflow KServe, Ray Serve, and NVIDIA Triton offer powerful, managed abstractions.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  </div>
278
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3>
279
+ <p>Package the application with a multi-stage Dockerfile and define its runtime with Kubernetes Deployment, Service, and HPA objects. Managed platforms like Vertex AI abstract this away.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  </div>
281
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3>
282
+ <p><strong>CPUs:</strong> Suitable for small networks. <strong>GPUs:</strong> Essential for deep learning models. <strong>TPUs:</strong> Best for massive-scale inference on GCP.</p>
 
283
  </div>
284
  </div>
 
 
285
  <div id="classic-tensorflow" class="content-panel">
286
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3>
 
 
287
  <p>A simple Keras model saved in TensorFlow's `SavedModel` format, which bundles the architecture and weights.</p>
288
  <p class="code-block-header">model_setup.py</p>
289
+ <pre><code>import tensorflow as tf
 
290
  model = tf.keras.Sequential([
291
  tf.keras.layers.Dense(10, activation='relu', input_shape=(10,)),
292
  tf.keras.layers.Dense(1)
293
  ])
294
+ model.save("tf_saved_model")</code></pre>
 
295
  </div>
296
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
297
+ <p>TF Serving and Kubeflow KServe offer native, high-performance support for the `SavedModel` format. NVIDIA Triton is also highly optimized for TF models. A custom FastAPI server is another flexible option.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  </div>
299
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3>
300
+ <p>The Kubernetes configuration is very similar to other frameworks. Ensure your Dockerfile copies the entire `tf_saved_model` directory.</p>
 
301
  </div>
302
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3>
303
+ <p><strong>CPUs:</strong> Good for smaller Keras models. <strong>GPUs:</strong> Highly recommended for deep learning models. <strong>TPUs:</strong> The premier choice for running TensorFlow models at scale on GCP.</p>
 
304
  </div>
305
  </div>
 
 
306
  <div id="classic-sklearn" class="content-panel">
307
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3>
 
308
  <p>A classic logistic regression model. Serialization is typically done with `joblib` for efficiency with NumPy structures.</p>
309
  <p class="code-block-header">model_setup.py</p>
310
+ <pre><code>import joblib
 
311
  from sklearn.linear_model import LogisticRegression
312
  from sklearn.datasets import make_classification
 
313
  X, y = make_classification(n_features=4)
314
  model = LogisticRegression().fit(X, y)
315
+ joblib.dump(model, "sklearn_model.joblib")</code></pre>
 
316
  </div>
317
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3>
318
+ <p>FastAPI provides a simple and fast web server. Kubeflow KServe and Ray Serve also have native support for scikit-learn models. NVIDIA Triton is an option for CPU-optimized execution using its FIL backend.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  </div>
320
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3>
321
+ <p>Standard Kubernetes setup. The Docker container will be lightweight as it only needs `scikit-learn`, `joblib`, and `fastapi` for a custom server.</p>
 
322
  </div>
323
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3>
324
+ <p><strong>CPUs:</strong> Almost always sufficient. There is no GPU acceleration for standard scikit-learn algorithms.</p>
 
325
  </div>
326
  </div>
 
 
327
  <div id="classic-xgboost" class="content-panel">
328
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>An XGBoost model saved in its native JSON or UBJ format, which is portable and efficient.</p></div>
329
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Kubeflow KServe, Ray Serve, NVIDIA Triton (with FIL backend), and custom FastAPI servers are all excellent choices.</p></div>
330
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Standard Kubernetes setup. The Dockerfile should include the `xgboost` library.</p></div>
331
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs:</strong> Excellent performance. <strong>GPUs:</strong> XGBoost has optional GPU acceleration which can provide a significant speedup.</p></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  </div>
 
 
333
  <div id="classic-jax" class="content-panel">
334
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>JAX models are often defined as pure functions with parameters handled separately. We save the parameters using a standard serialization library like Flax's `msgpack`.</p></div>
335
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Ray Serve is an excellent fit for JAX's functional paradigm. A custom FastAPI server is also straightforward. Kubeflow KServe and NVIDIA Triton require a custom container approach wrapping the JAX logic.</p></div>
336
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>The Dockerfile needs to install `jax` and `jaxlib` corresponding to the target hardware (CPU, GPU, or TPU).</p></div>
337
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs/GPUs/TPUs:</strong> JAX was designed for accelerators and excels on all of them due to its XLA-based compilation.</p></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  </div>
339
+
340
+ <!-- Gen AI Panels -->
341
  <div id="genai-llm" class="content-panel">
342
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Large Language Models (e.g., Llama, Mistral) are based on the Transformer architecture. The key inference challenge is managing the <strong>KV Cache</strong>.</p></div>
343
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Specialized serving toolkits like <strong>vLLM</strong>, <strong>SGLang</strong>, or <strong>NVIDIA Triton</strong> with its TensorRT-LLM backend are required for efficient inference, handling complexities like continuous batching and paged attention.</p></div>
344
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes (often with KubeRay) is used to manage GPU resources and schedule serving pods. Managed services like Vertex AI and SageMaker also provide optimized runtimes for popular LLMs.</p></div>
345
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> Essential. High-VRAM GPUs like NVIDIA A100 or H100 are required to fit the model weights and KV cache. <strong>TPUs:</strong> Viable for specific models, especially on GCP.</p></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  </div>
 
 
347
  <div id="genai-vlm" class="content-panel">
348
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Visual Large Models (e.g., LLaVA, IDEFICS) combine a vision encoder (like ViT) with an LLM to process images and text.</p></div>
349
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>The stack must handle multi-modal inputs. Frameworks like <strong>vLLM</strong> and <strong>SGLang</strong> are adding native support for VLMs. A custom container is often needed to handle the specific image preprocessing logic.</p></div>
350
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Similar to LLMs, requires robust orchestration to manage high-resource GPU pods and potentially large input payloads.</p></div>
351
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-VRAM GPUs are mandatory due to the combined size of the vision encoder, LLM, and KV cache.</p></div>
 
 
 
 
 
 
 
 
 
 
 
 
352
  </div>
 
 
353
  <div id="genai-diffusion" class="content-panel">
354
+ <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Diffusion models (e.g., Stable Diffusion) generate images through an iterative denoising process, making latency a key challenge.</p></div>
355
+ <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Optimizations focus on reducing latency. Key tools include model compilers like <strong>TensorRT</strong> (often used with NVIDIA Triton), techniques like <strong>Latent Consistency Models (LCMs)</strong>, and libraries like <strong>Diffusers</strong>, typically wrapped in a custom FastAPI container.</p></div>
356
+ <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes or managed platforms are used to serve the GPU-intensive workload. Autoscaling is critical to handle bursty traffic patterns.</p></div>
357
+ <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-end consumer or datacenter GPUs are needed for acceptable generation speeds. VRAM is the most critical resource, dictating max resolution and batch size.</p></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  </div>
359
  </div>
360
  </main>
 
362
 
363
  <script>
364
  document.addEventListener('DOMContentLoaded', function() {
365
+ const builder = document.getElementById('architecture-builder');
366
+ const generateBtn = document.getElementById('generate-btn');
367
+ const diagramOutput = document.getElementById('architecture-diagram-output');
368
+
369
+ const archTypeSelector = builder.querySelector('.arch-type-selector');
370
+ const classicFields = document.getElementById('classic-builder-fields');
371
+ const genaiFields = document.getElementById('genai-builder-fields');
372
+
373
+ function updateChipStates() {
374
+ const activeArchType = archTypeSelector.querySelector('.active').dataset.type;
375
+ const activeBuilderFields = (activeArchType === 'classic') ? classicFields : genaiFields;
376
+
377
+ if (activeArchType === 'classic') {
378
+ const activeFramework = activeBuilderFields.querySelector('.selection-group[data-group="framework"] .chip.active');
379
+ const torchserveChip = activeBuilderFields.querySelector('.chip[data-id="torchserve"]');
380
+ const tfservingChip = activeBuilderFields.querySelector('.chip[data-id="tf-serving"]');
381
+
382
+ [torchserveChip, tfservingChip].forEach(c => c.classList.remove('disabled'));
383
+
384
+ if (activeFramework) {
385
+ const frameworkId = activeFramework.dataset.id;
386
+ const nonTfTsFrameworks = ['scikit-learn', 'xgboost', 'jax'];
387
+ if (frameworkId === 'pytorch') {
388
+ tfservingChip.classList.add('disabled');
389
+ if(tfservingChip.classList.contains('active')) tfservingChip.classList.remove('active');
390
+ } else if (frameworkId === 'tensorflow') {
391
+ torchserveChip.classList.add('disabled');
392
+ if(torchserveChip.classList.contains('active')) torchserveChip.classList.remove('active');
393
+ } else if (nonTfTsFrameworks.includes(frameworkId)) {
394
+ [torchserveChip, tfservingChip].forEach(c => {
395
+ c.classList.add('disabled');
396
+ if(c.classList.contains('active')) c.classList.remove('active');
397
+ });
398
+ }
399
+ }
400
+ } else { // Gen AI Logic
401
+ const activeModelType = activeBuilderFields.querySelector('.selection-group[data-group="model-type"] .chip.active');
402
+ const vllmChip = activeBuilderFields.querySelector('.chip[data-id="vllm"]');
403
+ const sglangChip = activeBuilderFields.querySelector('.chip[data-id="sglang"]');
404
+
405
+ [vllmChip, sglangChip].forEach(c => c.classList.remove('disabled'));
406
+
407
+ if (activeModelType && activeModelType.dataset.id === 'diffusion') {
408
+ [vllmChip, sglangChip].forEach(c => {
409
+ c.classList.add('disabled');
410
+ if(c.classList.contains('active')) c.classList.remove('active');
411
+ });
412
+ }
413
+ }
414
 
415
+ const activeOrchestration = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active');
416
+ const servingGroup = activeBuilderFields.querySelector('.selection-group[data-group="serving"]');
417
+
418
+ if (activeOrchestration && (activeOrchestration.dataset.id === 'vertex-ai' || activeOrchestration.dataset.id === 'sagemaker')) {
419
+ servingGroup.classList.add('disabled');
420
+ servingGroup.querySelector('.chip.active')?.classList.remove('active');
421
+ } else {
422
+ servingGroup.classList.remove('disabled');
423
+ }
424
+ }
425
+
426
+ archTypeSelector.addEventListener('click', function(e){
427
+ if (!e.target.classList.contains('arch-type-chip')) return;
428
+ archTypeSelector.querySelectorAll('.arch-type-chip').forEach(c => c.classList.remove('active'));
429
+ e.target.classList.add('active');
430
+ const type = e.target.dataset.type;
431
+ classicFields.classList.toggle('active', type === 'classic');
432
+ genaiFields.classList.toggle('active', type === 'gen-ai');
433
+ diagramOutput.style.display = 'none';
434
+ updateChipStates();
435
+ });
436
+
437
+ builder.addEventListener('click', function(e) {
438
+ if (!e.target.classList.contains('chip') || e.target.classList.contains('disabled')) return;
439
+ const chip = e.target;
440
+ const group = chip.closest('.selection-group');
441
+ if (group.classList.contains('disabled')) return;
442
+ group.querySelectorAll('.chip').forEach(c => c.classList.remove('active'));
443
+ chip.classList.add('active');
444
+ updateChipStates();
445
+ });
446
+
447
// Build and display the architecture diagram from the user's chip selections.
// Collects one active chip per visible selection group, validates that every
// required layer is chosen, then renders the stacked diagram into diagramOutput.
generateBtn.addEventListener('click', function() {
    const activeArchType = archTypeSelector.querySelector('.active').dataset.type;
    const activeBuilderFields = document.querySelector('.builder-fields.active');
    const selections = {};
    let allSelected = true;

    // Query the orchestration choice once (the original queried it twice with
    // duplicated selectors). A managed platform replaces the separate serving
    // layer. Exact-id comparison matches the convention used by
    // updateChipStates() instead of fragile substring matching.
    const orchestrationChip = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active');
    const isManaged = orchestrationChip !== null &&
        (orchestrationChip.dataset.id === 'vertex-ai' || orchestrationChip.dataset.id === 'sagemaker');

    activeBuilderFields.querySelectorAll('.selection-group').forEach(group => {
        const groupKey = group.dataset.group;
        // Managed platforms bundle serving, so that group is not required.
        if (isManaged && groupKey === 'serving') return;

        const activeChip = group.querySelector('.chip.active');
        if (activeChip) {
            selections[groupKey] = { name: activeChip.innerText, id: activeChip.dataset.id };
        } else {
            allSelected = false;
        }
    });

    if (!allSelected) {
        alert('Please make a selection for each required layer.');
        return;
    }

    let diagramHtml = `<h3 class="diagram-title">Your Custom ${activeArchType === 'gen-ai' ? 'Generative AI' : 'Classic ML'} Architecture</h3><div class="diagram-stack">`;
    const arrowClass = activeArchType === 'gen-ai' ? 'gen-ai-arrow' : '';
    const layerClass = activeArchType === 'gen-ai' ? 'gen-ai-layer' : '';

    // Each layer shows an icon image named after the selection's data-id.
    function createImageTag(selection) {
        return `<img src="${selection.id}.png" alt="${selection.name} Icon" class="icon-img-placeholder">`;
    }

    // Gen-AI architectures have an extra "Model Type" layer on top.
    if (activeArchType === 'gen-ai') {
        diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections['model-type'].name}</h5><p>Model Type</p>${createImageTag(selections['model-type'])}</div><div class="diagram-arrow ${arrowClass}">south</div>`;
    }

    diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.framework.name}</h5><p>ML Framework</p>${createImageTag(selections.framework)}</div><div class="diagram-arrow ${arrowClass}">south</div>`;

    if (isManaged) {
        // Managed platforms collapse serving + orchestration into one layer.
        diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Managed Platform</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`;
    } else {
        diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.serving.name}</h5><p>Serving Container</p>${createImageTag(selections.serving)}</div><div class="diagram-arrow ${arrowClass}">south</div>`;
        diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Orchestration</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`;
    }

    diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.hardware.name}</h5><p>Hardware</p>${createImageTag(selections.hardware)}</div>`;
    diagramHtml += `</div>`;

    diagramOutput.innerHTML = diagramHtml;
    diagramOutput.style.display = 'block';
    diagramOutput.scrollIntoView({ behavior: 'smooth', block: 'center' });
});
501
+
502
// Tile navigation: clicking a tile highlights it and reveals the matching
// content panel (looked up via the tile's data-target attribute).
const tiles = document.querySelectorAll('.tile');
const contentPanels = document.querySelectorAll('.content-panel');
tiles.forEach((tile) => {
  tile.addEventListener('click', (event) => {
    const clickedTile = event.currentTarget;
    tiles.forEach((other) => other.classList.remove('active'));
    clickedTile.classList.add('active');
    contentPanels.forEach((panelEl) => panelEl.classList.remove('active'));
    const targetPanel = document.getElementById(clickedTile.dataset.target);
    if (!targetPanel) return;
    targetPanel.classList.add('active');
    targetPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });
  });
});
515
 
516
// Inject a "Copy" button into every code block that doesn't already have one.
// Clicking copies the block's text to the clipboard and shows brief feedback.
document.querySelectorAll('pre code').forEach(codeBlock => {
    const pre = codeBlock.parentElement;
    if (!pre.querySelector('.copy-btn')) {
        const copyButton = document.createElement('button');
        // Explicit type: default for <button> is "submit", which would submit
        // an enclosing form if one ever wraps the snippet.
        copyButton.type = 'button';
        copyButton.innerText = 'Copy';
        copyButton.className = 'copy-btn';
        pre.appendChild(copyButton);
        copyButton.addEventListener('click', (e) => {
            e.stopPropagation();
            navigator.clipboard.writeText(codeBlock.innerText).then(() => {
                copyButton.innerText = 'Copied!';
                copyButton.classList.add('copied');
                setTimeout(() => { copyButton.innerText = 'Copy'; copyButton.classList.remove('copied'); }, 2000);
            }).catch(() => {
                // writeText rejects when clipboard permission is denied or the
                // page is not in a secure context; the original left this as a
                // silent unhandled rejection with no user feedback.
                copyButton.innerText = 'Error';
                setTimeout(() => { copyButton.innerText = 'Copy'; }, 2000);
            });
        });
    }
});
533
+ updateChipStates();
534
  });
535
  </script>
536
  </body>