Spaces:

Aashish34
/

DataScience

Running

App Files Files Community

Aashish34 committed on 15 days ago

Commit

ccd63d1

1 Parent(s): 18550fb

add new topics

Browse files

Files changed (1) hide show

DeepLearning/{Deep Learning Curriculum.html → index.html} +1285 -19

DeepLearning/{Deep Learning Curriculum.html → index.html} RENAMED Viewed

@@ -47,6 +47,7 @@
         h1 {
             font-size: 2.5em;
             background: linear-gradient(135deg, var(--cyan), var(--orange));
             -webkit-background-clip: text;
             -webkit-text-fill-color: transparent;
             margin-bottom: 10px;
@@ -727,6 +728,14 @@
                 category: "Vision",
                 color: "#ff6b35",
                 description: "Transformers applied to image data"
             }
         ];
@@ -1398,6 +1407,37 @@
                         Learning Rule: w_new = w_old + α(y_true - y_pred)x
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">📚 Educational</div>
@@ -2491,6 +2531,83 @@
                         • Start with low learning rate (1e-4) for fine-tuning<br>
                         • Popular backbones: ResNet50, EfficientNet, ViT
                     </div>
                 `
             },
             "localization": {
@@ -2513,6 +2630,64 @@
                         <li><strong>Option 1:</strong> (x_min, y_min, x_max, y_max)</li>
                         <li><strong>Option 2:</strong> (x_center, y_center, width, height) ← Most common</li>
                     </ul>
                 `
             },
             "rcnn": {
@@ -2556,6 +2731,58 @@
                         Faster R-CNN: Best accuracy for detection (not real-time)<br>
                         Mask R-CNN: Detection + instance segmentation
                     </div>
                 `
             },
             "ssd": {
@@ -2577,6 +2804,61 @@
                         <br>
                         Sweet spot between YOLO (faster) and Faster R-CNN (more accurate)
                     </div>
                 `
             },
             "semantic-seg": {
@@ -2610,6 +2892,44 @@
                         With skip connections from encoder to decoder at each level
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🏥 Medical Imaging</div>
@@ -2641,6 +2961,58 @@
                         2. Class prediction<br>
                         3. <strong>Binary mask for the object</strong>
                     </div>
                 `
             },
             "face-recog": {
@@ -2665,6 +3037,54 @@
                         No retraining needed - just compare embeddings.
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">📱 Phone Unlock</div>
@@ -2696,22 +3116,64 @@
                         <li><strong>Sparse:</strong> Encourage sparse activations</li>
                     </ul>
                 `,
-                applications: `
-                    <div class="info-box">
-                        <div class="box-title">🗜️ Compression</div>
-                        <div class="box-content">Dimensionality reduction, data compression, feature extraction</div>
                     </div>
-                    <div class="info-box">
-                        <div class="box-title">🔍 Anomaly Detection</div>
-                        <div class="box-content">High reconstruction error = anomaly (fraud detection, defect detection)</div>
                     </div>
-                `
-            },
-            "gans": {
-                overview: `
-                    <h3>GANs (Generative Adversarial Networks)</h3>
-                    <p>Two networks compete: Generator creates fake data, Discriminator tries to detect fakes.</p>
                     <h3>The GAN Game</h3>
                     <div class="formula">
                         Generator: Creates fake images from random noise<br>
@@ -2796,6 +3258,50 @@
                         • Controllable generation (text-to-image)
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🖼️ Text-to-Image</div>
@@ -2898,6 +3404,51 @@
                         4. Achieves SOTA with minimal data!
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🔍 Search & QA</div>
@@ -2960,6 +3511,49 @@
                         • Multi-step problem solving
                     </div>
                 `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">💬 ChatGPT & Assistants</div>
@@ -3008,6 +3602,137 @@
                         • <strong>Transfer Learning:</strong> Pre-trained ViT beats CNNs on many tasks<br>
                         • <strong>Long-Range Dependencies:</strong> Global attention vs CNN's local receptive field
                     </div>
                 `
             }
         };
@@ -3226,7 +3951,8 @@
                 'transformers': drawAttentionMatrix,
                 'bert': drawBERTProcess,
                 'gpt': drawGPTGeneration,
-                'vit': drawVisionTransformer
             };
             if (vizMap[moduleId]) {
@@ -3596,7 +4322,8 @@
                 'pooling': () => drawPoolingMath(ctx, canvas),
                 'regularization': () => drawRegularizationMath(ctx, canvas),
                 'transformers': () => drawAttentionMath(ctx, canvas),
-                'rnn': () => drawRNNMath(ctx, canvas)
             };
             if (mathVizMap[moduleId]) {
@@ -3628,7 +4355,8 @@
                 'bert': () => drawBERTApplications(ctx, canvas),
                 'gpt': () => drawGPTApplications(ctx, canvas),
                 'gans': () => drawGANApplications(ctx, canvas),
-                'diffusion': () => drawDiffusionApplications(ctx, canvas)
             };
             if (appVizMap[moduleId]) {
@@ -4335,17 +5063,475 @@
         }
         // Animation and download utilities
         function toggleVizAnimation(moduleId) {
             window.vizAnimating = !window.vizAnimating;
             if (window.vizAnimating) {
                 animateVisualization(moduleId);
             }
         }
         function animateVisualization(moduleId) {
             if (!window.vizAnimating) return;
-            drawConceptsVisualization(moduleId);
-            setTimeout(() => animateVisualization(moduleId), 150);
         }
         function downloadViz(moduleId) {
@@ -4358,6 +5544,86 @@
             link.click();
         }
         initDashboard();
     </script>
 </body>

         h1 {
             font-size: 2.5em;
             background: linear-gradient(135deg, var(--cyan), var(--orange));
+            background-clip: text;
             -webkit-background-clip: text;
             -webkit-text-fill-color: transparent;
             margin-bottom: 10px;
                 category: "Vision",
                 color: "#ff6b35",
                 description: "Transformers applied to image data"
+            },
+            {
+                id: "gnn",
+                title: "Graph Neural Networks",
+                icon: "🕸️",
+                category: "Advanced",
+                color: "#9900ff",
+                description: "Deep learning on non-Euclidean graph data"
             }
         ];
                         Learning Rule: w_new = w_old + α(y_true - y_pred)x
                     </div>
                 `,
+                math: `
+                    <h3>Perceptron Learning Algorithm</h3>
+                    <p>The perceptron update rule is the simplest form of gradient descent.</p>
+                    <div class="formula">
+                        For each misclassified sample (x, y) with prediction ŷ:<br>
+                        w ← w + α × (y − ŷ) × x<br>
+                        b ← b + α × (y − ŷ)
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Manual Training</div>
+                        <strong>Data:</strong> x₁ = [1, 1], y₁ = 1 | x₂ = [0, 0], y₂ = 0<br>
+                        <strong>Initial:</strong> w = [0, 0], b = 0, α = 1<br>
+                        <br>
+                        <strong>Iteration 1 (x₁):</strong><br>
+                        z = 0×1 + 0×1 + 0 = 0 → ŷ = 1 ✓ (correct!)<br>
+                        <br>
+                        <strong>Iteration 2 (x₂):</strong><br>
+                        z = 0×0 + 0×0 + 0 = 0 → ŷ = 1 ✗ (wrong! y=0)<br>
+                        Update: w = [0,0] + 1×(0-1)×[0,0] = [0,0], b = 0 + 1×(0-1) = -1<br>
+                        <br>
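+                        Now z(x₂) = 0 + 0 - 1 = -1 → ŷ = 0 ✓<br>
+                        But z(x₁) = 0×1 + 0×1 - 1 = -1 → ŷ = 0 ✗, so training continues; repeating the passes converges to w = [1, 1], b = -1.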
+                    </div>
+                    <h3>Convergence Theorem</h3>
+                    <div class="formula">
+                        If data is linearly separable with margin γ and ||x|| ≤ R,<br>
+                        perceptron converges in at most (R/γ)² updates.
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">📚 Educational</div>
                         • Start with low learning rate (1e-4) for fine-tuning<br>
                         • Popular backbones: ResNet50, EfficientNet, ViT
                     </div>
+                `,
+                concepts: `
+                    <h3>Why Transfer Learning Works</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Feature Hierarchy:</strong> Early layers learn universal features (edges, textures) that transfer across domains</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Domain Similarity:</strong> The more similar source and target domains, the better transfer</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Regularization Effect:</strong> Pre-trained weights act as strong priors, preventing overfitting</div>
+                    </div>
+                    <h3>Transfer Learning Quadrant</h3>
+                    <table>
+                        <tr>
+                            <th></th>
+                            <th>Similar Domain</th>
+                            <th>Different Domain</th>
+                        </tr>
+                        <tr>
+                            <td><strong>Large Data</strong></td>
+                            <td>Fine-tune all layers</td>
+                            <td>Fine-tune top layers</td>
+                        </tr>
+                        <tr>
+                            <td><strong>Small Data</strong></td>
+                            <td>Feature extraction</td>
+                            <td>Feature extraction (risky)</td>
+                        </tr>
+                    </table>
+                `,
+                math: `
+                    <h3>Learning Rate Strategies</h3>
+                    <p>Different layers need different learning rates during fine-tuning.</p>
+                    <div class="formula">
+                        Discriminative Fine-tuning:<br>
+                        lr_layer_n = lr_base × decay^(L-n)<br>
+                        <br>
+                        Where L = total layers, n = layer index<br>
+                        Example: L=10, lr_base=1e-3, decay=0.9<br>
+                        Layer 1: 1e-3 × 0.9^9 ≈ 3.9e-4<br>
+                        Layer 10: 1e-3 × 0.9^0 = 1e-3
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Domain Shift</div>
+                        When source and target distributions differ:<br>
+                        • <strong>Covariate Shift:</strong> P(X) changes, P(Y|X) same<br>
+                        • <strong>Label Shift:</strong> P(Y) changes, P(X|Y) same<br>
+                        • <strong>Concept Shift:</strong> P(Y|X) changes<br>
+                        Transfer learning handles covariate shift well but struggles with concept shift.
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">🏥 Medical Imaging</div>
+                        <div class="box-content">
+                            Train on ImageNet, fine-tune for X-ray diagnosis with only 1000 labeled images. Achieves 90%+ accuracy vs 60% from scratch.
+                        </div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🛒 Retail & E-commerce</div>
+                        <div class="box-content">
+                            Product classification, visual search, inventory management using pre-trained ResNet/EfficientNet models.
+                        </div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🌍 Satellite Imagery</div>
+                        <div class="box-content">
+                            Land use classification, deforestation detection, urban planning using models pre-trained on aerial imagery.
+                        </div>
+                    </div>
                 `
             },
             "localization": {
                         <li><strong>Option 1:</strong> (x_min, y_min, x_max, y_max)</li>
                         <li><strong>Option 2:</strong> (x_center, y_center, width, height) ← Most common</li>
                     </ul>
+                `,
+                concepts: `
+                    <h3>Localization vs Detection</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Classification:</strong> What is in the image? → "Cat"</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Localization:</strong> Where is the single object? → "Cat at [100, 50, 200, 150]"</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Detection:</strong> Where are ALL objects? → Multiple bounding boxes</div>
+                    </div>
+                    <h3>Network Architecture</h3>
+                    <p>Modify a classification network (ResNet, VGG) by adding a regression head:</p>
+                    <div class="formula">
+                        CNN Backbone → Feature Map → [Classification Head (1000 classes)]<br>
+                        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;→ [Regression Head (4 coordinates)]
+                    </div>
+                `,
+                math: `
+                    <h3>Smooth L1 Loss (Huber Loss)</h3>
+                    <p>Combines L1 and L2 loss for robust bounding box regression.</p>
+                    <div class="formula">
+                        SmoothL1(x) = { 0.5x² if |x| &lt; 1<br>
+                        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{ |x| - 0.5 otherwise
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Why Smooth L1?</div>
+                        • <strong>L2 Loss:</strong> Penalizes large errors too much (squared), sensitive to outliers<br>
+                        • <strong>L1 Loss:</strong> Robust to outliers but has discontinuous gradient at 0<br>
+                        • <strong>Smooth L1:</strong> Best of both worlds - quadratic near 0, linear for large errors
+                    </div>
+                    <h3>IoU Loss</h3>
+                    <div class="formula">
+                        L_IoU = 1 - IoU(pred, target)<br>
+                        Where IoU = Intersection / Union
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">🚗 Self-Driving Cars</div>
+                        <div class="box-content">Localize the primary vehicle ahead for adaptive cruise control</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">📸 Photo Auto-Crop</div>
+                        <div class="box-content">Detect main subject and automatically crop to optimal composition</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🏥 Medical Imaging</div>
+                        <div class="box-content">Localize tumors, organs, or anomalies in X-rays and CT scans</div>
+                    </div>
                 `
             },
             "rcnn": {
                         Faster R-CNN: Best accuracy for detection (not real-time)<br>
                         Mask R-CNN: Detection + instance segmentation
                     </div>
+                `,
+                concepts: `
+                    <h3>Two-Stage Detection Pipeline</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Stage 1 - Region Proposal:</strong> Find ~2000 candidate regions that might contain objects</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Stage 2 - Classification:</strong> Classify each region and refine bounding box</div>
+                    </div>
+                    <h3>Region Proposal Network (RPN)</h3>
+                    <p>The key innovation of Faster R-CNN - learns to propose regions instead of using hand-crafted algorithms.</p>
+                    <div class="formula">
+                        RPN Output per location:<br>
+                        • k anchor boxes × 4 coordinates = 4k regression outputs<br>
+                        • k anchor boxes × 2 objectness scores = 2k classification outputs<br>
+                        Typical k = 9 (3 scales × 3 aspect ratios)
+                    </div>
+                `,
+                math: `
+                    <h3>RoI Pooling: Fixed-Size Feature Maps</h3>
+                    <p>Convert variable-size regions into fixed 7×7 feature maps for FC layers.</p>
+                    <div class="formula">
+                        For each RoI of size H×W:<br>
+                        1. Divide into 7×7 grid (cells of size H/7 × W/7)<br>
+                        2. Max-pool each cell → single value<br>
+                        3. Output: Fixed 7×7 feature map regardless of input size
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: RoI Align vs RoI Pool</div>
+                        <strong>Problem:</strong> RoI Pooling quantizes coordinates, causing misalignment.<br>
+                        <strong>Solution:</strong> RoI Align uses bilinear interpolation instead of rounding.<br>
+                        This is critical for Mask R-CNN where pixel-level accuracy matters!
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">🏥 Medical Imaging</div>
+                        <div class="box-content">High-accuracy tumor detection where speed is less critical than precision</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">📷 Photo Analysis</div>
+                        <div class="box-content">Face detection, scene understanding, object counting in static images</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🔬 Scientific Research</div>
+                        <div class="box-content">Cell detection, particle tracking, microscopy image analysis</div>
+                    </div>
                 `
             },
             "ssd": {
                         <br>
                         Sweet spot between YOLO (faster) and Faster R-CNN (more accurate)
                     </div>
+                `,
+                concepts: `
+                    <h3>Multi-Scale Feature Maps</h3>
+                    <p>SSD makes predictions at multiple layers, each detecting objects at different scales.</p>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Early Layers (38×38):</strong> Detect small objects (high resolution)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Middle Layers (19×19, 10×10):</strong> Detect medium objects</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Deep Layers (5×5, 3×3, 1×1):</strong> Detect large objects</div>
+                    </div>
+                    <h3>Default Boxes (Anchors)</h3>
+                    <p>At each feature map cell, SSD predicts offsets for k default boxes with different aspect ratios (1:1, 2:1, 1:2, 3:1, 1:3).</p>
+                `,
+                math: `
+                    <h3>SSD Loss Function</h3>
+                    <p>Weighted sum of localization and confidence losses.</p>
+                    <div class="formula">
+                        L(x, c, l, g) = (1/N) × [L_conf(x, c) + α × L_loc(x, l, g)]<br>
+                        <br>
+                        Where:<br>
+                        • L_conf = Softmax loss over class confidences<br>
+                        • L_loc = Smooth L1 loss over box coordinates<br>
+                        • α = Weight factor (typically 1)<br>
+                        • N = Number of matched default boxes
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Hard Negative Mining</div>
+                        Problem: Most default boxes are background (class imbalance).<br>
+                        Solution: Sort negative boxes by confidence loss, pick top ones so pos:neg = 1:3.<br>
+                        This focuses training on hard negatives, not easy ones.
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">📹 Video Analytics</div>
+                        <div class="box-content">Real-time object detection in security cameras, sports broadcasting</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🤖 Robotics</div>
+                        <div class="box-content">Object detection for manipulation tasks, obstacle avoidance</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">📱 Mobile Apps</div>
+                        <div class="box-content">Lightweight models for on-device detection (MobileNet-SSD)</div>
+                    </div>
                 `
             },
             "semantic-seg": {
                         With skip connections from encoder to decoder at each level
                     </div>
                 `,
+                concepts: `
+                    <h3>Key Concepts</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Encoder-Decoder:</strong> Downsample to capture context, upsample to recover spatial detail</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Skip Connections:</strong> Pass high-resolution features from encoder to decoder (U-Net)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Atrous Convolution:</strong> Expand receptive field without losing resolution (DeepLab)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">04</div>
+                        <div><strong>ASPP:</strong> Atrous Spatial Pyramid Pooling - capture multi-scale context</div>
+                    </div>
+                `,
+                math: `
+                    <h3>Dice Loss for Segmentation</h3>
+                    <p>Better than cross-entropy for imbalanced classes (small objects).</p>
+                    <div class="formula">
+                        Dice = 2 × |A ∩ B| / (|A| + |B|)<br>
+                        Dice Loss = 1 - Dice<br>
+                        <br>
+                        Where A = predicted mask, B = ground truth mask
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Why Dice > Cross-Entropy?</div>
+                        If only 1% of pixels are foreground:<br>
+                        • Cross-Entropy: Model can get 99% accuracy by predicting all background!<br>
+                        • Dice: Penalizes missed foreground pixels heavily<br>
+                        • Often use combination: L = BCE + Dice
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🏥 Medical Imaging</div>
                         2. Class prediction<br>
                         3. <strong>Binary mask for the object</strong>
                     </div>
+                `,
+                concepts: `
+                    <h3>Mask R-CNN Architecture</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Backbone:</strong> ResNet-50/101 with Feature Pyramid Network (FPN)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>RPN:</strong> Region Proposal Network (same as Faster R-CNN)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>RoI Align:</strong> Better than RoI Pooling (no quantization)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">04</div>
+                        <div><strong>Mask Head:</strong> Small FCN that outputs 28×28 binary mask per class</div>
+                    </div>
+                `,
+                math: `
+                    <h3>Multi-Task Loss</h3>
+                    <p>Mask R-CNN optimizes three losses simultaneously:</p>
+                    <div class="formula">
+                        L = L_cls + L_box + L_mask<br>
+                        <br>
+                        Where:<br>
+                        • L_cls = Classification loss (cross-entropy)<br>
+                        • L_box = Bounding box regression (smooth L1)<br>
+                        • L_mask = Binary cross-entropy per-pixel mask loss
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Key Insight: Decoupled Masks</div>
+                        Mask R-CNN predicts a binary mask for EACH class independently.<br>
+                        This avoids competition between classes and improves accuracy.
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">📸 Photo Editing</div>
+                        <div class="box-content">Auto-select objects for editing, background removal, composition</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🤖 Robotics</div>
+                        <div class="box-content">Object manipulation - need exact shape, not just bounding box</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🎬 Video Production</div>
+                        <div class="box-content">Rotoscoping, VFX, green screen replacement</div>
+                    </div>
                 `
             },
             "face-recog": {
                         No retraining needed - just compare embeddings.
                     </div>
                 `,
+                concepts: `
+                    <h3>Face Recognition Pipeline</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Face Detection:</strong> Find faces in image (MTCNN, RetinaFace)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Alignment:</strong> Normalize face orientation and scale</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Embedding:</strong> Extract 128/512-dim feature vector (FaceNet, ArcFace)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">04</div>
+                        <div><strong>Matching:</strong> Compare embeddings with cosine similarity or L2 distance</div>
+                    </div>
+                    <h3>Key Models</h3>
+                    <table>
+                        <tr><th>Model</th><th>Key Innovation</th></tr>
+                        <tr><td>FaceNet</td><td>Triplet loss, 128-dim embedding</td></tr>
+                        <tr><td>ArcFace</td><td>Additive angular margin loss, SOTA accuracy</td></tr>
+                        <tr><td>DeepFace</td><td>Facebook's early success</td></tr>
+                    </table>
+                `,
+                math: `
+                    <h3>Triplet Loss Intuition</h3>
+                    <p>Push same-person faces closer, different-person faces apart.</p>
+                    <div class="formula">
+                        ||f(A) - f(P)||² + margin &lt; ||f(A) - f(N)||²
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Hard Triplet Mining</div>
+                        Easy triplets: Random selection - margin already satisfied, loss=0<br>
+                        Hard triplets: Find P farthest from anchor (same class), N closest to anchor from a different class<br>
+                        <strong>Training on hard triplets is critical for convergence!</strong>
+                    </div>
+                    <h3>ArcFace Angular Margin</h3>
+                    <div class="formula">
+                        L = -log(e^(s·cos(θ + m)) / (e^(s·cos(θ + m)) + Σ e^(s·cos(θ_j))))<br>
+                        Where m = angular margin, s = scale factor
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">📱 Phone Unlock</div>
                         <li><strong>Sparse:</strong> Encourage sparse activations</li>
                     </ul>
                 `,
+                concepts: `
+                    <h3>Key Concepts</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Bottleneck:</strong> Force information compression by using fewer dimensions than input</div>
                     </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Reconstruction:</strong> Learn to recreate input - captures essential features</div>
                     </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Latent Space:</strong> Compressed representation captures data structure</div>
+                    </div>
+                    <h3>Variational Autoencoder (VAE)</h3>
+                    <p>Instead of encoding to a point, encode to a probability distribution (mean + variance).</p>
+                    <div class="formula">
+                        Encoder outputs: μ (mean) and σ (standard deviation)<br>
+                        Sample: z = μ + σ × ε (where ε ~ N(0,1))<br>
+                        This is the "reparameterization trick" for backprop!
+                    </div>
+                `,
+                math: `
+                    <h3>VAE Loss Function (ELBO)</h3>
+                    <p>VAE maximizes the Evidence Lower Bound:</p>
+                    <div class="formula">
+                        L = E[log p(x|z)] - KL(q(z|x) || p(z))<br>
+                        <br>
+                        Where:<br>
+                        • First term: Reconstruction quality<br>
+                        • Second term: KL divergence regularization (push q toward N(0,1))
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: KL Divergence</div>
+                        For Gaussians:<br>
+                        KL = -0.5 × Σ(1 + log(σ²) - μ² - σ²)<br>
+                        This has a closed-form solution - no sampling needed!
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">🗜️ Compression</div>
+                        <div class="box-content">Dimensionality reduction, data compression, feature extraction</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🔍 Anomaly Detection</div>
+                        <div class="box-content">High reconstruction error = anomaly (fraud detection, defect detection)</div>
+                    </div>
+                `
+            },
+            "gans": {
+                overview: `
+                    <h3>GANs (Generative Adversarial Networks)</h3>
+                    <p>Two networks compete: Generator creates fake data, Discriminator tries to detect fakes.</p>
                     <h3>The GAN Game</h3>
                     <div class="formula">
                         Generator: Creates fake images from random noise<br>
                         • Controllable generation (text-to-image)
                     </div>
                 `,
+                concepts: `
+                    <h3>Key Components</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>U-Net Backbone:</strong> Encoder-decoder with skip connections predicts noise at each step</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Time Embedding:</strong> Tell the model which timestep it's at (sinusoidal encoding)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>CLIP Conditioning:</strong> Guide generation with text embeddings (Stable Diffusion)</div>
+                    </div>
+                    <h3>Latent Diffusion</h3>
+                    <p>Instead of diffusing in pixel space (expensive), work in VAE latent space (8× smaller per side, i.e. 48× fewer values).</p>
+                    <div class="formula">
+                        Image (512×512×3) → VAE Encoder → Latent (64×64×4) → Diffuse → Decode
+                    </div>
+                `,
+                math: `
+                    <h3>Forward Process (Noising)</h3>
+                    <p>Add Gaussian noise according to a schedule β_t:</p>
+                    <div class="formula">
+                        q(x_t | x_{t-1}) = N(x_t; √(1-β_t) × x_{t-1}, β_t × I)<br>
+                        <br>
+                        Or in closed form for any t:<br>
+                        x_t = √(ᾱ_t) × x_0 + √(1-ᾱ_t) × ε<br>
+                        Where ᾱ_t = Π_{s=1}^t (1-β_s)
+                    </div>
+                    <h3>Training Objective</h3>
+                    <p>Simple noise prediction loss:</p>
+                    <div class="formula">
+                        L = E[||ε - ε_θ(x_t, t)||²]
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Simplified Loss</div>
+                        The full variational bound is complex, but Ho et al. (2020) showed this simple MSE loss on noise prediction works just as well and is much easier to implement!
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🖼️ Text-to-Image</div>
                         4. Achieves SOTA with minimal data!
                     </div>
                 `,
+                concepts: `
+                    <h3>BERT Architecture</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Encoder Only:</strong> 12/24 Transformer encoder layers (BERT-base/large)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Token Embedding:</strong> WordPiece tokenization (30K vocab)</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Segment Embedding:</strong> Distinguish sentence A from sentence B</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">04</div>
+                        <div><strong>[CLS] Token:</strong> Aggregated representation for classification tasks</div>
+                    </div>
+                    <h3>Model Sizes</h3>
+                    <table>
+                        <tr><th>Model</th><th>Layers</th><th>Hidden</th><th>Params</th></tr>
+                        <tr><td>BERT-base</td><td>12</td><td>768</td><td>110M</td></tr>
+                        <tr><td>BERT-large</td><td>24</td><td>1024</td><td>340M</td></tr>
+                    </table>
+                `,
+                math: `
+                    <h3>Masked Language Modeling (MLM)</h3>
+                    <p>BERT's main pre-training objective:</p>
+                    <div class="formula">
+                        L_MLM = -Σ log P(x_masked | x_visible)<br>
+                        <br>
+                        For each masked token, predict using cross-entropy loss
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Masking Strategy</div>
+                        Of the 15% tokens selected for masking:<br>
+                        • 80% → [MASK] token<br>
+                        • 10% → Random token<br>
+                        • 10% → Keep original<br>
+                        This prevents over-reliance on [MASK] during fine-tuning!
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">🔍 Search & QA</div>
                         • Multi-step problem solving
                     </div>
                 `,
+                concepts: `
+                    <h3>GPT Architecture</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Decoder Only:</strong> Uses causal (masked) attention - can only see past tokens</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Autoregressive:</strong> Generate one token at a time, feed back as input</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Pre-training:</strong> Next token prediction on massive text corpus</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">04</div>
+                        <div><strong>RLHF:</strong> Reinforcement Learning from Human Feedback (ChatGPT)</div>
+                    </div>
+                    <h3>In-Context Learning</h3>
+                    <p>GPT-3+ can learn from examples in the prompt without updating weights!</p>
+                    <div class="formula">
+                        Zero-shot: "Translate to French: Hello" → "Bonjour"<br>
+                        Few-shot: "cat→chat, dog→chien, house→?" → "maison"
+                    </div>
+                `,
+                math: `
+                    <h3>Causal Language Modeling</h3>
+                    <p>GPT is trained to maximize the likelihood of the next token:</p>
+                    <div class="formula">
+                        L = -Σ log P(x_t | x_{&lt;t})<br>
+                        <br>
+                        Where P(x_t | x_{&lt;t}) = softmax(h_t × W_vocab)
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Scaling Laws</div>
+                        Performance scales predictably with compute, data, and parameters:<br>
+                        L ∝ N^(-0.076) for model size N<br>
+                        This is why OpenAI kept scaling: GPT-3 has 175B parameters, and GPT-4 is reported (not confirmed by OpenAI) to have about 1.8T!
+                    </div>
+                `,
                 applications: `
                     <div class="info-box">
                         <div class="box-title">💬 ChatGPT & Assistants</div>
                         • <strong>Transfer Learning:</strong> Pre-trained ViT beats CNNs on many tasks<br>
                         • <strong>Long-Range Dependencies:</strong> Global attention vs CNN's local receptive field
                     </div>
+                `,
+                concepts: `
+                    <h3>ViT vs CNN Comparison</h3>
+                    <table>
+                        <tr><th>Aspect</th><th>CNN</th><th>ViT</th></tr>
+                        <tr><td>Inductive Bias</td><td>Locality, translation invariance</td><td>Minimal - learns from data</td></tr>
+                        <tr><td>Data Efficiency</td><td>Better with small datasets</td><td>Needs large datasets</td></tr>
+                        <tr><td>Receptive Field</td><td>Local (grows with depth)</td><td>Global from layer 1</td></tr>
+                        <tr><td>Scalability</td><td>Diminishing returns</td><td>Scales well with compute</td></tr>
+                    </table>
+                    <h3>Key Innovations</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>No Convolutions:</strong> Pure attention - "An Image is Worth 16x16 Words"</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Learnable Position:</strong> Position embeddings are learned, not sinusoidal</div>
+                    </div>
+                `,
+                math: `
+                    <h3>Patch Embedding</h3>
+                    <p>Convert image patches to token embeddings:</p>
+                    <div class="formula">
+                        z_0 = [x_cls; x_p^1 E; x_p^2 E; ...; x_p^N E] + E_pos<br>
+                        <br>
+                        Where:<br>
+                        • x_p^i = flattened patch (16×16×3 = 768 dimensions)<br>
+                        • E = learnable linear projection<br>
+                        • E_pos = position embedding
+                    </div>
+                    <div class="callout insight">
+                        <div class="callout-title">📝 Paper & Pain: Computation</div>
+                        ViT-Base: 12 layers, 768 hidden, 12 heads ~ 86M params<br>
+                        Self-attention cost: O(n²·d) where n=196 patches (a 224×224 image split into 16×16 patches)<br>
+                        This is why ViT is efficient for images (196 tokens) vs text (1000+ tokens)
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">🖼️ Image Classification</div>
+                        <div class="box-content">SOTA on ImageNet with pre-training. Google/DeepMind use for internal systems.</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🔍 Object Detection</div>
+                        <div class="box-content">DETR, DINO - Transformer-based detection replacing Faster R-CNN</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🎬 Video Understanding</div>
+                        <div class="box-content">ViViT, TimeSformer - extend patches to 3D (space + time)</div>
+                    </div>
+                `
+            },
+            "gnn": {
+                overview: `
+                    <h3>Graph Neural Networks (GNNs)</h3>
+                    <p>Deep learning on non-Euclidean data structures like social networks, molecules, and knowledge graphs.</p>
+                    <h3>Key Concepts</h3>
+                    <div class="list-item">
+                        <div class="list-num">01</div>
+                        <div><strong>Graph Structure:</strong> Nodes (entities) and Edges (relationships).</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">02</div>
+                        <div><strong>Message Passing:</strong> Nodes exchange information with neighbors.</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">03</div>
+                        <div><strong>Aggregation:</strong> Combine incoming messages (Sum, Mean, Max).</div>
+                    </div>
+                    <div class="callout tip">
+                        <div class="callout-title">💡 Why GNNs?</div>
+                        Standard CNNs expect a fixed grid (Euclidean data). Graphs have arbitrary size and topology. GNNs are permutation invariant: reordering the nodes does not change the result!
+                    </div>
+                `,
+                concepts: `
+                    <h3>Message Passing Neural Networks (MPNN)</h3>
+                    <p>The core framework for most GNNs.</p>
+                    <div class="list-item">
+                        <div class="list-num">1</div>
+                        <div><strong>Message Function:</strong> Compute message from neighbor to node.</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">2</div>
+                        <div><strong>Aggregation Function:</strong> Sum all messages from neighbors.</div>
+                    </div>
+                    <div class="list-item">
+                        <div class="list-num">3</div>
+                        <div><strong>Update Function:</strong> Update node state based on aggregated messages.</div>
+                    </div>
+                `,
+                math: `
+                    <h3>Graph Convolution Network (GCN)</h3>
+                    <p>The "Hello World" of GNNs (Kipf & Welling, 2017).</p>
+                    <div class="formula">
+                        H^{(l+1)} = σ(D̃^{-1/2} Ã D̃^{-1/2} H^{(l)} W^{(l)})
+                    </div>
+                    <p>Where:</p>
+                    <ul>
+                        <li><strong>Ã:</strong> Adjacency Matrix with self-loops added (Ã = A + I)</li>
+                        <li><strong>D̃:</strong> Degree Matrix of Ã (number of connections, counting the self-loop)</li>
+                        <li><strong>H:</strong> Node Features</li>
+                        <li><strong>W:</strong> Learnable Weights</li>
+                    </ul>
+                    <div class="callout warning">
+                        <div class="callout-title">⚠️ Over-smoothing</div>
+                        If GNN is too deep, all node representations become indistinguishable. Usually 2-4 layers are enough.
+                    </div>
+                `,
+                applications: `
+                    <div class="info-box">
+                        <div class="box-title">💊 Drug Discovery</div>
+                        <div class="box-content">Predicting molecular properties, protein folding (AlphaFold)</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🚗 Traffic Prediction</div>
+                        <div class="box-content">Road networks, estimating travel times (Google Maps)</div>
+                    </div>
+                    <div class="info-box">
+                        <div class="box-title">🛒 Recommender Systems</div>
+                        <div class="box-content">Pinterest (PinSage), User-Item graphs</div>
+                    </div>
                 `
             }
         };
                 'transformers': drawAttentionMatrix,
                 'bert': drawBERTProcess,
                 'gpt': drawGPTGeneration,
+                'vit': drawVisionTransformer,
+                'gnn': drawGraphNetwork
             };
             if (vizMap[moduleId]) {
                 'pooling': () => drawPoolingMath(ctx, canvas),
                 'regularization': () => drawRegularizationMath(ctx, canvas),
                 'transformers': () => drawAttentionMath(ctx, canvas),
+                'rnn': () => drawRNNMath(ctx, canvas),
+                'gnn': () => drawGNNMath(ctx, canvas)
             };
             if (mathVizMap[moduleId]) {
                 'bert': () => drawBERTApplications(ctx, canvas),
                 'gpt': () => drawGPTApplications(ctx, canvas),
                 'gans': () => drawGANApplications(ctx, canvas),
+                'diffusion': () => drawDiffusionApplications(ctx, canvas),
+                'gnn': () => drawGNNApplications(ctx, canvas)
             };
             if (appVizMap[moduleId]) {
         }
         // Animation and download utilities
+        let animationFrameId = null;
         function toggleVizAnimation(moduleId) {
+            const btn = event.target;
             window.vizAnimating = !window.vizAnimating;
             if (window.vizAnimating) {
+                btn.textContent = '⏹️ Stop';
+                btn.style.background = 'linear-gradient(135deg, #ff4444, #cc0000)';
                 animateVisualization(moduleId);
+            } else {
+                btn.textContent = '▶️ Animate';
+                btn.style.background = '';
+                if (animationFrameId) {
+                    cancelAnimationFrame(animationFrameId);
+                    animationFrameId = null;
+                }
             }
         }
         function animateVisualization(moduleId) {
             if (!window.vizAnimating) return;
+            const canvas = document.getElementById(moduleId + '-canvas');
+            if (!canvas) return;
+            const ctx = canvas.getContext('2d');
+            ctx.clearRect(0, 0, canvas.width, canvas.height);
+            ctx.fillStyle = '#0f1419';
+            ctx.fillRect(0, 0, canvas.width, canvas.height);
+            // Call the appropriate animated drawing function
+            const animatedVizMap = {
+                'nn-basics': drawAnimatedNetwork,
+                'perceptron': drawAnimatedDecisionBoundary,
+                'mlp': drawAnimatedMLP,
+                'activation': drawAnimatedActivations,
+                'conv-layer': drawAnimatedConvolution,
+                'gnn': drawAnimatedGNN,
+                'transformers': drawAnimatedAttention,
+                'backprop': drawAnimatedGradientFlow,
+                'gans': drawAnimatedGAN,
+                'diffusion': drawAnimatedDiffusion,
+                'rnn': drawAnimatedRNN
+            };
+            if (animatedVizMap[moduleId]) {
+                animatedVizMap[moduleId](ctx, canvas, Date.now());
+            } else {
+                // Default animation - pulsing visualization
+                drawDefaultAnimation(ctx, canvas, Date.now());
+            }
+            animationFrameId = requestAnimationFrame(() => animateVisualization(moduleId));
+        }
+        // Default animation for modules without specific animations
+        function drawDefaultAnimation(ctx, canvas, time) {
+            const centerX = canvas.width / 2;
+            const centerY = canvas.height / 2;
+            const pulse = Math.sin(time / 300) * 0.3 + 0.7;
+            // Animated neural network
+            const layers = [3, 4, 4, 2];
+            const layerWidth = canvas.width / (layers.length + 1);
+            layers.forEach((neurons, layerIdx) => {
+                const x = (layerIdx + 1) * layerWidth;
+                const layerHeight = canvas.height / (neurons + 1);
+                for (let i = 0; i < neurons; i++) {
+                    const y = (i + 1) * layerHeight;
+                    const radius = 12 + Math.sin(time / 200 + layerIdx + i) * 3;
+                    // Draw neuron
+                    ctx.fillStyle = `rgba(0, 212, 255, ${pulse})`;
+                    ctx.beginPath();
+                    ctx.arc(x, y, radius, 0, Math.PI * 2);
+                    ctx.fill();
+                    // Draw connections to next layer
+                    if (layerIdx < layers.length - 1) {
+                        const nextLayerHeight = canvas.height / (layers[layerIdx + 1] + 1);
+                        const nextX = (layerIdx + 2) * layerWidth;
+                        for (let j = 0; j < layers[layerIdx + 1]; j++) {
+                            const nextY = (j + 1) * nextLayerHeight;
+                            const signalProgress = ((time / 500) + layerIdx * 0.5) % 1;
+                            ctx.strokeStyle = `rgba(0, 212, 255, ${0.3 + signalProgress * 0.3})`;
+                            ctx.lineWidth = 1;
+                            ctx.beginPath();
+                            ctx.moveTo(x + radius, y);
+                            ctx.lineTo(nextX - 12, nextY);
+                            ctx.stroke();
+                            // Animated signal dot
+                            const dotX = x + radius + (nextX - 12 - x - radius) * signalProgress;
+                            const dotY = y + (nextY - y) * signalProgress;
+                            ctx.fillStyle = '#00ff88';
+                            ctx.beginPath();
+                            ctx.arc(dotX, dotY, 3, 0, Math.PI * 2);
+                            ctx.fill();
+                        }
+                    }
+                }
+            });
+            ctx.fillStyle = '#00d4ff';
+            ctx.font = 'bold 14px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('🔄 Neural Network Animation', centerX, 25);
+        }
+        // Animated GNN with message passing
+        function drawAnimatedGNN(ctx, canvas, time) {
+            ctx.fillStyle = '#9900ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Graph Neural Network - Message Passing', canvas.width / 2, 30);
+            const nodes = [
+                { x: 100, y: 100 }, { x: 200, y: 60 }, { x: 320, y: 120 },
+                { x: 150, y: 200 }, { x: 400, y: 80 }, { x: 450, y: 180 }
+            ];
+            const edges = [[0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]];
+            // Draw edges
+            ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
+            ctx.lineWidth = 2;
+            edges.forEach(e => {
+                ctx.beginPath();
+                ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
+                ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
+                ctx.stroke();
+            });
+            // Draw animated message passing
+            const messageProgress = (time / 1000) % 1;
+            ctx.fillStyle = '#00ff88';
+            edges.forEach((e, idx) => {
+                const progress = (messageProgress + idx * 0.15) % 1;
+                const x = nodes[e[0]].x + (nodes[e[1]].x - nodes[e[0]].x) * progress;
+                const y = nodes[e[0]].y + (nodes[e[1]].y - nodes[e[0]].y) * progress;
+                ctx.beginPath();
+                ctx.arc(x, y, 5, 0, Math.PI * 2);
+                ctx.fill();
+            });
+            // Draw nodes with pulse
+            const pulse = Math.sin(time / 300) * 5 + 15;
+            nodes.forEach((n, i) => {
+                ctx.fillStyle = '#9900ff';
+                ctx.beginPath();
+                ctx.arc(n.x, n.y, pulse, 0, Math.PI * 2);
+                ctx.fill();
+                ctx.fillStyle = 'white';
+                ctx.font = '12px Arial';
+                ctx.textAlign = 'center';
+                ctx.fillText(i, n.x, n.y + 4);
+            });
+        }
+        // Animated attention matrix
+        function drawAnimatedAttention(ctx, canvas, time) {
+            const words = ['The', 'cat', 'sat', 'on', 'mat'];
+            const cellSize = 50;
+            const startX = (canvas.width - words.length * cellSize) / 2;
+            const startY = 80;
+            ctx.fillStyle = '#00d4ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Self-Attention Animation', canvas.width / 2, 30);
+            // Draw words
+            ctx.font = '12px Arial';
+            words.forEach((word, i) => {
+                ctx.fillStyle = '#e4e6eb';
+                ctx.fillText(word, startX + i * cellSize + cellSize/2, startY - 10);
+                ctx.save();
+                ctx.translate(startX - 20, startY + i * cellSize + cellSize/2);
+                ctx.fillText(word, 0, 0);
+                ctx.restore();
+            });
+            // Animated attention weights
+            for (let i = 0; i < words.length; i++) {
+                for (let j = 0; j < words.length; j++) {
+                    const baseWeight = i === j ? 0.8 : 0.2 + Math.abs(i - j) * 0.1;
+                    const animatedWeight = baseWeight + Math.sin(time / 500 + i + j) * 0.2;
+                    const alpha = Math.max(0.1, Math.min(1, animatedWeight));
+                    ctx.fillStyle = `rgba(0, 212, 255, ${alpha})`;
+                    ctx.fillRect(startX + j * cellSize + 2, startY + i * cellSize + 2, cellSize - 4, cellSize - 4);
+                    ctx.fillStyle = '#e4e6eb';
+                    ctx.font = '10px Arial';
+                    ctx.fillText(animatedWeight.toFixed(2), startX + j * cellSize + cellSize/2, startY + i * cellSize + cellSize/2 + 4);
+                }
+            }
+        }
+        // Animated gradient flow for backprop
+        function drawAnimatedGradientFlow(ctx, canvas, time) {
+            ctx.fillStyle = '#ff6b35';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Backpropagation - Gradient Flow', canvas.width / 2, 30);
+            const layers = [2, 4, 4, 1];
+            const layerWidth = canvas.width / (layers.length + 1);
+            // Forward pass (left to right) - blue
+            const forwardProgress = (time / 2000) % 1;
+            layers.forEach((neurons, layerIdx) => {
+                const x = (layerIdx + 1) * layerWidth;
+                const layerHeight = canvas.height / (neurons + 1);
+                for (let i = 0; i < neurons; i++) {
+                    const y = (i + 1) * layerHeight;
+                    // Pulse effect based on forward pass
+                    const isActive = forwardProgress > layerIdx / layers.length;
+                    const radius = isActive ? 15 + Math.sin(time / 200) * 3 : 12;
+                    ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
+                    ctx.beginPath();
+                    ctx.arc(x, y, radius, 0, Math.PI * 2);
+                    ctx.fill();
+                }
+            });
+            // Backward pass (right to left) - orange/red gradients
+            const backwardProgress = ((time / 2000) + 0.5) % 1;
+            for (let layerIdx = layers.length - 2; layerIdx >= 0; layerIdx--) {
+                const x1 = (layerIdx + 1) * layerWidth;
+                const x2 = (layerIdx + 2) * layerWidth;
+                const gradientActive = backwardProgress > (layers.length - 2 - layerIdx) / (layers.length - 1);
+                if (gradientActive) {
+                    const gradX = x2 - (x2 - x1) * ((backwardProgress * (layers.length - 1)) % 1);
+                    ctx.fillStyle = '#ff6b35';
+                    ctx.beginPath();
+                    ctx.arc(gradX, canvas.height / 2, 8, 0, Math.PI * 2);
+                    ctx.fill();
+                }
+            }
+            ctx.fillStyle = '#e4e6eb';
+            ctx.font = '12px Arial';
+            ctx.fillText('Forward: Blue →  |  Backward: Orange ←', canvas.width / 2, canvas.height - 20);
+        }
+        // Animated renderer registered for the 'nn-basics' module: it has no bespoke animation and simply delegates to drawDefaultAnimation with the same (ctx, canvas, time) arguments.
+        function drawAnimatedNetwork(ctx, canvas, time) {
+            drawDefaultAnimation(ctx, canvas, time);
+        }
+        // Animated decision boundary for perceptron
+        function drawAnimatedDecisionBoundary(ctx, canvas, time) {
+            const centerX = canvas.width / 2;
+            const centerY = canvas.height / 2;
+            ctx.fillStyle = '#ff6b35';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Perceptron Decision Boundary', canvas.width / 2, 30);
+            // Animated rotating decision boundary
+            const angle = time / 2000;
+            const length = 200;
+            ctx.strokeStyle = '#ff6b35';
+            ctx.lineWidth = 3;
+            ctx.beginPath();
+            ctx.moveTo(centerX - Math.cos(angle) * length, centerY - Math.sin(angle) * length);
+            ctx.lineTo(centerX + Math.cos(angle) * length, centerY + Math.sin(angle) * length);
+            ctx.stroke();
+            // Fixed sample points
+            const points = [
+                {x: 100, y: 80, c: 1}, {x: 150, y: 100, c: 1}, {x: 120, y: 150, c: 1},
+                {x: 400, y: 200, c: 0}, {x: 450, y: 180, c: 0}, {x: 380, y: 250, c: 0}
+            ];
+            points.forEach(p => {
+                ctx.fillStyle = p.c === 1 ? '#00d4ff' : '#00ff88';
+                ctx.beginPath();
+                ctx.arc(p.x, p.y, 8, 0, Math.PI * 2);
+                ctx.fill();
+            });
+        }
+        function drawAnimatedMLP(ctx, canvas, time) {
+            drawDefaultAnimation(ctx, canvas, time);
+        }
+        function drawAnimatedActivations(ctx, canvas, time) {
+            drawActivationFunctions(ctx, canvas);
+            // Add animated input marker
+            const x = Math.sin(time / 500) * 4;
+            const centerX = canvas.width / 2;
+            const centerY = canvas.height / 2;
+            const scale = 40;
+            ctx.fillStyle = '#ffffff';
+            ctx.beginPath();
+            ctx.arc(centerX + x * scale, centerY, 6, 0, Math.PI * 2);
+            ctx.fill();
+            ctx.strokeStyle = '#ffffff';
+            ctx.setLineDash([5, 5]);
+            ctx.beginPath();
+            ctx.moveTo(centerX + x * scale, 0);
+            ctx.lineTo(centerX + x * scale, canvas.height);
+            ctx.stroke();
+            ctx.setLineDash([]);
+        }
+        function drawAnimatedConvolution(ctx, canvas, time) {
+            drawConvolutionAnimation(ctx, canvas);
+        }
+        function drawAnimatedGAN(ctx, canvas, time) {
+            ctx.fillStyle = '#ffaa00';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('GAN Training Animation', canvas.width / 2, 30);
+            const phase = Math.floor(time / 1000) % 4;
+            // Generator
+            ctx.fillStyle = phase <= 1 ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
+            ctx.fillRect(50, 100, 100, 80);
+            ctx.fillStyle = '#e4e6eb';
+            ctx.font = '12px Arial';
+            ctx.fillText('Generator', 100, 145);
+            // Fake image
+            const noiseToFake = Math.sin(time / 300) * 0.5 + 0.5;
+            ctx.fillStyle = `rgba(255, 170, 0, ${noiseToFake})`;
+            ctx.fillRect(200, 110, 60, 60);
+            ctx.fillStyle = '#e4e6eb';
+            ctx.fillText('Fake', 230, 200);
+            // Discriminator
+            ctx.fillStyle = phase >= 2 ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
+            ctx.fillRect(320, 100, 100, 80);
+            ctx.fillStyle = '#e4e6eb';
+            ctx.fillText('Discriminator', 370, 145);
+            // Output
+            const output = phase === 3 ? 'Real?' : 'Fake?';
+            ctx.fillStyle = '#00d4ff';
+            ctx.font = 'bold 14px Arial';
+            ctx.fillText(output, 370, 220);
+            // Arrows
+            ctx.strokeStyle = '#e4e6eb';
+            ctx.lineWidth = 2;
+            ctx.beginPath();
+            ctx.moveTo(150, 140);
+            ctx.lineTo(200, 140);
+            ctx.stroke();
+            ctx.beginPath();
+            ctx.moveTo(260, 140);
+            ctx.lineTo(320, 140);
+            ctx.stroke();
+        }
+        function drawAnimatedDiffusion(ctx, canvas, time) {
+            ctx.fillStyle = '#9900ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Diffusion Process Animation', canvas.width / 2, 30);
+            const steps = 5;
+            const stepWidth = canvas.width / (steps + 1);
+            const progress = (time / 3000) % 1;
+            const currentStep = Math.floor(progress * steps);
+            for (let i = 0; i < steps; i++) {
+                const x = (i + 1) * stepWidth;
+                const y = 150;
+                const noiseLevel = i / (steps - 1);
+                const isActive = i <= currentStep;
+                // Draw square with noise
+                ctx.fillStyle = isActive ? '#9900ff' : 'rgba(153, 0, 255, 0.3)';
+                ctx.fillRect(x - 30, y - 30, 60, 60);
+                // Add noise dots
+                if (noiseLevel > 0) {
+                    for (let j = 0; j < noiseLevel * 20; j++) {
+                        const nx = x - 25 + Math.random() * 50;
+                        const ny = y - 25 + Math.random() * 50;
+                        ctx.fillStyle = 'rgba(255, 255, 255, 0.5)';
+                        ctx.fillRect(nx, ny, 2, 2);
+                    }
+                }
+                ctx.fillStyle = '#e4e6eb';
+                ctx.font = '10px Arial';
+                ctx.fillText(`t=${i}`, x, y + 50);
+            }
+            ctx.fillStyle = '#e4e6eb';
+            ctx.font = '12px Arial';
+            ctx.fillText('Clean → Noisy (Forward) | Noisy → Clean (Reverse)', canvas.width / 2, canvas.height - 20);
+        }
+        function drawAnimatedRNN(ctx, canvas, time) {
+            ctx.fillStyle = '#00d4ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('RNN Unrolled Through Time', canvas.width / 2, 30);
+            const steps = 5;
+            const stepWidth = canvas.width / (steps + 1);
+            const progress = (time / 500) % steps;
+            const activeStep = Math.floor(progress);
+            for (let i = 0; i < steps; i++) {
+                const x = (i + 1) * stepWidth;
+                const y = 150;
+                const isActive = i === activeStep;
+                // Hidden state
+                ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
+                ctx.beginPath();
+                ctx.arc(x, y, 25, 0, Math.PI * 2);
+                ctx.fill();
+                ctx.fillStyle = '#e4e6eb';
+                ctx.font = '10px Arial';
+                ctx.fillText(`h${i}`, x, y + 4);
+                // Input arrow
+                ctx.strokeStyle = isActive ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
+                ctx.lineWidth = 2;
+                ctx.beginPath();
+                ctx.moveTo(x, y + 60);
+                ctx.lineTo(x, y + 25);
+                ctx.stroke();
+                ctx.fillText(`x${i}`, x, y + 75);
+                // Recurrent connection
+                if (i < steps - 1) {
+                    ctx.strokeStyle = isActive ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
+                    ctx.beginPath();
+                    ctx.moveTo(x + 25, y);
+                    ctx.lineTo(x + stepWidth - 25, y);
+                    ctx.stroke();
+                    // Animated signal
+                    if (isActive) {
+                        const signalX = x + 25 + (stepWidth - 50) * (progress % 1);
+                        ctx.fillStyle = '#ff6b35';
+                        ctx.beginPath();
+                        ctx.arc(signalX, y, 5, 0, Math.PI * 2);
+                        ctx.fill();
+                    }
+                }
+            }
         }
         function downloadViz(moduleId) {
             link.click();
         }
+        function drawGraphNetwork(ctx, canvas) {
+            ctx.fillStyle = '#9900ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Graph Structure & Message Passing', canvas.width / 2, 30);
+            const nodes = [
+                { x: 100, y: 100 }, { x: 200, y: 50 }, { x: 300, y: 150 },
+                { x: 150, y: 250 }, { x: 400, y: 100 }, { x: 500, y: 200 }
+            ];
+            const edges = [
+                [0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]
+            ];
+            // Draw edges
+            ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
+            ctx.lineWidth = 2;
+            edges.forEach(e => {
+                ctx.beginPath();
+                ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
+                ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
+                ctx.stroke();
+            });
+            // Draw nodes
+            nodes.forEach((n, i) => {
+                ctx.fillStyle = '#9900ff';
+                ctx.beginPath();
+                ctx.arc(n.x, n.y, 15, 0, Math.PI * 2);
+                ctx.fill();
+                ctx.fillStyle = 'white';
+                ctx.font = '12px Arial';
+                ctx.fillText(i, n.x, n.y + 4);
+            });
+            // Draw Message Passing Animation (fake)
+            const t = (Date.now() / 1000) % 2;
+            if (t > 1) {
+                ctx.strokeStyle = '#00ff88';
+                ctx.lineWidth = 4;
+                edges.forEach((e, idx) => {
+                    if (idx % 2 === 0) {
+                        ctx.beginPath();
+                        ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
+                        ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
+                        ctx.stroke();
+                    }
+                });
+            }
+        }
+        function drawGNNMath(ctx, canvas) {
+            ctx.fillStyle = '#9900ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('Graph Convolution Math', canvas.width / 2, 50);
+            ctx.fillStyle = '#e4e6eb';
+            ctx.font = '14px Courier New';
+            ctx.fillText('H(l+1) = σ(D^-½ A D^-½ H(l) W(l))', canvas.width / 2, 100);
+            ctx.fillStyle = '#00ff88';
+            ctx.fillText('A = Neighborhood Connections', canvas.width / 2, 150);
+            ctx.fillStyle = '#ff6b35';
+            ctx.fillText('D = Normalization Factor', canvas.width / 2, 180);
+        }
+        function drawGNNApplications(ctx, canvas) {
+            ctx.fillStyle = '#9900ff';
+            ctx.font = 'bold 16px Arial';
+            ctx.textAlign = 'center';
+            ctx.fillText('💊 Drug Discovery (Molecular Graphs)', canvas.width / 2, 60);
+            ctx.fillStyle = '#00d4ff';
+            ctx.fillText('🚗 Traffic Flow Prediction', canvas.width / 2, 120);
+            ctx.fillStyle = '#ff6b35';
+            ctx.fillText('🛒 Pinterest/Amazon Recommendations', canvas.width / 2, 180);
+        }
         // Script entry point: called once at the end of the script, after the
         // function declarations above it.
         // NOTE(review): initDashboard is defined outside this excerpt - confirm
         // what it sets up before relying on call order here.
         initDashboard();
     </script>
 </body>