Aashish34 committed on
Commit
ccd63d1
Β·
1 Parent(s): 18550fb

add new topics

Browse files
DeepLearning/{Deep Learning Curriculum.html → index.html} RENAMED
@@ -47,6 +47,7 @@
47
  h1 {
48
  font-size: 2.5em;
49
  background: linear-gradient(135deg, var(--cyan), var(--orange));
 
50
  -webkit-background-clip: text;
51
  -webkit-text-fill-color: transparent;
52
  margin-bottom: 10px;
@@ -727,6 +728,14 @@
727
  category: "Vision",
728
  color: "#ff6b35",
729
  description: "Transformers applied to image data"
 
 
 
 
 
 
 
 
730
  }
731
  ];
732
 
@@ -1398,6 +1407,37 @@
1398
  Learning Rule: w_new = w_old + α(y_true - y_pred)x
1399
  </div>
1400
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1401
  applications: `
1402
  <div class="info-box">
1403
  <div class="box-title">πŸ“š Educational</div>
@@ -2491,6 +2531,83 @@
2491
  β€’ Start with low learning rate (1e-4) for fine-tuning<br>
2492
  β€’ Popular backbones: ResNet50, EfficientNet, ViT
2493
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2494
  `
2495
  },
2496
  "localization": {
@@ -2513,6 +2630,64 @@
2513
  <li><strong>Option 1:</strong> (x_min, y_min, x_max, y_max)</li>
2514
  <li><strong>Option 2:</strong> (x_center, y_center, width, height) ← Most common</li>
2515
  </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2516
  `
2517
  },
2518
  "rcnn": {
@@ -2556,6 +2731,58 @@
2556
  Faster R-CNN: Best accuracy for detection (not real-time)<br>
2557
  Mask R-CNN: Detection + instance segmentation
2558
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2559
  `
2560
  },
2561
  "ssd": {
@@ -2577,6 +2804,61 @@
2577
  <br>
2578
  Sweet spot between YOLO (faster) and Faster R-CNN (more accurate)
2579
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2580
  `
2581
  },
2582
  "semantic-seg": {
@@ -2610,6 +2892,44 @@
2610
  With skip connections from encoder to decoder at each level
2611
  </div>
2612
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2613
  applications: `
2614
  <div class="info-box">
2615
  <div class="box-title">πŸ₯ Medical Imaging</div>
@@ -2641,6 +2961,58 @@
2641
  2. Class prediction<br>
2642
  3. <strong>Binary mask for the object</strong>
2643
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2644
  `
2645
  },
2646
  "face-recog": {
@@ -2665,6 +3037,54 @@
2665
  No retraining needed - just compare embeddings.
2666
  </div>
2667
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2668
  applications: `
2669
  <div class="info-box">
2670
  <div class="box-title">πŸ“± Phone Unlock</div>
@@ -2696,22 +3116,64 @@
2696
  <li><strong>Sparse:</strong> Encourage sparse activations</li>
2697
  </ul>
2698
  `,
2699
- applications: `
2700
- <div class="info-box">
2701
- <div class="box-title">πŸ—œοΈ Compression</div>
2702
- <div class="box-content">Dimensionality reduction, data compression, feature extraction</div>
 
2703
  </div>
2704
- <div class="info-box">
2705
- <div class="box-title">πŸ” Anomaly Detection</div>
2706
- <div class="box-content">High reconstruction error = anomaly (fraud detection, defect detection)</div>
2707
  </div>
2708
- `
2709
- },
2710
- "gans": {
2711
- overview: `
2712
- <h3>GANs (Generative Adversarial Networks)</h3>
2713
- <p>Two networks compete: Generator creates fake data, Discriminator tries to detect fakes.</p>
2714
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2715
  <h3>The GAN Game</h3>
2716
  <div class="formula">
2717
  Generator: Creates fake images from random noise<br>
@@ -2796,6 +3258,50 @@
2796
  β€’ Controllable generation (text-to-image)
2797
  </div>
2798
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2799
  applications: `
2800
  <div class="info-box">
2801
  <div class="box-title">πŸ–ΌοΈ Text-to-Image</div>
@@ -2898,6 +3404,51 @@
2898
  4. Achieves SOTA with minimal data!
2899
  </div>
2900
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2901
  applications: `
2902
  <div class="info-box">
2903
  <div class="box-title">πŸ” Search & QA</div>
@@ -2960,6 +3511,49 @@
2960
  β€’ Multi-step problem solving
2961
  </div>
2962
  `,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2963
  applications: `
2964
  <div class="info-box">
2965
  <div class="box-title">πŸ’¬ ChatGPT & Assistants</div>
@@ -3008,6 +3602,137 @@
3008
  β€’ <strong>Transfer Learning:</strong> Pre-trained ViT beats CNNs on many tasks<br>
3009
  β€’ <strong>Long-Range Dependencies:</strong> Global attention vs CNN's local receptive field
3010
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3011
  `
3012
  }
3013
  };
@@ -3226,7 +3951,8 @@
3226
  'transformers': drawAttentionMatrix,
3227
  'bert': drawBERTProcess,
3228
  'gpt': drawGPTGeneration,
3229
- 'vit': drawVisionTransformer
 
3230
  };
3231
 
3232
  if (vizMap[moduleId]) {
@@ -3596,7 +4322,8 @@
3596
  'pooling': () => drawPoolingMath(ctx, canvas),
3597
  'regularization': () => drawRegularizationMath(ctx, canvas),
3598
  'transformers': () => drawAttentionMath(ctx, canvas),
3599
- 'rnn': () => drawRNNMath(ctx, canvas)
 
3600
  };
3601
 
3602
  if (mathVizMap[moduleId]) {
@@ -3628,7 +4355,8 @@
3628
  'bert': () => drawBERTApplications(ctx, canvas),
3629
  'gpt': () => drawGPTApplications(ctx, canvas),
3630
  'gans': () => drawGANApplications(ctx, canvas),
3631
- 'diffusion': () => drawDiffusionApplications(ctx, canvas)
 
3632
  };
3633
 
3634
  if (appVizMap[moduleId]) {
@@ -4335,17 +5063,475 @@
4335
  }
4336
 
4337
  // Animation and download utilities
 
 
4338
  function toggleVizAnimation(moduleId) {
 
4339
  window.vizAnimating = !window.vizAnimating;
 
4340
  if (window.vizAnimating) {
 
 
4341
  animateVisualization(moduleId);
 
 
 
 
 
 
 
4342
  }
4343
  }
4344
 
4345
  function animateVisualization(moduleId) {
4346
  if (!window.vizAnimating) return;
4347
- drawConceptsVisualization(moduleId);
4348
- setTimeout(() => animateVisualization(moduleId), 150);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4349
  }
4350
 
4351
  function downloadViz(moduleId) {
@@ -4358,6 +5544,86 @@
4358
  link.click();
4359
  }
4360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4361
  initDashboard();
4362
  </script>
4363
  </body>
 
47
  h1 {
48
  font-size: 2.5em;
49
  background: linear-gradient(135deg, var(--cyan), var(--orange));
50
+ background-clip: text;
51
  -webkit-background-clip: text;
52
  -webkit-text-fill-color: transparent;
53
  margin-bottom: 10px;
 
728
  category: "Vision",
729
  color: "#ff6b35",
730
  description: "Transformers applied to image data"
731
+ },
732
+ {
733
+ id: "gnn",
734
+ title: "Graph Neural Networks",
735
+ icon: "πŸ•ΈοΈ",
736
+ category: "Advanced",
737
+ color: "#9900ff",
738
+ description: "Deep learning on non-Euclidean graph data"
739
  }
740
  ];
741
 
 
1407
  Learning Rule: w_new = w_old + α(y_true - y_pred)x
1408
  </div>
1409
  `,
1410
+ math: `
1411
+ <h3>Perceptron Learning Algorithm</h3>
1412
+ <p>The perceptron update rule is the simplest form of gradient descent.</p>
1413
+
1414
+ <div class="formula">
1415
+ For each misclassified sample (x, y):<br>
1416
+ w ← w + α × (y - ŷ) × x<br>
1417
+ b ← b + α × (y - ŷ)
1418
+ </div>
1419
+
1420
+ <div class="callout insight">
1421
+ <div class="callout-title">πŸ“ Paper & Pain: Manual Training</div>
1422
+ <strong>Data:</strong> x₁ = [1, 1], y₁ = 1 | x₂ = [0, 0], y₂ = 0<br>
1423
+ <strong>Initial:</strong> w = [0, 0], b = 0, α = 1<br>
1424
+ <br>
1425
+ <strong>Iteration 1 (x₁):</strong><br>
1426
+ z = 0×1 + 0×1 + 0 = 0 → ŷ = 1 ✓ (correct!)<br>
1427
+ <br>
1428
+ <strong>Iteration 2 (x₂):</strong><br>
1429
+ z = 0×0 + 0×0 + 0 = 0 → ŷ = 1 ✗ (wrong! y=0)<br>
1430
+ Update: w = [0,0] + 1×(0-1)×[0,0] = [0,0], b = 0 + 1×(0-1) = -1<br>
1431
+ <br>
1432
+ Now z(x₂) = 0 + 0 - 1 = -1 → ŷ = 0 ✓
1433
+ </div>
1434
+
1435
+ <h3>Convergence Theorem</h3>
1436
+ <div class="formula">
1437
+ If data is linearly separable with margin γ and ||x|| ≤ R,<br>
1438
+ perceptron converges in at most (R/γ)² updates.
1439
+ </div>
1440
+ `,
1441
  applications: `
1442
  <div class="info-box">
1443
  <div class="box-title">πŸ“š Educational</div>
 
2531
  β€’ Start with low learning rate (1e-4) for fine-tuning<br>
2532
  β€’ Popular backbones: ResNet50, EfficientNet, ViT
2533
  </div>
2534
+ `,
2535
+ concepts: `
2536
+ <h3>Why Transfer Learning Works</h3>
2537
+ <div class="list-item">
2538
+ <div class="list-num">01</div>
2539
+ <div><strong>Feature Hierarchy:</strong> Early layers learn universal features (edges, textures) that transfer across domains</div>
2540
+ </div>
2541
+ <div class="list-item">
2542
+ <div class="list-num">02</div>
2543
+ <div><strong>Domain Similarity:</strong> The more similar source and target domains, the better transfer</div>
2544
+ </div>
2545
+ <div class="list-item">
2546
+ <div class="list-num">03</div>
2547
+ <div><strong>Regularization Effect:</strong> Pre-trained weights act as strong priors, preventing overfitting</div>
2548
+ </div>
2549
+
2550
+ <h3>Transfer Learning Quadrant</h3>
2551
+ <table>
2552
+ <tr>
2553
+ <th></th>
2554
+ <th>Similar Domain</th>
2555
+ <th>Different Domain</th>
2556
+ </tr>
2557
+ <tr>
2558
+ <td><strong>Large Data</strong></td>
2559
+ <td>Fine-tune all layers</td>
2560
+ <td>Fine-tune top layers</td>
2561
+ </tr>
2562
+ <tr>
2563
+ <td><strong>Small Data</strong></td>
2564
+ <td>Feature extraction</td>
2565
+ <td>Feature extraction (risky)</td>
2566
+ </tr>
2567
+ </table>
2568
+ `,
2569
+ math: `
2570
+ <h3>Learning Rate Strategies</h3>
2571
+ <p>Different layers need different learning rates during fine-tuning.</p>
2572
+
2573
+ <div class="formula">
2574
+ Discriminative Fine-tuning:<br>
2575
+ lr_layer_n = lr_base × decay^(L-n)<br>
2576
+ <br>
2577
+ Where L = total layers, n = layer index<br>
2578
+ Example: L=10, lr_base=1e-3, decay=0.9<br>
2579
+ Layer 1: 1e-3 × 0.9^9 ≈ 3.9e-4<br>
2580
+ Layer 10: 1e-3 × 0.9^0 = 1e-3
2581
+ </div>
2582
+
2583
+ <div class="callout insight">
2584
+ <div class="callout-title">πŸ“ Paper & Pain: Domain Shift</div>
2585
+ When source and target distributions differ:<br>
2586
+ β€’ <strong>Covariate Shift:</strong> P(X) changes, P(Y|X) same<br>
2587
+ β€’ <strong>Label Shift:</strong> P(Y) changes, P(X|Y) same<br>
2588
+ β€’ <strong>Concept Shift:</strong> P(Y|X) changes<br>
2589
+ Transfer learning handles covariate shift well but struggles with concept shift.
2590
+ </div>
2591
+ `,
2592
+ applications: `
2593
+ <div class="info-box">
2594
+ <div class="box-title">πŸ₯ Medical Imaging</div>
2595
+ <div class="box-content">
2596
+ Train on ImageNet, fine-tune for X-ray diagnosis with only 1000 labeled images. Achieves 90%+ accuracy vs 60% from scratch.
2597
+ </div>
2598
+ </div>
2599
+ <div class="info-box">
2600
+ <div class="box-title">πŸ›’ Retail & E-commerce</div>
2601
+ <div class="box-content">
2602
+ Product classification, visual search, inventory management using pre-trained ResNet/EfficientNet models.
2603
+ </div>
2604
+ </div>
2605
+ <div class="info-box">
2606
+ <div class="box-title">🌍 Satellite Imagery</div>
2607
+ <div class="box-content">
2608
+ Land use classification, deforestation detection, urban planning using models pre-trained on aerial imagery.
2609
+ </div>
2610
+ </div>
2611
  `
2612
  },
2613
  "localization": {
 
2630
  <li><strong>Option 1:</strong> (x_min, y_min, x_max, y_max)</li>
2631
  <li><strong>Option 2:</strong> (x_center, y_center, width, height) ← Most common</li>
2632
  </ul>
2633
+ `,
2634
+ concepts: `
2635
+ <h3>Localization vs Detection</h3>
2636
+ <div class="list-item">
2637
+ <div class="list-num">01</div>
2638
+ <div><strong>Classification:</strong> What is in the image? β†’ "Cat"</div>
2639
+ </div>
2640
+ <div class="list-item">
2641
+ <div class="list-num">02</div>
2642
+ <div><strong>Localization:</strong> Where is the single object? β†’ "Cat at [100, 50, 200, 150]"</div>
2643
+ </div>
2644
+ <div class="list-item">
2645
+ <div class="list-num">03</div>
2646
+ <div><strong>Detection:</strong> Where are ALL objects? β†’ Multiple bounding boxes</div>
2647
+ </div>
2648
+
2649
+ <h3>Network Architecture</h3>
2650
+ <p>Modify a classification network (ResNet, VGG) by adding a regression head:</p>
2651
+ <div class="formula">
2652
+ CNN Backbone β†’ Feature Map β†’ [Classification Head (1000 classes)]<br>
2653
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;β†’ [Regression Head (4 coordinates)]
2654
+ </div>
2655
+ `,
2656
+ math: `
2657
+ <h3>Smooth L1 Loss (Huber Loss)</h3>
2658
+ <p>Combines L1 and L2 loss for robust bounding box regression.</p>
2659
+
2660
+ <div class="formula">
2661
+ SmoothL1(x) = { 0.5x² if |x| < 1<br>
2662
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{ |x| - 0.5 otherwise
2663
+ </div>
2664
+
2665
+ <div class="callout insight">
2666
+ <div class="callout-title">πŸ“ Paper & Pain: Why Smooth L1?</div>
2667
+ β€’ <strong>L2 Loss:</strong> Penalizes large errors too much (squared), sensitive to outliers<br>
2668
+ β€’ <strong>L1 Loss:</strong> Robust to outliers but has discontinuous gradient at 0<br>
2669
+ β€’ <strong>Smooth L1:</strong> Best of both worlds - quadratic near 0, linear for large errors
2670
+ </div>
2671
+
2672
+ <h3>IoU Loss</h3>
2673
+ <div class="formula">
2674
+ L_IoU = 1 - IoU(pred, target)<br>
2675
+ Where IoU = Intersection / Union
2676
+ </div>
2677
+ `,
2678
+ applications: `
2679
+ <div class="info-box">
2680
+ <div class="box-title">πŸš— Self-Driving Cars</div>
2681
+ <div class="box-content">Localize the primary vehicle ahead for adaptive cruise control</div>
2682
+ </div>
2683
+ <div class="info-box">
2684
+ <div class="box-title">πŸ“Έ Photo Auto-Crop</div>
2685
+ <div class="box-content">Detect main subject and automatically crop to optimal composition</div>
2686
+ </div>
2687
+ <div class="info-box">
2688
+ <div class="box-title">πŸ₯ Medical Imaging</div>
2689
+ <div class="box-content">Localize tumors, organs, or anomalies in X-rays and CT scans</div>
2690
+ </div>
2691
  `
2692
  },
2693
  "rcnn": {
 
2731
  Faster R-CNN: Best accuracy for detection (not real-time)<br>
2732
  Mask R-CNN: Detection + instance segmentation
2733
  </div>
2734
+ `,
2735
+ concepts: `
2736
+ <h3>Two-Stage Detection Pipeline</h3>
2737
+ <div class="list-item">
2738
+ <div class="list-num">01</div>
2739
+ <div><strong>Stage 1 - Region Proposal:</strong> Find ~2000 candidate regions that might contain objects</div>
2740
+ </div>
2741
+ <div class="list-item">
2742
+ <div class="list-num">02</div>
2743
+ <div><strong>Stage 2 - Classification:</strong> Classify each region and refine bounding box</div>
2744
+ </div>
2745
+
2746
+ <h3>Region Proposal Network (RPN)</h3>
2747
+ <p>The key innovation of Faster R-CNN - learns to propose regions instead of using hand-crafted algorithms.</p>
2748
+ <div class="formula">
2749
+ RPN Output per location:<br>
2750
+ β€’ k anchor boxes Γ— 4 coordinates = 4k regression outputs<br>
2751
+ β€’ k anchor boxes Γ— 2 objectness scores = 2k classification outputs<br>
2752
+ Typical k = 9 (3 scales Γ— 3 aspect ratios)
2753
+ </div>
2754
+ `,
2755
+ math: `
2756
+ <h3>RoI Pooling: Fixed-Size Feature Maps</h3>
2757
+ <p>Convert variable-size regions into fixed 7Γ—7 feature maps for FC layers.</p>
2758
+
2759
+ <div class="formula">
2760
+ For each RoI of size HΓ—W:<br>
2761
+ 1. Divide into 7Γ—7 grid (cells of size H/7 Γ— W/7)<br>
2762
+ 2. Max-pool each cell β†’ single value<br>
2763
+ 3. Output: Fixed 7Γ—7 feature map regardless of input size
2764
+ </div>
2765
+
2766
+ <div class="callout insight">
2767
+ <div class="callout-title">πŸ“ Paper & Pain: RoI Align vs RoI Pool</div>
2768
+ <strong>Problem:</strong> RoI Pooling quantizes coordinates, causing misalignment.<br>
2769
+ <strong>Solution:</strong> RoI Align uses bilinear interpolation instead of rounding.<br>
2770
+ This is critical for Mask R-CNN where pixel-level accuracy matters!
2771
+ </div>
2772
+ `,
2773
+ applications: `
2774
+ <div class="info-box">
2775
+ <div class="box-title">πŸ₯ Medical Imaging</div>
2776
+ <div class="box-content">High-accuracy tumor detection where speed is less critical than precision</div>
2777
+ </div>
2778
+ <div class="info-box">
2779
+ <div class="box-title">πŸ“· Photo Analysis</div>
2780
+ <div class="box-content">Face detection, scene understanding, object counting in static images</div>
2781
+ </div>
2782
+ <div class="info-box">
2783
+ <div class="box-title">πŸ”¬ Scientific Research</div>
2784
+ <div class="box-content">Cell detection, particle tracking, microscopy image analysis</div>
2785
+ </div>
2786
  `
2787
  },
2788
  "ssd": {
 
2804
  <br>
2805
  Sweet spot between YOLO (faster) and Faster R-CNN (more accurate)
2806
  </div>
2807
+ `,
2808
+ concepts: `
2809
+ <h3>Multi-Scale Feature Maps</h3>
2810
+ <p>SSD makes predictions at multiple layers, each detecting objects at different scales.</p>
2811
+
2812
+ <div class="list-item">
2813
+ <div class="list-num">01</div>
2814
+ <div><strong>Early Layers (38Γ—38):</strong> Detect small objects (high resolution)</div>
2815
+ </div>
2816
+ <div class="list-item">
2817
+ <div class="list-num">02</div>
2818
+ <div><strong>Middle Layers (19Γ—19, 10Γ—10):</strong> Detect medium objects</div>
2819
+ </div>
2820
+ <div class="list-item">
2821
+ <div class="list-num">03</div>
2822
+ <div><strong>Deep Layers (5Γ—5, 3Γ—3, 1Γ—1):</strong> Detect large objects</div>
2823
+ </div>
2824
+
2825
+ <h3>Default Boxes (Anchors)</h3>
2826
+ <p>At each feature map cell, SSD predicts offsets for k default boxes with different aspect ratios (1:1, 2:1, 1:2, 3:1, 1:3).</p>
2827
+ `,
2828
+ math: `
2829
+ <h3>SSD Loss Function</h3>
2830
+ <p>Weighted sum of localization and confidence losses.</p>
2831
+
2832
+ <div class="formula">
2833
+ L(x, c, l, g) = (1/N) × [L_conf(x, c) + α × L_loc(x, l, g)]<br>
2834
+ <br>
2835
+ Where:<br>
2836
+ • L_conf = Softmax loss over class confidences<br>
2837
+ • L_loc = Smooth L1 loss over box coordinates<br>
2838
+ • α = Weight factor (typically 1)<br>
2839
+ • N = Number of matched default boxes
2840
+ </div>
2841
+
2842
+ <div class="callout insight">
2843
+ <div class="callout-title">πŸ“ Paper & Pain: Hard Negative Mining</div>
2844
+ Problem: Most default boxes are background (class imbalance).<br>
2845
+ Solution: Sort negative boxes by confidence loss, pick top ones so pos:neg = 1:3.<br>
2846
+ This focuses training on hard negatives, not easy ones.
2847
+ </div>
2848
+ `,
2849
+ applications: `
2850
+ <div class="info-box">
2851
+ <div class="box-title">πŸ“Ή Video Analytics</div>
2852
+ <div class="box-content">Real-time object detection in security cameras, sports broadcasting</div>
2853
+ </div>
2854
+ <div class="info-box">
2855
+ <div class="box-title">πŸ€– Robotics</div>
2856
+ <div class="box-content">Object detection for manipulation tasks, obstacle avoidance</div>
2857
+ </div>
2858
+ <div class="info-box">
2859
+ <div class="box-title">πŸ“± Mobile Apps</div>
2860
+ <div class="box-content">Lightweight models for on-device detection (MobileNet-SSD)</div>
2861
+ </div>
2862
  `
2863
  },
2864
  "semantic-seg": {
 
2892
  With skip connections from encoder to decoder at each level
2893
  </div>
2894
  `,
2895
+ concepts: `
2896
+ <h3>Key Concepts</h3>
2897
+ <div class="list-item">
2898
+ <div class="list-num">01</div>
2899
+ <div><strong>Encoder-Decoder:</strong> Downsample to capture context, upsample to recover spatial detail</div>
2900
+ </div>
2901
+ <div class="list-item">
2902
+ <div class="list-num">02</div>
2903
+ <div><strong>Skip Connections:</strong> Pass high-resolution features from encoder to decoder (U-Net)</div>
2904
+ </div>
2905
+ <div class="list-item">
2906
+ <div class="list-num">03</div>
2907
+ <div><strong>Atrous Convolution:</strong> Expand receptive field without losing resolution (DeepLab)</div>
2908
+ </div>
2909
+ <div class="list-item">
2910
+ <div class="list-num">04</div>
2911
+ <div><strong>ASPP:</strong> Atrous Spatial Pyramid Pooling - capture multi-scale context</div>
2912
+ </div>
2913
+ `,
2914
+ math: `
2915
+ <h3>Dice Loss for Segmentation</h3>
2916
+ <p>Better than cross-entropy for imbalanced classes (small objects).</p>
2917
+
2918
+ <div class="formula">
2919
+ Dice = 2 × |A ∩ B| / (|A| + |B|)<br>
2920
+ Dice Loss = 1 - Dice<br>
2921
+ <br>
2922
+ Where A = predicted mask, B = ground truth mask
2923
+ </div>
2924
+
2925
+ <div class="callout insight">
2926
+ <div class="callout-title">πŸ“ Paper & Pain: Why Dice > Cross-Entropy?</div>
2927
+ If only 1% of pixels are foreground:<br>
2928
+ β€’ Cross-Entropy: Model can get 99% accuracy by predicting all background!<br>
2929
+ β€’ Dice: Penalizes missed foreground pixels heavily<br>
2930
+ β€’ Often use combination: L = BCE + Dice
2931
+ </div>
2932
+ `,
2933
  applications: `
2934
  <div class="info-box">
2935
  <div class="box-title">πŸ₯ Medical Imaging</div>
 
2961
  2. Class prediction<br>
2962
  3. <strong>Binary mask for the object</strong>
2963
  </div>
2964
+ `,
2965
+ concepts: `
2966
+ <h3>Mask R-CNN Architecture</h3>
2967
+ <div class="list-item">
2968
+ <div class="list-num">01</div>
2969
+ <div><strong>Backbone:</strong> ResNet-50/101 with Feature Pyramid Network (FPN)</div>
2970
+ </div>
2971
+ <div class="list-item">
2972
+ <div class="list-num">02</div>
2973
+ <div><strong>RPN:</strong> Region Proposal Network (same as Faster R-CNN)</div>
2974
+ </div>
2975
+ <div class="list-item">
2976
+ <div class="list-num">03</div>
2977
+ <div><strong>RoI Align:</strong> Better than RoI Pooling (no quantization)</div>
2978
+ </div>
2979
+ <div class="list-item">
2980
+ <div class="list-num">04</div>
2981
+ <div><strong>Mask Head:</strong> Small FCN that outputs 28Γ—28 binary mask per class</div>
2982
+ </div>
2983
+ `,
2984
+ math: `
2985
+ <h3>Multi-Task Loss</h3>
2986
+ <p>Mask R-CNN optimizes three losses simultaneously:</p>
2987
+
2988
+ <div class="formula">
2989
+ L = L_cls + L_box + L_mask<br>
2990
+ <br>
2991
+ Where:<br>
2992
+ β€’ L_cls = Classification loss (cross-entropy)<br>
2993
+ β€’ L_box = Bounding box regression (smooth L1)<br>
2994
+ β€’ L_mask = Binary cross-entropy per-pixel mask loss
2995
+ </div>
2996
+
2997
+ <div class="callout insight">
2998
+ <div class="callout-title">πŸ“ Key Insight: Decoupled Masks</div>
2999
+ Mask R-CNN predicts a binary mask for EACH class independently.<br>
3000
+ This avoids competition between classes and improves accuracy.
3001
+ </div>
3002
+ `,
3003
+ applications: `
3004
+ <div class="info-box">
3005
+ <div class="box-title">πŸ“Έ Photo Editing</div>
3006
+ <div class="box-content">Auto-select objects for editing, background removal, composition</div>
3007
+ </div>
3008
+ <div class="info-box">
3009
+ <div class="box-title">πŸ€– Robotics</div>
3010
+ <div class="box-content">Object manipulation - need exact shape, not just bounding box</div>
3011
+ </div>
3012
+ <div class="info-box">
3013
+ <div class="box-title">🎬 Video Production</div>
3014
+ <div class="box-content">Rotoscoping, VFX, green screen replacement</div>
3015
+ </div>
3016
  `
3017
  },
3018
  "face-recog": {
 
3037
  No retraining needed - just compare embeddings.
3038
  </div>
3039
  `,
3040
+ concepts: `
3041
+ <h3>Face Recognition Pipeline</h3>
3042
+ <div class="list-item">
3043
+ <div class="list-num">01</div>
3044
+ <div><strong>Face Detection:</strong> Find faces in image (MTCNN, RetinaFace)</div>
3045
+ </div>
3046
+ <div class="list-item">
3047
+ <div class="list-num">02</div>
3048
+ <div><strong>Alignment:</strong> Normalize face orientation and scale</div>
3049
+ </div>
3050
+ <div class="list-item">
3051
+ <div class="list-num">03</div>
3052
+ <div><strong>Embedding:</strong> Extract 128/512-dim feature vector (FaceNet, ArcFace)</div>
3053
+ </div>
3054
+ <div class="list-item">
3055
+ <div class="list-num">04</div>
3056
+ <div><strong>Matching:</strong> Compare embeddings with cosine similarity or L2 distance</div>
3057
+ </div>
3058
+
3059
+ <h3>Key Models</h3>
3060
+ <table>
3061
+ <tr><th>Model</th><th>Key Innovation</th></tr>
3062
+ <tr><td>FaceNet</td><td>Triplet loss, 128-dim embedding</td></tr>
3063
+ <tr><td>ArcFace</td><td>Additive angular margin loss, SOTA accuracy</td></tr>
3064
+ <tr><td>DeepFace</td><td>Facebook's early success</td></tr>
3065
+ </table>
3066
+ `,
3067
+ math: `
3068
+ <h3>Triplet Loss Intuition</h3>
3069
+ <p>Push same-person faces closer, different-person faces apart.</p>
3070
+
3071
+ <div class="formula">
3072
+ ||f(A) - f(P)||² + margin < ||f(A) - f(N)||²
3073
+ </div>
3074
+
3075
+ <div class="callout insight">
3076
+ <div class="callout-title">πŸ“ Paper & Pain: Hard Triplet Mining</div>
3077
+ Easy triplets: Random selection - margin already satisfied, loss=0<br>
3078
+ Hard triplets: Find P farthest from anchor (same identity), N closest to anchor (different identity)<br>
3079
+ <strong>Training on hard triplets is critical for convergence!</strong>
3080
+ </div>
3081
+
3082
+ <h3>ArcFace Angular Margin</h3>
3083
+ <div class="formula">
3084
+ L = -log(e^(s·cos(θ + m)) / (e^(s·cos(θ + m)) + Σ e^(s·cos(θ_j))))<br>
3085
+ Where m = angular margin, s = scale factor
3086
+ </div>
3087
+ `,
3088
  applications: `
3089
  <div class="info-box">
3090
  <div class="box-title">πŸ“± Phone Unlock</div>
 
3116
  <li><strong>Sparse:</strong> Encourage sparse activations</li>
3117
  </ul>
3118
  `,
3119
+ concepts: `
3120
+ <h3>Key Concepts</h3>
3121
+ <div class="list-item">
3122
+ <div class="list-num">01</div>
3123
+ <div><strong>Bottleneck:</strong> Force information compression by using fewer dimensions than input</div>
3124
  </div>
3125
+ <div class="list-item">
3126
+ <div class="list-num">02</div>
3127
+ <div><strong>Reconstruction:</strong> Learn to recreate input - captures essential features</div>
3128
  </div>
3129
+ <div class="list-item">
3130
+ <div class="list-num">03</div>
3131
+ <div><strong>Latent Space:</strong> Compressed representation captures data structure</div>
3132
+ </div>
3133
+
3134
+ <h3>Variational Autoencoder (VAE)</h3>
3135
+ <p>Instead of encoding to a point, encode to a probability distribution (mean + variance).</p>
3136
+ <div class="formula">
3137
+ Encoder outputs: μ (mean) and σ (standard deviation)<br>
3138
+ Sample: z = μ + σ × ε (where ε ~ N(0,1))<br>
3139
+ This is the "reparameterization trick" for backprop!
3140
+ </div>
3141
+ `,
3142
+ math: `
3143
+ <h3>VAE Loss Function (ELBO)</h3>
3144
+ <p>VAE maximizes the Evidence Lower Bound:</p>
3145
+
3146
+ <div class="formula">
3147
+ L = E[log p(x|z)] - KL(q(z|x) || p(z))<br>
3148
+ <br>
3149
+ Where:<br>
3150
+ β€’ First term: Reconstruction quality<br>
3151
+ β€’ Second term: KL divergence regularization (push q toward N(0,1))
3152
+ </div>
3153
+
3154
+ <div class="callout insight">
3155
+ <div class="callout-title">πŸ“ Paper & Pain: KL Divergence</div>
3156
+ For Gaussians:<br>
3157
+ KL = -0.5 × Σ(1 + log(σ²) - μ² - σ²)<br>
3158
+ This has a closed-form solution - no sampling needed!
3159
+ </div>
3160
+ `,
3161
+ applications: `
3162
+ <div class="info-box">
3163
+ <div class="box-title">πŸ—œοΈ Compression</div>
3164
+ <div class="box-content">Dimensionality reduction, data compression, feature extraction</div>
3165
+ </div>
3166
+ <div class="info-box">
3167
+ <div class="box-title">πŸ” Anomaly Detection</div>
3168
+ <div class="box-content">High reconstruction error = anomaly (fraud detection, defect detection)</div>
3169
+ </div>
3170
+ `
3171
+ },
3172
+ "gans": {
3173
+ overview: `
3174
+ <h3>GANs (Generative Adversarial Networks)</h3>
3175
+ <p>Two networks compete: Generator creates fake data, Discriminator tries to detect fakes.</p>
3176
+
3177
  <h3>The GAN Game</h3>
3178
  <div class="formula">
3179
  Generator: Creates fake images from random noise<br>
 
3258
  β€’ Controllable generation (text-to-image)
3259
  </div>
3260
  `,
3261
+ concepts: `
3262
+ <h3>Key Components</h3>
3263
+ <div class="list-item">
3264
+ <div class="list-num">01</div>
3265
+ <div><strong>U-Net Backbone:</strong> Encoder-decoder with skip connections predicts noise at each step</div>
3266
+ </div>
3267
+ <div class="list-item">
3268
+ <div class="list-num">02</div>
3269
+ <div><strong>Time Embedding:</strong> Tell the model which timestep it's at (sinusoidal encoding)</div>
3270
+ </div>
3271
+ <div class="list-item">
3272
+ <div class="list-num">03</div>
3273
+ <div><strong>CLIP Conditioning:</strong> Guide generation with text embeddings (Stable Diffusion)</div>
3274
+ </div>
3275
+
3276
+ <h3>Latent Diffusion</h3>
3277
+ <p>Instead of diffusing in pixel space (expensive), work in VAE latent space (8× smaller per spatial dimension, ~48× fewer values).</p>
3278
+ <div class="formula">
3279
+ Image (512×512×3) → VAE Encoder → Latent (64×64×4) → Diffuse → Decode
3280
+ </div>
3281
+ `,
3282
+ math: `
3283
+ <h3>Forward Process (Noising)</h3>
3284
+ <p>Add Gaussian noise according to a schedule Ξ²_t:</p>
3285
+
3286
+ <div class="formula">
3287
+ q(x_t | x_{t-1}) = N(x_t; √(1-β_t) × x_{t-1}, β_t × I)<br>
3288
+ <br>
3289
+ Or in closed form for any t:<br>
3290
+ x_t = √(ᾱ_t) × x_0 + √(1-ᾱ_t) × ε, ε ~ N(0, I)<br>
3291
+ Where ᾱ_t = Π_{s=1}^t (1-β_s)
3292
+ </div>
3293
+
3294
+ <h3>Training Objective</h3>
3295
+ <p>Simple noise prediction loss:</p>
3296
+ <div class="formula">
3297
+ L = E[||ε - ε_θ(x_t, t)||²]
3298
+ </div>
3299
+
3300
+ <div class="callout insight">
3301
+ <div class="callout-title">πŸ“ Paper & Pain: Simplified Loss</div>
3302
+ The full variational bound is complex, but Ho et al. (2020) showed this simple MSE loss on noise prediction works just as well and is much easier to implement!
3303
+ </div>
3304
+ `,
3305
  applications: `
3306
  <div class="info-box">
3307
  <div class="box-title">πŸ–ΌοΈ Text-to-Image</div>
 
3404
  4. Achieves SOTA with minimal data!
3405
  </div>
3406
  `,
3407
+ concepts: `
3408
+ <h3>BERT Architecture</h3>
3409
+ <div class="list-item">
3410
+ <div class="list-num">01</div>
3411
+ <div><strong>Encoder Only:</strong> 12/24 Transformer encoder layers (BERT-base/large)</div>
3412
+ </div>
3413
+ <div class="list-item">
3414
+ <div class="list-num">02</div>
3415
+ <div><strong>Token Embedding:</strong> WordPiece tokenization (30K vocab)</div>
3416
+ </div>
3417
+ <div class="list-item">
3418
+ <div class="list-num">03</div>
3419
+ <div><strong>Segment Embedding:</strong> Distinguish sentence A from sentence B</div>
3420
+ </div>
3421
+ <div class="list-item">
3422
+ <div class="list-num">04</div>
3423
+ <div><strong>[CLS] Token:</strong> Aggregated representation for classification tasks</div>
3424
+ </div>
3425
+
3426
+ <h3>Model Sizes</h3>
3427
+ <table>
3428
+ <tr><th>Model</th><th>Layers</th><th>Hidden</th><th>Params</th></tr>
3429
+ <tr><td>BERT-base</td><td>12</td><td>768</td><td>110M</td></tr>
3430
+ <tr><td>BERT-large</td><td>24</td><td>1024</td><td>340M</td></tr>
3431
+ </table>
3432
+ `,
3433
+ math: `
3434
+ <h3>Masked Language Modeling (MLM)</h3>
3435
+ <p>BERT's main pre-training objective:</p>
3436
+
3437
+ <div class="formula">
3438
+ L_MLM = -Σ log P(x_masked | x_visible)<br>
3439
+ <br>
3440
+ For each masked token, predict using cross-entropy loss
3441
+ </div>
3442
+
3443
+ <div class="callout insight">
3444
+ <div class="callout-title">📝 Paper & Pain: Masking Strategy</div>
3445
+ Of the 15% tokens selected for masking:<br>
3446
+ • 80% → [MASK] token<br>
3447
+ • 10% → Random token<br>
3448
+ • 10% → Keep original<br>
3449
+ This prevents over-reliance on [MASK] during fine-tuning!
3450
+ </div>
3451
+ `,
3452
  applications: `
3453
  <div class="info-box">
3454
  <div class="box-title">🔍 Search & QA</div>
 
3511
  • Multi-step problem solving
3512
  </div>
3513
  `,
3514
+ concepts: `
3515
+ <h3>GPT Architecture</h3>
3516
+ <div class="list-item">
3517
+ <div class="list-num">01</div>
3518
+ <div><strong>Decoder Only:</strong> Uses causal (masked) attention - can only see past tokens</div>
3519
+ </div>
3520
+ <div class="list-item">
3521
+ <div class="list-num">02</div>
3522
+ <div><strong>Autoregressive:</strong> Generate one token at a time, feed back as input</div>
3523
+ </div>
3524
+ <div class="list-item">
3525
+ <div class="list-num">03</div>
3526
+ <div><strong>Pre-training:</strong> Next token prediction on massive text corpus</div>
3527
+ </div>
3528
+ <div class="list-item">
3529
+ <div class="list-num">04</div>
3530
+ <div><strong>RLHF:</strong> Reinforcement Learning from Human Feedback (ChatGPT)</div>
3531
+ </div>
3532
+
3533
+ <h3>In-Context Learning</h3>
3534
+ <p>GPT-3+ can learn from examples in the prompt without updating weights!</p>
3535
+ <div class="formula">
3536
+ Zero-shot: "Translate to French: Hello" → "Bonjour"<br>
3537
+ Few-shot: "cat→chat, dog→chien, house→?" → "maison"
3538
+ </div>
3539
+ `,
3540
+ math: `
3541
+ <h3>Causal Language Modeling</h3>
3542
+ <p>GPT is trained to maximize the likelihood of the next token:</p>
3543
+
3544
+ <div class="formula">
3545
+ L = -Σ log P(x_t | x_{&lt;t})<br>
3546
+ <br>
3547
+ Where P(x_t | x_{&lt;t}) = softmax(h_t × W_vocab)
3548
+ </div>
3549
+
3550
+ <div class="callout insight">
3551
+ <div class="callout-title">📝 Paper & Pain: Scaling Laws</div>
3552
+ Performance scales predictably with compute, data, and parameters:<br>
3553
+ L ∝ N^(-0.076) for model size N<br>
3554
+ This is why OpenAI scaled up to GPT-3 (175B) and GPT-4 (size undisclosed; reportedly ~1.8T)!
3555
+ </div>
3556
+ `,
3557
  applications: `
3558
  <div class="info-box">
3559
  <div class="box-title">💬 ChatGPT & Assistants</div>
 
3602
  • <strong>Transfer Learning:</strong> Pre-trained ViT beats CNNs on many tasks<br>
3603
  • <strong>Long-Range Dependencies:</strong> Global attention vs CNN's local receptive field
3604
  </div>
3605
+ `,
3606
+ concepts: `
3607
+ <h3>ViT vs CNN Comparison</h3>
3608
+ <table>
3609
+ <tr><th>Aspect</th><th>CNN</th><th>ViT</th></tr>
3610
+ <tr><td>Inductive Bias</td><td>Locality, translation invariance</td><td>Minimal - learns from data</td></tr>
3611
+ <tr><td>Data Efficiency</td><td>Better with small datasets</td><td>Needs large datasets</td></tr>
3612
+ <tr><td>Receptive Field</td><td>Local (grows with depth)</td><td>Global from layer 1</td></tr>
3613
+ <tr><td>Scalability</td><td>Diminishing returns</td><td>Scales well with compute</td></tr>
3614
+ </table>
3615
+
3616
+ <h3>Key Innovations</h3>
3617
+ <div class="list-item">
3618
+ <div class="list-num">01</div>
3619
+ <div><strong>No Convolutions:</strong> Pure attention - "An Image is Worth 16x16 Words"</div>
3620
+ </div>
3621
+ <div class="list-item">
3622
+ <div class="list-num">02</div>
3623
+ <div><strong>Learnable Position:</strong> Position embeddings are learned, not sinusoidal</div>
3624
+ </div>
3625
+ `,
3626
+ math: `
3627
+ <h3>Patch Embedding</h3>
3628
+ <p>Convert image patches to token embeddings:</p>
3629
+
3630
+ <div class="formula">
3631
+ z_0 = [x_cls; x_p^1 E; x_p^2 E; ...; x_p^N E] + E_pos<br>
3632
+ <br>
3633
+ Where:<br>
3634
+ • x_p^i = flattened patch (16×16×3 = 768 dimensions)<br>
3635
+ • E = learnable linear projection<br>
3636
+ • E_pos = position embedding
3637
+ </div>
3638
+
3639
+ <div class="callout insight">
3640
+ <div class="callout-title">📝 Paper & Pain: Computation</div>
3641
+ ViT-Base: 12 layers, 768 hidden, 12 heads ~ 86M params<br>
3642
+ Self-attention cost: O(n²·d) where n=196 patches<br>
3643
+ This is why ViT is efficient for images (196 tokens) vs text (1000+ tokens)
3644
+ </div>
3645
+ `,
3646
+ applications: `
3647
+ <div class="info-box">
3648
+ <div class="box-title">🖼️ Image Classification</div>
3649
+ <div class="box-content">SOTA on ImageNet with pre-training. Google/DeepMind use for internal systems.</div>
3650
+ </div>
3651
+ <div class="info-box">
3652
+ <div class="box-title">🔍 Object Detection</div>
3653
+ <div class="box-content">DETR, DINO - Transformer-based detection replacing Faster R-CNN</div>
3654
+ </div>
3655
+ <div class="info-box">
3656
+ <div class="box-title">🎬 Video Understanding</div>
3657
+ <div class="box-content">ViViT, TimeSformer - extend patches to 3D (space + time)</div>
3658
+ </div>
3659
+ `
3660
+ },
3661
+ "gnn": {
3662
+ overview: `
3663
+ <h3>Graph Neural Networks (GNNs)</h3>
3664
+ <p>Deep learning on non-Euclidean data structures like social networks, molecules, and knowledge graphs.</p>
3665
+
3666
+ <h3>Key Concepts</h3>
3667
+ <div class="list-item">
3668
+ <div class="list-num">01</div>
3669
+ <div><strong>Graph Structure:</strong> Nodes (entities) and Edges (relationships).</div>
3670
+ </div>
3671
+ <div class="list-item">
3672
+ <div class="list-num">02</div>
3673
+ <div><strong>Message Passing:</strong> Nodes exchange information with neighbors.</div>
3674
+ </div>
3675
+ <div class="list-item">
3676
+ <div class="list-num">03</div>
3677
+ <div><strong>Aggregation:</strong> Combine incoming messages (Sum, Mean, Max).</div>
3678
+ </div>
3679
+
3680
+ <div class="callout tip">
3681
+ <div class="callout-title">💡 Why GNNs?</div>
3682
+ Standard CNNs expect a fixed grid (Euclidean). Graphs have arbitrary size and topology. GNN layers are permutation equivariant: the result does not depend on how the nodes are ordered!
3683
+ </div>
3684
+ `,
3685
+ concepts: `
3686
+ <h3>Message Passing Neural Networks (MPNN)</h3>
3687
+ <p>The core framework for most GNNs.</p>
3688
+
3689
+ <div class="list-item">
3690
+ <div class="list-num">1</div>
3691
+ <div><strong>Message Function:</strong> Compute message from neighbor to node.</div>
3692
+ </div>
3693
+ <div class="list-item">
3694
+ <div class="list-num">2</div>
3695
+ <div><strong>Aggregation Function:</strong> Sum all messages from neighbors.</div>
3696
+ </div>
3697
+ <div class="list-item">
3698
+ <div class="list-num">3</div>
3699
+ <div><strong>Update Function:</strong> Update node state based on aggregated messages.</div>
3700
+ </div>
3701
+ `,
3702
+ math: `
3703
+ <h3>Graph Convolution Network (GCN)</h3>
3704
+ <p>The "Hello World" of GNNs (Kipf & Welling, 2017).</p>
3705
+
3706
+ <div class="formula">
3707
+ H^{(l+1)} = σ(D^{-1/2} A D^{-1/2} H^{(l)} W^{(l)})
3708
+ </div>
3709
+
3710
+ <p>Where:</p>
3711
+ <ul>
3712
+ <li><strong>A:</strong> Adjacency Matrix (connections; Kipf & Welling add self-loops, i.e. use A + I)</li>
3713
+ <li><strong>D:</strong> Degree Matrix (number of connections)</li>
3714
+ <li><strong>H:</strong> Node Features</li>
3715
+ <li><strong>W:</strong> Learnable Weights</li>
3716
+ </ul>
3717
+
3718
+ <div class="callout warning">
3719
+ <div class="callout-title">⚠️ Over-smoothing</div>
3720
+ If GNN is too deep, all node representations become indistinguishable. Usually 2-4 layers are enough.
3721
+ </div>
3722
+ `,
3723
+ applications: `
3724
+ <div class="info-box">
3725
+ <div class="box-title">💊 Drug Discovery</div>
3726
+ <div class="box-content">Predicting molecular properties, protein folding (AlphaFold)</div>
3727
+ </div>
3728
+ <div class="info-box">
3729
+ <div class="box-title">🚗 Traffic Prediction</div>
3730
+ <div class="box-content">Road networks, estimating travel times (Google Maps)</div>
3731
+ </div>
3732
+ <div class="info-box">
3733
+ <div class="box-title">🛒 Recommender Systems</div>
3734
+ <div class="box-content">Pinterest (PinSage), User-Item graphs</div>
3735
+ </div>
3736
  `
3737
  }
3738
  };
 
3951
  'transformers': drawAttentionMatrix,
3952
  'bert': drawBERTProcess,
3953
  'gpt': drawGPTGeneration,
3954
+ 'vit': drawVisionTransformer,
3955
+ 'gnn': drawGraphNetwork
3956
  };
3957
 
3958
  if (vizMap[moduleId]) {
 
4322
  'pooling': () => drawPoolingMath(ctx, canvas),
4323
  'regularization': () => drawRegularizationMath(ctx, canvas),
4324
  'transformers': () => drawAttentionMath(ctx, canvas),
4325
+ 'rnn': () => drawRNNMath(ctx, canvas),
4326
+ 'gnn': () => drawGNNMath(ctx, canvas)
4327
  };
4328
 
4329
  if (mathVizMap[moduleId]) {
 
4355
  'bert': () => drawBERTApplications(ctx, canvas),
4356
  'gpt': () => drawGPTApplications(ctx, canvas),
4357
  'gans': () => drawGANApplications(ctx, canvas),
4358
+ 'diffusion': () => drawDiffusionApplications(ctx, canvas),
4359
+ 'gnn': () => drawGNNApplications(ctx, canvas)
4360
  };
4361
 
4362
  if (appVizMap[moduleId]) {
 
5063
  }
5064
 
5065
  // Animation and download utilities
5066
// Handle of the pending requestAnimationFrame callback, or null when no
// frame is scheduled. Written by animateVisualization(), cleared here.
let animationFrameId = null;

/**
 * Flip the global animation flag (`window.vizAnimating`), restyle the button
 * that triggered the toggle, and start or cancel the animation loop.
 *
 * @param {string} moduleId - Module whose visualization should animate;
 *   forwarded unchanged to animateVisualization().
 * @param {Event} [evt] - The click event. Optional for backward
 *   compatibility: when omitted the legacy global `window.event` is used,
 *   and when neither exists the button update is skipped instead of throwing.
 */
function toggleVizAnimation(moduleId, evt) {
  // `window.event` is a deprecated implicit global; prefer the explicit argument.
  const trigger = evt || window.event;
  const btn = trigger && trigger.target ? trigger.target : null;

  window.vizAnimating = !window.vizAnimating;

  if (window.vizAnimating) {
    if (btn) {
      btn.textContent = '⏹️ Stop';
      btn.style.background = 'linear-gradient(135deg, #ff4444, #cc0000)';
    }
    animateVisualization(moduleId);
    return;
  }

  if (btn) {
    btn.textContent = '▶️ Animate';
    // Empty string drops the inline override so the stylesheet value applies again.
    btn.style.background = '';
  }
  if (animationFrameId) {
    cancelAnimationFrame(animationFrameId);
    animationFrameId = null;
  }
}
5085
 
5086
  function animateVisualization(moduleId) {
5087
  if (!window.vizAnimating) return;
5088
+
5089
+ const canvas = document.getElementById(moduleId + '-canvas');
5090
+ if (!canvas) return;
5091
+
5092
+ const ctx = canvas.getContext('2d');
5093
+ ctx.clearRect(0, 0, canvas.width, canvas.height);
5094
+ ctx.fillStyle = '#0f1419';
5095
+ ctx.fillRect(0, 0, canvas.width, canvas.height);
5096
+
5097
+ // Call the appropriate animated drawing function
5098
+ const animatedVizMap = {
5099
+ 'nn-basics': drawAnimatedNetwork,
5100
+ 'perceptron': drawAnimatedDecisionBoundary,
5101
+ 'mlp': drawAnimatedMLP,
5102
+ 'activation': drawAnimatedActivations,
5103
+ 'conv-layer': drawAnimatedConvolution,
5104
+ 'gnn': drawAnimatedGNN,
5105
+ 'transformers': drawAnimatedAttention,
5106
+ 'backprop': drawAnimatedGradientFlow,
5107
+ 'gans': drawAnimatedGAN,
5108
+ 'diffusion': drawAnimatedDiffusion,
5109
+ 'rnn': drawAnimatedRNN
5110
+ };
5111
+
5112
+ if (animatedVizMap[moduleId]) {
5113
+ animatedVizMap[moduleId](ctx, canvas, Date.now());
5114
+ } else {
5115
+ // Default animation - pulsing visualization
5116
+ drawDefaultAnimation(ctx, canvas, Date.now());
5117
+ }
5118
+
5119
+ animationFrameId = requestAnimationFrame(() => animateVisualization(moduleId));
5120
+ }
5121
+
5122
// Default animation for modules without specific animations
/**
 * Render one frame of the generic fallback animation: a 3-4-4-2 feed-forward
 * network whose neurons pulse and whose connections carry moving signal dots.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number, height: number}} canvas - Only the size is read.
 * @param {number} time - Timestamp in milliseconds; every animated value is
 *   a pure function of it.
 */
function drawDefaultAnimation(ctx, canvas, time) {
  const layers = [3, 4, 4, 2];
  const layerWidth = canvas.width / (layers.length + 1);
  // Opacity shared by all neurons, oscillating between 0.4 and 1.0.
  const pulse = Math.sin(time / 300) * 0.3 + 0.7;

  layers.forEach((neurons, layerIdx) => {
    const x = (layerIdx + 1) * layerWidth;
    const layerHeight = canvas.height / (neurons + 1);

    // Per-layer values that do not depend on the individual neuron.
    const nextNeurons = layerIdx < layers.length - 1 ? layers[layerIdx + 1] : 0;
    const nextX = (layerIdx + 2) * layerWidth;
    const nextLayerHeight = canvas.height / (nextNeurons + 1);
    // Position of the signal dot along a connection (0..1); each layer is
    // shifted by half a cycle.
    const signalProgress = (time / 500 + layerIdx * 0.5) % 1;

    for (let i = 0; i < neurons; i++) {
      const y = (i + 1) * layerHeight;
      const radius = 12 + Math.sin(time / 200 + layerIdx + i) * 3;

      // Draw neuron
      ctx.fillStyle = `rgba(0, 212, 255, ${pulse})`;
      ctx.beginPath();
      ctx.arc(x, y, radius, 0, Math.PI * 2);
      ctx.fill();

      // Draw connections to next layer (none for the output layer)
      for (let j = 0; j < nextNeurons; j++) {
        const nextY = (j + 1) * nextLayerHeight;

        ctx.strokeStyle = `rgba(0, 212, 255, ${0.3 + signalProgress * 0.3})`;
        ctx.lineWidth = 1;
        ctx.beginPath();
        ctx.moveTo(x + radius, y);
        // 12 is the base neuron radius, so the line stops at the target's rim.
        ctx.lineTo(nextX - 12, nextY);
        ctx.stroke();

        // Animated signal dot, linearly interpolated along the connection
        const dotX = x + radius + (nextX - 12 - x - radius) * signalProgress;
        const dotY = y + (nextY - y) * signalProgress;
        ctx.fillStyle = '#00ff88';
        ctx.beginPath();
        ctx.arc(dotX, dotY, 3, 0, Math.PI * 2);
        ctx.fill();
      }
    }
  });

  ctx.fillStyle = '#00d4ff';
  ctx.font = 'bold 14px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('🔄 Neural Network Animation', canvas.width / 2, 25);
}
5179
+
5180
+ // Animated GNN with message passing
// Renders one frame of a fixed six-node, seven-edge graph: a title, the
// edges, one green "message" dot travelling along every edge, and pulsing
// numbered nodes. All motion is a pure function of `time` (milliseconds).
// Node positions are fixed pixel coordinates; only the title uses canvas.width.
5181
+ function drawAnimatedGNN(ctx, canvas, time) {
5182
+ ctx.fillStyle = '#9900ff';
5183
+ ctx.font = 'bold 16px Arial';
5184
+ ctx.textAlign = 'center';
5185
+ ctx.fillText('Graph Neural Network - Message Passing', canvas.width / 2, 30);
5186
+
5187
+ const nodes = [
5188
+ { x: 100, y: 100 }, { x: 200, y: 60 }, { x: 320, y: 120 },
5189
+ { x: 150, y: 200 }, { x: 400, y: 80 }, { x: 450, y: 180 }
5190
+ ];
// Each edge is a pair of indices into `nodes`: [source, target].
5191
+ const edges = [[0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]];
5192
+
5193
+ // Draw edges
5194
+ ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
5195
+ ctx.lineWidth = 2;
5196
+ edges.forEach(e => {
5197
+ ctx.beginPath();
5198
+ ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
5199
+ ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
5200
+ ctx.stroke();
5201
+ });
5202
+
5203
+ // Draw animated message passing
// One full traversal takes 1000 ms; each edge is phase-shifted by 0.15 per
// index so the dots do not move in lockstep. Dots only travel from e[0] to e[1].
5204
+ const messageProgress = (time / 1000) % 1;
5205
+ ctx.fillStyle = '#00ff88';
5206
+ edges.forEach((e, idx) => {
5207
+ const progress = (messageProgress + idx * 0.15) % 1;
5208
+ const x = nodes[e[0]].x + (nodes[e[1]].x - nodes[e[0]].x) * progress;
5209
+ const y = nodes[e[0]].y + (nodes[e[1]].y - nodes[e[0]].y) * progress;
5210
+ ctx.beginPath();
5211
+ ctx.arc(x, y, 5, 0, Math.PI * 2);
5212
+ ctx.fill();
5213
+ });
5214
+
5215
+ // Draw nodes with pulse
// `pulse` is the node radius, oscillating between 10 and 20 px. Nodes are
// painted last so they cover the edge ends and any dot passing underneath.
5216
+ const pulse = Math.sin(time / 300) * 5 + 15;
5217
+ nodes.forEach((n, i) => {
5218
+ ctx.fillStyle = '#9900ff';
5219
+ ctx.beginPath();
5220
+ ctx.arc(n.x, n.y, pulse, 0, Math.PI * 2);
5221
+ ctx.fill();
5222
+ ctx.fillStyle = 'white';
5223
+ ctx.font = '12px Arial';
5224
+ ctx.textAlign = 'center';
// Label is the node's index; +4 nudges the text baseline toward the circle's centre.
5225
+ ctx.fillText(i, n.x, n.y + 4);
5226
+ });
5227
+ }
5228
+
5229
+ // Animated attention matrix
// Renders one frame of a 5x5 grid for the words below: column labels on top,
// row labels on the left, and one cell per (row i, column j) whose opacity
// and printed number oscillate with `time` (milliseconds). The weights come
// from the fixed formula below, not from an attention computation.
5230
+ function drawAnimatedAttention(ctx, canvas, time) {
5231
+ const words = ['The', 'cat', 'sat', 'on', 'mat'];
5232
+ const cellSize = 50;
// Centre the grid horizontally; its top edge is fixed at y = 80.
5233
+ const startX = (canvas.width - words.length * cellSize) / 2;
5234
+ const startY = 80;
5235
+
5236
+ ctx.fillStyle = '#00d4ff';
5237
+ ctx.font = 'bold 16px Arial';
5238
+ ctx.textAlign = 'center';
5239
+ ctx.fillText('Self-Attention Animation', canvas.width / 2, 30);
5240
+
5241
+ // Draw words
5242
+ ctx.font = '12px Arial';
5243
+ words.forEach((word, i) => {
5244
+ ctx.fillStyle = '#e4e6eb';
// Column header, centred above column i.
5245
+ ctx.fillText(word, startX + i * cellSize + cellSize/2, startY - 10);
// Row header: translate to the label position, draw at the origin, then
// restore so the translation does not affect later drawing.
5246
+ ctx.save();
5247
+ ctx.translate(startX - 20, startY + i * cellSize + cellSize/2);
5248
+ ctx.fillText(word, 0, 0);
5249
+ ctx.restore();
5250
+ });
5251
+
5252
+ // Animated attention weights
5253
+ for (let i = 0; i < words.length; i++) {
5254
+ for (let j = 0; j < words.length; j++) {
// Base weight: 0.8 on the diagonal, otherwise 0.2 plus 0.1 per position of distance.
5255
+ const baseWeight = i === j ? 0.8 : 0.2 + Math.abs(i - j) * 0.1;
5256
+ const animatedWeight = baseWeight + Math.sin(time / 500 + i + j) * 0.2;
// Only the fill opacity is clamped to [0.1, 1]; the printed number below is the unclamped value.
5257
+ const alpha = Math.max(0.1, Math.min(1, animatedWeight));
5258
+
5259
+ ctx.fillStyle = `rgba(0, 212, 255, ${alpha})`;
// 2 px inset on every side leaves a visible gap between neighbouring cells.
5260
+ ctx.fillRect(startX + j * cellSize + 2, startY + i * cellSize + 2, cellSize - 4, cellSize - 4);
5261
+
5262
+ ctx.fillStyle = '#e4e6eb';
5263
+ ctx.font = '10px Arial';
5264
+ ctx.fillText(animatedWeight.toFixed(2), startX + j * cellSize + cellSize/2, startY + i * cellSize + cellSize/2 + 4);
5265
+ }
5266
+ }
5267
+ }
5268
+
5269
// Animated gradient flow for backprop
/**
 * Render one frame of the backpropagation animation on a 2-4-4-1 network:
 * layers light up left to right (forward pass) while orange dots move right
 * to left between layers (backward pass). Both passes share a 2000 ms cycle,
 * with the backward pass shifted by half a cycle.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number, height: number}} canvas - Only the size is read.
 * @param {number} time - Timestamp in milliseconds driving the animation.
 */
function drawAnimatedGradientFlow(ctx, canvas, time) {
  ctx.fillStyle = '#ff6b35';
  ctx.font = 'bold 16px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('Backpropagation - Gradient Flow', canvas.width / 2, 30);

  const layers = [2, 4, 4, 1];
  const layerWidth = canvas.width / (layers.length + 1);

  // Forward pass (left to right) - blue
  const forwardProgress = (time / 2000) % 1;

  layers.forEach((neurons, layerIdx) => {
    const x = (layerIdx + 1) * layerWidth;
    const layerHeight = canvas.height / (neurons + 1);
    // A layer counts as reached once the cycle has passed its share of the
    // width; reached layers are opaque and pulse, the others stay dimmed.
    const isActive = forwardProgress > layerIdx / layers.length;
    const radius = isActive ? 15 + Math.sin(time / 200) * 3 : 12;

    for (let i = 0; i < neurons; i++) {
      const y = (i + 1) * layerHeight;

      ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
      ctx.beginPath();
      ctx.arc(x, y, radius, 0, Math.PI * 2);
      ctx.fill();
    }
  });

  // Backward pass (right to left) - orange/red gradients
  const backwardProgress = ((time / 2000) + 0.5) % 1;
  const gaps = layers.length - 1;
  // Fraction travelled inside a gap; identical for every gap that is active.
  const gapFraction = (backwardProgress * gaps) % 1;

  for (let layerIdx = layers.length - 2; layerIdx >= 0; layerIdx--) {
    const x1 = (layerIdx + 1) * layerWidth;
    const x2 = (layerIdx + 2) * layerWidth;
    // Gaps switch on from the output side; once on, a gap keeps showing a
    // dot until the cycle wraps, so later in the cycle several dots are visible.
    const gradientActive = backwardProgress > (layers.length - 2 - layerIdx) / gaps;

    if (gradientActive) {
      const gradX = x2 - (x2 - x1) * gapFraction;
      ctx.fillStyle = '#ff6b35';
      ctx.beginPath();
      ctx.arc(gradX, canvas.height / 2, 8, 0, Math.PI * 2);
      ctx.fill();
    }
  }

  ctx.fillStyle = '#e4e6eb';
  ctx.font = '12px Arial';
  ctx.fillText('Forward: Blue → | Backward: Orange ←', canvas.width / 2, canvas.height - 20);
}
5321
+
5322
+ // Animated network for nn-basics
// Thin alias: forwards all three arguments unchanged to drawDefaultAnimation,
// so the 'nn-basics' module shows the generic network animation.
5323
+ function drawAnimatedNetwork(ctx, canvas, time) {
5324
+ drawDefaultAnimation(ctx, canvas, time);
5325
+ }
5326
+
5327
+ // Animated decision boundary for perceptron
// Renders one frame: a title, a straight line rotating about the canvas
// centre, and six fixed sample points in two colours. The points are static
// pixel coordinates and are not reclassified as the line moves.
5328
+ function drawAnimatedDecisionBoundary(ctx, canvas, time) {
5329
+ const centerX = canvas.width / 2;
5330
+ const centerY = canvas.height / 2;
5331
+
5332
+ ctx.fillStyle = '#ff6b35';
5333
+ ctx.font = 'bold 16px Arial';
5334
+ ctx.textAlign = 'center';
5335
+ ctx.fillText('Perceptron Decision Boundary', canvas.width / 2, 30);
5336
+
5337
+ // Animated rotating decision boundary
// Angle in radians grows by 1 every 2000 ms; `length` is the half-length,
// so the drawn line spans 400 px through the centre.
5338
+ const angle = time / 2000;
5339
+ const length = 200;
5340
+
5341
+ ctx.strokeStyle = '#ff6b35';
5342
+ ctx.lineWidth = 3;
5343
+ ctx.beginPath();
5344
+ ctx.moveTo(centerX - Math.cos(angle) * length, centerY - Math.sin(angle) * length);
5345
+ ctx.lineTo(centerX + Math.cos(angle) * length, centerY + Math.sin(angle) * length);
5346
+ ctx.stroke();
5347
+
5348
+ // Fixed sample points
// `c` is the class label: 1 is drawn in '#00d4ff', anything else in '#00ff88'.
5349
+ const points = [
5350
+ {x: 100, y: 80, c: 1}, {x: 150, y: 100, c: 1}, {x: 120, y: 150, c: 1},
5351
+ {x: 400, y: 200, c: 0}, {x: 450, y: 180, c: 0}, {x: 380, y: 250, c: 0}
5352
+ ];
5353
+
5354
+ points.forEach(p => {
5355
+ ctx.fillStyle = p.c === 1 ? '#00d4ff' : '#00ff88';
5356
+ ctx.beginPath();
5357
+ ctx.arc(p.x, p.y, 8, 0, Math.PI * 2);
5358
+ ctx.fill();
5359
+ });
5360
+ }
5361
+
5362
// Animated view for the 'mlp' module: forwards all three arguments unchanged
// to drawDefaultAnimation (the generic network animation).
+ function drawAnimatedMLP(ctx, canvas, time) {
5363
+ drawDefaultAnimation(ctx, canvas, time);
5364
+ }
5365
+
5366
// Animated view for the 'activation' module: draws the static activation
// plot via drawActivationFunctions (defined elsewhere in this file), then
// overlays a white dot and a dashed vertical line that sweep left and right
// with `time` (milliseconds).
+ function drawAnimatedActivations(ctx, canvas, time) {
5367
+ drawActivationFunctions(ctx, canvas);
5368
+
5369
+ // Add animated input marker
// `x` sweeps through [-4, 4]; `scale` converts it to pixels around the canvas centre.
// NOTE(review): assumes drawActivationFunctions plots with its origin at the
// canvas centre and 40 px per unit - confirm against that function.
5370
+ const x = Math.sin(time / 500) * 4;
5371
+ const centerX = canvas.width / 2;
5372
+ const centerY = canvas.height / 2;
5373
+ const scale = 40;
5374
+
5375
+ ctx.fillStyle = '#ffffff';
5376
+ ctx.beginPath();
5377
+ ctx.arc(centerX + x * scale, centerY, 6, 0, Math.PI * 2);
5378
+ ctx.fill();
5379
+
5380
+ ctx.strokeStyle = '#ffffff';
5381
+ ctx.setLineDash([5, 5]);
5382
+ ctx.beginPath();
5383
+ ctx.moveTo(centerX + x * scale, 0);
5384
+ ctx.lineTo(centerX + x * scale, canvas.height);
5385
+ ctx.stroke();
// Reset to a solid line so the dash pattern does not carry over to later drawing on this context.
5386
+ ctx.setLineDash([]);
5387
+ }
5388
+
5389
// Animated view for the 'conv-layer' module: delegates to
// drawConvolutionAnimation (defined elsewhere in this file).
// NOTE(review): `time` is accepted but not forwarded, so any motion must come
// from drawConvolutionAnimation itself - confirm it animates on its own.
+ function drawAnimatedConvolution(ctx, canvas, time) {
5390
+ drawConvolutionAnimation(ctx, canvas);
5391
+ }
5392
+
5393
// Renders one frame of the GAN diagram: Generator box -> fake image ->
// Discriminator box, with an output label under the discriminator. Which box
// is highlighted depends on a four-phase cycle derived from `time`
// (milliseconds). All positions are fixed pixel coordinates; only the title
// uses canvas.width.
+ function drawAnimatedGAN(ctx, canvas, time) {
5394
+ ctx.fillStyle = '#ffaa00';
5395
+ ctx.font = 'bold 16px Arial';
5396
+ ctx.textAlign = 'center';
5397
+ ctx.fillText('GAN Training Animation', canvas.width / 2, 30);
5398
+
// Phase advances once per second and cycles 0,1,2,3.
5399
+ const phase = Math.floor(time / 1000) % 4;
5400
+
5401
+ // Generator
// Opaque during phases 0-1, dimmed during phases 2-3.
5402
+ ctx.fillStyle = phase <= 1 ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
5403
+ ctx.fillRect(50, 100, 100, 80);
5404
+ ctx.fillStyle = '#e4e6eb';
5405
+ ctx.font = '12px Arial';
5406
+ ctx.fillText('Generator', 100, 145);
5407
+
5408
+ // Fake image
// Opacity of the fake-image square oscillates between 0 and 1.
5409
+ const noiseToFake = Math.sin(time / 300) * 0.5 + 0.5;
5410
+ ctx.fillStyle = `rgba(255, 170, 0, ${noiseToFake})`;
5411
+ ctx.fillRect(200, 110, 60, 60);
5412
+ ctx.fillStyle = '#e4e6eb';
5413
+ ctx.fillText('Fake', 230, 200);
5414
+
5415
+ // Discriminator
// Opaque during phases 2-3, dimmed during phases 0-1.
5416
+ ctx.fillStyle = phase >= 2 ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
5417
+ ctx.fillRect(320, 100, 100, 80);
5418
+ ctx.fillStyle = '#e4e6eb';
5419
+ ctx.fillText('Discriminator', 370, 145);
5420
+
5421
+ // Output
// The label reads 'Real?' only in phase 3 and 'Fake?' in phases 0-2.
5422
+ const output = phase === 3 ? 'Real?' : 'Fake?';
5423
+ ctx.fillStyle = '#00d4ff';
5424
+ ctx.font = 'bold 14px Arial';
5425
+ ctx.fillText(output, 370, 220);
5426
+
5427
+ // Arrows
// Two plain connector lines (no arrowheads): generator -> fake image -> discriminator.
5428
+ ctx.strokeStyle = '#e4e6eb';
5429
+ ctx.lineWidth = 2;
5430
+ ctx.beginPath();
5431
+ ctx.moveTo(150, 140);
5432
+ ctx.lineTo(200, 140);
5433
+ ctx.stroke();
5434
+ ctx.beginPath();
5435
+ ctx.moveTo(260, 140);
5436
+ ctx.lineTo(320, 140);
5437
+ ctx.stroke();
5438
+ }
5439
+
5440
/**
 * Render one frame of the diffusion animation: five squares labelled
 * t=0..t=4 that light up one after another over a 3000 ms cycle, each
 * sprinkled with more white noise dots than the one before.
 *
 * Noise dot positions come from Math.random() and are re-drawn on every
 * call, so the dots flicker from frame to frame.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number, height: number}} canvas - Only the size is read.
 * @param {number} time - Timestamp in milliseconds driving the highlight.
 */
function drawAnimatedDiffusion(ctx, canvas, time) {
  ctx.fillStyle = '#9900ff';
  ctx.font = 'bold 16px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('Diffusion Process Animation', canvas.width / 2, 30);

  const steps = 5;
  const stepWidth = canvas.width / (steps + 1);

  const progress = (time / 3000) % 1;
  // Index of the last highlighted square; squares 0..currentStep are opaque.
  const currentStep = Math.floor(progress * steps);

  for (let i = 0; i < steps; i++) {
    const x = (i + 1) * stepWidth;
    const y = 150;
    // 0 for the first (clean) square up to 1 for the last (fully noisy) one.
    const noiseLevel = i / (steps - 1);
    const isActive = i <= currentStep;

    // Draw square with noise
    ctx.fillStyle = isActive ? '#9900ff' : 'rgba(153, 0, 255, 0.3)';
    ctx.fillRect(x - 30, y - 30, 60, 60);

    // Add noise dots: up to 20, proportional to the noise level, kept
    // 5 px inside the square's border.
    if (noiseLevel > 0) {
      for (let j = 0; j < noiseLevel * 20; j++) {
        const nx = x - 25 + Math.random() * 50;
        const ny = y - 25 + Math.random() * 50;
        ctx.fillStyle = 'rgba(255, 255, 255, 0.5)';
        ctx.fillRect(nx, ny, 2, 2);
      }
    }

    ctx.fillStyle = '#e4e6eb';
    ctx.font = '10px Arial';
    ctx.fillText(`t=${i}`, x, y + 50);
  }

  ctx.fillStyle = '#e4e6eb';
  ctx.font = '12px Arial';
  ctx.fillText('Clean → Noisy (Forward) | Noisy → Clean (Reverse)', canvas.width / 2, canvas.height - 20);
}
5481
+
5482
// Renders one frame of an RNN unrolled over five time steps: a row of hidden
// state circles h0..h4, an input line x_i below each, and a connector from
// each step to the next. Exactly one step is highlighted at a time and an
// orange dot travels along its outgoing connector.
+ function drawAnimatedRNN(ctx, canvas, time) {
5483
+ ctx.fillStyle = '#00d4ff';
5484
+ ctx.font = 'bold 16px Arial';
5485
+ ctx.textAlign = 'center';
5486
+ ctx.fillText('RNN Unrolled Through Time', canvas.width / 2, 30);
5487
+
5488
+ const steps = 5;
5489
+ const stepWidth = canvas.width / (steps + 1);
// `progress` runs through [0, steps): its integer part selects the active
// step (500 ms each), its fractional part positions the signal dot.
5490
+ const progress = (time / 500) % steps;
5491
+ const activeStep = Math.floor(progress);
5492
+
5493
+ for (let i = 0; i < steps; i++) {
5494
+ const x = (i + 1) * stepWidth;
5495
+ const y = 150;
5496
+ const isActive = i === activeStep;
5497
+
5498
+ // Hidden state
5499
+ ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
5500
+ ctx.beginPath();
5501
+ ctx.arc(x, y, 25, 0, Math.PI * 2);
5502
+ ctx.fill();
5503
+
5504
+ ctx.fillStyle = '#e4e6eb';
5505
+ ctx.font = '10px Arial';
5506
+ ctx.fillText(`h${i}`, x, y + 4);
5507
+
5508
+ // Input arrow
// Plain vertical line from below up to the bottom of the circle (radius 25); no arrowhead is drawn.
5509
+ ctx.strokeStyle = isActive ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
5510
+ ctx.lineWidth = 2;
5511
+ ctx.beginPath();
5512
+ ctx.moveTo(x, y + 60);
5513
+ ctx.lineTo(x, y + 25);
5514
+ ctx.stroke();
5515
+ ctx.fillText(`x${i}`, x, y + 75);
5516
+
5517
+ // Recurrent connection
// The last step has no successor, so it gets neither a connector nor a signal dot.
5518
+ if (i < steps - 1) {
5519
+ ctx.strokeStyle = isActive ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
5520
+ ctx.beginPath();
5521
+ ctx.moveTo(x + 25, y);
5522
+ ctx.lineTo(x + stepWidth - 25, y);
5523
+ ctx.stroke();
5524
+
5525
+ // Animated signal
5526
+ if (isActive) {
// Travels rim to rim: the connector is stepWidth minus both circle radii (2 * 25) long.
5527
+ const signalX = x + 25 + (stepWidth - 50) * (progress % 1);
5528
+ ctx.fillStyle = '#ff6b35';
5529
+ ctx.beginPath();
5530
+ ctx.arc(signalX, y, 5, 0, Math.PI * 2);
5531
+ ctx.fill();
5532
+ }
5533
+ }
5534
+ }
5535
  }
5536
 
5537
  function downloadViz(moduleId) {
 
5544
  link.click();
5545
  }
5546
 
5547
/**
 * Draw the static GNN illustration: a fixed six-node, seven-edge graph with
 * numbered nodes. During the second half of every two-second window, every
 * other edge is additionally over-drawn in green to suggest message passing.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number}} canvas - Only `width` is read (to centre the title).
 * @param {number} [time] - Timestamp in milliseconds that selects whether the
 *   highlight is shown. Optional: any non-number (including omission) falls
 *   back to `Date.now()`, which is what the function always used before, so
 *   existing two-argument callers behave exactly as they did.
 */
function drawGraphNetwork(ctx, canvas, time) {
  // Accept only real numbers so an unrelated extra argument from a generic
  // dispatcher cannot be mistaken for a timestamp.
  const now = typeof time === 'number' ? time : Date.now();

  ctx.fillStyle = '#9900ff';
  ctx.font = 'bold 16px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('Graph Structure & Message Passing', canvas.width / 2, 30);

  const nodes = [
    { x: 100, y: 100 }, { x: 200, y: 50 }, { x: 300, y: 150 },
    { x: 150, y: 250 }, { x: 400, y: 100 }, { x: 500, y: 200 }
  ];
  // Each edge is a pair of indices into `nodes`.
  const edges = [
    [0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]
  ];

  // Strokes a straight segment between the two endpoints of an edge.
  const strokeEdge = ([from, to]) => {
    ctx.beginPath();
    ctx.moveTo(nodes[from].x, nodes[from].y);
    ctx.lineTo(nodes[to].x, nodes[to].y);
    ctx.stroke();
  };

  // Draw edges
  ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
  ctx.lineWidth = 2;
  edges.forEach(strokeEdge);

  // Draw nodes
  nodes.forEach((n, i) => {
    ctx.fillStyle = '#9900ff';
    ctx.beginPath();
    ctx.arc(n.x, n.y, 15, 0, Math.PI * 2);
    ctx.fill();
    ctx.fillStyle = 'white';
    ctx.font = '12px Arial';
    ctx.fillText(String(i), n.x, n.y + 4);
  });

  // Fake message passing: not a real animation, just a highlight that is
  // present whenever the draw happens in the second half of a 2 s window.
  const t = (now / 1000) % 2;
  if (t > 1) {
    ctx.strokeStyle = '#00ff88';
    ctx.lineWidth = 4;
    edges.filter((_, idx) => idx % 2 === 0).forEach(strokeEdge);
  }
}
5597
+
5598
/**
 * Draw the math panel for the GNN module: a title, the graph-convolution
 * layer formula, and a one-line legend each for A and D, all centred
 * horizontally on the canvas.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number}} canvas - Only `width` is read.
 */
function drawGNNMath(ctx, canvas) {
  const midX = canvas.width / 2;

  ctx.fillStyle = '#9900ff';
  ctx.font = 'bold 16px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('Graph Convolution Math', midX, 50);

  ctx.fillStyle = '#e4e6eb';
  ctx.font = '14px Courier New';
  ctx.fillText('H(l+1) = σ(D^-½ A D^-½ H(l) W(l))', midX, 100);

  // The legend lines keep the monospace font set for the formula above.
  ctx.fillStyle = '#00ff88';
  ctx.fillText('A = Neighborhood Connections', midX, 150);
  ctx.fillStyle = '#ff6b35';
  ctx.fillText('D = Normalization Factor', midX, 180);
}
5613
+
5614
/**
 * Draw the applications panel for the GNN module: three centred, colour-coded
 * one-line labels, 60 px apart.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw on.
 * @param {{width: number}} canvas - Only `width` is read.
 */
function drawGNNApplications(ctx, canvas) {
  const midX = canvas.width / 2;

  // Font and alignment are set once and shared by all three labels.
  ctx.fillStyle = '#9900ff';
  ctx.font = 'bold 16px Arial';
  ctx.textAlign = 'center';
  ctx.fillText('💊 Drug Discovery (Molecular Graphs)', midX, 60);

  ctx.fillStyle = '#00d4ff';
  ctx.fillText('🚗 Traffic Flow Prediction', midX, 120);

  ctx.fillStyle = '#ff6b35';
  ctx.fillText('🛒 Pinterest/Amazon Recommendations', midX, 180);
}
5626
+
5627
  initDashboard();
5628
  </script>
5629
  </body>