diff --git "a/ml_complete-all-topics/index.html" "b/ml_complete-all-topics/index.html" --- "a/ml_complete-all-topics/index.html" +++ "b/ml_complete-all-topics/index.html" @@ -3,9699 +3,4056 @@ - Mathematics Mastery Platform - Statistics, Linear Algebra & Calculus - + Machine Learning: Complete Educational Guide + + - - - - -
- - -
-

Module 5: Distributions

- -
+ +
+
+

Machine Learning: The Ultimate Learning Platform

+

Master ML through Supervised, Unsupervised & Reinforcement Learning

+

Complete with step-by-step mathematical solutions, interactive visualizations, and real-world examples

+
- + + +
+
+

1. Introduction to Machine Learning

+ +
+
+

Machine Learning is teaching computers to learn from experience, just like humans do. Instead of programming every rule, we let the computer discover patterns in data and make decisions on its own.

+ +
+
+
📊
+

Supervised Learning

+

Learning with labeled data - like a teacher providing answers

+
+
✓ Regression
+
✓ Classification
+
✓ Evaluation
+
+
+
+
🔍
+

Unsupervised Learning

+

Finding patterns without labels - discovering hidden structure

+
+
✓ Clustering
+
✓ Dimensionality Reduction
+
✓ Preprocessing
+
+
+
+
🎮
+

Reinforcement Learning

+

Learning through trial & error - maximizing rewards

+
+
✓ Q-Learning
+
✓ Policy Gradient
+
✓ Applications
+
+
+
+ +
+
Key Concepts
+
    +
  • Learning from data instead of explicit programming
  • +
  • Three types: Supervised, Unsupervised, Reinforcement
  • +
  • Powers Netflix recommendations, Face ID, and more
  • +
  • Requires: Data, Algorithm, and Computing Power
  • +
+
- +

Understanding Machine Learning

+

Imagine teaching a child to recognize animals. You show them pictures of cats and dogs, telling them which is which. After seeing many examples, the child learns to identify new animals they've never seen before. Machine Learning works the same way!

- +

The Three Types of Learning:

+
    +
  1. Supervised Learning: Learning with a teacher. You provide labeled examples (like "this is a cat", "this is a dog"), and the model learns to predict labels for new data.
  2. +
  3. Unsupervised Learning: Learning without labels. The model finds hidden patterns on its own, like grouping similar customers together.
  4. +
  5. Reinforcement Learning: Learning by trial and error. The model tries actions and learns from rewards/punishments, like teaching a robot to walk.
  6. +
- - +
+
💡 Key Insight
+
+ ML is not magic! It's mathematics + statistics + computer science working together to find patterns in data. +
+
- - +
- + +
+
+

📊 Supervised - Regression: Linear Regression

+ +
+
+

Linear Regression is one of the simplest and most powerful techniques for predicting continuous values. It finds the "best fit line" through data points.

+ +
+
Key Concepts
+
    +
  • Predicts continuous values (prices, temperatures, etc.)
  • +
  • Finds the straight line that best fits the data
  • +
  • Uses equation: y = mx + c
  • +
  • Minimizes prediction errors
  • +
+
- +

Understanding Linear Regression

+

Think of it like this: You want to predict house prices based on size. If you plot size vs. price on a graph, you'll see points scattered around. Linear regression draws the "best" line through these points that you can use to predict prices for houses of any size.

- - +
+ The Linear Equation: + y = mx + c +
where:
y = predicted value (output)
x = input feature
m = slope (how steep the line is)
c = intercept (where line crosses y-axis)
+
- +

Example: Predicting Salary from Experience

+

Let's say we have data about employees' years of experience and their salaries:

- + + + + + + + + + + + + + + + +
Experience (years) | Salary ($k)
1 | 39.8
2 | 48.9
3 | 57.0
4 | 68.3
5 | 77.9
6 | 85.0
- - +

We can find a best-fit line (approximately y = 9.3x + 30.4 for this data) that predicts: someone with 7 years of experience will earn approximately $95k.

- +
+
+ +
+

Figure 1: Scatter plot showing experience vs. salary with the best fit line

+
- +
+
+ + +
+
+ + +
+
- - +
+ Cost Function (Mean Squared Error): + MSE = Σ(y_actual - y_predicted)² / n +
This measures how wrong our predictions are. Lower MSE = better fit! +
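As a quick check, the MSE formula can be evaluated on the experience/salary data from the table above (the candidate lines here are just examples):

```python
# Mean Squared Error for a candidate line y = m*x + c
# on the experience/salary data from the table above.
data = [(1, 39.8), (2, 48.9), (3, 57.0), (4, 68.3), (5, 77.9), (6, 85.0)]

def mse(m, c, points):
    """Average squared gap between actual and predicted salaries."""
    return sum((y - (m * x + c)) ** 2 for x, y in points) / len(points)

print(round(mse(9.0, 31.0, data), 3))  # a line close to the data -> small error
print(round(mse(2.0, 10.0, data), 3))  # a line far from the data -> large error
```

A lower MSE means the line tracks the points more closely, which is exactly what "best fit" means.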
- +
+
💡 Key Insight
+
+ The "best fit line" is the one that minimizes the total error between actual points and predicted points. We square the errors so positive and negative errors don't cancel out. +
+
- +
+
โš ๏ธ Common Mistake
+
+ Linear regression assumes a straight-line relationship. If your data curves, you need polynomial regression or other techniques! +
+
- +
- + +
+
+

📊 Supervised - Optimization: Gradient Descent

+ +
+
+

Gradient Descent is the optimization algorithm that helps us find the best values for our model parameters (like m and c in linear regression). Think of it as rolling a ball downhill to find the lowest point.

+ +
+
Key Concepts
+
    +
  • Optimization algorithm to minimize loss function
  • +
  • Takes small steps in the direction of steepest descent
  • +
  • Learning rate controls step size
  • +
  • Stops when it reaches the minimum (convergence)
  • +
+
- +

Understanding Gradient Descent

+

Imagine you're hiking down a mountain in thick fog. You can't see the bottom, but you can feel the slope under your feet. The smart strategy? Always step in the steepest downward direction. That's exactly what gradient descent does with mathematical functions!

- -
- +
+
💡 The Mountain Analogy
+
+ Your position on the mountain = current parameter values (m, c)
+ Your altitude = loss/error
+ Goal = reach the valley (minimum loss)
+ Gradient = tells you which direction is steepest +
+
- -
- -
-
- Topic 1 -

๐Ÿ“Š What is Statistics & Why It Matters

-

The science of collecting, organizing, analyzing, and interpreting data

-
+
+ Gradient Descent Update Rule: + θ_new = θ_old - α × ∇J(θ) +
where:
θ = parameters (m, c)
α = learning rate (step size)
∇J(θ) = gradient (direction and steepness)
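A minimal sketch of this update rule on the toy function J(θ) = θ² (whose gradient is 2θ) shows the parameter sliding toward the minimum at θ = 0:

```python
# Gradient descent on J(theta) = theta**2, gradient dJ/dtheta = 2*theta.
theta = 5.0   # starting position on the "mountain"
alpha = 0.1   # learning rate (step size)

for _ in range(100):
    grad = 2 * theta               # direction and steepness
    theta = theta - alpha * grad   # the update rule above

print(theta)  # very close to the minimum at 0
```

Each step shrinks θ by a constant factor, so the parameter converges geometrically toward the minimum.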
+
-
-

Introduction

-

What is it? Statistics is a branch of mathematics that deals with data. It provides methods to make sense of numbers and help us make informed decisions based on evidence rather than guesswork.

-

Why it matters: From business forecasting to medical research, sports analysis to government policy, statistics powers nearly every decision in our modern world.

-

When to use it: Whenever you need to understand patterns, test theories, make predictions, or draw conclusions from data.

-
+

The Learning Rate (α)

+

The learning rate is like your step size when walking down the mountain:

+
    +
  • Too small: You take tiny steps and it takes forever to reach the bottom
  • +
  • Too large: You take huge leaps and might jump over the valley or even go uphill!
  • +
  • Just right: You make steady progress toward the minimum
  • +
-
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

Imagine Netflix deciding what shows to produce. They analyze viewing statistics: what genres people watch, when they pause, what they finish. Statistics transforms millions of data points into actionable insights like "Create more thriller series" or "Release episodes on Fridays."

-
+
+
+ +
+

Figure 2: Loss surface showing gradient descent path to minimum

+
-
-

Two Branches of Statistics

-
-
-

Descriptive Statistics

-
    -
  • Summarizes and describes data
  • -
  • Uses charts, graphs, averages
  • -
  • Example: "Average class score is 85"
  • -
+
+
+ +
-
-

Inferential Statistics

-
    -
  • Makes predictions and inferences
  • -
  • Tests hypotheses
  • -
  • Example: "New teaching method improves scores"
  • -
+
+ +
-
-
-

Use Cases & Applications

-
    -
  • Healthcare: Clinical trials testing new drugs, disease outbreak tracking
  • -
  • Business: Customer behavior analysis, sales forecasting, A/B testing
  • -
  • Government: Census data, economic indicators, policy impact assessment
  • -
  • Sports: Player performance metrics, game strategy optimization
  • -
-
+
+ Gradients for Linear Regression: + ∂MSE/∂m = (2/n) × Σ(ŷ - y) × x
+ ∂MSE/∂c = (2/n) × Σ(ŷ - y) +
These tell us how much to adjust m and c +
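Plugging these gradients into the update rule gives a complete training loop. The sketch below fits the experience/salary data from the earlier table (the learning rate and iteration count are illustrative):

```python
# Batch gradient descent for y = m*x + c on the salary data.
data = [(1, 39.8), (2, 48.9), (3, 57.0), (4, 68.3), (5, 77.9), (6, 85.0)]
m, c = 0.0, 0.0
alpha = 0.02   # learning rate
n = len(data)

for _ in range(20000):
    # Gradients of MSE with respect to m and c (formulas above).
    grad_m = (2 / n) * sum(((m * x + c) - y) * x for x, y in data)
    grad_c = (2 / n) * sum(((m * x + c) - y) for x, y in data)
    m -= alpha * grad_m
    c -= alpha * grad_c

print(round(m, 2), round(c, 2))  # slope and intercept of the fitted line
```

The loop recovers a slope near 9.3 and an intercept near 30.4, matching the least-squares fit for this data.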
+ +

Types of Gradient Descent

+
    +
  1. Batch Gradient Descent: Uses all data points for each update. Accurate but slow for large datasets.
  2. +
  3. Stochastic Gradient Descent (SGD): Uses one random data point per update. Fast but noisy.
  4. +
  5. Mini-batch Gradient Descent: Uses small batches (e.g., 32 points). Best of both worlds!
  6. +
+ +
+
โš ๏ธ Watch Out!
+
+ Gradient descent can get stuck in local minima (small valleys) instead of finding the global minimum (deepest valley). This is more common with complex, non-convex loss functions. +
+
-
-

๐ŸŽฏ Key Takeaways

+

Convergence Criteria

+

How do we know when to stop? We stop when:

    -
  • Statistics transforms raw data into meaningful insights
  • -
  • Two main branches: Descriptive (what happened) and Inferential (what will happen)
  • -
  • Essential for decision-making across all fields
  • -
  • Combines mathematics with real-world problem solving
  • +
  • Loss stops decreasing significantly (e.g., change < 0.0001)
  • +
  • Gradients become very small (near zero)
  • +
  • We reach maximum iterations (e.g., 1000 steps)
-
- - -
-
- Topic 2 -

๐Ÿ‘ฅ Population vs Sample

-

Understanding the difference between the entire group and a subset

-
+
-
-

Introduction

-

What is it? A population includes ALL members of a defined group. A sample is a subset selected from that population.

-

Why it matters: It's usually impossible or impractical to study entire populations. Sampling allows us to make inferences about large groups by studying smaller representative groups.

-

When to use it: Use populations when you can access all data; use samples when populations are too large, expensive, or time-consuming to study.

-
+ +
+
+

📊 Supervised - Classification: Logistic Regression

+ +
+
+

Logistic Regression is used for binary classification - when you want to predict categories (yes/no, spam/not spam, disease/healthy) not numbers. Despite its name, it's a classification algorithm!

+ +
+
Key Concepts
+
    +
  • Binary classification (2 classes: 0 or 1)
  • +
  • Uses sigmoid function to output probabilities
  • +
  • Output is always between 0 and 1
  • +
  • Uses log loss (cross-entropy) instead of MSE
  • +
+
-
-
๐Ÿ’ก REAL-WORLD ANALOGY
-

Think of tasting soup. You don't need to eat the entire pot (population) to know if it needs salt. A single spoonful (sample) gives you a good ideaโ€”as long as you stirred it well first!

-
+

Why Not Linear Regression?

+

Imagine using linear regression (y = mx + c) for classification. The problems:

+
    +
  • Can predict values < 0 or > 1 (not valid probabilities!)
  • +
  • Sensitive to outliers pulling the line
  • +
  • No natural threshold for decision making
  • +
-
-

Interactive Visualization

- -
- - -
- - +
+
โš ๏ธ The Problem
+
+ Linear regression: ŷ = mx + c can give ANY value (-∞ to +∞)
+ Classification needs: probability between 0 and 1
-
- -
-

Key Differences

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AspectPopulationSample
SizeEntire group (N)Subset (n)
SymbolN (uppercase)n (lowercase)
CostHighLower
TimeLongShorter
Accuracy100% (if measured correctly)Has sampling error
-
-
-
โš ๏ธ COMMON MISTAKE
-

Biased Sampling: If your sample doesn't represent the population, your conclusions will be wrong. Example: Surveying only morning shoppers at a store will miss evening customer patterns.

-
+

Enter the Sigmoid Function

+

The sigmoid function σ(z) squashes any input into the range [0, 1], making it perfect for probabilities!

-
-
โœ… PRO TIP
-

For a sample to be representative, use random sampling. Every member of the population should have an equal chance of being selected.

-
+
+ Sigmoid Function: + σ(z) = 1 / (1 + e^(-z)) +
where:
z = w·x + b (linear combination)
σ(z) = probability (always between 0 and 1)
e ≈ 2.718 (Euler's number)
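The sigmoid is a one-liner in code; the spot checks below mirror the properties listed in this section:

```python
import math

def sigmoid(z):
    """Squash any real number into (0, 1)."""
    return 1.0 / (1.0 + math.exp(-z))

print(sigmoid(0))    # 0.5 at the middle point
print(sigmoid(10))   # close to 1 for large positive z
print(sigmoid(-10))  # close to 0 for large negative z
```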
+
-
-

๐ŸŽฏ Key Takeaways

+

Sigmoid Properties:

    -
  • Population (N): All members of a defined group
  • -
  • Sample (n): A subset selected from the population
  • -
  • Good samples are random and representative
  • -
  • Larger samples generally provide better estimates
  • +
  • Input: Any real number (-∞ to +∞)
  • +
  • Output: Always between 0 and 1
  • +
  • Shape: S-shaped curve
  • +
  • At z=0: σ(0) = 0.5 (middle point)
  • +
  • As z→∞: σ(z) → 1
  • +
  • As z→-∞: σ(z) → 0
-
- - - -
-
- Topic 3 -

๐Ÿ“ˆ Parameters vs Statistics

-

Population measures vs sample measures

-
-
-

Introduction

-

What is it? A parameter is a numerical characteristic of a population. A statistic is a numerical characteristic of a sample.

-

Why it matters: We usually can't measure parameters directly (populations are too large), so we estimate them using statistics from samples.

-

When to use it: Parameters are what we want to know; statistics are what we can calculate.

-
+
+
+ +
+

Figure: Sigmoid function transforms linear input to probability

+
-
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

You want to know the average height of all students in your country (parameter). You can't measure everyone, so you measure 1,000 students (sample) and calculate their average height (statistic) to estimate the population parameter.

-
+

Logistic Regression Formula

+
+ Complete Process: + 1. Linear combination: z = w·x + b
+ 2. Sigmoid transformation: p = σ(z) = 1/(1 + e^(-z))
+ 3. Decision: if p ≥ 0.5 → Class 1, else → Class 0 +
+ +

Example: Height Classification

+

Let's classify people as "Tall" (1) or "Not Tall" (0) based on height:

-
-

Common Parameters and Statistics

- +
- - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + +
Measure | Parameter (Population) | Statistic (Sample)
Height (cm) | Label | Probability
Mean (Average)ฮผ (mu)xฬ„ (x-bar)
Standard Deviationฯƒ (sigma)s
Varianceฯƒยฒsยฒ
Proportionppฬ‚ (p-hat)
SizeNn
150 | 0 (Not Tall) | 0.2
160 | 0 | 0.35
170 | 0 | 0.5
180 | 1 (Tall) | 0.65
190 | 1 | 0.8
200 | 1 | 0.9
-
-
-

The Relationship

-
-
Key Concept
-

- Statistic โ†’ Estimates โ†’ Parameter -

-

We use statistics (calculated from samples) to estimate parameters (unknown population values).

+
+
+ +
+

Figure: Logistic regression with decision boundary at 0.5

-
-
-
๐Ÿ“Š EXAMPLE
-
-

Scenario: A factory wants to know the average weight of cereal boxes.

-
    -
  • Population: All cereal boxes produced (millions)
  • -
  • Parameter: ฮผ = true average weight of ALL boxes (unknown)
  • -
  • Sample: 100 randomly selected boxes
  • -
  • Statistic: xฬ„ = 510 grams (calculated from the 100 boxes)
  • -
  • Inference: We estimate ฮผ โ‰ˆ 510 grams
  • -
+

Log Loss (Cross-Entropy)

+

We can't use MSE for logistic regression because it creates a non-convex optimization surface (multiple local minima). Instead, we use log loss:

+ +
+ Log Loss for Single Sample: + L(y, p) = -[y·log(p) + (1-y)·log(1-p)] +
where:
y = actual label (0 or 1)
p = predicted probability
-
-
-
โš ๏ธ COMMON MISTAKE
-

Confusing symbols! Greek letters (ฮผ, ฯƒ, ฯ) refer to parameters (population). Roman letters (xฬ„, s, r) refer to statistics (sample).

-
+

Understanding Log Loss:

+

Case 1: Actual y=1, Predicted p=0.9

+

Loss = -[1·log(0.9) + 0·log(0.1)] = -log(0.9) = 0.105 ✓ Low loss (good!)

-
-

๐ŸŽฏ Key Takeaways

-
    -
  • Parameter: Describes a population (usually unknown)
  • -
  • Statistic: Describes a sample (calculated from data)
  • -
  • Greek letters = population, Roman letters = sample
  • -
  • Statistics are used to estimate parameters
  • -
-
-
- - -
-
- Topic 4 -

๐Ÿ”ข Types of Data

-

Categorical, Numerical, Discrete, Continuous, Ordinal, Nominal

-
+

Case 2: Actual y=1, Predicted p=0.1

+

Loss = -[1·log(0.1) + 0·log(0.9)] = -log(0.1) = 2.303 ✗ High loss (bad!)

-
-

Introduction

-

What is it? Data comes in different types, and understanding these types determines which statistical methods you can use.

-

Why it matters: Using the wrong analysis method for your data type leads to incorrect conclusions. You can't calculate an average of colors!

-

When to use it: Before any analysis, identify your data type to choose appropriate statistical techniques.

-
+

Case 3: Actual y=0, Predicted p=0.1

+

Loss = -[0·log(0.1) + 1·log(0.9)] = -log(0.9) = 0.105 ✓ Low loss (good!)

-
-

Data Type Hierarchy

-
-
-
DATA
-
-
-
CATEGORICAL
-
NUMERICAL
-
-
-
Nominal
-
Ordinal
-
Discrete
-
Continuous
+
+
💡 Why Log Loss Works
+
+ Log loss heavily penalizes confident wrong predictions! If you predict 0.99 but the answer is 0, you get a huge penalty. This encourages the model to be accurate AND calibrated.
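The three cases worked above can be verified numerically (log is the natural logarithm here):

```python
import math

def log_loss(y, p):
    """Cross-entropy loss for a single sample with label y and probability p."""
    return -(y * math.log(p) + (1 - y) * math.log(1 - p))

print(round(log_loss(1, 0.9), 3))  # confident and right -> 0.105
print(round(log_loss(1, 0.1), 3))  # confident and wrong -> 2.303
print(round(log_loss(0, 0.1), 3))  # confident and right -> 0.105
```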
-
-
-

Categorical Data

-

Represents categories or groups (qualitative)

- -
-
-

Nominal

-

Categories with NO order

-
    -
  • Colors: Red, Blue, Green
  • -
  • Gender: Male, Female, Non-binary
  • -
  • Country: USA, India, Japan
  • -
  • Blood Type: A, B, AB, O
  • -
-
-
-

Ordinal

-

Categories WITH meaningful order

-
    -
  • Education: High School < Bachelor's < Master's
  • -
  • Satisfaction: Poor < Fair < Good < Excellent
  • -
  • Medal: Bronze < Silver < Gold
  • -
  • Size: Small < Medium < Large
  • -
+

Training with Gradient Descent

+

Just like linear regression, we use gradient descent to optimize weights:

+ +
+ Gradient for Logistic Regression: + ∂Loss/∂w = (p - y)·x
+ ∂Loss/∂b = (p - y) +
Update: w = w - α·∂Loss/∂w +
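A compact training sketch using these gradients, on the height data from the table above (heights are rescaled so gradient descent behaves well; the learning rate and epoch count are illustrative):

```python
import math

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

# Height data from the table: (height_cm, label).
# Heights are rescaled as (h - 175) / 10 to keep gradients well-scaled.
data = [(150, 0), (160, 0), (170, 0), (180, 1), (190, 1), (200, 1)]
w, b, alpha = 0.0, 0.0, 0.5

for _ in range(5000):
    for h, y in data:
        x = (h - 175) / 10
        p = sigmoid(w * x + b)
        w -= alpha * (p - y) * x   # gradient step for the weight
        b -= alpha * (p - y)       # gradient step for the bias

# Taller people should get higher probabilities of "Tall".
print(sigmoid(w * (200 - 175) / 10 + b))  # near 1
print(sigmoid(w * (150 - 175) / 10 + b))  # near 0
```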
+ +
+
✅ Key Takeaway
+
+ Logistic regression = Linear regression + Sigmoid function + Log loss. It's called "regression" for historical reasons, but it's actually for classification!
+
-
-

Numerical Data

-

Represents quantities (quantitative)

- -
-
-

Discrete

-

Countable, specific values only

-
    -
  • Number of students: 25, 30, 42
  • -
  • Number of cars: 0, 1, 2, 3...
  • -
  • Dice roll: 1, 2, 3, 4, 5, 6
  • -
  • Number of children: 0, 1, 2, 3...
  • -
-

Can't have 2.5 students!

-
-
-

Continuous

-

Can take any value in a range

-
    -
  • Height: 165.3 cm, 180.7 cm
  • -
  • Weight: 68.5 kg, 72.3 kg
  • -
  • Temperature: 23.4ยฐC, 24.7ยฐC
  • -
  • Time: 3.25 seconds
  • -
-

Infinite precision possible

+ +
+
+

📊 Supervised - Classification: Support Vector Machines (SVM)

+ +
+
+ +

What is SVM?

+

Support Vector Machine (SVM) is a powerful supervised machine learning algorithm used for both classification and regression tasks. Unlike logistic regression which just needs any line that separates the classes, SVM finds the BEST decision boundary - the one with the maximum margin between classes.

+ +
+
Key Concepts
+
    +
  • Finds the best decision boundary with maximum margin
  • +
  • Support vectors are critical points that define the margin
  • +
  • Score is proportional to distance from boundary
  • +
  • Only support vectors matter - other points don't affect boundary
  • +
+
+ +
+
💡 Key Insight
+
+ SVM doesn't just want w·x + b > 0, it wants every point to be confidently far from the boundary. The score is directly proportional to the distance from the decision boundary!
-
-
-
๐Ÿ’ก QUICK TEST
-

Ask yourself:

-
    -
  1. Is it a label/category? โ†’ Categorical
  2. -
  3. Is it a number? โ†’ Numerical
  4. -
  5. Can you count it? โ†’ Discrete
  6. -
  7. Can you measure it? โ†’ Continuous
  8. -
  9. Does order matter? โ†’ Ordinal (else Nominal)
  10. -
-
+ +

Dataset and Example

+

Let's work with a simple 2D dataset to understand SVM:

-
-
๐Ÿ“Š EXAMPLES
- +
- - - + + + + - - - - - - - - - - - - - - - - - - - - + + + + + +
Data | Type | Reason
Point | X₁ | X₂ | Class
Zip codesCategorical (Nominal)Numbers used as labels, not quantities
Test scores (A, B, C, D, F)Categorical (Ordinal)Categories with clear order
Number of pages in booksNumerical (Discrete)Countable whole numbers
Reaction time in millisecondsNumerical (Continuous)Can be measured to any precision
A | 2 | 7 | +1
B | 3 | 8 | +1
C | 4 | 7 | +1
D | 6 | 2 | -1
E | 7 | 3 | -1
F | 8 | 2 | -1
-
-
-
โš ๏ธ COMMON MISTAKE
-

Just because something is written as a number doesn't make it numerical! Phone numbers, jersey numbers, and zip codes are categorical because they identify categories, not quantities.

-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Categorical: Labels/categories (Nominal: no order, Ordinal: has order)
  • -
  • Numerical: Quantities (Discrete: countable, Continuous: measurable)
  • -
  • Data type determines which statistical methods to use
  • -
  • Always identify data type before analysis
  • -
-
-
- - -
-
- Topic 5 -

๐Ÿ“ Measures of Central Tendency

-

Mean, Median, Mode - Finding the center of data

-
+

Initial parameters: w₁ = 1, w₂ = 1, b = -10
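With these starting parameters, the score w·x + b and the margin condition can be checked for each point in the table (a small sketch):

```python
# Scores w.x + b for the 6 points with w = [1, 1], b = -10.
points = {"A": (2, 7, +1), "B": (3, 8, +1), "C": (4, 7, +1),
          "D": (6, 2, -1), "E": (7, 3, -1), "F": (8, 2, -1)}
w, b = [1.0, 1.0], -10.0

for name, (x1, x2, label) in points.items():
    score = w[0] * x1 + w[1] * x2 + b
    ok = label * score >= 1   # SVM's margin condition y*(w.x + b) >= 1
    print(name, score, "meets margin" if ok else "violates margin")
```

Point A gets score -1 despite being class +1, and E and F sit exactly on the boundary, so these starting parameters are far from the maximum-margin solution.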

-
-

Introduction

-

What is it? Measures of central tendency are single values that represent the "center" or "typical" value in a dataset.

-

Why it matters: Instead of looking at hundreds of numbers, one central value summarizes the data. "Average salary" tells you more than listing every employee's salary.

-

When to use it: When you need to summarize data with a single representative value.

-
+ +

Decision Boundary

+

The decision boundary is a line (or hyperplane in higher dimensions) that separates the two classes. It's defined by the equation:

-
-
๐Ÿ’ก REAL-WORLD ANALOGY
-

Imagine finding the "center" of a group of people standing on a field. Mean is like finding the balance point where they'd balance on a seesaw. Median is literally the middle person. Mode is where the most people are clustered together.

-
+
+ Decision Boundary Equation: + w·x + b = 0 +
where:
w = [w₁, w₂] is the weight vector
x = [x₁, x₂] is the data point
b is the bias term
+
-
-

Mathematical Foundations

- -
-
Mean (Average)
-
- ฮผ = - - ฮฃx - - n - -
-

Where:

-
    -
  • ฮผ (mu) = population mean or xฬ„ (x-bar) = sample mean
  • -
  • ฮฃx = sum of all values
  • -
  • n = number of values
  • +
    +
    Interpretation
    +
      +
    • w·x + b > 0 → point above line → class +1
    • +
    • w·x + b < 0 → point below line → class -1
    • +
    • w·x + b = 0 → exactly on boundary
    -
    -

    Steps:

    -
      -
    1. Add all values together
    2. -
    3. Divide by the count of values
    4. -
    -
    -
    -
    Median (Middle Value)
    -
    -

    If odd number of values: Middle value

    -

    If even number of values: Average of two middle values

    -
    -
    -

    Steps:

    -
      -
    1. Sort values in ascending order
    2. -
    3. Find the middle position: (n + 1) / 2
    4. -
    5. If between two values, average them
    6. -
    +
    +
    +
    +

    Figure 3: SVM decision boundary with 6 data points. Hover to see scores.

    -
    -
    Mode (Most Frequent)
    -
    -

    The value(s) that appear most frequently

    +
    +
    + +
    -
    -

    Types:

    -
      -
    • Unimodal: One mode
    • -
    • Bimodal: Two modes
    • -
    • Multimodal: More than two modes
    • -
    • No mode: All values appear equally
    • -
    +
    + + +
    +
    + +
    -
    -
    -

    Interactive Calculator

    - -
    -
    - - - - -
    -
    -
    Mean: 30
    -
    Median: 30
    -
    Mode: None
    + +

    Margin and Support Vectors

    + +
    +
    ๐Ÿ“ Understanding Margin
    +
+ The margin is the distance between the decision boundary and the closest points from each class. Support vectors are the points exactly at the margin (with score = ±1). These are the points with "lowest acceptable confidence" and they're the only ones that matter for defining the boundary!
    -
    -
    -
    ๐Ÿ“Š WORKED EXAMPLE
    -

    Dataset: Test scores: 65, 70, 75, 80, 85, 90, 95

    -
    -

    Mean:

    -

    Sum = 65 + 70 + 75 + 80 + 85 + 90 + 95 = 560

    -

    Mean = 560 / 7 = 80

    - -

    Median:

    -

    Already sorted. Middle position = (7 + 1) / 2 = 4th value

    -

    Median = 80

    - -

    Mode:

    -

    All values appear once. No mode

    +
+ Margin Constraints: + For positive points (yᵢ = +1): w·xᵢ + b ≥ +1
+ For negative points (yᵢ = -1): w·xᵢ + b ≤ -1
+
+ Combined: yᵢ(w·xᵢ + b) ≥ 1
+
+ Margin Width: 2/||w|| +
To maximize margin → minimize ||w||
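For example, the margin width 2/||w|| can be computed for a couple of weight vectors (the vectors are illustrative):

```python
import math

def margin_width(w):
    """Margin width 2 / ||w|| for weight vector w."""
    norm = math.sqrt(sum(wi * wi for wi in w))
    return 2.0 / norm

print(margin_width([1.0, 1.0]))  # ||w|| = sqrt(2), width = 2/sqrt(2)
print(margin_width([0.5, 0.5]))  # smaller ||w|| -> wider margin
```

Halving w doubles the margin width, which is why maximizing the margin and minimizing ||w|| are the same goal.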
    -
    -
    -

    When to Use Which?

    -
    -
    -

    Use Mean

    -
      -
    • Data is symmetrical
    • -
    • No extreme outliers
    • -
    • Numerical data
    • -
    • Need to use all data points
    • -
    -
    -
    -

    Use Median

    -
      -
    • Data has outliers
    • -
    • Data is skewed
    • -
    • Ordinal data
    • -
    • Need robust measure
    • -
    -
    -
    -

    Use Mode

    -
      -
    • Categorical data
    • -
    • Finding most common value
    • -
    • Discrete data
    • -
    • Multiple peaks in data
    • -
    +
    +
    +
    +

    Figure 4: Decision boundary with margin lines and support vectors highlighted in cyan

    -
    -
    -
    โš ๏ธ COMMON MISTAKE
    -

    Mean is affected by outliers! In salary data like $30K, $35K, $40K, $45K, $500K, the mean is $130K (misleading!). The median of $40K better represents typical salary.

    -
    + +

    Hard Margin vs Soft Margin

    -
    -
    โœ… PRO TIP
    -

    For skewed data (like income, house prices), always report the median along with the mean. If they're very different, your data has outliers or is skewed!

    -
    +

    Hard Margin SVM

    +

    Hard margin SVM requires perfect separation - no points can violate the margin. It works only when data is linearly separable.

    - -
    -

    ๐Ÿ“ Worked Example - Step by Step

    - -
    -

    Problem:

    -

    Find the mean, median, and mode of: [12, 15, 12, 18, 20, 15, 12, 22]

    +
+ Hard Margin Optimization: + minimize (1/2)||w||²
+ subject to: yᵢ(w·xᵢ + b) ≥ 1 for all i
    - -
    -

    Solution:

    - -
    -
    Step 1:
    -
    -

    Calculate the Mean (Average)

    -
    - Sum = 12 + 15 + 12 + 18 + 20 + 15 + 12 + 22 = 126
    - Count (n) = 8 values
    - Mean = Sum รท n = 126 รท 8 = 15.75 -
    -

    Add all values together, then divide by how many values there are

    -
    + +
    +
    โš ๏ธ Hard Margin Limitation
    +
    + Hard margin can lead to overfitting if we force perfect separation on noisy data! Real-world data often has outliers and noise.
    - -
    -
    Step 2:
    -
    -

    Find the Median (Middle Value)

    -
    - Sorted data: [12, 12, 12, 15, 15, 18, 20, 22]
    - Even number of values (8), so average the middle two
    - Middle positions: 4th and 5th values = 15 and 15
    - Median = (15 + 15) รท 2 = 15 -
    -

    For even-sized datasets, average the two middle values

    -
    +
    + +

    Soft Margin SVM

    +

    Soft margin SVM allows some margin violations, making it more practical for real-world data. It balances margin maximization with allowing some misclassifications.

    + +
+ Soft Margin Cost Function: + Cost = (1/2)||w||² + C·Σ max(0, 1 - yᵢ(w·xᵢ + b))
+       ↓                           ↓
+ Maximize margin      Hinge Loss
+                           (penalize violations) +
    + + +

    The C Parameter

    +

    The C parameter controls the trade-off between maximizing the margin and minimizing classification errors. It acts like regularization in other ML algorithms.

    + +
    +
    Effects of C Parameter
    +
      +
    • Small C (0.1 or 1): Wider margin, more violations allowed, better generalization, use when data is noisy
    • +
    • Large C (1000): Narrower margin, fewer violations, classify everything correctly, risk of overfitting, use when data is clean
    • +
    +
    + +
    +
    +
    - -
    -
    Step 3:
    -
    -

    Find the Mode (Most Frequent Value)

    -
    - Frequency count:
    - โ€ข 12 appears 3 times โ† Most frequent!
    - โ€ข 15 appears 2 times
    - โ€ข 18, 20, 22 each appear 1 time
    - Mode = 12 -
    -

    The mode is the value that appears most often

    +

    Figure 5: Effect of C parameter on margin and violations

    +
    + +
    +
    + + +

Slide to see: 0.1 → 1 → 10 → 1000

    +
    +
    +
    +
    Margin Width
    +
    2.00
    +
    +
    Violations
    +
    0
    +
    +
    +
    + + +

    Training Algorithm

    +

    SVM can be trained using gradient descent. For each training sample (xแตข, yแตข), we check if it violates the margin and update weights accordingly.

    + +
+ Update Rules:
+
+ Case 1: No violation (yᵢ(w·xᵢ + b) ≥ 1)
+   w = w - η·w  (just regularization)
+   b = b
+
+ Case 2: Violation (yᵢ(w·xᵢ + b) < 1)
+   w = w - η(w - C·yᵢ·xᵢ)
+   b = b + η·C·yᵢ
+
+ where η = learning rate (e.g., 0.01) +
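These update rules translate into a short sub-gradient training loop. The sketch below uses the 6-point dataset from earlier; η, C, and the epoch count are illustrative:

```python
# Soft-margin SVM trained with the update rules above on the 6-point dataset.
data = [((2, 7), +1), ((3, 8), +1), ((4, 7), +1),
        ((6, 2), -1), ((7, 3), -1), ((8, 2), -1)]
w, b = [0.0, 0.0], 0.0
eta, C = 0.01, 1.0

for _ in range(2000):
    for (x1, x2), y in data:
        score = w[0] * x1 + w[1] * x2 + b
        if y * score >= 1:                       # case 1: no violation
            w = [wi - eta * wi for wi in w]      # shrink w (regularization only)
        else:                                    # case 2: violation
            w = [w[0] - eta * (w[0] - C * y * x1),
                 w[1] - eta * (w[1] - C * y * x2)]
            b = b + eta * C * y

# All six points should now be classified correctly by sign(w.x + b).
preds = [1 if w[0] * x1 + w[1] * x2 + b > 0 else -1
         for (x1, x2), _ in data]
print(preds)
```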
    + +
    +
    +
    - -
    - Final Answer: - Mean = 15.75, Median = 15, Mode = 12 +

    Figure 6: SVM training visualization - step through each point

    +
    + +
    +
    + + +
    - -
    - โœ“ Check: -

    Mean (15.75) is slightly higher than median (15) because the outlier 22 pulls it up. The mode (12) is the lowest because it's the most common value at the lower end.

    +
    +
    Step: 0 / 6
    +
    Current Point: -
    +
    w = [0.00, 0.00]
    +
    b = 0.00
    +
    Violation: -
    - -
    -

    ๐Ÿ’ช Try These:

    -
      -
    1. Find the mean of: [5, 10, 15, 20]
    2. -
    3. What's the median of: [3, 1, 4, 1, 5]?
    4. -
    5. Find the mode of: [2, 2, 3, 4, 4, 4, 5]
    6. -
    - - -
    -

    ๐ŸŽฏ Key Takeaways

    -
      -
    • Mean: Sum of all values divided by count (affected by outliers)
    • -
    • Median: Middle value when sorted (resistant to outliers)
    • -
    • Mode: Most frequent value (useful for categorical data)
    • -
    • Choose the measure that best represents your data type and distribution
    • -
    -
    -
- - -
-
- Topic 6 -

โšก Outliers

-

Extreme values that don't fit the pattern

-
+ +

SVM Kernels (Advanced)

+

Real-world data is often not linearly separable. Kernels transform data to higher dimensions where a linear boundary exists, which appears non-linear in the original space!

-
-

Introduction

-

What is it? Outliers are data points that are significantly different from other observations in a dataset.

-

Why it matters: Outliers can indicate data errors, special cases, or important patterns. They can also severely distort statistical analyses.

-

When to use it: Always check for outliers before analyzing data, especially when calculating means and standard deviations.

-
+
+
💡 The Kernel Trick
+
+ Kernels let us solve non-linear problems without explicitly computing high-dimensional features! They compute similarity between points in transformed space efficiently. +
+
-
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

In a salary dataset for entry-level employees: $35K, $38K, $40K, $37K, $250K. The $250K is an outlierโ€”maybe it's a data entry error (someone added an extra zero) or a special case (CEO's child). Either way, it needs investigation!

-
+
+ Three Main Kernels:
+
+ 1. Linear Kernel
+ K(x₁, x₂) = x₁·x₂
+ Use case: Linearly separable data
+
+ 2. Polynomial Kernel (degree 2)
+ K(x₁, x₂) = (x₁·x₂ + 1)²
+ Use case: Curved boundaries, circular patterns
+
+ 3. RBF / Gaussian Kernel
+ K(x₁, x₂) = e^(-γ||x₁-x₂||²)
+ Use case: Complex non-linear patterns
+ Most popular in practice! +
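Each kernel is a small similarity function; γ and the polynomial degree below are illustrative defaults:

```python
import math

def linear_kernel(a, b):
    """Plain dot product: similarity in the original space."""
    return sum(ai * bi for ai, bi in zip(a, b))

def poly_kernel(a, b, degree=2):
    """Dot product in an implicit polynomial feature space."""
    return (linear_kernel(a, b) + 1) ** degree

def rbf_kernel(a, b, gamma=0.5):
    """Similarity that decays with squared distance between points."""
    sq_dist = sum((ai - bi) ** 2 for ai, bi in zip(a, b))
    return math.exp(-gamma * sq_dist)

x1, x2 = [1.0, 2.0], [2.0, 1.0]
print(linear_kernel(x1, x2))  # 4.0
print(poly_kernel(x1, x2))    # 25.0
print(rbf_kernel(x1, x1))     # 1.0, identical points are maximally similar
```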
-
-

Detection Methods

-
-
-

IQR Method

-

Most common approach:

-
    -
  • Calculate Q1, Q3, and IQR = Q3 - Q1
  • -
  • Lower fence = Q1 - 1.5 ร— IQR
  • -
  • Upper fence = Q3 + 1.5 ร— IQR
  • -
  • Outliers fall outside fences
  • -
-
-
-

Z-Score Method

-

For normal distributions:

-
    -
  • Calculate z-score for each value
  • -
  • z = (x - ฮผ) / ฯƒ
  • -
  • If |z| > 3: definitely outlier
  • -
  • If |z| > 2: possible outlier
  • -
+
+
+
+

Figure 7: Kernel comparison on non-linear data

-
-
-
โš ๏ธ COMMON MISTAKE
-

Never automatically delete outliers! They might be: (1) Valid extreme values, (2) Data entry errors, (3) Important discoveries. Always investigate before removing.

-
+
+
+ +
+ + + +
+
+ +
+ + +

Key Formulas Summary

+ +
+ Essential SVM Formulas:
+
+ 1. Decision Boundary: wยทx + b = 0
+
+ 2. Classification Rule: sign(wยทx + b)
+
+ 3. Margin Width: 2/||w||
+
+ 4. Hard Margin Optimization:
+    minimize (1/2)||w||ยฒ
+    subject to yแตข(wยทxแตข + b) โ‰ฅ 1
+
+ 5. Soft Margin Cost:
+    (1/2)||w||ยฒ + Cยทฮฃ max(0, 1 - yแตข(wยทxแตข + b))
+
+ 6. Hinge Loss: max(0, 1 - yแตข(wยทxแตข + b))
+
+ 7. Update Rules (if violation):
+    w = w - ฮท(w - Cยทyแตขยทxแตข)
+    b = b + ฮทยทCยทyแตข
+
+ 8. Kernel Functions:
+    Linear: K(xโ‚, xโ‚‚) = xโ‚ยทxโ‚‚
+    Polynomial: K(xโ‚, xโ‚‚) = (xโ‚ยทxโ‚‚ + 1)^d
+    RBF: K(xโ‚, xโ‚‚) = e^(-ฮณ||xโ‚-xโ‚‚||ยฒ) +
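Formulas 6 and 7 can be combined into a tiny sketch of one stochastic-gradient step for the soft-margin objective (a minimal illustration with made-up numbers, not a full SVM trainer; function names are our own):

```python
def hinge_loss(w, b, x, y):
    # Formula 6: max(0, 1 - y*(w·x + b))
    score = sum(wi * xi for wi, xi in zip(w, x)) + b
    return max(0.0, 1.0 - y * score)

def sgd_step(w, b, x, y, C=1.0, eta=0.1):
    # Formula 7: apply the update only when the margin is violated.
    # (Full trainers also shrink w by the regularization term otherwise.)
    if hinge_loss(w, b, x, y) > 0:
        w = [wi - eta * (wi - C * y * xi) for wi, xi in zip(w, x)]
        b = b + eta * C * y
    return w, b

# A misclassified positive example: loss = max(0, 1 - 0) = 1 > 0, so the update fires.
w, b = sgd_step([0.0, 0.0], 0.0, x=[1.0, 2.0], y=+1)
print(w, b)  # [0.1, 0.2] 0.1
```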
+ + +

Practical Insights

+ +
+
โœ… Why SVM is Powerful
+
+ SVM only cares about support vectors - the points closest to the boundary. Other points don't affect the decision boundary at all! This makes it memory efficient and robust. +
+
+ +
+
When to Use SVM
+
    +
  • Small to medium datasets (works great up to ~10,000 samples)
  • +
  • High-dimensional data (even more features than samples!)
  • +
  • Clear margin of separation exists between classes
  • +
  • Need interpretable decision boundary
  • +
+
-
-

๐ŸŽฏ Key Takeaways

+

Advantages

    -
  • Outliers are extreme values that differ significantly from other data
  • -
  • Use IQR method (1.5 ร— IQR rule) or Z-score method to detect
  • -
  • Mean is heavily affected by outliers; median is resistant
  • -
  • Always investigate outliers before deciding to keep or remove
  • +
  • Effective in high dimensions: Works well even when features > samples
  • +
  • Memory efficient: Only stores support vectors, not entire dataset
  • +
  • Versatile: Different kernels for different data patterns
  • +
  • Robust: Works well with clear margin of separation
-
-
- - -
-
- Topic 7 -

๐Ÿ“ Variance & Standard Deviation

-

Measuring spread and variability in data

-
-
-

Introduction

-

What is it? Variance measures the average squared deviation from the mean. Standard deviation is the square root of variance.

-

Why it matters: Shows how spread out data is. Low values mean data is clustered; high values mean data is scattered.

-

When to use it: Whenever you need to understand data variabilityโ€”in finance (risk), manufacturing (quality control), or research (reliability).

-
+

Disadvantages

+
    +
  • Slow on large datasets: Training time grows quickly with >10k samples
  • +
  • No probability estimates: Doesn't directly provide confidence scores
  • +
  • Kernel choice: Requires expertise to select right kernel
  • +
  • Feature scaling: Very sensitive to feature scales
  • +
-
-

Mathematical Formulas

-
-
Population Variance (ฯƒยฒ)
-
ฯƒยฒ = ฮฃ(x - ฮผ)ยฒ / N
-

Where N = population size, ฮผ = population mean

-
-
-
Sample Variance (sยฒ)
-
sยฒ = ฮฃ(x - xฬ„)ยฒ / (n - 1)
-

Where n = sample size, xฬ„ = sample mean. We use (n-1) for unbiased estimation.

+ +

Real-World Example: Email Spam Classification

+ +
+
๐Ÿ“ง Email Spam Detection
+

Imagine we have emails with two features:

+
    +
  • xโ‚ = number of promotional words ("free", "buy", "limited")
  • +
  • xโ‚‚ = number of capital letters
  • +
+

+ SVM finds the widest "road" between spam and non-spam emails. Support vectors are the emails closest to this road - they're the trickiest cases that define our boundary! An email far from the boundary is clearly spam or clearly legitimate. +

-
-
Standard Deviation
-
ฯƒ = โˆš(variance)
-

Same units as original data, easier to interpret

+ +
+
๐ŸŽฏ Key Takeaway
+
+ Unlike other algorithms that try to classify all points correctly, SVM focuses on the decision boundary. It asks: "What's the safest road I can build between these two groups?" The answer: Make it as wide as possible! +
+
-
-
๐Ÿ“Š WORKED EXAMPLE
-

Dataset: [4, 8, 6, 5, 3, 7]

-
-

Step 1: Mean = (4+8+6+5+3+7)/6 = 5.5

-

Step 2: Deviations: [-1.5, 2.5, 0.5, -0.5, -2.5, 1.5]

-

Step 3: Squared: [2.25, 6.25, 0.25, 0.25, 6.25, 2.25]

-

Step 4: Sum = 17.5

-

Step 5: Variance = 17.5/(6-1) = 3.5

-

Step 6: Std Dev = โˆš3.5 = 1.87

+ +
+
+

๐Ÿ“Š Supervised - Classification K-Nearest Neighbors (KNN)

+ +
+
+

K-Nearest Neighbors is one of the simplest machine learning algorithms! To classify a new point, just look at its K nearest neighbors and take a majority vote. No training required!

+ +
+
Key Concepts
+
    +
  • Lazy learning: No training phase, just memorize data
  • +
  • K = number of neighbors to consider
  • +
  • Uses distance metrics (Euclidean, Manhattan)
  • +
  • Classification: majority vote | Regression: average
  • +
-
- -
-

๐Ÿ“ Worked Example - Step by Step

+

How KNN Works

+
    +
  1. Choose K: Decide how many neighbors (e.g., K=3)
  2. +
  3. Calculate distance: Find distance from new point to all training points
  4. +
  5. Find K nearest: Select K points with smallest distances
  6. +
  7. Vote: Majority class wins (or take average for regression)
  8. +
+ +

Distance Metrics

-
-

Problem:

-

Calculate the variance and standard deviation for the dataset: [4, 8, 6, 5, 3]

+
+ Euclidean Distance (straight line): + d = โˆš[(xโ‚-xโ‚‚)ยฒ + (yโ‚-yโ‚‚)ยฒ] +
Like measuring with a ruler - shortest path
- -
-

Solution:

- -
-
Step 1:
-
-

Calculate the Mean

-
- Sum = 4 + 8 + 6 + 5 + 3 = 26
- Mean (xฬ„) = 26 รท 5 = 5.2 -
-

First, we need the mean to calculate deviations

-
-
- -
-
Step 2:
-
-

Find Deviations from Mean

-
- (4 - 5.2) = -1.2
- (8 - 5.2) = 2.8
- (6 - 5.2) = 0.8
- (5 - 5.2) = -0.2
- (3 - 5.2) = -2.2 -
-

Subtract the mean from each value

-
-
- -
-
Step 3:
-
-

Square Each Deviation

-
- (-1.2)ยฒ = 1.44
- (2.8)ยฒ = 7.84
- (0.8)ยฒ = 0.64
- (-0.2)ยฒ = 0.04
- (-2.2)ยฒ = 4.84 -
-

Squaring eliminates negative signs and emphasizes larger deviations

-
+ +
+ Manhattan Distance (city blocks): + d = |xโ‚-xโ‚‚| + |yโ‚-yโ‚‚| +
Like walking on city grid - only horizontal/vertical +
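Both metrics are one-liners in plain Python (function names are our own):

```python
import math

def euclidean(p, q):
    # straight-line distance: sqrt((x1-x2)^2 + (y1-y2)^2)
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(p, q)))

def manhattan(p, q):
    # city-block distance: |x1-x2| + |y1-y2|
    return sum(abs(a - b) for a, b in zip(p, q))

print(euclidean((0, 0), (3, 4)))  # 5.0  (the classic 3-4-5 triangle)
print(manhattan((0, 0), (3, 4)))  # 7    (3 blocks + 4 blocks)
```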
+ +
+
+
- -
-
Step 4:
-
-

Calculate Variance (sample)

-
- Sum of squared deviations = 1.44 + 7.84 + 0.64 + 0.04 + 4.84 = 14.8
- Divide by (n-1) = 5-1 = 4
- sยฒ = 14.8 รท 4 = 3.7 -
-

We use (n-1) for sample variance (Bessel's correction)

-
+

Figure: KNN classification - drag the test point to see predictions

+
+ +
+
+ +
- -
-
Step 5:
-
-

Calculate Standard Deviation

-
- s = โˆšsยฒ = โˆš3.7 โ‰ˆ 1.92 -
-

Standard deviation is the square root of variance

+
+ +
+ +
- -
- Final Answer: - Variance = 3.7, Standard Deviation = 1.92 -
- -
- โœ“ Interpretation: -

A standard deviation of 1.92 means most values fall within about 1.92 units of the mean (5.2). This indicates moderate spread in the data.

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. Calculate the standard deviation of: [2, 4, 6, 8]
  2. -
  3. Find the variance of: [10, 12, 14, 16, 18]
  4. -
- -
-
-
-

๐ŸŽฏ Key Takeaways

-
    -
  • Variance measures average squared deviation from mean
  • -
  • Standard deviation is square root of variance (same units as data)
  • -
  • Use (n-1) for sample variance to avoid bias
  • -
  • Higher values = more spread; lower values = more clustered
  • -
-
-
- - -
-
- Topic 8 -

๐ŸŽฏ Quartiles & Percentiles

-

Dividing data into equal parts

-
+

Worked Example

+

Test point at (2.5, 2.5), K=3:

-
-

Introduction

-

What is it? Quartiles divide sorted data into 4 equal parts. Percentiles divide data into 100 equal parts.

-

Why it matters: Shows relative position in a dataset. "90th percentile" means you scored better than 90% of people.

-
+ + + + + + + + + + + + +
PointPositionClassDistance
A(1.0, 2.0)Orange1.58
B(0.9, 1.7)Orange1.79
C(1.5, 2.5)Orange1.00 ← nearest!
D(4.0, 5.0)Yellow2.92
E(4.2, 4.8)Yellow2.86
F(3.8, 5.2)Yellow3.00
+ +

3-Nearest Neighbors: C (orange), A (orange), B (orange)

+

Vote: 3 orange, 0 yellow โ†’ Prediction: Orange ๐ŸŸ 
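This worked example takes only a few lines of plain Python; the sketch below recomputes every distance itself with `math.dist` and then votes (a minimal illustration, not a production KNN):

```python
import math
from collections import Counter

train = [
    ((1.0, 2.0), "orange"), ((0.9, 1.7), "orange"), ((1.5, 2.5), "orange"),
    ((4.0, 5.0), "yellow"), ((4.2, 4.8), "yellow"), ((3.8, 5.2), "yellow"),
]

def knn_predict(test, train, k=3):
    # Steps 1-2: distance from the test point to every training point
    dists = [(math.dist(test, pt), label) for pt, label in train]
    # Step 3: keep the k smallest distances
    dists.sort(key=lambda d: d[0])
    # Step 4: majority vote among the k nearest labels
    votes = Counter(label for _, label in dists[:k])
    return votes.most_common(1)[0][0]

print(knn_predict((2.5, 2.5), train))  # orange  (C, A, B are the 3 nearest)
```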

-
-

The Five-Number Summary

+

Choosing K

    -
  • Minimum: Smallest value
  • -
  • Q1 (25th percentile): 25% of data below this
  • -
  • Q2 (50th percentile/Median): Middle value
  • -
  • Q3 (75th percentile): 75% of data below this
  • -
  • Maximum: Largest value
  • +
  • K=1: Very sensitive to noise, overfits
  • +
  • Small K (3,5): Flexible boundaries, can capture local patterns
  • +
  • Large K (>10): Smoother boundaries, more stable but might underfit
  • +
  • Odd K: Avoids ties in binary classification
  • +
  • Rule of thumb: K = โˆšn (where n = number of training samples)
-
-
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

SAT scores: If you score 1350 and that's the 90th percentile, it means you scored higher than 90% of test-takers. Percentiles are perfect for standardized tests!

-
+
+
โš ๏ธ Critical: Feature Scaling!
+
+ Always scale features before using KNN! If one feature has range [0, 1000] and another [0, 1], the large feature dominates distance calculations. Use StandardScaler or MinMaxScaler. +
+
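To see what a scaler does under the hood, here is a hand-rolled min-max rescale (the same idea as scikit-learn's MinMaxScaler; the income/age numbers are made up):

```python
def min_max_scale(column):
    # Rescale one feature to [0, 1]: (x - min) / (max - min)
    lo, hi = min(column), max(column)
    return [(x - lo) / (hi - lo) for x in column]

incomes = [20_000, 50_000, 100_000]  # huge range would dominate distances...
ages = [25, 40, 60]                  # ...over this small-range feature

print(min_max_scale(incomes))                         # [0.0, 0.375, 1.0]
print([round(v, 3) for v in min_max_scale(ages)])     # [0.0, 0.429, 1.0]
```

After scaling, both features contribute comparably to the distance calculation.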
-
-

๐ŸŽฏ Key Takeaways

+

Advantages

    -
  • Q1 = 25th percentile, Q2 = median, Q3 = 75th percentile
  • -
  • Percentiles show relative standing in a dataset
  • -
  • Five-number summary: Min, Q1, Q2, Q3, Max
  • -
  • Useful for understanding data distribution
  • +
  • โœ“ Simple to understand and implement
  • +
  • โœ“ No training time (just stores data)
  • +
  • โœ“ Works with any number of classes
  • +
  • โœ“ Can learn complex decision boundaries
  • +
  • โœ“ Naturally handles multi-class problems
-
-
- - -
-
- Topic 9 -

๐Ÿ“ฆ Interquartile Range (IQR)

-

Middle 50% of data and outlier detection

-
-
-

Introduction

-

What is it? IQR = Q3 - Q1. It represents the range of the middle 50% of your data.

-

Why it matters: IQR is resistant to outliers and is the foundation of the 1.5ร—IQR rule for outlier detection.

-
+

Disadvantages

+
    +
  • โœ— Slow prediction (compares to ALL training points)
  • +
  • โœ— High memory usage (stores entire dataset)
  • +
  • โœ— Sensitive to feature scaling
  • +
  • โœ— Curse of dimensionality (struggles with many features)
  • +
  • โœ— Sensitive to irrelevant features
  • +
-
-

The 1.5 ร— IQR Rule

-
-
Outlier Boundaries
-
- Lower Fence = Q1 - 1.5 ร— IQR
- Upper Fence = Q3 + 1.5 ร— IQR +
+
๐Ÿ’ก When to Use KNN
+
+ KNN works best on small to medium datasets (<10,000 samples) with few features (<20). Great for recommendation systems, pattern recognition, and as a baseline to compare other models!
-

Any value outside these fences is considered an outlier

+
+ +
+
+

๐Ÿ“Š Supervised - Evaluation Model Evaluation

+ +
+
+

How do we know if our model is good? Model evaluation provides metrics to measure performance and identify problems!

+ +
+
Key Metrics
+
    +
  • Confusion Matrix: Shows all prediction outcomes
  • +
  • Accuracy, Precision, Recall, F1-Score
  • +
  • ROC Curve & AUC: Performance across thresholds
  • +
  • Rยฒ Score: For regression problems
  • +
+
+ +

Confusion Matrix

+

The confusion matrix shows all possible outcomes of binary classification:

-
-

๐ŸŽฏ Key Takeaways

+
+ Confusion Matrix Structure: +
+                Predicted
+                Pos    Neg
+Actual  Pos     TP     FN
+        Neg     FP     TN
+
+ +

Definitions:

    -
  • IQR = Q3 - Q1 (range of middle 50% of data)
  • -
  • Resistant to outliers (unlike standard deviation)
  • -
  • 1.5ร—IQR rule: standard method for outlier detection
  • -
  • Box plots visualize IQR and outliers
  • +
  • True Positive (TP): Correctly predicted positive
  • +
  • True Negative (TN): Correctly predicted negative
  • +
  • False Positive (FP): Wrongly predicted positive (Type I error)
  • +
  • False Negative (FN): Wrongly predicted negative (Type II error)
-
-
- - -
-
- Topic 10 -

๐Ÿ“‰ Skewness

-

Understanding data distribution shape

-
-
-

Introduction

-

What is it? Skewness measures the asymmetry of a distribution.

-

Why it matters: Indicates whether data leans left or right, affecting which statistical methods to use.

-
- -
-

Types of Skewness

-
-
-

Negative (Left) Skew

-

Tail extends to the left

-

Mean < Median < Mode

-

Example: Test scores when most students do well

-
-
-

Symmetric (No Skew)

-

Perfectly balanced

-

Mean = Median = Mode

-

Example: Normal distribution

-
-
-

Positive (Right) Skew

-

Tail extends to the right

-

Mode < Median < Mean

-

Example: Income data, house prices

+
+
+
+

Figure: Confusion matrix for spam detection (TP=600, FP=100, FN=300, TN=900)

-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Calculate and interpret skewness for dataset: [2, 3, 4, 5, 15]

+

Classification Metrics

+ +
+ Accuracy: + Accuracy = (TP + TN) / (TP + TN + FP + FN) +
Percentage of correct predictions overall
- -
-

Solution:

- -
-
Step 1:
-
-

Calculate the Mean

-
- Sum = 2 + 3 + 4 + 5 + 15 = 29
- n = 5
- Mean (xฬ„) = 29/5 = 5.8 -
-

First, find the average of all values

-
-
- -
-
Step 2:
-
-

Calculate Standard Deviation

-
- Deviations from mean: (2-5.8), (3-5.8), (4-5.8), (5-5.8), (15-5.8)
- = -3.8, -2.8, -1.8, -0.8, 9.2
- Squared: 14.44, 7.84, 3.24, 0.64, 84.64
- Variance (sample) = (14.44+7.84+3.24+0.64+84.64)/4 = 110.8/4 = 27.7
- SD = โˆš27.7 = 5.26 -
-

We need standard deviation for the skewness formula

-
-
- -
-
Step 3:
-
-

Calculate Skewness

-
- Cubed deviations: (-3.8)ยณ, (-2.8)ยณ, (-1.8)ยณ, (-0.8)ยณ, (9.2)ยณ
- = -54.87, -21.95, -5.83, -0.51, 778.69
- Sum = 695.53
- Skewness = (695.53/5) / (5.26)ยณ = 139.11 / 145.77 = 0.95 -
-

Skewness formula uses cubed deviations divided by cubed standard deviation

-
-
- -
-
Step 4:
-
-

Interpret the Result

-
- Skewness = +0.95 (positive)
- Distribution is right-skewed
- The value 15 pulls the tail to the right
- Most data clustered on left, long tail on right -
-

Positive skewness means tail extends to the right

-
-
- -
- โœ“ Final Answer: - Skewness = +0.95 (positively skewed, right tail) + +

Example: (600 + 900) / (600 + 900 + 100 + 300) = 1500/1900 = 0.789 (78.9%)

+ +
+
โš ๏ธ Accuracy Paradox
+
+ Accuracy misleads on imbalanced data! If 99% of emails are not spam, a model that always predicts "not spam" gets 99% accuracy but is useless!
- -
- Check: -

The positive skewness confirms that the outlier (15) creates a long right tail, pulling the mean (5.8) above the median (4).

+
+ +
+ Precision: + Precision = TP / (TP + FP) +
"Of all predicted positives, how many are actually positive?" +
+ +

Example: 600 / (600 + 100) = 600/700 = 0.857 (85.7%)

+

Use when: False positives are costly (e.g., spam filter - don't want to block legitimate emails)

+ +
+ Recall (Sensitivity, TPR): + Recall = TP / (TP + FN) +
"Of all actual positives, how many did we catch?" +
+ +

Example: 600 / (600 + 300) = 600/900 = 0.667 (66.7%)

+

Use when: False negatives are costly (e.g., disease detection - can't miss sick patients)

+ +
+ F1-Score: + F1 = 2 ร— (Precision ร— Recall) / (Precision + Recall) +
Harmonic mean - balances precision and recall +
+ +

Example: 2 ร— (0.857 ร— 0.667) / (0.857 + 0.667) = 0.750 (75.0%)

+ +

ROC Curve & AUC

+

The ROC (Receiver Operating Characteristic) curve shows model performance across ALL possible thresholds!

+ +
+ ROC Components: + TPR (True Positive Rate) = TP / (TP + FN) = Recall
+ FPR (False Positive Rate) = FP / (FP + TN) +
Plot: FPR (x-axis) vs TPR (y-axis) +
+ +
+
+
+

Figure: ROC curve - slide threshold to see trade-off

- -
-

๐Ÿ’ช Try These:

-
    -
  1. Find skewness of [1, 1, 2, 3, 3]
  2. -
  3. Data with left tail - positive or negative skew?
  4. -
  5. If mean < median, what type of skew?
  6. -
- - -
-

๐ŸŽฏ Key Takeaways

+

Understanding ROC:

    -
  • Skewness measures asymmetry in distribution
  • -
  • Negative skew: tail to left, Mean < Median
  • -
  • Positive skew: tail to right, Mean > Median
  • -
  • Symmetric: Mean = Median = Mode
  • +
  • Top-left corner (0, 1): Perfect classifier
  • +
  • Diagonal line: Random guessing
  • +
  • Above diagonal: Better than random
  • +
  • Below diagonal: Worse than random (invert predictions!)
-
-
- - -
-
- Topic 11 -

๐Ÿ”— Covariance

-

How two variables vary together

-
-
-

Introduction

-

What is it? Covariance measures how two variables change together.

-

Why it matters: Shows if variables have a positive, negative, or no relationship.

-
+
+ AUC (Area Under Curve): + AUC = Area under ROC curve +
AUC = 1.0: Perfect | AUC = 0.5: Random | AUC > 0.8: Good +
-
-

Formula

-
-
Sample Covariance
-
Cov(X,Y) = ฮฃ(xแตข - xฬ„)(yแตข - ศณ) / (n-1)
+

Regression Metrics: Rยฒ Score

+

For regression problems, Rยฒ (coefficient of determination) measures how well the model explains variance:

+ +
+ Rยฒ Formula: + Rยฒ = 1 - (SS_res / SS_tot)
+
+ SS_res = ฮฃ(y - ลท)ยฒ (sum of squared residuals)
+ SS_tot = ฮฃ(y - ศณ)ยฒ (total sum of squares)
+
ศณ = mean of actual values
-
-
-

Interpretation

+

Interpreting Rยฒ:

    -
  • Positive: Variables increase together
  • -
  • Negative: One increases as other decreases
  • -
  • Zero: No linear relationship
  • -
  • Problem: Scale-dependent, hard to interpret magnitude
  • +
  • Rยฒ = 1.0: Perfect fit (model explains 100% of variance)
  • +
  • Rยฒ = 0.7: Model explains 70% of variance (pretty good!)
  • +
  • Rยฒ = 0.0: Model no better than just using the mean
  • +
  • Rยฒ < 0: Model worse than mean (something's very wrong!)
-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Find covariance between X=[2, 4, 6, 8] and Y=[1, 3, 5, 7]

-
- -
-

Solution:

- -
-
Step 1:
-
-

Calculate the Means

-
- xฬ„ = (2 + 4 + 6 + 8) / 4 = 20 / 4 = 5
- ศณ = (1 + 3 + 5 + 7) / 4 = 16 / 4 = 4 -
-

Find the average of each variable

-
-
- -
-
Step 2:
-
-

Create Deviation Table

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xy(x-xฬ„)(y-ศณ)(x-xฬ„)(y-ศณ)
21-3-39
43-1-11
65111
87339
Sum20
-
-

Calculate deviations from means and their products

-
-
- -
-
Step 3:
-
-

Calculate Sample Covariance

-
- Cov(X,Y) = ฮฃ(x-xฬ„)(y-ศณ) / (n-1)
- Cov(X,Y) = 20 / (4-1)
- Cov(X,Y) = 20 / 3
- Cov(X,Y) = 6.67 -
-

Use n-1 for sample covariance (Bessel's correction)

-
-
- -
-
Step 4:
-
-

Interpret the Result

-
- Cov(X,Y) = 6.67 > 0
- Positive covariance indicates:
- โ€ข X and Y tend to increase together
- โ€ข When X is above its mean, Y tends to be above its mean
- โ€ข When X is below its mean, Y tends to be below its mean -
-

Positive covariance shows positive relationship

-
-
- -
- Final Answer: - Cov(X,Y) = 6.67 (positive relationship) -
- -
- โœ“ Verification: -

The positive covariance confirms that X and Y have a positive linear relationship. As X increases by 2, Y also increases by 2, showing consistent movement together.

+
+
+
+

Figure: Rยฒ calculation on height-weight regression

- -
-

๐Ÿ’ช Try These:

-
    -
  1. Calculate Cov(X,Y) for X=[1, 2, 3] and Y=[2, 4, 6]
  2. -
  3. If Cov(X,Y) = -5, what does this tell you about the relationship?
  4. -
  5. Find Cov(X,Y) for X=[5, 5, 5] and Y=[1, 2, 3]. What do you notice?
  6. -
- - +
+ +
+
+

8. Regularization

+ +
+
+

Regularization prevents overfitting by penalizing complex models. It adds a "simplicity constraint" to force the model to generalize better!

+ +
+
Key Concepts
+
    +
  • Prevents overfitting by penalizing large coefficients
  • +
  • L1 (Lasso): Drives coefficients to zero, feature selection
  • +
  • L2 (Ridge): Shrinks coefficients proportionally
  • +
  • ฮป controls penalty strength
  • +
+
-
-

๐ŸŽฏ Key Takeaways

+

The Overfitting Problem

+

Without regularization, models can learn training data TOO well:

    -
  • Covariance measures joint variability of two variables
  • -
  • Positive: variables move together; Negative: inverse relationship
  • -
  • Scale-dependent (unlike correlation)
  • -
  • Foundation for correlation calculation
  • +
  • Captures noise instead of patterns
  • +
  • High training accuracy, poor test accuracy
  • +
  • Large coefficient values
  • +
  • Model too complex for the problem
-
-
- - -
-
- Topic 12 -

๐Ÿ’ž Correlation

-

Standardized measure of relationship strength

-
-
-

Introduction

-

What is it? Correlation coefficient (r) is a standardized measure of linear relationship between two variables.

-

Why it matters: Always between -1 and +1, making it easy to interpret strength and direction of relationships.

-
+
+
โš ๏ธ Overfitting Example
+
+ Imagine fitting a 10th-degree polynomial to 12 data points. It perfectly fits training data (even noise) but fails on new data. Regularization prevents this! +
+
-
-

Pearson Correlation Formula

-
-
Correlation Coefficient (r)
-
r = Cov(X,Y) / (ฯƒโ‚“ ร— ฯƒแตง)
-

Covariance divided by product of standard deviations

+

The Regularization Solution

+

Instead of minimizing just the loss, we minimize: Loss + Penalty

+ +
+ Regularized Cost Function: + Cost = Loss + ฮป ร— Penalty(ฮธ) +
where:
ฮธ = model parameters (weights)
ฮป = regularization strength
Penalty = function of parameter magnitudes
-
-
-

Interpretation Guide

+

L1 Regularization (Lasso)

+
+ L1 Penalty: + Cost = MSE + ฮป ร— ฮฃ|ฮธแตข| +
Sum of absolute values of coefficients +
+ +

L1 Effects:

    -
  • r = +1: Perfect positive correlation
  • -
  • r = 0.7 to 0.9: Strong positive
  • -
  • r = 0.4 to 0.6: Moderate positive
  • -
  • r = 0.1 to 0.3: Weak positive
  • -
  • r = 0: No correlation
  • -
  • r = -0.1 to -0.3: Weak negative
  • -
  • r = -0.4 to -0.6: Moderate negative
  • -
  • r = -0.7 to -0.9: Strong negative
  • -
  • r = -1: Perfect negative correlation
  • +
  • Feature selection: Drives coefficients to exactly 0
  • +
  • Sparse models: Only important features remain
  • +
  • Interpretable: Easy to see which features matter
  • +
  • Use when: Many features, few are important
-
- -
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

Study hours vs exam scores typically show r = 0.7 (strong positive). More study hours correlate with higher scores.

-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Calculate correlation coefficient for X=[2, 4, 6, 8] and Y=[1, 3, 5, 7]

+

L2 Regularization (Ridge)

+
+ L2 Penalty: + Cost = MSE + ฮป ร— ฮฃฮธแตขยฒ +
Sum of squared coefficients
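The two penalties are easy to compare side by side; the coefficient vector, MSE, and λ below are made-up numbers purely for illustration:

```python
def l1_penalty(theta):
    # Lasso term: Σ|θi|
    return sum(abs(t) for t in theta)

def l2_penalty(theta):
    # Ridge term: Σθi²
    return sum(t ** 2 for t in theta)

theta = [3.0, -0.5, 0.0, 2.0]  # hypothetical model weights
mse, lam = 4.0, 0.1            # hypothetical loss and regularization strength

print(mse + lam * l1_penalty(theta))  # 4 + 0.1*5.5  = 4.55
print(mse + lam * l2_penalty(theta))  # 4 + 0.1*13.25 = 5.325
```

Note how L2 punishes the large weight 3.0 much harder (9 vs 3), which is why Ridge shrinks big coefficients while Lasso treats all magnitudes linearly and can zero small ones out.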
- -
-

Solution:

- -
-
Step 1:
-
-

Use Covariance from Topic 11

-
- From previous calculation:
- Cov(X,Y) = 6.67
- xฬ„ = 5, ศณ = 4 -
-

We already calculated this in Topic 11

-
-
- -
-
Step 2:
-
-

Calculate Standard Deviation of X

-
- Deviations from mean: -3, -1, 1, 3
- Squared deviations: 9, 1, 1, 9
- Sum of squared deviations = 20
- Variance_x = 20 / (4-1) = 20/3 = 6.67
- SD_x = โˆš6.67 โ‰ˆ 2.58 -
-

Standard deviation measures spread of X values

-
-
- -
-
Step 3:
-
-

Calculate Standard Deviation of Y

-
- Deviations from mean: -3, -1, 1, 3
- Squared deviations: 9, 1, 1, 9
- Sum of squared deviations = 20
- Variance_y = 20 / (4-1) = 20/3 = 6.67
- SD_y = โˆš6.67 โ‰ˆ 2.58 -
-

Standard deviation measures spread of Y values

-
-
- -
-
Step 4:
-
-

Calculate Correlation Coefficient

-
- r = Cov(X,Y) / (SD_x ร— SD_y)
- r = 6.67 / (2.58 ร— 2.58)
- r = 6.67 / 6.66
- r โ‰ˆ 1.00 -
-

Correlation standardizes covariance by dividing by both standard deviations

-
-
- -
-
Step 5:
-
-

Interpret the Result

-
- r = 1.00 (perfect positive correlation)
- This means:
- โ€ข X and Y have a perfect linear relationship
- โ€ข As X increases by 2, Y increases by 2 (exactly)
- โ€ข All points lie exactly on a straight line
- โ€ข The relationship is: Y = 0.5X (or Y = -1 + 0.5X when adjusted) -
-

r = 1 indicates perfect positive linear correlation

-
+ +

L2 Effects:

+
    +
  • Shrinks coefficients: Makes them smaller, not zero
  • +
  • Keeps all features: No automatic selection
  • +
  • Smooth predictions: Less sensitive to individual features
  • +
  • Use when: Many correlated features (multicollinearity)
  • +
+ +
+
+
- -
- Final Answer: - r = 1.00 (perfect positive linear correlation) +

Figure: Comparing vanilla, L1, and L2 regularization effects

+
+ +
+
+ +
- -
- โœ“ Verification: -

Check: If we plot these points, they form a perfect line. When X=2, Y=1; X=4, Y=3; X=6, Y=5; X=8, Y=7. The relationship is Y = (X/2) - 1 + (X/2) = 0.5X, which is indeed perfectly linear! โœ“

+
+ +

The Lambda (ฮป) Parameter

+
    +
  • ฮป = 0: No regularization (original model, risk of overfitting)
  • +
  • Small ฮป (0.01): Weak penalty, slight regularization
  • +
  • Medium ฮป (1): Balanced, good generalization
  • +
  • Large ฮป (100): Strong penalty, risk of underfitting
  • +
+ +
+
๐Ÿ’ก L1 vs L2: Quick Guide
+
+ Use L1 when:
+ โ€ข You suspect many features are irrelevant
+ โ€ข You want automatic feature selection
+ โ€ข You need interpretability
+
+ Use L2 when:
+ โ€ข All features might be useful
+ โ€ข Features are highly correlated
+ โ€ข You want smooth, stable predictions
+
+ Elastic Net: Combines both L1 and L2!
- -
-

๐Ÿ’ช Try These:

-
    -
  1. If Cov(X,Y) = 10, SD_x = 2, SD_y = 5, find r
  2. -
  3. What does r = -0.8 indicate about the relationship?
  4. -
  5. Can correlation be greater than 1? Why or why not?
  6. -
- - +
-
-

๐ŸŽฏ Key Takeaways

+
+
+

9. Bias-Variance Tradeoff

+ +
+
+

Every model makes two types of errors: bias and variance. The bias-variance tradeoff is the fundamental challenge in machine learning - we must balance them!

+ +
+
Key Concepts
+
    +
  • Bias = systematic error (underfitting)
  • +
  • Variance = sensitivity to training data (overfitting)
  • +
  • Can't minimize both simultaneously
  • +
  • Goal: Find the sweet spot
  • +
+
+ +

Understanding Bias

+

Bias is the error from overly simplistic assumptions. High bias causes underfitting.

+ +

Characteristics of High Bias:

    -
  • r ranges from -1 to +1
  • -
  • Measures strength AND direction of linear relationship
  • -
  • Scale-independent (unlike covariance)
  • -
  • Only measures LINEAR relationships
  • +
  • Model too simple for the problem
  • +
  • High error on training data
  • +
  • High error on test data
  • +
  • Can't capture underlying patterns
  • +
  • Example: Using a straight line for curved data
-
-
- - -
-
- Topic 13 -

๐Ÿ’ช Interpreting Correlation

-

Correlation vs causation and common pitfalls

-
-
-

The Golden Rule

-
-
โš ๏ธ CORRELATION โ‰  CAUSATION
-

Just because two variables are correlated does NOT mean one causes the other!

+
+
๐ŸŽฏ High Bias Example
+
+ Trying to fit a parabola with a straight line. No matter how much training data you have, a line can't capture the curve. That's bias! +
-
-
-

Common Scenarios

+

Understanding Variance

+

Variance is the error from sensitivity to small fluctuations in training data. High variance causes overfitting.

+ +

Characteristics of High Variance:

    -
  • Direct Causation: X causes Y (smoking causes cancer)
  • -
  • Reverse Causation: Y causes X (not the direction you thought)
  • -
  • Third Variable: Z causes both X and Y (confounding variable)
  • -
  • Coincidence: Pure chance with no real relationship
  • +
  • Model too complex for the problem
  • +
  • Very low error on training data
  • +
  • High error on test data
  • +
  • Captures noise as if it were pattern
  • +
  • Example: Using 10th-degree polynomial for simple data
-
-
-
๐Ÿ“Š FAMOUS EXAMPLE
-

Ice cream sales correlate with drowning deaths.

-

Does ice cream cause drowning? NO! The third variable is summer weatherโ€”more people swim in summer (more drownings) and eat ice cream in summer.

-
+
+
๐Ÿ“Š High Variance Example
+
+ A wiggly curve that passes through every training point perfectly, including outliers. Change one data point and the entire curve changes dramatically. That's variance! +
+
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Study finds r = -0.75 between hours of TV watched and exam scores. Interpret this result and discuss causation.

+

The Tradeoff

+
+ Total Error Decomposition: + Total Error = Biasยฒ + Variance + Irreducible Error +
Irreducible error = noise in data (can't be eliminated)
- -
-

Solution:

- -
-
Step 1:
-
-

Analyze the Sign

-
- Negative correlation (r < 0)
- As one variable increases, the other decreases
- More TV โ†’ Lower scores (or vice versa) -
-

The negative sign tells us the direction of the relationship

-
-
- -
-
Step 2:
-
-

Analyze the Strength

-
- |r| = |-0.75| = 0.75
- Interpretation scale:
- โ€ข 0.0-0.3 = Weak
- โ€ข 0.3-0.7 = Moderate
- โ€ข 0.7-1.0 = Strong
- 0.75 falls in "Strong" category -
-

The absolute value determines relationship strength

-
-
- -
-
Step 3:
-
-

State the Relationship

-
- Strong negative correlation
- Students who watch more TV tend to have lower exam scores
- Relationship is fairly consistent but not perfect -
-

Combine sign and strength for complete interpretation

-
-
- -
-
Step 4:
-
-

Address Causation

-
- Correlation โ‰  Causation!
- Possible explanations:
- a) TV causes lower scores (less study time)
- b) Lower-performing students watch more TV (compensating)
- c) Third variable: stress causes both TV watching and poor performance
- Cannot determine causation from correlation alone -
-

Correlation never proves causation - always consider alternatives

-
-
- -
-
Step 5:
-
-

Predict Using Correlation

-
- If we know TV hours, we can predict exam score
- But prediction โ‰  causation
- rยฒ = 0.75ยฒ = 0.56 = 56% of variance explained -
-

rยฒ shows percentage of variance in one variable explained by the other

-
-
- -
- โœ“ Final Answer: - Strong negative correlation (r = -0.75), but does NOT prove TV causes lower scores -
- -
- Check: -

While the correlation is strong, we must resist concluding causation. The relationship could be coincidental, reverse-causal, or due to confounding variables.

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. r = +0.90 between study hours and grades. Interpret.
  2. -
  3. Can r = 1.5? Why or why not?
  4. -
  5. If r = 0, does that mean no relationship at all?
  6. -
- - -
-
-
-

๐ŸŽฏ Key Takeaways

+

The tradeoff:

    -
  • Correlation shows relationship, NOT causation
  • -
  • Always consider third variables (confounders)
  • -
  • Need controlled experiments to prove causation
  • -
  • Be skeptical of correlation claims in media
  • +
  • Decrease bias โ†’ Increase variance (more complex model)
  • +
  • Decrease variance โ†’ Increase bias (simpler model)
  • +
  • Goal: Minimize total error by balancing both
-
-
- - -
-
- Topic 14 -

๐ŸŽฒ Probability Basics

-

Foundation of statistical inference

-
-
-

Introduction

-

What is it? Probability measures the likelihood of an event occurring, ranging from 0 (impossible) to 1 (certain).

-

Why it matters: Foundation for all statistical inference, hypothesis testing, and prediction.

-
+
+
+ +
+

Figure: Three models showing underfitting, good fit, and overfitting

+
-
-

Basic Formula

-
-
Probability of Event E
-
P(E) = Number of favorable outcomes / Total number of possible outcomes
+

The Driving Test Analogy

+

Think of learning to drive:

+ +
+
Driving Test Analogy
+
    +
  • + High Bias (Underfitting):
    + Failed practice tests, failed real test
    + โ†’ Can't learn to drive at all +
  • +
  • + Good Balance:
    + Passed practice tests, passed real test
    + โ†’ Actually learned to drive! +
  • +
  • + High Variance (Overfitting):
    + Perfect on practice tests, failed real test
    + โ†’ Memorized practice, didn't truly learn +
  • +
-
-
-

Key Rules

+

How to Find the Balance

+ +

Reduce Bias (if underfitting):

    -
  • Range: 0 โ‰ค P(E) โ‰ค 1
  • -
  • Complement: P(not E) = 1 - P(E)
  • -
  • Addition (OR): P(A or B) = P(A) + P(B) - P(A and B)
  • -
  • Multiplication (AND): P(A and B) = P(A) ร— P(B) [if independent]
  • +
  • Use more complex model (more features, higher degree polynomial)
  • +
  • Add more features
  • +
  • Reduce regularization
  • +
  • Train longer (more iterations)
-
- -
-
๐Ÿ“Š EXAMPLE
-

Rolling a die:

-

P(rolling a 4) = 1/6 โ‰ˆ 0.167

-

P(rolling even) = 3/6 = 0.5

-

P(not rolling a 6) = 5/6 โ‰ˆ 0.833

-
-
-

๐ŸŽฏ Key Takeaways

+

Reduce Variance (if overfitting):

    -
  • Probability ranges from 0 to 1
  • -
  • P(E) = favorable outcomes / total outcomes
  • -
  • Complement rule: P(not E) = 1 - P(E)
  • -
  • Foundation for all statistical inference
  • +
  • Use simpler model (fewer features, lower degree)
  • +
  • Get more training data
  • +
  • Add regularization (L1, L2)
  • +
  • Use cross-validation
  • +
  • Feature selection or dimensionality reduction
-
-
- - -
-
- Topic 15 -

๐Ÿ”ท Set Theory

-

Union, intersection, and complement

-
-
-

Introduction

-

What is it? Set theory provides a mathematical framework for organizing events and calculating probabilities.

+

Model Complexity Curve

+
+
+ +
+

Figure: Error vs model complexity - find the sweet spot

+
+ +
+
๐Ÿ’ก Detecting Bias vs Variance
+
+ High Bias:
+ Training error: High ๐Ÿ”ด
+ Test error: High ๐Ÿ”ด
+ Gap: Small
+
+ High Variance:
+ Training error: Low ๐ŸŸข
+ Test error: High ๐Ÿ”ด
+ Gap: Large โš ๏ธ
+
+ Good Model:
+ Training error: Low ๐ŸŸข
+ Test error: Low ๐ŸŸข
+ Gap: Small โœ“ +
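The detection pattern above can be turned into a tiny heuristic. A minimal sketch in Python; the error thresholds (`high`, `gap_threshold`) are illustrative assumptions, not fixed rules:

```python
def diagnose(train_error, test_error, high=0.15, gap_threshold=0.10):
    """Classify a model using the train/test error pattern described above."""
    gap = test_error - train_error
    if train_error > high:
        return "high bias (underfitting)"     # both errors high, small gap
    if gap > gap_threshold:
        return "high variance (overfitting)"  # low train error, large gap
    return "good fit"                         # both errors low, small gap

print(diagnose(0.30, 0.32))   # high bias (underfitting)
print(diagnose(0.02, 0.25))   # high variance (overfitting)
print(diagnose(0.04, 0.06))   # good fit
```

In practice you would read `train_error` and `test_error` off a learning curve rather than pick thresholds by hand.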
+
+ +
+
โœ… Key Takeaway
+
+ The bias-variance tradeoff is unavoidable. You can't have zero bias AND zero variance. The art of machine learning is finding the sweet spot where total error is minimized! +
+
+
-
-

Key Concepts

+
+
+

๐Ÿ“Š Supervised - Evaluation Cross-Validation

+ +
+
+

Cross-validation gives more reliable performance estimates by testing your model on multiple different splits of the data!

+ +
+
Key Concepts
+
    +
  • Splits data into K folds
  • +
  • Trains K times, each with different test fold
  • +
  • Averages results for robust estimate
  • +
  • Reduces variance in performance estimate
  • +
+
+ +

The Problem with Simple Train-Test Split

+

With a single 80-20 split:

    -
  • Union (A โˆช B): A OR B (either event occurs)
  • -
  • Intersection (A โˆฉ B): A AND B (both events occur)
  • -
  • Complement (A'): NOT A (event doesn't occur)
  • -
  • Mutually Exclusive: A โˆฉ B = โˆ… (can't both occur)
  • +
  • Performance depends on which data you randomly picked
  • +
  • Might get lucky/unlucky with the split
  • +
  • 20% of data wasted (not used for training)
  • +
  • One number doesn't tell you about variance
-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

In a class of 40 students: 25 like Math, 20 like Science, 10 like both. Find: a) P(Math OR Science), b) P(only Math), c) P(neither)

-
- -
-

Solution:

- -
-
Step 1:
-
-

Set Up the Information

-
- Total students: n = 40
- P(Math) = 25/40 = 0.625
- P(Science) = 20/40 = 0.5
- P(Math โˆฉ Science) = 10/40 = 0.25 -
-

Convert all counts to probabilities

-
-
- -
-
Step 2:
-
-

Find P(Math โˆช Science) using Addition Rule

-
- Formula: P(A โˆช B) = P(A) + P(B) - P(A โˆฉ B)
- P(Math โˆช Science) = 0.625 + 0.5 - 0.25
- = 1.125 - 0.25
- = 0.875 -
-

We subtract the intersection to avoid double-counting

-
-
- -
-
Step 3:
-
-

Find P(only Math)

-
- Only Math = Math AND NOT Science
- Students in only Math = 25 - 10 = 15
- P(only Math) = 15/40 = 0.375 -
-

Subtract those who like both from total Math students

-
-
- -
-
Step 4:
-
-

Find P(neither)

-
- Neither = NOT (Math OR Science)
- P(neither) = 1 - P(Math โˆช Science)
- = 1 - 0.875
- = 0.125
- Or: 40 - 35 = 5 students, so 5/40 = 0.125 โœ“ -
-

Use complement rule or count directly

-
-
- -
- โœ“ Final Answer: - a) P(Math OR Science) = 0.875 (87.5%)
b) P(only Math) = 0.375 (37.5%)
c) P(neither) = 0.125 (12.5%)
-
- -
- Verification: -

Check: 0.375 (only Math) + 0.25 (both) + 0.25 (only Science) + 0.125 (neither) = 1.0 โœ“

+
+
โš ๏ธ Single Split Problem
+
+ You test once and get 85% accuracy. Is that good? Or did you just get lucky with an easy test set? Without multiple tests, you don't know!
- -
-

๐Ÿ’ช Try These:

-
    -
  1. P(A)=0.6, P(B)=0.5, P(AโˆฉB)=0.3. Find P(AโˆชB)
  2. -
  3. If P(AโˆชB)=0.8, P(A)=0.5, P(B)=0.4, find P(AโˆฉB)
  4. -
  5. 100 students: 60 like pizza, 40 like burgers, 20 like both. How many like neither?
  6. -
- - -
-

๐ŸŽฏ Key Takeaways

+

Choosing K

    -
  • Union (โˆช): OR operation
  • -
  • Intersection (โˆฉ): AND operation
  • -
  • Complement ('): NOT operation
  • -
  • Venn diagrams visualize set relationships
  • +
  • K=5: Most common, good balance
  • +
  • K=10: More reliable, standard in research
  • +
  • K=n (Leave-One-Out): Maximum data usage, but expensive
  • +
  • Larger K: More computation, less bias, more variance
  • +
  • Smaller K: Less computation, more bias, less variance
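The fold mechanics are easy to sketch from scratch. A minimal K-fold loop in plain Python; the "model" here is a majority-class predictor purely for illustration (real use would fit an actual estimator, e.g. via scikit-learn's `cross_val_score`):

```python
import random
import statistics

def k_fold_indices(n, k, seed=0):
    """Shuffle 0..n-1 and deal the indices into k nearly equal folds."""
    idx = list(range(n))
    random.Random(seed).shuffle(idx)
    return [idx[i::k] for i in range(k)]

def cross_validate(labels, k=5):
    """Train k times; each fold is the test set exactly once, then average."""
    scores = []
    for fold in k_fold_indices(len(labels), k):
        held_out = set(fold)
        train = [y for i, y in enumerate(labels) if i not in held_out]
        majority = max(set(train), key=train.count)                 # "training" step
        accuracy = sum(labels[i] == majority for i in fold) / len(fold)
        scores.append(accuracy)
    return statistics.mean(scores), statistics.pstdev(scores)

labels = [0] * 80 + [1] * 20
mean_acc, std_acc = cross_validate(labels, k=5)   # robust estimate plus its spread
```

Reporting the mean together with the standard deviation across folds is what gives the "one number plus variance" picture a single split cannot.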
-
- - - -
-
- Topic 16 -

๐Ÿ”€ Conditional Probability

-

Probability given that something else happened

-
-
-

Introduction

-

What is it? Conditional probability is the probability of event A occurring given that event B has already occurred.

-
+

Stratified K-Fold

+

For classification with imbalanced classes, use stratified K-fold to maintain class proportions in each fold!

-
-

Formula

-
-
Conditional Probability
-
P(A|B) = P(A and B) / P(B)
-

Read as: "Probability of A given B"

+
+
๐Ÿ’ก Example
+
+ Dataset: 80% class 0, 20% class 1
+
+ Regular K-fold: One fold might have 90% class 0, another 70%
+ Stratified K-fold: Every fold has 80% class 0, 20% class 1 โœ“ +
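A stratified split can be sketched by dealing each class's indices round-robin across the folds. This toy version (not scikit-learn's `StratifiedKFold`, which is the usual tool) reproduces the 80/20 proportions from the example above in every fold:

```python
from collections import defaultdict

def stratified_folds(labels, k):
    """Deal each class's indices across k folds so every fold keeps
    (approximately) the overall class proportions."""
    by_class = defaultdict(list)
    for i, label in enumerate(labels):
        by_class[label].append(i)
    folds = [[] for _ in range(k)]
    for indices in by_class.values():
        for j, i in enumerate(indices):
            folds[j % k].append(i)
    return folds

labels = [0] * 80 + [1] * 20                 # 80% class 0, 20% class 1
folds = stratified_folds(labels, 5)
ratios = [sum(labels[i] for i in f) / len(f) for f in folds]   # 0.2 in every fold
```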
-
-
-
๐Ÿ“Š EXAMPLE
-

Drawing cards: P(King | Red card) = ?

-

P(Red card) = 26/52

-

P(King and Red) = 2/52

-

P(King | Red) = (2/52) / (26/52) = 2/26 = 1/13

-
+

Leave-One-Out Cross-Validation (LOOCV)

+

Special case where K = n (number of samples):

+
    +
  • Each sample is test set once
  • +
  • Train on n-1 samples, test on 1
  • +
  • Repeat n times
  • +
  • Maximum use of training data
  • +
  • Very expensive for large datasets
  • +
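LOOCV is short enough to write out directly. A minimal sketch where the "model" is just the mean of the remaining n-1 values, an illustrative stand-in for a real regressor:

```python
def loocv_mse(values):
    """Leave-one-out: each sample is the test set once; train on the rest."""
    n = len(values)
    total = 0.0
    for i in range(n):
        train = values[:i] + values[i + 1:]
        prediction = sum(train) / (n - 1)        # "fit" = take the training mean
        total += (values[i] - prediction) ** 2   # test on the single held-out sample
    return total / n

print(loocv_mse([10, 20, 30, 40, 50]))   # 312.5
```

The loop body runs n times, which is exactly why LOOCV gets expensive on large datasets.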
-
-

๐ŸŽฏ Key Takeaways

+

Benefits of Cross-Validation

    -
  • P(A|B) = probability of A given B occurred
  • -
  • Formula: P(A|B) = P(A and B) / P(B)
  • -
  • Critical for Bayes' Theorem
  • -
  • Used in machine learning and diagnostics
  • +
  • โœ“ More reliable performance estimate
  • +
  • โœ“ Uses all data for both training and testing
  • +
  • โœ“ Reduces variance in estimate
  • +
  • โœ“ Detects overfitting (high variance across folds)
  • +
  • โœ“ Better for small datasets
-
-
- - -
-
- Topic 17 -

๐ŸŽฏ Independence

-

When events don't affect each other

-
-
-

Introduction

-

What is it? Two events are independent if the occurrence of one doesn't affect the probability of the other.

-
+

Drawbacks

+
    +
  • โœ— Computationally expensive (train K times)
  • +
  • โœ— Not suitable for time series (can't shuffle)
  • +
  • โœ— Still need final train-test split for final model
  • +
-
-

Test for Independence

-
-
Events A and B are independent if:
-
P(A|B) = P(A)
-

OR equivalently:

-
P(A and B) = P(A) ร— P(B)
+
+
โœ… Best Practice
+
+ 1. Use cross-validation to evaluate models and tune hyperparameters
+ 2. Once you pick the best model, train on ALL training data
+ 3. Test once on held-out test set for final unbiased estimate
+
+ Never use test set during cross-validation! +
+
+ +
+
+

๐Ÿ” Unsupervised - Preprocessing Data Preprocessing

+ +
+
+

Raw data is messy! Data preprocessing cleans and transforms data into a format that machine learning algorithms can use effectively.

+ +
+
Key Steps
+
    +
  • Handle missing values
  • +
  • Encode categorical variables
  • +
  • Scale/normalize features
  • +
  • Split data properly
  • +
+
+ +

1. Handling Missing Values

+

Real-world data often has missing values. We can't just ignore them!

-
-

Examples

+

Strategies:

    -
  • Independent: Coin flips, die rolls with replacement
  • -
  • Dependent: Drawing cards without replacement, weather on consecutive days
  • +
  • Drop rows: If only few values missing (<5%)
  • +
  • Mean imputation: Replace with column mean (numerical)
  • +
  • Median imputation: Replace with median (robust to outliers)
  • +
  • Mode imputation: Replace with most frequent (categorical)
  • +
  • Forward/backward fill: Use previous/next value (time series)
  • +
  • Predictive imputation: Train model to predict missing values
-
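The first strategies can be sketched with the standard library alone (in practice you would reach for pandas' `fillna` or scikit-learn's `SimpleImputer`); the toy columns here are made up for illustration:

```python
import statistics

ages = [25, 31, None, 40, None, 28]            # numerical column with gaps
cities = ["NY", None, "LA", "NY", "NY", None]  # categorical column with gaps

# Mean imputation (numerical)
mean_age = statistics.mean(a for a in ages if a is not None)        # 31
ages_filled = [a if a is not None else mean_age for a in ages]

# Mode imputation (categorical)
mode_city = statistics.mode(c for c in cities if c is not None)     # "NY"
cities_filled = [c if c is not None else mode_city for c in cities]
```

Swapping `statistics.mean` for `statistics.median` gives the outlier-robust variant.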
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Two dice are rolled. Let A = "first die shows 6" and B = "sum is 7". Are A and B independent?

-
- -
-

Solution:

- -
-
Step 1:
-
-

Find P(A)

-
- First die shows 6: one outcome out of 6
- P(A) = 1/6 โ‰ˆ 0.167 -
-

Probability the first die is 6

-
-
- -
-
Step 2:
-
-

Find P(B)

-
- Sum equals 7: (1,6), (2,5), (3,4), (4,3), (5,2), (6,1)
- 6 favorable outcomes out of 36 total
- P(B) = 6/36 = 1/6 โ‰ˆ 0.167 -
-

Count all ways to get sum of 7

-
-
- -
-
Step 3:
-
-

Find P(A โˆฉ B)

-
- First die is 6 AND sum is 7
- Only possibility: (6,1)
- P(A โˆฉ B) = 1/36 โ‰ˆ 0.028 -
-

Find where both events occur simultaneously

-
-
- -
-
Step 4:
-
-

Test Independence

-
- If independent: P(A โˆฉ B) = P(A) ร— P(B)
- P(A) ร— P(B) = (1/6) ร— (1/6) = 1/36
- P(A โˆฉ B) = 1/36
- 1/36 = 1/36 โœ“ EQUAL! -
-

Compare the two probabilities to test independence

-
-
- -
-
Step 5:
-
-

Conclusion

-
- Events A and B ARE independent
- Knowing first die is 6 doesn't change probability of sum being 7 -
-

When the product rule holds, events are independent

-
-
- -
- โœ“ Final Answer: - YES, events are independent. P(AโˆฉB) = P(A)ร—P(B) = 1/36 -
- -
- Check: -

We can also verify: P(B|A) = P(AโˆฉB)/P(A) = (1/36)/(1/6) = 1/6 = P(B). Since P(B|A) = P(B), the events are independent.

+
+
โš ๏ธ Warning
+
+ Never drop columns with many missing values without investigation! The missingness itself might be informative (e.g., income not reported might correlate with high income).
- -
-

๐Ÿ’ช Try These:

-
    -
  1. P(A)=0.3, P(B)=0.4, P(AโˆฉB)=0.12. Independent?
  2. -
  3. Coin flip: P(Heads) and P(Tails). Independent?
  4. -
  5. Drawing two cards without replacement. Independent?
  6. -
- - -
-

๐ŸŽฏ Key Takeaways

+

3. Feature Scaling

+

Different features have different scales. Age (0-100) vs Income ($0-$1M). This causes problems!

+ +

Why Scale?

    -
  • Independent events don't affect each other
  • -
  • Test: P(A and B) = P(A) ร— P(B)
  • -
  • With replacement โ†’ independent
  • -
  • Without replacement โ†’ dependent
  • +
  • Gradient descent converges faster
  • +
  • Distance-based algorithms (KNN, SVM) need it
  • +
  • Regularization treats features equally
  • +
  • Neural networks train better
-
-
- - -
-
- Topic 18 -

๐Ÿงฎ Bayes' Theorem

-

Updating probabilities with new evidence

-
-
-

Introduction

-

What is it? Bayes' Theorem shows how to update probability based on new information.

-

Why it matters: Used in medical diagnosis, spam filters, machine learning, and countless applications.

-
+

StandardScaler (Z-score normalization)

+
+ Formula: + z = (x - μ) / σ +
where:
μ = mean of feature
σ = standard deviation
Result: mean=0, std=1
+
-
-

The Formula

-
-
Bayes' Theorem
-
P(A|B) = [P(B|A) ร— P(A)] / P(B)
-
    -
  • P(A|B) = posterior probability
  • -
  • P(B|A) = likelihood
  • -
  • P(A) = prior probability
  • -
  • P(B) = marginal probability
  • -
+

Example: [10, 20, 30, 40, 50]

+

ฮผ = 30, ฯƒ = 15.81

+

Scaled: [-1.26, -0.63, 0, 0.63, 1.26]

+ +

MinMaxScaler

+
+ Formula: + x' = (x - min) / (max - min) +
Result: range [0, 1]
-
-
-
๐Ÿ“Š MEDICAL DIAGNOSIS EXAMPLE
-

Disease affects 1% of population. Test is 95% accurate.

-

You test positive. What's probability you have disease?

-
-

P(Disease) = 0.01

-

P(Positive|Disease) = 0.95

-

P(Positive|No Disease) = 0.05

-

P(Positive) = 0.01ร—0.95 + 0.99ร—0.05 = 0.059

-

P(Disease|Positive) = (0.95ร—0.01)/0.059 = 0.161

-

Only 16.1% chance you have the disease!

+

Example: [10, 20, 30, 40, 50]

+

Scaled: [0, 0.25, 0.5, 0.75, 1.0]
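Both scalers on the running example [10, 20, 30, 40, 50], in plain Python. One subtlety worth flagging: the ±1.26 z-scores quoted above come from the sample standard deviation (15.81); scikit-learn's StandardScaler uses the population standard deviation (≈14.14), which gives ±1.41 instead:

```python
data = [10, 20, 30, 40, 50]

# StandardScaler-style z-scores (population std, as scikit-learn computes it)
mu = sum(data) / len(data)                                      # 30
sigma = (sum((x - mu) ** 2 for x in data) / len(data)) ** 0.5   # ~14.14
z_scores = [round((x - mu) / sigma, 2) for x in data]

# MinMaxScaler-style rescaling to [0, 1]
low, high = min(data), max(data)
min_max = [(x - low) / (high - low) for x in data]

print(z_scores)   # [-1.41, -0.71, 0.0, 0.71, 1.41]
print(min_max)    # [0.0, 0.25, 0.5, 0.75, 1.0]
```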

+ +
+
+ +
+

Figure: Feature distributions before and after scaling

-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

A disease affects 1% of the population. A test is 99% accurate (detects 99% of sick people and correctly identifies 99% of healthy people). You test positive. What's the probability you actually have the disease?

+

Critical: fit_transform vs transform

+

This is where many beginners make mistakes!

+ +
+ fit_transform():
+ 1. Learns parameters (ฮผ, ฯƒ, min, max) from data
+ 2. Transforms the data
+ Use on: Training data ONLY
+
+ transform():
+ 1. Uses already-learned parameters
+ 2. Transforms the data
+ Use on: Test data, new data
- -
-

Solution:

- -
-
Step 1:
-
-

Define the Events and Given Information

-
- Let A = has disease
- Let B = tests positive
- P(A) = 0.01 (1% of population has disease)
- P(B|A) = 0.99 (99% true positive rate)
- P(B|A') = 0.01 (1% false positive rate) -
-

Set up all known probabilities before applying Bayes' Theorem

-
-
- -
-
Step 2:
-
-

Calculate P(B) using Total Probability

-
- P(B) = P(B|A) ร— P(A) + P(B|A') ร— P(A')
- P(B) = (0.99 ร— 0.01) + (0.01 ร— 0.99)
- P(B) = 0.0099 + 0.0099 = 0.0198 -
-

Find the overall probability of testing positive

-
-
- -
-
Step 3:
-
-

Apply Bayes' Theorem

-
- P(A|B) = [P(B|A) ร— P(A)] / P(B)
- P(A|B) = (0.99 ร— 0.01) / 0.0198
- P(A|B) = 0.0099 / 0.0198
- P(A|B) = 0.5 = 50% -
-

This is the posterior probability - what we want to find!

-
-
- -
- Final Answer: - Only 50% chance you have the disease despite testing positive! + +
+
โš ๏ธ DATA LEAKAGE!
+
+ WRONG:
+ scaler.fit(test_data) # Learns from test data!
+
+ CORRECT:
+ scaler.fit(train_data) # Learn from train only
+ train_scaled = scaler.transform(train_data)
+ test_scaled = scaler.transform(test_data)
+
+ If you fit on test data, you're "peeking" at the answers!
- -
- โœ“ Why So Low? -

This counter-intuitive result occurs because the disease is so rare (1%). Even with a 99% accurate test, there are many more false positives from the healthy 99% than true positives from the sick 1%. Base rates matter!

+
+ +

4. Train-Test Split

+

Always split data BEFORE any preprocessing that learns parameters!

+ +
+ Correct Order:
+ 1. Split data โ†’ train (80%), test (20%)
+ 2. Handle missing values (fit on train)
+ 3. Encode categories (fit on train)
+ 4. Scale features (fit on train)
+ 5. Train model
+ 6. Test model (using same transformations) +
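The correct order can be demonstrated with a minimal scaler class that mimics the fit/transform split (a stand-in for scikit-learn's StandardScaler, with made-up numbers):

```python
class MiniScaler:
    """Learns mu/sigma in fit(); transform() only reuses them."""

    def fit(self, xs):
        self.mu = sum(xs) / len(xs)
        self.sigma = (sum((x - self.mu) ** 2 for x in xs) / len(xs)) ** 0.5
        return self

    def transform(self, xs):
        return [(x - self.mu) / self.sigma for x in xs]

    def fit_transform(self, xs):
        return self.fit(xs).transform(xs)

data = [float(x) for x in range(1, 11)]
train, test = data[:8], data[8:]                 # 1. split FIRST
scaler = MiniScaler()
train_scaled = scaler.fit_transform(train)       # 2. learn mu/sigma from train only
test_scaled = scaler.transform(test)             # 3. reuse them on test: no leakage
```

Calling `fit` (or `fit_transform`) on `test` anywhere in this flow would be exactly the data leakage warned about above.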
+ +

Complete Pipeline Example

+
+
+
+

Figure: Complete preprocessing pipeline

- -
-

๐Ÿ’ช Try These:

-
    -
  1. What if the disease affects 10% of the population instead? Recalculate P(A|B)
  2. -
  3. If the test was 95% accurate instead of 99%, what would P(A|B) be?
  4. -
- - +
-
-

๐ŸŽฏ Key Takeaways

-
    -
  • Updates probability based on new evidence
  • -
  • P(A|B) = [P(B|A) ร— P(A)] / P(B)
  • -
  • Critical for medical testing and machine learning
  • -
  • Counter-intuitive results common (base rate matters!)
  • -
-
-
- - -
-
- Topic 19 -

๐Ÿ“Š Probability Mass Function (PMF)

-

Probabilities for discrete random variables

-
+
+
+

12. Loss Functions

+ +
+
+

Loss functions measure how wrong our predictions are. Different problems need different loss functions! The choice dramatically affects what your model learns.

+ +
+
Key Concepts
+
    +
  • Loss = how wrong a single prediction is
  • +
  • Cost = average loss over all samples
  • +
  • Regression: MSE, MAE, RMSE
  • +
  • Classification: Log Loss, Hinge Loss
  • +
+
-
-

Introduction

-

What is it? PMF gives the probability that a discrete random variable equals a specific value.

-

Why it matters: Used for countable outcomes like dice rolls, coin flips, or number of defects.

-
+

Loss Functions for Regression

+ +

Mean Squared Error (MSE)

+
+ Formula: + MSE = (1/n) × Σ(y - ŷ)² +
where:
y = actual value
ŷ = predicted value
n = number of samples
+
-
-

Properties

+
Characteristics:
    -
  • 0 โ‰ค P(X = x) โ‰ค 1 for all x
  • -
  • Sum of all probabilities = 1
  • -
  • Only defined for discrete variables
  • -
  • Visualized with bar charts
  • +
  • Squares errors: Penalizes large errors heavily
  • +
  • Always positive: Minimum is 0 (perfect predictions)
  • +
  • Differentiable: Great for gradient descent
  • +
  • Sensitive to outliers: One huge error dominates
  • +
  • Units: Squared units (harder to interpret)
-
-
-
๐Ÿ“Š EXAMPLE: Die Roll
-

P(X = 1) = 1/6

-

P(X = 2) = 1/6

-

... and so on

-

Sum = 6 ร— (1/6) = 1 โœ“

-
+

Example: Predictions [12, 19, 32], Actual [10, 20, 30]

+

Errors: [2, -1, 2]

+

Squared: [4, 1, 4]

+

MSE = (4 + 1 + 4) / 3 = 3.0

+ +

Mean Absolute Error (MAE)

+
+ Formula: + MAE = (1/n) × Σ|y - ŷ| +
Absolute value of errors +
-
-

๐ŸŽฏ Key Takeaways

+
Characteristics:
    -
  • PMF is for discrete random variables
  • -
  • Gives P(X = specific value)
  • -
  • All probabilities sum to 1
  • -
  • Visualized with bar charts
  • +
  • Linear penalty: All errors weighted equally
  • +
  • Robust to outliers: One huge error doesn't dominate
  • +
  • Interpretable units: Same units as target
  • +
  • Not differentiable at 0: Slightly harder to optimize
-
-
- - -
-
- Topic 20 -

๐Ÿ“ˆ Probability Density Function (PDF)

-

Probabilities for continuous random variables

-
-
-

Introduction

-

What is it? PDF describes probability for continuous random variables. Probability at exact point is 0; we calculate probability over intervals.

-
+

Example: Predictions [12, 19, 32], Actual [10, 20, 30]

+

Errors: [2, -1, 2]

+

Absolute: [2, 1, 2]

+

MAE = (2 + 1 + 2) / 3 = 1.67

+ +

Root Mean Squared Error (RMSE)

+
+ Formula: + RMSE = √MSE +
Square root of MSE +
-
-

Key Differences from PMF

+
Characteristics:
    -
  • For continuous (not discrete) variables
  • -
  • P(X = exact value) = 0
  • -
  • Calculate P(a < X < b) = area under curve
  • -
  • Total area under curve = 1
  • +
  • Same units as target: More interpretable than MSE
  • +
  • Still sensitive to outliers: But less than MSE
  • +
  • Common in competitions: Kaggle, etc.
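All three regression losses computed on the same example used above (predictions [12, 19, 32] vs actuals [10, 20, 30]):

```python
y_true = [10, 20, 30]
y_pred = [12, 19, 32]
n = len(y_true)

errors = [p - t for p, t in zip(y_pred, y_true)]   # [2, -1, 2]
mse = sum(e ** 2 for e in errors) / n              # (4 + 1 + 4) / 3 = 3.0
mae = sum(abs(e) for e in errors) / n              # (2 + 1 + 2) / 3 ~ 1.67
rmse = mse ** 0.5                                  # sqrt(3) ~ 1.73
```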
-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Continuous random variable X has uniform distribution on interval [0, 10]. a) Find the PDF f(x), b) Calculate P(3 โ‰ค X โ‰ค 7)

-
- -
-

Solution:

- -
-
Step 1:
-
-

Understand Uniform Distribution

-
- X is equally likely anywhere between 0 and 10
- For uniform on [a, b], PDF is constant
- Total area under curve must equal 1 -
-

Uniform means constant probability density across the interval

-
-
- -
-
Step 2:
-
-

Find PDF Height

-
- Interval length = b - a = 10 - 0 = 10
- For area = 1: height ร— width = 1
- height ร— 10 = 1
- height = 1/10 = 0.1
- Therefore: f(x) = 0.1 for 0 โ‰ค x โ‰ค 10, and 0 otherwise -
-

The constant height must give total area of 1

-
-
- -
-
Step 3:
-
-

Calculate P(3 โ‰ค X โ‰ค 7)

-
- For continuous uniform: P(a โ‰ค X โ‰ค b) = (b-a) ร— height
- P(3 โ‰ค X โ‰ค 7) = (7-3) ร— 0.1
- = 4 ร— 0.1
- = 0.4 -
-

Probability is the area of the rectangle

-
-
- -
-
Step 4:
-
-

Visualize (Area Under Curve)

-
- Rectangle: width = 4, height = 0.1
- Area = 4 ร— 0.1 = 0.4
- This represents probability -
-

The geometric area equals the probability

-
-
- -
- โœ“ Final Answer: - a) f(x) = 0.1 for x โˆˆ [0,10]
b) P(3 โ‰ค X โ‰ค 7) = 0.4 (40%)
-
- -
- Verification: -

P(0 โ‰ค X โ‰ค 10) = 10 ร— 0.1 = 1.0 โœ“ (total probability = 1)

+
+
+
+

Figure: Comparing MSE, MAE, and their response to errors

- -
-

๐Ÿ’ช Try These:

-
    -
  1. Uniform on [5,15]. Find PDF.
  2. -
  3. For above, find P(8 โ‰ค X โ‰ค 12)
  4. -
  5. Why is P(X = 7) = 0 for continuous distributions?
  6. -
- - + +

Loss Functions for Classification

+ +

Log Loss (Cross-Entropy)

+
+ Binary Cross-Entropy: + Loss = -(1/n) × Σ[y·log(ŷ) + (1-y)·log(1-ŷ)] +
where:
y ∈ {0, 1} = actual label
ŷ ∈ (0, 1) = predicted probability
-
-
-

๐ŸŽฏ Key Takeaways

+
Characteristics:
    -
  • PDF is for continuous random variables
  • -
  • Probability = area under curve
  • -
  • P(X = exact point) = 0
  • -
  • Total area under PDF = 1
  • +
  • For probabilities: Output must be [0, 1]
  • +
  • Heavily penalizes confident wrong predictions: Good!
  • +
  • Convex: No local minima, easy to optimize
  • +
  • Probabilistic interpretation: Maximum likelihood
-
-
- - -
-
- Topic 21 -

๐Ÿ“‰ Cumulative Distribution Function (CDF)

-

Probability up to a value

-
-
-

Introduction

-

What is it? CDF gives the probability that X is less than or equal to a specific value.

-

Formula: F(x) = P(X โ‰ค x)

-
+

Example: y=1 (spam), predicted p=0.9

+

Loss = -[1·log(0.9) + 0·log(0.1)] = -log(0.9) = 0.105 (low, good!)

+ +

Example: y=1 (spam), predicted p=0.1

+

Loss = -[1·log(0.1) + 0·log(0.9)] = -log(0.1) = 2.303 (high, bad!)
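The two examples check out numerically; a minimal sketch of binary cross-entropy for a single prediction:

```python
import math

def binary_cross_entropy(y, p):
    """Log loss for one prediction: y in {0, 1}, p = predicted P(y=1)."""
    return -(y * math.log(p) + (1 - y) * math.log(1 - p))

print(round(binary_cross_entropy(1, 0.9), 3))   # 0.105 -> confident and right
print(round(binary_cross_entropy(1, 0.1), 3))   # 2.303 -> confident and wrong
```

Note how the loss explodes as a confident prediction moves to the wrong side; that steep penalty is the point.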

+ +

Hinge Loss (for SVM)

+
+ Formula: + Loss = max(0, 1 - y·score) +
where:
y ∈ {-1, +1}
score = w·x + b
+
-
-

Properties

+
Characteristics:
    -
  • Always non-decreasing
  • -
  • F(-โˆž) = 0
  • -
  • F(+โˆž) = 1
  • -
  • P(a < X โ‰ค b) = F(b) - F(a)
  • +
  • Margin-based: Encourages confident predictions
  • +
  • Zero loss for correct & confident: When yยทscore โ‰ฅ 1
  • +
  • Linear penalty: For violations
  • +
  • Used in SVM: Maximizes margin
-
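A minimal hinge-loss sketch; the scores are made-up raw margins standing in for w·x + b:

```python
def hinge_loss(y, score):
    """y in {-1, +1}; zero loss once y * score >= 1 (outside the margin)."""
    return max(0.0, 1.0 - y * score)

print(hinge_loss(+1, 2.0))   # 0.0 -> correct and confident: no loss
print(hinge_loss(+1, 0.5))   # 0.5 -> correct but inside the margin
print(hinge_loss(-1, 0.5))   # 1.5 -> wrong side: linear penalty
```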
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

For the uniform distribution from Topic 20 (X ~ Uniform[0,10]), find: a) F(5) = P(X โ‰ค 5), b) F(12), c) P(2 < X โ‰ค 8)

+

When to Use Which Loss?

+ +
+
Regression Problems
+
    +
  • + MSE: Default choice, smooth optimization, use when outliers are errors +
  • +
  • + MAE: When you have outliers that are valid data points +
  • +
  • + RMSE: When you need interpretable metric in original units +
  • +
  • + Huber Loss: Combines MSE and MAE - best of both worlds! +
  • +
- -
-

Solution:

- -
-
Step 1:
-
-

Recall PDF

-
- f(x) = 0.1 for 0 โ‰ค x โ‰ค 10
- CDF is cumulative (area from left up to x) -
-

CDF accumulates probability from the left

-
-
- -
-
Step 2:
-
-

Find F(5)

-
- F(5) = P(X โ‰ค 5)
- Area from 0 to 5: width = 5, height = 0.1
- F(5) = 5 ร— 0.1 = 0.5 -
-

Half of the distribution is below x = 5

-
-
- -
-
Step 3:
-
-

Find F(12)

-
- F(12) = P(X โ‰ค 12)
- But X can't exceed 10
- All probability is accounted for by x = 10
- F(12) = 1.0 (certainty) -
-

CDF plateaus at 1 beyond the support of the distribution

-
-
- -
-
Step 4:
-
-

Find P(2 < X โ‰ค 8)

-
- Using CDF: P(a < X โ‰ค b) = F(b) - F(a)
- F(8) = 8 ร— 0.1 = 0.8
- F(2) = 2 ร— 0.1 = 0.2
- P(2 < X โ‰ค 8) = 0.8 - 0.2 = 0.6 -
-

Subtract lower CDF from upper CDF

-
-
- -
-
Step 5:
-
-

General CDF Formula

-
- For uniform [0, 10]:
- โ€ข F(x) = 0 if x < 0
- โ€ข F(x) = x/10 if 0 โ‰ค x โ‰ค 10
- โ€ข F(x) = 1 if x > 10 -
-

The complete CDF function has three pieces

-
-
- -
- โœ“ Final Answer: - a) F(5) = 0.5
b) F(12) = 1.0
c) P(2 < X โ‰ค 8) = 0.6
+ +
+
Classification Problems
+
    +
  • + Log Loss: Default for binary/multi-class, when you need probabilities +
  • +
  • + Hinge Loss: For SVM, when you want maximum margin +
  • +
  • + Focal Loss: For highly imbalanced datasets +
  • +
+
+ +

Visualizing Loss Curves

+
+
+
- -
- Check: -

F(0) = 0 (no probability below 0), F(10) = 1 (all probability by 10), F is non-decreasing โœ“

+

Figure: How different losses respond to errors

+
+ +
+
๐Ÿ’ก Impact of Outliers
+
+ Imagine predictions [100, 102, 98, 150] for actuals [100, 100, 100, 100]:
+
+ MSE: (0 + 4 + 4 + 2500) / 4 = 627 ← Dominated by outlier!
+ MAE: (0 + 2 + 2 + 50) / 4 = 13.5 ← More balanced
+
+ MSE is about 46× larger because it squares the huge error!
- -
-

๐Ÿ’ช Try These:

-
    -
  1. For uniform [5,15], find F(10)
  2. -
  3. What is P(X > 7) using the CDF?
  4. -
  5. If F(x) = 0.75, what does this mean?
  6. -
- - -
-

๐ŸŽฏ Key Takeaways

-
    -
  • CDF: F(x) = P(X โ‰ค x)
  • -
  • Works for both discrete and continuous
  • -
  • Always increases from 0 to 1
  • -
  • Useful for finding percentiles
  • -
-
-
- - -
-
- Topic 22 -

๐Ÿช™ Bernoulli Distribution

-

Single trial with two outcomes

+
-
-

Introduction

-

What is it? Models a single trial with two outcomes: success (1) or failure (0).

-

Examples: Coin flip, pass/fail test, yes/no question

-
+ +
+
+

13. Finding Optimal K in KNN

+ +
+
+

Choosing the right K value is critical for KNN performance! Too small causes overfitting, too large causes underfitting. Let's explore systematic methods to find the optimal K.

+ +
+
Key Methods
+
    +
  • Elbow Method: Plot accuracy vs K, find the "elbow"
  • +
  • Cross-Validation: Test multiple K values with k-fold CV
  • +
  • Grid Search: Systematically test K values
  • +
  • Avoid K=1 (overfits) and K=n (underfits)
  • +
+
-
-

Formula

-
-
Bernoulli PMF
-
P(X = 1) = p
-
P(X = 0) = 1 - p = q
-

Mean = p, Variance = p(1-p)

+

Method 1: Elbow Method

+

Test different K values and plot performance. Look for the "elbow" where adding more neighbors doesn't help much.

+ +
+
+ +
+

Figure 1: Elbow curve showing optimal K at the bend

-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Flip a fair coin once. Let X = 1 if Heads, X = 0 if Tails. a) Find P(X=1) and P(X=0), b) Calculate E(X) and Var(X)

+

Method 2: Cross-Validation Approach

+

For each K value, run k-fold cross-validation and calculate mean accuracy. Choose K with highest mean accuracy.

+ +
+ Cross-Validation Process: + for K in [1, 2, 3, ..., 20]:
+   accuracies = []
+   for fold in [1, 2, 3]:
+     train model with K neighbors
+     test on validation fold
+     accuracies.append(accuracy)
+   mean_accuracy[K] = mean(accuracies)
+
+ optimal_K = argmax(mean_accuracy)
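The pseudocode above, made concrete on a tiny made-up 1-D dataset, with leave-one-out CV standing in for the folds (real use would go through scikit-learn's `cross_val_score` or `GridSearchCV`):

```python
from collections import Counter

X = [1.0, 1.2, 1.4, 2.0, 5.0, 5.2, 5.4, 6.0]   # toy 1-D features
y = [0,   0,   0,   0,   1,   1,   1,   1]

def knn_predict(x, train_X, train_y, k):
    """Majority vote among the k nearest training points."""
    nearest = sorted(range(len(train_X)), key=lambda i: abs(train_X[i] - x))[:k]
    return Counter(train_y[i] for i in nearest).most_common(1)[0][0]

def loo_accuracy(k):
    """Leave-one-out CV accuracy for a given K."""
    hits = 0
    for i in range(len(X)):
        train_X, train_y = X[:i] + X[i + 1:], y[:i] + y[i + 1:]
        hits += knn_predict(X[i], train_X, train_y, k) == y[i]
    return hits / len(X)

scores = {k: loo_accuracy(k) for k in [1, 3, 5, 7]}
best_k = max(scores, key=scores.get)
```

On this toy data K = 7 (i.e. K = n-1) scores 0: each held-out point leaves 3 same-class and 4 other-class neighbors, so the vote always goes the wrong way — a clean illustration of K-too-large underfitting.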
- -
-

Solution:

- -
-
Step 1:
-
-

Identify Bernoulli Trial

-
- Single trial with two outcomes (Success/Failure)
- Success = Heads, p = 0.5
- Failure = Tails, 1-p = 0.5 -
-

This is a classic Bernoulli trial

-
+ +
+
+
- -
-
Step 2:
-
-

Find Probabilities

-
- P(X = 1) = p = 0.5 (probability of heads)
- P(X = 0) = 1-p = 0.5 (probability of tails)
- Check: 0.5 + 0.5 = 1.0 โœ“ -
-

Probabilities must sum to 1

-
+

Figure 2: Cross-validation accuracies heatmap for different K values

+
+ +
+
โœ… Why Cross-Validation is Better
+
+ Single train-test split might be lucky/unlucky. Cross-validation gives you: +
    +
  • Mean accuracy (average performance)
  • +
  • Standard deviation (how stable is K?)
  • +
  • Confidence in your choice
  • +
- -
-
Step 3:
-
-

Calculate Expected Value

-
- Formula: E(X) = p
- E(X) = 0.5
- Or: E(X) = 0ร—P(X=0) + 1ร—P(X=1)
- = 0ร—0.5 + 1ร—0.5 = 0.5 โœ“ -
-

Expected value is the probability of success

-
+
+ +

Practical Guidelines

+
    +
  • Start with K = โˆšn: Good rule of thumb
  • +
  • Try odd K values: Avoids ties in binary classification
  • +
  • Test range [1, 20]: Covers most practical scenarios
  • +
  • Check for stability: Low std dev across folds
  • +
+ +
+
๐Ÿ’ก Real-World Example
+
+ Iris Dataset (150 samples):
+ √150 ≈ 12, so start testing around K=11, K=13, K=15
+ After CV: K=5 gives 96% ± 2% → Optimal choice!
+ K=1 gives 94% ± 8% → Too much variance
+ K=25 gives 88% ± 1% → Too smooth, underfitting
- -
-
Step 4:
-
-

Calculate Variance

-
- Formula: Var(X) = p(1-p)
- Var(X) = 0.5 ร— 0.5 = 0.25
- Standard deviation: ฯƒ = โˆš0.25 = 0.5 -
-

Variance measures spread of outcomes

-
+
+
+
+ + +
+
+

14. Hyperparameter Tuning with GridSearch

+ +
+
+

Hyperparameters control how your model learns. Unlike model parameters (learned from data), hyperparameters are set BEFORE training. GridSearch systematically finds the best combination!

+ +
+
Common Hyperparameters
+
    +
  • Learning rate (ฮฑ) - Gradient Descent step size
  • +
  • K - Number of neighbors in KNN
  • +
  • C, gamma - SVM parameters
  • +
  • Max depth - Decision Tree depth
  • +
  • Number of trees - Random Forest
  • +
+
+ +

GridSearch Explained

+

GridSearch tests ALL combinations of hyperparameters you specify. It's exhaustive but guarantees finding the best combination in your grid.

+ +
+ Example: SVM GridSearch + param_grid = {
+   'C': [0.1, 1, 10, 100],
+   'gamma': [0.001, 0.01, 0.1, 1],
+   'kernel': ['linear', 'rbf']
+ }
+
+ Total combinations: 4 ร— 4 ร— 2 = 32
+ With 5-fold CV: 32 ร— 5 = 160 model trainings! +
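Grid search is just an exhaustive loop over the Cartesian product of the grid. A from-scratch sketch with a dummy scoring function (a stand-in for "mean 5-fold CV accuracy"; real code would use scikit-learn's GridSearchCV):

```python
from itertools import product

param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": [0.001, 0.01, 0.1, 1],
    "kernel": ["linear", "rbf"],
}

def cv_score(params):
    """Dummy scorer that pretends C=10, gamma=0.01, kernel='rbf' is best.
    A real version would train a model with 5-fold CV here."""
    return (0.7 + 0.1 * (params["C"] == 10)
                + 0.1 * (params["gamma"] == 0.01)
                + 0.05 * (params["kernel"] == "rbf"))

names = list(param_grid)
combos = [dict(zip(names, values)) for values in product(*param_grid.values())]
best = max(combos, key=cv_score)

print(len(combos))   # 32 combinations, matching the count above
print(best)
```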
+ +
+
+
- -
-
Step 5:
-
-

Interpret

-
- On average, we get 0.5 heads per flip
- Variance measures spread of 0 and 1 outcomes -
-

Expected value represents long-run average

+

Figure: GridSearch heatmap showing accuracy for C vs gamma combinations

+
+ +
+
+ +
+ +
- -
- โœ“ Final Answer: - a) P(X=1) = 0.5, P(X=0) = 0.5
b) E(X) = 0.5, Var(X) = 0.25
-
- -
- Check: -

For fair coin, p = 0.5 makes sense. Over many flips, we expect half heads (E(X) = 0.5).

+
+ +

Performance Surface (3D View)

+
+
+
+

Figure: 3D surface showing how parameters affect performance

- -
-

๐Ÿ’ช Try These:

-
    -
  1. Biased coin: P(Heads) = 0.7. Find E(X) and Var(X)
  2. -
  3. Free throw: 80% success rate. Model as Bernoulli
  4. -
  5. When is Var(X) maximized for Bernoulli?
  6. -
- - -
-

๐ŸŽฏ Key Takeaways

+

Best Practices

    -
  • Single trial, two outcomes (0 or 1)
  • -
  • Parameter: p (probability of success)
  • -
  • Mean = p, Variance = p(1-p)
  • -
  • Building block for binomial distribution
  • +
  • Start coarse: Wide range, few values (e.g., C: [0.1, 1, 10, 100])
  • +
  • Then refine: Narrow range around best (e.g., C: [5, 7, 9, 11])
  • +
  • Use cross-validation: Avoid overfitting to validation set
  • +
  • Log scale for wide ranges: [0.001, 0.01, 0.1, 1, 10, 100]
  • +
  • Consider computation time: More folds = more reliable but slower
- - - -
-
- Topic 23 -

๐ŸŽฐ Binomial Distribution

-

Multiple independent Bernoulli trials

-
+
-
-

Introduction

-

What is it? Models the number of successes in n independent Bernoulli trials.

-

Requirements: Fixed n, same p, independent trials, binary outcomes

-
+ +
+
+

๐Ÿ“Š Supervised - Classification Naive Bayes Classification

+ +
+
+

Naive Bayes is a probabilistic classifier based on Bayes' Theorem. Despite its "naive" independence assumption, it works surprisingly well for text classification and other tasks! We'll cover both Categorical and Gaussian Naive Bayes with complete mathematical solutions.

+ +
+
Key Concepts
+
    +
  • Based on Bayes' Theorem from probability theory
  • +
  • Assumes features are independent (naive assumption)
  • +
  • Very fast training and prediction
  • +
  • Works well with high-dimensional data
  • +
+
-
-

Formula

-
-
Binomial PMF
-
P(X = k) = C(n,k) ร— p^k ร— (1-p)^(n-k)
-

C(n,k) = n! / (k!(n-k)!)

-

Mean = np, Variance = np(1-p)

+

Bayes' Theorem

+
+ The Foundation: + P(Class|Features) = P(Features|Class) × P(Class) / P(Features)
+
+ Posterior = Likelihood × Prior / Evidence
+ (What we want) = (From data) × (Baseline) / (Normalizer)
-
-
-
๐Ÿ“Š EXAMPLE
-

Flip coin 10 times. P(exactly 6 heads)?

-

n=10, k=6, p=0.5

-

P(X=6) = C(10,6) ร— 0.5^6 ร— 0.5^4 = 210 ร— 0.000977 โ‰ˆ 0.205

-
+

The Naive Independence Assumption

+

"Naive" because we assume all features are independent given the class:

-
-

๐ŸŽฏ Key Takeaways

-
    -
  • n independent trials, probability p each
  • -
  • Counts number of successes
  • -
  • Mean = np, Variance = np(1-p)
  • -
  • Common in quality control and surveys
  • -
-
- - - -
-
- Topic 24 -

๐Ÿ”” Normal Distribution

-

The bell curve and 68-95-99.7 rule

-
+
+ Independence Assumption: + P(x₁, x₂, ..., xₙ | Class) = P(x₁|Class) × P(x₂|Class) × ... × P(xₙ|Class)
+
+ This is often NOT true in reality, but works anyway! +
-
-

Introduction

-

What is it? The most important continuous probability distributionโ€”symmetric, bell-shaped curve.

-

Why it matters: Many natural phenomena follow normal distribution. Foundation of inferential statistics.

-
+
+
+ +
+

Figure 1: Bayes' Theorem visual explanation

+
-
-

Properties

-
    -
  • Symmetric around mean ฮผ
  • -
  • Bell-shaped curve
  • -
  • Mean = Median = Mode
  • -
  • Defined by ฮผ (mean) and ฯƒ (standard deviation)
  • -
  • Total area under curve = 1
  • -
-
+

Real-World Example: Email Spam Detection

+

Let's classify an email with words: ["free", "winner", "click"]

+ +
+ Training Data:
+ โ€ข 300 spam emails (30%)
+ โ€ข 700 not-spam emails (70%)
+
+ Word frequencies:
+ P("free" | spam) = 0.8 (appears in 80% of spam)
+ P("free" | not-spam) = 0.1 (appears in 10% of not-spam)
+
+ P("winner" | spam) = 0.7
+ P("winner" | not-spam) = 0.05
+
+ P("click" | spam) = 0.6
+ P("click" | not-spam) = 0.2 +
-
-

The 68-95-99.7 Rule (Empirical Rule)

+
+
+ +
+

Figure 2: Spam classification calculation step-by-step

+
+ +

Step-by-Step Calculation

+
+
๐Ÿ“ง Classifying Our Email
+
+ P(spam | features):
+ = P("free"|spam) ร— P("winner"|spam) ร— P("click"|spam) ร— P(spam)
+ = 0.8 ร— 0.7 ร— 0.6 ร— 0.3
+ = 0.1008
+
+ P(not-spam | features):
+ = P("free"|not-spam) ร— P("winner"|not-spam) ร— P("click"|not-spam) ร— P(not-spam)
+ = 0.1 ร— 0.05 ร— 0.2 ร— 0.7
+ = 0.0007
+
+ Prediction: 0.1008 > 0.0007 โ†’ SPAM! ๐Ÿ“งโŒ +
+
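The arithmetic above is easy to verify in a few lines of Python. This is a minimal sketch using the illustrative word frequencies from the training-data box; the class scores are unnormalized (prior times product of word likelihoods), which is all we need to compare classes:

```python
# Naive Bayes spam score for an email containing "free", "winner", "click".
# Probabilities are the illustrative values from the example above.
p_word_given_spam = {"free": 0.8, "winner": 0.7, "click": 0.6}
p_word_given_ham = {"free": 0.1, "winner": 0.05, "click": 0.2}
p_spam, p_ham = 0.3, 0.7

words = ["free", "winner", "click"]

# Unnormalized class scores: prior times product of word likelihoods.
score_spam = p_spam
score_ham = p_ham
for w in words:
    score_spam *= p_word_given_spam[w]
    score_ham *= p_word_given_ham[w]

print(round(score_spam, 4))  # 0.1008
print(round(score_ham, 4))   # 0.0007
print("SPAM" if score_spam > score_ham else "NOT SPAM")  # SPAM
```

Dividing each score by their sum would give the normalized posteriors, but the prediction only depends on which score is larger.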
+ +

Why It Works Despite Wrong Assumption

    -
  • 68% of data within ฮผ ยฑ 1ฯƒ
  • -
  • 95% of data within ฮผ ยฑ 2ฯƒ
  • -
  • 99.7% of data within ฮผ ยฑ 3ฯƒ
  • +
  • Don't need exact probabilities: Just need correct ranking
  • +
  • Errors cancel out: Multiple features reduce impact
  • +
  • Simple is robust: Fewer parameters = less overfitting
  • +
  • Fast: Just multiply probabilities!
-
-
-
๐Ÿ’ก REAL-WORLD EXAMPLE
-

IQ scores: ฮผ = 100, ฯƒ = 15

-

68% of people have IQ between 85-115

-

95% have IQ between 70-130

-

99.7% have IQ between 55-145

-
+

Comparison with Other Classifiers

+ + + + + + + + + + + + + + + + + +
Aspect | Naive Bayes | Logistic Reg | SVM | KNN
Speed | Very Fast | Fast | Slow | Very Slow
Works with Little Data | Yes | Yes | No | No
Interpretable | Very | Yes | No | No
Handles Non-linear | Yes | No | Yes | Yes
High Dimensions | Excellent | Good | Good | Poor
- -
-

๐Ÿ“ Worked Example - Step by Step

+

๐ŸŽฏ PART A: Categorical Naive Bayes (Step-by-Step from PDF)

-
-

Problem:

-

IQ scores follow Normal distribution with ฮผ = 100, ฯƒ = 15. Find: a) P(IQ โ‰ค 115), b) P(85 โ‰ค IQ โ‰ค 115), c) IQ score at 95th percentile

-
+

Dataset: Tennis Play Prediction

+ + + + + + + + + + + + +
Outlook | Temperature | Play
Sunny | Hot | No
Sunny | Mild | No
Cloudy | Hot | Yes
Rainy | Mild | Yes
Rainy | Cool | Yes
Cloudy | Cool | Yes
-
-

Solution:

- -
-
Step 1:
-
-

Understand Normal Distribution

-
- Bell-shaped, symmetric around mean
- ฮผ = 100 (center)
- ฯƒ = 15 (spread) -
-

Parameters define the shape and location of the curve

-
-
- -
-
Step 2:
-
-

Find P(IQ โ‰ค 115) using z-score

-
- z = (x - ฮผ)/ฯƒ = (115 - 100)/15 = 15/15 = 1
- P(Z โ‰ค 1) = 0.8413 (from z-table)
- About 84.13% have IQ โ‰ค 115 -
-

Standardize to z-score, then use standard normal table

-
-
- -
-
Step 3:
-
-

Find P(85 โ‰ค IQ โ‰ค 115)

-
- Lower bound: zโ‚ = (85-100)/15 = -15/15 = -1
- Upper bound: zโ‚‚ = (115-100)/15 = 1
- This is ฮผ ยฑ 1ฯƒ (68-95-99.7 rule)
- P(-1 โ‰ค Z โ‰ค 1) = 0.68 (approximately 68%)
- Exact: P(Zโ‰ค1) - P(Zโ‰ค-1) = 0.8413 - 0.1587 = 0.6826 -
-

One standard deviation on each side covers 68% of data

-
-
- -
-
Step 4:
-
-

Find 95th Percentile

-
- P(IQ โ‰ค x) = 0.95
- From z-table: z = 1.645 for 95th percentile
- x = ฮผ + zฯƒ = 100 + 1.645ร—15
- = 100 + 24.675 = 124.675
- IQ โ‰ˆ 125 -
-

Convert z-score back to original scale using inverse formula

-
-
- -
- โœ“ Final Answer: - a) P(IQ โ‰ค 115) = 0.8413 (84.13%)
b) P(85 โ‰ค IQ โ‰ค 115) = 0.6826 (68.26%)
c) 95th percentile = IQ of 125
-
- -
- Verification: -

Using 68-95-99.7 rule: ฮผยฑ1ฯƒ contains 68% โœ“, ฮผยฑ2ฯƒ contains 95%, ฮผยฑ3ฯƒ contains 99.7%. Our answer matches the empirical rule!

-
-
+

Problem: Predict whether to play tennis when Outlook=Rainy and Temperature=Hot

+ +
+
STEP 1: Calculate Prior Probabilities
+
+ Count occurrences in training data:
+ โ€ข Play=Yes appears 4 times out of 6 total
+ โ€ข Play=No appears 2 times out of 6 total
+
+ Calculation:
+ P(Yes) = 4/6 = 0.667 (66.7%)
+ P(No) = 2/6 = 0.333 (33.3%) +
+
+ +
+
STEP 2: Calculate Conditional Probabilities (Before Smoothing)
+
+ For Outlook = "Rainy":
+ โ€ข Count (Rainy AND Yes) = 2 examples
+ โ€ข Count (Yes) = 4 total
+ โ€ข P(Rainy|Yes) = 2/4 = 0.5
+
+ โ€ข Count (Rainy AND No) = 0 examples โŒ
+ โ€ข Count (No) = 2 total
+ โ€ข P(Rainy|No) = 0/2 = 0 โš ๏ธ ZERO PROBABILITY PROBLEM!
+
+ For Temperature = "Hot":
+ โ€ข P(Hot|Yes) = 1/4 = 0.25
+ โ€ข P(Hot|No) = 1/2 = 0.5 +
+
+ +
+ STEP 3: Apply Bayes' Theorem (Initial)
+
+ P(Yes|Rainy,Hot) = P(Yes) ร— P(Rainy|Yes) ร— P(Hot|Yes)
+                    = 0.667 ร— 0.5 ร— 0.25
+                    = 0.0833
+
+ P(No|Rainy,Hot) = P(No) ร— P(Rainy|No) ร— P(Hot|No)
+                   = 0.333 ร— 0 ร— 0.5
+                   = 0 โŒ Problem! +
+ +
+
โš ๏ธ Zero Probability Problem
+
+ When P(Rainy|No) = 0, the entire probability becomes 0! This is unrealistic - just because we haven't seen "Rainy" with "No" in our training data doesn't mean it's impossible. We need Laplace Smoothing! +
+
+ +
+
STEP 4: Apply Laplace Smoothing (ฮฑ = 1)
+
+ Smoothed formula:
+ P(x|c) = (count(x,c) + ฮฑ) / (count(c) + ฮฑ ร— num_categories)
+
+ For Outlook (3 categories: Sunny, Cloudy, Rainy):
+ P(Rainy|Yes) = (2 + 1) / (4 + 1ร—3)
+               = 3/7
+               = 0.429 โœ“
+
+ P(Rainy|No) = (0 + 1) / (2 + 1ร—3)
+             = 1/5
+             = 0.2 โœ“ Fixed the zero!
+
+ For Temperature (3 categories: Hot, Mild, Cool):
+ P(Hot|Yes) = (1 + 1) / (4 + 1ร—3) = 2/7 = 0.286
+ P(Hot|No) = (1 + 1) / (2 + 1ร—3) = 2/5 = 0.4 +
+
+ +
+
STEP 5: Recalculate with Smoothing
+
+ P(Yes|Rainy,Hot):
+ = P(Yes) ร— P(Rainy|Yes) ร— P(Hot|Yes)
+ = 0.667 ร— 0.429 ร— 0.286
+ = 0.0818
+
+ P(No|Rainy,Hot):
+ = P(No) ร— P(Rainy|No) ร— P(Hot|No)
+ = 0.333 ร— 0.2 ร— 0.4
+ = 0.0266 +
+
+ +
+
STEP 6: Normalize to Get Final Probabilities
+
+ Sum of probabilities:
+ Sum = 0.0818 + 0.0266 = 0.1084
+
+ Normalize:
+ P(Yes|Rainy,Hot) = 0.0818 / 0.1084
+                  = 0.755 (75.5%)
+
+ P(No|Rainy,Hot) = 0.0266 / 0.1084
+                 = 0.245 (24.5%)
+
+
+ โœ… FINAL PREDICTION: YES (Play Tennis!)
+ Confidence: 75.5% +
+
+
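All six steps above can be reproduced with a short script. This is a sketch of categorical Naive Bayes with Laplace smoothing on the tennis table; note that full precision gives 75.4% rather than 75.5%, because the text rounds each intermediate probability before multiplying:

```python
from collections import Counter

# Tennis dataset from the table above: (Outlook, Temperature, Play)
data = [
    ("Sunny", "Hot", "No"), ("Sunny", "Mild", "No"),
    ("Cloudy", "Hot", "Yes"), ("Rainy", "Mild", "Yes"),
    ("Rainy", "Cool", "Yes"), ("Cloudy", "Cool", "Yes"),
]
alpha = 1                  # Laplace smoothing strength
n_outlook, n_temp = 3, 3   # categories per feature

classes = Counter(row[2] for row in data)   # {'Yes': 4, 'No': 2}
total = sum(classes.values())

def p_feature(value, idx, cls, n_categories):
    """Smoothed P(feature = value | class = cls)."""
    count = sum(1 for row in data if row[idx] == value and row[2] == cls)
    return (count + alpha) / (classes[cls] + alpha * n_categories)

def score(cls, outlook, temp):
    """Prior times smoothed likelihoods (unnormalized)."""
    prior = classes[cls] / total
    return prior * p_feature(outlook, 0, cls, n_outlook) * p_feature(temp, 1, cls, n_temp)

s_yes = score("Yes", "Rainy", "Hot")   # (4/6) * (3/7) * (2/7)
s_no = score("No", "Rainy", "Hot")     # (2/6) * (1/5) * (2/5)
p_yes = s_yes / (s_yes + s_no)         # normalize as in Step 6
print(round(p_yes, 3))  # 0.754 -> predict YES
```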
+ +
+
+ +
+

Figure: Categorical Naive Bayes calculation visualization

+
+ +

๐ŸŽฏ PART B: Gaussian Naive Bayes (Step-by-Step from PDF)

+ +

Dataset: 2D Classification

+ + + + + + + + + + + + +
IDXโ‚Xโ‚‚Class
A1.02.0Yes
B2.01.0Yes
C1.51.8Yes
D3.03.0No
E3.52.8No
F2.93.2No
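For Part B's continuous features, Gaussian Naive Bayes replaces the category counts with a per-class normal distribution fitted by each feature's mean and variance. A minimal sketch on the dataset above; the test point (2.0, 2.0) is my own illustrative choice, not from the source:

```python
import math

# Dataset from the table above: (x1, x2) -> class
data = [
    ((1.0, 2.0), "Yes"), ((2.0, 1.0), "Yes"), ((1.5, 1.8), "Yes"),
    ((3.0, 3.0), "No"), ((3.5, 2.8), "No"), ((2.9, 3.2), "No"),
]

def fit(cls):
    """Per-feature mean and (population) variance for one class."""
    pts = [x for x, c in data if c == cls]
    stats = []
    for j in range(2):
        vals = [p[j] for p in pts]
        mu = sum(vals) / len(vals)
        var = sum((v - mu) ** 2 for v in vals) / len(vals)
        stats.append((mu, var))
    return stats

def gauss(x, mu, var):
    """Normal density N(x; mu, var)."""
    return math.exp(-(x - mu) ** 2 / (2 * var)) / math.sqrt(2 * math.pi * var)

def score(x, cls):
    """Prior times product of per-feature Gaussian likelihoods."""
    prior = sum(1 for _, c in data if c == cls) / len(data)
    s = prior
    for j, (mu, var) in enumerate(fit(cls)):
        s *= gauss(x[j], mu, var)
    return s

x_new = (2.0, 2.0)  # hypothetical test point near the "Yes" cluster
pred = max(["Yes", "No"], key=lambda c: score(x_new, c))
print(pred)  # Yes
```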
-
-

๐Ÿ’ช Try These:

-
    -
  1. Find P(IQ > 130) using same distribution
  2. -
  3. What IQ scores contain the middle 95% of people?
  4. -
  5. If z = -2, what percentile is this?
  6. -
- - +
-
-

๐ŸŽฏ Key Takeaways

-
    -
  • Symmetric bell curve, parameters ฮผ and ฯƒ
  • -
  • 68-95-99.7 rule for standard deviations
  • -
  • Foundation for hypothesis testing
  • -
  • Central Limit Theorem connects to sampling
  • -
-
-
- - -
-
- Topic 25 -

โš–๏ธ Hypothesis Testing Introduction

-

Making decisions from data

-
+ +
+
+

๐Ÿ” Unsupervised - Clustering K-means Clustering

+ +
+
+

K-means is an unsupervised learning algorithm that groups data into K clusters. Each cluster has a centroid (center point), and points are assigned to the nearest centroid. Perfect for customer segmentation, image compression, and pattern discovery!

+ +
+
Key Concepts
+
    +
  • Unsupervised: No labels needed!
  • +
  • K = number of clusters (you choose)
  • +
  • Minimizes Within-Cluster Sum of Squares (WCSS)
  • +
  • Iterative: Updates centroids until convergence
  • +
+
-
-

Introduction

-

What is it? Statistical method for testing claims about populations using sample data.

-

Why it matters: Allows us to make evidence-based decisions and determine if effects are real or due to chance.

-
+

๐ŸŽฏ Step-by-Step K-means Algorithm (from PDF)

-
-

The Two Hypotheses

-
    -
  • Null Hypothesis (Hโ‚€): Status quo, no effect, no difference
  • -
  • Alternative Hypothesis (Hโ‚ or Hโ‚): What we're trying to prove
  • +

    Dataset: 6 Points in 2D Space

    + + + + + + + + + + + + +
Point | X | Y
A | 1 | 2
B | 1.5 | 1.8
C | 5 | 8
D | 8 | 8
E | 1 | 0.6
F | 9 | 11
    + +

    Goal: Group into K=2 clusters

    +

    Initial Centroids: cโ‚ = [3, 4], cโ‚‚ = [5, 1]

    + +
    + Distance Formula (Euclidean):
    + d(point, centroid) = โˆš[(xโ‚-xโ‚‚)ยฒ + (yโ‚-yโ‚‚)ยฒ] +
    + +

    Iteration 1

    + +
    + Step 1: Calculate Distances to All Centroids
    +
    + Point A (1, 2):
    + d(A, cโ‚) = โˆš[(1-3)ยฒ + (2-4)ยฒ] = โˆš[4+4] = โˆš8 = 2.83
    + d(A, cโ‚‚) = โˆš[(1-5)ยฒ + (2-1)ยฒ] = โˆš[16+1] = โˆš17 = 4.12
    + โ†’ Assign to cโ‚ (closer)
    +
    + Point B (1.5, 1.8):
    + d(B, cโ‚) = โˆš[(1.5-3)ยฒ + (1.8-4)ยฒ] = โˆš[2.25+4.84] = 2.66
    + d(B, cโ‚‚) = โˆš[(1.5-5)ยฒ + (1.8-1)ยฒ] = โˆš[12.25+0.64] = 3.59
    + โ†’ Assign to cโ‚
    +
    + Point C (5, 8):
    + d(C, cโ‚) = โˆš[(5-3)ยฒ + (8-4)ยฒ] = โˆš[4+16] = 4.47
    + d(C, cโ‚‚) = โˆš[(5-5)ยฒ + (8-1)ยฒ] = โˆš[0+49] = 7.0
    + โ†’ Assign to cโ‚
    +
    + Point D (8, 8):
    + d(D, cโ‚) = โˆš[(8-3)ยฒ + (8-4)ยฒ] = โˆš[25+16] = 6.40
    + d(D, cโ‚‚) = โˆš[(8-5)ยฒ + (8-1)ยฒ] = โˆš[9+49] = 7.62
    + โ†’ Assign to cโ‚
    +
    + Point E (1, 0.6):
    + d(E, cโ‚) = โˆš[(1-3)ยฒ + (0.6-4)ยฒ] = โˆš[4+11.56] = 3.94
    + d(E, cโ‚‚) = โˆš[(1-5)ยฒ + (0.6-1)ยฒ] = โˆš[16+0.16] = 4.02
    + โ†’ Assign to cโ‚
    +
    + Point F (9, 11):
    + d(F, cโ‚) = โˆš[(9-3)ยฒ + (11-4)ยฒ] = โˆš[36+49] = 9.22
    + d(F, cโ‚‚) = โˆš[(9-5)ยฒ + (11-1)ยฒ] = โˆš[16+100] = 10.77
    + โ†’ Assign to cโ‚
    +
    + Result: Cluster 1 = {A, B, C, D, E, F}, Cluster 2 = {} +
    + +
    +
    โš ๏ธ Poor Initial Centroids!
    +
    + All points assigned to cโ‚! This happens with bad initialization. Let's try better initial centroids for the algorithm to work properly. +
    +
    + +

    Better Initial Centroids: cโ‚ = [1, 1], cโ‚‚ = [8, 9]

    + +
    + Iteration 1 (Revised):
    +
    + Cluster 1: {A, B, E} โ†’ cโ‚_new = mean = [(1+1.5+1)/3, (2+1.8+0.6)/3] = [1.17, 1.47]
    + Cluster 2: {C, D, F} โ†’ cโ‚‚_new = mean = [(5+8+9)/3, (8+8+11)/3] = [7.33, 9.00]
    +
    + WCSS Calculation:
    + WCSSโ‚ = dยฒ(A,cโ‚) + dยฒ(B,cโ‚) + dยฒ(E,cโ‚)
    +        = (1-1.17)ยฒ+(2-1.47)ยฒ + (1.5-1.17)ยฒ+(1.8-1.47)ยฒ + (1-1.17)ยฒ+(0.6-1.47)ยฒ
    +        = 0.311 + 0.218 + 0.786 = 1.315
    +
    + WCSSโ‚‚ = dยฒ(C,cโ‚‚) + dยฒ(D,cโ‚‚) + dยฒ(F,cโ‚‚)
    +        = (5-7.33)ยฒ+(8-9)ยฒ + (8-7.33)ยฒ+(8-9)ยฒ + (9-7.33)ยฒ+(11-9)ยฒ
    +        = 6.433 + 1.447 + 6.789 = 14.669
    +
    + Total WCSS = 1.315 + 14.669 = 15.984 +
    + +
    + Iteration 2:
    +
    + Using cโ‚ = [1.17, 1.47] and cโ‚‚ = [7.33, 9.00], recalculate distances...
    +
    + Result: Same assignments! Centroids don't change.
    + โœ“ Converged! +
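The two iterations above can be replayed with a small K-means loop, starting from the better initial centroids cโ‚ = [1, 1] and cโ‚‚ = [8, 9]; it converges to the same clusters and centroids as the walkthrough:

```python
# Minimal K-means on the six points above.
points = {"A": (1, 2), "B": (1.5, 1.8), "C": (5, 8),
          "D": (8, 8), "E": (1, 0.6), "F": (9, 11)}
centroids = [(1.0, 1.0), (8.0, 9.0)]  # the "better" initial centroids

def assign(centroids):
    """Map each point name to the index of its nearest centroid."""
    out = {}
    for name, (x, y) in points.items():
        d2 = [(x - cx) ** 2 + (y - cy) ** 2 for cx, cy in centroids]
        out[name] = d2.index(min(d2))
    return out

for it in range(10):
    labels = assign(centroids)
    new = []
    for k in range(2):
        members = [points[n] for n, lab in labels.items() if lab == k]
        new.append((sum(p[0] for p in members) / len(members),
                    sum(p[1] for p in members) / len(members)))
    if new == centroids:   # converged: centroids stopped moving
        break
    centroids = new

cluster1 = sorted(n for n, lab in labels.items() if lab == 0)
print(cluster1)                             # ['A', 'B', 'E']
print([round(c, 2) for c in centroids[0]])  # [1.17, 1.47]
print([round(c, 2) for c in centroids[1]])  # [7.33, 9.0]
```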
    + +
    +
    + +
    +

    Figure: K-means clustering visualization with centroid movement

    +
    + +

    Finding Optimal K: The Elbow Method

    + +

    How do we choose K? Try different values and plot WCSS!

    + +
    + WCSS for Different K Values:
    +
    + K=1: WCSS = 50.0 (all in one cluster)
    + K=2: WCSS = 18.0
    + K=3: WCSS = 10.0 โ† Elbow point!
    + K=4: WCSS = 8.0
    + K=5: WCSS = 7.0
    +
    + Rule: Choose K at the "elbow" where WCSS stops decreasing rapidly +
    + +
    +
    + +
    +

    Figure: Elbow method - optimal K is where the curve bends

    +
    + +
    +
    ๐Ÿ’ก K-means Tips
    +
    + Advantages:
    + โœ“ Simple and fast
    + โœ“ Works well with spherical clusters
    + โœ“ Scales to large datasets
    +
    + Disadvantages:
    + โœ— Need to specify K in advance
    + โœ— Sensitive to initial centroids (use K-means++!)
    + โœ— Assumes spherical clusters
    + โœ— Sensitive to outliers
    +
    + Solutions:
    + โ€ข Use elbow method for K
    + โ€ข Use K-means++ initialization
    + โ€ข Run multiple times with different initializations +
    +
    + +

    Real-World Applications

    +
      +
    • Customer Segmentation: Group customers by behavior
    • +
    • Image Compression: Reduce colors in images
    • +
    • Document Clustering: Group similar articles
    • +
    • Anomaly Detection: Points far from centroids are outliers
    • +
    • Feature Learning: Learn representations for neural networks
+
-
-

Decision Process

-
    -
  1. State hypotheses (Hโ‚€ and Hโ‚)
  2. -
  3. Choose significance level (ฮฑ)
  4. -
  5. Collect data and calculate test statistic
  6. -
  7. Find p-value or critical value
  8. -
  9. Make decision: Reject Hโ‚€ or Fail to reject Hโ‚€
  10. -
-
+ +
+
+

๐Ÿ“Š Supervised: Decision Trees

+ +
+
+

Decision Trees make decisions by asking yes/no questions recursively. They're interpretable, powerful, and the foundation for ensemble methods like Random Forests!

+ +
+
Key Concepts
+
    +
  • Recursive partitioning of feature space
  • +
  • Each node asks a yes/no question
  • +
  • Leaves contain predictions
  • +
  • Uses Information Gain or Gini Impurity for splitting
  • +
+
-
-
๐Ÿ“Š EXAMPLE
-

Claim: New teaching method improves test scores

-

Hโ‚€: ฮผ = 75 (no improvement)

-

Hโ‚: ฮผ > 75 (scores improved)

-
+

How Decision Trees Work

+

Imagine you're playing "20 Questions" to guess an animal. Each question splits possibilities into two groups. Decision Trees work the same way!

-
-

๐ŸŽฏ Key Takeaways

-
    -
  • Hโ‚€ = null hypothesis (status quo)
  • -
  • Hโ‚ = alternative hypothesis (what we test)
  • -
  • We either reject or fail to reject Hโ‚€
  • -
  • Never "accept" or "prove" anything
  • -
-
-
- - -
-
- Topic 26 -

๐ŸŽฏ Significance Level (ฮฑ)

-

Setting your error tolerance

-
+
+
+ +
+

Figure 1: Interactive decision tree structure

+
-
-

Introduction

-

What is it? ฮฑ (alpha) is the probability of rejecting Hโ‚€ when it's actually true (Type I error rate).

-

Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)

-
+

Splitting Criteria

+

How do we choose which question to ask at each node? We want splits that maximize information gain!

-
-

Interpretation

-
    -
  • ฮฑ = 0.05: Willing to be wrong 5% of the time
  • -
  • Lower ฮฑ: More stringent, harder to reject Hโ‚€
  • -
  • Higher ฮฑ: More lenient, easier to reject Hโ‚€
  • -
  • Confidence level: 1 - ฮฑ (e.g., 0.05 โ†’ 95% confidence)
  • -
-
+

1. Entropy (Information Theory)

+
+ Entropy Formula: + H(S) = -ฮฃ pแตข ร— logโ‚‚(pแตข)
+
+ where pแตข = proportion of class i
+
+ Interpretation:
+ โ€ข Entropy = 0: Pure (all same class)
+ โ€ข Entropy = 1: Maximum disorder (50-50 split)
+ โ€ข Lower entropy = better! +
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Explain the difference between ฮฑ = 0.05 and ฮฑ = 0.01. Which is more strict? Find critical values for both in a two-tailed test.

+

2. Information Gain

+
+ Information Gain Formula: + IG(S, A) = H(S) - ฮฃ |Sแตฅ|/|S| ร— H(Sแตฅ)
+
+ = Entropy before split - Weighted entropy after split
+
+ We choose the split with HIGHEST information gain!
- -
-

Solution:

- -
-
Step 1:
-
-

Understand ฮฑ = 0.05

-
- ฮฑ = 0.05 means 5% significance
- 95% confidence level (1 - 0.05)
- P(Type I error) = 5%
- Willing to be wrong 5% of the time
-
-
-
- -
-
Step 2:
-
-

Understand ฮฑ = 0.01

-
- ฮฑ = 0.01 means 1% significance
- 99% confidence level (1 - 0.01)
- P(Type I error) = 1%
- Only willing to be wrong 1% of the time
-
-
-
- -
-
Step 3:
-
-

Find Critical Values for ฮฑ = 0.05

-
- Two-tailed: split ฮฑ into both tails
- Each tail = 0.05/2 = 0.025
- Zโ‚€.โ‚‰โ‚‡โ‚… = ยฑ1.96
- Reject if |z| > 1.96
-
-
+ +
+
+
- -
-
Step 4:
-
-

Find Critical Values for ฮฑ = 0.01

-
- Two-tailed: each tail = 0.01/2 = 0.005
- Zโ‚€.โ‚‰โ‚‰โ‚… = ยฑ2.576
- Reject if |z| > 2.576
- Harder to reject (more strict!)
-
-
+

Figure 2: Entropy and Information Gain visualization

+
+ +

3. Gini Impurity (Alternative)

+
+ Gini Formula: + Gini(S) = 1 - ฮฃ pแตขยฒ
+
+ Interpretation:
+ โ€ข Gini = 0: Pure
+ โ€ข Gini = 0.5: Maximum impurity (binary)
+ โ€ข Faster to compute than entropy +
+ +

Worked Example: Email Classification

+

Dataset: 10 emails - 7 spam, 3 not spam

+ +
+
๐Ÿ“Š Calculating Information Gain
+
+ Initial Entropy:
+ H(S) = -7/10ร—logโ‚‚(7/10) - 3/10ร—logโ‚‚(3/10)
+ H(S) = 0.881 bits
+
+ Split by "Contains 'FREE'":
+ โ€ข Left (5 emails): 4 spam, 1 not โ†’ H = 0.722
+ โ€ข Right (5 emails): 3 spam, 2 not โ†’ H = 0.971
+
+ Weighted Entropy:
+ = 5/10 ร— 0.722 + 5/10 ร— 0.971 = 0.847
+
+ Information Gain:
+ IG = 0.881 - 0.847 = 0.034 bits
+
+ Split by "Has suspicious link":
+ IG = 0.156 bits โ† BETTER! Use this split!
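The entropy and information-gain arithmetic above can be checked directly. A short sketch; at full precision the "Contains 'FREE'" gain comes out as 0.035 bits (the 0.034 in the box reflects rounding the intermediate entropies first):

```python
import math

def entropy(counts):
    """Shannon entropy in bits for a list of class counts."""
    total = sum(counts)
    return -sum((c / total) * math.log2(c / total) for c in counts if c > 0)

# 10 emails: 7 spam, 3 not spam
h_root = entropy([7, 3])
print(round(h_root, 3))  # 0.881

# Split by "Contains 'FREE'": left = 4 spam / 1 not, right = 3 spam / 2 not
h_left, h_right = entropy([4, 1]), entropy([3, 2])
weighted = 5 / 10 * h_left + 5 / 10 * h_right
ig = h_root - weighted
print(round(ig, 3))  # 0.035
```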
- -
-
Step 5:
-
-

Compare

-
- ฮฑ = 0.01 is MORE STRICT
- Requires stronger evidence to reject Hโ‚€
- Reduces Type I errors but increases Type II
-
-
+
+ +
+
+
- -
- โœ“ Final Answer: - ฮฑ = 0.05: z = ยฑ1.96; ฮฑ = 0.01: z = ยฑ2.576 (more strict) +

Figure 3: Comparing different splits by information gain

+
+ +

Decision Boundaries

+
+
+
+

Figure 4: Decision tree creates rectangular regions

- -
-

๐Ÿ’ช Practice Problems:

-
    -
  1. Find critical value for ฮฑ = 0.10, two-tailed
  2. -
  3. If we want to be very strict, should we use ฮฑ = 0.05 or ฮฑ = 0.001?
  4. -
  5. What happens to Type II error when ฮฑ decreases?
  6. -
+ +

Overfitting in Decision Trees

+
+
โš ๏ธ The Overfitting Problem
+
+ Without constraints, decision trees grow until each leaf has ONE sample!
+
+ Solutions:
+ โ€ข Max depth: Limit tree height (e.g., max_depth=5)
+ โ€ข Min samples split: Need X samples to split (e.g., min=10)
+ โ€ข Min samples leaf: Each leaf must have X samples
+ โ€ข Pruning: Grow full tree, then remove branches +
+ +

Advantages vs Disadvantages

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
Advantages โœ… | Disadvantages โŒ
Easy to understand and interpret | Prone to overfitting
No feature scaling needed | Small changes โ†’ big tree changes
Handles non-linear relationships | Biased toward features with more levels
Works with mixed data types | Can't extrapolate beyond training data
Fast prediction | Less accurate than ensemble methods
+
+ + + + +
+
+

๐ŸŽฎ Reinforcement: Introduction to Reinforcement Learning

+ +
+
+

Reinforcement Learning (RL) is learning by trial and error, just like teaching a dog tricks! The agent takes actions in an environment, receives rewards or punishments, and learns which actions lead to the best outcomes.

+ +
+
Key Concepts
+
    +
  • Agent: The learner/decision maker
  • +
  • Environment: The world the agent interacts with
  • +
  • State: Current situation of the agent
  • +
  • Action: What the agent can do
  • +
  • Reward: Feedback signal (positive or negative)
  • +
  • Policy: Strategy the agent follows
  • +
+
+ +

The RL Loop

+
    +
  1. Observe state: Agent sees current situation
  2. +
  3. Choose action: Based on policy ฯ€(s)
  4. +
  5. Execute action: Interact with environment
  6. +
  7. Receive reward: Get feedback r
  8. +
  9. Transition to new state: Environment changes to s'
  10. +
  11. Learn and update: Improve policy
  12. +
+ +
+
๐Ÿ’ก Key Difference from Supervised Learning
+
+ Supervised: "Here's the right answer for each example"
+ Reinforcement: "Try things and I'll tell you if you did well or poorly"
+
+ RL must explore to discover good actions, while supervised learning is given correct answers upfront! +
+
-
-

๐ŸŽฏ Key Takeaways

+

Real-World Examples

    -
  • ฮฑ = probability of Type I error
  • -
  • Common: ฮฑ = 0.05 (5% error rate)
  • -
  • Set before collecting data
  • -
  • Trade-off between Type I and Type II errors
  • +
  • Game Playing: AlphaGo learning to play Go by playing millions of games
  • +
  • Robotics: Robot learning to walk by trying different leg movements
  • +
  • Self-Driving Cars: Learning to drive safely through experience
  • +
  • Recommendation Systems: Learning what users like from their interactions
  • +
  • Resource Management: Optimizing data center cooling to save energy
-
-
- - -
-
- Topic 27 -

๐Ÿ“Š Standard Error

-

Measuring sampling variability

-
-
-

Introduction

-

What is it? Standard error (SE) measures how much sample means vary from the true population mean.

-
+

Exploration vs Exploitation

+

The fundamental dilemma in RL:

+
    +
  • Exploration: Try new actions to discover better rewards
  • +
  • Exploitation: Use known good actions to maximize reward
  • +
+

Balance is key! Too much exploration wastes time on bad actions. Too much exploitation misses better strategies.

-
-

Formula

-
-
Standard Error of Mean
-
SE = ฯƒ / โˆšn
-

or estimate: SE = s / โˆšn

+
+ Reward Signal: + Total Return = R = rโ‚ + ฮณrโ‚‚ + ฮณยฒrโ‚ƒ + ... = ฮฃ ฮณแต— rแต—โ‚Šโ‚ +
where:
ฮณ = discount factor (0 โ‰ค ฮณ โ‰ค 1)
Future rewards are worth less than immediate rewards
+
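The discounted-return formula above is a one-liner in code. A sketch with a hypothetical reward sequence (three steps of +1, not from the source):

```python
# Discounted return G = r1 + gamma*r2 + gamma^2*r3 + ...
gamma = 0.9
rewards = [1, 1, 1]  # hypothetical: +1 reward on each of three steps

G = sum(gamma ** t * r for t, r in enumerate(rewards))
print(round(G, 2))  # 2.71  (= 1 + 0.9 + 0.81)
```

With gamma close to 1 the agent is far-sighted; with gamma near 0 it cares almost only about the immediate reward.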
-
-

Key Points

-
    -
  • Decreases as sample size increases
  • -
  • Measures precision of sample mean
  • -
  • Lower SE = better estimate
  • -
  • Used in confidence intervals and hypothesis tests
  • + +
    +
    +

๐ŸŽฎ Reinforcement: Q-Learning

    + +
    +
    +

    Q-Learning is a value-based RL algorithm that learns the quality (Q-value) of taking each action in each state. It's model-free and can learn optimal policies even without knowing how the environment works!

    + +
    +
    Key Concepts
    +
      +
    • Q-value: Expected future reward for action a in state s
    • +
    • Q-table: Stores Q-values for all state-action pairs
    • +
    • Off-policy: Can learn optimal policy while following exploratory policy
    • +
    • Temporal Difference: Learn from each step, not just end of episode
    • +
    +
    + +
    + Q-Learning Update Rule: + Q(s, a) โ† Q(s, a) + ฮฑ[r + ฮณ ยท max Q(s', a') - Q(s, a)] +

    + Breaking it down:
    + Q(s, a) = Current Q-value estimate
    + ฮฑ = Learning rate (e.g., 0.1)
    + r = Immediate reward received
    + ฮณ = Discount factor (e.g., 0.9)
    + max Q(s', a') = Best Q-value in next state
    + [r + ฮณ ยท max Q(s', a') - Q(s, a)] = TD error (how wrong we were) +
    + +

    Step-by-Step Example: Grid World Navigation

    +

    Problem: Agent navigates 3x3 grid to reach goal at (2,2)

    + +
    +
    STEP 1: Initialize Q-Table
    +
    + States: 9 positions (0,0) to (2,2)
    + Actions: 4 directions (Up, Down, Left, Right)
    +
    + Q-table: 9 ร— 4 = 36 values, all initialized to 0
    +
    + Example entry: Q((1,1), Right) = 0.0 +
    +
    + +
    +
    STEP 2: Episode 1 - Random Exploration
    +
    + Start: s = (0,0)
    +
    + Step 1: Choose action a = Right (ฮต-greedy)
    + Execute: Move to s' = (0,1)
    + Reward: r = -1 (penalty for each step)
    +
    + Update Q((0,0), Right):
    + Q = 0 + 0.1[-1 + 0.9 ร— max(0, 0, 0, 0) - 0]
    + Q = 0 + 0.1[-1]
    + Q((0,0), Right) = -0.1 โœ“
    +
    + Step 2: s = (0,1), action = Down
    + s' = (1,1), r = -1
    + Q((0,1), Down) = 0 + 0.1[-1 + 0] = -0.1
    +
    + Step 3: s = (1,1), action = Right
    + s' = (1,2), r = -1
    + Q((1,1), Right) = -0.1
    +
    + Step 4: s = (1,2), action = Down
    + s' = (2,2) โ† GOAL!
    + r = +100 (big reward!)
    +
    + Q((1,2), Down) = 0 + 0.1[100 + 0]
    + Q((1,2), Down) = 10.0 โœ“โœ“โœ“ +
    +
    + +
    +
    STEP 3: Episode 2 - Learning Propagates Backward
    +
    + Path: (0,0) โ†’ (0,1) โ†’ (1,1) โ†’ (1,2) โ†’ (2,2)
    +
    + At (1,1), choosing Right:
    + Q((1,1), Right) = -0.1 + 0.1[-1 + 0.9 ร— 10.0 - (-0.1)]
    + = -0.1 + 0.1[-1 + 9.0 + 0.1]
    + = -0.1 + 0.1[8.1]
    + = -0.1 + 0.81
    + Q((1,1), Right) = 0.71 โœ“
    +
    + โ†’ The value of being near the goal propagates backward! +
    +
    + +
    +
    โœ… After Many Episodes
    +
    + The Q-table converges to optimal values:
    +
+ Q((0,0), Right) โ‰ˆ 70.2
+ Q((1,1), Right) โ‰ˆ 89.0  (= -1 + 0.9 ร— 100)
+ Q((1,2), Down) = 100.0  (goal is terminal, no future term)
    +
    + Optimal Policy: Always move toward (2,2) via shortest path!
    + Agent has learned to navigate perfectly through trial and error. +
    +
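The update rule used throughout the walkthrough fits in one small function; here it reproduces the Episode 1 goal update and the Episode 2 backward-propagation update:

```python
def q_update(q_sa, reward, max_q_next, alpha=0.1, gamma=0.9):
    """One temporal-difference update of Q(s, a)."""
    td_error = reward + gamma * max_q_next - q_sa
    return q_sa + alpha * td_error

# Episode 1, step 4: reaching the goal from (1,2), r = +100, terminal next state
q = q_update(0.0, 100, 0.0)
print(round(q, 2))   # 10.0

# Episode 2, at (1,1) choosing Right, where max Q((1,2)) is now 10.0
q2 = q_update(-0.1, -1, 10.0)
print(round(q2, 2))  # 0.71
```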
    + +

    ฮต-Greedy Policy

    +
    + Action Selection:
    + With probability ฮต: Choose random action (explore)
    + With probability 1-ฮต: Choose argmax Q(s,a) (exploit)
    +
    + Common: Start ฮต=1.0, decay to ฮต=0.01 over time +
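The action-selection rule above is a few lines of Python. A sketch; the Q-values are hypothetical:

```python
import random

def epsilon_greedy(q_values, epsilon):
    """Pick a random action with probability epsilon, else the argmax action."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))     # explore
    return max(range(len(q_values)), key=lambda a: q_values[a])  # exploit

q = [0.2, 0.8, -0.1, 0.5]  # hypothetical Q-values for 4 actions
print(epsilon_greedy(q, epsilon=0.0))  # 1  (pure exploitation: argmax)
```

In training, epsilon typically starts near 1.0 and decays each episode toward a small floor such as 0.01.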
    + +

    Advantages

    +
      +
    • โœ“ Simple to implement
    • +
    • โœ“ Guaranteed to converge to optimal policy
    • +
    • โœ“ Model-free (doesn't need environment model)
    • +
    • โœ“ Off-policy (learn from exploratory behavior)
    • +
    + +

    Disadvantages

    +
      +
    • โœ— Doesn't scale to large/continuous state spaces
    • +
    • โœ— Slow convergence in complex environments
    • +
    • โœ— Requires discrete actions
    +
    - -
    -

    ๐Ÿ“ Worked Example - Step by Step

    - -
    -

    Problem:

    -

    Population has ฯƒ = 20. Calculate standard error for sample sizes: n = 4, n = 16, n = 64, n = 100. What pattern do you notice?

    + +
    +
    +

๐ŸŽฎ Reinforcement: Policy Gradient Methods

    + +
    +
    +

    Policy Gradient methods directly optimize the policy (action selection strategy) instead of learning value functions. They're powerful for continuous action spaces and stochastic policies!

    + +
    +
    Key Concepts
    +
      +
  • Direct policy optimization: Learn ฯ€_ฮธ(a|s) directly
    • +
    • Parameterized policy: Use neural network with weights ฮธ
    • +
    • Gradient ascent: Move parameters to maximize expected reward
    • +
    • Works with continuous actions: Can output action distributions
    • +
    - -
    -

    Solution:

    - -
    -
    Step 1:
    -
    -

    Recall Standard Error Formula

    -
    - SE = ฯƒ / โˆšn
    - Where:
    - - ฯƒ = population standard deviation
    - - n = sample size
    - SE measures variability of sample means
    -
    -
    + +

    Policy vs Value-Based Methods

    + + + + + + + + + + + +
Aspect | Value-Based (Q-Learning) | Policy-Based
What it learns | Q(s,a) values | ฯ€(a|s) policy directly
Action selection | argmax Q(s,a) | Sample from ฯ€(a|s)
Continuous actions | Difficult | Natural
Stochastic policy | Indirect | Direct
Convergence | Can be unstable | Smoother
    + +
+ Policy Gradient Theorem: + โˆ‡_ฮธ J(ฮธ) = E_ฯ€[โˆ‡_ฮธ log ฯ€_ฮธ(a|s) ยท Q^ฯ€(s,a)] +

    + Practical form (REINFORCE):
+ โˆ‡_ฮธ J(ฮธ) โ‰ˆ โˆ‡_ฮธ log ฯ€_ฮธ(aแต—|sแต—) ยท Gแต—
+
+ where:
+ Gแต— = Total return from time t onward
+ ฯ€_ฮธ(a|s) = Probability of action a in state s
    + ฮธ = Policy parameters (neural network weights) +
    + +

    REINFORCE Algorithm (Monte Carlo Policy Gradient)

    +
    +
    Algorithm Steps
    +
    + 1. Initialize: Random policy parameters ฮธ
    +
    + 2. For each episode:
    +    a. Generate trajectory: sโ‚€, aโ‚€, rโ‚, sโ‚, aโ‚, rโ‚‚, ..., sโ‚œ
    +    b. For each time step t:
    +       - Calculate return: Gแต— = rแต—โ‚Šโ‚ + ฮณrแต—โ‚Šโ‚‚ + ฮณยฒrแต—โ‚Šโ‚ƒ + ...
    +       - Update: ฮธ โ† ฮธ + ฮฑ ยท Gแต— ยท โˆ‡แตง log ฯ€แตง(aแต—|sแต—)
    +
    + 3. Repeat until policy converges +
    +
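The smallest working instance of these steps is a two-armed bandit with a softmax policy (my own toy setup, not from the source): arm 1 always pays +1 and arm 0 pays 0, so the REINFORCE update should drive the policy toward always choosing arm 1:

```python
import math
import random

random.seed(0)

theta = [0.0, 0.0]  # policy parameters, one preference per arm

def policy(theta):
    """Softmax over arm preferences."""
    exps = [math.exp(t) for t in theta]
    z = sum(exps)
    return [e / z for e in exps]

alpha = 0.1
for episode in range(2000):
    pi = policy(theta)
    a = 0 if random.random() < pi[0] else 1  # sample an arm from the policy
    r = 1.0 if a == 1 else 0.0               # arm 1 is the good arm
    G = r                                    # one-step episode: return = reward
    # REINFORCE: theta_k += alpha * G * d/dtheta_k log pi(a)
    # For a softmax policy, that gradient is (1[k == a] - pi[k]).
    for k in range(2):
        theta[k] += alpha * G * ((1.0 if k == a else 0.0) - pi[k])

print(policy(theta)[1] > 0.9)  # True: the policy learned to prefer arm 1
```

Good outcomes raise the probability of the actions taken, exactly as described above; a full CartPole version replaces the two parameters with a neural network.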
    + +

    Example: CartPole Balancing

    +

    Problem: Balance a pole on a cart by moving left or right

    + +
    +
    Episode Example
    +
    + State: s = [cart_pos, cart_vel, pole_angle, pole_vel]
    + Actions: a โˆˆ {Left, Right}
    +
    + Time t=0:
    + sโ‚€ = [0.0, 0.0, 0.1, 0.0] (pole leaning right)
    + ฯ€(Left|sโ‚€) = 0.3, ฯ€(Right|sโ‚€) = 0.7
    + Sample action: aโ‚€ = Right
    + Reward: rโ‚ = +1 (pole still balanced)
    +
    + Time t=1:
    + sโ‚ = [0.05, 0.1, 0.08, -0.05]
    + Action: aโ‚ = Right
    + rโ‚‚ = +1
    +
    + ... episode continues for T=200 steps ...
    +
    + Total return: G = 200 (balanced entire episode!)
    +
    + Update policy:
    + For each (sแต—, aแต—) in trajectory:
    + ฮธ โ† ฮธ + 0.01 ร— 200 ร— โˆ‡ log ฯ€(aแต—|sแต—)
    +
    + โ†’ Increase probability of all actions taken in this successful episode! +
    +
    + +
    +
    ๐Ÿ’ก Why It Works
    +
    + Good episode (high G): Increase probability of actions taken
    + Bad episode (low G): Decrease probability of actions taken
    +
    + Over many episodes, the policy learns which actions lead to better outcomes! +
    +
    + +

    Advantages

    +
      +
    • โœ“ Works with continuous action spaces
    • +
    • โœ“ Can learn stochastic policies
    • +
    • โœ“ Better convergence properties
    • +
    • โœ“ Effective in high-dimensional spaces
    • +
    + +

    Disadvantages

    +
      +
    • โœ— High variance in gradient estimates
    • +
    • โœ— Sample inefficient (needs many episodes)
    • +
    • โœ— Can get stuck in local optima
    • +
    • โœ— Sensitive to learning rate
    • +
    + +
    +
    โœ… Modern Improvements
    +
    + Actor-Critic: Combine policy gradient with value function to reduce variance
    + PPO (Proximal Policy Optimization): Constrain policy updates for stability
    + TRPO (Trust Region): Guarantee monotonic improvement
    +
    + These advances make policy gradients practical for complex tasks like robot control and game playing!
    - -
    -
    Step 2:
    -
    -

    Calculate SE for n = 4

    -
    - SE = 20 / โˆš4
    - SE = 20 / 2
    - SE = 10
    -
    -
    +
    +
    +
    + + +
    +
    +

๐Ÿ”„ Algorithm Comparison Tool

    + +
    +
    +

    Compare machine learning algorithms side-by-side to choose the best one for your problem!

    + + +
    +

    Step 1: Select Learning Category

    +
    + + +
    - -
    -
    Step 3:
    -
    -

    Calculate SE for n = 16

    -
    - SE = 20 / โˆš16
    - SE = 20 / 4
    - SE = 5
    -
    -
    +
    + + +
    +

    Step 2: Select Algorithms to Compare (2-5)

    +
    +
    - -
    -
    Step 4:
    -
    -

    Calculate SE for n = 64

    -
    - SE = 20 / โˆš64
    - SE = 20 / 8
    - SE = 2.5
    -
    -
    +

    Selected: 0 algorithms

    +
    + + +
    + +
    + + +
- - -
-
- Topic 28 -

๐Ÿ“ Z-Test

-

Hypothesis test for large samples with known ฯƒ

-
+ + -
-

Formula

-
-
Z-Test Statistic
-
z = (xฬ„ - ฮผโ‚€) / (ฯƒ / โˆšn)
-

xฬ„ = sample mean

-

ฮผโ‚€ = hypothesized population mean

-

ฯƒ = population standard deviation

-

n = sample size

-
-
+ + - -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

A factory claims ฮผ = 100. Sample: n = 36, xฬ„ = 105, ฯƒ = 12. Test at ฮฑ = 0.05 (two-tailed).

+ +
+ +
- -
-

Solution:

- -
-
Step 1:
-
-

State Hypotheses

-
- Hโ‚€: ฮผ = 100 (claim is true)
- Hโ‚: ฮผ โ‰  100 (claim is false)
- ฮฑ = 0.05, two-tailed test
+ + +
+

๐ŸŽฏ Not Sure Which Algorithm? Take the Quiz!

+
+
+

Question 1: Do you have labeled data?

+
+ +
-
- -
-
Step 2:
-
-

Calculate Standard Error

-
- SE = ฯƒ / โˆšn
- SE = 12 / โˆš36
- SE = 12 / 6
- SE = 2
+ -
- -
-
Step 3:
-
-

Calculate Z-Statistic

-
- z = (xฬ„ - ฮผโ‚€) / SE
- z = (105 - 100) / 2
- z = 5 / 2
- z = 2.5
+ -
- -
-
Step 4:
-
-

Find Critical Values

-
- ฮฑ = 0.05, two-tailed
- Critical values: z = ยฑ1.96
- Rejection regions: z < -1.96 or z > 1.96
+ -
- -
-
Step 5:
-
-

Make Decision

-
- Test statistic: z = 2.5
- Critical value: z = 1.96
- 2.5 > 1.96 โ†’ In rejection region
-
- REJECT Hโ‚€
-
+
- -
-
Step 6:
-
-

Interpret

-
- There IS significant evidence that ฮผ โ‰  100
- The sample mean of 105 is statistically different
- Factory's claim is likely false
-
-
-
- -
- โœ“ Final Answer: - z = 2.5 > 1.96, REJECT Hโ‚€ (claim is false) -
- -
- Check: -

P-value = 2 ร— P(Z > 2.5) = 2 ร— 0.0062 = 0.0124 < 0.05 โœ“ Confirms rejection

-
- -
-

๐Ÿ’ช Practice Problems:

-
    -
  1. Test: ฮผโ‚€ = 50, xฬ„ = 48, ฯƒ = 10, n = 25, ฮฑ = 0.05
  2. -
  3. If z = -1.5, ฮฑ = 0.05, two-tailed, what's your decision?
  4. -
  5. When should we use z-test vs t-test?
  6. -
-
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Use when n โ‰ฅ 30 and ฯƒ known
  • -
  • z = (xฬ„ - ฮผโ‚€) / SE
  • -
  • Compare z to critical value or find p-value
  • -
  • Large |z| = evidence against Hโ‚€
  • -
-
-
- - -
-
- Topic 29 -

๐ŸŽš๏ธ Z-Score & Critical Values

-

Standardization and rejection regions

+
-
-

Z-Score (Standardization)

-
-
Z-Score Formula
-
z = (x - ฮผ) / ฯƒ
-

Converts any normal distribution to standard normal (ฮผ=0, ฯƒ=1)

+ +
+
+

๐Ÿ“Š Supervised: Ensemble Methods

+ +
+
+

"Wisdom of the crowds" applied to machine learning! Ensemble methods combine multiple weak learners to create a strong learner. They power most Kaggle competition winners!

+ +
+
Key Concepts
+
    +
  • Combine multiple models for better predictions
  • +
  • Bagging: Train on random subsets (parallel)
  • +
  • Boosting: Sequential learning from mistakes
  • +
  • Stacking: Meta-learner combines base models
  • +
-
-
-

Critical Values

-
    -
  • ฮฑ = 0.05 (two-tailed): z = ยฑ1.96
  • -
  • ฮฑ = 0.05 (one-tailed): z = 1.645
  • -
  • ฮฑ = 0.01 (two-tailed): z = ยฑ2.576
  • -
-
+

Why Ensembles Work

+

Imagine 100 doctors diagnosing a patient. Even if each is only 70% accurate individually, their majority vote is over 95% accurate, provided their errors are independent. The same principle applies to ML.

- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Find critical z-values for: a) ฮฑ = 0.05 one-tailed (right), b) ฮฑ = 0.05 two-tailed, c) ฮฑ = 0.01 two-tailed. Draw rejection regions.

-
- -
-

Solution:

- -
-
Step 1:
-
-

One-Tailed Right (ฮฑ = 0.05)

-
- All ฮฑ in right tail
- Find z where P(Z > z) = 0.05
- P(Z โ‰ค z) = 1 - 0.05 = 0.95
- From z-table: zโ‚€.โ‚‰โ‚… = 1.645
-
- Critical value: z = 1.645
- Reject Hโ‚€ if z > 1.645
-
-
-
- -
-
Step 2:
-
-

Two-Tailed (ฮฑ = 0.05)

-
- Split ฮฑ between both tails
- Each tail = 0.05/2 = 0.025
- Left tail: P(Z < z) = 0.025 โ†’ z = -1.96
- Right tail: P(Z > z) = 0.025 โ†’ z = +1.96
-
- Critical values: z = ยฑ1.96
- Reject Hโ‚€ if |z| > 1.96
-
-
-
- -
-
Step 3:
-
-

Two-Tailed (ฮฑ = 0.01)

-
- More strict test
- Each tail = 0.01/2 = 0.005
- P(Z < z) = 0.005 โ†’ z = -2.576
- P(Z > z) = 0.005 โ†’ z = +2.576
-
- Critical values: z = ยฑ2.576
- Reject Hโ‚€ if |z| > 2.576
-
-
-
- -
-
Step 4:
-
-

Visualize Rejection Regions

-
- One-tailed (ฮฑ=0.05): [______|โ–ˆโ–ˆโ–ˆโ–ˆ] z > 1.645
- Two-tailed (ฮฑ=0.05): [โ–ˆโ–ˆ|________|โ–ˆโ–ˆ] |z| > 1.96
- Two-tailed (ฮฑ=0.01): [โ–ˆ|__________|โ–ˆ] |z| > 2.576
-
- Smaller ฮฑ โ†’ Larger critical values โ†’ Harder to reject
-
-
-
- -
- โœ“ Final Answer: - a) z = 1.645, b) z = ยฑ1.96, c) z = ยฑ2.576 +
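The same three critical values can be recovered without a z-table using Python's `statistics.NormalDist` (standard library, 3.8+); a quick sketch:

```python
from statistics import NormalDist

std = NormalDist()  # standard normal: mu = 0, sigma = 1

z_one = std.inv_cdf(0.95)      # a) one-tailed right, alpha = 0.05
z_two_05 = std.inv_cdf(0.975)  # b) two-tailed, alpha = 0.05 (0.025 per tail)
z_two_01 = std.inv_cdf(0.995)  # c) two-tailed, alpha = 0.01 (0.005 per tail)

print(round(z_one, 3))     # 1.645
print(round(z_two_05, 2))  # 1.96
print(round(z_two_01, 3))  # 2.576
```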
+
๐ŸŽฏ The Magic of Diversity
+
+ Key insight: Each model makes DIFFERENT errors!
+
+ Model A: Correct on samples [1,2,3,5,7,9] - 60% accuracy
+ Model B: Correct on samples [2,4,5,6,8,10] - 60% accuracy
+ Model C: Correct on samples [1,3,4,6,7,8] - 60% accuracy
+
+ Majority vote: Correct on [1,2,3,4,5,6,7,8] - 80% accuracy!
+
+ Diversity reduces variance!
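The toy numbers above can be verified directly; a small sketch:

```python
# Samples 1-10 that each 60%-accurate model gets right (from the example)
correct = {
    "A": {1, 2, 3, 5, 7, 9},
    "B": {2, 4, 5, 6, 8, 10},
    "C": {1, 3, 4, 6, 7, 8},
}

# The ensemble is right wherever at least 2 of the 3 models are right
ensemble = {s for s in range(1, 11)
            if sum(s in hits for hits in correct.values()) >= 2}

print(sorted(ensemble))    # [1, 2, 3, 4, 5, 6, 7, 8]
print(len(ensemble) / 10)  # 0.8 -> 80% accuracy
```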
- -
-

๐Ÿ’ช Practice Problems:

-
    -
  1. Find critical value for ฮฑ = 0.10, one-tailed (left)
  2. -
  3. If your test statistic is z = 2.0, which tests would reject Hโ‚€?
  4. -
  5. Why are two-tailed critical values larger than one-tailed?
  6. -
-
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Z-score standardizes values
  • -
  • Critical values define rejection region
  • -
  • |z| > critical value โ†’ reject Hโ‚€
  • -
  • Common: ยฑ1.96 for 95% confidence
  • -
-
- - - -
-
- Topic 30 -

๐Ÿ’ฏ P-Value Method

-

Probability of observing data if Hโ‚€ is true

-
- -
-

Introduction

-

What is it? P-value is the probability of getting results as extreme as observed, assuming Hโ‚€ is true.

-
- -
-

Decision Rule

-
    -
  • If p-value โ‰ค ฮฑ: Reject Hโ‚€ (statistically significant)
  • -
  • If p-value > ฮฑ: Fail to reject Hโ‚€ (not significant)
  • -
-
- -
-

Interpretation

-
    -
  • p < 0.01: Very strong evidence against Hโ‚€
  • -
  • 0.01 โ‰ค p < 0.05: Strong evidence against Hโ‚€
  • -
  • 0.05 โ‰ค p < 0.10: Weak evidence against Hโ‚€
  • -
  • p โ‰ฅ 0.10: Little or no evidence against Hโ‚€
  • -
-
-
-
โš ๏ธ COMMON MISCONCEPTION
-

P-value is NOT the probability that Hโ‚€ is true! It's the probability of observing your data IF Hโ‚€ were true.

-
+

Method 1: Bagging (Bootstrap Aggregating)

+

Train multiple models on different random subsets of data (with replacement), then average predictions.

- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Sample of 36 students has mean score xฬ„ = 78. Population mean claimed to be ฮผโ‚€ = 75 with ฯƒ = 12. Test at ฮฑ = 0.05 using p-value method.

-
- -
-

Solution:

- -
-
Step 1:
-
-

State Hypotheses

-
- Hโ‚€: ฮผ = 75 (null hypothesis - no difference)
- Hโ‚: ฮผ โ‰  75 (alternative - there is a difference)
- Two-tailed test -
-

Set up null and alternative hypotheses

-
-
- -
-
Step 2:
-
-

Calculate Test Statistic

-
- z = (xฬ„ - ฮผโ‚€) / (ฯƒ/โˆšn)
- z = (78 - 75) / (12/โˆš36)
- z = 3 / (12/6)
- z = 3 / 2 = 1.5 -
-

Calculate the z-score

-
-
- -
-
Step 3:
-
-

Find P-Value

-
- For two-tailed: p-value = 2 ร— P(Z > |1.5|)
- P(Z > 1.5) = 1 - 0.9332 = 0.0668
- p-value = 2 ร— 0.0668 = 0.1336 -
-

Multiply by 2 for two-tailed test

-
-
- -
-
Step 4:
-
-

Compare with ฮฑ

-
- p-value = 0.1336
- ฮฑ = 0.05
- 0.1336 > 0.05 -
-

Since p-value exceeds ฮฑ, we fail to reject Hโ‚€

-
-
- -
-
Step 5:
-
-

Make Decision

-
- Since p-value > ฮฑ, FAIL TO REJECT Hโ‚€
- Not enough evidence to conclude mean differs from 75
- p-value of 13.36% means we'd see results this extreme
- 13.36% of time if Hโ‚€ true -
-

Interpret in context

-
-
- -
- โœ“ Final Answer: - p-value = 0.1336 > 0.05, Fail to reject Hโ‚€ -
- -
- Check: -

The result is not statistically significant at ฮฑ = 0.05 level. We need stronger evidence to claim the mean differs from 75.
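A sketch of the same calculation with `statistics.NormalDist` (standard library only):

```python
from math import sqrt
from statistics import NormalDist

x_bar, mu0, sigma, n, alpha = 78, 75, 12, 36, 0.05

z = (x_bar - mu0) / (sigma / sqrt(n))
p_value = 2 * (1 - NormalDist().cdf(abs(z)))  # two-tailed

print(z)                  # 1.5
print(round(p_value, 4))  # 0.1336
print(p_value <= alpha)   # False -> fail to reject H0
```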

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. If z = 2.5, ฮฑ = 0.01, find p-value and decide
  2. -
  3. When do we reject Hโ‚€ using p-value method?
  4. -
- - +
+ Bagging Algorithm:
+ 1. Create B bootstrap samples (random sampling with replacement)
+ 2. Train a model on each sample independently
+ 3. For prediction:
+    โ€ข Regression: Average all predictions
+    โ€ข Classification: Majority vote
+
+ Effect: Reduces variance, prevents overfitting
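The three steps above can be sketched from scratch. This is an illustrative toy only: the made-up 1-D data and the deliberately high-variance 1-nearest-neighbour base learner are assumptions for demonstration, not a production implementation.

```python
import random
from statistics import mean

random.seed(0)

# Toy 1-D regression data: y is roughly 2x plus noise
data = [(x, 2 * x + random.uniform(-1, 1)) for x in range(10)]

def one_nn_predict(sample, x):
    """Base learner: 1-nearest-neighbour regression (high variance)."""
    return min(sample, key=lambda p: abs(p[0] - x))[1]

def bagging_predict(data, x, n_models=50):
    preds = []
    for _ in range(n_models):
        # Step 1: bootstrap sample - draw len(data) points WITH replacement
        boot = [random.choice(data) for _ in data]
        # Step 2: "train" one base model per sample and collect its prediction
        preds.append(one_nn_predict(boot, x))
    # Step 3 (regression): average all predictions
    return mean(preds)

print(round(bagging_predict(data, 4.5), 1))  # close to 2 * 4.5 = 9
```

Averaging over bootstrap replicas smooths out the jumpy single-neighbour predictions, which is exactly the variance reduction bagging promises.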
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • P-value = P(data | Hโ‚€ true)
  • -
  • Reject Hโ‚€ if p โ‰ค ฮฑ
  • -
  • Smaller p-value = stronger evidence against Hโ‚€
  • -
  • Most common approach in modern statistics
  • -
-
-
- - -
-
- Topic 31 -

โ†”๏ธ One-Tailed vs Two-Tailed Tests

-

Directional vs non-directional hypotheses

-
-
-

Two-Tailed Test

-
    -
  • Hโ‚: ฮผ โ‰  ฮผโ‚€ (different, could be higher or lower)
  • -
  • Testing for any difference
  • -
  • Rejection regions in both tails
  • -
  • More conservative
  • -
-
- -
-

One-Tailed Test

-
    -
  • Right-tailed: Hโ‚: ฮผ > ฮผโ‚€
  • -
  • Left-tailed: Hโ‚: ฮผ < ฮผโ‚€
  • -
  • Testing for specific direction
  • -
  • Rejection region in one tail
  • -
  • More powerful for directional effects
  • -
-
- - -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Researcher claims new drug LOWERS blood pressure (ฮผ < 120). Sample of 49: xฬ„ = 115, ฯƒ = 21. Test at ฮฑ = 0.05. Should this be one-tailed or two-tailed?

-
- -
-

Solution:

- -
-
Step 1:
-
-

Analyze the Claim

-
- Claim: drug LOWERS pressure (directional)
- Looking for decrease specifically
- This requires ONE-TAILED test (left tail) -
-

Directional claim = one-tailed test

-
-
- -
-
Step 2:
-
-

Set Up Hypotheses

-
- Hโ‚€: ฮผ โ‰ฅ 120 (blood pressure not lower)
- Hโ‚: ฮผ < 120 (blood pressure IS lower)
- Left-tailed test -
-

Alternative hypothesis shows the direction

-
-
- -
-
Step 3:
-
-

Calculate Z-Score

-
- z = (xฬ„ - ฮผโ‚€) / (ฯƒ/โˆšn)
- z = (115 - 120) / (21/โˆš49)
- z = -5 / (21/7)
- z = -5 / 3 = -1.67 -
-

Negative z-score indicates below mean

-
-
- -
-
Step 4:
-
-

Find Critical Value (One-Tailed)

-
- For ฮฑ = 0.05, one-tailed (left)
- Critical value: z = -1.645 -
-

One-tailed critical value differs from two-tailed

-
-
- -
-
Step 5:
-
-

Make Decision

-
- Test statistic: z = -1.67
- Critical value: z = -1.645
- -1.67 < -1.645 (in rejection region)
- REJECT Hโ‚€ -
-

Falls in rejection region, so reject null

-
-
- -
-
Step 6:
-
-

Contrast with Two-Tailed

-
- If two-tailed: critical values ยฑ1.96
- Our |z| = 1.67 < 1.96
- Would NOT reject Hโ‚€ with two-tailed!
- This shows importance of choosing correct test -
-

Test choice matters!

-
-
- -
- โœ“ Final Answer: - Use ONE-TAILED (left). z = -1.67 < -1.645, Reject Hโ‚€ -
- -
- Check: -

Evidence supports claim that drug lowers blood pressure. One-tailed test was appropriate for directional claim.
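Both decision rules from this example, side by side in a short standard-library sketch:

```python
from math import sqrt
from statistics import NormalDist

x_bar, mu0, sigma, n = 115, 120, 21, 49
z = (x_bar - mu0) / (sigma / sqrt(n))

z_left = NormalDist().inv_cdf(0.05)   # one-tailed left critical value, -1.645
z_two = NormalDist().inv_cdf(0.975)   # two-tailed critical value, 1.96

print(round(z, 2))     # -1.67
print(z < z_left)      # True  -> one-tailed test rejects H0
print(abs(z) > z_two)  # False -> two-tailed test would NOT reject
```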

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. Claim: ฮผ > 50. One-tailed or two-tailed?
  2. -
  3. Claim: ฮผ โ‰  100. Which test?
  4. -
- - - -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Two-tailed: testing for any difference
  • -
  • One-tailed: testing for specific direction
  • -
  • Choose before collecting data
  • -
  • Two-tailed is more conservative
  • -
-
-
- - -
-
- Topic 32 -

๐Ÿ“ T-Test

-

Hypothesis test for small samples or unknown ฯƒ

-
-
-

When to Use T-Test

-
    -
  • Small sample (n < 30)
  • -
  • Population ฯƒ unknown (use sample s)
  • -
  • Population approximately normal
  • -
-
+

Method 2: Boosting (Sequential Learning)

+

Train models sequentially, where each new model focuses on examples the previous models got wrong.

-
-

Formula

-
-
T-Test Statistic
-
t = (xฬ„ - ฮผโ‚€) / (s / โˆšn)
-

Same as z-test but uses s instead of ฯƒ

-

Follows t-distribution with df = n - 1

+
+ Boosting Algorithm:
+ 1. Start with equal weights for all samples
+ 2. Train model on weighted data
+ 3. Increase weights for misclassified samples
+ 4. Train next model (focuses on hard examples)
+ 5. Repeat for M iterations
+ 6. Final prediction = weighted vote of all models
+
+ Effect: Reduces bias AND variance
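The loop above is essentially AdaBoost. A self-contained sketch with decision stumps on a tiny made-up dataset (the data and the 3-round budget are illustrative assumptions, not part of the original text):

```python
import math

def train_stump(X, y, w):
    """Pick the (threshold, polarity) stump with lowest weighted error."""
    best = None
    for t in sorted(set(X)):
        for pol in (1, -1):
            pred = [pol if x >= t else -pol for x in X]
            err = sum(wi for wi, p, yi in zip(w, pred, y) if p != yi)
            if best is None or err < best[0]:
                best = (err, t, pol)
    return best

def adaboost(X, y, rounds=3):
    n = len(X)
    w = [1 / n] * n                            # 1. equal weights
    ensemble = []
    for _ in range(rounds):
        err, t, pol = train_stump(X, y, w)     # 2. train on weighted data
        alpha = 0.5 * math.log((1 - err) / max(err, 1e-12))
        pred = [pol if x >= t else -pol for x in X]
        # 3. up-weight misclassified samples, down-weight correct ones
        w = [wi * math.exp(-alpha * yi * pi) for wi, yi, pi in zip(w, y, pred)]
        total = sum(w)
        w = [wi / total for wi in w]           # renormalise
        ensemble.append((alpha, t, pol))       # 4./5. repeat
    return ensemble

def predict(ensemble, x):
    # 6. weighted vote of all stumps
    score = sum(a * (pol if x >= t else -pol) for a, t, pol in ensemble)
    return 1 if score >= 0 else -1

X = [1, 2, 3, 4, 5, 6, 7, 8]
y = [1, 1, 1, -1, -1, -1, 1, 1]        # no single stump can fit this pattern
model = adaboost(X, y)
print([predict(model, x) for x in X])  # matches y after 3 rounds
```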
-
- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Small sample: n = 16, xฬ„ = 52, s = 8. Test if ฮผ = 50 at ฮฑ = 0.05. Population ฯƒ unknown.

-
- -
-

Solution:

- -
-
Step 1:
-
-

Choose Correct Test

-
- n = 16 < 30 (small sample)
- ฯƒ unknown (use sample s)
- Use T-TEST instead of z-test -
-

Small sample + unknown ฯƒ = t-test

-
-
- -
-
Step 2:
-
-

Calculate T-Statistic

-
- t = (xฬ„ - ฮผโ‚€) / (s/โˆšn)
- t = (52 - 50) / (8/โˆš16)
- t = 2 / (8/4)
- t = 2 / 2 = 1.0 -
-

Use sample standard deviation s

-
-
- -
-
Step 3:
-
-

Find Degrees of Freedom

-
- df = n - 1
- df = 16 - 1 = 15 -
-

Lose 1 df for estimating mean

-
-
- -
-
Step 4:
-
-

Find Critical Value

-
- Two-tailed test, ฮฑ = 0.05
- df = 15
- From t-table: tโ‚€.โ‚€โ‚‚โ‚…,โ‚โ‚… = ยฑ2.131 -
-

Look up in t-distribution table

-
-
- -
-
Step 5:
-
-

Compare and Decide

-
- Test statistic: t = 1.0
- Critical values: ยฑ2.131
- |1.0| < 2.131
- FAIL TO REJECT Hโ‚€ -
-

Test statistic not in rejection region

-
-
- -
-
Step 6:
-
-

Interpret

-
- Not enough evidence that ฮผ โ‰  50
- Sample mean of 52 is not significantly different from 50 -
-

Interpret in context of problem

-
-
- -
- โœ“ Final Answer: - t = 1.0, critical = ยฑ2.131, Fail to reject Hโ‚€ -
- -
- Check: -

The difference between 52 and 50 is not statistically significant at ฮฑ = 0.05 level with this small sample.
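The computation, sketched in Python (the 2.131 critical value is read from a t-table, as in the worked example, since the standard library has no t-distribution):

```python
from math import sqrt

n, x_bar, s, mu0 = 16, 52, 8, 50

t = (x_bar - mu0) / (s / sqrt(n))  # uses sample s, not population sigma
df = n - 1
t_crit = 2.131                     # t(0.025, df = 15) from a t-table

print(t)                # 1.0
print(df)               # 15
print(abs(t) > t_crit)  # False -> fail to reject H0
```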

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. n = 25, xฬ„ = 100, s = 15, test ฮผ = 95 at ฮฑ = 0.01
  2. -
  3. Why use t-test instead of z-test?
  4. -
- - - -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Use when ฯƒ unknown or n < 30
  • -
  • t = (xฬ„ - ฮผโ‚€) / (s / โˆšn)
  • -
  • Follows t-distribution
  • -
  • More variable than z-distribution
  • -
-
-
- - -
-
- Topic 33 -

๐Ÿ”“ Degrees of Freedom

-

Independent pieces of information

-
- -
-

Introduction

-

What is it? Degrees of freedom (df) is the number of independent values that can vary in analysis.

-
- -
-

Common Formulas

-
    -
  • One-sample t-test: df = n - 1
  • -
  • Two-sample t-test: df โ‰ˆ nโ‚ + nโ‚‚ - 2
  • -
  • Chi-squared: df = (rows-1)(cols-1)
  • -
-
-
-

Why It Matters

-
    -
  • Determines shape of t-distribution
  • -
  • Higher df โ†’ closer to normal distribution
  • -
  • Affects critical values
  • -
-
+

Random Forest: Bagging + Decision Trees

+

The most popular ensemble method! Combines bagging with feature randomness.

- -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Calculate degrees of freedom for: a) Single sample t-test: n = 20, b) Two-sample t-test: nโ‚ = 15, nโ‚‚ = 18, c) Chi-squared test: 3ร—4 contingency table

-
- -
-

Solution:

- -
-
Step 1:
-
-

Single Sample T-Test

-
- Formula: df = n - 1
- n = 20
- df = 20 - 1 = 19
- We "lose" 1 df because we estimate mean from sample -
-

Each parameter estimated reduces df by 1

-
-
- -
-
Step 2:
-
-

Two-Sample T-Test (Equal Variances)

-
- Formula: df = nโ‚ + nโ‚‚ - 2
- nโ‚ = 15, nโ‚‚ = 18
- df = 15 + 18 - 2 = 31
- Lose 1 df per sample for estimating each mean -
-

Two samples = two means estimated

-
-
- -
-
Step 3:
-
-

Chi-Squared Contingency Table

-
- Formula: df = (rows - 1) ร— (columns - 1)
- 3 rows, 4 columns
- df = (3 - 1) ร— (4 - 1)
- df = 2 ร— 3 = 6 -
-

Degrees of freedom for independence test

-
-
- -
-
Step 4:
-
-

Explain Concept

-
- Degrees of freedom = number of values free to vary
- Each parameter estimated reduces df by 1
- Higher df โ†’ distribution closer to normal -
-

Conceptual understanding

-
-
- -
- โœ“ Final Answer: - a) df = 19, b) df = 31, c) df = 6 -
- -
- Check: -

These df values would be used to find appropriate critical values from respective distribution tables.
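All three formulas in one short sketch:

```python
# a) one-sample t-test: df = n - 1
df_a = 20 - 1

# b) two-sample t-test (pooled): df = n1 + n2 - 2
df_b = 15 + 18 - 2

# c) chi-squared test of independence: df = (rows - 1) * (cols - 1)
df_c = (3 - 1) * (4 - 1)

print(df_a, df_b, df_c)  # 19 31 6
```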

-
+
+ Random Forest Algorithm:
+ 1. Create B bootstrap samples
+ 2. For each sample:
+    โ€ข Grow decision tree
+    โ€ข At each split, consider random subset of features
+    โ€ข Don't prune (let trees overfit!)
+ 3. Final prediction = average/vote of all trees
+
+ Typical values: B=100-500 trees, โˆšfeatures per split
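A toy from-scratch illustration of the two sources of randomness (bootstrap rows plus a random feature per split). The data and the depth-1 "trees" are deliberately minimal assumptions for demonstration; in practice you would reach for a real implementation such as scikit-learn's `RandomForestClassifier`.

```python
import random
from collections import Counter

random.seed(0)

# Toy data: two correlated features; class +1 when the first feature > 0.5
data = [([0.1, 0.15], -1), ([0.2, 0.25], -1), ([0.3, 0.3], -1),
        ([0.7, 0.65], 1), ([0.8, 0.85], 1), ([0.9, 0.9], 1)]

def train_tree(sample, n_features=2, max_features=1):
    """Depth-1 'tree': split on one RANDOMLY chosen feature (feature bagging)."""
    f = random.randrange(n_features) if max_features < n_features else 0
    best = None
    for (xs, _) in sample:
        for pol in (1, -1):
            t = xs[f]
            err = sum(1 for xv, yv in sample
                      if (pol if xv[f] >= t else -pol) != yv)
            if best is None or err < best[0]:
                best = (err, f, t, pol)
    return best[1:]  # (feature, threshold, polarity)

def random_forest(data, n_trees=25):
    forest = []
    for _ in range(n_trees):
        boot = [random.choice(data) for _ in data]  # bootstrap sample
        forest.append(train_tree(boot))
    return forest

def predict(forest, xs):
    votes = Counter((pol if xs[f] >= t else -pol) for f, t, pol in forest)
    return votes.most_common(1)[0][0]               # majority vote

forest = random_forest(data)
acc = sum(predict(forest, xs) == yv for xs, yv in data) / len(data)
print(acc)  # usually 1.0 on this easy toy set
```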
- -
-

๐Ÿ’ช Try These:

-
    -
  1. Sample size 100, find df for t-test
  2. -
  3. 5ร—3 table, find df for chi-squared
  4. -
- - - -
-

๐ŸŽฏ Key Takeaways

-
    -
  • df = number of independent values
  • -
  • For t-test: df = n - 1
  • -
  • Higher df โ†’ distribution closer to normal
  • -
  • Critical for finding correct critical values
  • -
-
-
- - -
-
- Topic 34 -

โš ๏ธ Type I & Type II Errors

-

False positives and false negatives

-
-
-

The Two Types of Errors

- +

Comparison: Bagging vs Boosting

+
- - - - - + - - - - - - - - - - + + + + + +
Hโ‚€ TrueHโ‚€ False
AspectBaggingBoosting
Reject Hโ‚€Type I Error (ฮฑ)Correct!
Fail to Reject Hโ‚€Correct!Type II Error (ฮฒ)
TrainingParallel (independent)Sequential (dependent)
FocusReduce varianceReduce bias & variance
WeightsEqual for all samplesHigher for hard samples
SpeedFast (parallelizable)Slower (sequential)
OverfittingResistantCan overfit if too many iterations
ExamplesRandom ForestAdaBoost, Gradient Boosting, XGBoost
-
- -
-

Definitions

-
    -
  • Type I Error (ฮฑ): Rejecting true Hโ‚€ (false positive)
  • -
  • Type II Error (ฮฒ): Failing to reject false Hโ‚€ (false negative)
  • -
  • Power = 1 - ฮฒ: Probability of correctly rejecting false Hโ‚€
  • -
-
- -
-
๐Ÿ“Š MEDICAL ANALOGY
-

Type I Error: Telling healthy person they're sick (false alarm)

-

Type II Error: Telling sick person they're healthy (missed diagnosis)

-
- - -
-

๐Ÿ“ Worked Example - Step by Step

- -
-

Problem:

-

Drug trial tests Hโ‚€: "Drug is safe" vs Hโ‚: "Drug is dangerous". Describe Type I and Type II errors with consequences.

-
- -
-

Solution:

- -
-
Step 1:
-
-

Define Type I Error (False Positive)

-
- Type I: Reject Hโ‚€ when Hโ‚€ is TRUE
- In this case: Conclude drug is dangerous when it's actually safe
- Probability = ฮฑ (significance level)
- Consequence: Safe drug rejected, patients miss beneficial treatment -
-

False alarm - reject truth

-
-
- -
-
Step 2:
-
-

Define Type II Error (False Negative)

-
- Type II: Fail to reject Hโ‚€ when Hโ‚ is TRUE
- In this case: Conclude drug is safe when it's actually dangerous
- Probability = ฮฒ
- Consequence: Dangerous drug approved, patients harmed! -
-

Miss detecting danger

-
-
- -
-
Step 3:
-
-

Create Decision Matrix

-
- Reality vs Decision:
- If Hโ‚€ true (safe) + Reject Hโ‚€ (call dangerous) = TYPE I
- If Hโ‚ true (dangerous) + Fail to reject = TYPE II
- Correct decisions: Accept truth or reject false -
-

Four possible outcomes

-
-
- -
-
Step 4:
-
-

Calculate Example

-
- If ฮฑ = 0.05: 5% chance of Type I error
- If ฮฒ = 0.20: 20% chance of Type II error
- Power = 1 - ฮฒ = 0.80 (80% chance of detecting dangerous drug) -
-

Probabilities of each error

-
-
- -
-
Step 5:
-
-

Compare Consequences

-
- Type I: Waste safe drug (economic cost)
- Type II: Approve dangerous drug (LIFE RISK!)
- Type II often more serious → increase power (larger sample), even if that means tolerating a higher α -
-

Context determines which error is worse

-
-
- -
- โœ“ Final Answer: - Type I (ฮฑ): Reject safe drug
Type II (ฮฒ): Approve dangerous drug
Type II more dangerous in this case!
-
- -
- Check: -

In medical contexts, Type II errors (missing danger) are often considered worse than Type I errors (false alarms).

-
-
- -
-

๐Ÿ’ช Try These:

-
    -
  1. Security scanner: Hโ‚€ = "Safe". Describe Type I/II errors
  2. -
  3. If ฮฑ = 0.01, what's P(Type I error)?
  4. -
- - -
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Type I: False positive (ฮฑ)
  • -
  • Type II: False negative (ฮฒ)
  • -
  • Trade-off: decreasing one increases the other
  • -
  • Power = 1 - ฮฒ (ability to detect true effect)
  • -
-
-
- - -
-
- Topic 35 -

ฯ‡ยฒ Chi-Squared Distribution

-

Distribution for categorical data analysis

-
- -
-

Introduction

-

What is it? Chi-squared (ฯ‡ยฒ) distribution is used for testing hypotheses about categorical data.

-
- -
-

Properties

-
    -
  • Always positive (ranges from 0 to โˆž)
  • -
  • Right-skewed
  • -
  • Shape depends on degrees of freedom
  • -
  • Higher df โ†’ more symmetric
  • -
-
- -
-

Uses

-
    -
  • Goodness of fit test
  • -
  • Test of independence
  • -
  • Testing variance
  • -
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Used for categorical data
  • -
  • Always positive, right-skewed
  • -
  • Shape depends on df
  • -
  • Foundation for chi-squared tests
  • -
-
-
- - -
-
- Topic 36 -

โœ“ Goodness of Fit Test

-

Testing if data follows expected distribution

-
- -
-

Introduction

-

What is it? Tests whether observed frequencies match expected frequencies from a theoretical distribution.

-
- -
-

Formula

-
-
Chi-Squared Test Statistic
-
ฯ‡ยฒ = ฮฃ [(O - E)ยฒ / E]
-

O = observed frequency

-

E = expected frequency

-

df = k - 1 (k = number of categories)

-
-
-
-
๐Ÿ“Š EXAMPLE
-

Testing if die is fair:

-

Roll 60 times. Expected: 10 per face

-

Observed: 8, 12, 11, 9, 10, 10

-

Calculate ฯ‡ยฒ and compare to critical value
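The die example worked through in code (the 11.07 critical value is read from a chi-squared table):

```python
observed = [8, 12, 11, 9, 10, 10]  # 60 rolls
expected = [10] * 6                # fair die: 60 / 6 per face

chi_sq = sum((o - e) ** 2 / e for o, e in zip(observed, expected))
df = len(observed) - 1

print(round(chi_sq, 2))  # 1.0
print(df)                # 5
# chi^2(0.05, df=5) = 11.07 from a table: 1.0 < 11.07,
# so we fail to reject H0 - the die looks fair
```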

-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Tests if observed matches expected distribution
  • -
  • ฯ‡ยฒ = ฮฃ(O-E)ยฒ/E
  • -
  • Large ฯ‡ยฒ = poor fit
  • -
  • df = number of categories - 1
  • -
+

Real-World Success Stories

+
    +
  • Netflix Prize (2009): Winning team used ensemble of 100+ models
  • +
  • Kaggle competitions: most winning solutions on structured data use ensembles
  • +
  • XGBoost: Most popular algorithm for structured data
  • +
  • Random Forests: Default choice for many data scientists
  • +
+ +
+
๐Ÿ’ก When to Use Each Method
+
+ Use Random Forest when:
+ โ€ข You want good accuracy with minimal tuning
+ โ€ข You have high-variance base models
+ โ€ข Interpretability is secondary
+
+ Use Gradient Boosting (XGBoost) when:
+ โ€ข You want maximum accuracy
+ โ€ข You can afford hyperparameter tuning
+ โ€ข You have high-bias base models
+
+ Use Stacking when:
+ โ€ข You want to combine very different model types
+ โ€ข You're in a competition (squeeze every 0.1%!) +
+
+ +

๐ŸŽ‰ Course Complete!

+

+ Congratulations! You've mastered all 17 machine learning topics - from basic linear regression to advanced ensemble methods! You now have the knowledge to: +

+
    +
  • Choose the right algorithm for any problem
  • +
  • Understand the math behind each method
  • +
  • Tune hyperparameters systematically
  • +
  • Evaluate models properly
  • +
  • Build production-ready ML systems
  • +
+

+ Keep practicing, building projects, and exploring! The ML journey never ends. ๐Ÿš€โœจ +

-
- - -
-
- Topic 37 -

๐Ÿ”— Test of Independence

-

Testing relationship between categorical variables

-
- -
-

Introduction

-

What is it? Tests whether two categorical variables are independent or associated.

-
- -
-

Formula

-
-
Chi-Squared for Independence
-
ฯ‡ยฒ = ฮฃ [(O - E)ยฒ / E]
-

E = (row total ร— column total) / grand total

-

df = (rows - 1)(columns - 1)

-
-
- -
-
๐Ÿ“Š EXAMPLE
-

Are gender and color preference independent?

-

Create contingency table, calculate expected frequencies, compute ฯ‡ยฒ, and test against critical value.

-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Tests independence of two categorical variables
  • -
  • Uses contingency tables
  • -
  • df = (r-1)(c-1)
  • -
  • Large ฯ‡ยฒ suggests association
  • -
-
-
- - -
-
- Topic 38 -

๐Ÿ“ Chi-Squared Variance Test

-

Testing claims about population variance

-
- -
-

Introduction

-

What is it? Tests hypotheses about population variance or standard deviation.

-
- -
-

Formula

-
-
Chi-Squared for Variance
-
ฯ‡ยฒ = (n-1)sยฒ / ฯƒโ‚€ยฒ
-

n = sample size

-

sยฒ = sample variance

-

ฯƒโ‚€ยฒ = hypothesized population variance

-

df = n - 1

-
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Tests claims about variance/standard deviation
  • -
  • ฯ‡ยฒ = (n-1)sยฒ/ฯƒโ‚€ยฒ
  • -
  • Requires normal population
  • -
  • Common in quality control
  • -
-
-
- - -
-
- Topic 39 -

๐Ÿ“Š Confidence Intervals

-

Range of plausible values for parameter

-
- -
-

Introduction

-

What is it? A confidence interval provides a range of values that likely contains the true population parameter.

-

Why it matters: More informative than point estimatesโ€”shows precision and uncertainty.

-
- -
-

Formula

-
-
Confidence Interval for Mean
-
CI = xฬ„ ยฑ (critical value ร— SE)
-

For z: CI = xฬ„ ยฑ z* ร— (ฯƒ/โˆšn)

-

For t: CI = xฬ„ ยฑ t* ร— (s/โˆšn)

-
-
- -
-

Common Confidence Levels

-
    -
  • 90% CI: z* = 1.645
  • -
  • 95% CI: z* = 1.96
  • -
  • 99% CI: z* = 2.576
  • -
-
- -
-
๐Ÿ“Š EXAMPLE
-

Sample: n=100, xฬ„=50, s=10

-

95% CI = 50 ยฑ 1.96(10/โˆš100)

-

95% CI = 50 ยฑ 1.96 = (48.04, 51.96)
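The same interval as a quick sketch:

```python
from math import sqrt

n, x_bar, s = 100, 50, 10
z_star = 1.96  # critical value for 95% confidence

moe = z_star * s / sqrt(n)  # margin of error
ci = (x_bar - moe, x_bar + moe)

print(round(moe, 2))                     # 1.96
print(round(ci[0], 2), round(ci[1], 2))  # 48.04 51.96
```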

-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • CI = point estimate ยฑ margin of error
  • -
  • 95% CI most common
  • -
  • Wider CI = more uncertainty
  • -
  • Larger sample = narrower CI
  • -
-
-
- - -
-
- Topic 40 -

ยฑ Margin of Error

-

Measuring estimate precision

-
- -
-

Introduction

-

What is it? Margin of error (MOE) is the ยฑ part of a confidence interval, showing the precision of an estimate.

-
- -
-

Formula

-
-
Margin of Error
-
MOE = (critical value) ร— SE
-

MOE = z* ร— (ฯƒ/โˆšn) or t* ร— (s/โˆšn)

-
-
- -
-

Factors Affecting MOE

-
    -
  • Sample size: Larger n โ†’ smaller MOE
  • -
  • Confidence level: Higher confidence โ†’ larger MOE
  • -
  • Variability: Higher ฯƒ โ†’ larger MOE
  • -
-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • MOE = critical value ร— SE
  • -
  • Indicates precision of estimate
  • -
  • Inversely related to sample size
  • -
  • Trade-off between confidence and precision
  • -
-
-
- - -
-
- Topic 41 -

๐Ÿ” Interpreting Confidence Intervals

-

Common misconceptions and proper interpretation

-
- -
-

Correct Interpretation

-

"We are 95% confident that the true population parameter lies within this interval."

-

This means: If we repeated this process many times, 95% of the intervals would contain the true parameter.

-
- -
-
โš ๏ธ COMMON MISCONCEPTIONS
-
    -
  • WRONG: "There's a 95% probability the parameter is in this interval."
  • -
  • WRONG: "95% of the data falls in this interval."
  • -
  • WRONG: "We are 95% sure our sample mean is in this interval."
  • -
-
- -
-

Using CIs for Hypothesis Testing

-
    -
  • If hypothesized value is INSIDE CI โ†’ fail to reject Hโ‚€
  • -
  • If hypothesized value is OUTSIDE CI โ†’ reject Hโ‚€
  • -
  • 95% CI corresponds to ฮฑ = 0.05 test
  • -
-
- -
-
โœ… PRO TIP
-

Report confidence intervals instead of just p-values! CIs provide more information: effect size AND statistical significance.

-
- -
-

๐ŸŽฏ Key Takeaways

-
    -
  • Correct interpretation: confidence in the method, not the specific interval
  • -
  • 95% refers to long-run success rate
  • -
  • Can use CIs for hypothesis testing
  • -
  • More informative than p-values alone
  • -
-
-
+