Aashish34 commited on
Commit
a730f26
Β·
1 Parent(s): b948ef9

update files

Browse files
ml_complete-all-topics/app.js CHANGED
@@ -107,6 +107,8 @@ function initSections() {
107
  if (section.id === 'optimal-k') initOptimalK();
108
  if (section.id === 'hyperparameter-tuning') initHyperparameterTuning();
109
  if (section.id === 'naive-bayes') initNaiveBayes();
 
 
110
  }
111
  });
112
  });
@@ -2354,34 +2356,23 @@ function drawLossCurves() {
2354
  ctx.restore();
2355
  }
2356
 
2357
- // Optimal K for KNN
2358
  function initOptimalK() {
2359
- const canvas = document.getElementById('optimal-k-canvas');
2360
- if (!canvas || canvas.dataset.initialized) return;
2361
- canvas.dataset.initialized = 'true';
2362
-
2363
- const rangeSlider = document.getElementById('k-range-slider');
2364
- const foldsSlider = document.getElementById('cv-folds-slider');
2365
-
2366
- if (rangeSlider) {
2367
- rangeSlider.addEventListener('input', (e) => {
2368
- document.getElementById('k-range-val').textContent = e.target.value;
2369
- drawOptimalK();
2370
- });
2371
  }
2372
 
2373
- if (foldsSlider) {
2374
- foldsSlider.addEventListener('input', (e) => {
2375
- document.getElementById('cv-folds-val').textContent = e.target.value;
2376
- drawOptimalK();
2377
- });
2378
  }
2379
-
2380
- drawOptimalK();
2381
  }
2382
 
2383
- function drawOptimalK() {
2384
- const canvas = document.getElementById('optimal-k-canvas');
2385
  if (!canvas) return;
2386
 
2387
  const ctx = canvas.getContext('2d');
@@ -2396,30 +2387,13 @@ function drawOptimalK() {
2396
  const chartWidth = width - 2 * padding;
2397
  const chartHeight = height - 2 * padding;
2398
 
2399
- // Use provided data
2400
- const kRange = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20];
2401
- const accuracies = [0.85, 0.88, 0.92, 0.94, 0.96, 0.97, 0.98, 0.97, 0.96, 0.95, 0.94, 0.93, 0.92, 0.91, 0.90, 0.89, 0.88, 0.87, 0.86, 0.85];
2402
- const optimalK = 7;
2403
-
2404
- const scaleX = (k) => padding + ((k - 1) / 19) * chartWidth;
2405
- const scaleY = (acc) => height - padding - ((acc - 0.8) / 0.2) * chartHeight;
2406
 
2407
- // Draw grid
2408
- ctx.strokeStyle = 'rgba(42, 53, 68, 0.5)';
2409
- ctx.lineWidth = 1;
2410
- for (let i = 0; i <= 10; i++) {
2411
- const x = padding + (chartWidth / 10) * i;
2412
- ctx.beginPath();
2413
- ctx.moveTo(x, padding);
2414
- ctx.lineTo(x, height - padding);
2415
- ctx.stroke();
2416
-
2417
- const y = padding + (chartHeight / 10) * i;
2418
- ctx.beginPath();
2419
- ctx.moveTo(padding, y);
2420
- ctx.lineTo(width - padding, y);
2421
- ctx.stroke();
2422
- }
2423
 
2424
  // Draw axes
2425
  ctx.strokeStyle = '#2a3544';
@@ -2430,11 +2404,11 @@ function drawOptimalK() {
2430
  ctx.lineTo(width - padding, height - padding);
2431
  ctx.stroke();
2432
 
2433
- // Draw line
2434
  ctx.strokeStyle = '#6aa9ff';
2435
  ctx.lineWidth = 3;
2436
  ctx.beginPath();
2437
- kRange.forEach((k, i) => {
2438
  const x = scaleX(k);
2439
  const y = scaleY(accuracies[i]);
2440
  if (i === 0) ctx.moveTo(x, y);
@@ -2443,39 +2417,24 @@ function drawOptimalK() {
2443
  ctx.stroke();
2444
 
2445
  // Draw points
2446
- kRange.forEach((k, i) => {
2447
  const x = scaleX(k);
2448
  const y = scaleY(accuracies[i]);
2449
- const isOptimal = k === optimalK;
2450
-
2451
- ctx.fillStyle = isOptimal ? '#7ef0d4' : '#6aa9ff';
2452
  ctx.beginPath();
2453
- ctx.arc(x, y, isOptimal ? 8 : 5, 0, 2 * Math.PI);
2454
  ctx.fill();
2455
-
2456
- if (isOptimal) {
2457
- ctx.strokeStyle = '#7ef0d4';
2458
- ctx.lineWidth = 2;
2459
- ctx.beginPath();
2460
- ctx.arc(x, y, 14, 0, 2 * Math.PI);
2461
- ctx.stroke();
2462
-
2463
- // Label
2464
- ctx.fillStyle = '#7ef0d4';
2465
- ctx.font = 'bold 14px sans-serif';
2466
- ctx.textAlign = 'center';
2467
- ctx.fillText(`Optimal K=${optimalK}`, x, y - 25);
2468
- ctx.fillText(`Accuracy: ${(accuracies[i] * 100).toFixed(1)}%`, x, y - 10);
2469
- }
2470
  });
2471
 
2472
- // Draw vertical line at optimal K
2473
- ctx.strokeStyle = 'rgba(126, 240, 212, 0.3)';
 
 
2474
  ctx.lineWidth = 2;
2475
  ctx.setLineDash([5, 5]);
2476
  ctx.beginPath();
2477
- ctx.moveTo(scaleX(optimalK), padding);
2478
- ctx.lineTo(scaleX(optimalK), height - padding);
2479
  ctx.stroke();
2480
  ctx.setLineDash([]);
2481
 
@@ -2483,34 +2442,133 @@ function drawOptimalK() {
2483
  ctx.fillStyle = '#a9b4c2';
2484
  ctx.font = '12px sans-serif';
2485
  ctx.textAlign = 'center';
2486
- ctx.fillText('K Value', width / 2, height - 20);
2487
  ctx.save();
2488
  ctx.translate(20, height / 2);
2489
  ctx.rotate(-Math.PI / 2);
2490
- ctx.fillText('Mean Accuracy', 0, 0);
2491
  ctx.restore();
2492
 
2493
- // X-axis labels
2494
- for (let i = 1; i <= 20; i += 2) {
2495
- ctx.fillText(i, scaleX(i), height - padding + 20);
2496
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2497
  }
2498
 
2499
- // Hyperparameter Tuning & GridSearch
2500
  function initHyperparameterTuning() {
2501
- const canvas = document.getElementById('gridsearch-canvas');
2502
- if (!canvas || canvas.dataset.initialized) return;
2503
- canvas.dataset.initialized = 'true';
2504
- drawGridSearch();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2505
  }
2506
 
2507
- function drawGridSearch() {
2508
- const canvas = document.getElementById('gridsearch-canvas');
2509
  if (!canvas) return;
2510
 
2511
  const ctx = canvas.getContext('2d');
2512
  const width = canvas.width = canvas.offsetWidth;
2513
- const height = canvas.height = 400;
2514
 
2515
  ctx.clearRect(0, 0, width, height);
2516
  ctx.fillStyle = '#1a2332';
@@ -2520,33 +2578,41 @@ function drawGridSearch() {
2520
  const chartWidth = width - 2 * padding;
2521
  const chartHeight = height - 2 * padding;
2522
 
2523
- // Grid data - C vs gamma heatmap
2524
  const cValues = [0.1, 1, 10, 100];
2525
  const gammaValues = [0.001, 0.01, 0.1, 1];
2526
 
2527
- // Scores (simulated)
2528
- const scores = [
2529
- [0.70, 0.75, 0.78, 0.76],
2530
- [0.82, 0.88, 0.92, 0.85],
2531
- [0.88, 0.95, 0.93, 0.87],
2532
- [0.85, 0.90, 0.88, 0.82]
2533
  ];
2534
 
2535
  const cellWidth = chartWidth / cValues.length;
2536
  const cellHeight = chartHeight / gammaValues.length;
2537
 
2538
- // Draw cells
2539
- cValues.forEach((c, i) => {
2540
- gammaValues.forEach((g, j) => {
2541
- const x = padding + i * cellWidth;
2542
- const y = padding + j * cellHeight;
2543
- const score = scores[i][j];
 
 
 
 
 
2544
 
2545
- // Color based on score
2546
- const intensity = (score - 0.7) / 0.25;
2547
- const r = Math.floor(255 - intensity * 155);
2548
- const gb = Math.floor(100 + intensity * 140);
2549
- ctx.fillStyle = `rgb(${r}, ${gb}, ${Math.floor(gb * 0.9)})`;
 
 
 
 
2550
  ctx.fillRect(x, y, cellWidth, cellHeight);
2551
 
2552
  // Border
@@ -2554,132 +2620,936 @@ function drawGridSearch() {
2554
  ctx.lineWidth = 2;
2555
  ctx.strokeRect(x, y, cellWidth, cellHeight);
2556
 
2557
- // Score text
2558
- ctx.fillStyle = score > 0.88 ? '#1a2332' : '#e8eef6';
2559
  ctx.font = 'bold 14px sans-serif';
2560
  ctx.textAlign = 'center';
2561
- ctx.fillText((score * 100).toFixed(0) + '%', x + cellWidth / 2, y + cellHeight / 2 + 5);
2562
-
2563
- // Highlight best
2564
- if (score === 0.95) {
2565
- ctx.strokeStyle = '#7ef0d4';
2566
- ctx.lineWidth = 4;
2567
- ctx.strokeRect(x, y, cellWidth, cellHeight);
2568
-
2569
- ctx.fillStyle = '#7ef0d4';
2570
- ctx.font = '12px sans-serif';
2571
- ctx.fillText('β˜… Best', x + cellWidth / 2, y + cellHeight / 2 + 22);
2572
- }
2573
  });
2574
  });
2575
 
2576
- // Axis labels - C
2577
- ctx.fillStyle = '#a9b4c2';
 
 
 
 
 
 
 
2578
  ctx.font = '12px sans-serif';
 
 
 
 
 
 
2579
  ctx.textAlign = 'center';
2580
- cValues.forEach((c, i) => {
2581
- const x = padding + i * cellWidth + cellWidth / 2;
2582
- ctx.fillText(`C=${c}`, x, height - padding + 25);
2583
  });
2584
 
2585
- // Axis labels - gamma
2586
- ctx.textAlign = 'right';
2587
- gammaValues.forEach((g, i) => {
2588
- const y = padding + i * cellHeight + cellHeight / 2;
2589
- ctx.fillText(`Ξ³=${g}`, padding - 10, y + 5);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2590
  });
2591
 
2592
- // Title
2593
  ctx.fillStyle = '#7ef0d4';
2594
- ctx.font = 'bold 16px sans-serif';
 
 
 
 
 
2595
  ctx.textAlign = 'center';
2596
- ctx.fillText('GridSearch Heatmap: C vs gamma (RBF kernel)', width / 2, 30);
 
2597
 
2598
- // Legend
2599
- ctx.font = '12px sans-serif';
2600
  ctx.fillStyle = '#a9b4c2';
2601
- ctx.textAlign = 'left';
2602
- ctx.fillText('Lower accuracy', padding, height - 10);
2603
- ctx.textAlign = 'right';
2604
- ctx.fillText('Higher accuracy', width - padding, height - 10);
 
 
 
 
 
 
 
 
2605
  }
2606
 
2607
- // Naive Bayes
2608
  function initNaiveBayes() {
2609
- const canvas = document.getElementById('naive-bayes-canvas');
2610
- if (!canvas || canvas.dataset.initialized) return;
2611
- canvas.dataset.initialized = 'true';
2612
- drawNaiveBayes();
 
 
 
 
 
 
 
2613
  }
2614
 
2615
- function drawNaiveBayes() {
2616
- const canvas = document.getElementById('naive-bayes-canvas');
2617
  if (!canvas) return;
2618
 
2619
  const ctx = canvas.getContext('2d');
2620
  const width = canvas.width = canvas.offsetWidth;
2621
- const height = canvas.height = 350;
2622
 
2623
  ctx.clearRect(0, 0, width, height);
2624
  ctx.fillStyle = '#1a2332';
2625
  ctx.fillRect(0, 0, width, height);
2626
 
2627
- // Display calculation flow
2628
- const steps = [
2629
- { label: 'Words', value: '["free", "money"]', color: '#6aa9ff' },
2630
- { label: 'P(free|spam)', value: '0.8', color: '#7ef0d4' },
2631
- { label: 'P(money|spam)', value: '0.7', color: '#7ef0d4' },
2632
- { label: 'P(spam)', value: '0.3', color: '#ff8c6a' },
2633
- { label: 'Likelihood', value: '0.8 Γ— 0.7 = 0.56', color: '#7ef0d4' },
2634
- { label: 'Posterior', value: '0.56 Γ— 0.3 = 0.168', color: '#7ef0d4' },
2635
- { label: 'Result', value: 'P(spam) = 0.98 (98%)', color: '#7ef0d4' }
2636
- ];
2637
 
2638
- const boxWidth = 180;
2639
- const boxHeight = 45;
2640
- const startY = 40;
2641
- const gap = 8;
 
 
 
2642
 
2643
- steps.forEach((step, i) => {
2644
- const x = (width - boxWidth) / 2;
2645
- const y = startY + i * (boxHeight + gap);
2646
-
2647
- // Box
2648
- ctx.fillStyle = '#2a3544';
2649
- ctx.fillRect(x, y, boxWidth, boxHeight);
2650
- ctx.strokeStyle = step.color;
2651
  ctx.lineWidth = 2;
2652
- ctx.strokeRect(x, y, boxWidth, boxHeight);
2653
 
2654
- // Text
2655
- ctx.fillStyle = '#a9b4c2';
2656
- ctx.font = '11px sans-serif';
2657
  ctx.textAlign = 'center';
2658
- ctx.fillText(step.label, x + boxWidth / 2, y + boxHeight / 2 - 6);
2659
 
2660
- ctx.fillStyle = step.color;
2661
- ctx.font = 'bold 13px monospace';
2662
- ctx.fillText(step.value, x + boxWidth / 2, y + boxHeight / 2 + 10);
2663
-
2664
- // Arrow
2665
- if (i < steps.length - 1) {
2666
- ctx.strokeStyle = '#6aa9ff';
2667
- ctx.fillStyle = '#6aa9ff';
2668
- ctx.lineWidth = 2;
2669
- const arrowY = y + boxHeight + gap / 2;
2670
- ctx.beginPath();
2671
- ctx.moveTo(x + boxWidth / 2, arrowY - 3);
2672
- ctx.lineTo(x + boxWidth / 2, arrowY + 3);
2673
- ctx.stroke();
2674
-
2675
- // Arrowhead
2676
- ctx.beginPath();
2677
- ctx.moveTo(x + boxWidth / 2, arrowY + 3);
2678
- ctx.lineTo(x + boxWidth / 2 - 4, arrowY - 2);
2679
- ctx.lineTo(x + boxWidth / 2 + 4, arrowY - 2);
2680
- ctx.fill();
2681
- }
2682
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2683
  }
2684
 
2685
  // Handle window resize
@@ -2708,8 +3578,19 @@ window.addEventListener('resize', () => {
2708
  drawSVMCParameter();
2709
  drawSVMTraining();
2710
  drawSVMKernel();
2711
- drawOptimalK();
2712
- drawGridSearch();
2713
- drawNaiveBayes();
 
 
 
 
 
 
 
 
 
 
 
2714
  }, 250);
2715
  });
 
107
  if (section.id === 'optimal-k') initOptimalK();
108
  if (section.id === 'hyperparameter-tuning') initHyperparameterTuning();
109
  if (section.id === 'naive-bayes') initNaiveBayes();
110
+ if (section.id === 'decision-trees') initDecisionTrees();
111
+ if (section.id === 'ensemble-methods') initEnsembleMethods();
112
  }
113
  });
114
  });
 
2356
  ctx.restore();
2357
  }
2358
 
2359
+ // Topic 13: Finding Optimal K in KNN
2360
  function initOptimalK() {
2361
+ const canvas1 = document.getElementById('elbow-canvas');
2362
+ if (canvas1 && !canvas1.dataset.initialized) {
2363
+ canvas1.dataset.initialized = 'true';
2364
+ drawElbowCurve();
 
 
 
 
 
 
 
 
2365
  }
2366
 
2367
+ const canvas2 = document.getElementById('cv-k-canvas');
2368
+ if (canvas2 && !canvas2.dataset.initialized) {
2369
+ canvas2.dataset.initialized = 'true';
2370
+ drawCVKHeatmap();
 
2371
  }
 
 
2372
  }
2373
 
2374
+ function drawElbowCurve() {
2375
+ const canvas = document.getElementById('elbow-canvas');
2376
  if (!canvas) return;
2377
 
2378
  const ctx = canvas.getContext('2d');
 
2387
  const chartWidth = width - 2 * padding;
2388
  const chartHeight = height - 2 * padding;
2389
 
2390
+ // Data from application_data_json
2391
+ const kValues = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19];
2392
+ const accuracies = [0.96, 0.94, 0.93, 0.91, 0.89, 0.87, 0.85, 0.84, 0.83, 0.82, 0.81, 0.80, 0.79, 0.78, 0.77, 0.76, 0.75, 0.74, 0.73];
2393
+ const optimalK = 3;
 
 
 
2394
 
2395
+ const scaleX = (k) => padding + ((k - 1) / (kValues.length - 1)) * chartWidth;
2396
+ const scaleY = (acc) => height - padding - ((acc - 0.7) / 0.3) * chartHeight;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2397
 
2398
  // Draw axes
2399
  ctx.strokeStyle = '#2a3544';
 
2404
  ctx.lineTo(width - padding, height - padding);
2405
  ctx.stroke();
2406
 
2407
+ // Draw curve
2408
  ctx.strokeStyle = '#6aa9ff';
2409
  ctx.lineWidth = 3;
2410
  ctx.beginPath();
2411
+ kValues.forEach((k, i) => {
2412
  const x = scaleX(k);
2413
  const y = scaleY(accuracies[i]);
2414
  if (i === 0) ctx.moveTo(x, y);
 
2417
  ctx.stroke();
2418
 
2419
  // Draw points
2420
+ kValues.forEach((k, i) => {
2421
  const x = scaleX(k);
2422
  const y = scaleY(accuracies[i]);
2423
+ ctx.fillStyle = k === optimalK ? '#7ef0d4' : '#6aa9ff';
 
 
2424
  ctx.beginPath();
2425
+ ctx.arc(x, y, k === optimalK ? 8 : 4, 0, 2 * Math.PI);
2426
  ctx.fill();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2427
  });
2428
 
2429
+ // Highlight optimal K
2430
+ const optX = scaleX(optimalK);
2431
+ const optY = scaleY(accuracies[optimalK - 1]);
2432
+ ctx.strokeStyle = '#7ef0d4';
2433
  ctx.lineWidth = 2;
2434
  ctx.setLineDash([5, 5]);
2435
  ctx.beginPath();
2436
+ ctx.moveTo(optX, optY);
2437
+ ctx.lineTo(optX, height - padding);
2438
  ctx.stroke();
2439
  ctx.setLineDash([]);
2440
 
 
2442
  ctx.fillStyle = '#a9b4c2';
2443
  ctx.font = '12px sans-serif';
2444
  ctx.textAlign = 'center';
2445
+ ctx.fillText('K (Number of Neighbors)', width / 2, height - 20);
2446
  ctx.save();
2447
  ctx.translate(20, height / 2);
2448
  ctx.rotate(-Math.PI / 2);
2449
+ ctx.fillText('Accuracy', 0, 0);
2450
  ctx.restore();
2451
 
2452
+ // Optimal K label
2453
+ ctx.fillStyle = '#7ef0d4';
2454
+ ctx.font = 'bold 14px sans-serif';
2455
+ ctx.textAlign = 'center';
2456
+ ctx.fillText(`Optimal K = ${optimalK}`, optX, padding + 30);
2457
+ ctx.fillText(`Accuracy: ${accuracies[optimalK - 1].toFixed(2)}`, optX, padding + 50);
2458
+ }
2459
+
2460
+ function drawCVKHeatmap() {
2461
+ const canvas = document.getElementById('cv-k-canvas');
2462
+ if (!canvas) return;
2463
+
2464
+ const ctx = canvas.getContext('2d');
2465
+ const width = canvas.width = canvas.offsetWidth;
2466
+ const height = canvas.height = 400;
2467
+
2468
+ ctx.clearRect(0, 0, width, height);
2469
+ ctx.fillStyle = '#1a2332';
2470
+ ctx.fillRect(0, 0, width, height);
2471
+
2472
+ const padding = 80;
2473
+ const chartWidth = width - 2 * padding;
2474
+ const chartHeight = height - 2 * padding;
2475
+
2476
+ const kValues = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19];
2477
+ const folds = ['Fold 1', 'Fold 2', 'Fold 3'];
2478
+ const fold1 = [0.98, 0.92, 0.88, 0.85, 0.83, 0.81, 0.79, 0.77, 0.75, 0.73];
2479
+ const fold2 = [0.96, 0.91, 0.87, 0.83, 0.81, 0.79, 0.77, 0.75, 0.73, 0.71];
2480
+ const fold3 = [0.94, 0.90, 0.86, 0.82, 0.79, 0.77, 0.75, 0.73, 0.71, 0.69];
2481
+ const allData = [fold1, fold2, fold3];
2482
+
2483
+ const cellWidth = chartWidth / kValues.length;
2484
+ const cellHeight = chartHeight / folds.length;
2485
+
2486
+ // Draw heatmap
2487
+ folds.forEach((fold, i) => {
2488
+ kValues.forEach((k, j) => {
2489
+ const acc = allData[i][j];
2490
+ const x = padding + j * cellWidth;
2491
+ const y = padding + i * cellHeight;
2492
+
2493
+ // Color based on accuracy
2494
+ const intensity = (acc - 0.65) / 0.35;
2495
+ const r = Math.floor(106 + (126 - 106) * intensity);
2496
+ const g = Math.floor(169 + (240 - 169) * intensity);
2497
+ const b = Math.floor(255 + (212 - 255) * intensity);
2498
+ ctx.fillStyle = `rgb(${r}, ${g}, ${b})`;
2499
+ ctx.fillRect(x, y, cellWidth, cellHeight);
2500
+
2501
+ // Border
2502
+ ctx.strokeStyle = '#1a2332';
2503
+ ctx.lineWidth = 1;
2504
+ ctx.strokeRect(x, y, cellWidth, cellHeight);
2505
+
2506
+ // Text
2507
+ ctx.fillStyle = '#1a2332';
2508
+ ctx.font = 'bold 11px sans-serif';
2509
+ ctx.textAlign = 'center';
2510
+ ctx.fillText(acc.toFixed(2), x + cellWidth / 2, y + cellHeight / 2 + 4);
2511
+ });
2512
+ });
2513
+
2514
+ // Row labels
2515
+ ctx.fillStyle = '#e8eef6';
2516
+ ctx.font = '12px sans-serif';
2517
+ ctx.textAlign = 'right';
2518
+ folds.forEach((fold, i) => {
2519
+ const y = padding + i * cellHeight + cellHeight / 2;
2520
+ ctx.fillText(fold, padding - 10, y + 4);
2521
+ });
2522
+
2523
+ // Column labels
2524
+ ctx.textAlign = 'center';
2525
+ kValues.forEach((k, j) => {
2526
+ const x = padding + j * cellWidth + cellWidth / 2;
2527
+ ctx.fillText(`K=${k}`, x, padding - 10);
2528
+ });
2529
+
2530
+ // Mean accuracy
2531
+ ctx.fillStyle = '#7ef0d4';
2532
+ ctx.font = 'bold 14px sans-serif';
2533
+ ctx.textAlign = 'left';
2534
+ const meanAccs = kValues.map((k, j) => {
2535
+ const sum = fold1[j] + fold2[j] + fold3[j];
2536
+ return sum / 3;
2537
+ });
2538
+ const maxMean = Math.max(...meanAccs);
2539
+ const optIdx = meanAccs.indexOf(maxMean);
2540
+ ctx.fillText(`Best K = ${kValues[optIdx]} (Mean Acc: ${maxMean.toFixed(3)})`, padding, height - 20);
2541
  }
2542
 
2543
+ // Topic 14: Hyperparameter Tuning
2544
  function initHyperparameterTuning() {
2545
+ const canvas1 = document.getElementById('gridsearch-heatmap');
2546
+ if (canvas1 && !canvas1.dataset.initialized) {
2547
+ canvas1.dataset.initialized = 'true';
2548
+ drawGridSearchHeatmap();
2549
+ }
2550
+
2551
+ const canvas2 = document.getElementById('param-surface');
2552
+ if (canvas2 && !canvas2.dataset.initialized) {
2553
+ canvas2.dataset.initialized = 'true';
2554
+ drawParamSurface();
2555
+ }
2556
+
2557
+ const radios = document.querySelectorAll('input[name="grid-model"]');
2558
+ radios.forEach(radio => {
2559
+ radio.addEventListener('change', () => {
2560
+ drawGridSearchHeatmap();
2561
+ });
2562
+ });
2563
  }
2564
 
2565
+ function drawGridSearchHeatmap() {
2566
+ const canvas = document.getElementById('gridsearch-heatmap');
2567
  if (!canvas) return;
2568
 
2569
  const ctx = canvas.getContext('2d');
2570
  const width = canvas.width = canvas.offsetWidth;
2571
+ const height = canvas.height = 450;
2572
 
2573
  ctx.clearRect(0, 0, width, height);
2574
  ctx.fillStyle = '#1a2332';
 
2578
  const chartWidth = width - 2 * padding;
2579
  const chartHeight = height - 2 * padding;
2580
 
 
2581
  const cValues = [0.1, 1, 10, 100];
2582
  const gammaValues = [0.001, 0.01, 0.1, 1];
2583
 
2584
+ // Simulate accuracy grid
2585
+ const accuracies = [
2586
+ [0.65, 0.82, 0.88, 0.75],
2587
+ [0.78, 0.91, 0.95, 0.89],
2588
+ [0.85, 0.93, 0.92, 0.87],
2589
+ [0.80, 0.88, 0.84, 0.82]
2590
  ];
2591
 
2592
  const cellWidth = chartWidth / cValues.length;
2593
  const cellHeight = chartHeight / gammaValues.length;
2594
 
2595
+ let bestAcc = 0, bestI = 0, bestJ = 0;
2596
+
2597
+ // Draw heatmap
2598
+ gammaValues.forEach((gamma, i) => {
2599
+ cValues.forEach((c, j) => {
2600
+ const acc = accuracies[i][j];
2601
+ if (acc > bestAcc) {
2602
+ bestAcc = acc;
2603
+ bestI = i;
2604
+ bestJ = j;
2605
+ }
2606
 
2607
+ const x = padding + j * cellWidth;
2608
+ const y = padding + i * cellHeight;
2609
+
2610
+ // Color gradient
2611
+ const intensity = (acc - 0.6) / 0.35;
2612
+ const r = Math.floor(255 - 149 * intensity);
2613
+ const g = Math.floor(140 + 100 * intensity);
2614
+ const b = Math.floor(106 + 106 * intensity);
2615
+ ctx.fillStyle = `rgb(${r}, ${g}, ${b})`;
2616
  ctx.fillRect(x, y, cellWidth, cellHeight);
2617
 
2618
  // Border
 
2620
  ctx.lineWidth = 2;
2621
  ctx.strokeRect(x, y, cellWidth, cellHeight);
2622
 
2623
+ // Text
2624
+ ctx.fillStyle = '#1a2332';
2625
  ctx.font = 'bold 14px sans-serif';
2626
  ctx.textAlign = 'center';
2627
+ ctx.fillText(acc.toFixed(2), x + cellWidth / 2, y + cellHeight / 2 + 5);
 
 
 
 
 
 
 
 
 
 
 
2628
  });
2629
  });
2630
 
2631
+ // Highlight best
2632
+ const bestX = padding + bestJ * cellWidth;
2633
+ const bestY = padding + bestI * cellHeight;
2634
+ ctx.strokeStyle = '#7ef0d4';
2635
+ ctx.lineWidth = 4;
2636
+ ctx.strokeRect(bestX, bestY, cellWidth, cellHeight);
2637
+
2638
+ // Labels
2639
+ ctx.fillStyle = '#e8eef6';
2640
  ctx.font = '12px sans-serif';
2641
+ ctx.textAlign = 'right';
2642
+ gammaValues.forEach((gamma, i) => {
2643
+ const y = padding + i * cellHeight + cellHeight / 2;
2644
+ ctx.fillText(`Ξ³=${gamma}`, padding - 10, y + 5);
2645
+ });
2646
+
2647
  ctx.textAlign = 'center';
2648
+ cValues.forEach((c, j) => {
2649
+ const x = padding + j * cellWidth + cellWidth / 2;
2650
+ ctx.fillText(`C=${c}`, x, padding - 10);
2651
  });
2652
 
2653
+ // Axis labels
2654
+ ctx.fillStyle = '#a9b4c2';
2655
+ ctx.font = 'bold 14px sans-serif';
2656
+ ctx.fillText('C Parameter', width / 2, height - 30);
2657
+ ctx.save();
2658
+ ctx.translate(25, height / 2);
2659
+ ctx.rotate(-Math.PI / 2);
2660
+ ctx.fillText('Gamma Parameter', 0, 0);
2661
+ ctx.restore();
2662
+
2663
+ // Best params
2664
+ ctx.fillStyle = '#7ef0d4';
2665
+ ctx.font = 'bold 14px sans-serif';
2666
+ ctx.textAlign = 'left';
2667
+ ctx.fillText(`Best: C=${cValues[bestJ]}, Ξ³=${gammaValues[bestI]} β†’ Acc=${bestAcc.toFixed(2)}`, padding, height - 30);
2668
+ }
2669
+
2670
+ function drawParamSurface() {
2671
+ const canvas = document.getElementById('param-surface');
2672
+ if (!canvas) return;
2673
+
2674
+ const ctx = canvas.getContext('2d');
2675
+ const width = canvas.width = canvas.offsetWidth;
2676
+ const height = canvas.height = 400;
2677
+
2678
+ ctx.clearRect(0, 0, width, height);
2679
+ ctx.fillStyle = '#1a2332';
2680
+ ctx.fillRect(0, 0, width, height);
2681
+
2682
+ const padding = 60;
2683
+ const centerX = width / 2;
2684
+ const centerY = height / 2;
2685
+
2686
+ // Draw 3D-ish surface using contour lines
2687
+ const levels = [0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95];
2688
+ const colors = ['#ff8c6a', '#ffa07a', '#ffb490', '#ffc8a6', '#7ef0d4', '#6aa9ff', '#5a99ef'];
2689
+
2690
+ levels.forEach((level, i) => {
2691
+ const radius = 150 - i * 20;
2692
+ ctx.strokeStyle = colors[i];
2693
+ ctx.lineWidth = 3;
2694
+ ctx.beginPath();
2695
+ ctx.ellipse(centerX, centerY, radius, radius * 0.6, 0, 0, 2 * Math.PI);
2696
+ ctx.stroke();
2697
+
2698
+ // Label
2699
+ ctx.fillStyle = colors[i];
2700
+ ctx.font = '11px sans-serif';
2701
+ ctx.textAlign = 'left';
2702
+ ctx.fillText(level.toFixed(2), centerX + radius + 10, centerY);
2703
  });
2704
 
2705
+ // Center point (optimum)
2706
  ctx.fillStyle = '#7ef0d4';
2707
+ ctx.beginPath();
2708
+ ctx.arc(centerX, centerY, 8, 0, 2 * Math.PI);
2709
+ ctx.fill();
2710
+
2711
+ ctx.fillStyle = '#7ef0d4';
2712
+ ctx.font = 'bold 14px sans-serif';
2713
  ctx.textAlign = 'center';
2714
+ ctx.fillText('Optimal Point', centerX, centerY - 20);
2715
+ ctx.fillText('(C=1, Ξ³=scale)', centerX, centerY + 35);
2716
 
2717
+ // Axis labels
 
2718
  ctx.fillStyle = '#a9b4c2';
2719
+ ctx.font = '12px sans-serif';
2720
+ ctx.fillText('C Parameter β†’', width - 80, height - 20);
2721
+ ctx.save();
2722
+ ctx.translate(30, 60);
2723
+ ctx.rotate(-Math.PI / 2);
2724
+ ctx.fillText('← Gamma', 0, 0);
2725
+ ctx.restore();
2726
+
2727
+ ctx.fillStyle = '#e8eef6';
2728
+ ctx.font = 'bold 16px sans-serif';
2729
+ ctx.textAlign = 'center';
2730
+ ctx.fillText('Performance Surface (3D Contour View)', width / 2, 30);
2731
  }
2732
 
2733
+ // Topic 15: Naive Bayes
2734
  function initNaiveBayes() {
2735
+ const canvas1 = document.getElementById('bayes-theorem-viz');
2736
+ if (canvas1 && !canvas1.dataset.initialized) {
2737
+ canvas1.dataset.initialized = 'true';
2738
+ drawBayesTheorem();
2739
+ }
2740
+
2741
+ const canvas2 = document.getElementById('spam-classification');
2742
+ if (canvas2 && !canvas2.dataset.initialized) {
2743
+ canvas2.dataset.initialized = 'true';
2744
+ drawSpamClassification();
2745
+ }
2746
  }
2747
 
2748
+ function drawBayesTheorem() {
2749
+ const canvas = document.getElementById('bayes-theorem-viz');
2750
  if (!canvas) return;
2751
 
2752
  const ctx = canvas.getContext('2d');
2753
  const width = canvas.width = canvas.offsetWidth;
2754
+ const height = canvas.height = 400;
2755
 
2756
  ctx.clearRect(0, 0, width, height);
2757
  ctx.fillStyle = '#1a2332';
2758
  ctx.fillRect(0, 0, width, height);
2759
 
2760
+ const centerX = width / 2;
2761
+ const centerY = height / 2;
 
 
 
 
 
 
 
 
2762
 
2763
+ // Draw formula components as boxes
2764
+ const boxes = [
2765
+ { x: centerX - 300, y: centerY - 80, w: 120, h: 60, text: 'P(C|F)', label: 'Posterior', color: '#7ef0d4' },
2766
+ { x: centerX - 100, y: centerY - 80, w: 120, h: 60, text: 'P(F|C)', label: 'Likelihood', color: '#6aa9ff' },
2767
+ { x: centerX + 100, y: centerY - 80, w: 100, h: 60, text: 'P(C)', label: 'Prior', color: '#ffb490' },
2768
+ { x: centerX - 50, y: centerY + 60, w: 100, h: 60, text: 'P(F)', label: 'Evidence', color: '#ff8c6a' }
2769
+ ];
2770
 
2771
+ boxes.forEach(box => {
2772
+ ctx.fillStyle = box.color + '33';
2773
+ ctx.fillRect(box.x, box.y, box.w, box.h);
2774
+ ctx.strokeStyle = box.color;
 
 
 
 
2775
  ctx.lineWidth = 2;
2776
+ ctx.strokeRect(box.x, box.y, box.w, box.h);
2777
 
2778
+ ctx.fillStyle = box.color;
2779
+ ctx.font = 'bold 16px sans-serif';
 
2780
  ctx.textAlign = 'center';
2781
+ ctx.fillText(box.text, box.x + box.w / 2, box.y + box.h / 2);
2782
 
2783
+ ctx.font = '12px sans-serif';
2784
+ ctx.fillStyle = '#a9b4c2';
2785
+ ctx.fillText(box.label, box.x + box.w / 2, box.y + box.h + 20);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2786
  });
2787
+
2788
+ // Draw arrows and operators
2789
+ ctx.fillStyle = '#e8eef6';
2790
+ ctx.font = 'bold 20px sans-serif';
2791
+ ctx.textAlign = 'center';
2792
+ ctx.fillText('=', centerX - 160, centerY - 40);
2793
+ ctx.fillText('Γ—', centerX + 40, centerY - 40);
2794
+ ctx.fillText('Γ·', centerX, centerY + 20);
2795
+
2796
+ // Title
2797
+ ctx.fillStyle = '#7ef0d4';
2798
+ ctx.font = 'bold 18px sans-serif';
2799
+ ctx.fillText("Bayes' Theorem Breakdown", centerX, 40);
2800
+ }
2801
+
2802
+ function drawSpamClassification() {
2803
+ const canvas = document.getElementById('spam-classification');
2804
+ if (!canvas) return;
2805
+
2806
+ const ctx = canvas.getContext('2d');
2807
+ const width = canvas.width = canvas.offsetWidth;
2808
+ const height = canvas.height = 400;
2809
+
2810
+ ctx.clearRect(0, 0, width, height);
2811
+ ctx.fillStyle = '#1a2332';
2812
+ ctx.fillRect(0, 0, width, height);
2813
+
2814
+ const padding = 40;
2815
+ const stepHeight = 70;
2816
+ const startY = 60;
2817
+
2818
+ // Step 1: Features
2819
+ ctx.fillStyle = '#6aa9ff';
2820
+ ctx.font = 'bold 14px sans-serif';
2821
+ ctx.textAlign = 'left';
2822
+ ctx.fillText('Step 1: Email Features', padding, startY);
2823
+ ctx.fillStyle = '#e8eef6';
2824
+ ctx.font = '13px sans-serif';
2825
+ ctx.fillText('Words: ["free", "winner", "click"]', padding + 20, startY + 25);
2826
+
2827
+ // Step 2: Calculate P(spam)
2828
+ const y2 = startY + stepHeight;
2829
+ ctx.fillStyle = '#6aa9ff';
2830
+ ctx.font = 'bold 14px sans-serif';
2831
+ ctx.fillText('Step 2: P(spam | features)', padding, y2);
2832
+ ctx.fillStyle = '#e8eef6';
2833
+ ctx.font = '12px monospace';
2834
+ ctx.fillText('= P("free"|spam) Γ— P("winner"|spam) Γ— P("click"|spam) Γ— P(spam)', padding + 20, y2 + 25);
2835
+ ctx.fillText('= 0.8 Γ— 0.7 Γ— 0.6 Γ— 0.3', padding + 20, y2 + 45);
2836
+ ctx.fillStyle = '#7ef0d4';
2837
+ ctx.font = 'bold 14px monospace';
2838
+ ctx.fillText('= 0.1008', padding + 20, y2 + 65);
2839
+
2840
+ // Step 3: Calculate P(not spam)
2841
+ const y3 = y2 + stepHeight + 50;
2842
+ ctx.fillStyle = '#6aa9ff';
2843
+ ctx.font = 'bold 14px sans-serif';
2844
+ ctx.fillText('Step 3: P(not-spam | features)', padding, y3);
2845
+ ctx.fillStyle = '#e8eef6';
2846
+ ctx.font = '12px monospace';
2847
+ ctx.fillText('= P("free"|not-spam) Γ— P("winner"|not-spam) Γ— P("click"|not-spam) Γ— P(not-spam)', padding + 20, y3 + 25);
2848
+ ctx.fillText('= 0.1 Γ— 0.05 Γ— 0.2 Γ— 0.7', padding + 20, y3 + 45);
2849
+ ctx.fillStyle = '#ff8c6a';
2850
+ ctx.font = 'bold 14px monospace';
2851
+ ctx.fillText('= 0.0007', padding + 20, y3 + 65);
2852
+
2853
+ // Step 4: Decision
2854
+ const y4 = y3 + stepHeight + 50;
2855
+ ctx.fillStyle = '#7ef0d4';
2856
+ ctx.font = 'bold 16px sans-serif';
2857
+ ctx.fillText('Decision: 0.1008 > 0.0007', padding, y4);
2858
+ ctx.fillStyle = '#7ef0d4';
2859
+ ctx.font = 'bold 18px sans-serif';
2860
+ ctx.fillText('β†’ SPAM! πŸ“§βŒ', padding, y4 + 30);
2861
+
2862
+ // Visual comparison
2863
+ const barY = y4 + 60;
2864
+ const barMaxWidth = width - 2 * padding - 100;
2865
+ ctx.fillStyle = '#7ef0d4';
2866
+ ctx.fillRect(padding, barY, 0.1008 / 0.1008 * barMaxWidth, 20);
2867
+ ctx.fillStyle = '#e8eef6';
2868
+ ctx.font = '11px sans-serif';
2869
+ ctx.textAlign = 'right';
2870
+ ctx.fillText('Spam', padding + barMaxWidth + 80, barY + 15);
2871
+
2872
+ ctx.fillStyle = '#ff8c6a';
2873
+ ctx.fillRect(padding, barY + 30, 0.0007 / 0.1008 * barMaxWidth, 20);
2874
+ ctx.fillStyle = '#e8eef6';
2875
+ ctx.fillText('Not Spam', padding + barMaxWidth + 80, barY + 45);
2876
+ }
2877
+
2878
// Topic 16: Decision Trees
function initDecisionTrees() {
    // Canvas id -> first-time draw routine. The original repeated the same
    // lookup/guard/draw stanza four times; a table keeps it in one place.
    const visualizations = [
        ['decision-tree-viz', drawDecisionTree],
        ['entropy-viz', drawEntropyViz],
        ['split-comparison', drawSplitComparison],
        ['tree-boundary', drawTreeBoundary]
    ];

    for (const [id, draw] of visualizations) {
        const canvas = document.getElementById(id);
        // Skip canvases missing from the DOM; draw each one only once.
        if (canvas && !canvas.dataset.initialized) {
            canvas.dataset.initialized = 'true';
            draw();
        }
    }
}
2904
+
2905
// Renders a small hard-coded spam-classifier decision tree: question and
// leaf nodes as boxes, with Yes/No labelled connectors between levels.
function drawDecisionTree() {
    const canvas = document.getElementById('decision-tree-viz');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 450;

    // Dark background.
    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const cx = width / 2;

    // Fixed layout: root question, two internal questions, four leaf verdicts.
    const nodes = [
        { x: cx, y: 60, text: 'Has "free"?', type: 'root' },
        { x: cx - 150, y: 160, text: 'Has link?', type: 'internal' },
        { x: cx + 150, y: 160, text: 'Sender new?', type: 'internal' },
        { x: cx - 220, y: 260, text: 'SPAM', type: 'leaf', class: 'spam' },
        { x: cx - 80, y: 260, text: 'NOT SPAM', type: 'leaf', class: 'not-spam' },
        { x: cx + 80, y: 260, text: 'SPAM', type: 'leaf', class: 'spam' },
        { x: cx + 220, y: 260, text: 'NOT SPAM', type: 'leaf', class: 'not-spam' }
    ];

    const edges = [
        { from: 0, to: 1, label: 'Yes' },
        { from: 0, to: 2, label: 'No' },
        { from: 1, to: 3, label: 'Yes' },
        { from: 1, to: 4, label: 'No' },
        { from: 2, to: 5, label: 'Yes' },
        { from: 2, to: 6, label: 'No' }
    ];

    // Connectors first so node boxes paint on top of them.
    ctx.strokeStyle = '#6aa9ff';
    ctx.lineWidth = 2;
    for (const { from, to, label } of edges) {
        const a = nodes[from];
        const b = nodes[to];
        ctx.beginPath();
        ctx.moveTo(a.x, a.y + 25);
        ctx.lineTo(b.x, b.y - 25);
        ctx.stroke();

        // Yes/No tag just right of the connector's midpoint.
        ctx.fillStyle = '#7ef0d4';
        ctx.font = '11px sans-serif';
        ctx.textAlign = 'center';
        ctx.fillText(label, (a.x + b.x) / 2 + 15, (a.y + b.y) / 2);
    }

    // Node boxes, colour-coded: blue questions, orange/teal leaf verdicts.
    for (const node of nodes) {
        const isLeaf = node.type === 'leaf';
        if (isLeaf) {
            const isSpam = node.class === 'spam';
            ctx.fillStyle = isSpam ? '#ff8c6a33' : '#7ef0d433';
            ctx.strokeStyle = isSpam ? '#ff8c6a' : '#7ef0d4';
        } else {
            ctx.fillStyle = '#6aa9ff33';
            ctx.strokeStyle = '#6aa9ff';
        }

        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.rect(node.x - 60, node.y - 20, 120, 40);
        ctx.fill();
        ctx.stroke();

        ctx.fillStyle = '#e8eef6';
        ctx.font = isLeaf ? 'bold 13px sans-serif' : '12px sans-serif';
        ctx.textAlign = 'center';
        ctx.fillText(node.text, node.x, node.y + 5);
    }

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText('Decision Tree: Email Spam Classifier', cx, 30);

    // Worked-example caption, bottom-left.
    ctx.fillStyle = '#a9b4c2';
    ctx.font = '12px sans-serif';
    ctx.textAlign = 'left';
    ctx.fillText('Example: Email with "free" + link β†’ SPAM', 40, height - 20);
}
2992
+
2993
// Plots the binary entropy curve H(p) = -p*log2(p) - (1-p)*log2(1-p)
// over p in (0, 1) and annotates the pure (low) and 50/50 (maximum) points.
function drawEntropyViz() {
    const canvas = document.getElementById('entropy-viz');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 400;

    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const padding = 60;
    const chartWidth = width - 2 * padding;
    const chartHeight = height - 2 * padding;

    // Binary entropy of a Bernoulli(p); only defined on the open interval (0, 1).
    const entropyOf = (p) => -p * Math.log2(p) - (1 - p) * Math.log2(1 - p);

    // Curve for p = 0.01 .. 0.99. An integer counter avoids the original's
    // float-accumulation bug: `p += 0.01` drifts, and the `p === 0.01`
    // first-point check relied on exact float equality.
    ctx.strokeStyle = '#6aa9ff';
    ctx.lineWidth = 3;
    ctx.beginPath();
    for (let step = 1; step <= 99; step++) {
        const p = step / 100;
        const x = padding + p * chartWidth;
        const y = height - padding - entropyOf(p) * chartHeight;
        if (step === 1) ctx.moveTo(x, y);
        else ctx.lineTo(x, y);
    }
    ctx.stroke();

    // Key points: near-pure class mixes versus the maximally uncertain 50/50.
    const points = [
        { p: 0.1, label: 'Pure\n(low)' },
        { p: 0.5, label: 'Maximum\n(high)' },
        { p: 0.9, label: 'Pure\n(low)' }
    ];

    points.forEach(point => {
        const x = padding + point.p * chartWidth;
        const y = height - padding - entropyOf(point.p) * chartHeight;

        // Marker dot.
        ctx.fillStyle = '#7ef0d4';
        ctx.beginPath();
        ctx.arc(x, y, 6, 0, 2 * Math.PI);
        ctx.fill();

        // Multi-line caption stacked above the dot.
        ctx.font = '11px sans-serif';
        ctx.textAlign = 'center';
        const lines = point.label.split('\n');
        lines.forEach((line, i) => {
            ctx.fillText(line, x, y - 15 - (lines.length - 1 - i) * 12);
        });
    });

    // Axes.
    ctx.strokeStyle = '#2a3544';
    ctx.lineWidth = 2;
    ctx.beginPath();
    ctx.moveTo(padding, padding);
    ctx.lineTo(padding, height - padding);
    ctx.lineTo(width - padding, height - padding);
    ctx.stroke();

    // Axis labels (y label drawn rotated).
    ctx.fillStyle = '#a9b4c2';
    ctx.font = '12px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Proportion of Positive Class (p)', width / 2, height - 20);
    ctx.save();
    ctx.translate(20, height / 2);
    ctx.rotate(-Math.PI / 2);
    ctx.fillText('Entropy H(p)', 0, 0);
    ctx.restore();

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Entropy: Measuring Disorder', width / 2, 30);
}
3074
+
3075
// Horizontal bar chart comparing candidate splits by information gain (IG);
// the longest bar is the split a decision-tree learner would choose.
function drawSplitComparison() {
    const canvas = document.getElementById('split-comparison');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 400;

    // Dark background.
    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    // Candidate splits with hard-coded information-gain scores.
    const splits = [
        { name: 'Split A: "Contains FREE"', ig: 0.034, color: '#ff8c6a' },
        { name: 'Split B: "Has Link"', ig: 0.156, color: '#7ef0d4' },
        { name: 'Split C: "Urgent"', ig: 0.089, color: '#ffb490' }
    ];

    const padding = 60;
    const barHeight = 60;
    const maxBarWidth = width - 2 * padding - 200;
    const bestIG = Math.max(...splits.map((s) => s.ig));

    splits.forEach((split, index) => {
        const top = 80 + index * (barHeight + 40);
        // Bars are scaled relative to the best split's gain.
        const barLength = (split.ig / bestIG) * maxBarWidth;

        // Filled bar with an outline in the split's colour.
        ctx.fillStyle = split.color;
        ctx.fillRect(padding, top, barLength, barHeight);
        ctx.strokeStyle = split.color;
        ctx.lineWidth = 2;
        ctx.strokeRect(padding, top, barLength, barHeight);

        // Split name above the bar.
        ctx.fillStyle = '#e8eef6';
        ctx.font = 'bold 13px sans-serif';
        ctx.textAlign = 'left';
        ctx.fillText(split.name, padding, top - 10);

        // IG value centred inside the bar.
        ctx.fillStyle = '#1a2332';
        ctx.font = 'bold 16px sans-serif';
        ctx.textAlign = 'center';
        ctx.fillText(`IG = ${split.ig.toFixed(3)}`, padding + barLength / 2, top + barHeight / 2 + 6);
    });

    // Takeaway line and title share the same styling.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('βœ“ Best split: Highest Information Gain!', width / 2, height - 30);
    ctx.fillText('Comparing Split Quality', width / 2, 40);
}
3135
+
3136
// Shows how an axis-aligned decision tree partitions 2-D feature space into
// rectangular class regions, with example points scattered in each region.
function drawTreeBoundary() {
    const canvas = document.getElementById('tree-boundary');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 400;

    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const padding = 60;
    const chartWidth = width - 2 * padding;
    const chartHeight = height - 2 * padding;

    // Rectangular regions (normalized [0,1] coordinates) produced by the splits.
    const regions = [
        { x1: 0, y1: 0, x2: 0.5, y2: 0.6, class: 'orange' },
        { x1: 0.5, y1: 0, x2: 1, y2: 0.6, class: 'yellow' },
        { x1: 0, y1: 0.6, x2: 0.3, y2: 1, class: 'yellow' },
        { x1: 0.3, y1: 0.6, x2: 1, y2: 1, class: 'orange' }
    ];

    regions.forEach(region => {
        const x = padding + region.x1 * chartWidth;
        const y = padding + region.y1 * chartHeight;
        const w = (region.x2 - region.x1) * chartWidth;
        const h = (region.y2 - region.y1) * chartHeight;

        ctx.fillStyle = region.class === 'orange' ? 'rgba(255, 140, 106, 0.2)' : 'rgba(255, 235, 59, 0.2)';
        ctx.fillRect(x, y, w, h);

        ctx.strokeStyle = region.class === 'orange' ? '#ff8c6a' : '#ffeb3b';
        ctx.lineWidth = 2;
        ctx.strokeRect(x, y, w, h);
    });

    // Seeded PRNG (mulberry32) so the scatter is identical on every redraw.
    // The original used Math.random(), which made the points jump each time
    // the window-resize handler re-invoked this function.
    let seed = 0x2c9277b5;
    const rand = () => {
        seed = (seed + 0x6d2b79f5) | 0;
        let t = Math.imul(seed ^ (seed >>> 15), 1 | seed);
        t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
        return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
    };

    // Generate scatter points consistent with each region's class.
    const orangePoints = [];
    const yellowPoints = [];
    for (let i = 0; i < 15; i++) {
        if (rand() < 0.3) {
            orangePoints.push({ x: rand() * 0.5, y: rand() * 0.6 });
        }
        if (rand() < 0.3) {
            yellowPoints.push({ x: 0.5 + rand() * 0.5, y: rand() * 0.6 });
        }
        if (rand() < 0.3) {
            orangePoints.push({ x: 0.3 + rand() * 0.7, y: 0.6 + rand() * 0.4 });
        }
        if (rand() < 0.3) {
            yellowPoints.push({ x: rand() * 0.3, y: 0.6 + rand() * 0.4 });
        }
    }

    // Draw the scatter: orange class first, then yellow (matches region colours).
    const plotPoint = (p, color) => {
        ctx.fillStyle = color;
        ctx.beginPath();
        ctx.arc(padding + p.x * chartWidth, padding + p.y * chartHeight, 5, 0, 2 * Math.PI);
        ctx.fill();
    };
    orangePoints.forEach(p => plotPoint(p, '#ff8c6a'));
    yellowPoints.forEach(p => plotPoint(p, '#ffeb3b'));

    // Axis labels (y label drawn rotated).
    ctx.fillStyle = '#a9b4c2';
    ctx.font = '12px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Feature 1', width / 2, height - 20);
    ctx.save();
    ctx.translate(20, height / 2);
    ctx.rotate(-Math.PI / 2);
    ctx.fillText('Feature 2', 0, 0);
    ctx.restore();

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Decision Tree Creates Rectangular Regions', width / 2, 30);
}
3224
+
3225
// Topic 17: Ensemble Methods
function initEnsembleMethods() {
    // Canvas id -> one-time draw routine (bagging, boosting, random forest).
    // The original repeated the lookup/guard/draw stanza once per canvas.
    const visualizations = [
        ['bagging-viz', drawBaggingViz],
        ['boosting-viz', drawBoostingViz],
        ['random-forest-viz', drawRandomForestViz]
    ];

    for (const [id, draw] of visualizations) {
        const canvas = document.getElementById(id);
        // Skip canvases missing from the DOM; draw each one only once.
        if (canvas && !canvas.dataset.initialized) {
            canvas.dataset.initialized = 'true';
            draw();
        }
    }
}
3245
+
3246
// Diagrams bagging: one dataset resampled into three bootstrap sets, each
// training its own model, whose predictions are then averaged or voted on.
function drawBaggingViz() {
    const canvas = document.getElementById('bagging-viz');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 400;

    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const boxWidth = 150;
    const boxHeight = 60;
    const topY = 60;
    const gap = (width - 3 * boxWidth) / 4;

    // Helper: filled rectangle with a coloured outline.
    const drawBox = (x, y, w, h, fill, stroke, lineWidth) => {
        ctx.fillStyle = fill;
        ctx.fillRect(x, y, w, h);
        ctx.strokeStyle = stroke;
        ctx.lineWidth = lineWidth;
        ctx.strokeRect(x, y, w, h);
    };

    // Source dataset box, top centre.
    drawBox(width / 2 - 100, topY, 200, boxHeight, '#6aa9ff33', '#6aa9ff', 2);
    ctx.fillStyle = '#e8eef6';
    ctx.font = 'bold 14px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Original Dataset', width / 2, topY + boxHeight / 2 + 5);

    const sampleY = topY + boxHeight + 60;
    for (let i = 0; i < 3; i++) {
        const x = gap + i * (boxWidth + gap);

        // Fan-out arrow: dataset -> bootstrap sample.
        ctx.strokeStyle = '#7ef0d4';
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.moveTo(width / 2, topY + boxHeight);
        ctx.lineTo(x + boxWidth / 2, sampleY);
        ctx.stroke();

        // Bootstrap-sample box with its two-line caption.
        drawBox(x, sampleY, boxWidth, boxHeight, '#7ef0d433', '#7ef0d4', 2);
        ctx.fillStyle = '#e8eef6';
        ctx.font = 'bold 12px sans-serif';
        ctx.fillText(`Bootstrap ${i + 1}`, x + boxWidth / 2, sampleY + boxHeight / 2 - 5);
        ctx.font = '10px sans-serif';
        ctx.fillStyle = '#a9b4c2';
        ctx.fillText('(random sample)', x + boxWidth / 2, sampleY + boxHeight / 2 + 10);

        // Per-sample model box.
        const modelY = sampleY + boxHeight + 40;
        drawBox(x, modelY, boxWidth, boxHeight, '#ffb49033', '#ffb490', 2);
        ctx.fillStyle = '#e8eef6';
        ctx.font = 'bold 12px sans-serif';
        ctx.fillText(`Model ${i + 1}`, x + boxWidth / 2, modelY + boxHeight / 2 + 5);

        // Fan-in arrow: model -> aggregated prediction.
        ctx.strokeStyle = '#ffb490';
        ctx.beginPath();
        ctx.moveTo(x + boxWidth / 2, modelY + boxHeight);
        ctx.lineTo(width / 2, height - 60);
        ctx.stroke();
    }

    // Aggregation box at the bottom (thicker outline for emphasis).
    drawBox(width / 2 - 100, height - 60, 200, boxHeight, '#ff8c6a33', '#ff8c6a', 3);
    ctx.fillStyle = '#e8eef6';
    ctx.font = 'bold 14px sans-serif';
    ctx.fillText('Average / Vote', width / 2, height - 60 + boxHeight / 2 + 5);

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText('Bagging: Bootstrap Aggregating', width / 2, 30);
}
3334
+
3335
// Diagrams boosting: three sequential rounds in which each model trains on
// data re-weighted toward the previous model's mistakes.
function drawBoostingViz() {
    const canvas = document.getElementById('boosting-viz');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 450;

    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const rowTops = [80, 180, 280];
    const dataX = 100;
    const modelX = width / 2;
    const predX = width - 150;

    rowTops.forEach((rowY, round) => {
        const isFirst = round === 0;
        const isLast = round === 2;
        // Later rows fade slightly to suggest progression.
        const fade = isFirst ? 1 : (round === 1 ? 0.7 : 0.5);

        // Row label.
        ctx.fillStyle = '#7ef0d4';
        ctx.font = 'bold 14px sans-serif';
        ctx.textAlign = 'left';
        ctx.fillText(`Iteration ${round + 1}`, 20, rowY + 30);

        // Weighted-data box, drawn with the row's fade applied.
        ctx.globalAlpha = fade;
        ctx.fillStyle = '#6aa9ff33';
        ctx.fillRect(dataX, rowY, 120, 60);
        ctx.strokeStyle = '#6aa9ff';
        ctx.lineWidth = 2;
        ctx.strokeRect(dataX, rowY, 120, 60);
        ctx.globalAlpha = 1;

        ctx.fillStyle = '#e8eef6';
        ctx.font = '12px sans-serif';
        ctx.textAlign = 'center';
        ctx.fillText('Weighted Data', dataX + 60, rowY + 25);
        ctx.fillStyle = isFirst ? '#7ef0d4' : '#ff8c6a';
        ctx.font = 'bold 11px sans-serif';
        ctx.fillText(isFirst ? 'Equal weights' : '↑ Focus on errors', dataX + 60, rowY + 45);

        // Data -> model arrow.
        ctx.strokeStyle = '#7ef0d4';
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.moveTo(dataX + 120, rowY + 30);
        ctx.lineTo(modelX - 60, rowY + 30);
        ctx.stroke();

        // Model box.
        ctx.fillStyle = '#ffb49033';
        ctx.fillRect(modelX - 60, rowY, 120, 60);
        ctx.strokeStyle = '#ffb490';
        ctx.strokeRect(modelX - 60, rowY, 120, 60);
        ctx.fillStyle = '#e8eef6';
        ctx.font = 'bold 12px sans-serif';
        ctx.fillText(`Model ${round + 1}`, modelX, rowY + 35);

        // Model -> predictions arrow.
        ctx.strokeStyle = '#ffb490';
        ctx.beginPath();
        ctx.moveTo(modelX + 60, rowY + 30);
        ctx.lineTo(predX - 60, rowY + 30);
        ctx.stroke();

        // Predictions box; only the final round is shown as improved.
        ctx.fillStyle = '#7ef0d433';
        ctx.fillRect(predX - 60, rowY, 120, 60);
        ctx.strokeStyle = '#7ef0d4';
        ctx.strokeRect(predX - 60, rowY, 120, 60);
        ctx.fillStyle = '#e8eef6';
        ctx.font = '11px sans-serif';
        ctx.fillText('Predictions', predX, rowY + 25);
        ctx.fillStyle = isLast ? '#7ef0d4' : '#ff8c6a';
        ctx.font = 'bold 10px sans-serif';
        ctx.fillText(isLast ? 'Better!' : 'Some errors', predX, rowY + 45);

        // Dashed feedback arrow: errors re-weight the next round's data.
        if (!isLast) {
            ctx.strokeStyle = '#ff8c6a';
            ctx.lineWidth = 2;
            ctx.setLineDash([5, 5]);
            ctx.beginPath();
            ctx.moveTo(predX - 60, rowY + 60);
            ctx.lineTo(dataX + 60, rowY + 90);
            ctx.stroke();
            ctx.setLineDash([]);

            ctx.fillStyle = '#ff8c6a';
            ctx.font = '10px sans-serif';
            ctx.textAlign = 'center';
            ctx.fillText('Increase weights for errors', width / 2, rowY + 80);
        }
    });

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Boosting: Sequential Learning from Mistakes', width / 2, 30);

    // Footer: how the ensemble combines its members.
    ctx.fillStyle = '#ff8c6a';
    ctx.font = 'bold 14px sans-serif';
    ctx.fillText('Final Prediction = Weighted Combination of All Models', width / 2, height - 20);
}
3446
+
3447
// Diagrams a random forest: five trees trained on the same data (each with
// a random feature subset) whose class votes are combined by majority.
function drawRandomForestViz() {
    const canvas = document.getElementById('random-forest-viz');
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    const width = canvas.width = canvas.offsetWidth;
    const height = canvas.height = 400;

    ctx.clearRect(0, 0, width, height);
    ctx.fillStyle = '#1a2332';
    ctx.fillRect(0, 0, width, height);

    const treeTop = 120;
    const treeCount = 5;
    const slotWidth = (width - 100) / treeCount;
    const crownSize = 50;

    // Shared training-data box at the top.
    ctx.fillStyle = '#6aa9ff33';
    ctx.fillRect(width / 2 - 100, 40, 200, 50);
    ctx.strokeStyle = '#6aa9ff';
    ctx.lineWidth = 2;
    ctx.strokeRect(width / 2 - 100, 40, 200, 50);
    ctx.fillStyle = '#e8eef6';
    ctx.font = 'bold 14px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Training Data', width / 2, 70);

    for (let t = 0; t < treeCount; t++) {
        const x = 50 + t * slotWidth + slotWidth / 2;
        // First three trees vote class 1, the remaining two vote class 0.
        const votesOne = t < 3;

        // Data -> tree arrow.
        ctx.strokeStyle = '#7ef0d4';
        ctx.lineWidth = 1;
        ctx.beginPath();
        ctx.moveTo(width / 2, 90);
        ctx.lineTo(x, treeTop - 20);
        ctx.stroke();

        // Tree crown: a simple triangle.
        ctx.fillStyle = '#7ef0d4';
        ctx.beginPath();
        ctx.moveTo(x, treeTop - 20);
        ctx.lineTo(x - crownSize / 2, treeTop + crownSize - 20);
        ctx.lineTo(x + crownSize / 2, treeTop + crownSize - 20);
        ctx.closePath();
        ctx.fill();

        // Trunk.
        ctx.fillStyle = '#ffb490';
        ctx.fillRect(x - 8, treeTop + crownSize - 20, 16, 30);

        // Tree label.
        ctx.fillStyle = '#e8eef6';
        ctx.font = 'bold 11px sans-serif';
        ctx.textAlign = 'center';
        ctx.fillText(`Tree ${t + 1}`, x, treeTop + crownSize + 25);

        // Mention feature subsampling once, under the first tree only.
        if (t === 0) {
            ctx.font = '9px sans-serif';
            ctx.fillStyle = '#a9b4c2';
            ctx.fillText('Random', x, treeTop + crownSize + 40);
            ctx.fillText('subset', x, treeTop + crownSize + 52);
        }

        // Per-tree vote bubble with the voted class digit.
        const voteY = treeTop + crownSize + 70;
        ctx.fillStyle = votesOne ? '#ff8c6a' : '#7ef0d4';
        ctx.beginPath();
        ctx.arc(x, voteY, 12, 0, 2 * Math.PI);
        ctx.fill();

        ctx.fillStyle = '#1a2332';
        ctx.font = 'bold 10px sans-serif';
        ctx.fillText(votesOne ? '1' : '0', x, voteY + 4);

        // Vote -> aggregator arrow, coloured by the vote.
        ctx.strokeStyle = votesOne ? '#ff8c6a' : '#7ef0d4';
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.moveTo(x, voteY + 12);
        ctx.lineTo(width / 2, height - 80);
        ctx.stroke();
    }

    // Majority-vote box at the bottom.
    ctx.fillStyle = '#7ef0d433';
    ctx.fillRect(width / 2 - 80, height - 80, 160, 60);
    ctx.strokeStyle = '#7ef0d4';
    ctx.lineWidth = 3;
    ctx.strokeRect(width / 2 - 80, height - 80, 160, 60);

    ctx.fillStyle = '#e8eef6';
    ctx.font = 'bold 14px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText('Majority Vote', width / 2, height - 60);
    ctx.font = 'bold 16px sans-serif';
    ctx.fillStyle = '#ff8c6a';
    ctx.fillText('Class 1 wins (3 vs 2)', width / 2, height - 35);

    // Title.
    ctx.fillStyle = '#7ef0d4';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText('Random Forest: Ensemble of Decision Trees', width / 2, 25);
}
3554
 
3555
  // Handle window resize
 
3578
  drawSVMCParameter();
3579
  drawSVMTraining();
3580
  drawSVMKernel();
3581
+ // New topics
3582
+ drawElbowCurve();
3583
+ drawCVKHeatmap();
3584
+ drawGridSearchHeatmap();
3585
+ drawParamSurface();
3586
+ drawBayesTheorem();
3587
+ drawSpamClassification();
3588
+ drawDecisionTree();
3589
+ drawEntropyViz();
3590
+ drawSplitComparison();
3591
+ drawTreeBoundary();
3592
+ drawBaggingViz();
3593
+ drawBoostingViz();
3594
+ drawRandomForestViz();
3595
  }, 250);
3596
  });
ml_complete-all-topics/index.html CHANGED
@@ -496,9 +496,11 @@ canvas {
496
  <a href="#cross-validation" class="toc-link">10. Cross-Validation</a>
497
  <a href="#preprocessing" class="toc-link">11. Data Preprocessing</a>
498
  <a href="#loss-functions" class="toc-link">12. Loss Functions</a>
499
- <a href="#optimal-k" class="toc-link">13. Finding Optimal K for KNN</a>
500
- <a href="#hyperparameter-tuning" class="toc-link">14. Hyperparameter Tuning &amp; GridSearch</a>
501
- <a href="#naive-bayes" class="toc-link">15. Naive Bayes Classifier</a>
 
 
502
  </nav>
503
  </aside>
504
 
@@ -2374,517 +2376,632 @@ Actual Pos TP FN
2374
  </div>
2375
  </div>
2376
 
2377
- <h3>πŸŽ‰ Congratulations!</h3>
2378
- <p style="font-size: 18px; color: #7ef0d4; margin-top: 24px;">
2379
- You've completed all 12 machine learning topics! You now understand the fundamentals of ML from linear regression to loss functions. Keep practicing and building projects! πŸš€
2380
- </p>
2381
  </div>
2382
  </div>
2383
 
2384
- <!-- Section 13: Finding Optimal K for KNN -->
2385
  <div class="section" id="optimal-k">
2386
  <div class="section-header">
2387
- <h2>13. Finding Optimal K for KNN 🎯</h2>
2388
  <button class="section-toggle">β–Ό</button>
2389
  </div>
2390
  <div class="section-body">
2391
- <p>In KNN, choosing the right K value is crucial! Too small = overfitting, too large = underfitting. How do we find the optimal K? Use cross-validation!</p>
2392
 
2393
  <div class="info-card">
2394
- <div class="info-card-title">The Problem</div>
2395
  <ul class="info-card-list">
2396
- <li>K=1: Overfits (memorizes training data, including noise)</li>
2397
- <li>K=too large: Underfits (boundary too smooth, misses patterns)</li>
2398
- <li>Need: K that balances bias and variance</li>
2399
- <li>K controls model complexity</li>
2400
  </ul>
2401
  </div>
2402
 
2403
- <h3>Why K Matters</h3>
2404
- <ul>
2405
- <li><strong>K controls model complexity:</strong> Small K = complex boundaries, large K = simple boundaries</li>
2406
- <li><strong>Affects decision boundary smoothness:</strong> Directly impacts predictions</li>
2407
- <li><strong>Impacts generalization ability:</strong> Wrong K hurts test performance</li>
2408
- <li><strong>Must be chosen carefully:</strong> Can't just guess!</li>
2409
- </ul>
2410
 
2411
- <h3>The Solution: Cross-Validation</h3>
2412
- <div class="formula">
2413
- <strong>K-Selection Algorithm:</strong>
2414
- For K = 1 to 20:<br>
2415
- &nbsp;&nbsp;For each fold in K-Fold CV:<br>
2416
- &nbsp;&nbsp;&nbsp;&nbsp;Train KNN with this K value<br>
2417
- &nbsp;&nbsp;&nbsp;&nbsp;Test on validation fold<br>
2418
- &nbsp;&nbsp;&nbsp;&nbsp;Record accuracy<br>
2419
- &nbsp;&nbsp;Calculate mean accuracy across all folds<br>
2420
- &nbsp;&nbsp;Store: (K, mean_accuracy)<br>
2421
- <br>
2422
- Plot K vs Mean Accuracy<br>
2423
- Choose K with highest mean accuracy
2424
  </div>
2425
 
2426
- <h3>Step-by-Step Process</h3>
2427
- <ol>
2428
- <li><strong>Define K Range:</strong> Try K = 1, 2, 3, ..., 20 (or use √n as starting point)</li>
2429
- <li><strong>Set Up Cross-Validation:</strong> Use k-fold CV (e.g., k=10) to ensure robust evaluation</li>
2430
- <li><strong>Train and Evaluate:</strong> For each K value, run k-fold CV, get accuracy for each fold, calculate mean Β± std dev</li>
2431
- <li><strong>Select Optimal K:</strong> Choose K with highest mean accuracy (or use elbow method)</li>
2432
- </ol>
2433
-
2434
- <h3>Example Walkthrough</h3>
2435
- <p><strong>Dataset:</strong> A, B, C, D, E, F (6 samples), k-fold = 3</p>
2436
 
2437
- <table class="data-table">
2438
- <thead>
2439
- <tr><th>K Value</th><th>Fold 1</th><th>Fold 2</th><th>Fold 3</th><th>Mean Accuracy</th></tr>
2440
- </thead>
2441
- <tbody>
2442
- <tr><td>K=1</td><td>100%</td><td>100%</td><td>50%</td><td>83.3%</td></tr>
2443
- <tr style="background: rgba(126, 240, 212, 0.1);"><td><strong>K=3</strong></td><td>100%</td><td>100%</td><td>100%</td><td><strong>100% ← Best!</strong></td></tr>
2444
- <tr><td>K=5</td><td>100%</td><td>50%</td><td>100%</td><td>83.3%</td></tr>
2445
- </tbody>
2446
- </table>
 
 
2447
 
2448
  <div class="figure">
2449
  <div class="figure-placeholder" style="height: 400px">
2450
- <canvas id="optimal-k-canvas"></canvas>
2451
  </div>
2452
- <p class="figure-caption"><strong>Figure:</strong> K vs Accuracy plot showing optimal K value</p>
2453
  </div>
2454
 
2455
- <div class="controls">
2456
- <div class="control-group">
2457
- <label>K Range (max): <span id="k-range-val">20</span></label>
2458
- <input type="range" id="k-range-slider" min="10" max="30" step="5" value="20">
2459
- </div>
2460
- <div class="control-group">
2461
- <label>CV Folds: <span id="cv-folds-val">10</span></label>
2462
- <input type="range" id="cv-folds-slider" min="3" max="10" step="1" value="10">
 
2463
  </div>
2464
  </div>
2465
 
2466
- <h3>Elbow Method</h3>
2467
- <p>Look for the "elbow point" where accuracy stops improving significantly:</p>
2468
  <ul>
2469
- <li><strong>Sharp increase:</strong> Significant improvement with larger K</li>
2470
- <li><strong>Elbow point:</strong> Diminishing returns begin</li>
2471
- <li><strong>Plateau:</strong> Little benefit from larger K</li>
2472
- <li><strong>Choose K at/near elbow:</strong> Best trade-off</li>
2473
  </ul>
2474
 
2475
  <div class="callout info">
2476
- <div class="callout-title">πŸ’‘ Odd K Values</div>
2477
  <div class="callout-content">
2478
- Always prefer odd K values (3, 5, 7, 9) for binary classification! This avoids ties when neighbors vote. For K=4, you might get 2 votes for each class.
 
 
 
 
2479
  </div>
2480
  </div>
 
 
2481
 
2482
- <div class="callout warning">
2483
- <div class="callout-title">⚠️ Don't Use Test Set!</div>
2484
- <div class="callout-content">
2485
- Never use the test set for K selection! Always use cross-validation on training data only. The test set should remain untouched until final evaluation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2486
  </div>
 
2487
  </div>
2488
 
2489
- <h3>Practical Tips</h3>
2490
- <ul>
2491
- <li><strong>Start with K = √n:</strong> n = training samples (good starting point)</li>
2492
- <li><strong>Use odd K:</strong> Avoids ties in binary classification</li>
2493
- <li><strong>Consider computational cost:</strong> Large K = more neighbors to check</li>
2494
- <li><strong>Visualize decision boundaries:</strong> For different K values</li>
2495
- <li><strong>Use stratified k-fold:</strong> For imbalanced data</li>
2496
- </ul>
 
2497
 
2498
- <h3>Real-World Example</h3>
2499
- <div class="info-card">
2500
- <div class="info-card-title">🌸 Iris Flower Classification (150 samples)</div>
2501
- <p style="margin: 12px 0; line-height: 1.6;">
2502
- <strong>Process:</strong> Try K = 1 to 20, Use 10-fold CV<br>
2503
- <strong>Results:</strong><br>
2504
- β€’ K=1: 95% accuracy (overfits to noise)<br>
2505
- β€’ K=7: 97% accuracy (optimal! βœ“)<br>
2506
- β€’ K=15: 94% accuracy (underfits, too smooth)<br>
2507
- <br>
2508
- The optimal K=7 provides the best balance between model complexity and generalization!
2509
- </p>
2510
  </div>
2511
 
2512
- <div class="callout success">
2513
- <div class="callout-title">βœ… Key Takeaway</div>
 
2514
  <div class="callout-content">
2515
- Finding optimal K is not guesswork! Use systematic cross-validation to evaluate multiple K values and choose the one with highest mean accuracy. This ensures your KNN model generalizes well to unseen data.
 
 
 
 
 
 
 
2516
  </div>
2517
  </div>
 
 
 
 
 
 
 
 
 
2518
  </div>
2519
  </div>
2520
 
2521
- <!-- Section 14: Hyperparameter Tuning & GridSearch -->
2522
- <div class="section" id="hyperparameter-tuning">
2523
  <div class="section-header">
2524
- <h2>14. Hyperparameter Tuning &amp; GridSearch βš™οΈ</h2>
2525
  <button class="section-toggle">β–Ό</button>
2526
  </div>
2527
  <div class="section-body">
2528
- <p>Models have two types of parameters: <strong>learned parameters</strong> (like weights) and <strong>hyperparameters</strong> (like learning rate). We must tune hyperparameters to get the best model!</p>
2529
-
2530
- <h3>What Are Hyperparameters?</h3>
2531
- <p><strong>Definition:</strong> Parameters that control the learning process but aren't learned from data.</p>
2532
 
2533
  <div class="info-card">
2534
- <div class="info-card-title">Parameters vs Hyperparameters</div>
2535
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-top: 12px;">
2536
- <div style="background: rgba(126, 240, 212, 0.1); padding: 12px; border-radius: 6px;">
2537
- <strong style="color: #7ef0d4;">Parameters (Learned)</strong>
2538
- <ul style="margin-top: 8px; font-size: 14px;">
2539
- <li>Linear Regression: w, b</li>
2540
- <li>Logistic Regression: coefficients</li>
2541
- <li>SVM: support vector positions</li>
2542
- <li>Optimized during training</li>
2543
- </ul>
2544
- </div>
2545
- <div style="background: rgba(106, 169, 255, 0.1); padding: 12px; border-radius: 6px;">
2546
- <strong style="color: #6aa9ff;">Hyperparameters (Set Before)</strong>
2547
- <ul style="margin-top: 8px; font-size: 14px;">
2548
- <li>Learning rate (Ξ±)</li>
2549
- <li>Number of iterations</li>
2550
- <li>SVM: C, gamma, kernel</li>
2551
- <li>KNN: K value</li>
2552
- <li>Must be tuned manually</li>
2553
- </ul>
2554
- </div>
2555
- </div>
2556
  </div>
2557
 
2558
- <h3>Examples Across Algorithms</h3>
2559
-
2560
- <h4>Linear/Logistic Regression:</h4>
2561
- <ul>
2562
- <li><strong>Learning rate (Ξ±):</strong> 0.001, 0.01, 0.1</li>
2563
- <li><strong>Number of iterations:</strong> 100, 1000, 10000</li>
2564
- <li><strong>Regularization strength (Ξ»):</strong> 0.01, 0.1, 1, 10</li>
2565
- </ul>
 
2566
 
2567
- <h4>SVM:</h4>
2568
- <ul>
2569
- <li><strong>C (regularization):</strong> 0.1, 1, 10, 100, 1000</li>
2570
- <li><strong>gamma (kernel coefficient):</strong> 'scale', 'auto', 0.001, 0.01, 0.1</li>
2571
- <li><strong>kernel:</strong> 'linear', 'poly', 'rbf', 'sigmoid'</li>
2572
- <li><strong>degree (for poly):</strong> 2, 3, 4, 5</li>
2573
- </ul>
2574
 
2575
- <h4>KNN:</h4>
2576
- <ul>
2577
- <li><strong>K (neighbors):</strong> 1, 3, 5, 7, 9, 11</li>
2578
- <li><strong>Distance metric:</strong> 'euclidean', 'manhattan', 'minkowski'</li>
2579
- <li><strong>Weights:</strong> 'uniform', 'distance'</li>
2580
- </ul>
2581
 
2582
- <div class="callout warning">
2583
- <div class="callout-title">⚠️ The Problem with Random Values</div>
2584
- <div class="callout-content">
2585
- If we just try random hyperparameter values:<br>
2586
- β€’ Inefficient (might miss optimal combination)<br>
2587
- β€’ No systematic approach<br>
2588
- β€’ Hard to reproduce<br>
2589
- β€’ Wastes time and resources
2590
  </div>
 
2591
  </div>
2592
 
2593
- <h3>Solution: GridSearch!</h3>
2594
- <p><strong>What is GridSearch?</strong> Systematically try all combinations of hyperparameters and pick the best.</p>
2595
 
2596
  <div class="formula">
2597
- <strong>GridSearch Algorithm:</strong><br>
2598
- 1. Define parameter grid:<br>
2599
- &nbsp;&nbsp;{ 'C': [0.1, 1, 10, 100],<br>
2600
- &nbsp;&nbsp;&nbsp;&nbsp;'gamma': ['scale', 'auto', 0.001, 0.01],<br>
2601
- &nbsp;&nbsp;&nbsp;&nbsp;'kernel': ['linear', 'rbf', 'poly'] }<br>
2602
  <br>
2603
- 2. Generate all combinations:<br>
2604
- &nbsp;&nbsp;Total: 4 Γ— 4 Γ— 3 = 48 combinations<br>
 
2605
  <br>
2606
- 3. For each combination:<br>
2607
- &nbsp;&nbsp;- Train model with these hyperparameters<br>
2608
- &nbsp;&nbsp;- Evaluate using cross-validation<br>
2609
- &nbsp;&nbsp;- Record mean CV score<br>
2610
  <br>
2611
- 4. Select best combination:<br>
2612
- &nbsp;&nbsp;- Highest CV score = best hyperparameters
2613
  </div>
2614
 
2615
  <div class="figure">
2616
  <div class="figure-placeholder" style="height: 400px">
2617
- <canvas id="gridsearch-canvas"></canvas>
2618
  </div>
2619
- <p class="figure-caption"><strong>Figure:</strong> GridSearch heatmap showing parameter combinations and their scores</p>
2620
  </div>
2621
 
2622
- <h3>SVM GridSearch Example</h3>
2623
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2624
  <table class="data-table">
2625
  <thead>
2626
- <tr><th>#</th><th>C</th><th>gamma</th><th>kernel</th><th>CV Score</th></tr>
 
 
 
 
 
 
2627
  </thead>
2628
  <tbody>
2629
- <tr><td>1</td><td>0.1</td><td>0.001</td><td>linear</td><td>0.85</td></tr>
2630
- <tr><td>2</td><td>0.1</td><td>0.001</td><td>rbf</td><td>0.88</td></tr>
2631
- <tr><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td></tr>
2632
- <tr style="background: rgba(126, 240, 212, 0.1);"><td><strong>32</strong></td><td><strong>10</strong></td><td><strong>0.01</strong></td><td><strong>rbf</strong></td><td><strong>0.95 ← Best!</strong></td></tr>
 
2633
  </tbody>
2634
  </table>
2635
 
2636
- <p><strong>Result:</strong> Best parameters found automatically: C=10, gamma=0.01, kernel='rbf'</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2637
 
2638
- <h3>Computational Cost</h3>
2639
  <div class="formula">
2640
- <strong>Total Time Formula:</strong><br>
2641
- Total Time = n_combinations Γ— cv_folds Γ— training_time<br>
2642
  <br>
2643
- <strong>Example:</strong><br>
2644
- β€’ 48 combinations<br>
2645
- β€’ 5-fold CV<br>
2646
- β€’ 1 second per training<br>
2647
- <strong>Total:</strong> 48 Γ— 5 Γ— 1 = 240 seconds (4 minutes)
 
2648
  </div>
2649
 
2650
- <div class="callout warning">
2651
- <div class="callout-title">⚠️ GridSearch Can Be Slow!</div>
2652
- <div class="callout-content">
2653
- For large parameter grids, GridSearch can take hours or days! Solutions:<br>
2654
- β€’ Use fewer parameter values (coarse then fine grid)<br>
2655
- β€’ Use RandomizedSearchCV (samples random combinations)<br>
2656
- β€’ Use parallel processing (n_jobs=-1)
 
 
 
 
 
 
2657
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
2658
  </div>
2659
 
 
 
 
2660
  <div class="callout info">
2661
- <div class="callout-title">πŸ’‘ Always Use Cross-Validation!</div>
2662
  <div class="callout-content">
2663
- GridSearch must use cross-validation internally to avoid overfitting to validation set. Never tune hyperparameters on test set!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2664
  </div>
2665
  </div>
2666
 
2667
- <h3>Practical Workflow</h3>
2668
- <ol>
2669
- <li><strong>Step 1 - Coarse Grid:</strong> Wide range, few values (e.g., C = [0.1, 1, 10, 100, 1000]) to find approximate best region</li>
2670
- <li><strong>Step 2 - Fine Grid:</strong> Narrow range, more values (e.g., C = [5, 7, 9, 11, 13]) to refine optimal value</li>
2671
- <li><strong>Step 3 - Final Model:</strong> Train on full training set using best hyperparameters, then evaluate on test set</li>
2672
- </ol>
2673
 
2674
- <div class="callout success">
2675
- <div class="callout-title">βœ… Key Takeaway</div>
 
 
 
 
 
 
 
 
 
2676
  <div class="callout-content">
2677
- GridSearch finds optimal hyperparameters automatically - no manual guessing needed! It's the standard approach for hyperparameter tuning in machine learning. Just be patient with large grids!
 
 
 
 
 
 
2678
  </div>
2679
  </div>
2680
 
2681
- <h3>Advanced: RandomizedSearchCV</h3>
2682
- <p>For very large hyperparameter spaces, use <strong>RandomizedSearchCV</strong>:</p>
2683
- <ul>
2684
- <li>Samples random combinations instead of trying all</li>
2685
- <li>Much faster than exhaustive GridSearch</li>
2686
- <li>Good for many hyperparameters or continuous ranges</li>
2687
- <li>Specify number of iterations (e.g., 100 random combinations)</li>
2688
- </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2689
  </div>
2690
  </div>
2691
 
2692
- <!-- Section 15: Naive Bayes Classifier -->
2693
- <div class="section" id="naive-bayes">
2694
  <div class="section-header">
2695
- <h2>15. Naive Bayes Classifier πŸ“Š</h2>
2696
  <button class="section-toggle">β–Ό</button>
2697
  </div>
2698
  <div class="section-body">
2699
- <p>Naive Bayes is a probabilistic classifier based on Bayes' Theorem. It's called "naive" because it assumes features are independent (which often isn't true, but it works surprisingly well anyway!)</p>
2700
 
2701
  <div class="info-card">
2702
  <div class="info-card-title">Key Concepts</div>
2703
  <ul class="info-card-list">
2704
- <li>Based on Bayes' Theorem and probability</li>
2705
- <li>Assumes features are independent ("naive" assumption)</li>
2706
- <li>Fast training and prediction</li>
2707
- <li>Works well for text classification</li>
2708
  </ul>
2709
  </div>
2710
 
2711
- <h3>Bayes' Theorem</h3>
2712
- <div class="formula">
2713
- <strong>Bayes' Theorem:</strong><br>
2714
- P(A|B) = P(B|A) Γ— P(A) / P(B)<br>
2715
- <br>
2716
- <strong>In classification context:</strong><br>
2717
- P(class|features) = P(features|class) Γ— P(class) / P(features)<br>
2718
- <br>
2719
- <small>where:<br>
2720
- β€’ P(class|features) = Posterior probability (what we want)<br>
2721
- β€’ P(features|class) = Likelihood<br>
2722
- β€’ P(class) = Prior probability<br>
2723
- β€’ P(features) = Evidence (normalizing constant)</small>
2724
- </div>
2725
 
2726
- <h3>Simple Example: Email Spam Classification</h3>
2727
- <p>Email contains words: ["free", "money"]</p>
2728
- <p><strong>Calculate:</strong> P(spam|free, money)</p>
 
 
 
 
 
 
 
 
 
 
 
2729
 
2730
- <h4>Given:</h4>
2731
- <ul>
2732
- <li>P(spam) = 0.3 (30% emails are spam)</li>
2733
- <li>P(not spam) = 0.7</li>
2734
- <li>P(free|spam) = 0.8</li>
2735
- <li>P(money|spam) = 0.7</li>
2736
- <li>P(free|not spam) = 0.1</li>
2737
- <li>P(money|not spam) = 0.05</li>
2738
- </ul>
2739
 
2740
- <h4>Naive Assumption (features are independent):</h4>
2741
  <div class="formula">
2742
- P(free, money|spam) = P(free|spam) Γ— P(money|spam)<br>
2743
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;= 0.8 Γ— 0.7 = 0.56<br>
 
 
 
 
2744
  <br>
2745
- P(free, money|not spam) = 0.1 Γ— 0.05 = 0.005
 
 
 
 
 
 
 
2746
  </div>
2747
 
2748
- <h4>Calculate Posterior:</h4>
 
 
2749
  <div class="formula">
2750
- P(spam|features) = P(free, money|spam) Γ— P(spam)<br>
2751
- &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;= 0.56 Γ— 0.3 = 0.168<br>
2752
- <br>
2753
- P(not spam|features) = 0.005 Γ— 0.7 = 0.0035<br>
 
 
 
2754
  <br>
2755
- <strong>Normalize:</strong><br>
2756
- P(spam|features) = 0.168 / (0.168 + 0.0035) = 0.98<br>
2757
- <br>
2758
- <strong style="color: #7ef0d4;">Result: 98% probability it's spam! πŸ“§</strong>
2759
  </div>
2760
 
2761
  <div class="figure">
2762
- <div class="figure-placeholder" style="height: 350px">
2763
- <canvas id="naive-bayes-canvas"></canvas>
2764
  </div>
2765
- <p class="figure-caption"><strong>Figure:</strong> Naive Bayes probability calculations for spam detection</p>
2766
  </div>
2767
 
2768
- <h3>Types of Naive Bayes</h3>
2769
-
2770
- <h4>1. Gaussian Naive Bayes</h4>
2771
- <ul>
2772
- <li><strong>For:</strong> Continuous features</li>
2773
- <li><strong>Assumes:</strong> Normal distribution</li>
2774
- <li><strong>Formula:</strong> P(x|class) = (1/√(2πσ²)) Γ— e^(-(x-ΞΌ)Β²/(2σ²))</li>
2775
- <li><strong>Use case:</strong> Real-valued features (height, weight, temperature)</li>
2776
- </ul>
2777
-
2778
- <h4>2. Multinomial Naive Bayes</h4>
2779
- <ul>
2780
- <li><strong>For:</strong> Count data</li>
2781
- <li><strong>Features:</strong> Frequencies (e.g., word counts)</li>
2782
- <li><strong>Use case:</strong> Text classification (word counts in documents)</li>
2783
- </ul>
2784
 
2785
- <h4>3. Bernoulli Naive Bayes</h4>
2786
- <ul>
2787
- <li><strong>For:</strong> Binary features (0/1, yes/no)</li>
2788
- <li><strong>Features:</strong> Presence/absence</li>
2789
- <li><strong>Use case:</strong> Document classification (word present or not)</li>
2790
- </ul>
2791
-
2792
- <h3>Training Algorithm</h3>
2793
  <div class="formula">
2794
- <strong>Training Process:</strong><br>
2795
- For each class:<br>
2796
- &nbsp;&nbsp;Calculate P(class) = count(class) / total_samples<br>
2797
- &nbsp;&nbsp;<br>
2798
- &nbsp;&nbsp;For each feature:<br>
2799
- &nbsp;&nbsp;&nbsp;&nbsp;Calculate P(feature|class)<br>
2800
- &nbsp;&nbsp;&nbsp;&nbsp;<br>
2801
- &nbsp;&nbsp;&nbsp;&nbsp;Gaussian: Estimate ΞΌ and Οƒ<br>
2802
- &nbsp;&nbsp;&nbsp;&nbsp;Multinomial: Count frequencies<br>
2803
- &nbsp;&nbsp;&nbsp;&nbsp;Bernoulli: Count presence<br>
2804
- <br>
2805
- <strong>Prediction Process:</strong><br>
2806
- For each class:<br>
2807
- &nbsp;&nbsp;posterior = P(class) Γ— ∏ P(feature_i|class)<br>
2808
  <br>
2809
- Choose class with maximum posterior
2810
  </div>
2811
 
2812
- <h3>Worked Example: Play Tennis Dataset</h3>
2813
- <p>Predict: Should we play tennis?</p>
2814
- <p><strong>Given:</strong> Sunny, Cool, High humidity, Windy</p>
 
 
 
2815
 
 
2816
  <table class="data-table">
2817
  <thead>
2818
- <tr><th>Outlook</th><th>Temp</th><th>Humidity</th><th>Windy</th><th>Play</th></tr>
2819
  </thead>
2820
  <tbody>
2821
- <tr><td>Sunny</td><td>Hot</td><td>High</td><td>No</td><td>No</td></tr>
2822
- <tr><td>Sunny</td><td>Hot</td><td>High</td><td>Yes</td><td>No</td></tr>
2823
- <tr><td>Overcast</td><td>Hot</td><td>High</td><td>No</td><td>Yes</td></tr>
2824
- <tr><td>Rain</td><td>Mild</td><td>High</td><td>No</td><td>Yes</td></tr>
2825
- <tr><td>Rain</td><td>Cool</td><td>Normal</td><td>No</td><td>Yes</td></tr>
2826
- <tr><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td></tr>
2827
  </tbody>
2828
  </table>
2829
 
2830
- <p>Calculate P(Yes|features) and P(No|features), then compare!</p>
2831
-
2832
- <h3>Advantages</h3>
2833
- <ul>
2834
- <li>βœ“ <strong>Fast training and prediction:</strong> Very efficient</li>
2835
- <li>βœ“ <strong>Works well with high dimensions:</strong> Many features</li>
2836
- <li>βœ“ <strong>Requires small training data:</strong> Good for limited data</li>
2837
- <li>βœ“ <strong>Handles missing values well:</strong> Robust</li>
2838
- <li>βœ“ <strong>Probabilistic predictions:</strong> Returns confidence scores</li>
2839
- <li>βœ“ <strong>Good baseline classifier:</strong> Easy to implement</li>
2840
- </ul>
2841
-
2842
- <h3>Disadvantages</h3>
2843
  <ul>
2844
- <li>βœ— <strong>Independence assumption often wrong:</strong> Features are usually correlated</li>
2845
- <li>βœ— <strong>Zero probability problem:</strong> Needs Laplace smoothing</li>
2846
- <li>βœ— <strong>Not great for correlated features:</strong> Performance suffers</li>
2847
- <li>βœ— <strong>Requires distribution assumption:</strong> For continuous features</li>
2848
  </ul>
2849
 
2850
  <div class="callout info">
2851
- <div class="callout-title">πŸ’‘ Despite "Naive" Assumption</div>
2852
  <div class="callout-content">
2853
- Despite the naive independence assumption being violated in most real-world datasets, Naive Bayes often works remarkably well in practice! It's especially powerful for text classification tasks.
2854
- </div>
2855
- </div>
2856
-
2857
- <div class="callout warning">
2858
- <div class="callout-title">⚠️ Zero Probability Problem</div>
2859
- <div class="callout-content">
2860
- If a feature value never occurs with a class in training, P = 0! This makes the entire posterior zero.<br>
2861
  <br>
2862
- <strong>Solution: Laplace Smoothing</strong><br>
2863
- P(feature|class) = (count + Ξ±) / (total + Ξ± Γ— n_features)<br>
2864
- where Ξ± = smoothing parameter (usually 1)
2865
- </div>
2866
- </div>
2867
-
2868
- <h3>Applications</h3>
2869
- <ul>
2870
- <li><strong>Spam filtering:</strong> Email classification (spam/not spam)</li>
2871
- <li><strong>Sentiment analysis:</strong> Positive/negative reviews</li>
2872
- <li><strong>Document classification:</strong> Topic categorization</li>
2873
- <li><strong>Medical diagnosis:</strong> Disease prediction from symptoms</li>
2874
- <li><strong>Real-time prediction:</strong> Fast classification needed</li>
2875
- <li><strong>Recommendation systems:</strong> User preferences</li>
2876
- </ul>
2877
-
2878
- <div class="callout success">
2879
- <div class="callout-title">βœ… Key Takeaway</div>
2880
- <div class="callout-content">
2881
- Naive Bayes is simple, fast, and surprisingly effective! Despite its "naive" independence assumption, it's a powerful baseline classifier that works especially well for text classification. Great for when you need quick results with limited data!
2882
  </div>
2883
  </div>
2884
 
2885
- <h3>πŸŽ‰ Congratulations!</h3>
2886
  <p style="font-size: 18px; color: #7ef0d4; margin-top: 24px;">
2887
- You've now completed all 15 machine learning topics! From basic concepts to advanced techniques, you've learned linear regression, gradient descent, classification algorithms, model evaluation, regularization, hyperparameter tuning, and probabilistic methods. You're ready to build real ML projects! πŸš€
 
 
 
 
 
 
 
 
 
 
2888
  </p>
2889
  </div>
2890
  </div>
 
496
  <a href="#cross-validation" class="toc-link">10. Cross-Validation</a>
497
  <a href="#preprocessing" class="toc-link">11. Data Preprocessing</a>
498
  <a href="#loss-functions" class="toc-link">12. Loss Functions</a>
499
+ <a href="#optimal-k" class="toc-link">13. Finding Optimal K in KNN</a>
500
+ <a href="#hyperparameter-tuning" class="toc-link">14. Hyperparameter Tuning</a>
501
+ <a href="#naive-bayes" class="toc-link">15. Naive Bayes</a>
502
+ <a href="#decision-trees" class="toc-link">16. Decision Trees</a>
503
+ <a href="#ensemble-methods" class="toc-link">17. Ensemble Methods</a>
504
  </nav>
505
  </aside>
506
 
 
2376
  </div>
2377
  </div>
2378
 
 
 
 
 
2379
  </div>
2380
  </div>
2381
 
2382
+ <!-- Section 13: Finding Optimal K in KNN -->
2383
  <div class="section" id="optimal-k">
2384
  <div class="section-header">
2385
+ <h2>13. Finding Optimal K in KNN</h2>
2386
  <button class="section-toggle">β–Ό</button>
2387
  </div>
2388
  <div class="section-body">
2389
+ <p>Choosing the right K value is critical for KNN performance! Too small causes overfitting, too large causes underfitting. Let's explore systematic methods to find the optimal K.</p>
2390
 
2391
  <div class="info-card">
2392
+ <div class="info-card-title">Key Methods</div>
2393
  <ul class="info-card-list">
2394
+ <li>Elbow Method: Plot accuracy vs K, find the "elbow"</li>
2395
+ <li>Cross-Validation: Test multiple K values with k-fold CV</li>
2396
+ <li>Grid Search: Systematically test K values</li>
2397
+ <li>Avoid K=1 (overfits) and K=n (underfits)</li>
2398
  </ul>
2399
  </div>
2400
 
2401
+ <h3>Method 1: Elbow Method</h3>
2402
+ <p>Test different K values and plot performance. Look for the "elbow" where adding more neighbors doesn't help much.</p>
 
 
 
 
 
2403
 
2404
+ <div class="figure">
2405
+ <div class="figure-placeholder" style="height: 400px">
2406
+ <canvas id="elbow-canvas"></canvas>
2407
+ </div>
2408
+ <p class="figure-caption"><strong>Figure 1:</strong> Elbow curve showing optimal K at the bend</p>
 
 
 
 
 
 
 
 
2409
  </div>
2410
 
2411
+ <h3>Method 2: Cross-Validation Approach</h3>
2412
+ <p>For each K value, run k-fold cross-validation and calculate mean accuracy. Choose K with highest mean accuracy.</p>
 
 
 
 
 
 
 
 
2413
 
2414
+ <div class="formula">
2415
+ <strong>Cross-Validation Process:</strong>
2416
+ for K in [1, 2, 3, ..., 20]:<br>
2417
+ &nbsp;&nbsp;accuracies = []<br>
2418
+ &nbsp;&nbsp;for fold in [1, 2, 3]:<br>
2419
+ &nbsp;&nbsp;&nbsp;&nbsp;train model with K neighbors<br>
2420
+ &nbsp;&nbsp;&nbsp;&nbsp;test on validation fold<br>
2421
+ &nbsp;&nbsp;&nbsp;&nbsp;accuracies.append(accuracy)<br>
2422
+ &nbsp;&nbsp;mean_accuracy[K] = mean(accuracies)<br>
2423
+ <br>
2424
+ optimal_K = argmax(mean_accuracy)
2425
+ </div>
2426
 
2427
  <div class="figure">
2428
  <div class="figure-placeholder" style="height: 400px">
2429
+ <canvas id="cv-k-canvas"></canvas>
2430
  </div>
2431
+ <p class="figure-caption"><strong>Figure 2:</strong> Cross-validation accuracies heatmap for different K values</p>
2432
  </div>
2433
 
2434
+ <div class="callout success">
2435
+ <div class="callout-title">βœ… Why Cross-Validation is Better</div>
2436
+ <div class="callout-content">
2437
+ Single train-test split might be lucky/unlucky. Cross-validation gives you:
2438
+ <ul>
2439
+ <li>Mean accuracy (average performance)</li>
2440
+ <li>Standard deviation (how stable is K?)</li>
2441
+ <li>Confidence in your choice</li>
2442
+ </ul>
2443
  </div>
2444
  </div>
2445
 
2446
+ <h3>Practical Guidelines</h3>
 
2447
  <ul>
2448
+ <li><strong>Start with K = √n:</strong> Good rule of thumb</li>
2449
+ <li><strong>Try odd K values:</strong> Avoids ties in binary classification</li>
2450
+ <li><strong>Test range [1, 20]:</strong> Covers most practical scenarios</li>
2451
+ <li><strong>Check for stability:</strong> Low std dev across folds</li>
2452
  </ul>
2453
 
2454
  <div class="callout info">
2455
+ <div class="callout-title">πŸ’‘ Real-World Example</div>
2456
  <div class="callout-content">
2457
+ <strong>Iris Dataset (150 samples):</strong><br>
2458
+ √150 β‰ˆ 12, so start testing around K=11, K=13, K=15<br>
2459
+ After CV: K=5 gives 96% Β± 2% β†’ Optimal choice!<br>
2460
+ K=1 gives 94% Β± 8% β†’ Too much variance<br>
2461
+ K=25 gives 88% Β± 1% β†’ Too smooth, underfitting
2462
  </div>
2463
  </div>
2464
+ </div>
2465
+ </div>
2466
 
2467
+ <!-- Section 14: Hyperparameter Tuning -->
2468
+ <div class="section" id="hyperparameter-tuning">
2469
+ <div class="section-header">
2470
+ <h2>14. Hyperparameter Tuning with GridSearch</h2>
2471
+ <button class="section-toggle">β–Ό</button>
2472
+ </div>
2473
+ <div class="section-body">
2474
+ <p>Hyperparameters control how your model learns. Unlike model parameters (learned from data), hyperparameters are set BEFORE training. GridSearch systematically finds the best combination!</p>
2475
+
2476
+ <div class="info-card">
2477
+ <div class="info-card-title">Common Hyperparameters</div>
2478
+ <ul class="info-card-list">
2479
+ <li>Learning rate (Ξ±) - Gradient Descent step size</li>
2480
+ <li>K - Number of neighbors in KNN</li>
2481
+ <li>C, gamma - SVM parameters</li>
2482
+ <li>Max depth - Decision Tree depth</li>
2483
+ <li>Number of trees - Random Forest</li>
2484
+ </ul>
2485
+ </div>
2486
+
2487
+ <h3>GridSearch Explained</h3>
2488
+ <p>GridSearch tests ALL combinations of hyperparameters you specify. It's exhaustive but guarantees finding the best combination in your grid.</p>
2489
+
2490
+ <div class="formula">
2491
+ <strong>Example: SVM GridSearch</strong>
2492
+ param_grid = {<br>
2493
+ &nbsp;&nbsp;'C': [0.1, 1, 10, 100],<br>
2494
+ &nbsp;&nbsp;'gamma': [0.001, 0.01, 0.1, 1],<br>
2495
+ &nbsp;&nbsp;'kernel': ['linear', 'rbf']<br>
2496
+ }<br>
2497
+ <br>
2498
+ Total combinations: 4 Γ— 4 Γ— 2 = 32<br>
2499
+ With 5-fold CV: 32 Γ— 5 = 160 model trainings!
2500
+ </div>
2501
+
2502
+ <div class="figure">
2503
+ <div class="figure-placeholder" style="height: 450px">
2504
+ <canvas id="gridsearch-heatmap"></canvas>
2505
  </div>
2506
+ <p class="figure-caption"><strong>Figure:</strong> GridSearch heatmap showing accuracy for C vs gamma combinations</p>
2507
  </div>
2508
 
2509
+ <div class="controls">
2510
+ <div class="control-group">
2511
+ <label>Select Model:</label>
2512
+ <div class="radio-group">
2513
+ <label><input type="radio" name="grid-model" value="svm" checked> SVM</label>
2514
+ <label><input type="radio" name="grid-model" value="rf"> Random Forest</label>
2515
+ </div>
2516
+ </div>
2517
+ </div>
2518
 
2519
+ <h3>Performance Surface (3D View)</h3>
2520
+ <div class="figure">
2521
+ <div class="figure-placeholder" style="height: 400px">
2522
+ <canvas id="param-surface"></canvas>
2523
+ </div>
2524
+ <p class="figure-caption"><strong>Figure:</strong> 3D surface showing how parameters affect performance</p>
 
 
 
 
 
 
2525
  </div>
2526
 
2527
+ <h3>When GridSearch Fails</h3>
2528
+ <div class="callout warning">
2529
+ <div class="callout-title">⚠️ The Curse of Dimensionality</div>
2530
  <div class="callout-content">
2531
+ <strong>Problem:</strong> Too many hyperparameters = exponential search space<br>
2532
+ <br>
2533
+ <strong>Example:</strong> 5 hyperparameters Γ— 10 values each = 100,000 combinations!<br>
2534
+ <br>
2535
+ <strong>Solutions:</strong><br>
2536
+ β€’ RandomSearchCV: Random sampling (faster, often good enough)<br>
2537
+ β€’ Bayesian Optimization: Smart search using previous results<br>
2538
+ β€’ Halving GridSearch: Eliminate poor performers early
2539
  </div>
2540
  </div>
2541
+
2542
+ <h3>Best Practices</h3>
2543
+ <ul>
2544
+ <li><strong>Start coarse:</strong> Wide range, few values (e.g., C: [0.1, 1, 10, 100])</li>
2545
+ <li><strong>Then refine:</strong> Narrow range around best (e.g., C: [5, 7, 9, 11])</li>
2546
+ <li><strong>Use cross-validation:</strong> Avoid overfitting to validation set</li>
2547
+ <li><strong>Log scale for wide ranges:</strong> [0.001, 0.01, 0.1, 1, 10, 100]</li>
2548
+ <li><strong>Consider computation time:</strong> More folds = more reliable but slower</li>
2549
+ </ul>
2550
  </div>
2551
  </div>
2552
 
2553
+ <!-- Section 15: Naive Bayes -->
2554
+ <div class="section" id="naive-bayes">
2555
  <div class="section-header">
2556
+ <h2>15. Naive Bayes Classification</h2>
2557
  <button class="section-toggle">β–Ό</button>
2558
  </div>
2559
  <div class="section-body">
2560
+ <p>Naive Bayes is a probabilistic classifier based on Bayes' Theorem. Despite its "naive" independence assumption, it works surprisingly well for text classification and other tasks!</p>
 
 
 
2561
 
2562
  <div class="info-card">
2563
+ <div class="info-card-title">Key Concepts</div>
2564
+ <ul class="info-card-list">
2565
+ <li>Based on Bayes' Theorem from probability theory</li>
2566
+ <li>Assumes features are independent (naive assumption)</li>
2567
+ <li>Very fast training and prediction</li>
2568
+ <li>Works well with high-dimensional data</li>
2569
+ </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2570
  </div>
2571
 
2572
+ <h3>Bayes' Theorem</h3>
2573
+ <div class="formula">
2574
+ <strong>The Foundation:</strong>
2575
+ P(Class|Features) = P(Features|Class) Γ— P(Class) / P(Features)<br>
2576
+ <br>
2577
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;↓&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;↓&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;↓&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;↓<br>
2578
+ Posterior&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Likelihood&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Prior&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Evidence<br>
2579
+ (What we want)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(From data)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(Baseline)&nbsp;&nbsp;(Normalizer)
2580
+ </div>
2581
 
2582
+ <h3>The Naive Independence Assumption</h3>
2583
+ <p>"Naive" because we assume all features are independent given the class:</p>
 
 
 
 
 
2584
 
2585
+ <div class="formula">
2586
+ <strong>Independence Assumption:</strong>
2587
+ P(x₁, xβ‚‚, ..., xβ‚™ | Class) = P(x₁|Class) Γ— P(xβ‚‚|Class) Γ— ... Γ— P(xβ‚™|Class)<br>
2588
+ <br>
2589
+ <small>This is often NOT true in reality, but works anyway!</small>
2590
+ </div>
2591
 
2592
+ <div class="figure">
2593
+ <div class="figure-placeholder" style="height: 400px">
2594
+ <canvas id="bayes-theorem-viz"></canvas>
 
 
 
 
 
2595
  </div>
2596
+ <p class="figure-caption"><strong>Figure 1:</strong> Bayes' Theorem visual explanation</p>
2597
  </div>
2598
 
2599
+ <h3>Real-World Example: Email Spam Detection</h3>
2600
+ <p>Let's classify an email with words: ["free", "winner", "click"]</p>
2601
 
2602
  <div class="formula">
2603
+ <strong>Training Data:</strong><br>
2604
+ β€’ 300 spam emails (30%)<br>
2605
+ β€’ 700 not-spam emails (70%)<br>
 
 
2606
  <br>
2607
+ <strong>Word frequencies:</strong><br>
2608
+ P("free" | spam) = 0.8 (appears in 80% of spam)<br>
2609
+ P("free" | not-spam) = 0.1 (appears in 10% of not-spam)<br>
2610
  <br>
2611
+ P("winner" | spam) = 0.7<br>
2612
+ P("winner" | not-spam) = 0.05<br>
 
 
2613
  <br>
2614
+ P("click" | spam) = 0.6<br>
2615
+ P("click" | not-spam) = 0.2
2616
  </div>
2617
 
2618
  <div class="figure">
2619
  <div class="figure-placeholder" style="height: 400px">
2620
+ <canvas id="spam-classification"></canvas>
2621
  </div>
2622
+ <p class="figure-caption"><strong>Figure 2:</strong> Spam classification calculation step-by-step</p>
2623
  </div>
2624
 
2625
+ <h3>Step-by-Step Calculation</h3>
2626
+ <div class="callout info">
2627
+ <div class="callout-title">πŸ“§ Classifying Our Email</div>
2628
+ <div class="callout-content">
2629
+ <strong>P(spam | features) ∝ P(features|spam) Γ— P(spam)</strong> (the shared evidence term P(features) cancels):<br>
2630
+ = P("free"|spam) Γ— P("winner"|spam) Γ— P("click"|spam) Γ— P(spam)<br>
2631
+ = 0.8 Γ— 0.7 Γ— 0.6 Γ— 0.3<br>
2632
+ = 0.1008<br>
2633
+ <br>
2634
+ <strong>P(not-spam | features) ∝ P(features|not-spam) Γ— P(not-spam):</strong><br>
2635
+ = P("free"|not-spam) Γ— P("winner"|not-spam) Γ— P("click"|not-spam) Γ— P(not-spam)<br>
2636
+ = 0.1 Γ— 0.05 Γ— 0.2 Γ— 0.7<br>
2637
+ = 0.0007<br>
2638
+ <br>
2639
+ <strong>Prediction:</strong> 0.1008 &gt; 0.0007 β†’ SPAM! πŸ“§βŒ
2640
+ </div>
2641
+ </div>
2642
+
2643
+ <h3>Why It Works Despite Wrong Assumption</h3>
2644
+ <ul>
2645
+ <li><strong>Don't need exact probabilities:</strong> Just need correct ranking</li>
2646
+ <li><strong>Errors cancel out:</strong> Multiple features reduce impact</li>
2647
+ <li><strong>Simple is robust:</strong> Fewer parameters = less overfitting</li>
2648
+ <li><strong>Fast:</strong> Just multiply probabilities!</li>
2649
+ </ul>
2650
+
2651
+ <h3>Comparison with Other Classifiers</h3>
2652
  <table class="data-table">
2653
  <thead>
2654
+ <tr>
2655
+ <th>Aspect</th>
2656
+ <th>Naive Bayes</th>
2657
+ <th>Logistic Reg</th>
2658
+ <th>SVM</th>
2659
+ <th>KNN</th>
2660
+ </tr>
2661
  </thead>
2662
  <tbody>
2663
+ <tr><td>Speed</td><td>Very Fast</td><td>Fast</td><td>Slow</td><td>Very Slow</td></tr>
2664
+ <tr><td>Works with Little Data</td><td>Yes</td><td>Yes</td><td>No</td><td>No</td></tr>
2665
+ <tr><td>Interpretable</td><td>Very</td><td>Yes</td><td>No</td><td>No</td></tr>
2666
+ <tr><td>Handles Non-linear</td><td>Yes</td><td>No</td><td>Yes</td><td>Yes</td></tr>
2667
+ <tr><td>High Dimensions</td><td>Excellent</td><td>Good</td><td>Good</td><td>Poor</td></tr>
2668
  </tbody>
2669
  </table>
2670
 
2671
+ <div class="callout success">
2672
+ <div class="callout-title">βœ… When to Use Naive Bayes</div>
2673
+ <div class="callout-content">
2674
+ <strong>Perfect for:</strong><br>
2675
+ β€’ Text classification (spam detection, sentiment analysis)<br>
2676
+ β€’ Document categorization<br>
2677
+ β€’ Real-time prediction (very fast)<br>
2678
+ β€’ High-dimensional data<br>
2679
+ β€’ Small training datasets<br>
2680
+ <br>
2681
+ <strong>Avoid when:</strong><br>
2682
+ β€’ Features are highly correlated<br>
2683
+ β€’ Need probability calibration<br>
2684
+ β€’ Complex feature interactions matter
2685
+ </div>
2686
+ </div>
2687
+ </div>
2688
+ </div>
2689
+
2690
+ <!-- Section 16: Decision Trees -->
2691
+ <div class="section" id="decision-trees">
2692
+ <div class="section-header">
2693
+ <h2>16. Decision Trees</h2>
2694
+ <button class="section-toggle">β–Ό</button>
2695
+ </div>
2696
+ <div class="section-body">
2697
+ <p>Decision Trees make decisions by asking yes/no questions recursively. They're interpretable, powerful, and the foundation for ensemble methods like Random Forests!</p>
2698
+
2699
+ <div class="info-card">
2700
+ <div class="info-card-title">Key Concepts</div>
2701
+ <ul class="info-card-list">
2702
+ <li>Recursive partitioning of feature space</li>
2703
+ <li>Each node asks a yes/no question</li>
2704
+ <li>Leaves contain predictions</li>
2705
+ <li>Uses Information Gain or Gini Impurity for splitting</li>
2706
+ </ul>
2707
+ </div>
2708
+
2709
+ <h3>How Decision Trees Work</h3>
2710
+ <p>Imagine you're playing "20 Questions" to guess an animal. Each question splits possibilities into two groups. Decision Trees work the same way!</p>
2711
+
2712
+ <div class="figure">
2713
+ <div class="figure-placeholder" style="height: 450px">
2714
+ <canvas id="decision-tree-viz"></canvas>
2715
+ </div>
2716
+ <p class="figure-caption"><strong>Figure 1:</strong> Interactive decision tree structure</p>
2717
+ </div>
2718
+
2719
+ <h3>Splitting Criteria</h3>
2720
+ <p>How do we choose which question to ask at each node? We want splits that maximize information gain!</p>
2721
 
2722
+ <h4>1. Entropy (Information Theory)</h4>
2723
  <div class="formula">
2724
+ <strong>Entropy Formula:</strong>
2725
+ H(S) = -Ξ£ pα΅’ Γ— logβ‚‚(pα΅’)<br>
2726
  <br>
2727
+ where pα΅’ = proportion of class i<br>
2728
+ <br>
2729
+ <strong>Interpretation:</strong><br>
2730
+ β€’ Entropy = 0: Pure (all same class)<br>
2731
+ β€’ Entropy = 1: Maximum disorder (50-50 split)<br>
2732
+ β€’ Lower entropy = better!
2733
  </div>
2734
 
2735
+ <h4>2. Information Gain</h4>
2736
+ <div class="formula">
2737
+ <strong>Information Gain Formula:</strong>
2738
+ IG(S, A) = H(S) - Ξ£ |Sα΅₯|/|S| Γ— H(Sα΅₯)<br>
2739
+ <br>
2740
+ = Entropy before split - Weighted entropy after split<br>
2741
+ <br>
2742
+ <strong>We choose the split with HIGHEST information gain!</strong>
2743
+ </div>
2744
+
2745
+ <div class="figure">
2746
+ <div class="figure-placeholder" style="height: 400px">
2747
+ <canvas id="entropy-viz"></canvas>
2748
  </div>
2749
+ <p class="figure-caption"><strong>Figure 2:</strong> Entropy and Information Gain visualization</p>
2750
+ </div>
2751
+
2752
+ <h4>3. Gini Impurity (Alternative)</h4>
2753
+ <div class="formula">
2754
+ <strong>Gini Formula:</strong>
2755
+ Gini(S) = 1 - Ξ£ pα΅’Β²<br>
2756
+ <br>
2757
+ <strong>Interpretation:</strong><br>
2758
+ β€’ Gini = 0: Pure<br>
2759
+ β€’ Gini = 0.5: Maximum impurity (binary)<br>
2760
+ β€’ Faster to compute than entropy
2761
  </div>
2762
 
2763
+ <h3>Worked Example: Email Classification</h3>
2764
+ <p>Dataset: 10 emails - 7 spam, 3 not spam</p>
2765
+
2766
  <div class="callout info">
2767
+ <div class="callout-title">πŸ“Š Calculating Information Gain</div>
2768
  <div class="callout-content">
2769
+ <strong>Initial Entropy:</strong><br>
2770
+ H(S) = -7/10Γ—logβ‚‚(7/10) - 3/10Γ—logβ‚‚(3/10)<br>
2771
+ H(S) = 0.881 bits<br>
2772
+ <br>
2773
+ <strong>Split by "Contains 'FREE'":</strong><br>
2774
+ β€’ Left (5 emails): 4 spam, 1 not β†’ H = 0.722<br>
2775
+ β€’ Right (5 emails): 3 spam, 2 not β†’ H = 0.971<br>
2776
+ <br>
2777
+ <strong>Weighted Entropy:</strong><br>
2778
+ = 5/10 Γ— 0.722 + 5/10 Γ— 0.971 = 0.847<br>
2779
+ <br>
2780
+ <strong>Information Gain:</strong><br>
2781
+ IG = 0.881 - 0.847 = 0.034 bits<br>
2782
+ <br>
2783
+ <strong>Split by "Has suspicious link":</strong><br>
2784
+ IG = 0.156 bits ← BETTER! Use this split!
2785
  </div>
2786
  </div>
2787
 
2788
+ <div class="figure">
2789
+ <div class="figure-placeholder" style="height: 400px">
2790
+ <canvas id="split-comparison"></canvas>
2791
+ </div>
2792
+ <p class="figure-caption"><strong>Figure 3:</strong> Comparing different splits by information gain</p>
2793
+ </div>
2794
 
2795
+ <h3>Decision Boundaries</h3>
2796
+ <div class="figure">
2797
+ <div class="figure-placeholder" style="height: 400px">
2798
+ <canvas id="tree-boundary"></canvas>
2799
+ </div>
2800
+ <p class="figure-caption"><strong>Figure 4:</strong> Decision tree creates rectangular regions</p>
2801
+ </div>
2802
+
2803
+ <h3>Overfitting in Decision Trees</h3>
2804
+ <div class="callout warning">
2805
+ <div class="callout-title">⚠️ The Overfitting Problem</div>
2806
  <div class="callout-content">
2807
+ Without constraints, decision trees grow until each leaf has ONE sample!<br>
2808
+ <br>
2809
+ <strong>Solutions:</strong><br>
2810
+ β€’ <strong>Max depth:</strong> Limit tree height (e.g., max_depth=5)<br>
2811
+ β€’ <strong>Min samples split:</strong> Need X samples to split (e.g., min=10)<br>
2812
+ β€’ <strong>Min samples leaf:</strong> Each leaf must have X samples<br>
2813
+ β€’ <strong>Pruning:</strong> Grow full tree, then remove branches
2814
  </div>
2815
  </div>
2816
 
2817
+ <h3>Advantages vs Disadvantages</h3>
2818
+ <table class="data-table">
2819
+ <thead>
2820
+ <tr><th>Advantages βœ…</th><th>Disadvantages ❌</th></tr>
2821
+ </thead>
2822
+ <tbody>
2823
+ <tr>
2824
+ <td>Easy to understand and interpret</td>
2825
+ <td>Prone to overfitting</td>
2826
+ </tr>
2827
+ <tr>
2828
+ <td>No feature scaling needed</td>
2829
+ <td>Small changes β†’ big tree changes</td>
2830
+ </tr>
2831
+ <tr>
2832
+ <td>Handles non-linear relationships</td>
2833
+ <td>Biased toward features with more levels</td>
2834
+ </tr>
2835
+ <tr>
2836
+ <td>Works with mixed data types</td>
2837
+ <td>Can't extrapolate beyond training data</td>
2838
+ </tr>
2839
+ <tr>
2840
+ <td>Fast prediction</td>
2841
+ <td>Less accurate than ensemble methods</td>
2842
+ </tr>
2843
+ </tbody>
2844
+ </table>
2845
  </div>
2846
  </div>
2847
 
2848
+ <!-- Section 17: Ensemble Methods -->
2849
+ <div class="section" id="ensemble-methods">
2850
  <div class="section-header">
2851
+ <h2>17. Ensemble Methods</h2>
2852
  <button class="section-toggle">β–Ό</button>
2853
  </div>
2854
  <div class="section-body">
2855
+ <p>"Wisdom of the crowds" applied to machine learning! Ensemble methods combine multiple weak learners to create a strong learner. They power most Kaggle competition winners!</p>
2856
 
2857
  <div class="info-card">
2858
  <div class="info-card-title">Key Concepts</div>
2859
  <ul class="info-card-list">
2860
+ <li>Combine multiple models for better predictions</li>
2861
+ <li>Bagging: Train on random subsets (parallel)</li>
2862
+ <li>Boosting: Sequential learning from mistakes</li>
2863
+ <li>Stacking: Meta-learner combines base models</li>
2864
  </ul>
2865
  </div>
2866
 
2867
+ <h3>Why Ensembles Work</h3>
2868
+ <p>Imagine 100 doctors diagnosing a patient. Even if each is 70% accurate individually, their majority vote is 95%+ accurate! Same principle applies to ML.</p>
 
 
 
 
 
 
 
 
 
 
 
 
2869
 
2870
+ <div class="callout success">
2871
+ <div class="callout-title">🎯 The Magic of Diversity</div>
2872
+ <div class="callout-content">
2873
+ <strong>Key insight:</strong> Each model makes DIFFERENT errors!<br>
2874
+ <br>
2875
+ Model A: Correct on samples [1,2,3,5,7,9] - 60% accuracy<br>
2876
+ Model B: Correct on samples [2,4,5,6,8,10] - 60% accuracy<br>
2877
+ Model C: Correct on samples [1,3,4,6,7,8] - 60% accuracy<br>
2878
+ <br>
2879
+ <strong>Majority vote:</strong> Correct on [1,2,3,4,5,6,7,8] - 80% accuracy!<br>
2880
+ <br>
2881
+ Diversity reduces variance!
2882
+ </div>
2883
+ </div>
2884
 
2885
+ <h3>Method 1: Bagging (Bootstrap Aggregating)</h3>
2886
+ <p>Train multiple models on different random subsets of data (with replacement), then average predictions.</p>
 
 
 
 
 
 
 
2887
 
 
2888
  <div class="formula">
2889
+ <strong>Bagging Algorithm:</strong><br>
2890
+ 1. Create B bootstrap samples (random sampling with replacement)<br>
2891
+ 2. Train a model on each sample independently<br>
2892
+ 3. For prediction:<br>
2893
+ &nbsp;&nbsp;&nbsp;β€’ Regression: Average all predictions<br>
2894
+ &nbsp;&nbsp;&nbsp;β€’ Classification: Majority vote<br>
2895
  <br>
2896
+ <strong>Effect:</strong> Reduces variance, prevents overfitting
2897
+ </div>
2898
+
2899
+ <div class="figure">
2900
+ <div class="figure-placeholder" style="height: 400px">
2901
+ <canvas id="bagging-viz"></canvas>
2902
+ </div>
2903
+ <p class="figure-caption"><strong>Figure 1:</strong> Bagging process - multiple models from bootstrap samples</p>
2904
  </div>
2905
 
2906
+ <h3>Method 2: Boosting (Sequential Learning)</h3>
2907
+ <p>Train models sequentially, where each new model focuses on examples the previous models got wrong.</p>
2908
+
2909
  <div class="formula">
2910
+ <strong>Boosting Algorithm:</strong><br>
2911
+ 1. Start with equal weights for all samples<br>
2912
+ 2. Train model on weighted data<br>
2913
+ 3. Increase weights for misclassified samples<br>
2914
+ 4. Train next model (focuses on hard examples)<br>
2915
+ 5. Repeat for M iterations<br>
2916
+ 6. Final prediction = weighted vote of all models<br>
2917
  <br>
2918
+ <strong>Effect:</strong> Reduces bias AND variance
 
 
 
2919
  </div>
2920
 
2921
  <div class="figure">
2922
+ <div class="figure-placeholder" style="height: 450px">
2923
+ <canvas id="boosting-viz"></canvas>
2924
  </div>
2925
+ <p class="figure-caption"><strong>Figure 2:</strong> Boosting iteration - focusing on misclassified points</p>
2926
  </div>
2927
 
2928
+ <h3>Random Forest: Bagging + Decision Trees</h3>
2929
+ <p>The most popular ensemble method! Combines bagging with feature randomness.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2930
 
 
 
 
 
 
 
 
 
2931
  <div class="formula">
2932
+ <strong>Random Forest Algorithm:</strong><br>
2933
+ 1. Create B bootstrap samples<br>
2934
+ 2. For each sample:<br>
2935
+ &nbsp;&nbsp;&nbsp;β€’ Grow decision tree<br>
2936
+ &nbsp;&nbsp;&nbsp;β€’ At each split, consider random subset of features<br>
2937
+ &nbsp;&nbsp;&nbsp;β€’ Don't prune (let trees overfit!)<br>
2938
+ 3. Final prediction = average/vote of all trees<br>
 
 
 
 
 
 
 
2939
  <br>
2940
+ <strong>Typical values:</strong> B=100-500 trees, √features per split
2941
  </div>
2942
 
2943
+ <div class="figure">
2944
+ <div class="figure-placeholder" style="height: 400px">
2945
+ <canvas id="random-forest-viz"></canvas>
2946
+ </div>
2947
+ <p class="figure-caption"><strong>Figure 3:</strong> Random Forest - multiple diverse trees voting</p>
2948
+ </div>
2949
 
2950
+ <h3>Comparison: Bagging vs Boosting</h3>
2951
  <table class="data-table">
2952
  <thead>
2953
+ <tr><th>Aspect</th><th>Bagging</th><th>Boosting</th></tr>
2954
  </thead>
2955
  <tbody>
2956
+ <tr><td>Training</td><td>Parallel (independent)</td><td>Sequential (dependent)</td></tr>
2957
+ <tr><td>Focus</td><td>Reduce variance</td><td>Reduce bias &amp; variance</td></tr>
2958
+ <tr><td>Weights</td><td>Equal for all samples</td><td>Higher for hard samples</td></tr>
2959
+ <tr><td>Speed</td><td>Fast (parallelizable)</td><td>Slower (sequential)</td></tr>
2960
+ <tr><td>Overfitting</td><td>Resistant</td><td>Can overfit if too many iterations</td></tr>
2961
+ <tr><td>Examples</td><td>Random Forest</td><td>AdaBoost, Gradient Boosting, XGBoost</td></tr>
2962
  </tbody>
2963
  </table>
2964
 
2965
+ <h3>Real-World Success Stories</h3>
 
 
 
 
 
 
 
 
 
 
 
 
2966
  <ul>
2967
+ <li><strong>Netflix Prize (2009):</strong> Winning team used ensemble of 100+ models</li>
2968
+ <li><strong>Kaggle competitions:</strong> 99% of winners use ensembles</li>
2969
+ <li><strong>XGBoost:</strong> Most popular algorithm for structured data</li>
2970
+ <li><strong>Random Forests:</strong> Default choice for many data scientists</li>
2971
  </ul>
2972
 
2973
  <div class="callout info">
2974
+ <div class="callout-title">πŸ’‘ When to Use Each Method</div>
2975
  <div class="callout-content">
2976
+ <strong>Use Random Forest when:</strong><br>
2977
+ β€’ You want good accuracy with minimal tuning<br>
2978
+ β€’ You have high-variance base models<br>
2979
+ β€’ Interpretability is secondary<br>
 
 
 
 
2980
  <br>
2981
+ <strong>Use Gradient Boosting (XGBoost) when:</strong><br>
2982
+ β€’ You want maximum accuracy<br>
2983
+ β€’ You can afford hyperparameter tuning<br>
2984
+ β€’ You have high-bias base models<br>
2985
+ <br>
2986
+ <strong>Use Stacking when:</strong><br>
2987
+ β€’ You want to combine very different model types<br>
2988
+ β€’ You're in a competition (squeeze every 0.1%!)
 
 
 
 
 
 
 
 
 
 
 
 
2989
  </div>
2990
  </div>
2991
 
2992
+ <h3>πŸŽ‰ Course Complete!</h3>
2993
  <p style="font-size: 18px; color: #7ef0d4; margin-top: 24px;">
2994
+ Congratulations! You've mastered all 17 machine learning topics - from basic linear regression to advanced ensemble methods! You now have the knowledge to:
2995
+ </p>
2996
+ <ul style="color: #7ef0d4; font-size: 16px;">
2997
+ <li>Choose the right algorithm for any problem</li>
2998
+ <li>Understand the math behind each method</li>
2999
+ <li>Tune hyperparameters systematically</li>
3000
+ <li>Evaluate models properly</li>
3001
+ <li>Build production-ready ML systems</li>
3002
+ </ul>
3003
+ <p style="font-size: 18px; color: #7ef0d4; margin-top: 16px;">
3004
+ Keep practicing, building projects, and exploring! The ML journey never ends. πŸš€βœ¨
3005
  </p>
3006
  </div>
3007
  </div>
ml_complete-all-topics/script.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# script.py β€” summary of the new ML topics (26-10-2025 lectures) that must be
# reflected in the interactive app, plus the list of outstanding fixes.
# Running this file only prints a human-readable report; the `new_topics`
# dict below is the single source of truth for what each new section covers.

# Extract new topics from the latest PDFs

new_topics = {
    "optimal_k_knn": {
        "title": "Finding Optimal K in KNN",
        "concepts": [
            "Elbow method for finding optimal K",
            "Cross-validation to find best K",
            # The fold data below has 19 accuracies per fold, matching
            # k = 1..19 (range(1, 20)); the text reflects that range.
            "Testing K values 1-19",
            "Mean accuracy across k-folds",
            "Avoiding underfitting and overfitting",
        ],
        "data": {
            # range(1, 20) -> k = 1..19; each fold list holds one accuracy
            # per k, in the same order as k_values.
            "k_values": list(range(1, 20)),
            "accuracies_fold1": [0.98, 0.95, 0.92, 0.90, 0.88, 0.86, 0.85,
                                 0.84, 0.83, 0.82, 0.81, 0.80, 0.79, 0.78,
                                 0.77, 0.76, 0.75, 0.74, 0.73],
            "accuracies_fold2": [0.96, 0.93, 0.91, 0.89, 0.87, 0.85, 0.83,
                                 0.82, 0.81, 0.80, 0.79, 0.78, 0.77, 0.76,
                                 0.75, 0.74, 0.73, 0.72, 0.71],
            "accuracies_fold3": [0.94, 0.92, 0.90, 0.88, 0.86, 0.84, 0.82,
                                 0.80, 0.79, 0.78, 0.77, 0.76, 0.75, 0.74,
                                 0.73, 0.72, 0.71, 0.70, 0.69],
        },
    },

    "hyperparameter_tuning": {
        "title": "Hyperparameter Tuning with GridSearch",
        "concepts": [
            "What are hyperparameters?",
            "GridSearch exhaustive search",
            "Testing multiple parameter combinations",
            "Finding optimal hyperparameters",
            "Train/test performance comparison",
        ],
        # Example SVM search grid, presumably taken from the lecture slides.
        "svm_params": {
            "C": [0.1, 1, 10, 100],
            "gamma": ["scale", "auto", 0.001, 0.01],
            "kernel": ["linear", "poly", "rbf"],
        },
        # Best combination reported by the grid search in the lecture.
        "results": {
            "best_C": 1,
            "best_gamma": "scale",
            "best_kernel": "rbf",
            "best_score": 0.95,
        },
    },

    "naive_bayes": {
        "title": "Naive Bayes Classification",
        "concepts": [
            "Probabilistic classifier",
            "Bayes' theorem",
            "Independence assumption",
            "Prior and posterior probabilities",
            "Feature independence",
        ],
        "formulas": [
            "P(C|X) = P(X|C) Γ— P(C) / P(X)",
            "P(X|C) = P(x1|C) Γ— P(x2|C) Γ— ... Γ— P(xn|C)",
            "Posterior = Likelihood Γ— Prior / Evidence",
        ],
    },

    "decision_trees": {
        "title": "Decision Trees",
        "concepts": [
            "Tree structure with nodes and branches",
            "Splitting criteria (Information Gain, Gini)",
            "Entropy calculation",
            "Recursive splitting",
            "Leaf nodes for predictions",
        ],
    },

    "ensemble_methods": {
        "title": "Ensemble Methods",
        "concepts": [
            "Bagging (Bootstrap Aggregating)",
            "Boosting (AdaBoost, Gradient Boosting)",
            "Random Forest",
            "Combining weak learners",
            "Voting mechanisms",
        ],
    },
}

print("=" * 80)
print("NEW TOPICS FROM 26-10-2025 LECTURES")
print("=" * 80)

# Per-topic report: title plus a numbered list of its concepts.
for topic_id, topic_data in new_topics.items():
    print(f"\nπŸ“š {topic_data['title'].upper()}")
    print(f"   Concepts: {len(topic_data['concepts'])}")
    for i, concept in enumerate(topic_data["concepts"], 1):
        print(f"   {i}. {concept}")

print("\n" + "=" * 80)
print("TOPICS TO ADD TO APPLICATION")
print("=" * 80)
print("""
NEW TOPICS (from 26-10-2025):
1. βœ… Finding Optimal K in KNN (Elbow Method + Cross-Validation)
2. βœ… Hyperparameter Tuning with GridSearch
3. βœ… Naive Bayes Classification
4. βœ… Decision Trees
5. βœ… Ensemble Methods (Bagging, Boosting, Random Forest)

FIXES NEEDED:
1. βœ… Fix Linear Regression Visualization (currently not showing)
2. βœ… Add MORE visualizations for every algorithm
3. βœ… Add Mathematical explanations for WHY each algorithm
4. βœ… Add More Real-World Examples
5. βœ… Explain WHY one algorithm works vs another
6. βœ… Add comparison visualizations between algorithms
""")

print("\n" + "=" * 80)
print("ENHANCED LINEAR REGRESSION VISUALIZATION FIX")
print("=" * 80)
print("""
The Linear Regression visualization issue will be fixed with:
1. Proper Canvas initialization
2. Error handling for drawing
3. Auto-scaling for data points
4. Clear axes and labels
5. Live updating as sliders move
6. Residual lines visualization
7. MSE display with calculation breakdown
""")