AashishAIHub commited on
Commit
ad51ad1
·
1 Parent(s): 5b394bc

update ml play list

Browse files
ml_complete-all-topics/app.js CHANGED
@@ -236,6 +236,11 @@ function initSections() {
236
  if (section.id === 'boosting-adaboost') initBoostingAdaBoost();
237
  if (section.id === 'random-forest') initRandomForest();
238
  if (section.id === 'ensemble-methods') initEnsembleMethods();
 
 
 
 
 
239
  if (section.id === 'diagnostics') {
240
  // Wait for all visualizations to initialize
241
  setTimeout(showDiagnostics, 500);
@@ -6435,6 +6440,568 @@ function showDiagnosticDetails(filter) {
6435
  container.innerHTML = html;
6436
  }
6437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6438
  // Handle window resize
6439
  let resizeTimer;
6440
  window.addEventListener('resize', () => {
 
236
  if (section.id === 'boosting-adaboost') initBoostingAdaBoost();
237
  if (section.id === 'random-forest') initRandomForest();
238
  if (section.id === 'ensemble-methods') initEnsembleMethods();
239
+ if (section.id === 'gradient-boosting-classification') initGradientBoostingClassification();
240
+ if (section.id === 'xgboost-classification') initXGBoostClassification();
241
+ if (section.id === 'hierarchical-clustering') initHierarchicalClustering();
242
+ if (section.id === 'dbscan') initDBSCAN();
243
+ if (section.id === 'clustering-evaluation') initClusteringEvaluation();
244
  if (section.id === 'diagnostics') {
245
  // Wait for all visualizations to initialize
246
  setTimeout(showDiagnostics, 500);
 
6440
  container.innerHTML = html;
6441
  }
6442
 
6443
+ // NEW VISUALIZATIONS FOR ADDED TOPICS
6444
+
6445
+ // Gradient Boosting Classification
6446
+ function initGradientBoostingClassification() {
6447
+ const canvas1 = document.getElementById('gb-class-sequential-canvas');
6448
+ if (canvas1 && !canvas1.dataset.initialized) {
6449
+ canvas1.dataset.initialized = 'true';
6450
+ drawGBClassSequential();
6451
+ }
6452
+
6453
+ const canvas2 = document.getElementById('gb-class-gradients-canvas');
6454
+ if (canvas2 && !canvas2.dataset.initialized) {
6455
+ canvas2.dataset.initialized = 'true';
6456
+ drawGBClassGradients();
6457
+ }
6458
+ }
6459
+
6460
+ function drawGBClassSequential() {
6461
+ const canvas = document.getElementById('gb-class-sequential-canvas');
6462
+ if (!canvas) return;
6463
+
6464
+ const iterations = [0, 1, 2, 3, 4, 5, 10];
6465
+ const house1 = [0.4, 0.39, 0.37, 0.35, 0.33, 0.31, 0.22];
6466
+ const house4 = [0.4, 0.43, 0.47, 0.52, 0.57, 0.62, 0.78];
6467
+
6468
+ createVerifiedVisualization('gb-class-sequential-canvas', {
6469
+ type: 'line',
6470
+ data: {
6471
+ labels: iterations,
6472
+ datasets: [
6473
+ {
6474
+ label: 'House 1 (y=0): Probability ↓',
6475
+ data: house1,
6476
+ borderColor: '#7ef0d4',
6477
+ backgroundColor: 'rgba(126, 240, 212, 0.1)',
6478
+ borderWidth: 3,
6479
+ fill: true
6480
+ },
6481
+ {
6482
+ label: 'House 4 (y=1): Probability ↑',
6483
+ data: house4,
6484
+ borderColor: '#6aa9ff',
6485
+ backgroundColor: 'rgba(106, 169, 255, 0.1)',
6486
+ borderWidth: 3,
6487
+ fill: true
6488
+ }
6489
+ ]
6490
+ },
6491
+ options: {
6492
+ responsive: true,
6493
+ maintainAspectRatio: false,
6494
+ plugins: {
6495
+ title: {
6496
+ display: true,
6497
+ text: 'Gradient Boosting Classification: Probability Updates',
6498
+ color: '#e8eef6',
6499
+ font: { size: 16 }
6500
+ },
6501
+ legend: { labels: { color: '#a9b4c2' } }
6502
+ },
6503
+ scales: {
6504
+ x: {
6505
+ title: { display: true, text: 'Iteration', color: '#a9b4c2' },
6506
+ grid: { color: '#2a3544' },
6507
+ ticks: { color: '#a9b4c2' }
6508
+ },
6509
+ y: {
6510
+ title: { display: true, text: 'P(y=1)', color: '#a9b4c2' },
6511
+ grid: { color: '#2a3544' },
6512
+ ticks: { color: '#a9b4c2' },
6513
+ min: 0,
6514
+ max: 1
6515
+ }
6516
+ }
6517
+ }
6518
+ }, 'GB Classification', 'Sequential Updates');
6519
+ }
6520
+
6521
+ function drawGBClassGradients() {
6522
+ const canvas = document.getElementById('gb-class-gradients-canvas');
6523
+ if (!canvas) return;
6524
+
6525
+ createVerifiedVisualization('gb-class-gradients-canvas', {
6526
+ type: 'bar',
6527
+ data: {
6528
+ labels: ['House 1', 'House 2', 'House 3', 'House 4', 'House 5'],
6529
+ datasets: [
6530
+ {
6531
+ label: 'Iteration 0 Gradients',
6532
+ data: [0.4, 0.4, 0.4, -0.6, -0.6],
6533
+ backgroundColor: '#ff8c6a'
6534
+ },
6535
+ {
6536
+ label: 'Iteration 5 Gradients',
6537
+ data: [0.1, 0.08, 0.09, -0.15, -0.12],
6538
+ backgroundColor: '#7ef0d4'
6539
+ }
6540
+ ]
6541
+ },
6542
+ options: {
6543
+ responsive: true,
6544
+ maintainAspectRatio: false,
6545
+ plugins: {
6546
+ title: {
6547
+ display: true,
6548
+ text: 'Gradient Values: Shrinking Over Iterations',
6549
+ color: '#e8eef6',
6550
+ font: { size: 16 }
6551
+ },
6552
+ legend: { labels: { color: '#a9b4c2' } }
6553
+ },
6554
+ scales: {
6555
+ x: {
6556
+ grid: { color: '#2a3544' },
6557
+ ticks: { color: '#a9b4c2' }
6558
+ },
6559
+ y: {
6560
+ title: { display: true, text: 'Gradient (p - y)', color: '#a9b4c2' },
6561
+ grid: { color: '#2a3544' },
6562
+ ticks: { color: '#a9b4c2' }
6563
+ }
6564
+ }
6565
+ }
6566
+ }, 'GB Classification', 'Gradient Values');
6567
+ }
6568
+
6569
+ // XGBoost Classification
6570
+ function initXGBoostClassification() {
6571
+ const canvas = document.getElementById('xgb-class-hessian-canvas');
6572
+ if (canvas && !canvas.dataset.initialized) {
6573
+ canvas.dataset.initialized = 'true';
6574
+ drawXGBClassHessian();
6575
+ }
6576
+ }
6577
+
6578
+ function drawXGBClassHessian() {
6579
+ const canvas = document.getElementById('xgb-class-hessian-canvas');
6580
+ if (!canvas) return;
6581
+
6582
+ const houses = ['House 1', 'House 2', 'House 3', 'House 4', 'House 5'];
6583
+ const gradients = [0.4, 0.4, 0.4, -0.6, -0.6];
6584
+ const hessians = [0.24, 0.24, 0.24, 0.24, 0.24];
6585
+
6586
+ createVerifiedVisualization('xgb-class-hessian-canvas', {
6587
+ type: 'bar',
6588
+ data: {
6589
+ labels: houses,
6590
+ datasets: [
6591
+ {
6592
+ label: 'Gradient (g)',
6593
+ data: gradients,
6594
+ backgroundColor: '#6aa9ff',
6595
+ yAxisID: 'y'
6596
+ },
6597
+ {
6598
+ label: 'Hessian (h)',
6599
+ data: hessians,
6600
+ backgroundColor: '#7ef0d4',
6601
+ yAxisID: 'y1'
6602
+ }
6603
+ ]
6604
+ },
6605
+ options: {
6606
+ responsive: true,
6607
+ maintainAspectRatio: false,
6608
+ plugins: {
6609
+ title: {
6610
+ display: true,
6611
+ text: 'XGBoost: Gradient + Hessian Information',
6612
+ color: '#e8eef6',
6613
+ font: { size: 16 }
6614
+ },
6615
+ legend: { labels: { color: '#a9b4c2' } }
6616
+ },
6617
+ scales: {
6618
+ x: {
6619
+ grid: { color: '#2a3544' },
6620
+ ticks: { color: '#a9b4c2' }
6621
+ },
6622
+ y: {
6623
+ type: 'linear',
6624
+ position: 'left',
6625
+ title: { display: true, text: 'Gradient', color: '#6aa9ff' },
6626
+ grid: { color: '#2a3544' },
6627
+ ticks: { color: '#a9b4c2' }
6628
+ },
6629
+ y1: {
6630
+ type: 'linear',
6631
+ position: 'right',
6632
+ title: { display: true, text: 'Hessian', color: '#7ef0d4' },
6633
+ grid: { display: false },
6634
+ ticks: { color: '#a9b4c2' }
6635
+ }
6636
+ }
6637
+ }
6638
+ }, 'XGBoost Classification', 'Hessian Values');
6639
+ }
6640
+
6641
+ // Hierarchical Clustering
6642
+ function initHierarchicalClustering() {
6643
+ const canvas = document.getElementById('hierarchical-dendrogram-canvas');
6644
+ if (canvas && !canvas.dataset.initialized) {
6645
+ canvas.dataset.initialized = 'true';
6646
+ drawHierarchicalDendrogram();
6647
+ }
6648
+ }
6649
+
6650
+ function drawHierarchicalDendrogram() {
6651
+ const canvas = document.getElementById('hierarchical-dendrogram-canvas');
6652
+ if (!canvas) {
6653
+ logViz('Hierarchical Clustering', 'Dendrogram', 'failed', 'Canvas not found');
6654
+ return;
6655
+ }
6656
+
6657
+ const ctx = canvas.getContext('2d');
6658
+ const width = canvas.width = canvas.offsetWidth;
6659
+ const height = canvas.height = 450;
6660
+
6661
+ ctx.clearRect(0, 0, width, height);
6662
+ ctx.fillStyle = '#1a2332';
6663
+ ctx.fillRect(0, 0, width, height);
6664
+
6665
+ const padding = 60;
6666
+ const numPoints = 6;
6667
+ const pointSpacing = (width - 2 * padding) / numPoints;
6668
+ const labels = ['A', 'B', 'C', 'D', 'E', 'F'];
6669
+
6670
+ // Draw points at bottom
6671
+ const pointY = height - 40;
6672
+ labels.forEach((label, i) => {
6673
+ const x = padding + i * pointSpacing + pointSpacing / 2;
6674
+
6675
+ ctx.fillStyle = '#7ef0d4';
6676
+ ctx.beginPath();
6677
+ ctx.arc(x, pointY, 6, 0, 2 * Math.PI);
6678
+ ctx.fill();
6679
+
6680
+ ctx.fillStyle = '#e8eef6';
6681
+ ctx.font = 'bold 12px sans-serif';
6682
+ ctx.textAlign = 'center';
6683
+ ctx.fillText(label, x, pointY + 20);
6684
+ });
6685
+
6686
+ // Draw dendrogram merges
6687
+ const merges = [
6688
+ { points: [0, 1], height: 320 },
6689
+ { points: [3, 4], height: 330 },
6690
+ { points: [0, 1, 2], height: 220 },
6691
+ { points: [3, 4, 5], height: 200 },
6692
+ { points: [0, 1, 2, 3, 4, 5], height: 80 }
6693
+ ];
6694
+
6695
+ ctx.strokeStyle = '#6aa9ff';
6696
+ ctx.lineWidth = 2;
6697
+
6698
+ // Merge A-B
6699
+ let x1 = padding + 0 * pointSpacing + pointSpacing / 2;
6700
+ let x2 = padding + 1 * pointSpacing + pointSpacing / 2;
6701
+ ctx.beginPath();
6702
+ ctx.moveTo(x1, pointY);
6703
+ ctx.lineTo(x1, merges[0].height);
6704
+ ctx.lineTo(x2, merges[0].height);
6705
+ ctx.lineTo(x2, pointY);
6706
+ ctx.stroke();
6707
+
6708
+ // Merge D-E
6709
+ x1 = padding + 3 * pointSpacing + pointSpacing / 2;
6710
+ x2 = padding + 4 * pointSpacing + pointSpacing / 2;
6711
+ ctx.beginPath();
6712
+ ctx.moveTo(x1, pointY);
6713
+ ctx.lineTo(x1, merges[1].height);
6714
+ ctx.lineTo(x2, merges[1].height);
6715
+ ctx.lineTo(x2, pointY);
6716
+ ctx.stroke();
6717
+
6718
+ // Merge (A-B)-C
6719
+ x1 = padding + 0.5 * pointSpacing + pointSpacing / 2;
6720
+ x2 = padding + 2 * pointSpacing + pointSpacing / 2;
6721
+ ctx.beginPath();
6722
+ ctx.moveTo(x1, merges[0].height);
6723
+ ctx.lineTo(x1, merges[2].height);
6724
+ ctx.lineTo(x2, merges[2].height);
6725
+ ctx.lineTo(x2, pointY);
6726
+ ctx.stroke();
6727
+
6728
+ // Merge (D-E)-F
6729
+ x1 = padding + 3.5 * pointSpacing + pointSpacing / 2;
6730
+ x2 = padding + 5 * pointSpacing + pointSpacing / 2;
6731
+ ctx.beginPath();
6732
+ ctx.moveTo(x1, merges[1].height);
6733
+ ctx.lineTo(x1, merges[3].height);
6734
+ ctx.lineTo(x2, merges[3].height);
6735
+ ctx.lineTo(x2, pointY);
6736
+ ctx.stroke();
6737
+
6738
+ // Final merge
6739
+ x1 = padding + 1.5 * pointSpacing;
6740
+ x2 = padding + 4.5 * pointSpacing;
6741
+ ctx.beginPath();
6742
+ ctx.moveTo(x1, merges[2].height);
6743
+ ctx.lineTo(x1, merges[4].height);
6744
+ ctx.lineTo(x2, merges[4].height);
6745
+ ctx.lineTo(x2, merges[3].height);
6746
+ ctx.stroke();
6747
+
6748
+ // Title
6749
+ ctx.fillStyle = '#7ef0d4';
6750
+ ctx.font = 'bold 16px sans-serif';
6751
+ ctx.textAlign = 'center';
6752
+ ctx.fillText('Dendrogram: Cluster Merging History', width / 2, 30);
6753
+
6754
+ // Y-axis label
6755
+ ctx.fillStyle = '#a9b4c2';
6756
+ ctx.font = '12px sans-serif';
6757
+ ctx.save();
6758
+ ctx.translate(20, height / 2);
6759
+ ctx.rotate(-Math.PI / 2);
6760
+ ctx.fillText('Distance', 0, 0);
6761
+ ctx.restore();
6762
+
6763
+ logViz('Hierarchical Clustering', 'Dendrogram', 'success');
6764
+ }
6765
+
6766
+ // DBSCAN
6767
+ function initDBSCAN() {
6768
+ const canvas = document.getElementById('dbscan-clusters-canvas');
6769
+ if (canvas && !canvas.dataset.initialized) {
6770
+ canvas.dataset.initialized = 'true';
6771
+ drawDBSCANClusters();
6772
+ }
6773
+ }
6774
+
6775
+ function drawDBSCANClusters() {
6776
+ const canvas = document.getElementById('dbscan-clusters-canvas');
6777
+ if (!canvas) {
6778
+ logViz('DBSCAN', 'Clusters Visualization', 'failed', 'Canvas not found');
6779
+ return;
6780
+ }
6781
+
6782
+ const ctx = canvas.getContext('2d');
6783
+ const width = canvas.width = canvas.offsetWidth;
6784
+ const height = canvas.height = 450;
6785
+
6786
+ ctx.clearRect(0, 0, width, height);
6787
+ ctx.fillStyle = '#1a2332';
6788
+ ctx.fillRect(0, 0, width, height);
6789
+
6790
+ const padding = 60;
6791
+ const chartWidth = width - 2 * padding;
6792
+ const chartHeight = height - 2 * padding;
6793
+
6794
+ const scaleX = (x) => padding + (x / 10) * chartWidth;
6795
+ const scaleY = (y) => height - padding - (y / 10) * chartHeight;
6796
+
6797
+ const eps = 1.5;
6798
+ const epsPixels = (eps / 10) * chartWidth;
6799
+
6800
+ // Core points (cluster 1)
6801
+ const core1 = [{x: 1, y: 1}, {x: 1.2, y: 1.5}, {x: 1.5, y: 1.2}];
6802
+ // Core points (cluster 2)
6803
+ const core2 = [{x: 8, y: 8}, {x: 8.2, y: 8.5}, {x: 8.5, y: 8.2}];
6804
+ // Border points
6805
+ const border = [{x: 2.2, y: 2}];
6806
+ // Outliers
6807
+ const outliers = [{x: 5, y: 5}, {x: 4.5, y: 6}];
6808
+
6809
+ // Draw eps circles around core points
6810
+ ctx.strokeStyle = 'rgba(126, 240, 212, 0.3)';
6811
+ ctx.lineWidth = 1;
6812
+ ctx.setLineDash([3, 3]);
6813
+ core1.forEach(p => {
6814
+ ctx.beginPath();
6815
+ ctx.arc(scaleX(p.x), scaleY(p.y), epsPixels, 0, 2 * Math.PI);
6816
+ ctx.stroke();
6817
+ });
6818
+ ctx.setLineDash([]);
6819
+
6820
+ // Draw core points
6821
+ core1.forEach(p => {
6822
+ ctx.fillStyle = '#7ef0d4';
6823
+ ctx.beginPath();
6824
+ ctx.arc(scaleX(p.x), scaleY(p.y), 10, 0, 2 * Math.PI);
6825
+ ctx.fill();
6826
+ ctx.strokeStyle = '#1a2332';
6827
+ ctx.lineWidth = 2;
6828
+ ctx.stroke();
6829
+ });
6830
+
6831
+ core2.forEach(p => {
6832
+ ctx.fillStyle = '#6aa9ff';
6833
+ ctx.beginPath();
6834
+ ctx.arc(scaleX(p.x), scaleY(p.y), 10, 0, 2 * Math.PI);
6835
+ ctx.fill();
6836
+ ctx.strokeStyle = '#1a2332';
6837
+ ctx.lineWidth = 2;
6838
+ ctx.stroke();
6839
+ });
6840
+
6841
+ // Draw border points
6842
+ border.forEach(p => {
6843
+ ctx.fillStyle = '#ffb490';
6844
+ ctx.beginPath();
6845
+ ctx.arc(scaleX(p.x), scaleY(p.y), 8, 0, 2 * Math.PI);
6846
+ ctx.fill();
6847
+ });
6848
+
6849
+ // Draw outliers
6850
+ outliers.forEach(p => {
6851
+ ctx.strokeStyle = '#ff8c6a';
6852
+ ctx.lineWidth = 3;
6853
+ ctx.beginPath();
6854
+ ctx.arc(scaleX(p.x), scaleY(p.y), 8, 0, 2 * Math.PI);
6855
+ ctx.stroke();
6856
+ });
6857
+
6858
+ // Legend
6859
+ ctx.fillStyle = '#7ef0d4';
6860
+ ctx.beginPath();
6861
+ ctx.arc(padding + 20, 30, 8, 0, 2 * Math.PI);
6862
+ ctx.fill();
6863
+ ctx.fillStyle = '#e8eef6';
6864
+ ctx.font = '12px sans-serif';
6865
+ ctx.textAlign = 'left';
6866
+ ctx.fillText('Core points', padding + 35, 35);
6867
+
6868
+ ctx.fillStyle = '#ffb490';
6869
+ ctx.beginPath();
6870
+ ctx.arc(padding + 140, 30, 8, 0, 2 * Math.PI);
6871
+ ctx.fill();
6872
+ ctx.fillStyle = '#e8eef6';
6873
+ ctx.fillText('Border points', padding + 155, 35);
6874
+
6875
+ ctx.strokeStyle = '#ff8c6a';
6876
+ ctx.lineWidth = 3;
6877
+ ctx.beginPath();
6878
+ ctx.arc(padding + 270, 30, 8, 0, 2 * Math.PI);
6879
+ ctx.stroke();
6880
+ ctx.fillStyle = '#e8eef6';
6881
+ ctx.fillText('Outliers', padding + 285, 35);
6882
+
6883
+ // Title
6884
+ ctx.fillStyle = '#7ef0d4';
6885
+ ctx.font = 'bold 16px sans-serif';
6886
+ ctx.textAlign = 'center';
6887
+ ctx.fillText('DBSCAN: Core, Border, and Outlier Points', width / 2, height - 10);
6888
+
6889
+ logViz('DBSCAN', 'Clusters Visualization', 'success');
6890
+ }
6891
+
6892
+ // Clustering Evaluation
6893
+ function initClusteringEvaluation() {
6894
+ const canvas1 = document.getElementById('silhouette-plot-canvas');
6895
+ if (canvas1 && !canvas1.dataset.initialized) {
6896
+ canvas1.dataset.initialized = 'true';
6897
+ drawSilhouettePlot();
6898
+ }
6899
+
6900
+ const canvas2 = document.getElementById('ch-index-canvas');
6901
+ if (canvas2 && !canvas2.dataset.initialized) {
6902
+ canvas2.dataset.initialized = 'true';
6903
+ drawCHIndex();
6904
+ }
6905
+ }
6906
+
6907
+ function drawSilhouettePlot() {
6908
+ const canvas = document.getElementById('silhouette-plot-canvas');
6909
+ if (!canvas) return;
6910
+
6911
+ createVerifiedVisualization('silhouette-plot-canvas', {
6912
+ type: 'bar',
6913
+ data: {
6914
+ labels: ['Cluster 1 Avg', 'Cluster 2 Avg', 'Cluster 3 Avg', 'Overall'],
6915
+ datasets: [{
6916
+ label: 'Silhouette Coefficient',
6917
+ data: [0.72, 0.68, 0.81, 0.74],
6918
+ backgroundColor: ['#7ef0d4', '#6aa9ff', '#ffb490', '#ff8c6a'],
6919
+ borderColor: ['#7ef0d4', '#6aa9ff', '#ffb490', '#ff8c6a'],
6920
+ borderWidth: 2
6921
+ }]
6922
+ },
6923
+ options: {
6924
+ indexAxis: 'y',
6925
+ responsive: true,
6926
+ maintainAspectRatio: false,
6927
+ plugins: {
6928
+ title: {
6929
+ display: true,
6930
+ text: 'Silhouette Coefficients: All Above 0.7 = Excellent!',
6931
+ color: '#e8eef6',
6932
+ font: { size: 16 }
6933
+ },
6934
+ legend: { display: false }
6935
+ },
6936
+ scales: {
6937
+ x: {
6938
+ title: { display: true, text: 'Silhouette Coefficient', color: '#a9b4c2' },
6939
+ grid: { color: '#2a3544' },
6940
+ ticks: { color: '#a9b4c2' },
6941
+ min: 0,
6942
+ max: 1
6943
+ },
6944
+ y: {
6945
+ grid: { display: false },
6946
+ ticks: { color: '#a9b4c2' }
6947
+ }
6948
+ }
6949
+ }
6950
+ }, 'Clustering Evaluation', 'Silhouette Plot');
6951
+ }
6952
+
6953
+ function drawCHIndex() {
6954
+ const canvas = document.getElementById('ch-index-canvas');
6955
+ if (!canvas) return;
6956
+
6957
+ const kValues = [2, 3, 4, 5, 6, 7, 8];
6958
+ const chScores = [89, 234, 187, 145, 112, 95, 78];
6959
+
6960
+ createVerifiedVisualization('ch-index-canvas', {
6961
+ type: 'line',
6962
+ data: {
6963
+ labels: kValues,
6964
+ datasets: [{
6965
+ label: 'Calinski-Harabasz Index',
6966
+ data: chScores,
6967
+ borderColor: '#6aa9ff',
6968
+ backgroundColor: 'rgba(106, 169, 255, 0.1)',
6969
+ borderWidth: 3,
6970
+ fill: true,
6971
+ pointRadius: kValues.map(k => k === 3 ? 10 : 6),
6972
+ pointBackgroundColor: kValues.map(k => k === 3 ? '#7ef0d4' : '#6aa9ff'),
6973
+ pointBorderWidth: kValues.map(k => k === 3 ? 3 : 2)
6974
+ }]
6975
+ },
6976
+ options: {
6977
+ responsive: true,
6978
+ maintainAspectRatio: false,
6979
+ plugins: {
6980
+ title: {
6981
+ display: true,
6982
+ text: 'Calinski-Harabasz Index: Optimal k = 3',
6983
+ color: '#e8eef6',
6984
+ font: { size: 16 }
6985
+ },
6986
+ legend: { labels: { color: '#a9b4c2' } }
6987
+ },
6988
+ scales: {
6989
+ x: {
6990
+ title: { display: true, text: 'Number of Clusters (k)', color: '#a9b4c2' },
6991
+ grid: { color: '#2a3544' },
6992
+ ticks: { color: '#a9b4c2' }
6993
+ },
6994
+ y: {
6995
+ title: { display: true, text: 'CH Index (higher is better)', color: '#a9b4c2' },
6996
+ grid: { color: '#2a3544' },
6997
+ ticks: { color: '#a9b4c2' },
6998
+ min: 0
6999
+ }
7000
+ }
7001
+ }
7002
+ }, 'Clustering Evaluation', 'CH Index');
7003
+ }
7004
+
7005
  // Handle window resize
7006
  let resizeTimer;
7007
  window.addEventListener('resize', () => {
ml_complete-all-topics/index.html CHANGED
@@ -533,8 +533,10 @@ canvas {
533
  <a href="#decision-trees" class="toc-link toc-sub">Decision Trees (Classification)</a>
534
  <a href="#bagging" class="toc-link toc-sub">Bagging</a>
535
  <a href="#boosting-adaboost" class="toc-link toc-sub">Boosting (AdaBoost)</a>
536
- <a href="#gradient-boosting" class="toc-link toc-sub">Gradient Boosting</a>
537
- <a href="#xgboost" class="toc-link toc-sub">XGBoost</a>
 
 
538
  <a href="#random-forest" class="toc-link toc-sub">Random Forest</a>
539
  <a href="#ensemble-methods" class="toc-link toc-sub">Ensemble Methods Overview</a>
540
  </div>
@@ -560,6 +562,9 @@ canvas {
560
  <div class="toc-subcategory">
561
  <div class="toc-subcategory-title">Clustering</div>
562
  <a href="#kmeans" class="toc-link toc-sub">K-means Clustering</a>
 
 
 
563
  </div>
564
  <div class="toc-subcategory">
565
  <div class="toc-subcategory-title">Preprocessing</div>
@@ -4086,6 +4091,129 @@ New property: 1650 sq ft
4086
  </div>
4087
  </div>
4088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4089
  <!-- Section 19a: Gradient Boosting (NEW FROM PDF) -->
4090
  <div class="section" id="gradient-boosting">
4091
  <div class="section-header">
@@ -4301,6 +4429,55 @@ After 10 iterations:
4301
  </div>
4302
  </div>
4303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4304
  <!-- Section 19b: XGBoost (NEW FROM PDF) -->
4305
  <div class="section" id="xgboost">
4306
  <div class="section-header">
@@ -5069,6 +5246,213 @@ OOB MAE = Average of all errors = <strong style="color: #7ef0d4;">β‚Ή2.1L</stron
5069
  </div>
5070
  </div>
5071
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5072
  <!-- Section: Diagnostics -->
5073
  <div class="section" id="diagnostics">
5074
  <div class="section-header">
 
533
  <a href="#decision-trees" class="toc-link toc-sub">Decision Trees (Classification)</a>
534
  <a href="#bagging" class="toc-link toc-sub">Bagging</a>
535
  <a href="#boosting-adaboost" class="toc-link toc-sub">Boosting (AdaBoost)</a>
536
+ <a href="#gradient-boosting" class="toc-link toc-sub">Gradient Boosting (Regression)</a>
537
+ <a href="#gradient-boosting-classification" class="toc-link toc-sub">Gradient Boosting (Classification)</a>
538
+ <a href="#xgboost" class="toc-link toc-sub">XGBoost (Regression)</a>
539
+ <a href="#xgboost-classification" class="toc-link toc-sub">XGBoost (Classification)</a>
540
  <a href="#random-forest" class="toc-link toc-sub">Random Forest</a>
541
  <a href="#ensemble-methods" class="toc-link toc-sub">Ensemble Methods Overview</a>
542
  </div>
 
562
  <div class="toc-subcategory">
563
  <div class="toc-subcategory-title">Clustering</div>
564
  <a href="#kmeans" class="toc-link toc-sub">K-means Clustering</a>
565
+ <a href="#hierarchical-clustering" class="toc-link toc-sub">Hierarchical Clustering</a>
566
+ <a href="#dbscan" class="toc-link toc-sub">DBSCAN Clustering</a>
567
+ <a href="#clustering-evaluation" class="toc-link toc-sub">Clustering Evaluation</a>
568
  </div>
569
  <div class="toc-subcategory">
570
  <div class="toc-subcategory-title">Preprocessing</div>
 
4091
  </div>
4092
  </div>
4093
 
4094
+ <!-- Section 19a-NEW: Gradient Boosting Classification -->
4095
+ <div class="section" id="gradient-boosting-classification">
4096
+ <div class="section-header">
4097
+ <h2><span class="badge" style="background: rgba(106, 169, 255, 0.3); color: #6aa9ff;">📊 Supervised - Classification</span> Gradient Boosting Classification</h2>
4098
+ <button class="section-toggle collapsed">▼</button>
4099
+ </div>
4100
+ <div class="section-body">
4101
+ <p>Gradient Boosting for classification predicts probabilities using sequential trees that minimize log loss. Each tree corrects the previous model's errors by fitting to gradients!</p>
4102
+
4103
+ <div class="info-card">
4104
+ <div class="info-card-title">Simple Math Breakdown</div>
4105
+ <ul class="info-card-list">
4106
+ <li>Step 1: Start with log-odds F(0) = log(pos/neg)</li>
4107
+ <li>Step 2: Calculate gradient g = p - y</li>
4108
+ <li>Step 3: Build tree on gradients</li>
4109
+ <li>Step 4: Update F(x) = F(0) + lr × tree</li>
4110
+ <li>Step 5: Repeat to minimize errors</li>
4111
+ </ul>
4112
+ </div>
4113
+
4114
+ <div class="formula">
4115
+ <strong>Simple Explanation:</strong><br>
4116
+ Step 1: F(0) = log(positive_count / negative_count)<br>
4117
+ Step 2: g = p - y (how wrong we are)<br>
4118
+ Step 3: Build tree to fix errors<br>
4119
+ Step 4: F(x) = F(0) + learning_rate × tree(x)<br>
4120
+ Step 5: Repeat Steps 2-4 multiple times
4121
+ </div>
4122
+
4123
+ <h3>Real Example: House Price ≥ 170k</h3>
4124
+ <table class="data-table">
4125
+ <thead>
4126
+ <tr><th>ID</th><th>Size</th><th>Price</th><th>≥170k?</th></tr>
4127
+ </thead>
4128
+ <tbody>
4129
+ <tr><td>1</td><td>800</td><td>120k</td><td>0 (No)</td></tr>
4130
+ <tr><td>2</td><td>900</td><td>130k</td><td>0 (No)</td></tr>
4131
+ <tr><td>3</td><td>1000</td><td>150k</td><td>0 (No)</td></tr>
4132
+ <tr><td>4</td><td>1100</td><td>170k</td><td>1 (Yes)</td></tr>
4133
+ <tr><td>5</td><td>1200</td><td>200k</td><td>1 (Yes)</td></tr>
4134
+ </tbody>
4135
+ </table>
4136
+
4137
+ <div class="step">
4138
+ <div class="step-title">STEP 1: Initialize F(0)</div>
4139
+ <div class="step-calculation">
4140
+ F(0) = log(positive / negative)
4141
+ = log(2 / 3)
4142
+ = <strong style="color: #7ef0d4;">-0.405</strong>
4143
+
4144
+ Meaning: log-odds of -0.405, i.e. p = sigmoid(-0.405) ≈ 0.40 (a 40% initial chance of ≥170k)
4145
+ </div>
4146
+ </div>
4147
+
4148
+ <div class="step">
4149
+ <div class="step-title">STEP 2: Calculate Gradients</div>
4150
+ <div class="step-calculation">
4151
+ For House 1:
4152
+ p = sigmoid(-0.405) = <strong>0.4</strong> (40% probability)
4153
+ y = 0 (actual)
4154
+ gradient g = 0.4 - 0 = <strong style="color: #ff8c6a;">0.4</strong>
4155
+
4156
+ For House 4:
4157
+ p = sigmoid(-0.405) = 0.4
4158
+ y = 1 (actual)
4159
+ gradient g = 0.4 - 1 = <strong style="color: #7ef0d4;">-0.6</strong>
4160
+ </div>
4161
+ </div>
4162
+
4163
+ <div class="step">
4164
+ <div class="step-title">STEP 3: Find Best Split</div>
4165
+ <div class="step-calculation">
4166
+ Test split: Size &lt; 1050
4167
+
4168
+ <strong>Left (Size ≤ 1050):</strong> Houses 1,2,3
4169
+ Gradients: [0.4, 0.4, 0.4]
4170
+ Average = <strong>0.4</strong>
4171
+
4172
+ <strong>Right (Size &gt; 1050):</strong> Houses 4,5
4173
+ Gradients: [-0.6, -0.6]
4174
+ Average = <strong>-0.6</strong>
4175
+
4176
+ ✓ This split separates positive/negative gradients!
4177
+ </div>
4178
+ </div>
4179
+
4180
+ <div class="step">
4181
+ <div class="step-title">STEP 4: Update Predictions</div>
4182
+ <div class="step-calculation">
4183
+ F1(x) = F(0) + learning_rate × tree(x)
4184
+
4185
+ For House 1 (Size=800):
4186
+ F1(1) = -0.405 + 0.1 × (-0.4)
4187
+ = -0.405 - 0.04
4188
+ = <strong style="color: #7ef0d4;">-0.445</strong>
4189
+
4190
+ New probability = sigmoid(-0.445) = <strong>0.39</strong> ✓ Lower!
4191
+ </div>
4192
+ </div>
4193
+
4194
+ <div class="figure">
4195
+ <div class="figure-placeholder" style="height: 400px">
4196
+ <canvas id="gb-class-sequential-canvas"></canvas>
4197
+ </div>
4198
+ <p class="figure-caption"><strong>Figure 1:</strong> Sequential prediction updates across iterations</p>
4199
+ </div>
4200
+
4201
+ <div class="figure">
4202
+ <div class="figure-placeholder" style="height: 400px">
4203
+ <canvas id="gb-class-gradients-canvas"></canvas>
4204
+ </div>
4205
+ <p class="figure-caption"><strong>Figure 2:</strong> Gradient values per sample showing error correction</p>
4206
+ </div>
4207
+
4208
+ <div class="callout success">
4209
+ <div class="callout-title">✅ Key Takeaway</div>
4210
+ <div class="callout-content">
4211
+ Gradient Boosting Classification uses gradients (p - y) to sequentially build trees that correct probability predictions. Each tree reduces log loss by fitting to the errors!
4212
+ </div>
4213
+ </div>
4214
+ </div>
4215
+ </div>
4216
+
4217
  <!-- Section 19a: Gradient Boosting (NEW FROM PDF) -->
4218
  <div class="section" id="gradient-boosting">
4219
  <div class="section-header">
 
4429
  </div>
4430
  </div>
4431
 
4432
+ <!-- Section 19b-NEW: XGBoost Classification -->
4433
+ <div class="section" id="xgboost-classification">
4434
+ <div class="section-header">
4435
+ <h2><span class="badge" style="background: rgba(106, 169, 255, 0.3); color: #6aa9ff;">📊 Supervised - Classification</span> XGBoost Classification</h2>
4436
+ <button class="section-toggle collapsed">▼</button>
4437
+ </div>
4438
+ <div class="section-body">
4439
+ <p>XGBoost Classification adds Hessian (2nd derivative) and regularization to Gradient Boosting for better accuracy and less overfitting!</p>
4440
+
4441
+ <div class="info-card">
4442
+ <div class="info-card-title">Difference from Gradient Boosting</div>
4443
+ <ul class="info-card-list">
4444
+ <li>GB: Uses gradient g = p - y</li>
4445
+ <li>XGB: Uses gradient g AND Hessian h = p(1-p)</li>
4446
+ <li>XGB: Adds regularization λ to prevent overfitting</li>
4447
+ <li>XGB: Better gain calculation for splits</li>
4448
+ </ul>
4449
+ </div>
4450
+
4451
+ <div class="formula">
4452
+ <strong>Hessian Formula:</strong><br>
4453
+ h = p × (1 - p)<br>
4454
+ <br>
4455
+ Measures confidence of prediction:<br>
4456
+ • p = 0.5 → h = 0.25 (most uncertain)<br>
4457
+ • p = 0.9 → h = 0.09 (very confident)<br>
4458
+ <br>
4459
+ <strong>Gain Formula:</strong><br>
4460
+ Gain = GL²/(HL+λ) + GR²/(HR+λ) - Gp²/(Hp+λ)
4461
+ </div>
4462
+
4463
+ <div class="figure">
4464
+ <div class="figure-placeholder" style="height: 400px">
4465
+ <canvas id="xgb-class-hessian-canvas"></canvas>
4466
+ </div>
4467
+ <p class="figure-caption"><strong>Figure:</strong> Hessian values showing prediction confidence</p>
4468
+ </div>
4469
+
4470
+ <div class="callout success">
4471
+ <div class="callout-title">✅ Why XGBoost is Better</div>
4472
+ <div class="callout-content">
4473
+ Hessian gives curvature information → better optimization path<br>
4474
+ Regularization λ prevents overfitting → better generalization<br>
4475
+ Result: State-of-the-art accuracy on classification tasks!
4476
+ </div>
4477
+ </div>
4478
+ </div>
4479
+ </div>
4480
+
4481
  <!-- Section 19b: XGBoost (NEW FROM PDF) -->
4482
  <div class="section" id="xgboost">
4483
  <div class="section-header">
 
5246
  </div>
5247
  </div>
5248
 
5249
+ <!-- Section: Hierarchical Clustering -->
5250
+ <div class="section" id="hierarchical-clustering">
5251
+ <div class="section-header">
5252
+ <h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">🔍 Unsupervised - Clustering</span> Hierarchical Clustering</h2>
5253
+ <button class="section-toggle collapsed">▼</button>
5254
+ </div>
5255
+ <div class="section-body">
5256
+ <p>Hierarchical Clustering builds a tree of clusters by repeatedly merging the closest pairs. No need to specify K upfront!</p>
5257
+
5258
+ <div class="info-card">
5259
+ <div class="info-card-title">Simple Steps</div>
5260
+ <ul class="info-card-list">
5261
+ <li>Step 1: Start with each point as its own cluster</li>
5262
+ <li>Step 2: Find two closest clusters</li>
5263
+ <li>Step 3: Merge them into one cluster</li>
5264
+ <li>Step 4: Repeat until all in one cluster</li>
5265
+ <li>Result: Dendrogram tree showing hierarchy</li>
5266
+ </ul>
5267
+ </div>
5268
+
5269
+ <div class="formula">
5270
+ <strong>Distance Metrics:</strong><br>
5271
+ Euclidean: d = √((x2-x1)² + (y2-y1)²)<br>
5272
+ Manhattan: d = |x2-x1| + |y2-y1|<br>
5273
+ <br>
5274
+ <strong>Linkage Methods:</strong><br>
5275
+ • Complete: max distance between points of the two clusters<br>
5276
+ • Single: min distance between points of the two clusters<br>
5277
+ • Average: average distance over all cross-cluster point pairs<br>
5278
+ • Ward: minimizes the increase in within-cluster variance (BEST for most cases)
5279
+ </div>
5280
+
5281
+ <div class="figure">
5282
+ <div class="figure-placeholder" style="height: 450px">
5283
+ <canvas id="hierarchical-dendrogram-canvas"></canvas>
5284
+ </div>
5285
+ <p class="figure-caption"><strong>Figure:</strong> Dendrogram showing cluster merging history</p>
5286
+ </div>
5287
+
5288
+ <div class="callout info">
5289
+ <div class="callout-title">💡 When to Use</div>
5290
+ <div class="callout-content">
5291
+ ✓ Don't know number of clusters<br>
5292
+ ✓ Want to see cluster hierarchy<br>
5293
+ ✓ Small to medium datasets (&lt;5000 points)<br>
5294
+ ✓ Need interpretable results
5295
+ </div>
5296
+ </div>
5297
+ </div>
5298
+ </div>
5299
+
5300
+ <!-- Section: DBSCAN -->
5301
+ <div class="section" id="dbscan">
5302
+ <div class="section-header">
5303
+ <h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">🔍 Unsupervised - Clustering</span> DBSCAN Clustering</h2>
5304
+ <button class="section-toggle collapsed">▼</button>
5305
+ </div>
5306
+ <div class="section-body">
5307
+ <p>DBSCAN finds clusters of arbitrary shapes and automatically detects outliers! Based on density, not distance to centroids.</p>
5308
+
5309
+ <div class="info-card">
5310
+ <div class="info-card-title">Key Parameters</div>
5311
+ <ul class="info-card-list">
5312
+ <li>eps: Neighborhood radius (e.g., 0.4)</li>
5313
+ <li>min_samples: Minimum points in neighborhood (e.g., 3)</li>
5314
+ <li>Core point: Has ≥ min_samples within eps</li>
5315
+ <li>Border point: Near core point but not core itself</li>
5316
+ <li>Outlier: Not near any core point</li>
5317
+ </ul>
5318
+ </div>
5319
+
5320
+ <div class="formula">
5321
+ <strong>Simple Algorithm:</strong><br>
5322
+ Step 1: Pick random unvisited point<br>
5323
+ Step 2: Find all points within eps radius<br>
5324
+ Step 3: If count ≥ min_samples → Core point!<br>
5325
+ Step 4: Mark all reachable points in same cluster<br>
5326
+ Step 5: Move to next unvisited point<br>
5327
+ Step 6: Points alone = Outliers ❌
5328
+ </div>
5329
+
5330
+ <div class="step">
5331
+ <div class="step-title">Example: eps=0.4, min_samples=3</div>
5332
+ <div class="step-calculation">
5333
+ <strong>Point A at (1, 1):</strong>
5334
+ Points within 0.4 units: [A, B, C]
5335
+ Count = 3 ✓ Core point!
5336
+ Start Cluster 1 with A, B, C
5337
+
5338
+ <strong>Point D at (8, 8):</strong>
5339
+ Points within 0.4 units: [D, E]
5340
+ Count = 2 ✗ Not core
5341
+ But near core E → Border point in Cluster 2
5342
+
5343
+ <strong>Point G at (5, 5):</strong>
5344
+ No neighbors within 0.4
5345
+ Mark as <strong style="color: #ff8c6a;">OUTLIER</strong> ❌
5346
+ </div>
5347
+ </div>
5348
+
5349
+ <div class="figure">
5350
+ <div class="figure-placeholder" style="height: 450px">
5351
+ <canvas id="dbscan-clusters-canvas"></canvas>
5352
+ </div>
5353
+ <p class="figure-caption"><strong>Figure:</strong> DBSCAN showing core, border, and outlier points</p>
5354
+ </div>
5355
+
5356
+ <div class="callout success">
5357
+ <div class="callout-title">✅ Advantages</div>
5358
+ <div class="callout-content">
5359
+ ✓ Finds clusters of ANY shape<br>
5360
+ ✓ Automatically detects outliers<br>
5361
+ ✓ No need to specify number of clusters<br>
5362
+ ✓ Robust to noise
5363
+ </div>
5364
+ </div>
5365
+ </div>
5366
+ </div>
5367
+
5368
+ <!-- Section: Clustering Evaluation -->
5369
+ <div class="section" id="clustering-evaluation">
5370
+ <div class="section-header">
5371
+ <h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">🔍 Unsupervised - Evaluation</span> Clustering Evaluation Metrics</h2>
5372
+ <button class="section-toggle collapsed">▼</button>
5373
+ </div>
5374
+ <div class="section-body">
5375
+ <p>How do we know if our clustering is good? Use Silhouette Coefficient and Calinski-Harabasz Index!</p>
5376
+
5377
+ <div class="info-card">
5378
+ <div class="info-card-title">Key Metrics</div>
5379
+ <ul class="info-card-list">
5380
+ <li>Silhouette: Measures how well points fit in clusters</li>
5381
+ <li>Range: -1 to +1 (higher is better)</li>
5382
+ <li>Calinski-Harabasz: Between-cluster vs within-cluster variance</li>
5383
+ <li>Range: 0 to ∞ (higher is better)</li>
5384
+ </ul>
5385
+ </div>
5386
+
5387
+ <h3>Silhouette Coefficient</h3>
5388
+ <div class="formula">
5389
+ <strong>For each point:</strong><br>
5390
+ a = average distance to points in SAME cluster<br>
5391
+ b = average distance to points in the NEAREST OTHER cluster<br>
5392
+ <br>
5393
+ Silhouette = (b - a) / max(a, b)<br>
5394
+ <br>
5395
+ <strong>Interpretation:</strong><br>
5396
+ +0.7 to +1.0: Excellent clustering<br>
5397
+ +0.5 to +0.7: Good clustering<br>
5398
+ +0.25 to +0.5: Weak clustering<br>
5399
+ &lt; +0.25: Poor or no clustering
5400
+ </div>
5401
+
5402
+ <div class="step">
5403
+ <div class="step-title">Example Calculation</div>
5404
+ <div class="step-calculation">
5405
+ <strong>Point A in Cluster 1:</strong>
5406
+ Distance to other points in Cluster 1: [0.1, 0.2]
5407
+ a = average = <strong>0.15</strong>
5408
+
5409
+ Distance to points in the nearest other cluster (Cluster 2): [1.5, 1.8]
5410
+ b = average = <strong>1.65</strong>
5411
+
5412
+ Silhouette(A) = (1.65 - 0.15) / 1.65
5413
+ = 1.5 / 1.65
5414
+ = <strong style="color: #7ef0d4;">0.909</strong> ✓ Excellent!
5415
+ </div>
5416
+ </div>
5417
+
5418
+ <h3>Calinski-Harabasz Index</h3>
5419
+ <div class="formula">
5420
+ <strong>Formula:</strong><br>
5421
+ CH = [Between-cluster variance / (k - 1)] / [Within-cluster variance / (n - k)], where k = number of clusters and n = number of points<br>
5422
+ <br>
5423
+ <strong>Interpretation:</strong><br>
5424
+ 0-20: Poor clustering<br>
5425
+ 20-50: Okay clustering<br>
5426
+ 50-150: Good clustering<br>
5427
+ 150-500: Very good clustering<br>
5428
+ &gt; 500: Excellent clustering
5429
+ </div>
5430
+
5431
+ <div class="figure">
5432
+ <div class="figure-placeholder" style="height: 400px">
5433
+ <canvas id="silhouette-plot-canvas"></canvas>
5434
+ </div>
5435
+ <p class="figure-caption"><strong>Figure 1:</strong> Silhouette plot showing score per cluster</p>
5436
+ </div>
5437
+
5438
+ <div class="figure">
5439
+ <div class="figure-placeholder" style="height: 400px">
5440
+ <canvas id="ch-index-canvas"></canvas>
5441
+ </div>
5442
+ <p class="figure-caption"><strong>Figure 2:</strong> Calinski-Harabasz index vs number of clusters</p>
5443
+ </div>
5444
+
5445
+ <div class="callout info">
5446
+ <div class="callout-title">💡 Choosing the Right Metric</div>
5447
+ <div class="callout-content">
5448
+ <strong>Silhouette:</strong> Best for interpretability, shows per-point quality<br>
5449
+ <strong>CH Index:</strong> Fast to compute, good for finding optimal k<br>
5450
+ <strong>Both together:</strong> Most reliable assessment!
5451
+ </div>
5452
+ </div>
5453
+ </div>
5454
+ </div>
5455
+
5456
  <!-- Section: Diagnostics -->
5457
  <div class="section" id="diagnostics">
5458
  <div class="section-header">