Spaces:
Running
Running
Commit ·
ad51ad1
1
Parent(s): 5b394bc
update ml play list
Browse files- ml_complete-all-topics/app.js +567 -0
- ml_complete-all-topics/index.html +386 -2
ml_complete-all-topics/app.js
CHANGED
|
@@ -236,6 +236,11 @@ function initSections() {
|
|
| 236 |
if (section.id === 'boosting-adaboost') initBoostingAdaBoost();
|
| 237 |
if (section.id === 'random-forest') initRandomForest();
|
| 238 |
if (section.id === 'ensemble-methods') initEnsembleMethods();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
if (section.id === 'diagnostics') {
|
| 240 |
// Wait for all visualizations to initialize
|
| 241 |
setTimeout(showDiagnostics, 500);
|
|
@@ -6435,6 +6440,568 @@ function showDiagnosticDetails(filter) {
|
|
| 6435 |
container.innerHTML = html;
|
| 6436 |
}
|
| 6437 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6438 |
// Handle window resize
|
| 6439 |
let resizeTimer;
|
| 6440 |
window.addEventListener('resize', () => {
|
|
|
|
| 236 |
if (section.id === 'boosting-adaboost') initBoostingAdaBoost();
|
| 237 |
if (section.id === 'random-forest') initRandomForest();
|
| 238 |
if (section.id === 'ensemble-methods') initEnsembleMethods();
|
| 239 |
+
if (section.id === 'gradient-boosting-classification') initGradientBoostingClassification();
|
| 240 |
+
if (section.id === 'xgboost-classification') initXGBoostClassification();
|
| 241 |
+
if (section.id === 'hierarchical-clustering') initHierarchicalClustering();
|
| 242 |
+
if (section.id === 'dbscan') initDBSCAN();
|
| 243 |
+
if (section.id === 'clustering-evaluation') initClusteringEvaluation();
|
| 244 |
if (section.id === 'diagnostics') {
|
| 245 |
// Wait for all visualizations to initialize
|
| 246 |
setTimeout(showDiagnostics, 500);
|
|
|
|
| 6440 |
container.innerHTML = html;
|
| 6441 |
}
|
| 6442 |
|
| 6443 |
+
// NEW VISUALIZATIONS FOR ADDED TOPICS
|
| 6444 |
+
|
| 6445 |
+
// Gradient Boosting Classification
|
| 6446 |
+
/**
 * Lazily renders the two charts of the Gradient Boosting (classification)
 * section.
 *
 * Each canvas is looked up by id and drawn at most once: the canvas is
 * stamped with `data-initialized="true"` right before its draw function
 * runs, so later calls skip it. Canvases missing from the page are ignored.
 */
function initGradientBoostingClassification() {
  // Draw functions are wrapped in arrows so they are only resolved when a
  // chart actually needs rendering.
  const charts = [
    ['gb-class-sequential-canvas', () => drawGBClassSequential()],
    ['gb-class-gradients-canvas', () => drawGBClassGradients()],
  ];

  for (const [canvasId, render] of charts) {
    const target = document.getElementById(canvasId);
    if (!target || target.dataset.initialized) continue;

    target.dataset.initialized = 'true';
    render();
  }
}
|
| 6459 |
+
|
| 6460 |
+
/**
 * Draws the "Probability Updates" line chart for the Gradient Boosting
 * classification section.
 *
 * Two hard-coded example series show the predicted P(y=1) per boosting
 * iteration: House 1 (true label 0) falls from 0.4 to 0.22, House 4 (true
 * label 1) rises from 0.4 to 0.78. The chart configuration is handed to
 * `createVerifiedVisualization` (defined outside this block; the config has
 * the shape of a Chart.js configuration).
 *
 * Returns without doing anything when the canvas is not on the page.
 */
function drawGBClassSequential() {
  const canvas = document.getElementById('gb-class-sequential-canvas');
  if (!canvas) return;

  // Labels are categories, so the step from iteration 5 to 10 is drawn with
  // the same spacing as the single-iteration steps.
  const iterations = [0, 1, 2, 3, 4, 5, 10];
  const house1 = [0.4, 0.39, 0.37, 0.35, 0.33, 0.31, 0.22];
  const house4 = [0.4, 0.43, 0.47, 0.52, 0.57, 0.62, 0.78];

  createVerifiedVisualization('gb-class-sequential-canvas', {
    type: 'line',
    data: {
      labels: iterations,
      datasets: [
        {
          // Trend arrows are written as escapes so they survive re-encoding
          // (the previous literal had degraded to a stray Greek letter).
          label: 'House 1 (y=0): Probability \u2193',
          data: house1,
          borderColor: '#7ef0d4',
          backgroundColor: 'rgba(126, 240, 212, 0.1)',
          borderWidth: 3,
          fill: true
        },
        {
          label: 'House 4 (y=1): Probability \u2191',
          data: house4,
          borderColor: '#6aa9ff',
          backgroundColor: 'rgba(106, 169, 255, 0.1)',
          borderWidth: 3,
          fill: true
        }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        title: {
          display: true,
          text: 'Gradient Boosting Classification: Probability Updates',
          color: '#e8eef6',
          font: { size: 16 }
        },
        legend: { labels: { color: '#a9b4c2' } }
      },
      scales: {
        x: {
          title: { display: true, text: 'Iteration', color: '#a9b4c2' },
          grid: { color: '#2a3544' },
          ticks: { color: '#a9b4c2' }
        },
        y: {
          title: { display: true, text: 'P(y=1)', color: '#a9b4c2' },
          grid: { color: '#2a3544' },
          ticks: { color: '#a9b4c2' },
          // Probabilities: pin the axis to the full [0, 1] range.
          min: 0,
          max: 1
        }
      }
    }
  }, 'GB Classification', 'Sequential Updates');
}
|
| 6520 |
+
|
| 6521 |
+
/**
 * Draws the grouped bar chart comparing per-house gradients (p - y) at
 * iteration 0 and iteration 5 for the Gradient Boosting classification
 * section.
 *
 * The values are fixed illustrative numbers. The configuration is passed to
 * `createVerifiedVisualization` (defined outside this block). Nothing is
 * drawn when the canvas is missing.
 *
 * NOTE(review): the iteration-5 values here (e.g. 0.1 for House 1) do not
 * match p - y from the probabilities charted by drawGBClassSequential
 * (0.31 at iteration 5) — confirm which data set is intended.
 */
function drawGBClassGradients() {
  const canvasId = 'gb-class-gradients-canvas';
  if (!document.getElementById(canvasId)) return;

  const mutedText = '#a9b4c2';
  // Fresh objects per axis so no option object is shared between scales.
  const gridStyle = () => ({ color: '#2a3544' });
  const tickStyle = () => ({ color: mutedText });

  const houseNames = [1, 2, 3, 4, 5].map((n) => `House ${n}`);

  const initialGradients = {
    label: 'Iteration 0 Gradients',
    data: [0.4, 0.4, 0.4, -0.6, -0.6],
    backgroundColor: '#ff8c6a',
  };
  const laterGradients = {
    label: 'Iteration 5 Gradients',
    data: [0.1, 0.08, 0.09, -0.15, -0.12],
    backgroundColor: '#7ef0d4',
  };

  const chartOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      title: {
        display: true,
        text: 'Gradient Values: Shrinking Over Iterations',
        color: '#e8eef6',
        font: { size: 16 },
      },
      legend: { labels: { color: mutedText } },
    },
    scales: {
      x: {
        grid: gridStyle(),
        ticks: tickStyle(),
      },
      y: {
        title: { display: true, text: 'Gradient (p - y)', color: mutedText },
        grid: gridStyle(),
        ticks: tickStyle(),
      },
    },
  };

  createVerifiedVisualization(
    canvasId,
    {
      type: 'bar',
      data: { labels: houseNames, datasets: [initialGradients, laterGradients] },
      options: chartOptions,
    },
    'GB Classification',
    'Gradient Values',
  );
}
|
| 6568 |
+
|
| 6569 |
+
// XGBoost Classification
|
| 6570 |
+
/**
 * Renders the XGBoost classification chart the first time it is requested.
 *
 * The canvas is marked with `data-initialized="true"` just before drawing,
 * so subsequent calls return early. A missing canvas is ignored.
 */
function initXGBoostClassification() {
  const hessianCanvas = document.getElementById('xgb-class-hessian-canvas');
  if (!hessianCanvas) return;
  if (hessianCanvas.dataset.initialized) return;

  hessianCanvas.dataset.initialized = 'true';
  drawXGBClassHessian();
}
|
| 6577 |
+
|
| 6578 |
+
/**
 * Draws the dual-axis bar chart of per-house gradients and Hessians for the
 * XGBoost classification section.
 *
 * Gradients are plotted against the left axis (`y`) and Hessians against the
 * right axis (`y1`). All numbers are fixed illustrative values. The
 * configuration is passed to `createVerifiedVisualization` (defined outside
 * this block). Nothing is drawn when the canvas is missing.
 */
function drawXGBClassHessian() {
  const canvasId = 'xgb-class-hessian-canvas';
  if (!document.getElementById(canvasId)) return;

  const gradientColor = '#6aa9ff';
  const hessianColor = '#7ef0d4';
  const mutedText = '#a9b4c2';
  const gridColor = '#2a3544';

  const houses = ['House 1', 'House 2', 'House 3', 'House 4', 'House 5'];

  const gradientSeries = {
    label: 'Gradient (g)',
    data: [0.4, 0.4, 0.4, -0.6, -0.6],
    backgroundColor: gradientColor,
    yAxisID: 'y',
  };
  const hessianSeries = {
    label: 'Hessian (h)',
    data: [0.24, 0.24, 0.24, 0.24, 0.24],
    backgroundColor: hessianColor,
    yAxisID: 'y1',
  };

  const scales = {
    x: {
      grid: { color: gridColor },
      ticks: { color: mutedText },
    },
    // Left axis: gradients, titled in the gradient bar colour.
    y: {
      type: 'linear',
      position: 'left',
      title: { display: true, text: 'Gradient', color: gradientColor },
      grid: { color: gridColor },
      ticks: { color: mutedText },
    },
    // Right axis: Hessians; its grid is hidden so only one grid is shown.
    y1: {
      type: 'linear',
      position: 'right',
      title: { display: true, text: 'Hessian', color: hessianColor },
      grid: { display: false },
      ticks: { color: mutedText },
    },
  };

  const config = {
    type: 'bar',
    data: { labels: houses, datasets: [gradientSeries, hessianSeries] },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        title: {
          display: true,
          text: 'XGBoost: Gradient + Hessian Information',
          color: '#e8eef6',
          font: { size: 16 },
        },
        legend: { labels: { color: mutedText } },
      },
      scales,
    },
  };

  createVerifiedVisualization(canvasId, config, 'XGBoost Classification', 'Hessian Values');
}
|
| 6640 |
+
|
| 6641 |
+
// Hierarchical Clustering
|
| 6642 |
+
/**
 * Draws the hierarchical-clustering dendrogram the first time it is
 * requested.
 *
 * The canvas is marked with `data-initialized="true"` just before drawing,
 * so subsequent calls return early. A missing canvas is ignored.
 */
function initHierarchicalClustering() {
  const dendrogramCanvas = document.getElementById('hierarchical-dendrogram-canvas');
  if (!dendrogramCanvas) return;
  if (dendrogramCanvas.dataset.initialized) return;

  dendrogramCanvas.dataset.initialized = 'true';
  drawHierarchicalDendrogram();
}
|
| 6649 |
+
|
| 6650 |
+
/**
 * Paints a fixed six-point dendrogram (A–F) on the
 * `hierarchical-dendrogram-canvas` element using the 2D canvas API.
 *
 * The canvas is resized to its current layout width and a height of 450px,
 * filled with the dark background, and then drawn bottom-up: leaf points and
 * labels, one bracket per merge, a title, and a rotated "Distance" label.
 * The outcome is reported through `logViz` (defined outside this block):
 * 'failed' when the canvas is missing, 'success' after drawing.
 *
 * Every bracket is derived from the merge list: a merged cluster is placed
 * at the horizontal midpoint of its two children, so each connector rises
 * from the centre of the bar below it. (The previous hand-typed offsets for
 * the final merge were off-centre.)
 */
function drawHierarchicalDendrogram() {
  const canvas = document.getElementById('hierarchical-dendrogram-canvas');
  if (!canvas) {
    logViz('Hierarchical Clustering', 'Dendrogram', 'failed', 'Canvas not found');
    return;
  }

  const ctx = canvas.getContext('2d');
  // Size the drawing surface to the element's current layout width.
  const width = canvas.width = canvas.offsetWidth;
  const height = canvas.height = 450;

  ctx.clearRect(0, 0, width, height);
  ctx.fillStyle = '#1a2332';
  ctx.fillRect(0, 0, width, height);

  const padding = 60;
  const labels = ['A', 'B', 'C', 'D', 'E', 'F'];
  const pointSpacing = (width - 2 * padding) / labels.length;
  const pointY = height - 40;

  // nodes[0..5] are the leaves; every merge appends one more node, so merge
  // k creates nodes[labels.length + k].
  const nodes = labels.map((label, i) => ({
    label,
    x: padding + i * pointSpacing + pointSpacing / 2,
    y: pointY,
  }));

  // Draw points at bottom
  labels.forEach((label, i) => {
    const { x } = nodes[i];

    ctx.fillStyle = '#7ef0d4';
    ctx.beginPath();
    ctx.arc(x, pointY, 6, 0, 2 * Math.PI);
    ctx.fill();

    ctx.fillStyle = '#e8eef6';
    ctx.font = 'bold 12px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText(label, x, pointY + 20);
  });

  // Merge history. `left`/`right` index into `nodes`; `y` is the canvas
  // y-coordinate of the bracket's horizontal bar (smaller y = drawn higher).
  const merges = [
    { left: 0, right: 1, y: 320 }, // A + B         -> node 6
    { left: 3, right: 4, y: 330 }, // D + E         -> node 7
    { left: 6, right: 2, y: 220 }, // (A,B) + C     -> node 8
    { left: 7, right: 5, y: 200 }, // (D,E) + F     -> node 9
    { left: 8, right: 9, y: 80 },  // (A,B,C) + (D,E,F)
  ];

  ctx.strokeStyle = '#6aa9ff';
  ctx.lineWidth = 2;

  for (const { left, right, y } of merges) {
    const a = nodes[left];
    const b = nodes[right];

    // Bracket: up from the left child, across, down to the right child.
    ctx.beginPath();
    ctx.moveTo(a.x, a.y);
    ctx.lineTo(a.x, y);
    ctx.lineTo(b.x, y);
    ctx.lineTo(b.x, b.y);
    ctx.stroke();

    // The merged cluster hangs from the middle of its bar.
    nodes.push({ x: (a.x + b.x) / 2, y });
  }

  // Title
  ctx.fillStyle = '#7ef0d4';
  ctx.font = 'bold 16px sans-serif';
  ctx.textAlign = 'center';
  ctx.fillText('Dendrogram: Cluster Merging History', width / 2, 30);

  // Y-axis label, rotated to read bottom-to-top along the left edge
  ctx.fillStyle = '#a9b4c2';
  ctx.font = '12px sans-serif';
  ctx.save();
  ctx.translate(20, height / 2);
  ctx.rotate(-Math.PI / 2);
  ctx.fillText('Distance', 0, 0);
  ctx.restore();

  logViz('Hierarchical Clustering', 'Dendrogram', 'success');
}
|
| 6765 |
+
|
| 6766 |
+
// DBSCAN
|
| 6767 |
+
/**
 * Draws the DBSCAN cluster illustration the first time it is requested.
 *
 * The canvas is marked with `data-initialized="true"` just before drawing,
 * so subsequent calls return early. A missing canvas is ignored.
 */
function initDBSCAN() {
  const clustersCanvas = document.getElementById('dbscan-clusters-canvas');
  if (!clustersCanvas) return;
  if (clustersCanvas.dataset.initialized) return;

  clustersCanvas.dataset.initialized = 'true';
  drawDBSCANClusters();
}
|
| 6774 |
+
|
| 6775 |
+
/**
 * Paints the DBSCAN illustration (core, border and outlier points) on the
 * `dbscan-clusters-canvas` element using the 2D canvas API.
 *
 * The canvas is resized to its current layout width and a height of 450px.
 * Points live in a 0–10 data space that is mapped onto the padded drawing
 * area. The outcome is reported through `logViz` (defined outside this
 * block): 'failed' when the canvas is missing, 'success' after drawing.
 *
 * Changes from the previous version:
 *  - eps neighbourhoods are drawn for the core points of BOTH clusters
 *    (only the first cluster had them).
 *  - each neighbourhood is scaled per axis. The x and y axes map 10 data
 *    units onto different pixel lengths, so a pixel circle sized from the
 *    chart width over-stated eps vertically; an ellipse with one radius per
 *    axis marks the actual eps region.
 */
function drawDBSCANClusters() {
  const canvas = document.getElementById('dbscan-clusters-canvas');
  if (!canvas) {
    logViz('DBSCAN', 'Clusters Visualization', 'failed', 'Canvas not found');
    return;
  }

  const ctx = canvas.getContext('2d');
  // Size the drawing surface to the element's current layout width.
  const width = canvas.width = canvas.offsetWidth;
  const height = canvas.height = 450;

  ctx.clearRect(0, 0, width, height);
  ctx.fillStyle = '#1a2332';
  ctx.fillRect(0, 0, width, height);

  const padding = 60;
  const chartWidth = width - 2 * padding;
  const chartHeight = height - 2 * padding;

  // Data space is 0..10 on both axes; canvas y grows downwards, so flip it.
  const scaleX = (x) => padding + (x / 10) * chartWidth;
  const scaleY = (y) => height - padding - (y / 10) * chartHeight;

  const eps = 1.5;
  const epsRadiusX = (eps / 10) * chartWidth;
  const epsRadiusY = (eps / 10) * chartHeight;

  // Core points (cluster 1)
  const core1 = [{x: 1, y: 1}, {x: 1.2, y: 1.5}, {x: 1.5, y: 1.2}];
  // Core points (cluster 2)
  const core2 = [{x: 8, y: 8}, {x: 8.2, y: 8.5}, {x: 8.5, y: 8.2}];
  // Border points
  const border = [{x: 2.2, y: 2}];
  // Outliers
  const outliers = [{x: 5, y: 5}, {x: 4.5, y: 6}];

  const clusters = [
    { points: core1, fill: '#7ef0d4', ring: 'rgba(126, 240, 212, 0.3)' },
    { points: core2, fill: '#6aa9ff', ring: 'rgba(106, 169, 255, 0.3)' },
  ];

  // Draw dashed eps neighbourhoods around every core point
  ctx.lineWidth = 1;
  ctx.setLineDash([3, 3]);
  clusters.forEach(({ points, ring }) => {
    ctx.strokeStyle = ring;
    points.forEach((p) => {
      ctx.beginPath();
      ctx.ellipse(scaleX(p.x), scaleY(p.y), epsRadiusX, epsRadiusY, 0, 0, 2 * Math.PI);
      ctx.stroke();
    });
  });
  ctx.setLineDash([]);

  // Draw core points: filled disc with a background-coloured outline
  clusters.forEach(({ points, fill }) => {
    points.forEach((p) => {
      ctx.fillStyle = fill;
      ctx.beginPath();
      ctx.arc(scaleX(p.x), scaleY(p.y), 10, 0, 2 * Math.PI);
      ctx.fill();
      ctx.strokeStyle = '#1a2332';
      ctx.lineWidth = 2;
      ctx.stroke();
    });
  });

  // Draw border points
  border.forEach((p) => {
    ctx.fillStyle = '#ffb490';
    ctx.beginPath();
    ctx.arc(scaleX(p.x), scaleY(p.y), 8, 0, 2 * Math.PI);
    ctx.fill();
  });

  // Draw outliers as hollow rings
  outliers.forEach((p) => {
    ctx.strokeStyle = '#ff8c6a';
    ctx.lineWidth = 3;
    ctx.beginPath();
    ctx.arc(scaleX(p.x), scaleY(p.y), 8, 0, 2 * Math.PI);
    ctx.stroke();
  });

  // Legend
  ctx.fillStyle = '#7ef0d4';
  ctx.beginPath();
  ctx.arc(padding + 20, 30, 8, 0, 2 * Math.PI);
  ctx.fill();
  ctx.fillStyle = '#e8eef6';
  ctx.font = '12px sans-serif';
  ctx.textAlign = 'left';
  ctx.fillText('Core points', padding + 35, 35);

  ctx.fillStyle = '#ffb490';
  ctx.beginPath();
  ctx.arc(padding + 140, 30, 8, 0, 2 * Math.PI);
  ctx.fill();
  ctx.fillStyle = '#e8eef6';
  ctx.fillText('Border points', padding + 155, 35);

  ctx.strokeStyle = '#ff8c6a';
  ctx.lineWidth = 3;
  ctx.beginPath();
  ctx.arc(padding + 270, 30, 8, 0, 2 * Math.PI);
  ctx.stroke();
  ctx.fillStyle = '#e8eef6';
  ctx.fillText('Outliers', padding + 285, 35);

  // Title (placed along the bottom edge; the legend occupies the top)
  ctx.fillStyle = '#7ef0d4';
  ctx.font = 'bold 16px sans-serif';
  ctx.textAlign = 'center';
  ctx.fillText('DBSCAN: Core, Border, and Outlier Points', width / 2, height - 10);

  logViz('DBSCAN', 'Clusters Visualization', 'success');
}
|
| 6891 |
+
|
| 6892 |
+
// Clustering Evaluation
|
| 6893 |
+
/**
 * Lazily renders the two charts of the Clustering Evaluation section
 * (silhouette plot and Calinski-Harabasz index).
 *
 * Each canvas is looked up by id and drawn at most once: the canvas is
 * stamped with `data-initialized="true"` right before its draw function
 * runs, so later calls skip it. Canvases missing from the page are ignored.
 */
function initClusteringEvaluation() {
  // Draw functions are wrapped in arrows so they are only resolved when a
  // chart actually needs rendering.
  const charts = [
    ['silhouette-plot-canvas', () => drawSilhouettePlot()],
    ['ch-index-canvas', () => drawCHIndex()],
  ];

  for (const [canvasId, render] of charts) {
    const target = document.getElementById(canvasId);
    if (!target || target.dataset.initialized) continue;

    target.dataset.initialized = 'true';
    render();
  }
}
|
| 6906 |
+
|
| 6907 |
+
/**
 * Draws the horizontal bar chart of silhouette coefficients (three cluster
 * averages plus the overall average) for the Clustering Evaluation section.
 *
 * The values are fixed illustrative numbers. The configuration is passed to
 * `createVerifiedVisualization` (defined outside this block). Nothing is
 * drawn when the canvas is missing.
 *
 * The chart title is derived from the data: the "All Above 0.7" wording is
 * used only when every bar really exceeds the threshold. With the current
 * numbers Cluster 2 is 0.68, so the title reports the overall average
 * instead of making a claim the bars contradict.
 */
function drawSilhouettePlot() {
  const canvas = document.getElementById('silhouette-plot-canvas');
  if (!canvas) return;

  const scores = [0.72, 0.68, 0.81, 0.74];
  // Threshold used by the chart's own "Excellent" wording.
  const excellentThreshold = 0.7;
  // The last bar is the overall average (see labels below).
  const overall = scores[scores.length - 1];
  const titleText = scores.every((score) => score > excellentThreshold)
    ? 'Silhouette Coefficients: All Above 0.7 = Excellent!'
    : `Silhouette Coefficients: Overall ${overall} (Above ${excellentThreshold} = Excellent!)`;

  createVerifiedVisualization('silhouette-plot-canvas', {
    type: 'bar',
    data: {
      labels: ['Cluster 1 Avg', 'Cluster 2 Avg', 'Cluster 3 Avg', 'Overall'],
      datasets: [{
        label: 'Silhouette Coefficient',
        data: scores,
        backgroundColor: ['#7ef0d4', '#6aa9ff', '#ffb490', '#ff8c6a'],
        borderColor: ['#7ef0d4', '#6aa9ff', '#ffb490', '#ff8c6a'],
        borderWidth: 2
      }]
    },
    options: {
      // Horizontal bars: categories on the y axis, values on the x axis.
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        title: {
          display: true,
          text: titleText,
          color: '#e8eef6',
          font: { size: 16 }
        },
        legend: { display: false }
      },
      scales: {
        x: {
          title: { display: true, text: 'Silhouette Coefficient', color: '#a9b4c2' },
          grid: { color: '#2a3544' },
          ticks: { color: '#a9b4c2' },
          min: 0,
          max: 1
        },
        y: {
          grid: { display: false },
          ticks: { color: '#a9b4c2' }
        }
      }
    }
  }, 'Clustering Evaluation', 'Silhouette Plot');
}
|
| 6952 |
+
|
| 6953 |
+
/**
 * Draws the Calinski-Harabasz index line chart (score versus number of
 * clusters k) for the Clustering Evaluation section.
 *
 * The scores are fixed illustrative numbers; the point for k = 3 is drawn
 * larger and in a different colour. The configuration is passed to
 * `createVerifiedVisualization` (defined outside this block). Nothing is
 * drawn when the canvas is missing.
 */
function drawCHIndex() {
  const canvasId = 'ch-index-canvas';
  if (!document.getElementById(canvasId)) return;

  const lineColor = '#6aa9ff';
  const mutedText = '#a9b4c2';
  const gridColor = '#2a3544';

  const kValues = [2, 3, 4, 5, 6, 7, 8];
  const chScores = [89, 234, 187, 145, 112, 95, 78];

  // Builds a per-point style array: one value for k = 3, another elsewhere.
  const perPoint = (atThree, elsewhere) =>
    kValues.map((k) => (k === 3 ? atThree : elsewhere));

  const series = {
    label: 'Calinski-Harabasz Index',
    data: chScores,
    borderColor: lineColor,
    backgroundColor: 'rgba(106, 169, 255, 0.1)',
    borderWidth: 3,
    fill: true,
    pointRadius: perPoint(10, 6),
    pointBackgroundColor: perPoint('#7ef0d4', lineColor),
    pointBorderWidth: perPoint(3, 2),
  };

  const options = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      title: {
        display: true,
        text: 'Calinski-Harabasz Index: Optimal k = 3',
        color: '#e8eef6',
        font: { size: 16 },
      },
      legend: { labels: { color: mutedText } },
    },
    scales: {
      x: {
        title: { display: true, text: 'Number of Clusters (k)', color: mutedText },
        grid: { color: gridColor },
        ticks: { color: mutedText },
      },
      y: {
        title: { display: true, text: 'CH Index (higher is better)', color: mutedText },
        grid: { color: gridColor },
        ticks: { color: mutedText },
        min: 0,
      },
    },
  };

  createVerifiedVisualization(
    canvasId,
    { type: 'line', data: { labels: kValues, datasets: [series] }, options },
    'Clustering Evaluation',
    'CH Index',
  );
}
|
| 7004 |
+
|
| 7005 |
// Handle window resize
|
| 7006 |
let resizeTimer;
|
| 7007 |
window.addEventListener('resize', () => {
|
ml_complete-all-topics/index.html
CHANGED
|
@@ -533,8 +533,10 @@ canvas {
|
|
| 533 |
<a href="#decision-trees" class="toc-link toc-sub">Decision Trees (Classification)</a>
|
| 534 |
<a href="#bagging" class="toc-link toc-sub">Bagging</a>
|
| 535 |
<a href="#boosting-adaboost" class="toc-link toc-sub">Boosting (AdaBoost)</a>
|
| 536 |
-
<a href="#gradient-boosting" class="toc-link toc-sub">Gradient Boosting</a>
|
| 537 |
-
<a href="#
|
|
|
|
|
|
|
| 538 |
<a href="#random-forest" class="toc-link toc-sub">Random Forest</a>
|
| 539 |
<a href="#ensemble-methods" class="toc-link toc-sub">Ensemble Methods Overview</a>
|
| 540 |
</div>
|
|
@@ -560,6 +562,9 @@ canvas {
|
|
| 560 |
<div class="toc-subcategory">
|
| 561 |
<div class="toc-subcategory-title">Clustering</div>
|
| 562 |
<a href="#kmeans" class="toc-link toc-sub">K-means Clustering</a>
|
|
|
|
|
|
|
|
|
|
| 563 |
</div>
|
| 564 |
<div class="toc-subcategory">
|
| 565 |
<div class="toc-subcategory-title">Preprocessing</div>
|
|
@@ -4086,6 +4091,129 @@ New property: 1650 sq ft
|
|
| 4086 |
</div>
|
| 4087 |
</div>
|
| 4088 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4089 |
<!-- Section 19a: Gradient Boosting (NEW FROM PDF) -->
|
| 4090 |
<div class="section" id="gradient-boosting">
|
| 4091 |
<div class="section-header">
|
|
@@ -4301,6 +4429,55 @@ After 10 iterations:
|
|
| 4301 |
</div>
|
| 4302 |
</div>
|
| 4303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4304 |
<!-- Section 19b: XGBoost (NEW FROM PDF) -->
|
| 4305 |
<div class="section" id="xgboost">
|
| 4306 |
<div class="section-header">
|
|
@@ -5069,6 +5246,213 @@ OOB MAE = Average of all errors = <strong style="color: #7ef0d4;">βΉ2.1L</stron
|
|
| 5069 |
</div>
|
| 5070 |
</div>
|
| 5071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5072 |
<!-- Section: Diagnostics -->
|
| 5073 |
<div class="section" id="diagnostics">
|
| 5074 |
<div class="section-header">
|
|
|
|
| 533 |
<a href="#decision-trees" class="toc-link toc-sub">Decision Trees (Classification)</a>
|
| 534 |
<a href="#bagging" class="toc-link toc-sub">Bagging</a>
|
| 535 |
<a href="#boosting-adaboost" class="toc-link toc-sub">Boosting (AdaBoost)</a>
|
| 536 |
+
<a href="#gradient-boosting" class="toc-link toc-sub">Gradient Boosting (Regression)</a>
|
| 537 |
+
<a href="#gradient-boosting-classification" class="toc-link toc-sub">Gradient Boosting (Classification)</a>
|
| 538 |
+
<a href="#xgboost" class="toc-link toc-sub">XGBoost (Regression)</a>
|
| 539 |
+
<a href="#xgboost-classification" class="toc-link toc-sub">XGBoost (Classification)</a>
|
| 540 |
<a href="#random-forest" class="toc-link toc-sub">Random Forest</a>
|
| 541 |
<a href="#ensemble-methods" class="toc-link toc-sub">Ensemble Methods Overview</a>
|
| 542 |
</div>
|
|
|
|
| 562 |
<div class="toc-subcategory">
|
| 563 |
<div class="toc-subcategory-title">Clustering</div>
|
| 564 |
<a href="#kmeans" class="toc-link toc-sub">K-means Clustering</a>
|
| 565 |
+
<a href="#hierarchical-clustering" class="toc-link toc-sub">Hierarchical Clustering</a>
|
| 566 |
+
<a href="#dbscan" class="toc-link toc-sub">DBSCAN Clustering</a>
|
| 567 |
+
<a href="#clustering-evaluation" class="toc-link toc-sub">Clustering Evaluation</a>
|
| 568 |
</div>
|
| 569 |
<div class="toc-subcategory">
|
| 570 |
<div class="toc-subcategory-title">Preprocessing</div>
|
|
|
|
| 4091 |
</div>
|
| 4092 |
</div>
|
| 4093 |
|
| 4094 |
+
<!-- Section 19a-NEW: Gradient Boosting Classification -->
|
| 4095 |
+
<div class="section" id="gradient-boosting-classification">
|
| 4096 |
+
<div class="section-header">
|
| 4097 |
+
<h2><span class="badge" style="background: rgba(106, 169, 255, 0.3); color: #6aa9ff;">π Supervised - Classification</span> Gradient Boosting Classification</h2>
|
| 4098 |
+
<button class="section-toggle collapsed">▼</button>
|
| 4099 |
+
</div>
|
| 4100 |
+
<div class="section-body">
|
| 4101 |
+
<p>Gradient Boosting for classification predicts probabilities using sequential trees that minimize log loss. Each tree corrects the previous model's errors by fitting to gradients!</p>
|
| 4102 |
+
|
| 4103 |
+
<div class="info-card">
|
| 4104 |
+
<div class="info-card-title">Simple Math Breakdown</div>
|
| 4105 |
+
<ul class="info-card-list">
|
| 4106 |
+
<li>Step 1: Start with log-odds F(0) = log(pos/neg)</li>
|
| 4107 |
+
<li>Step 2: Calculate gradient g = p - y</li>
|
| 4108 |
+
<li>Step 3: Build tree on gradients</li>
|
| 4109 |
+
<li>Step 4: Update F(x) = F(0) + lr × tree</li>
|
| 4110 |
+
<li>Step 5: Repeat to minimize errors</li>
|
| 4111 |
+
</ul>
|
| 4112 |
+
</div>
|
| 4113 |
+
|
| 4114 |
+
<div class="formula">
|
| 4115 |
+
<strong>Simple Explanation:</strong><br>
|
| 4116 |
+
Step 1: F(0) = log(positive_count / negative_count)<br>
|
| 4117 |
+
Step 2: g = p - y (how wrong we are)<br>
|
| 4118 |
+
Step 3: Build tree to fix errors<br>
|
| 4119 |
+
Step 4: F(x) = F(0) + learning_rate × tree(x)<br>
|
| 4120 |
+
Step 5: Repeat Steps 2-4 multiple times
|
| 4121 |
+
</div>
|
| 4122 |
+
|
| 4123 |
+
<h3>Real Example: House Price ≥ 170k</h3>
|
| 4124 |
+
<table class="data-table">
|
| 4125 |
+
<thead>
|
| 4126 |
+
<tr><th>ID</th><th>Size</th><th>Price</th><th>≥170k?</th></tr>
|
| 4127 |
+
</thead>
|
| 4128 |
+
<tbody>
|
| 4129 |
+
<tr><td>1</td><td>800</td><td>120k</td><td>0 (No)</td></tr>
|
| 4130 |
+
<tr><td>2</td><td>900</td><td>130k</td><td>0 (No)</td></tr>
|
| 4131 |
+
<tr><td>3</td><td>1000</td><td>150k</td><td>0 (No)</td></tr>
|
| 4132 |
+
<tr><td>4</td><td>1100</td><td>170k</td><td>1 (Yes)</td></tr>
|
| 4133 |
+
<tr><td>5</td><td>1200</td><td>200k</td><td>1 (Yes)</td></tr>
|
| 4134 |
+
</tbody>
|
| 4135 |
+
</table>
|
| 4136 |
+
|
| 4137 |
+
<div class="step">
|
| 4138 |
+
<div class="step-title">STEP 1: Initialize F(0)</div>
|
| 4139 |
+
<div class="step-calculation">
|
| 4140 |
+
F(0) = log(positive / negative)
|
| 4141 |
+
= log(2 / 3)
|
| 4142 |
+
= <strong style="color: #7ef0d4;">-0.405</strong>
|
| 4143 |
+
|
| 4144 |
+
Meaning: log-odds of -0.405, i.e. initial probability sigmoid(-0.405) = 0.40 (40% chance of ≥170k)
|
| 4145 |
+
</div>
|
| 4146 |
+
</div>
|
| 4147 |
+
|
| 4148 |
+
<div class="step">
|
| 4149 |
+
<div class="step-title">STEP 2: Calculate Gradients</div>
|
| 4150 |
+
<div class="step-calculation">
|
| 4151 |
+
For House 1:
|
| 4152 |
+
p = sigmoid(-0.405) = <strong>0.4</strong> (40% probability)
|
| 4153 |
+
y = 0 (actual)
|
| 4154 |
+
gradient g = 0.4 - 0 = <strong style="color: #ff8c6a;">0.4</strong>
|
| 4155 |
+
|
| 4156 |
+
For House 4:
|
| 4157 |
+
p = sigmoid(-0.405) = 0.4
|
| 4158 |
+
y = 1 (actual)
|
| 4159 |
+
gradient g = 0.4 - 1 = <strong style="color: #7ef0d4;">-0.6</strong>
|
| 4160 |
+
</div>
|
| 4161 |
+
</div>
|
| 4162 |
+
|
| 4163 |
+
<div class="step">
|
| 4164 |
+
<div class="step-title">STEP 3: Find Best Split</div>
|
| 4165 |
+
<div class="step-calculation">
|
| 4166 |
+
Test split: Size ≤ 1050
|
| 4167 |
+
|
| 4168 |
+
<strong>Left (Size ≤ 1050):</strong> Houses 1,2,3
|
| 4169 |
+
Gradients: [0.4, 0.4, 0.4]
|
| 4170 |
+
Average = <strong>0.4</strong>
|
| 4171 |
+
|
| 4172 |
+
<strong>Right (Size > 1050):</strong> Houses 4,5
|
| 4173 |
+
Gradients: [-0.6, -0.6]
|
| 4174 |
+
Average = <strong>-0.6</strong>
|
| 4175 |
+
|
| 4176 |
+
✓ This split separates positive/negative gradients!
|
| 4177 |
+
</div>
|
| 4178 |
+
</div>
|
| 4179 |
+
|
| 4180 |
+
<div class="step">
|
| 4181 |
+
<div class="step-title">STEP 4: Update Predictions</div>
|
| 4182 |
+
<div class="step-calculation">
|
| 4183 |
+
F1(x) = F(0) + learning_rate × tree(x), where tree(x) = -(average gradient in the leaf)
|
| 4184 |
+
|
| 4185 |
+
For House 1 (Size=800):
|
| 4186 |
+
F1(1) = -0.405 + 0.1 × (-0.4)
|
| 4187 |
+
= -0.405 - 0.04
|
| 4188 |
+
= <strong style="color: #7ef0d4;">-0.445</strong>
|
| 4189 |
+
|
| 4190 |
+
New probability = sigmoid(-0.445) = <strong>0.39</strong> → Lower!
|
| 4191 |
+
</div>
|
| 4192 |
+
</div>
|
| 4193 |
+
|
| 4194 |
+
<div class="figure">
|
| 4195 |
+
<div class="figure-placeholder" style="height: 400px">
|
| 4196 |
+
<canvas id="gb-class-sequential-canvas"></canvas>
|
| 4197 |
+
</div>
|
| 4198 |
+
<p class="figure-caption"><strong>Figure 1:</strong> Sequential prediction updates across iterations</p>
|
| 4199 |
+
</div>
|
| 4200 |
+
|
| 4201 |
+
<div class="figure">
|
| 4202 |
+
<div class="figure-placeholder" style="height: 400px">
|
| 4203 |
+
<canvas id="gb-class-gradients-canvas"></canvas>
|
| 4204 |
+
</div>
|
| 4205 |
+
<p class="figure-caption"><strong>Figure 2:</strong> Gradient values per sample showing error correction</p>
|
| 4206 |
+
</div>
|
| 4207 |
+
|
| 4208 |
+
<div class="callout success">
|
| 4209 |
+
<div class="callout-title">✅ Key Takeaway</div>
|
| 4210 |
+
<div class="callout-content">
|
| 4211 |
+
Gradient Boosting Classification uses gradients (p - y) to sequentially build trees that correct probability predictions. Each tree reduces log loss by fitting to the errors!
|
| 4212 |
+
</div>
|
| 4213 |
+
</div>
|
| 4214 |
+
</div>
|
| 4215 |
+
</div>
|
| 4216 |
+
|
| 4217 |
<!-- Section 19a: Gradient Boosting (NEW FROM PDF) -->
|
| 4218 |
<div class="section" id="gradient-boosting">
|
| 4219 |
<div class="section-header">
|
|
|
|
| 4429 |
</div>
|
| 4430 |
</div>
|
| 4431 |
|
| 4432 |
+
<!-- Section 19b-NEW: XGBoost Classification -->
|
| 4433 |
+
<div class="section" id="xgboost-classification">
|
| 4434 |
+
<div class="section-header">
|
| 4435 |
+
<h2><span class="badge" style="background: rgba(106, 169, 255, 0.3); color: #6aa9ff;">π Supervised - Classification</span> XGBoost Classification</h2>
|
| 4436 |
+
<button class="section-toggle collapsed">▼</button>
|
| 4437 |
+
</div>
|
| 4438 |
+
<div class="section-body">
|
| 4439 |
+
<p>XGBoost Classification adds Hessian (2nd derivative) and regularization to Gradient Boosting for better accuracy and less overfitting!</p>
|
| 4440 |
+
|
| 4441 |
+
<div class="info-card">
|
| 4442 |
+
<div class="info-card-title">Difference from Gradient Boosting</div>
|
| 4443 |
+
<ul class="info-card-list">
|
| 4444 |
+
<li>GB: Uses gradient g = p - y</li>
|
| 4445 |
+
<li>XGB: Uses gradient g AND Hessian h = p(1-p)</li>
|
| 4446 |
+
<li>XGB: Adds regularization λ to prevent overfitting</li>
|
| 4447 |
+
<li>XGB: Better gain calculation for splits</li>
|
| 4448 |
+
</ul>
|
| 4449 |
+
</div>
|
| 4450 |
+
|
| 4451 |
+
<div class="formula">
|
| 4452 |
+
<strong>Hessian Formula:</strong><br>
|
| 4453 |
+
h = p × (1 - p)<br>
|
| 4454 |
+
<br>
|
| 4455 |
+
Measures confidence of prediction:<br>
|
| 4456 |
+
• p = 0.5 → h = 0.25 (most uncertain)<br>
|
| 4457 |
+
• p = 0.9 → h = 0.09 (very confident)<br>
|
| 4458 |
+
<br>
|
| 4459 |
+
<strong>Gain Formula:</strong><br>
|
| 4460 |
+
Gain = GL²/(HL+λ) + GR²/(HR+λ) - Gp²/(Hp+λ)
|
| 4461 |
+
</div>
|
| 4462 |
+
|
| 4463 |
+
<div class="figure">
|
| 4464 |
+
<div class="figure-placeholder" style="height: 400px">
|
| 4465 |
+
<canvas id="xgb-class-hessian-canvas"></canvas>
|
| 4466 |
+
</div>
|
| 4467 |
+
<p class="figure-caption"><strong>Figure:</strong> Hessian values showing prediction confidence</p>
|
| 4468 |
+
</div>
|
| 4469 |
+
|
| 4470 |
+
<div class="callout success">
|
| 4471 |
+
<div class="callout-title">✅ Why XGBoost is Better</div>
|
| 4472 |
+
<div class="callout-content">
|
| 4473 |
+
Hessian gives curvature information → better optimization path<br>
|
| 4474 |
+
Regularization λ prevents overfitting → better generalization<br>
|
| 4475 |
+
Result: State-of-the-art accuracy on classification tasks!
|
| 4476 |
+
</div>
|
| 4477 |
+
</div>
|
| 4478 |
+
</div>
|
| 4479 |
+
</div>
|
| 4480 |
+
|
| 4481 |
<!-- Section 19b: XGBoost (NEW FROM PDF) -->
|
| 4482 |
<div class="section" id="xgboost">
|
| 4483 |
<div class="section-header">
|
|
|
|
| 5246 |
</div>
|
| 5247 |
</div>
|
| 5248 |
|
| 5249 |
+
<!-- Section: Hierarchical Clustering -->
|
| 5250 |
+
<div class="section" id="hierarchical-clustering">
|
| 5251 |
+
<div class="section-header">
|
| 5252 |
+
<h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">π Unsupervised - Clustering</span> Hierarchical Clustering</h2>
|
| 5253 |
+
<button class="section-toggle collapsed">▼</button>
|
| 5254 |
+
</div>
|
| 5255 |
+
<div class="section-body">
|
| 5256 |
+
<p>Hierarchical Clustering builds a tree of clusters by repeatedly merging the closest pairs. No need to specify K upfront!</p>
|
| 5257 |
+
|
| 5258 |
+
<div class="info-card">
|
| 5259 |
+
<div class="info-card-title">Simple Steps</div>
|
| 5260 |
+
<ul class="info-card-list">
|
| 5261 |
+
<li>Step 1: Start with each point as its own cluster</li>
|
| 5262 |
+
<li>Step 2: Find two closest clusters</li>
|
| 5263 |
+
<li>Step 3: Merge them into one cluster</li>
|
| 5264 |
+
<li>Step 4: Repeat until all in one cluster</li>
|
| 5265 |
+
<li>Result: Dendrogram tree showing hierarchy</li>
|
| 5266 |
+
</ul>
|
| 5267 |
+
</div>
|
| 5268 |
+
|
| 5269 |
+
<div class="formula">
|
| 5270 |
+
<strong>Distance Metrics:</strong><br>
|
| 5271 |
+
Euclidean: d = √((x2-x1)² + (y2-y1)²)<br>
|
| 5272 |
+
Manhattan: d = |x2-x1| + |y2-y1|<br>
|
| 5273 |
+
<br>
|
| 5274 |
+
<strong>Linkage Methods:</strong><br>
|
| 5275 |
+
• Complete: max distance between any two points, one from each cluster<br>
|
| 5276 |
+
• Single: min distance between any two points, one from each cluster<br>
|
| 5277 |
+
• Average: average distance over all pairs of points, one from each cluster<br>
|
| 5278 |
+
• Ward: merges the two clusters whose union least increases within-cluster variance (BEST for most cases)
|
| 5279 |
+
</div>
|
| 5280 |
+
|
| 5281 |
+
<div class="figure">
|
| 5282 |
+
<div class="figure-placeholder" style="height: 450px">
|
| 5283 |
+
<canvas id="hierarchical-dendrogram-canvas"></canvas>
|
| 5284 |
+
</div>
|
| 5285 |
+
<p class="figure-caption"><strong>Figure:</strong> Dendrogram showing cluster merging history</p>
|
| 5286 |
+
</div>
|
| 5287 |
+
|
| 5288 |
+
<div class="callout info">
|
| 5289 |
+
<div class="callout-title">💡 When to Use</div>
|
| 5290 |
+
<div class="callout-content">
|
| 5291 |
+
✓ Don't know number of clusters<br>
|
| 5292 |
+
✓ Want to see cluster hierarchy<br>
|
| 5293 |
+
✓ Small to medium datasets (&lt;5000 points)<br>
|
| 5294 |
+
✓ Need interpretable results
|
| 5295 |
+
</div>
|
| 5296 |
+
</div>
|
| 5297 |
+
</div>
|
| 5298 |
+
</div>
|
| 5299 |
+
|
| 5300 |
+
<!-- Section: DBSCAN -->
|
| 5301 |
+
<div class="section" id="dbscan">
|
| 5302 |
+
<div class="section-header">
|
| 5303 |
+
<h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">π Unsupervised - Clustering</span> DBSCAN Clustering</h2>
|
| 5304 |
+
<button class="section-toggle collapsed">▼</button>
|
| 5305 |
+
</div>
|
| 5306 |
+
<div class="section-body">
|
| 5307 |
+
<p>DBSCAN finds clusters of arbitrary shapes and automatically detects outliers! Based on density, not distance to centroids.</p>
|
| 5308 |
+
|
| 5309 |
+
<div class="info-card">
|
| 5310 |
+
<div class="info-card-title">Key Parameters</div>
|
| 5311 |
+
<ul class="info-card-list">
|
| 5312 |
+
<li>eps: Neighborhood radius (e.g., 0.4)</li>
|
| 5313 |
+
<li>min_samples: Minimum points in neighborhood (e.g., 3)</li>
|
| 5314 |
+
<li>Core point: Has ≥ min_samples points (counting itself) within eps</li>
|
| 5315 |
+
<li>Border point: Near core point but not core itself</li>
|
| 5316 |
+
<li>Outlier: Not near any core point</li>
|
| 5317 |
+
</ul>
|
| 5318 |
+
</div>
|
| 5319 |
+
|
| 5320 |
+
<div class="formula">
|
| 5321 |
+
<strong>Simple Algorithm:</strong><br>
|
| 5322 |
+
Step 1: Pick random unvisited point<br>
|
| 5323 |
+
Step 2: Find all points within eps radius<br>
|
| 5324 |
+
Step 3: If count ≥ min_samples → Core point!<br>
|
| 5325 |
+
Step 4: Mark all reachable points in same cluster<br>
|
| 5326 |
+
Step 5: Move to next unvisited point<br>
|
| 5327 |
+
Step 6: Points not reachable from any core point = Outliers ❌
|
| 5328 |
+
</div>
|
| 5329 |
+
|
| 5330 |
+
<div class="step">
|
| 5331 |
+
<div class="step-title">Example: eps=0.4, min_samples=3</div>
|
| 5332 |
+
<div class="step-calculation">
|
| 5333 |
+
<strong>Point A at (1, 1):</strong>
|
| 5334 |
+
Points within 0.4 units: [A, B, C]
|
| 5335 |
+
Count = 3 ≥ min_samples → Core point!
|
| 5336 |
+
Start Cluster 1 with A, B, C
|
| 5337 |
+
|
| 5338 |
+
<strong>Point D at (8, 8):</strong>
|
| 5339 |
+
Points within 0.4 units: [D, E]
|
| 5340 |
+
Count = 2 → Not core
|
| 5341 |
+
But near core E → Border point in Cluster 2
|
| 5342 |
+
|
| 5343 |
+
<strong>Point G at (5, 5):</strong>
|
| 5344 |
+
No neighbors within 0.4
|
| 5345 |
+
Mark as <strong style="color: #ff8c6a;">OUTLIER</strong> ❌
|
| 5346 |
+
</div>
|
| 5347 |
+
</div>
|
| 5348 |
+
|
| 5349 |
+
<div class="figure">
|
| 5350 |
+
<div class="figure-placeholder" style="height: 450px">
|
| 5351 |
+
<canvas id="dbscan-clusters-canvas"></canvas>
|
| 5352 |
+
</div>
|
| 5353 |
+
<p class="figure-caption"><strong>Figure:</strong> DBSCAN showing core, border, and outlier points</p>
|
| 5354 |
+
</div>
|
| 5355 |
+
|
| 5356 |
+
<div class="callout success">
|
| 5357 |
+
<div class="callout-title">✅ Advantages</div>
|
| 5358 |
+
<div class="callout-content">
|
| 5359 |
+
✓ Finds clusters of ANY shape<br>
|
| 5360 |
+
✓ Automatically detects outliers<br>
|
| 5361 |
+
✓ No need to specify number of clusters<br>
|
| 5362 |
+
✓ Robust to noise
|
| 5363 |
+
</div>
|
| 5364 |
+
</div>
|
| 5365 |
+
</div>
|
| 5366 |
+
</div>
|
| 5367 |
+
|
| 5368 |
+
<!-- Section: Clustering Evaluation -->
|
| 5369 |
+
<div class="section" id="clustering-evaluation">
|
| 5370 |
+
<div class="section-header">
|
| 5371 |
+
<h2><span class="badge" style="background: rgba(126, 240, 212, 0.3); color: #7ef0d4;">π Unsupervised - Evaluation</span> Clustering Evaluation Metrics</h2>
|
| 5372 |
+
<button class="section-toggle collapsed">▼</button>
|
| 5373 |
+
</div>
|
| 5374 |
+
<div class="section-body">
|
| 5375 |
+
<p>How do we know if our clustering is good? Use Silhouette Coefficient and Calinski-Harabasz Index!</p>
|
| 5376 |
+
|
| 5377 |
+
<div class="info-card">
|
| 5378 |
+
<div class="info-card-title">Key Metrics</div>
|
| 5379 |
+
<ul class="info-card-list">
|
| 5380 |
+
<li>Silhouette: Measures how well points fit in clusters</li>
|
| 5381 |
+
<li>Range: -1 to +1 (higher is better)</li>
|
| 5382 |
+
<li>Calinski-Harabasz: Between-cluster vs within-cluster variance</li>
|
| 5383 |
+
<li>Range: 0 to ∞ (higher is better)</li>
|
| 5384 |
+
</ul>
|
| 5385 |
+
</div>
|
| 5386 |
+
|
| 5387 |
+
<h3>Silhouette Coefficient</h3>
|
| 5388 |
+
<div class="formula">
|
| 5389 |
+
<strong>For each point:</strong><br>
|
| 5390 |
+
a = average distance to points in SAME cluster<br>
|
| 5391 |
+
b = average distance to points in NEAREST cluster<br>
|
| 5392 |
+
<br>
|
| 5393 |
+
Silhouette = (b - a) / max(a, b)<br>
|
| 5394 |
+
<br>
|
| 5395 |
+
<strong>Interpretation:</strong><br>
|
| 5396 |
+
+0.7 to +1.0: Excellent clustering<br>
|
| 5397 |
+
+0.5 to +0.7: Good clustering<br>
|
| 5398 |
+
+0.25 to +0.5: Weak clustering<br>
|
| 5399 |
+
< +0.25: Poor or no clustering
|
| 5400 |
+
</div>
|
| 5401 |
+
|
| 5402 |
+
<div class="step">
|
| 5403 |
+
<div class="step-title">Example Calculation</div>
|
| 5404 |
+
<div class="step-calculation">
|
| 5405 |
+
<strong>Point A in Cluster 1:</strong>
|
| 5406 |
+
Distance to other points in Cluster 1: [0.1, 0.2]
|
| 5407 |
+
a = average = <strong>0.15</strong>
|
| 5408 |
+
|
| 5409 |
+
Distance to the points in the nearest other cluster (Cluster 2): [1.5, 1.8]
|
| 5410 |
+
b = average = <strong>1.65</strong>
|
| 5411 |
+
|
| 5412 |
+
Silhouette(A) = (1.65 - 0.15) / 1.65
|
| 5413 |
+
= 1.5 / 1.65
|
| 5414 |
+
= <strong style="color: #7ef0d4;">0.909</strong> → Excellent!
|
| 5415 |
+
</div>
|
| 5416 |
+
</div>
|
| 5417 |
+
|
| 5418 |
+
<h3>Calinski-Harabasz Index</h3>
|
| 5419 |
+
<div class="formula">
|
| 5420 |
+
<strong>Formula:</strong><br>
|
| 5421 |
+
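CH = (Between-cluster variance) / (Within-cluster variance) = [B / (k - 1)] / [W / (n - k)], where B and W are the between- and within-cluster sums of squares, k = number of clusters, n = number of points<br>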
|
| 5422 |
+
<br>
|
| 5423 |
+
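<strong>Interpretation (rough rule of thumb; the scale depends on the dataset, so compare scores across k on the same data):</strong><br>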
|
| 5424 |
+
0-20: Poor clustering<br>
|
| 5425 |
+
20-50: Okay clustering<br>
|
| 5426 |
+
50-150: Good clustering<br>
|
| 5427 |
+
150-500: Very good clustering<br>
|
| 5428 |
+
> 500: Excellent clustering
|
| 5429 |
+
</div>
|
| 5430 |
+
|
| 5431 |
+
<div class="figure">
|
| 5432 |
+
<div class="figure-placeholder" style="height: 400px">
|
| 5433 |
+
<canvas id="silhouette-plot-canvas"></canvas>
|
| 5434 |
+
</div>
|
| 5435 |
+
<p class="figure-caption"><strong>Figure 1:</strong> Silhouette plot showing score per cluster</p>
|
| 5436 |
+
</div>
|
| 5437 |
+
|
| 5438 |
+
<div class="figure">
|
| 5439 |
+
<div class="figure-placeholder" style="height: 400px">
|
| 5440 |
+
<canvas id="ch-index-canvas"></canvas>
|
| 5441 |
+
</div>
|
| 5442 |
+
<p class="figure-caption"><strong>Figure 2:</strong> Calinski-Harabasz index vs number of clusters</p>
|
| 5443 |
+
</div>
|
| 5444 |
+
|
| 5445 |
+
<div class="callout info">
|
| 5446 |
+
<div class="callout-title">💡 Choosing the Right Metric</div>
|
| 5447 |
+
<div class="callout-content">
|
| 5448 |
+
<strong>Silhouette:</strong> Best for interpretability, shows per-point quality<br>
|
| 5449 |
+
<strong>CH Index:</strong> Fast to compute, good for finding optimal k<br>
|
| 5450 |
+
<strong>Both together:</strong> Most reliable assessment!
|
| 5451 |
+
</div>
|
| 5452 |
+
</div>
|
| 5453 |
+
</div>
|
| 5454 |
+
</div>
|
| 5455 |
+
|
| 5456 |
<!-- Section: Diagnostics -->
|
| 5457 |
<div class="section" id="diagnostics">
|
| 5458 |
<div class="section-header">
|