`;
}
function matrixTable(rowLabels, colLabels, matrix, opts = {}) {
let max = 0;
matrix.forEach((row) => row.forEach((v) => { if (typeof v === "number" && v > max) max = v; }));
let html = `
Two real limitations: the vector size grows with every new word in the vocabulary, and a one-hot vector can't tell you that "cat" and "dog" are more alike than "cat" and "umbrella" — there's no notion of relationship or context.
Character-level bigrams+trigrams (analyzer='char_wb') on the same corpus produce ${data.charLevel.vocabularySize} features. Sample: ${data.charLevel.sample.map((s) => `"${escapeHtml(s)}"`).join(", ")} …
sklearn additionally L2-normalizes each row, so the exact numbers differ slightly from the hand-rolled version above — the ranking of important words matches.
`,
},
{ stage: 5, label: "Top 3 highest-weighted words per document", html: topWordsHtml },
]);
}
// ============================================================
// 6. WORD EMBEDDINGS
// ============================================================
function renderScatter(points) {
const w = 560, h = 340, pad = 40;
if (!points.length) return `
PCA reduces the 50-dimensional Word2Vec space to 2D so it can be drawn — distance and direction on this plot are only an approximation of similarity in the real, high-dimensional space.
No vocabulary produced — add a few more training sentences.
`,
},
{
stage: 2,
label: "Cosine similarity between word pairs, Skip-gram vs CBOW",
html: `
pair
skip-gram
cbow
${simRows.join("")}
`,
},
{
stage: 3,
label: "Most similar words (Skip-gram model)",
html: mostSimHtml || `
None of the probe words were found in this vocabulary — try adding "cat", "king" or "paris" to your sentences.
`,
},
{
stage: 4,
label: "All trained vectors, projected to 2D with PCA",
html: renderScatter(data.pcaPoints),
},
];
if (data.fastText && !data.fastText.error) {
items.push({
stage: 5,
label: "FastText: vectors for out-of-vocabulary words via character n-grams",
html: `
“${escapeHtml(data.fastText.inVocabWord)}” — seen during training
Why this works: ${escapeHtml(data.fastText.note)} Word2Vec, by contrast, would raise a KeyError for an unseen word — it has no concept of subword structure.
`,
});
}
reveal("out-embeddings", "tape-embeddings", items);
}
// ============================================================
// Navigation + wiring
// ============================================================
// Section key -> function that runs that section's demo.
// Looked up by activateSection (first activation of a section) and by the
// [data-run] / [data-reset] button handlers in wireRunButtons.
const RUNNERS = {
onehot: runOnehot,
count: runCount,
bow: runBow,
ngrams: runNgrams,
tfidf: runTfidf,
embeddings: runEmbeddings,
};
// Default input text, keyed by the DOM id of the field it belongs to.
// Multi-line corpora are written one sentence per entry and joined with "\n";
// the reset handler copies these values back into the matching elements.
const DEFAULT_TEXT = {
  "onehot-corpus": ["I love NLP", "NLP is fun", "I love coding"].join("\n"),
  "count-corpus": [
    "I love NLP and I love Python",
    "NLP is amazing and fun",
    "Python is great for NLP",
  ].join("\n"),
  "bow-corpus": [
    "the cat sat on the mat",
    "the dog sat on the log",
    "the cat and the dog are friends",
  ].join("\n"),
  "ngrams-sentence": "I love studying Natural Language Processing",
  "ngrams-corpus": [
    "I love NLP and machine learning",
    "machine learning is part of AI",
    "NLP is a branch of AI",
  ].join("\n"),
  "tfidf-corpus": [
    "I love NLP and machine learning",
    "machine learning is part of AI",
    "NLP is a branch of AI",
    "I love AI and deep learning",
  ].join("\n"),
  "embed-sentences": [
    "the cat sat on the mat",
    "the dog ran on the grass",
    "cats and dogs are pets",
    "i love my cat",
    "i love my dog",
    "king and queen are royalty",
    "man and woman are humans",
    "paris is the capital of france",
    "berlin is the capital of germany",
  ].join("\n"),
};
// Section keys whose runner has already been started automatically.
const autoRan = new Set();

/**
 * Show one section and hide the others, highlight the matching nav item,
 * jump to the top of the page, and run the section's demo the first time
 * the section is shown.
 *
 * @param {string} target Section key: the section element has id
 *   `sec-${target}` and nav items carry it in `data-target`.
 */
function activateSection(target) {
  $$(".section").forEach((s) => s.classList.toggle("active", s.id === `sec-${target}`));
  $$(".nav-item").forEach((b) => b.classList.toggle("active", b.dataset.target === target));

  // Jump without animation. The earlier `"instant" in window` test looked for
  // a window property called "instant", which never exists, so "instant" was
  // never actually requested. Ask for it directly and fall back only when the
  // engine rejects the value.
  try {
    window.scrollTo({ top: 0, behavior: "instant" });
  } catch (err) {
    if (!(err instanceof TypeError)) throw err;
    window.scrollTo(0, 0);
  }

  // Auto-run each section's demo once; later runs come from its buttons.
  if (RUNNERS[target] && !autoRan.has(target)) {
    autoRan.add(target);
    RUNNERS[target]();
  }
}
/**
 * Make every nav item and tech card activate the section named in its
 * `data-target` attribute when clicked.
 */
function wireNav() {
  const triggers = $$(".nav-item, .tech-card");
  for (const trigger of triggers) {
    // data-target is read at click time, not when the handler is attached.
    trigger.addEventListener("click", () => {
      activateSection(trigger.dataset.target);
    });
  }
}
/**
 * Attach click handlers to the run and reset buttons.
 *
 * - `[data-run="<key>"]` runs `RUNNERS[key]`, wrapped by `withLoading`.
 * - `[data-reset="<key>"]` restores that section's fields from DEFAULT_TEXT
 *   (plus the extra count-section controls) and then re-runs `RUNNERS[key]`.
 */
function wireRunButtons() {
  $$("[data-run]").forEach((btn) => {
    const key = btn.dataset.run;
    btn.addEventListener("click", withLoading(btn, RUNNERS[key]));
  });

  $$("[data-reset]").forEach((btn) => {
    btn.addEventListener("click", () => {
      const key = btn.dataset.reset;
      // The embeddings section is keyed "embeddings" in RUNNERS, but its field
      // id in DEFAULT_TEXT is "embed-sentences". Matching on `${key}-` alone
      // never found it, so Reset left the edited text in place for that
      // section. Map the key to the prefix the ids actually use.
      const idPrefix = `${key === "embeddings" ? "embed" : key}-`;

      Object.keys(DEFAULT_TEXT)
        .filter((id) => id.startsWith(idPrefix))
        .forEach((id) => {
          const el = document.getElementById(id);
          if (el) el.value = DEFAULT_TEXT[id];
        });

      // The count section has extra controls that are not in DEFAULT_TEXT.
      if (key === "count") {
        $("#count-maxfeatures").value = "";
        $("#count-newdoc").value = "";
        $("#count-stopwords").checked = false;
      }

      RUNNERS[key]();
    });
  });
}
// Attach the navigation and run/reset button handlers once the DOM is parsed.
document.addEventListener("DOMContentLoaded", function onDomReady() {
  wireNav();
  wireRunButtons();
});
})();