Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
12e3370
1
Parent(s): e4acf3b
simplified inference throughput visualization
Browse files
app/src/content/embeds/inference-throughput-compare.html
CHANGED
|
@@ -360,7 +360,7 @@
|
|
| 360 |
</div>
|
| 361 |
<div class="bottom-strip">
|
| 362 |
<div class="throughput-strip">
|
| 363 |
-
<span data-role="booksRateA">0
|
| 364 |
<span class="tps" data-role="tpsInlineA">(0 TPS)</span>
|
| 365 |
</div>
|
| 366 |
<div class="output-panel">
|
|
@@ -447,8 +447,7 @@
|
|
| 447 |
}
|
| 448 |
|
| 449 |
const embedConfig = readEmbedConfig();
|
| 450 |
-
const
|
| 451 |
-
const isCompareMode = modelCount === 2;
|
| 452 |
const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
|
| 453 |
const defaultModelB = String(embedConfig.modelB || 1724);
|
| 454 |
root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
|
|
@@ -465,7 +464,7 @@
|
|
| 465 |
const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
|
| 466 |
const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
|
| 467 |
|
| 468 |
-
const
|
| 469 |
{ gpus: 8, name: 'BERT', row: 'a1',
|
| 470 |
desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
|
| 471 |
{ gpus: 32, name: 'GPT-2', row: 'a1',
|
|
@@ -486,28 +485,7 @@
|
|
| 486 |
desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
|
| 487 |
];
|
| 488 |
|
| 489 |
-
const
|
| 490 |
-
{ gpus: 8, name: 'BERT', row: 'a1',
|
| 491 |
-
desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia.' },
|
| 492 |
-
{ gpus: 32, name: 'GPT-2', row: 'a1',
|
| 493 |
-
desc: '<b>GPT-2</b> (OpenAI, 2019)<br>\u224832 V100 GPUs. 1.5B params.' },
|
| 494 |
-
{ gpus: 384, name: 'BLOOM', row: 'a1',
|
| 495 |
-
desc: '<b>BLOOM</b> (BigScience, 2022)<br>384 A100 80GB GPUs. 176B params.' },
|
| 496 |
-
{ gpus: 2_048, name: 'Llama 1', row: 'a2',
|
| 497 |
-
desc: '<b>Llama 1</b> (Meta, 2023)<br>2,048 A100 GPUs. 65B params.' },
|
| 498 |
-
{ gpus: 2_788, name: 'DeepSeek', row: 'a1',
|
| 499 |
-
desc: '<b>DeepSeek V3</b> (DeepSeek, 2024)<br>2,048 H800 GPUs. 671B MoE params.' },
|
| 500 |
-
{ gpus: 10_000, name: 'GPT-3', row: 'a1',
|
| 501 |
-
desc: '<b>GPT-3</b> (OpenAI, 2020)<br>10,000 V100 GPUs. 175B params.' },
|
| 502 |
-
{ gpus: 16_384, name: 'Llama 3', row: 'a2',
|
| 503 |
-
desc: '<b>Llama 3</b> (Meta, 2024)<br>16,384 H100 GPUs. 405B params.' },
|
| 504 |
-
{ gpus: 25_000, name: 'GPT-4', row: 'a1',
|
| 505 |
-
desc: '<b>GPT-4</b> (OpenAI, 2023, estimated)<br>\u224825K A100 GPUs. \u22481.8T MoE params.' },
|
| 506 |
-
{ gpus: 50_000, name: 'GPT-5', row: 'a1',
|
| 507 |
-
desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs.' },
|
| 508 |
-
];
|
| 509 |
-
|
| 510 |
-
const INFRA_LANDMARKS_SINGLE = [
|
| 511 |
{ gpus: 1, name: '1 GPU', row: 'b1',
|
| 512 |
desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
|
| 513 |
{ gpus: 8, name: '1 node', row: 'b1',
|
|
@@ -534,33 +512,6 @@
|
|
| 534 |
desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
|
| 535 |
];
|
| 536 |
|
| 537 |
-
const INFRA_LANDMARKS_COMPARE = [
|
| 538 |
-
{ gpus: 1, name: '1 GPU', row: 'b1',
|
| 539 |
-
desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.' },
|
| 540 |
-
{ gpus: 8, name: '1 node', row: 'b1',
|
| 541 |
-
desc: '<b>DGX H100</b> \u2014 8\u00d7H100 SXM<br>640 GB HBM3, NVLink 900 GB/s.' },
|
| 542 |
-
{ gpus: 32, name: '1 rack', row: 'b1',
|
| 543 |
-
desc: '<b>DGX SuperPOD rack</b> \u2014 4\u00d7DGX H100<br>32 GPUs, 2.5 TB HBM3.' },
|
| 544 |
-
{ gpus: 256, name: 'SuperPOD', row: 'b1',
|
| 545 |
-
desc: '<b>DGX SuperPOD (1 SU)</b> \u2014 32 nodes, 256 GPUs.' },
|
| 546 |
-
{ gpus: 10_752, name: 'ALPS', row: 'b1',
|
| 547 |
-
desc: '<b>ALPS</b> \u2014 CSCS, Lugano<br>10,752 GH200 Grace-Hopper superchips.' },
|
| 548 |
-
{ gpus: 12_288, name: 'ByteDance', row: 'b2',
|
| 549 |
-
desc: '<b>ByteDance MegaScale</b><br>12,288 GPUs (A100/H800 mix).' },
|
| 550 |
-
{ gpus: 64_000, name: 'Stargate', row: 'b1',
|
| 551 |
-
desc: '<b>Stargate</b> \u2014 OpenAI / Oracle, Abilene, TX<br>64K GB200 GPUs (planned).' },
|
| 552 |
-
{ gpus: 100_000, name: 'Tencent', row: 'b2',
|
| 553 |
-
desc: '<b>Tencent Xingmai 2.0</b><br>100K GPUs in a single cluster.' },
|
| 554 |
-
{ gpus: 200_000, name: 'Colossus', row: 'b1',
|
| 555 |
-
desc: '<b>Colossus</b> \u2014 xAI, Memphis, TN<br>200K H100/H200 GPUs.' },
|
| 556 |
-
{ gpus: 250_000, name: 'CoreWeave', row: 'b2',
|
| 557 |
-
desc: '<b>CoreWeave</b> \u2014 250K+ GPUs across 32 data centers.' },
|
| 558 |
-
{ gpus: 600_000, name: 'Meta', row: 'b2',
|
| 559 |
-
desc: '<b>Meta AI fleet</b> \u2014 600K H100-equivalent GPUs.' },
|
| 560 |
-
{ gpus: 1_000_000, name: 'Colossus 2', row: 'b1',
|
| 561 |
-
desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs.' },
|
| 562 |
-
];
|
| 563 |
-
|
| 564 |
/**
 * Converts a GPU count into a position on the logarithmic slider scale.
 *
 * Counts below 1 are raised to 1 before the logarithm is taken. The result is
 * how far log(gpus) lies between LOG_MIN and LOG_MAX, expressed as a fraction
 * of that span.
 *
 * @param {number} gpus - GPU count to place on the slider.
 * @returns {number} 0 when log(gpus) equals LOG_MIN, 1 when it equals LOG_MAX.
 */
function gpusToSlider(gpus) {
  const flooredCount = Math.max(gpus, 1);
  const logSpan = LOG_MAX - LOG_MIN;
  return (Math.log(flooredCount) - LOG_MIN) / logSpan;
}
|
| 565 |
/**
 * Converts a slider position back into a whole GPU count.
 *
 * The position is scaled onto the LOG_MIN..LOG_MAX span, exponentiated, and
 * rounded to the nearest integer.
 *
 * @param {number} val - Slider position (0 maps to LOG_MIN, 1 maps to LOG_MAX).
 * @returns {number} GPU count rounded to the nearest integer.
 */
function sliderToGpus(val) {
  const logSpan = LOG_MAX - LOG_MIN;
  const logCount = LOG_MIN + val * logSpan;
  return Math.round(Math.exp(logCount));
}
|
| 566 |
|
|
@@ -586,13 +537,8 @@
|
|
| 586 |
|
| 587 |
datasetsTitleAEl.textContent = 'Time to generate dataset';
|
| 588 |
datasetsTitleBEl.textContent = 'Time to generate dataset';
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
booksRateAEl.textContent = '0 pages/sec';
|
| 592 |
-
booksRateBEl.textContent = '0 pages/sec';
|
| 593 |
-
} else {
|
| 594 |
-
booksRateAEl.textContent = '0 books/sec';
|
| 595 |
-
}
|
| 596 |
|
| 597 |
const themeTokens = {};
|
| 598 |
function refreshThemeTokens() {
|
|
@@ -651,10 +597,10 @@
|
|
| 651 |
el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
|
| 652 |
rowEls[lm.row].appendChild(el);
|
| 653 |
}
|
| 654 |
-
|
| 655 |
-
|
| 656 |
|
| 657 |
-
const
|
| 658 |
{ name: 'BookCorpus', tokens: 1e9,
|
| 659 |
desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
|
| 660 |
{ name: 'Wikipedia', tokens: 6e9,
|
|
@@ -669,22 +615,7 @@
|
|
| 669 |
desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
|
| 670 |
];
|
| 671 |
|
| 672 |
-
const
|
| 673 |
-
{ name: 'BookCorpus', tokens: 1e9,
|
| 674 |
-
desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com.' },
|
| 675 |
-
{ name: 'Wikipedia', tokens: 6e9,
|
| 676 |
-
desc: '<b>Multilingual Wikipedia</b><br>All articles across all 300+ language editions. ~6B tokens.' },
|
| 677 |
-
{ name: 'FinePhrase', tokens: 1e12,
|
| 678 |
-
desc: '<b>FinePhrase</b> (Hugging Face, 2026)<br>1T tokens of LLM-rephrased web text.' },
|
| 679 |
-
{ name: 'RedPajama', tokens: 100e12,
|
| 680 |
-
desc: '<b>RedPajama v2</b> (Together AI, 2023)<br>100T raw tokens from 84 Common Crawl snapshots.' },
|
| 681 |
-
{ name: 'Common Crawl', tokens: 3e15,
|
| 682 |
-
desc: '<b>Common Crawl</b> (ongoing since 2008)<br>The raw web archive. Petabytes of HTML from billions of pages.' },
|
| 683 |
-
{ name: 'The Internet', tokens: 100e15,
|
| 684 |
-
desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online.' },
|
| 685 |
-
];
|
| 686 |
-
|
| 687 |
-
const DATASETS = isCompareMode ? DATASETS_COMPARE : DATASETS_SINGLE;
|
| 688 |
|
| 689 |
// --- Utility functions ---
|
| 690 |
function formatNum(n) {
|
|
|
|
| 360 |
</div>
|
| 361 |
<div class="bottom-strip">
|
| 362 |
<div class="throughput-strip">
|
| 363 |
+
<span data-role="booksRateA">0 pages/sec</span>
|
| 364 |
<span class="tps" data-role="tpsInlineA">(0 TPS)</span>
|
| 365 |
</div>
|
| 366 |
<div class="output-panel">
|
|
|
|
| 447 |
}
|
| 448 |
|
| 449 |
const embedConfig = readEmbedConfig();
|
| 450 |
+
const isCompareMode = Number(embedConfig.modelCount) === 2;
|
|
|
|
| 451 |
const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
|
| 452 |
const defaultModelB = String(embedConfig.modelB || 1724);
|
| 453 |
root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
|
|
|
|
| 464 |
const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
|
| 465 |
const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
|
| 466 |
|
| 467 |
+
const TRAINING_RUNS = [
|
| 468 |
{ gpus: 8, name: 'BERT', row: 'a1',
|
| 469 |
desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
|
| 470 |
{ gpus: 32, name: 'GPT-2', row: 'a1',
|
|
|
|
| 485 |
desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
|
| 486 |
];
|
| 487 |
|
| 488 |
+
const INFRA_LANDMARKS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
{ gpus: 1, name: '1 GPU', row: 'b1',
|
| 490 |
desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
|
| 491 |
{ gpus: 8, name: '1 node', row: 'b1',
|
|
|
|
| 512 |
desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
|
| 513 |
];
|
| 514 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
/**
 * Converts a GPU count into a position on the logarithmic slider scale.
 *
 * Counts below 1 are raised to 1 before the logarithm is taken. The result is
 * how far log(gpus) lies between LOG_MIN and LOG_MAX, expressed as a fraction
 * of that span.
 *
 * @param {number} gpus - GPU count to place on the slider.
 * @returns {number} 0 when log(gpus) equals LOG_MIN, 1 when it equals LOG_MAX.
 */
function gpusToSlider(gpus) {
  const flooredCount = Math.max(gpus, 1);
  const logSpan = LOG_MAX - LOG_MIN;
  return (Math.log(flooredCount) - LOG_MIN) / logSpan;
}
|
| 516 |
/**
 * Converts a slider position back into a whole GPU count.
 *
 * The position is scaled onto the LOG_MIN..LOG_MAX span, exponentiated, and
 * rounded to the nearest integer.
 *
 * @param {number} val - Slider position (0 maps to LOG_MIN, 1 maps to LOG_MAX).
 * @returns {number} GPU count rounded to the nearest integer.
 */
function sliderToGpus(val) {
  const logSpan = LOG_MAX - LOG_MIN;
  const logCount = LOG_MIN + val * logSpan;
  return Math.round(Math.exp(logCount));
}
|
| 517 |
|
|
|
|
| 537 |
|
| 538 |
datasetsTitleAEl.textContent = 'Time to generate dataset';
|
| 539 |
datasetsTitleBEl.textContent = 'Time to generate dataset';
|
| 540 |
+
booksRateAEl.textContent = '0 pages/sec';
|
| 541 |
+
booksRateBEl.textContent = '0 pages/sec';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
const themeTokens = {};
|
| 544 |
function refreshThemeTokens() {
|
|
|
|
| 597 |
el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
|
| 598 |
rowEls[lm.row].appendChild(el);
|
| 599 |
}
|
| 600 |
+
TRAINING_RUNS.forEach(addLandmark);
|
| 601 |
+
INFRA_LANDMARKS.forEach(addLandmark);
|
| 602 |
|
| 603 |
+
const DATASETS_BASE = [
|
| 604 |
{ name: 'BookCorpus', tokens: 1e9,
|
| 605 |
desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
|
| 606 |
{ name: 'Wikipedia', tokens: 6e9,
|
|
|
|
| 615 |
desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
|
| 616 |
];
|
| 617 |
|
| 618 |
+
const DATASETS = DATASETS_BASE;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
|
| 620 |
// --- Utility functions ---
|
| 621 |
function formatNum(n) {
|