joelniklaus HF Staff committed on
Commit
12e3370
·
1 Parent(s): e4acf3b

simplified inference throughput visualization

Browse files
app/src/content/embeds/inference-throughput-compare.html CHANGED
@@ -360,7 +360,7 @@
360
  </div>
361
  <div class="bottom-strip">
362
  <div class="throughput-strip">
363
- <span data-role="booksRateA">0 books/sec</span>
364
  <span class="tps" data-role="tpsInlineA">(0 TPS)</span>
365
  </div>
366
  <div class="output-panel">
@@ -447,8 +447,7 @@
447
  }
448
 
449
  const embedConfig = readEmbedConfig();
450
- const modelCount = Number(embedConfig.modelCount) === 2 ? 2 : 1;
451
- const isCompareMode = modelCount === 2;
452
  const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
453
  const defaultModelB = String(embedConfig.modelB || 1724);
454
  root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
@@ -465,7 +464,7 @@
465
  const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
466
  const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
467
 
468
- const TRAINING_RUNS_SINGLE = [
469
  { gpus: 8, name: 'BERT', row: 'a1',
470
  desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
471
  { gpus: 32, name: 'GPT-2', row: 'a1',
@@ -486,28 +485,7 @@
486
  desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
487
  ];
488
 
489
- const TRAINING_RUNS_COMPARE = [
490
- { gpus: 8, name: 'BERT', row: 'a1',
491
- desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia.' },
492
- { gpus: 32, name: 'GPT-2', row: 'a1',
493
- desc: '<b>GPT-2</b> (OpenAI, 2019)<br>\u224832 V100 GPUs. 1.5B params.' },
494
- { gpus: 384, name: 'BLOOM', row: 'a1',
495
- desc: '<b>BLOOM</b> (BigScience, 2022)<br>384 A100 80GB GPUs. 176B params.' },
496
- { gpus: 2_048, name: 'Llama 1', row: 'a2',
497
- desc: '<b>Llama 1</b> (Meta, 2023)<br>2,048 A100 GPUs. 65B params.' },
498
- { gpus: 2_788, name: 'DeepSeek', row: 'a1',
499
- desc: '<b>DeepSeek V3</b> (DeepSeek, 2024)<br>2,048 H800 GPUs. 671B MoE params.' },
500
- { gpus: 10_000, name: 'GPT-3', row: 'a1',
501
- desc: '<b>GPT-3</b> (OpenAI, 2020)<br>10,000 V100 GPUs. 175B params.' },
502
- { gpus: 16_384, name: 'Llama 3', row: 'a2',
503
- desc: '<b>Llama 3</b> (Meta, 2024)<br>16,384 H100 GPUs. 405B params.' },
504
- { gpus: 25_000, name: 'GPT-4', row: 'a1',
505
- desc: '<b>GPT-4</b> (OpenAI, 2023, estimated)<br>\u224825K A100 GPUs. \u22481.8T MoE params.' },
506
- { gpus: 50_000, name: 'GPT-5', row: 'a1',
507
- desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs.' },
508
- ];
509
-
510
- const INFRA_LANDMARKS_SINGLE = [
511
  { gpus: 1, name: '1 GPU', row: 'b1',
512
  desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
513
  { gpus: 8, name: '1 node', row: 'b1',
@@ -534,33 +512,6 @@
534
  desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
535
  ];
536
 
537
- const INFRA_LANDMARKS_COMPARE = [
538
- { gpus: 1, name: '1 GPU', row: 'b1',
539
- desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.' },
540
- { gpus: 8, name: '1 node', row: 'b1',
541
- desc: '<b>DGX H100</b> \u2014 8\u00d7H100 SXM<br>640 GB HBM3, NVLink 900 GB/s.' },
542
- { gpus: 32, name: '1 rack', row: 'b1',
543
- desc: '<b>DGX SuperPOD rack</b> \u2014 4\u00d7DGX H100<br>32 GPUs, 2.5 TB HBM3.' },
544
- { gpus: 256, name: 'SuperPOD', row: 'b1',
545
- desc: '<b>DGX SuperPOD (1 SU)</b> \u2014 32 nodes, 256 GPUs.' },
546
- { gpus: 10_752, name: 'ALPS', row: 'b1',
547
- desc: '<b>ALPS</b> \u2014 CSCS, Lugano<br>10,752 GH200 Grace-Hopper superchips.' },
548
- { gpus: 12_288, name: 'ByteDance', row: 'b2',
549
- desc: '<b>ByteDance MegaScale</b><br>12,288 GPUs (A100/H800 mix).' },
550
- { gpus: 64_000, name: 'Stargate', row: 'b1',
551
- desc: '<b>Stargate</b> \u2014 OpenAI / Oracle, Abilene, TX<br>64K GB200 GPUs (planned).' },
552
- { gpus: 100_000, name: 'Tencent', row: 'b2',
553
- desc: '<b>Tencent Xingmai 2.0</b><br>100K GPUs in a single cluster.' },
554
- { gpus: 200_000, name: 'Colossus', row: 'b1',
555
- desc: '<b>Colossus</b> \u2014 xAI, Memphis, TN<br>200K H100/H200 GPUs.' },
556
- { gpus: 250_000, name: 'CoreWeave', row: 'b2',
557
- desc: '<b>CoreWeave</b> \u2014 250K+ GPUs across 32 data centers.' },
558
- { gpus: 600_000, name: 'Meta', row: 'b2',
559
- desc: '<b>Meta AI fleet</b> \u2014 600K H100-equivalent GPUs.' },
560
- { gpus: 1_000_000, name: 'Colossus 2', row: 'b1',
561
- desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs.' },
562
- ];
563
-
564
  function gpusToSlider(gpus) { return (Math.log(Math.max(gpus, 1)) - LOG_MIN) / (LOG_MAX - LOG_MIN); }
565
  function sliderToGpus(val) { return Math.round(Math.exp(LOG_MIN + val * (LOG_MAX - LOG_MIN))); }
566
 
@@ -586,13 +537,8 @@
586
 
587
  datasetsTitleAEl.textContent = 'Time to generate dataset';
588
  datasetsTitleBEl.textContent = 'Time to generate dataset';
589
-
590
- if (isCompareMode) {
591
- booksRateAEl.textContent = '0 pages/sec';
592
- booksRateBEl.textContent = '0 pages/sec';
593
- } else {
594
- booksRateAEl.textContent = '0 books/sec';
595
- }
596
 
597
  const themeTokens = {};
598
  function refreshThemeTokens() {
@@ -651,10 +597,10 @@
651
  el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
652
  rowEls[lm.row].appendChild(el);
653
  }
654
- (isCompareMode ? TRAINING_RUNS_COMPARE : TRAINING_RUNS_SINGLE).forEach(addLandmark);
655
- (isCompareMode ? INFRA_LANDMARKS_COMPARE : INFRA_LANDMARKS_SINGLE).forEach(addLandmark);
656
 
657
- const DATASETS_SINGLE = [
658
  { name: 'BookCorpus', tokens: 1e9,
659
  desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
660
  { name: 'Wikipedia', tokens: 6e9,
@@ -669,22 +615,7 @@
669
  desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
670
  ];
671
 
672
- const DATASETS_COMPARE = [
673
- { name: 'BookCorpus', tokens: 1e9,
674
- desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com.' },
675
- { name: 'Wikipedia', tokens: 6e9,
676
- desc: '<b>Multilingual Wikipedia</b><br>All articles across all 300+ language editions. ~6B tokens.' },
677
- { name: 'FinePhrase', tokens: 1e12,
678
- desc: '<b>FinePhrase</b> (Hugging Face, 2026)<br>1T tokens of LLM-rephrased web text.' },
679
- { name: 'RedPajama', tokens: 100e12,
680
- desc: '<b>RedPajama v2</b> (Together AI, 2023)<br>100T raw tokens from 84 Common Crawl snapshots.' },
681
- { name: 'Common Crawl', tokens: 3e15,
682
- desc: '<b>Common Crawl</b> (ongoing since 2008)<br>The raw web archive. Petabytes of HTML from billions of pages.' },
683
- { name: 'The Internet', tokens: 100e15,
684
- desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online.' },
685
- ];
686
-
687
- const DATASETS = isCompareMode ? DATASETS_COMPARE : DATASETS_SINGLE;
688
 
689
  // --- Utility functions ---
690
  function formatNum(n) {
 
360
  </div>
361
  <div class="bottom-strip">
362
  <div class="throughput-strip">
363
+ <span data-role="booksRateA">0 pages/sec</span>
364
  <span class="tps" data-role="tpsInlineA">(0 TPS)</span>
365
  </div>
366
  <div class="output-panel">
 
447
  }
448
 
449
  const embedConfig = readEmbedConfig();
450
+ const isCompareMode = Number(embedConfig.modelCount) === 2;
 
451
  const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
452
  const defaultModelB = String(embedConfig.modelB || 1724);
453
  root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
 
464
  const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
465
  const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
466
 
467
+ const TRAINING_RUNS = [
468
  { gpus: 8, name: 'BERT', row: 'a1',
469
  desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
470
  { gpus: 32, name: 'GPT-2', row: 'a1',
 
485
  desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
486
  ];
487
 
488
+ const INFRA_LANDMARKS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  { gpus: 1, name: '1 GPU', row: 'b1',
490
  desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
491
  { gpus: 8, name: '1 node', row: 'b1',
 
512
  desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
513
  ];
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  function gpusToSlider(gpus) { return (Math.log(Math.max(gpus, 1)) - LOG_MIN) / (LOG_MAX - LOG_MIN); }
516
  function sliderToGpus(val) { return Math.round(Math.exp(LOG_MIN + val * (LOG_MAX - LOG_MIN))); }
517
 
 
537
 
538
  datasetsTitleAEl.textContent = 'Time to generate dataset';
539
  datasetsTitleBEl.textContent = 'Time to generate dataset';
540
+ booksRateAEl.textContent = '0 pages/sec';
541
+ booksRateBEl.textContent = '0 pages/sec';
 
 
 
 
 
542
 
543
  const themeTokens = {};
544
  function refreshThemeTokens() {
 
597
  el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
598
  rowEls[lm.row].appendChild(el);
599
  }
600
+ TRAINING_RUNS.forEach(addLandmark);
601
+ INFRA_LANDMARKS.forEach(addLandmark);
602
 
603
+ const DATASETS_BASE = [
604
  { name: 'BookCorpus', tokens: 1e9,
605
  desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
606
  { name: 'Wikipedia', tokens: 6e9,
 
615
  desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
616
  ];
617
 
618
+ const DATASETS = DATASETS_BASE;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
  // --- Utility functions ---
621
  function formatNum(n) {