joelniklaus HF Staff committed on
Commit
12e3370
·
1 Parent(s): e4acf3b

simplified inference throughput visualization

Browse files
app/src/content/embeds/inference-throughput-compare.html CHANGED
@@ -360,7 +360,7 @@
360
  </div>
361
  <div class="bottom-strip">
362
  <div class="throughput-strip">
363
- <span data-role="booksRateA">0 books/sec</span>
364
  <span class="tps" data-role="tpsInlineA">(0 TPS)</span>
365
  </div>
366
  <div class="output-panel">
@@ -447,8 +447,7 @@
447
  }
448
 
449
  const embedConfig = readEmbedConfig();
450
- const modelCount = Number(embedConfig.modelCount) === 2 ? 2 : 1;
451
- const isCompareMode = modelCount === 2;
452
  const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
453
  const defaultModelB = String(embedConfig.modelB || 1724);
454
  root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
@@ -465,7 +464,7 @@
465
  const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
466
  const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
467
 
468
- const TRAINING_RUNS_SINGLE = [
469
  { gpus: 8, name: 'BERT', row: 'a1',
470
  desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
471
  { gpus: 32, name: 'GPT-2', row: 'a1',
@@ -486,28 +485,7 @@
486
  desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
487
  ];
488
 
489
- const TRAINING_RUNS_COMPARE = [
490
- { gpus: 8, name: 'BERT', row: 'a1',
491
- desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia.' },
492
- { gpus: 32, name: 'GPT-2', row: 'a1',
493
- desc: '<b>GPT-2</b> (OpenAI, 2019)<br>\u224832 V100 GPUs. 1.5B params.' },
494
- { gpus: 384, name: 'BLOOM', row: 'a1',
495
- desc: '<b>BLOOM</b> (BigScience, 2022)<br>384 A100 80GB GPUs. 176B params.' },
496
- { gpus: 2_048, name: 'Llama 1', row: 'a2',
497
- desc: '<b>Llama 1</b> (Meta, 2023)<br>2,048 A100 GPUs. 65B params.' },
498
- { gpus: 2_788, name: 'DeepSeek', row: 'a1',
499
- desc: '<b>DeepSeek V3</b> (DeepSeek, 2024)<br>2,048 H800 GPUs. 671B MoE params.' },
500
- { gpus: 10_000, name: 'GPT-3', row: 'a1',
501
- desc: '<b>GPT-3</b> (OpenAI, 2020)<br>10,000 V100 GPUs. 175B params.' },
502
- { gpus: 16_384, name: 'Llama 3', row: 'a2',
503
- desc: '<b>Llama 3</b> (Meta, 2024)<br>16,384 H100 GPUs. 405B params.' },
504
- { gpus: 25_000, name: 'GPT-4', row: 'a1',
505
- desc: '<b>GPT-4</b> (OpenAI, 2023, estimated)<br>\u224825K A100 GPUs. \u22481.8T MoE params.' },
506
- { gpus: 50_000, name: 'GPT-5', row: 'a1',
507
- desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs.' },
508
- ];
509
-
510
- const INFRA_LANDMARKS_SINGLE = [
511
  { gpus: 1, name: '1 GPU', row: 'b1',
512
  desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
513
  { gpus: 8, name: '1 node', row: 'b1',
@@ -534,33 +512,6 @@
534
  desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
535
  ];
536
 
537
- const INFRA_LANDMARKS_COMPARE = [
538
- { gpus: 1, name: '1 GPU', row: 'b1',
539
- desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.' },
540
- { gpus: 8, name: '1 node', row: 'b1',
541
- desc: '<b>DGX H100</b> \u2014 8\u00d7H100 SXM<br>640 GB HBM3, NVLink 900 GB/s.' },
542
- { gpus: 32, name: '1 rack', row: 'b1',
543
- desc: '<b>DGX SuperPOD rack</b> \u2014 4\u00d7DGX H100<br>32 GPUs, 2.5 TB HBM3.' },
544
- { gpus: 256, name: 'SuperPOD', row: 'b1',
545
- desc: '<b>DGX SuperPOD (1 SU)</b> \u2014 32 nodes, 256 GPUs.' },
546
- { gpus: 10_752, name: 'ALPS', row: 'b1',
547
- desc: '<b>ALPS</b> \u2014 CSCS, Lugano<br>10,752 GH200 Grace-Hopper superchips.' },
548
- { gpus: 12_288, name: 'ByteDance', row: 'b2',
549
- desc: '<b>ByteDance MegaScale</b><br>12,288 GPUs (A100/H800 mix).' },
550
- { gpus: 64_000, name: 'Stargate', row: 'b1',
551
- desc: '<b>Stargate</b> \u2014 OpenAI / Oracle, Abilene, TX<br>64K GB200 GPUs (planned).' },
552
- { gpus: 100_000, name: 'Tencent', row: 'b2',
553
- desc: '<b>Tencent Xingmai 2.0</b><br>100K GPUs in a single cluster.' },
554
- { gpus: 200_000, name: 'Colossus', row: 'b1',
555
- desc: '<b>Colossus</b> \u2014 xAI, Memphis, TN<br>200K H100/H200 GPUs.' },
556
- { gpus: 250_000, name: 'CoreWeave', row: 'b2',
557
- desc: '<b>CoreWeave</b> \u2014 250K+ GPUs across 32 data centers.' },
558
- { gpus: 600_000, name: 'Meta', row: 'b2',
559
- desc: '<b>Meta AI fleet</b> \u2014 600K H100-equivalent GPUs.' },
560
- { gpus: 1_000_000, name: 'Colossus 2', row: 'b1',
561
- desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs.' },
562
- ];
563
-
564
  function gpusToSlider(gpus) { return (Math.log(Math.max(gpus, 1)) - LOG_MIN) / (LOG_MAX - LOG_MIN); }
565
  function sliderToGpus(val) { return Math.round(Math.exp(LOG_MIN + val * (LOG_MAX - LOG_MIN))); }
566
 
@@ -586,13 +537,8 @@
586
 
587
  datasetsTitleAEl.textContent = 'Time to generate dataset';
588
  datasetsTitleBEl.textContent = 'Time to generate dataset';
589
-
590
- if (isCompareMode) {
591
- booksRateAEl.textContent = '0 pages/sec';
592
- booksRateBEl.textContent = '0 pages/sec';
593
- } else {
594
- booksRateAEl.textContent = '0 books/sec';
595
- }
596
 
597
  const themeTokens = {};
598
  function refreshThemeTokens() {
@@ -651,10 +597,10 @@
651
  el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
652
  rowEls[lm.row].appendChild(el);
653
  }
654
- (isCompareMode ? TRAINING_RUNS_COMPARE : TRAINING_RUNS_SINGLE).forEach(addLandmark);
655
- (isCompareMode ? INFRA_LANDMARKS_COMPARE : INFRA_LANDMARKS_SINGLE).forEach(addLandmark);
656
 
657
- const DATASETS_SINGLE = [
658
  { name: 'BookCorpus', tokens: 1e9,
659
  desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
660
  { name: 'Wikipedia', tokens: 6e9,
@@ -669,22 +615,7 @@
669
  desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
670
  ];
671
 
672
- const DATASETS_COMPARE = [
673
- { name: 'BookCorpus', tokens: 1e9,
674
- desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com.' },
675
- { name: 'Wikipedia', tokens: 6e9,
676
- desc: '<b>Multilingual Wikipedia</b><br>All articles across all 300+ language editions. ~6B tokens.' },
677
- { name: 'FinePhrase', tokens: 1e12,
678
- desc: '<b>FinePhrase</b> (Hugging Face, 2026)<br>1T tokens of LLM-rephrased web text.' },
679
- { name: 'RedPajama', tokens: 100e12,
680
- desc: '<b>RedPajama v2</b> (Together AI, 2023)<br>100T raw tokens from 84 Common Crawl snapshots.' },
681
- { name: 'Common Crawl', tokens: 3e15,
682
- desc: '<b>Common Crawl</b> (ongoing since 2008)<br>The raw web archive. Petabytes of HTML from billions of pages.' },
683
- { name: 'The Internet', tokens: 100e15,
684
- desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online.' },
685
- ];
686
-
687
- const DATASETS = isCompareMode ? DATASETS_COMPARE : DATASETS_SINGLE;
688
 
689
  // --- Utility functions ---
690
  function formatNum(n) {
 
360
  </div>
361
  <div class="bottom-strip">
362
  <div class="throughput-strip">
363
+ <span data-role="booksRateA">0 pages/sec</span>
364
  <span class="tps" data-role="tpsInlineA">(0 TPS)</span>
365
  </div>
366
  <div class="output-panel">
 
447
  }
448
 
449
  const embedConfig = readEmbedConfig();
450
+ const isCompareMode = Number(embedConfig.modelCount) === 2;
 
451
  const defaultModelA = String(embedConfig.modelA || (isCompareMode ? 6443 : 45540));
452
  const defaultModelB = String(embedConfig.modelB || 1724);
453
  root.classList.add(isCompareMode ? 'mode-compare' : 'mode-single');
 
464
  const MIN_GPUS = 1, MAX_GPUS = 1_000_000;
465
  const LOG_MIN = Math.log(MIN_GPUS), LOG_MAX = Math.log(MAX_GPUS);
466
 
467
+ const TRAINING_RUNS = [
468
  { gpus: 8, name: 'BERT', row: 'a1',
469
  desc: '<b>BERT</b> (Google, 2018)<br>16 TPU v3 chips. 340M params.<br>Trained on BooksCorpus + Wikipedia. Introduced masked language modeling. Changed NLP forever.' },
470
  { gpus: 32, name: 'GPT-2', row: 'a1',
 
485
  desc: '<b>GPT-5</b> (OpenAI, 2025, estimated)<br>\u224850K H100-equiv GPUs (est.).<br>Used less training compute than GPT-4.5 due to focus on post-training scaling. Trained on Stargate infrastructure.' },
486
  ];
487
 
488
+ const INFRA_LANDMARKS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  { gpus: 1, name: '1 GPU', row: 'b1',
490
  desc: '<b>NVIDIA H100 SXM</b><br>80 GB HBM3, 3.96 PFLOPS FP8.<br>The workhorse of modern AI training and inference.' },
491
  { gpus: 8, name: '1 node', row: 'b1',
 
512
  desc: '<b>Colossus 2</b> \u2014 xAI (planned)<br>1M+ H100-equivalent GPUs, 2 GW power.<br>$20B Series E from NVIDIA, Cisco, and others. Expanding across Memphis-area facilities.' },
513
  ];
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  function gpusToSlider(gpus) { return (Math.log(Math.max(gpus, 1)) - LOG_MIN) / (LOG_MAX - LOG_MIN); }
516
  function sliderToGpus(val) { return Math.round(Math.exp(LOG_MIN + val * (LOG_MAX - LOG_MIN))); }
517
 
 
537
 
538
  datasetsTitleAEl.textContent = 'Time to generate dataset';
539
  datasetsTitleBEl.textContent = 'Time to generate dataset';
540
+ booksRateAEl.textContent = '0 pages/sec';
541
+ booksRateBEl.textContent = '0 pages/sec';
 
 
 
 
 
542
 
543
  const themeTokens = {};
544
  function refreshThemeTokens() {
 
597
  el.addEventListener('click', () => { gpuSlider.value = gpusToSlider(lm.gpus); updateSliderGradient(); updateGpuLabel(); instances.forEach(inst => inst.reset()); });
598
  rowEls[lm.row].appendChild(el);
599
  }
600
+ TRAINING_RUNS.forEach(addLandmark);
601
+ INFRA_LANDMARKS.forEach(addLandmark);
602
 
603
+ const DATASETS_BASE = [
604
  { name: 'BookCorpus', tokens: 1e9,
605
  desc: '<b>BookCorpus</b> (2015)<br>11K unpublished books scraped from smashwords.com. Used to train the original BERT and GPT-1.' },
606
  { name: 'Wikipedia', tokens: 6e9,
 
615
  desc: '<b>The entire Internet</b> (estimate)<br>Rough estimate of all text ever published online. Nobody has actually tokenized it all.' },
616
  ];
617
 
618
+ const DATASETS = DATASETS_BASE;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
  // --- Utility functions ---
621
  function formatNum(n) {