Spaces:

fhueni
/

on-device-vs-cloud-llm-inference

Running

App Files Files Community

fhueni committed on Nov 11

Commit

5c8fb6b

1 Parent(s): bfaf968

feat: add dataset, adjust scheduler to run full dataset, implement statistic download, add model load button

Browse files

Files changed (5) hide show

dataset/boolq_validation.csv +0 -0
index.html +5 -4
src/main.js +54 -12
src/requestManager.js +1 -1
src/scheduler.js +50 -20

dataset/boolq_validation.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

index.html CHANGED Viewed

@@ -25,6 +25,7 @@
             <h2>On-Device</h2>
             <label>Model (transformers.js) <input id="deviceModel" value="distilgpt2" /></label>
             <div id="deviceStatus">Not loaded</div>
         </div>
@@ -32,9 +33,9 @@
             <h2>Request Pattern</h2>
             <select id="patternSelect">
                 <option value="once-per-sec">1 request / sec</option>
-                <option value="ten-per-sec">10 requests / sec</option>
-                <option value="batch-10-every-5s">Batch: 10 every 5s</option>
-                <option value="burst">Burst: 50 then idle</option>
             </select>
             <label>Route strategy
                 <select id="routeStrategy">
@@ -51,11 +52,11 @@
             </div>
         </div>
         <div class="card wide">
             <h2>Live Log & Results</h2>
             <div id="log" class="log"></div>
             <div id="stats"></div>
         </div>
     </section>

             <h2>On-Device</h2>
             <label>Model (transformers.js) <input id="deviceModel" value="distilgpt2" /></label>
             <div id="deviceStatus">Not loaded</div>
+            <button id="loadDeviceModelBtn">Load Model</button>
         </div>
             <h2>Request Pattern</h2>
             <select id="patternSelect">
                 <option value="once-per-sec">1 request / sec</option>
+                <option value="every-ten-sec">Every 10 sec 1 request</option>
+                <option disabled value="batch-10-every-5s">(not implemented) Batch: 10 every 5s</option>
+                <option disabled value="burst">(not implemented) Burst: 50 then idle</option>
             </select>
             <label>Route strategy
                 <select id="routeStrategy">
             </div>
         </div>
         <div class="card wide">
             <h2>Live Log & Results</h2>
             <div id="log" class="log"></div>
             <div id="stats"></div>
+            <button id="downloadStats">Download Statistics</button>
         </div>
     </section>

src/main.js CHANGED Viewed

@@ -20,22 +20,14 @@ const evaluator = new Evaluator();
 const requestManager = new RequestManager({
     deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
-        logTo(logEl, `${evt.job.id} -> ${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} f1=${evt.evalRes.f1.toFixed(2)}`);
         updateStats();
     }
 });
 // instantiate the job scheduler with some mock prompts TODO: replace with real prompts
-const scheduler = new JobScheduler([
-    {prompt: 'Translate to German: Hello world', groundTruth: 'Hallo Welt'},
-    {
-        prompt: 'What is 3*6?',
-        groundTruth: '18'
-    },
-    {prompt: 'Answer: What is 2+2?', groundTruth: '4'},
-    {prompt: 'What is the capital of switzerland?', groundTruth: 'Bern'}
-]);
 scheduler.onJob(async (job) => {
@@ -87,19 +79,69 @@ document.getElementById('stopBtn').addEventListener('click', () => {
     document.getElementById('stopBtn').disabled = true;
 });
 async function loadDeviceModel() {
     deviceStatusEl.textContent = 'Loading...';
     try {
         await onDeviceInferenceService.load((s) => deviceStatusEl.textContent = s);
-        deviceStatusEl.textContent = 'Ready';
     } catch (e) {
         deviceStatusEl.textContent = `Error: ${e.message}`;
     }
 }
 function updateStats() {
     const s = requestManager.stats;
-    statsEl.innerHTML = `<pre>Processed: ${s.count}\nCloud: ${s.cloud}\nDevice: ${s.device}\nAvg latency (ms): ${s.count ? (s.totalLatencyMs / s.count).toFixed(1) : 0}\nRecent evaluations: ${Math.min(10, s.evaluations.length)}</pre>`;
 }

 const requestManager = new RequestManager({
     deviceService: onDeviceInferenceService, cloudService: cloudInferenceService, evaluator, logger: evt => {
+        logTo(logEl, `${evt.job.id} -> ${evt.route} | latency=${evt.latency}ms | exact=${evt.evalRes.exact} | question="${evt.job.prompt.substring(0, 30)}..."`);
         updateStats();
     }
 });
 // instantiate the job scheduler with some mock prompts TODO: replace with real prompts
+const scheduler = new JobScheduler('boolq_validation');
 scheduler.onJob(async (job) => {
     document.getElementById('stopBtn').disabled = true;
 });
+// Wire the statistics download button to the JSON export.
+document.getElementById('downloadStats')
+    .addEventListener('click', () => downloadStats());
+// Wire the "Load Model" button to the on-device model loader.
+document.getElementById('loadDeviceModelBtn')
+    .addEventListener('click', () => loadDeviceModel());
 async function loadDeviceModel() {
     deviceStatusEl.textContent = 'Loading...';
+    document.getElementById('loadDeviceModelBtn').disabled = true;
     try {
         await onDeviceInferenceService.load((s) => deviceStatusEl.textContent = s);
+        deviceStatusEl.textContent = 'Model Ready';
     } catch (e) {
         deviceStatusEl.textContent = `Error: ${e.message}`;
+        document.getElementById('loadDeviceModelBtn').disabled = false;
     }
 }
+/**
+ * Export the request manager's statistics as a `stats.json` download.
+ *
+ * The overall, on-device and cloud average latencies are added to the exported object.
+ * They are computed on a copy so that the live `requestManager.stats` object is left untouched.
+ */
+function downloadStats() {
+    const stats = requestManager.stats;
+    // Sum of the latencies of all recorded evaluations that took the given route.
+    const totalLatencyFor = (route) => stats.evaluations
+        .filter(e => e.route === route)
+        .reduce((sum, e) => sum + e.latency, 0);
+    const exported = {
+        ...stats,
+        avgLatencyMs: stats.count ? (stats.totalLatencyMs / stats.count) : 0,
+        avgDeviceLatencyMs: stats.device ? (totalLatencyFor('device') / stats.device) : 0,
+        avgCloudLatencyMs: stats.cloud ? (totalLatencyFor('cloud') / stats.cloud) : 0,
+    };
+    const dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(exported, null, 2));
+    // A temporary anchor with a `download` attribute triggers the browser's save dialog.
+    const dlAnchorElem = document.createElement('a');
+    dlAnchorElem.setAttribute("href", dataStr);
+    dlAnchorElem.setAttribute("download", "stats.json");
+    dlAnchorElem.click();
+}
+/**
+ * Update the statistics display in the UI based on the request manager's stats.
+ *
+ * Each route's average latency is the summed latency of that route's evaluations
+ * divided by that route's own request counter (`s.cloud` or `s.device`).
+ */
 function updateStats() {
     const s = requestManager.stats;
+    statsEl.innerHTML = `
+        <div style="display: flex; justify-content: space-between;">
+            <div>
+                <h3>General Stats</h3>
+                <pre>
+Processed: ${s.count}
+Avg latency (ms): ${s.count ? (s.totalLatencyMs / s.count).toFixed(1) : 0}
+Recent evaluations: ${Math.min(10, s.evaluations.length)}
+                </pre>
+            </div>
+            <div>
+                <h3>Cloud Stats</h3>
+                <pre>
+Requests: ${s.cloud}
+Avg latency (ms): ${s.cloud ? (s.evaluations.filter(e => e.route === 'cloud').reduce((a, b) => a + b.latency, 0) / s.cloud).toFixed(1) : 0}
+                </pre>
+            </div>
+            <div>
+                <h3>On-Device Stats</h3>
+                <pre>
+Requests: ${s.device}
+Avg latency (ms): ${s.device ? (s.evaluations.filter(e => e.route === 'device').reduce((a, b) => a + b.latency, 0) / s.device).toFixed(1) : 0}
+                </pre>
+            </div>
+        </div>`;
 }

src/requestManager.js CHANGED Viewed

@@ -87,7 +87,7 @@ export class RequestManager {
      * Handle a single inference job by routing it to the appropriate service,
      * performing inference, evaluating the result, and recording statistics.
      *
-     * @param job
      * @returns {Promise<{route: string, latency: number, text: string, job, evalRes: (*|XPathResult|{exact: *, f1: *})}>}
      */
     async handle(job) {

      * Handle a single inference job by routing it to the appropriate service,
      * performing inference, evaluating the result, and recording statistics.
      *
+     * @param job - The job object containing prompt and ground truth
      * @returns {Promise<{route: string, latency: number, text: string, job, evalRes: (*|XPathResult|{exact: *, f1: *})}>}
      */
     async handle(job) {

src/scheduler.js CHANGED Viewed

@@ -1,15 +1,18 @@
 import {sleep} from './utils.js';
 /**
  * JobScheduler emits jobs based on predefined patterns.
  * Can be used to simulate different load scenarios like batch processing or on-request per second
  */
 export class JobScheduler {
-    constructor(promptSource = []) {
-        this.promptSource = promptSource;
         this.running = false;
         this._onJob = null; // callback
     }
@@ -31,27 +34,27 @@ export class JobScheduler {
         // once per second until user stopp evaluation
         if (patternName === 'once-per-sec') {
             let i = 0;
-            while (this.running) {
-                this._emit(i++);
                 await sleep(1000);
             }
-        } else if (patternName === 'ten-per-sec') {
             let i = 0;
             const interval = 100; // ms
-            while (this.running) {
-                this._emit(i++);
                 await sleep(interval);
             }
         } else if (patternName === 'batch-10-every-5s') {
             let i = 0;
             while (this.running) {
                 for (let j = 0; j < 10 && this.running; j++) this._emit(i++);
                 await sleep(5000);
             }
-        } else if (patternName === 'burst') {
-            // single burst
-            for (let i = 0; i < 50; i++) this._emit(i);
-            this.running = false;
         }
     }
@@ -63,17 +66,44 @@ export class JobScheduler {
         this.running = false;
     }
-    _pickPrompt(id) {
-        if (this.promptSource.length === 0) return {prompt: `Hello world ${id}`, groundTruth: `Hello world ${id}`};
-        return this.promptSource[id % this.promptSource.length];
-    }
-    _emit(id) {
         if (this._onJob) {
-            const p = this._pickPrompt(id);
-            const job = {id: `job-${Date.now()}-${id}`, prompt: p.prompt, groundTruth: p.groundTruth};
             this._onJob(job);
         }
     }
 }

 import {sleep} from './utils.js';
 /**
  * JobScheduler emits jobs based on predefined patterns.
  * Can be used to simulate different load scenarios like batch processing or on-request per second
  */
 export class JobScheduler {
+    constructor(datasetName = 'boolq_validation') {
+        // TODO implement dataset loading based on configuration parameter (currently only the dataset name is configurable)
         this.running = false;
+        this._dataset = null; // stays null until the fetch started by _loadDataset below has resolved
         this._onJob = null; // callback
+        this._datasetName = datasetName
+        this._loadDataset(this._datasetName); // started here but not awaited
     }
         // once per second until user stopp evaluation
         if (patternName === 'once-per-sec') {
             let i = 0;
+            while (this._dataset.length > 0 && this.running) {
+                console.log(this._dataset.length)
+                const item = this._dataset.pop();
+                this._emit(item);
                 await sleep(1000);
             }
+        } else if (patternName === 'every-ten-sec') {
             let i = 0;
             const interval = 100; // ms
+            while (this._dataset.length > 0 && this.running) {
+                const item = this._dataset.pop();
+                this._emit(item);
                 await sleep(interval);
             }
         } else if (patternName === 'batch-10-every-5s') {
             let i = 0;
             while (this.running) {
+                // TODO implement batch processing!
                 for (let j = 0; j < 10 && this.running; j++) this._emit(i++);
                 await sleep(5000);
             }
         }
     }
         this.running = false;
     }
+    /**
+     * Emit a job with the item from the dataset to process.
+     * Does nothing when no job callback has been registered.
+     *
+     * @param item - The dataset item containing prompt and ground truth
+     * @private
+     */
+    _emit(item) {
         if (this._onJob) {
+            // Every job gets a unique id: the log output in main.js prints job.id,
+            // which would otherwise show up as "undefined".
+            this._jobCounter = (this._jobCounter || 0) + 1;
+            const job = {
+                id: `job-${Date.now()}-${this._jobCounter}`,
+                prompt: item.prompt,
+                groundTruth: item.groundTruth
+            };
             this._onJob(job);
         }
     }
+    /**
+     * Load the dataset from CSV file based on the given name.
+     *
+     * The first row is treated as the header and skipped. The first column of each
+     * remaining row is used as the prompt and the second as the ground truth; rows
+     * without both columns (e.g. a trailing blank line) are dropped.
+     *
+     * @param name - Name of the csv dataset to load without file extension
+     * @returns {Promise<void>} settles once loading has finished; on failure the error
+     *          is logged and this._dataset is left unchanged
+     * @private
+     */
+    _loadDataset(name) {
+        const path = `./dataset/${name}.csv`;
+        return fetch(path)
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error(`Dataset file not found: ${path}`);
+                }
+                return response.text();
+            })
+            .then(data => {
+                this._dataset = this._parseCsv(data)
+                    .slice(1) // skip the header row
+                    .filter(row => row.length >= 2 && row[0] !== '')
+                    .map(([question, answer]) => ({prompt: question, groundTruth: answer}));
+            })
+            .catch(error => {
+                console.error(error);
+            });
+    }
+    /**
+     * Split CSV text into rows of fields.
+     * Handles double-quoted fields, which may contain commas, line breaks and
+     * escaped quotes (""), as well as both \n and \r\n line endings.
+     *
+     * @param {string} text - Raw CSV file content
+     * @returns {string[][]} One array of field values per record
+     * @private
+     */
+    _parseCsv(text) {
+        const rows = [];
+        let row = [];
+        let field = '';
+        let inQuotes = false;
+        for (let i = 0; i < text.length; i++) {
+            const ch = text[i];
+            if (inQuotes) {
+                if (ch !== '"') {
+                    field += ch;
+                } else if (text[i + 1] === '"') {
+                    // A doubled quote inside a quoted field is a literal quote character.
+                    field += '"';
+                    i++;
+                } else {
+                    inQuotes = false;
+                }
+            } else if (ch === '"') {
+                inQuotes = true;
+            } else if (ch === ',') {
+                row.push(field);
+                field = '';
+            } else if (ch === '\n') {
+                row.push(field);
+                rows.push(row);
+                row = [];
+                field = '';
+            } else if (ch !== '\r') {
+                field += ch;
+            }
+        }
+        // Flush the last record when the file does not end with a line break.
+        if (field !== '' || row.length > 0) {
+            row.push(field);
+            rows.push(row);
+        }
+        return rows;
+    }
 }