Spaces:
Running
Running
Update phiWorker.js
Browse files- phiWorker.js +115 -66
phiWorker.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import init, { Model } from "./build/m.js"
|
| 2 |
|
| 3 |
function fixTwo(x) { return Math.floor(x * 100) / 100 }
|
| 4 |
|
|
@@ -10,62 +10,109 @@ function humanSize(size) {
|
|
| 10 |
return `${fixTwo(size/1e12)}tb`
|
| 11 |
}
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
let lastSend = 0
|
|
|
|
|
|
|
| 14 |
|
| 15 |
async function fetchArrayBuffer(url) {
|
| 16 |
-
const cacheName = "phi-mixformer-candle-cache"
|
| 17 |
-
const cache = await caches.open(cacheName)
|
| 18 |
-
const cachedResponse = await cache.match(url)
|
| 19 |
if (cachedResponse) {
|
| 20 |
-
const data = await cachedResponse.arrayBuffer()
|
| 21 |
-
return new Uint8Array(data)
|
| 22 |
}
|
| 23 |
-
const res = await fetch(url, { cache: "force-cache" })
|
| 24 |
while (!res.body) { }
|
| 25 |
-
const reader = res.body.getReader()
|
| 26 |
-
const contentLength = +(res.headers.get('Content-Length') ?? 0)
|
| 27 |
-
let receivedLength = 0
|
| 28 |
-
let chunks = []
|
| 29 |
while (true) {
|
| 30 |
-
const { done, value } = await reader.read()
|
| 31 |
if (done) {
|
| 32 |
-
break
|
| 33 |
}
|
| 34 |
-
chunks.push(value)
|
| 35 |
-
receivedLength += value.length
|
| 36 |
-
let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})\nLink: ${url}\nTotal size: ${humanSize(fixTwo(contentLength))}`
|
| 37 |
if(Date.now() - lastSend > 250) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
self.postMessage({ status: "loading", message: downloadMessage })
|
| 39 |
-
console.log(downloadMessage)
|
| 40 |
lastSend = Date.now()
|
| 41 |
}
|
| 42 |
}
|
| 43 |
-
let chunksAll = new Uint8Array(receivedLength)
|
| 44 |
-
let position = 0
|
| 45 |
for (let chunk of chunks) {
|
| 46 |
-
chunksAll.set(chunk, position)
|
| 47 |
-
position += chunk.length
|
| 48 |
}
|
| 49 |
-
cache.put(url, new Response(chunksAll))
|
| 50 |
-
return chunksAll
|
| 51 |
}
|
| 52 |
|
| 53 |
async function concatenateArrayBuffers(urls) {
|
| 54 |
-
const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))
|
| 55 |
|
| 56 |
-
let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0)
|
| 57 |
-
let concatenatedBuffer = new Uint8Array(totalLength)
|
| 58 |
|
| 59 |
-
let offset = 0
|
| 60 |
arrayBuffers.forEach(buffer => {
|
| 61 |
-
concatenatedBuffer.set(new Uint8Array(buffer), offset)
|
| 62 |
-
offset += buffer.byteLength
|
| 63 |
-
})
|
| 64 |
-
return concatenatedBuffer
|
| 65 |
}
|
| 66 |
|
| 67 |
class Phi {
|
| 68 |
-
static instance = {}
|
| 69 |
|
| 70 |
static async getInstance(
|
| 71 |
weightsURL,
|
|
@@ -76,36 +123,36 @@ class Phi {
|
|
| 76 |
) {
|
| 77 |
// load individual modelID only once
|
| 78 |
if (!this.instance[modelID]) {
|
| 79 |
-
await init()
|
| 80 |
|
| 81 |
-
self.postMessage({ status: "loading", message: "Loading Model" })
|
| 82 |
const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
|
| 83 |
await Promise.all([
|
| 84 |
weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
|
| 85 |
fetchArrayBuffer(tokenizerURL),
|
| 86 |
fetchArrayBuffer(configURL),
|
| 87 |
-
])
|
| 88 |
|
| 89 |
this.instance[modelID] = new Model(
|
| 90 |
weightsArrayU8,
|
| 91 |
tokenizerArrayU8,
|
| 92 |
configArrayU8,
|
| 93 |
quantized
|
| 94 |
-
)
|
| 95 |
}
|
| 96 |
-
return this.instance[modelID]
|
| 97 |
}
|
| 98 |
}
|
| 99 |
|
| 100 |
-
let controller = null
|
| 101 |
self.addEventListener("message", (event) => {
|
| 102 |
if (event.data.command === "start") {
|
| 103 |
-
controller = new AbortController()
|
| 104 |
-
generate(event.data)
|
| 105 |
} else if (event.data.command === "abort") {
|
| 106 |
-
controller.abort()
|
| 107 |
}
|
| 108 |
-
})
|
| 109 |
|
| 110 |
async function generate(data) {
|
| 111 |
const {
|
|
@@ -120,18 +167,19 @@ async function generate(data) {
|
|
| 120 |
repeatPenalty,
|
| 121 |
seed,
|
| 122 |
maxSeqLen,
|
| 123 |
-
|
|
|
|
| 124 |
try {
|
| 125 |
-
self.postMessage({ status: "loading", message: "Starting Phi" })
|
| 126 |
const model = await Phi.getInstance(
|
| 127 |
weightsURL,
|
| 128 |
modelID,
|
| 129 |
tokenizerURL,
|
| 130 |
configURL,
|
| 131 |
quantized
|
| 132 |
-
)
|
| 133 |
|
| 134 |
-
self.postMessage({ status: "loading", message: "Initializing model" })
|
| 135 |
const firstToken = model.init_with_prompt(
|
| 136 |
prompt,
|
| 137 |
temp,
|
|
@@ -139,13 +187,13 @@ async function generate(data) {
|
|
| 139 |
repeatPenalty,
|
| 140 |
64,
|
| 141 |
BigInt(seed)
|
| 142 |
-
)
|
| 143 |
-
const seq_len = 2048
|
| 144 |
|
| 145 |
-
let sentence = firstToken
|
| 146 |
-
let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
|
| 147 |
-
let startTime = performance.now()
|
| 148 |
-
let tokensCount = 0
|
| 149 |
while (tokensCount < maxTokens) {
|
| 150 |
await new Promise(async (resolve) => {
|
| 151 |
if (controller && controller.signal.aborted) {
|
|
@@ -153,22 +201,23 @@ async function generate(data) {
|
|
| 153 |
status: "aborted",
|
| 154 |
message: "Aborted",
|
| 155 |
output: prompt + sentence,
|
| 156 |
-
})
|
| 157 |
-
return
|
| 158 |
}
|
| 159 |
-
const token = await model.next_token()
|
| 160 |
-
|
|
|
|
| 161 |
self.postMessage({
|
| 162 |
status: "complete",
|
| 163 |
message: "complete",
|
| 164 |
output: prompt + sentence,
|
| 165 |
-
})
|
| 166 |
-
return
|
| 167 |
}
|
| 168 |
const tokensSec =
|
| 169 |
-
((tokensCount + 1) / (performance.now() - startTime)) * 1000
|
| 170 |
|
| 171 |
-
sentence += token
|
| 172 |
self.postMessage({
|
| 173 |
status: "generating",
|
| 174 |
message: "Generating token",
|
|
@@ -177,17 +226,17 @@ async function generate(data) {
|
|
| 177 |
totalTime: performance.now() - startTime,
|
| 178 |
tokensSec,
|
| 179 |
prompt: prompt,
|
| 180 |
-
})
|
| 181 |
-
setTimeout(resolve, 0)
|
| 182 |
-
})
|
| 183 |
-
tokensCount
|
| 184 |
}
|
| 185 |
self.postMessage({
|
| 186 |
status: "complete",
|
| 187 |
message: "complete",
|
| 188 |
output: prompt + sentence,
|
| 189 |
-
})
|
| 190 |
} catch (e) {
|
| 191 |
-
self.postMessage({ error: e })
|
| 192 |
}
|
| 193 |
}
|
|
|
|
| 1 |
+
import init, { Model } from "./build/m.js"
|
| 2 |
|
| 3 |
// Truncate a number to two decimal places (floor-based, so it never rounds up).
function fixTwo(x) {
  const scaled = x * 100
  return Math.floor(scaled) / 100
}
|
| 4 |
|
|
|
|
| 10 |
return `${fixTwo(size/1e12)}tb`
|
| 11 |
}
|
| 12 |
|
| 13 |
+
// Render a (possibly fractional) number of seconds as a human-readable
// duration, e.g. 3661 -> "1 hour 1 minute 1 second".
// Fixes vs. the previous version:
//  - the last clause tested the total `seconds` instead of the remaining
//    `second` count, so e.g. 120s rendered as "2 minutes 0 seconds";
//  - sub-second/zero input produced an empty string (bad for UI labels);
//  - output carried a trailing space;
//  - non-finite input (Infinity/NaN, possible while a download-rate estimate
//    is still warming up) rendered as "Infinity years".
function humanTime(seconds) {
  if (!Number.isFinite(seconds)) return "unknown"
  // Unit table, largest first; spans are in seconds (365-day year, 30-day month).
  const units = [
    ["year", 31536e3],
    ["month", 2592e3],
    ["day", 864e2],
    ["hour", 36e2],
    ["minute", 60],
    ["second", 1],
  ]
  const parts = []
  let remaining = Math.floor(seconds)
  for (const [name, span] of units) {
    const count = Math.floor(remaining / span)
    remaining -= count * span
    if (count > 0) parts.push(`${count} ${name}${count === 1 ? "" : "s"}`)
  }
  // Durations under one second still need readable output.
  if (parts.length === 0) return "0 seconds"
  return parts.join(" ")
}
|
| 43 |
+
|
| 44 |
// Timestamp (ms) of the last progress postMessage; throttles updates to ~4/s.
let lastSend = 0
// Previous remaining-time estimate (seconds); used to damp upward jumps.
let lastTime = Infinity
// Sliding window of the last four receivedLength samples (bytes), newest last.
let times = [0, 0, 0, 0]
|
| 47 |
|
| 48 |
// Fetch `url` as a Uint8Array, serving from (and filling) the Cache API
// cache, and posting throttled download-progress messages to the page.
// Fixes vs. the previous version:
//  - `while (!res.body) { }` busy-wait removed (it spun forever when body
//    was null and was dead code otherwise) -> explicit checks that throw;
//  - `res.ok` is now checked so HTTP errors don't get cached as data;
//  - `cache.put` is awaited instead of left as a floating promise;
//  - guards against division by zero when Content-Length is missing;
//  - `Math.floor(receivedLength * 100) / 100` dropped (receivedLength is
//    always an integer, so it was an identity).
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache"
  const cache = await caches.open(cacheName)
  const cachedResponse = await cache.match(url)
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer()
    return new Uint8Array(data)
  }
  const res = await fetch(url, { cache: "force-cache" })
  if (!res.ok) throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`)
  if (!res.body) throw new Error(`Response for ${url} has no readable body`)
  const reader = res.body.getReader()
  const contentLength = +(res.headers.get('Content-Length') ?? 0)
  let receivedLength = 0
  let chunks = []
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }
    chunks.push(value)
    receivedLength += value.length
    if (Date.now() - lastSend > 250) {
      // Estimate throughput from the deltas of the last three ~250ms samples.
      times.push(receivedLength)
      times = times.slice(1)
      const deltas = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
      const meanDelta = (deltas[0] + deltas[1] + deltas[2]) / 3
      const bytesPerSecond = meanDelta * 4 // samples are ~250ms apart
      const leftSize = contentLength - receivedLength
      let leftTime = bytesPerSecond > 0 ? Math.abs(leftSize / bytesPerSecond) : Infinity
      // Damp sudden upward jumps in the estimate so the UI doesn't flicker.
      if (leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
      lastTime = leftTime
      const percent = contentLength > 0 ? fixTwo((receivedLength / contentLength) * 100) : 0
      const downloadMessage = `Downloading... ${percent}% (${humanSize(receivedLength)})
Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
Total size: ${humanSize(fixTwo(contentLength))}
Download URL: ${url}`
      self.postMessage({ status: "loading", message: downloadMessage })
      lastSend = Date.now()
    }
  }
  // Reassemble the streamed chunks into one contiguous buffer.
  let chunksAll = new Uint8Array(receivedLength)
  let position = 0
  for (let chunk of chunks) {
    chunksAll.set(chunk, position)
    position += chunk.length
  }
  // Await the write so a failed cache insert surfaces instead of being lost.
  await cache.put(url, new Response(chunksAll))
  return chunksAll
}
|
| 99 |
|
| 100 |
// Download every URL in parallel and join the resulting byte arrays into a
// single contiguous Uint8Array, preserving the order of `urls`.
async function concatenateArrayBuffers(urls) {
  const buffers = await Promise.all(urls.map((url) => fetchArrayBuffer(url)))

  const totalBytes = buffers.reduce((sum, buf) => sum + buf.byteLength, 0)
  const joined = new Uint8Array(totalBytes)

  let cursor = 0
  for (const buf of buffers) {
    joined.set(new Uint8Array(buf), cursor)
    cursor += buf.byteLength
  }
  return joined
}
|
| 113 |
|
| 114 |
class Phi {
|
| 115 |
+
static instance = {}
|
| 116 |
|
| 117 |
static async getInstance(
|
| 118 |
weightsURL,
|
|
|
|
| 123 |
) {
|
| 124 |
// load individual modelID only once
|
| 125 |
if (!this.instance[modelID]) {
|
| 126 |
+
await init()
|
| 127 |
|
| 128 |
+
self.postMessage({ status: "loading", message: "Loading Model" })
|
| 129 |
const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
|
| 130 |
await Promise.all([
|
| 131 |
weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
|
| 132 |
fetchArrayBuffer(tokenizerURL),
|
| 133 |
fetchArrayBuffer(configURL),
|
| 134 |
+
])
|
| 135 |
|
| 136 |
this.instance[modelID] = new Model(
|
| 137 |
weightsArrayU8,
|
| 138 |
tokenizerArrayU8,
|
| 139 |
configArrayU8,
|
| 140 |
quantized
|
| 141 |
+
)
|
| 142 |
}
|
| 143 |
+
return this.instance[modelID]
|
| 144 |
}
|
| 145 |
}
|
| 146 |
|
| 147 |
+
// AbortController for the in-flight generation; null until a "start" arrives.
let controller = null
// Dispatch commands posted from the page to this worker.
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController()
    generate(event.data)
  } else if (event.data.command === "abort") {
    // Fix: an "abort" can arrive before any "start" has created a
    // controller; the previous unconditional call threw a TypeError.
    controller?.abort()
  }
})
|
| 156 |
|
| 157 |
async function generate(data) {
|
| 158 |
const {
|
|
|
|
| 167 |
repeatPenalty,
|
| 168 |
seed,
|
| 169 |
maxSeqLen,
|
| 170 |
+
stuff
|
| 171 |
+
} = data
|
| 172 |
try {
|
| 173 |
+
self.postMessage({ status: "loading", message: "Starting Phi" })
|
| 174 |
const model = await Phi.getInstance(
|
| 175 |
weightsURL,
|
| 176 |
modelID,
|
| 177 |
tokenizerURL,
|
| 178 |
configURL,
|
| 179 |
quantized
|
| 180 |
+
)
|
| 181 |
|
| 182 |
+
self.postMessage({ status: "loading", message: "Initializing model" })
|
| 183 |
const firstToken = model.init_with_prompt(
|
| 184 |
prompt,
|
| 185 |
temp,
|
|
|
|
| 187 |
repeatPenalty,
|
| 188 |
64,
|
| 189 |
BigInt(seed)
|
| 190 |
+
)
|
| 191 |
+
const seq_len = 2048
|
| 192 |
|
| 193 |
+
let sentence = firstToken
|
| 194 |
+
let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
|
| 195 |
+
let startTime = performance.now()
|
| 196 |
+
let tokensCount = 0
|
| 197 |
while (tokensCount < maxTokens) {
|
| 198 |
await new Promise(async (resolve) => {
|
| 199 |
if (controller && controller.signal.aborted) {
|
|
|
|
| 201 |
status: "aborted",
|
| 202 |
message: "Aborted",
|
| 203 |
output: prompt + sentence,
|
| 204 |
+
})
|
| 205 |
+
return
|
| 206 |
}
|
| 207 |
+
const token = await model.next_token()
|
| 208 |
+
const terminates = `<|endoftext|>, <|user|>, <|system|>, <|assistant|>`.split(', ').map(e => e.trim())
|
| 209 |
+
if (terminates.includes(token)) {
|
| 210 |
self.postMessage({
|
| 211 |
status: "complete",
|
| 212 |
message: "complete",
|
| 213 |
output: prompt + sentence,
|
| 214 |
+
})
|
| 215 |
+
return
|
| 216 |
}
|
| 217 |
const tokensSec =
|
| 218 |
+
((tokensCount + 1) / (performance.now() - startTime)) * 1000
|
| 219 |
|
| 220 |
+
sentence += token
|
| 221 |
self.postMessage({
|
| 222 |
status: "generating",
|
| 223 |
message: "Generating token",
|
|
|
|
| 226 |
totalTime: performance.now() - startTime,
|
| 227 |
tokensSec,
|
| 228 |
prompt: prompt,
|
| 229 |
+
})
|
| 230 |
+
setTimeout(resolve, 0)
|
| 231 |
+
})
|
| 232 |
+
tokensCount++
|
| 233 |
}
|
| 234 |
self.postMessage({
|
| 235 |
status: "complete",
|
| 236 |
message: "complete",
|
| 237 |
output: prompt + sentence,
|
| 238 |
+
})
|
| 239 |
} catch (e) {
|
| 240 |
+
self.postMessage({ error: e })
|
| 241 |
}
|
| 242 |
}
|