Spaces:
Running
Running
Update phiWorker.js
Browse files- phiWorker.js +115 -66
phiWorker.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import init, { Model } from "./build/m.js"
|
| 2 |
|
| 3 |
function fixTwo(x) { return Math.floor(x * 100) / 100 }
|
| 4 |
|
|
@@ -10,62 +10,109 @@ function humanSize(size) {
|
|
| 10 |
return `${fixTwo(size/1e12)}tb`
|
| 11 |
}
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
let lastSend = 0
|
|
|
|
|
|
|
| 14 |
|
| 15 |
async function fetchArrayBuffer(url) {
|
| 16 |
-
const cacheName = "phi-mixformer-candle-cache"
|
| 17 |
-
const cache = await caches.open(cacheName)
|
| 18 |
-
const cachedResponse = await cache.match(url)
|
| 19 |
if (cachedResponse) {
|
| 20 |
-
const data = await cachedResponse.arrayBuffer()
|
| 21 |
-
return new Uint8Array(data)
|
| 22 |
}
|
| 23 |
-
const res = await fetch(url, { cache: "force-cache" })
|
| 24 |
while (!res.body) { }
|
| 25 |
-
const reader = res.body.getReader()
|
| 26 |
-
const contentLength = +(res.headers.get('Content-Length') ?? 0)
|
| 27 |
-
let receivedLength = 0
|
| 28 |
-
let chunks = []
|
| 29 |
while (true) {
|
| 30 |
-
const { done, value } = await reader.read()
|
| 31 |
if (done) {
|
| 32 |
-
break
|
| 33 |
}
|
| 34 |
-
chunks.push(value)
|
| 35 |
-
receivedLength += value.length
|
| 36 |
-
let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})\nLink: ${url}\nTotal size: ${humanSize(fixTwo(contentLength))}`
|
| 37 |
if(Date.now() - lastSend > 250) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
self.postMessage({ status: "loading", message: downloadMessage })
|
| 39 |
-
console.log(downloadMessage)
|
| 40 |
lastSend = Date.now()
|
| 41 |
}
|
| 42 |
}
|
| 43 |
-
let chunksAll = new Uint8Array(receivedLength)
|
| 44 |
-
let position = 0
|
| 45 |
for (let chunk of chunks) {
|
| 46 |
-
chunksAll.set(chunk, position)
|
| 47 |
-
position += chunk.length
|
| 48 |
}
|
| 49 |
-
cache.put(url, new Response(chunksAll))
|
| 50 |
-
return chunksAll
|
| 51 |
}
|
| 52 |
|
| 53 |
async function concatenateArrayBuffers(urls) {
|
| 54 |
-
const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))
|
| 55 |
|
| 56 |
-
let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0)
|
| 57 |
-
let concatenatedBuffer = new Uint8Array(totalLength)
|
| 58 |
|
| 59 |
-
let offset = 0
|
| 60 |
arrayBuffers.forEach(buffer => {
|
| 61 |
-
concatenatedBuffer.set(new Uint8Array(buffer), offset)
|
| 62 |
-
offset += buffer.byteLength
|
| 63 |
-
})
|
| 64 |
-
return concatenatedBuffer
|
| 65 |
}
|
| 66 |
|
| 67 |
class Phi {
|
| 68 |
-
static instance = {}
|
| 69 |
|
| 70 |
static async getInstance(
|
| 71 |
weightsURL,
|
|
@@ -76,36 +123,36 @@ class Phi {
|
|
| 76 |
) {
|
| 77 |
// load individual modelID only once
|
| 78 |
if (!this.instance[modelID]) {
|
| 79 |
-
await init()
|
| 80 |
|
| 81 |
-
self.postMessage({ status: "loading", message: "Loading Model" })
|
| 82 |
const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
|
| 83 |
await Promise.all([
|
| 84 |
weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
|
| 85 |
fetchArrayBuffer(tokenizerURL),
|
| 86 |
fetchArrayBuffer(configURL),
|
| 87 |
-
])
|
| 88 |
|
| 89 |
this.instance[modelID] = new Model(
|
| 90 |
weightsArrayU8,
|
| 91 |
tokenizerArrayU8,
|
| 92 |
configArrayU8,
|
| 93 |
quantized
|
| 94 |
-
)
|
| 95 |
}
|
| 96 |
-
return this.instance[modelID]
|
| 97 |
}
|
| 98 |
}
|
| 99 |
|
| 100 |
-
let controller = null
|
| 101 |
self.addEventListener("message", (event) => {
|
| 102 |
if (event.data.command === "start") {
|
| 103 |
-
controller = new AbortController()
|
| 104 |
-
generate(event.data)
|
| 105 |
} else if (event.data.command === "abort") {
|
| 106 |
-
controller.abort()
|
| 107 |
}
|
| 108 |
-
})
|
| 109 |
|
| 110 |
async function generate(data) {
|
| 111 |
const {
|
|
@@ -120,18 +167,19 @@ async function generate(data) {
|
|
| 120 |
repeatPenalty,
|
| 121 |
seed,
|
| 122 |
maxSeqLen,
|
| 123 |
-
|
|
|
|
| 124 |
try {
|
| 125 |
-
self.postMessage({ status: "loading", message: "Starting Phi" })
|
| 126 |
const model = await Phi.getInstance(
|
| 127 |
weightsURL,
|
| 128 |
modelID,
|
| 129 |
tokenizerURL,
|
| 130 |
configURL,
|
| 131 |
quantized
|
| 132 |
-
)
|
| 133 |
|
| 134 |
-
self.postMessage({ status: "loading", message: "Initializing model" })
|
| 135 |
const firstToken = model.init_with_prompt(
|
| 136 |
prompt,
|
| 137 |
temp,
|
|
@@ -139,13 +187,13 @@ async function generate(data) {
|
|
| 139 |
repeatPenalty,
|
| 140 |
64,
|
| 141 |
BigInt(seed)
|
| 142 |
-
)
|
| 143 |
-
const seq_len = 2048
|
| 144 |
|
| 145 |
-
let sentence = firstToken
|
| 146 |
-
let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
|
| 147 |
-
let startTime = performance.now()
|
| 148 |
-
let tokensCount = 0
|
| 149 |
while (tokensCount < maxTokens) {
|
| 150 |
await new Promise(async (resolve) => {
|
| 151 |
if (controller && controller.signal.aborted) {
|
|
@@ -153,22 +201,23 @@ async function generate(data) {
|
|
| 153 |
status: "aborted",
|
| 154 |
message: "Aborted",
|
| 155 |
output: prompt + sentence,
|
| 156 |
-
})
|
| 157 |
-
return
|
| 158 |
}
|
| 159 |
-
const token = await model.next_token()
|
| 160 |
-
|
|
|
|
| 161 |
self.postMessage({
|
| 162 |
status: "complete",
|
| 163 |
message: "complete",
|
| 164 |
output: prompt + sentence,
|
| 165 |
-
})
|
| 166 |
-
return
|
| 167 |
}
|
| 168 |
const tokensSec =
|
| 169 |
-
((tokensCount + 1) / (performance.now() - startTime)) * 1000
|
| 170 |
|
| 171 |
-
sentence += token
|
| 172 |
self.postMessage({
|
| 173 |
status: "generating",
|
| 174 |
message: "Generating token",
|
|
@@ -177,17 +226,17 @@ async function generate(data) {
|
|
| 177 |
totalTime: performance.now() - startTime,
|
| 178 |
tokensSec,
|
| 179 |
prompt: prompt,
|
| 180 |
-
})
|
| 181 |
-
setTimeout(resolve, 0)
|
| 182 |
-
})
|
| 183 |
-
tokensCount
|
| 184 |
}
|
| 185 |
self.postMessage({
|
| 186 |
status: "complete",
|
| 187 |
message: "complete",
|
| 188 |
output: prompt + sentence,
|
| 189 |
-
})
|
| 190 |
} catch (e) {
|
| 191 |
-
self.postMessage({ error: e })
|
| 192 |
}
|
| 193 |
}
|
|
|
|
| 1 |
+
import init, { Model } from "./build/m.js"
|
| 2 |
|
| 3 |
// Truncate a number to two decimal places (floor-based, so it never rounds up).
function fixTwo(x) {
  const scaled = x * 100
  return Math.floor(scaled) / 100
}
|
| 4 |
|
|
|
|
| 10 |
return `${fixTwo(size/1e12)}tb`
|
| 11 |
}
|
| 12 |
|
| 13 |
+
// Render a (possibly fractional) number of seconds as a human-readable
// duration, e.g. 3661 -> "1 hour 1 minute 1 second".
// Fixes vs. the previous version:
//  - the last clause tested the total `seconds` instead of the remaining
//    `second` count, so e.g. 120s rendered as "2 minutes 0 seconds";
//  - sub-second/zero input produced an empty string (bad for UI labels);
//  - output carried a trailing space;
//  - non-finite input (Infinity/NaN, possible while a download-rate estimate
//    is still warming up) rendered as "Infinity years".
function humanTime(seconds) {
  if (!Number.isFinite(seconds)) return "unknown"
  // Unit table, largest first; spans are in seconds (365-day year, 30-day month).
  const units = [
    ["year", 31536e3],
    ["month", 2592e3],
    ["day", 864e2],
    ["hour", 36e2],
    ["minute", 60],
    ["second", 1],
  ]
  const parts = []
  let remaining = Math.floor(seconds)
  for (const [name, span] of units) {
    const count = Math.floor(remaining / span)
    remaining -= count * span
    if (count > 0) parts.push(`${count} ${name}${count === 1 ? "" : "s"}`)
  }
  // Durations under one second still need readable output.
  if (parts.length === 0) return "0 seconds"
  return parts.join(" ")
}
|
| 43 |
+
|
| 44 |
// Timestamp (ms) of the last progress postMessage; throttles updates to ~4/s.
let lastSend = 0
// Previous remaining-time estimate (seconds); used to damp upward jumps.
let lastTime = Infinity
// Sliding window of the last four receivedLength samples (bytes), newest last.
let times = [0, 0, 0, 0]
|
| 47 |
|
| 48 |
// Fetch `url` as a Uint8Array, serving from (and filling) the Cache API
// cache, and posting throttled download-progress messages to the page.
// Fixes vs. the previous version:
//  - `while (!res.body) { }` busy-wait removed (it spun forever when body
//    was null and was dead code otherwise) -> explicit checks that throw;
//  - `res.ok` is now checked so HTTP errors don't get cached as data;
//  - `cache.put` is awaited instead of left as a floating promise;
//  - guards against division by zero when Content-Length is missing;
//  - `Math.floor(receivedLength * 100) / 100` dropped (receivedLength is
//    always an integer, so it was an identity).
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache"
  const cache = await caches.open(cacheName)
  const cachedResponse = await cache.match(url)
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer()
    return new Uint8Array(data)
  }
  const res = await fetch(url, { cache: "force-cache" })
  if (!res.ok) throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`)
  if (!res.body) throw new Error(`Response for ${url} has no readable body`)
  const reader = res.body.getReader()
  const contentLength = +(res.headers.get('Content-Length') ?? 0)
  let receivedLength = 0
  let chunks = []
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }
    chunks.push(value)
    receivedLength += value.length
    if (Date.now() - lastSend > 250) {
      // Estimate throughput from the deltas of the last three ~250ms samples.
      times.push(receivedLength)
      times = times.slice(1)
      const deltas = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
      const meanDelta = (deltas[0] + deltas[1] + deltas[2]) / 3
      const bytesPerSecond = meanDelta * 4 // samples are ~250ms apart
      const leftSize = contentLength - receivedLength
      let leftTime = bytesPerSecond > 0 ? Math.abs(leftSize / bytesPerSecond) : Infinity
      // Damp sudden upward jumps in the estimate so the UI doesn't flicker.
      if (leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
      lastTime = leftTime
      const percent = contentLength > 0 ? fixTwo((receivedLength / contentLength) * 100) : 0
      const downloadMessage = `Downloading... ${percent}% (${humanSize(receivedLength)})
Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
Total size: ${humanSize(fixTwo(contentLength))}
Download URL: ${url}`
      self.postMessage({ status: "loading", message: downloadMessage })
      lastSend = Date.now()
    }
  }
  // Reassemble the streamed chunks into one contiguous buffer.
  let chunksAll = new Uint8Array(receivedLength)
  let position = 0
  for (let chunk of chunks) {
    chunksAll.set(chunk, position)
    position += chunk.length
  }
  // Await the write so a failed cache insert surfaces instead of being lost.
  await cache.put(url, new Response(chunksAll))
  return chunksAll
}
|
| 99 |
|
| 100 |
// Download every URL in parallel and join the resulting byte arrays into a
// single contiguous Uint8Array, preserving the order of `urls`.
async function concatenateArrayBuffers(urls) {
  const buffers = await Promise.all(urls.map((url) => fetchArrayBuffer(url)))

  const totalBytes = buffers.reduce((sum, buf) => sum + buf.byteLength, 0)
  const joined = new Uint8Array(totalBytes)

  let cursor = 0
  for (const buf of buffers) {
    joined.set(new Uint8Array(buf), cursor)
    cursor += buf.byteLength
  }
  return joined
}
|
| 113 |
|
| 114 |
class Phi {
|
| 115 |
+
static instance = {}
|
| 116 |
|
| 117 |
static async getInstance(
|
| 118 |
weightsURL,
|
|
|
|
| 123 |
) {
|
| 124 |
// load individual modelID only once
|
| 125 |
if (!this.instance[modelID]) {
|
| 126 |
+
await init()
|
| 127 |
|
| 128 |
+
self.postMessage({ status: "loading", message: "Loading Model" })
|
| 129 |
const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
|
| 130 |
await Promise.all([
|
| 131 |
weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
|
| 132 |
fetchArrayBuffer(tokenizerURL),
|
| 133 |
fetchArrayBuffer(configURL),
|
| 134 |
+
])
|
| 135 |
|
| 136 |
this.instance[modelID] = new Model(
|
| 137 |
weightsArrayU8,
|
| 138 |
tokenizerArrayU8,
|
| 139 |
configArrayU8,
|
| 140 |
quantized
|
| 141 |
+
)
|
| 142 |
}
|
| 143 |
+
return this.instance[modelID]
|
| 144 |
}
|
| 145 |
}
|
| 146 |
|
| 147 |
+
// AbortController for the in-flight generation; null until a "start" arrives.
let controller = null
// Dispatch commands posted from the page to this worker.
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController()
    generate(event.data)
  } else if (event.data.command === "abort") {
    // Fix: an "abort" can arrive before any "start" has created a
    // controller; the previous unconditional call threw a TypeError.
    controller?.abort()
  }
})
|
| 156 |
|
| 157 |
async function generate(data) {
|
| 158 |
const {
|
|
|
|
| 167 |
repeatPenalty,
|
| 168 |
seed,
|
| 169 |
maxSeqLen,
|
| 170 |
+
stuff
|
| 171 |
+
} = data
|
| 172 |
try {
|
| 173 |
+
self.postMessage({ status: "loading", message: "Starting Phi" })
|
| 174 |
const model = await Phi.getInstance(
|
| 175 |
weightsURL,
|
| 176 |
modelID,
|
| 177 |
tokenizerURL,
|
| 178 |
configURL,
|
| 179 |
quantized
|
| 180 |
+
)
|
| 181 |
|
| 182 |
+
self.postMessage({ status: "loading", message: "Initializing model" })
|
| 183 |
const firstToken = model.init_with_prompt(
|
| 184 |
prompt,
|
| 185 |
temp,
|
|
|
|
| 187 |
repeatPenalty,
|
| 188 |
64,
|
| 189 |
BigInt(seed)
|
| 190 |
+
)
|
| 191 |
+
const seq_len = 2048
|
| 192 |
|
| 193 |
+
let sentence = firstToken
|
| 194 |
+
let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
|
| 195 |
+
let startTime = performance.now()
|
| 196 |
+
let tokensCount = 0
|
| 197 |
while (tokensCount < maxTokens) {
|
| 198 |
await new Promise(async (resolve) => {
|
| 199 |
if (controller && controller.signal.aborted) {
|
|
|
|
| 201 |
status: "aborted",
|
| 202 |
message: "Aborted",
|
| 203 |
output: prompt + sentence,
|
| 204 |
+
})
|
| 205 |
+
return
|
| 206 |
}
|
| 207 |
+
const token = await model.next_token()
|
| 208 |
+
const terminates = `<|endoftext|>, <|user|>, <|system|>, <|assistant|>`.split(', ').map(e => e.trim())
|
| 209 |
+
if (terminates.includes(token)) {
|
| 210 |
self.postMessage({
|
| 211 |
status: "complete",
|
| 212 |
message: "complete",
|
| 213 |
output: prompt + sentence,
|
| 214 |
+
})
|
| 215 |
+
return
|
| 216 |
}
|
| 217 |
const tokensSec =
|
| 218 |
+
((tokensCount + 1) / (performance.now() - startTime)) * 1000
|
| 219 |
|
| 220 |
+
sentence += token
|
| 221 |
self.postMessage({
|
| 222 |
status: "generating",
|
| 223 |
message: "Generating token",
|
|
|
|
| 226 |
totalTime: performance.now() - startTime,
|
| 227 |
tokensSec,
|
| 228 |
prompt: prompt,
|
| 229 |
+
})
|
| 230 |
+
setTimeout(resolve, 0)
|
| 231 |
+
})
|
| 232 |
+
tokensCount++
|
| 233 |
}
|
| 234 |
self.postMessage({
|
| 235 |
status: "complete",
|
| 236 |
message: "complete",
|
| 237 |
output: prompt + sentence,
|
| 238 |
+
})
|
| 239 |
} catch (e) {
|
| 240 |
+
self.postMessage({ error: e })
|
| 241 |
}
|
| 242 |
}
|