| import express from "express" |
| import { python } from 'pythonia' |
|
|
| import { daisy } from "./daisy.mts" |
| import { alpine } from "./alpine.mts" |
|
|
| |
// Open the Python `ctransformers` module through the pythonia bridge and keep
// only the causal-LM loader class, which is all this server uses.
const { AutoModelForCausalLM } = await python("ctransformers")
|
|
| |
/**
 * Concatenated `<link rel="stylesheet">` tags, one per stylesheet URL.
 * The result is spliced into the `<head>` of the HTML prefix sent to clients.
 */
const css = ["/css/daisyui@2.6.0.css"].reduce(
  (markup, href) => markup + `<link href="${href}" rel="stylesheet" type="text/css"/>`,
  "",
)
|
|
/**
 * Concatenated `<script src>` tags, one per script URL.
 * The result is spliced into the `<head>` of the HTML prefix sent to clients.
 */
const script = ["/js/tailwindcss@3.3.2.js"].reduce(
  (markup, src) => markup + `<script src="${src}"></script>`,
  "",
)
|
|
| |
// Load the model once at startup; every request handler shares this instance.
// The `$` suffix is pythonia's calling convention for "the last argument is a
// dict of Python keyword arguments".
// NOTE(review): presumably the weights are fetched from the Hugging Face Hub on
// first use and cached afterwards — confirm against the ctransformers docs.
const llm = await AutoModelForCausalLM.from_pretrained$("TheBloke/WizardCoder-15B-1.0-GGML", {
  model_type: "starcoder",
  model_file: "WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
})
|
|
// TCP port the HTTP server listens on.
const port = 7860
const app = express()

// Longest time, in seconds, a single generation may keep its slot before it is
// forcibly ended.
const timeoutInSec = 60 * 8
console.log("timeout set to 8 minutes")

// Static assets are served from the `public` directory.
app.use(express.static("public"))

// How many generations may run at the same time; further requests are refused.
const maxParallelRequests = 1
|
|
/** In-memory bookkeeping for generation requests. */
interface PendingRequests {
  /** Count of requests accepted so far; also the source of new request ids. */
  total: number
  /** Ids of the requests that are currently being served. */
  queue: string[]
}

const pending: PendingRequests = { total: 0, queue: [] }
| |
/**
 * Marks a request as finished by removing its id from `pending.queue`.
 *
 * Does nothing (and logs nothing) when `id` is empty or is not currently in
 * the queue, so calling it several times for the same request is harmless.
 *
 * @param id - Identifier of the request to end.
 * @param reason - Human-readable explanation included in the log line.
 */
const endRequest = (id: string, reason: string) => {
  const isTracked = Boolean(id) && pending.queue.indexOf(id) !== -1
  if (isTracked) {
    // Rebuild the queue without this id rather than mutating it in place.
    pending.queue = pending.queue.filter(queuedId => queuedId !== id)
    console.log(`request ${id} ended (${reason})`)
  }
}
|
|
/**
 * GET /debug — reports the request bookkeeping as JSON:
 * `nbTotal` (requests accepted so far), `nbPending` (requests in flight) and
 * `queue` (ids of the requests in flight).
 */
app.get("/debug", (_req, res) => {
  // `res.json` serializes the payload and sets `Content-Type: application/json`,
  // which a bare `res.write(JSON.stringify(...))` leaves out.
  res.json({
    nbTotal: pending.total,
    nbPending: pending.queue.length,
    queue: pending.queue,
  })
})
|
|
/**
 * Extracts the prompt text from a raw Express query value.
 *
 * Express may hand back a string, an array (repeated parameter), a nested
 * object, or `undefined`. Only a string — or the first element of an array
 * when that is a string — is accepted; anything else yields `""`.
 */
const readPromptParam = (raw: unknown): string => {
  const candidate: unknown = Array.isArray(raw) ? raw[0] : raw
  return typeof candidate === "string" ? candidate : ""
}

/**
 * GET /?prompt=... — streams an LLM-generated web page to the client.
 *
 * Flow:
 *  1. Refuse the request when `maxParallelRequests` generations are running.
 *  2. Reject requests without a usable `prompt` query parameter (HTTP 400).
 *  3. Register the request in `pending`, send the HTML prefix immediately, and
 *     stream each detokenized token as it is produced.
 *  4. Stop early when the client disconnects or `timeoutInSec` elapses; both
 *     paths remove the id from `pending.queue`, which the token loop polls.
 */
app.get("/", async (req, res) => {
  if (pending.queue.length >= maxParallelRequests) {
    res.write("sorry, max nb of parallel requests reached")
    res.end()
    return
  }

  // Validate before claiming a slot: a missing or non-string prompt would
  // otherwise be interpolated as "undefined" / "[object Object]" and occupy
  // the generator for nothing.
  const userPrompt = readPromptParam(req.query.prompt)
  if (userPrompt.trim() === "") {
    res.status(400).send("missing or invalid 'prompt' query parameter")
    return
  }

  const id = `${pending.total++}`
  console.log(`new request ${id}`)
  pending.queue.push(id)

  // The body is streamed HTML; declare it so clients do not have to sniff it.
  res.setHeader("Content-Type", "text/html; charset=utf-8")

  // Sent right away so the browser can start loading CSS/JS while the model
  // is still warming up. The same prefix also ends the prompt, so the model
  // continues the document from exactly this point.
  const prefix = `<html><head>${css}${script}`
  res.write(prefix)

  // The response's `close` event fires when the connection is torn down
  // before completion (and also after a normal end, where this is a no-op
  // because the id has already been removed).
  res.on("close", () => {
    endRequest(id, "browser ended the connection")
  })

  // Hard cap on generation time; the handle is kept so the timer can be
  // cancelled as soon as the request is over.
  const timeoutHandle = setTimeout(() => {
    endRequest(id, `timed out after ${timeoutInSec}s`)
  }, timeoutInSec * 1000)

  // NOTE: `userPrompt` is untrusted text placed verbatim in the model prompt,
  // and the model output is written to the client as raw HTML.
  const finalPrompt = `# Context
Generate a webpage written in English about: ${userPrompt}.
# Documentation
${daisy}
${alpine}
# Guidelines
- You use Tailwind CSS and DaisyUI!
- You MUST use English, not Latin! (I repeat: do NOT write lorem ipsum!)
- Use a central layout by wrapping everything in a \`<div class="flex flex-col justify-center">\`
# Result output
${prefix}`

  try {
    const inputTokens = await llm.tokenize(finalPrompt)
    console.log("initializing the generator (may take 30s or more)")
    const generator = await llm.generate(inputTokens)

    for await (const token of generator) {
      // The id disappears from the queue on disconnect or timeout.
      if (!pending.queue.includes(id)) {
        break
      }
      const text = await llm.detokenize(token)
      process.stdout.write(text)
      res.write(text)
    }

    endRequest(id, `normal end of the llama stream for request ${id}`)
  } catch (e) {
    endRequest(id, `premature end of the llama stream for request ${id} (${e})`)
  } finally {
    // Without this, every finished request would leave a timer (and the
    // closure it captures) alive for the full timeout period.
    clearTimeout(timeoutHandle)
  }

  try {
    res.end()
  } catch (err) {
    console.log(`couldn't end the HTTP stream for request ${id} (${err})`)
  }
})
|
|
// Start the HTTP server and print a ready-to-click example URL.
app.listen(port, () => {
  const exampleUrl = `http://localhost:${port}/?prompt=a%20landing%20page%20for%20a%20company%20called%20Hugging%20Face`
  console.log(`Open ${exampleUrl}`)
})
|
|
|
|