// Default completion parameters; callers may override any of these via the
// `params` argument of llama().
const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"],
};

// Generation settings reported by the server, cached after the first
// completed request (or an explicit llamaModelInfo() call).
let generation_settings = null;
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
/**
 * Stream a completion from a llama.cpp server as an async generator.
 *
 * Sends `prompt` (merged with paramDefaults and `params`) to the server's
 * completion endpoint and yields one object per server-sent event; each
 * yielded object has a `data` property holding the parsed JSON payload
 * ({ content, stop, generation_settings?, timings?, ... }).
 *
 * @param {string} prompt - Prompt text to complete.
 * @param {object} [params] - Completion parameters merged over paramDefaults;
 *   `params.api_key`, if set, is sent as a Bearer token.
 * @param {object} [config] - { controller?: AbortController, api_url?: string,
 *   endpoint?: string }. The controller is aborted when the generator ends.
 * @returns {AsyncGenerator} yields chunk objects; the generator's return
 *   value is the full concatenated completion text.
 * @throws {Error} 'slot unavailable' when the server reports no free slot;
 *   fetch/abort errors are rethrown.
 */
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // buffer for a partially-received line

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // { stream: true } keeps multi-byte UTF-8 sequences that are split
      // across network chunks from decoding as replacement characters.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // If the chunk doesn't end with a line break, the final line is
      // incomplete: keep it in `leftover` for the next read.
      const endsWithLineBreak = text.endsWith('\n');
      let lines = text.split('\n');

      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = "";
      }

      for (const line of lines) {
        // A fresh non-global regex per line: a shared /g regex would carry
        // lastIndex state between exec() calls and silently skip matches.
        const match = /^(\S+):\s(.*)$/.exec(line);
        if (match) {
          result[match[1]] = match[2];
          // OpenAI-compatible servers terminate the stream with "data: [DONE]".
          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
          if (result.error) {
            let parsedError = null;
            try {
              parsedError = JSON.parse(result.error);
            } catch (e) {
              // not JSON; fall through to the raw log below
            }
            if (parsedError) {
              result.error = parsedError;
              if (parsedError.message.includes('slot unavailable')) {
                // Propagate so upstream callers can retry. (Previously this
                // throw was swallowed by the surrounding catch block.)
                throw new Error('slot unavailable');
              }
              console.error(`llama.cpp error [${parsedError.code} - ${parsedError.type}]: ${parsedError.message}`);
            } else {
              console.error(`llama.cpp error ${result.error}`);
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | export const llamaEventTarget = (prompt, params = {}, config = {}) => {
|
| | const eventTarget = new EventTarget();
|
| | (async () => {
|
| | let content = "";
|
| | for await (const chunk of llama(prompt, params, config)) {
|
| | if (chunk.data) {
|
| | content += chunk.data.content;
|
| | eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
|
| | }
|
| | if (chunk.data.generation_settings) {
|
| | eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
|
| | }
|
| | if (chunk.data.timings) {
|
| | eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
|
| | }
|
| | }
|
| | eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
|
| | })();
|
| | return eventTarget;
|
| | }
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
/**
 * Run a completion and resolve with the full generated text.
 *
 * Rewritten as a plain async function: the original wrapped an async
 * callback in `new Promise(...)` (explicit-construction anti-pattern),
 * which adds no behavior an async arrow doesn't already provide.
 *
 * @param {string} prompt
 * @param {object} [params]
 * @param {object} [config]
 * @returns {Promise<string>} resolves with the concatenated completion;
 *   rejects if the underlying stream throws.
 */
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
|
| |
|
| | |
| | |
| |
|
/**
 * Callback-style wrapper around llama(): invokes `callback` once per
 * streamed chunk. Kept for compatibility with the llama.cpp examples.
 *
 * @param {object} params - Completion parameters; `params.prompt` is the prompt.
 * @param {AbortController} controller - Passed through as config.controller.
 * @param {(chunk: object) => void} callback - Called with each yielded chunk.
 * @returns {Promise<void>} resolves when the stream is exhausted.
 */
export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const piece of stream) {
    callback(piece);
  }
}
|
| |
|
| |
|
/**
 * Return the server's default generation settings, fetching them from the
 * `/props` endpoint on first use and caching the result in the module-level
 * `generation_settings` variable thereafter.
 *
 * @param {object} [config] - { api_url?: string } (trailing slashes stripped).
 * @returns {Promise<object>} the cached or freshly fetched settings.
 */
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const baseUrl = config.api_url?.replace(/\/+$/, '') || "";
    const response = await fetch(`${baseUrl}/props`);
    const props = await response.json();
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}
|
| |
|