Upload local.js
Browse files- reasoning/local.js +145 -0
reasoning/local.js
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// local.js
|
| 2 |
+
import { strictFormat } from '../utils/text.js';
|
| 3 |
+
import { log } from '../../logger.js';
|
| 4 |
+
|
| 5 |
+
export class Local {
    /**
     * Client for a local Ollama-compatible HTTP server.
     * @param {string} model_name - Model to use (falls back per-method when falsy).
     * @param {string} [url] - Base URL of the server; defaults to the standard Ollama port.
     */
    constructor(model_name, url) {
        this.model_name = model_name;
        this.url = url || 'http://127.0.0.1:11434';
        this.chat_endpoint = '/api/chat';
        this.embedding_endpoint = '/api/embeddings';
    }

    /**
     * Main method to handle chat requests.
     * Sends the conversation to the local chat endpoint, retrying up to 5 times
     * when a reasoning model returns a truncated <think> block, and shrinking
     * the context when the server reports a context-length overflow.
     * @param {Array<{role: string, content: string}>} turns - Conversation turns.
     * @param {string} systemMessage - System prompt injected at the front.
     * @returns {Promise<string>} The model's reply (or a fallback error string).
     */
    async sendRequest(turns, systemMessage) {
        // Choose the model name or default to 'llama3'
        const model = this.model_name || 'llama3';

        // Format messages and inject the system message at the front
        let messages = strictFormat(turns);
        messages.unshift({ role: 'system', content: systemMessage });
        console.log('Messages:', messages);

        // We'll do up to 5 attempts for reasoning models if the <think> tags are mismatched
        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);

            // Perform the actual request (wrapped in a try/catch)
            let res;
            try {
                const responseData = await this.send(this.chat_endpoint, {
                    model: model,
                    messages: messages,
                    stream: false
                });
                // The local endpoint returns { message: { content: "..." } }
                res = responseData?.message?.content || 'No response data.';
            } catch (err) {
                // Guard err.message: non-Error throws may lack it.
                // If context length exceeded and we have turns to remove, try again with one fewer turn
                if ((err.message || '').toLowerCase().includes('context length') && turns.length > 1) {
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage);
                } else {
                    console.log(err);
                    res = 'My brain disconnected, try again.';
                }
            }

            // Reasoning models ("deepseek-r1", "Andy_3.5") wrap chain-of-thought in
            // <think>...</think>. BUG FIX: the original condition was
            // `a && a.includes(x) || a.includes(y)` — `&&` binds tighter than `||`,
            // so a null model_name crashed on the second .includes. Parenthesized so
            // the null guard covers both checks.
            if (this.model_name && (this.model_name.includes("deepseek-r1") || this.model_name.includes("Andy_3.5"))) {
                const hasOpenTag = res.includes("<think>");
                const hasCloseTag = res.includes("</think>");

                // If there's a partial mismatch, we regenerate the response
                if ((hasOpenTag && !hasCloseTag) || (!hasOpenTag && hasCloseTag)) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    continue;
                }

                // If both tags appear, remove them (and everything inside)
                if (hasOpenTag && hasCloseTag) {
                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
                }
            }

            // LOGGING (exactly once — the original logged reasoning-model responses
            // twice, the first time before <think> stripping, leaking the
            // chain-of-thought into the log). Skip when the content looks like an error.
            if (res && !res.includes("Error:") && !res.includes("exception:")) {
                log(JSON.stringify(messages), res);
            } else {
                console.warn(`Not logging due to potential error in model response: ${res}`);
            }

            // We made it here with either a fully valid or not-needed-to-handle <think> scenario
            finalRes = res;
            break; // Break out of the while loop
        }

        // If all attempts produced partial <think> tags, finalRes was never set
        if (finalRes == null) {
            console.warn("Could not get a valid <think> block or normal response after max attempts.");
            finalRes = 'Response incomplete, please try again.';
        }

        return finalRes;
    }

    /**
     * Embedding method (unchanged).
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The embedding vector from the local endpoint.
     */
    async embed(text) {
        let model = this.model_name || 'nomic-embed-text';
        let body = { model: model, prompt: text };
        let res = await this.send(this.embedding_endpoint, body);
        return res['embedding'];
    }

    /**
     * Generic send method for the local endpoint.
     * @param {string} endpoint - Path relative to this.url.
     * @param {object} body - JSON-serializable request payload.
     * @returns {Promise<object|null>} Parsed JSON response.
     * @throws {Error} On network failure or non-OK HTTP status.
     */
    async send(endpoint, body) {
        const url = new URL(endpoint, this.url);
        const method = 'POST';
        const headers = new Headers();
        const request = new Request(url, {
            method,
            headers,
            body: JSON.stringify(body)
        });

        let data = null;
        try {
            const res = await fetch(request);
            if (res.ok) {
                data = await res.json();
            } else {
                throw new Error(`Ollama Status: ${res.status}`);
            }
        } catch (err) {
            console.error('Failed to send Ollama request.');
            console.error(err);
            throw err; // rethrow so we can catch it in the calling method
        }
        return data;
    }
}
|