| import { | |
| getLlama, | |
| HarmonyChatWrapper, | |
| LlamaChatSession, | |
| } from "node-llama-cpp"; | |
| import {fileURLToPath} from "url"; | |
| import path from "path"; | |
// Directory containing this module; ES modules do not define __dirname themselves.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Location of the GGUF model file, resolved relative to this script.
const modelPath = path.join(
  __dirname,
  '..',
  '..',
  'models',
  'hf_giladgd_gpt-oss-20b.MXFP4.gguf',
);

const q1 = `What is hoisting in JavaScript? Explain with examples.`;

const llama = await getLlama();

// Declared outside the `try` so the `finally` block can release whatever was
// successfully created, even when a later step throws.
let model;
let context;
let session;

try {
  model = await llama.loadModel({ modelPath });
  context = await model.createContext();
  session = new LlamaChatSession({
    chatWrapper: new HarmonyChatWrapper(),
    contextSequence: context.getSequence(),
  });

  console.log('context.contextSize', context.contextSize);

  const a1 = await session.prompt(q1, {
    // Cap the response length; using the whole context size can be wasteful.
    maxTokens: 2000,
    // Receives generated text as it arrives; echo it for live output.
    onTextChunk: (text) => {
      process.stdout.write(text);
    },
  });

  console.log("\n\nFinal answer:\n", a1);
} finally {
  // Tear down in reverse order of creation. Each call is awaited so disposal
  // finishes before the next one starts and failures are not left as floating
  // promises (awaiting a non-promise return value is harmless).
  await session?.dispose();
  await context?.dispose();
  await model?.dispose();
  await llama.dispose();
}