|
|
const { getEncodingNameForModel, getEncoding } = require("js-tiktoken"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tokenizer helper built on top of js-tiktoken.
 *
 * Construction behaves like a model-keyed singleton: calling
 * `new TokenManager(model)` with the same model as the previous call returns
 * the previously built instance instead of creating a new encoder. Asking for
 * a different model builds a fresh instance and makes it the shared one.
 */
class TokenManager {
  /** Most recently constructed manager; reused while the model stays the same. */
  static instance = null;

  /** Model name that `TokenManager.instance` was constructed for. */
  static currentModel = null;

  /**
   * @param {string} [model="gpt-3.5-turbo"] - Model name used to pick the encoding.
   * @returns {TokenManager} The shared instance when `model` matches the
   *   current one, otherwise the newly initialized instance.
   */
  constructor(model = "gpt-3.5-turbo") {
    if (TokenManager.instance && TokenManager.currentModel === model) {
      this.log("Returning existing instance for model:", model);
      // Returning an object from a constructor makes `new` evaluate to it.
      return TokenManager.instance;
    }

    this.model = model;
    this.encoderName = this.#getEncodingFromModel(model);
    this.encoder = getEncoding(this.encoderName);

    TokenManager.instance = this;
    TokenManager.currentModel = model;
    this.log("Initialized new TokenManager instance for model:", model);
    return this;
  }

  /**
   * Writes a message to stdout prefixed with a magenta `[TokenManager]` tag.
   * @param {string} text - Message text.
   * @param {...any} args - Extra values forwarded to `console.log`.
   */
  log(text, ...args) {
    console.log(`\x1b[35m[TokenManager]\x1b[0m ${text}`, ...args);
  }

  /**
   * Resolves the encoding name for a model, falling back to "cl100k_base"
   * when `getEncodingNameForModel` throws (e.g. for a model it does not know).
   * @param {string} model
   * @returns {string} Encoding name to pass to `getEncoding`.
   */
  #getEncodingFromModel(model) {
    try {
      return getEncodingNameForModel(model);
    } catch {
      return "cl100k_base";
    }
  }

  /**
   * Encodes the input into tokens.
   *
   * Non-string input is coerced with `String()`; `null`/`undefined` are
   * treated as the empty string so they are not tokenized as the literal
   * words "null"/"undefined". Encoding failures are logged and yield `[]`
   * (best-effort: callers get a zero count instead of an exception).
   *
   * @param {*} [input=""] - Text (or value coerced to text) to encode.
   * @returns {number[]} Token ids, or `[]` if encoding failed.
   */
  tokensFromString(input = "") {
    try {
      // The empty third argument is the encoder's "disallowed special" list;
      // presumably this lets special-token text in user input be encoded as
      // plain text instead of raising - confirm against js-tiktoken docs.
      return this.encoder.encode(String(input ?? ""), undefined, []);
    } catch (e) {
      console.error(e);
      return [];
    }
  }

  /**
   * Decodes tokens with the active encoder.
   * @param {number[]} [tokens=[]] - Token ids; `null`/`undefined` decode as `[]`.
   * @returns {*} Whatever `encoder.decode` returns (decoded text in js-tiktoken).
   */
  bytesFromTokens(tokens = []) {
    return this.encoder.decode(tokens ?? []);
  }

  /**
   * Counts the tokens in the input.
   * @param {*} [input=""] - Text (or value coerced to text) to measure.
   * @returns {number} Token count; 0 when the input is nullish or encoding failed.
   */
  countFromString(input = "") {
    return this.tokensFromString(input).length;
  }

  /**
   * Estimates the token usage of a string or of an array of chat messages.
   *
   * For an array, the estimate is the sum of each entry's `content` token
   * count plus fixed overheads: 3 tokens per message and a constant 5.
   * NOTE(review): the 3 and 5 appear to be heuristics approximating
   * chat-format overhead - confirm against the provider's counting rules.
   *
   * @param {string|Array<{content: *}>} input - Raw text or message list.
   * @returns {number} Estimated token count.
   * @throws {Error} When `input` is neither a string nor an array.
   */
  statsFrom(input) {
    if (typeof input === "string") return this.countFromString(input);

    if (Array.isArray(input)) {
      const perMessageFactorTokens = input.length * 3;
      const tokensFromContent = input.reduce(
        (total, message) => total + this.countFromString(message.content),
        0
      );
      const diffCoefficient = 5;
      return perMessageFactorTokens + tokensFromContent + diffCoefficient;
    }

    throw new Error("Not a supported tokenized format.");
  }
}
|
|
|
|
|
// Public CommonJS surface of this module: only the TokenManager class.
module.exports = { TokenManager };
|
|
|