Spaces:

Gaston895
/

opengsstec

Paused

opengsstec/src/memory/manager.embedding-token-limit.test.ts

OpenClaw Deploy

Deploy OpenClaw to Hugging Face

c1243f9 about 2 months ago

3.99 kB

	import fs from "node:fs/promises";
	import os from "node:os";
	import path from "node:path";
	import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
	import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";

	const embedBatch = vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0]));
	const embedQuery = vi.fn(async () => [0, 1, 0]);

	vi.mock("./embeddings.js", () => ({
	createEmbeddingProvider: async () => ({
	requestedProvider: "openai",
	provider: {
	id: "mock",
	model: "mock-embed",
	maxInputTokens: 8192,
	embedQuery,
	embedBatch,
	},
	}),
	}));

	describe("memory embedding token limits", () => {
	let workspaceDir: string;
	let indexPath: string;
	let manager: MemoryIndexManager \| null = null;

	beforeEach(async () => {
	embedBatch.mockReset();
	embedQuery.mockReset();
	embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0]));
	embedQuery.mockImplementation(async () => [0, 1, 0]);
	workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-token-"));
	indexPath = path.join(workspaceDir, "index.sqlite");
	await fs.mkdir(path.join(workspaceDir, "memory"));
	});

	afterEach(async () => {
	if (manager) {
	await manager.close();
	manager = null;
	}
	await fs.rm(workspaceDir, { recursive: true, force: true });
	});

	it("splits oversized chunks so each embedding input stays <= 8192 UTF-8 bytes", async () => {
	const content = "x".repeat(9500);
	await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-09.md"), content);

	const cfg = {
	agents: {
	defaults: {
	workspace: workspaceDir,
	memorySearch: {
	provider: "openai",
	model: "mock-embed",
	store: { path: indexPath },
	chunking: { tokens: 10_000, overlap: 0 },
	sync: { watch: false, onSessionStart: false, onSearch: false },
	query: { minScore: 0 },
	},
	},
	list: [{ id: "main", default: true }],
	},
	};

	const result = await getMemorySearchManager({ cfg, agentId: "main" });
	expect(result.manager).not.toBeNull();
	if (!result.manager) {
	throw new Error("manager missing");
	}
	manager = result.manager;
	await manager.sync({ force: true });

	const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
	expect(inputs.length).toBeGreaterThan(1);
	expect(
	Math.max(...inputs.map((input) => Buffer.byteLength(input, "utf8"))),
	).toBeLessThanOrEqual(8192);
	});

	it("uses UTF-8 byte estimates when batching multibyte chunks", async () => {
	const line = "😀".repeat(1800);
	const content = `${line}\n${line}\n${line}`;
	await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-10.md"), content);

	const cfg = {
	agents: {
	defaults: {
	workspace: workspaceDir,
	memorySearch: {
	provider: "openai",
	model: "mock-embed",
	store: { path: indexPath },
	chunking: { tokens: 1000, overlap: 0 },
	sync: { watch: false, onSessionStart: false, onSearch: false },
	query: { minScore: 0 },
	},
	},
	list: [{ id: "main", default: true }],
	},
	};

	const result = await getMemorySearchManager({ cfg, agentId: "main" });
	expect(result.manager).not.toBeNull();
	if (!result.manager) {
	throw new Error("manager missing");
	}
	manager = result.manager;
	await manager.sync({ force: true });

	const batchSizes = embedBatch.mock.calls.map(
	(call) => (call[0] as string[] \| undefined)?.length ?? 0,
	);
	expect(batchSizes.length).toBe(3);
	expect(batchSizes.every((size) => size === 1)).toBe(true);
	const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
	expect(inputs.every((input) => Buffer.byteLength(input, "utf8") <= 8192)).toBe(true);
	});
	});