Spaces:
Paused
Paused
| import fs from "node:fs/promises"; | |
| import os from "node:os"; | |
| import path from "node:path"; | |
| import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; | |
| import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; | |
/**
 * Mock batch embedder: yields one fixed vector `[0, 1, 0]` per input text.
 * Tests inspect `embedBatch.mock.calls` to see which texts were sent for embedding.
 */
const embedBatch = vi.fn(async (texts: string[]) => {
  return texts.map(() => [0, 1, 0]);
});

/** Mock query embedder: always resolves to the same fixed vector `[0, 1, 0]`. */
const embedQuery = vi.fn(async () => {
  return [0, 1, 0];
});
// Replace the embeddings module with a stub whose provider delegates to the
// mock functions declared in this file.
vi.mock("./embeddings.js", () => {
  // The mocks are read only when createEmbeddingProvider is actually called,
  // not when this factory runs, so the factory itself never touches them.
  const createEmbeddingProvider = async () => {
    const provider = {
      id: "mock",
      model: "mock-embed",
      maxInputTokens: 8192,
      embedQuery,
      embedBatch,
    };
    return { requestedProvider: "openai", provider };
  };
  return { createEmbeddingProvider };
});
/**
 * Verifies that the memory index never hands the embedding provider an input
 * larger than 8192 UTF-8 bytes, both when a single chunk is oversized and when
 * multibyte content is batched.
 */
describe("memory embedding token limits", () => {
  // Per-input ceiling asserted by both tests, measured in UTF-8 bytes.
  const MAX_INPUT_BYTES = 8192;

  let workspaceDir: string;
  let indexPath: string;
  let manager: MemoryIndexManager | null = null;

  /** Builds the agent config used by both tests; only the chunk token budget varies. */
  const buildConfig = (chunkTokens: number) => ({
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "openai",
          model: "mock-embed",
          store: { path: indexPath },
          chunking: { tokens: chunkTokens, overlap: 0 },
          sync: { watch: false, onSessionStart: false, onSearch: false },
          query: { minScore: 0 },
        },
      },
      list: [{ id: "main", default: true }],
    },
  });

  /** Writes `content` to a file named `fileName` inside the workspace's `memory` directory. */
  const writeMemoryFile = async (fileName: string, content: string): Promise<void> => {
    await fs.writeFile(path.join(workspaceDir, "memory", fileName), content);
  };

  /**
   * Obtains the manager for agent "main", records it in `manager` so that
   * `afterEach` can close it, and runs a forced sync.
   */
  const openAndSync = async (chunkTokens: number): Promise<void> => {
    const cfg = buildConfig(chunkTokens);
    const result = await getMemorySearchManager({ cfg, agentId: "main" });
    expect(result.manager).not.toBeNull();
    if (!result.manager) {
      throw new Error("manager missing");
    }
    manager = result.manager;
    await manager.sync({ force: true });
  };

  /** Flattens every text passed to `embedBatch` across all recorded calls. */
  const embeddedInputs = () => embedBatch.mock.calls.flatMap((call) => call[0] ?? []);

  beforeEach(async () => {
    // mockReset drops the implementations as well as the call history, so
    // re-install the fixed-vector behaviour for every test.
    embedBatch.mockReset();
    embedQuery.mockReset();
    embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0]));
    embedQuery.mockImplementation(async () => [0, 1, 0]);

    workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-token-"));
    indexPath = path.join(workspaceDir, "index.sqlite");
    await fs.mkdir(path.join(workspaceDir, "memory"));
  });

  afterEach(async () => {
    try {
      if (manager) {
        await manager.close();
      }
    } finally {
      // Always clear the handle and remove the temp workspace, even when
      // close() rejects; otherwise the directory leaks and the next test's
      // teardown would try to close the same manager again.
      manager = null;
      await fs.rm(workspaceDir, { recursive: true, force: true });
    }
  });

  it("splits oversized chunks so each embedding input stays <= 8192 UTF-8 bytes", async () => {
    // 9500 single-byte characters: more than the byte ceiling, while the
    // configured chunk budget (10_000 tokens) is larger than the content.
    await writeMemoryFile("2026-01-09.md", "x".repeat(9500));

    await openAndSync(10_000);

    const inputs = embeddedInputs();
    expect(inputs.length).toBeGreaterThan(1);
    expect(
      Math.max(...inputs.map((input) => Buffer.byteLength(input, "utf8"))),
    ).toBeLessThanOrEqual(MAX_INPUT_BYTES);
  });

  it("uses UTF-8 byte estimates when batching multibyte chunks", async () => {
    // Each line is 1800 emoji at 4 UTF-8 bytes apiece (7200 bytes), so a
    // single line fits under the ceiling but two lines together do not.
    const line = "😀".repeat(1800);
    await writeMemoryFile("2026-01-10.md", `${line}\n${line}\n${line}`);

    await openAndSync(1000);

    const batchSizes = embedBatch.mock.calls.map(
      (call) => (call[0] as string[] | undefined)?.length ?? 0,
    );
    expect(batchSizes.length).toBe(3);
    expect(batchSizes.every((size) => size === 1)).toBe(true);

    const inputs = embeddedInputs();
    expect(inputs.every((input) => Buffer.byteLength(input, "utf8") <= MAX_INPUT_BYTES)).toBe(true);
  });
});