| | import { describe, it, expect } from "vitest"; |
| | import { cosineSimilarity, vectorSearch } from "./vectorSearch"; |
| | import type { EmbeddedChunk } from "../types"; |
| |
|
| | |
| | |
| | |
/**
 * Test helper: builds an `EmbeddedChunk` fixture around a given embedding.
 *
 * The plain number array is copied into a `Float32Array`; `text` is derived
 * from `chunkIndex` and `docId`, while `startChar` and `title` are fixed
 * placeholder values (`0` and `"Test"`).
 *
 * @param embedding - Vector components to store as the chunk's embedding.
 * @param docId - Document identifier for the chunk (defaults to `"doc1"`).
 * @param chunkIndex - Position of the chunk within its document (defaults to `0`).
 * @returns A fully populated `EmbeddedChunk` suitable for search tests.
 */
function makeEmbeddedChunk(
  embedding: number[],
  docId = "doc1",
  chunkIndex = 0,
): EmbeddedChunk {
  return {
    docId,
    chunkIndex,
    text: `chunk ${chunkIndex} of ${docId}`,
    startChar: 0,
    title: "Test",
    embedding: new Float32Array(embedding),
  };
}
| |
|
| | |
| | |
| | |
// Unit tests for cosineSimilarity: boundary values (1, -1, 0), symmetry,
// scale invariance, and one hand-computed reference value.
describe("cosineSimilarity", () => {
  it("returns 1 for identical vectors", () => {
    const v = new Float32Array([1, 2, 3]);
    expect(cosineSimilarity(v, v)).toBeCloseTo(1.0, 5);
  });

  it("returns -1 for opposite vectors", () => {
    const a = new Float32Array([1, 0, 0]);
    const b = new Float32Array([-1, 0, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(-1.0, 5);
  });

  it("returns 0 for orthogonal vectors", () => {
    const a = new Float32Array([1, 0, 0]);
    const b = new Float32Array([0, 1, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(0.0, 5);
  });

  it("is symmetric", () => {
    const a = new Float32Array([1, 2, 3]);
    const b = new Float32Array([4, 5, 6]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(cosineSimilarity(b, a), 10);
  });

  it("is scale-invariant", () => {
    // b is exactly 2 * a, so the angle between them is zero.
    const a = new Float32Array([1, 2, 3]);
    const b = new Float32Array([2, 4, 6]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(1.0, 5);
  });

  it("computes correct value for known vectors", () => {
    // dot(a, b) = 3*4 + 4*3 = 24 and |a| = |b| = 5,
    // so the expected cosine is 24 / 25 = 0.96.
    const a = new Float32Array([3, 4]);
    const b = new Float32Array([4, 3]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(0.96, 2);
  });
});
| |
|
| | |
| | |
| | |
// Unit tests for vectorSearch: ranking order, result metadata, topK handling,
// and degenerate inputs (empty and single-element chunk lists).
describe("vectorSearch", () => {
  // Query is the unit vector along the first axis.
  const query = new Float32Array([1, 0, 0]);

  // Fixtures are deliberately listed out of rank order so that a passing test
  // cannot rely on input order. Each docId names the chunk's relationship to
  // the query vector.
  const chunks: EmbeddedChunk[] = [
    makeEmbeddedChunk([0, 1, 0], "orthogonal", 0),
    makeEmbeddedChunk([1, 0, 0], "identical", 0),
    makeEmbeddedChunk([0.7, 0.7, 0], "similar", 0),
    makeEmbeddedChunk([-1, 0, 0], "opposite", 0),
    makeEmbeddedChunk([0.9, 0.1, 0], "very-similar", 0),
  ];

  it("returns results sorted by descending score", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass: the ordering loop below checks nothing
    // unless at least two results come back.
    expect(results.length).toBeGreaterThan(1);
    for (let i = 1; i < results.length; i++) {
      expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
    }
  });

  it("ranks identical vector highest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[0].chunk.docId).toBe("identical");
  });

  it("ranks opposite vector lowest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[results.length - 1].chunk.docId).toBe("opposite");
  });

  it("all results have source 'vector'", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass when no results are returned.
    expect(results.length).toBeGreaterThan(0);
    for (const r of results) {
      expect(r.source).toBe("vector");
    }
  });

  it("respects topK parameter", () => {
    const results = vectorSearch(query, chunks, 2);
    expect(results.length).toBe(2);
    expect(results[0].chunk.docId).toBe("identical");
    expect(results[1].chunk.docId).toBe("very-similar");
  });

  it("returns all when topK exceeds chunk count", () => {
    const results = vectorSearch(query, chunks, 100);
    expect(results.length).toBe(chunks.length);
  });

  it("handles empty chunks array", () => {
    const results = vectorSearch(query, []);
    expect(results).toEqual([]);
  });

  it("handles single chunk", () => {
    const single = [makeEmbeddedChunk([0.5, 0.5, 0], "only", 0)];
    const results = vectorSearch(query, single);
    expect(results.length).toBe(1);
    expect(results[0].source).toBe("vector");
    expect(results[0].score).toBeGreaterThan(0);
  });
});
| |
|