// qmd-web/src/pipeline/vectorSearch.test.ts
// shreyask's picture
// add embeddings and vector search modules
// eb46abf verified
import { describe, it, expect } from "vitest";
import { cosineSimilarity, vectorSearch } from "./vectorSearch";
import type { EmbeddedChunk } from "../types";
// ---------------------------------------------------------------------------
// Helper to create an EmbeddedChunk with a given embedding
// ---------------------------------------------------------------------------
/**
 * Builds an EmbeddedChunk test fixture around the supplied embedding values.
 *
 * @param embedding - Vector components; copied into a new Float32Array.
 * @param docId - Document identifier stored on the chunk (defaults to "doc1").
 * @param chunkIndex - Index of the chunk within its document (defaults to 0).
 * @returns A chunk whose text is derived from `chunkIndex` and `docId`, with
 *   `startChar` fixed at 0 and `title` fixed at "Test".
 */
function makeEmbeddedChunk(
  embedding: number[],
  docId = "doc1",
  chunkIndex = 0,
): EmbeddedChunk {
  // Store the components as 32-bit floats, which is how the fixture keeps them.
  const vector = new Float32Array(embedding);
  // Placeholder text that makes each fixture identifiable in failure output.
  const text = `chunk ${chunkIndex} of ${docId}`;

  const fixture: EmbeddedChunk = {
    docId,
    chunkIndex,
    text,
    startChar: 0,
    title: "Test",
    embedding: vector,
  };
  return fixture;
}
// ---------------------------------------------------------------------------
// cosineSimilarity
// ---------------------------------------------------------------------------
describe("cosineSimilarity", () => {
  // Shorthand for building a Float32Array operand from literal components.
  const vec = (...components: number[]) => new Float32Array(components);

  it("returns 1 for identical vectors", () => {
    // The same instance is passed on both sides.
    const same = vec(1, 2, 3);
    const similarity = cosineSimilarity(same, same);
    expect(similarity).toBeCloseTo(1.0, 5);
  });

  it("returns -1 for opposite vectors", () => {
    const positiveX = vec(1, 0, 0);
    const negativeX = vec(-1, 0, 0);
    const similarity = cosineSimilarity(positiveX, negativeX);
    expect(similarity).toBeCloseTo(-1.0, 5);
  });

  it("returns 0 for orthogonal vectors", () => {
    const xAxis = vec(1, 0, 0);
    const yAxis = vec(0, 1, 0);
    const similarity = cosineSimilarity(xAxis, yAxis);
    expect(similarity).toBeCloseTo(0.0, 5);
  });

  it("is symmetric", () => {
    const left = vec(1, 2, 3);
    const right = vec(4, 5, 6);
    // Swapping the operands must not change the result.
    const forward = cosineSimilarity(left, right);
    const backward = cosineSimilarity(right, left);
    expect(forward).toBeCloseTo(backward, 10);
  });

  it("is scale-invariant", () => {
    const unit = vec(1, 2, 3);
    const doubled = vec(2, 4, 6); // every component of `unit` times 2
    const similarity = cosineSimilarity(unit, doubled);
    expect(similarity).toBeCloseTo(1.0, 5);
  });

  it("computes correct value for known vectors", () => {
    // a = [3, 4], b = [4, 3]:
    // dot = 12 + 12 = 24, |a| = 5, |b| = 5 → cos = 24/25 = 0.96
    const first = vec(3, 4);
    const second = vec(4, 3);
    const similarity = cosineSimilarity(first, second);
    expect(similarity).toBeCloseTo(0.96, 2);
  });
});
// ---------------------------------------------------------------------------
// vectorSearch
// ---------------------------------------------------------------------------
describe("vectorSearch", () => {
  // The query lies on the x-axis; each fixture is annotated with the cosine
  // of the angle between its embedding and that query.
  const queryVector = new Float32Array([1, 0, 0]);
  const corpus: EmbeddedChunk[] = [
    makeEmbeddedChunk([0, 1, 0], "orthogonal", 0), // cos = 0
    makeEmbeddedChunk([1, 0, 0], "identical", 0), // cos = 1
    makeEmbeddedChunk([0.7, 0.7, 0], "similar", 0), // cos ≈ 0.707
    makeEmbeddedChunk([-1, 0, 0], "opposite", 0), // cos = -1
    makeEmbeddedChunk([0.9, 0.1, 0], "very-similar", 0), // cos ≈ 0.994
  ];

  it("returns results sorted by descending score", () => {
    const ranked = vectorSearch(queryVector, corpus);
    // Each hit after the first must score no higher than its predecessor.
    for (const [position, hit] of ranked.entries()) {
      if (position === 0) {
        continue;
      }
      expect(hit.score).toBeLessThanOrEqual(ranked[position - 1].score);
    }
  });

  it("ranks identical vector highest", () => {
    const [best] = vectorSearch(queryVector, corpus);
    expect(best.chunk.docId).toBe("identical");
  });

  it("ranks opposite vector lowest", () => {
    const ranked = vectorSearch(queryVector, corpus);
    const worst = ranked[ranked.length - 1];
    expect(worst.chunk.docId).toBe("opposite");
  });

  it("all results have source 'vector'", () => {
    const ranked = vectorSearch(queryVector, corpus);
    for (const { source } of ranked) {
      expect(source).toBe("vector");
    }
  });

  it("respects topK parameter", () => {
    const limit = 2;
    const topHits = vectorSearch(queryVector, corpus, limit);
    expect(topHits.length).toBe(2);
    // Check the size first, then unpack the two expected leaders in order.
    const [first, second] = topHits;
    expect(first.chunk.docId).toBe("identical");
    expect(second.chunk.docId).toBe("very-similar");
  });

  it("returns all when topK exceeds chunk count", () => {
    const oversizedLimit = 100;
    const ranked = vectorSearch(queryVector, corpus, oversizedLimit);
    expect(ranked.length).toBe(corpus.length);
  });

  it("handles empty chunks array", () => {
    const ranked = vectorSearch(queryVector, []);
    expect(ranked).toEqual([]);
  });

  it("handles single chunk", () => {
    const onlyChunk = makeEmbeddedChunk([0.5, 0.5, 0], "only", 0);
    const ranked = vectorSearch(queryVector, [onlyChunk]);
    expect(ranked.length).toBe(1);
    const [hit] = ranked;
    expect(hit.source).toBe("vector");
    expect(hit.score).toBeGreaterThan(0);
  });
});