File size: 4,397 Bytes
eb46abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import { describe, it, expect } from "vitest";
import { cosineSimilarity, vectorSearch } from "./vectorSearch";
import type { EmbeddedChunk } from "../types";

// ---------------------------------------------------------------------------
// Helper to create an EmbeddedChunk with a given embedding
// ---------------------------------------------------------------------------
/**
 * Builds an EmbeddedChunk fixture around the supplied embedding values.
 *
 * @param embedding - Vector components; copied into a new Float32Array.
 * @param docId - Document identifier stored on the chunk (default "doc1").
 * @param chunkIndex - Index stored on the chunk (default 0).
 * @returns A chunk whose `text` is derived from `chunkIndex` and `docId`,
 *   with `startChar` fixed at 0 and `title` fixed at "Test".
 */
function makeEmbeddedChunk(
  embedding: number[],
  docId = "doc1",
  chunkIndex = 0,
): EmbeddedChunk {
  const vector = new Float32Array(embedding);
  const text = `chunk ${chunkIndex} of ${docId}`;

  return { docId, chunkIndex, text, startChar: 0, title: "Test", embedding: vector };
}

// ---------------------------------------------------------------------------
// cosineSimilarity
// ---------------------------------------------------------------------------
describe("cosineSimilarity", () => {
  // Shorthand for building a Float32Array from literal components.
  const vec = (...components: number[]): Float32Array =>
    new Float32Array(components);

  it("returns 1 for identical vectors", () => {
    const same = vec(1, 2, 3);
    const score = cosineSimilarity(same, same);
    expect(score).toBeCloseTo(1.0, 5);
  });

  it("returns -1 for opposite vectors", () => {
    const score = cosineSimilarity(vec(1, 0, 0), vec(-1, 0, 0));
    expect(score).toBeCloseTo(-1.0, 5);
  });

  it("returns 0 for orthogonal vectors", () => {
    const score = cosineSimilarity(vec(1, 0, 0), vec(0, 1, 0));
    expect(score).toBeCloseTo(0.0, 5);
  });

  it("is symmetric", () => {
    const left = vec(1, 2, 3);
    const right = vec(4, 5, 6);
    // Swapping the arguments must not change the result.
    const forward = cosineSimilarity(left, right);
    const backward = cosineSimilarity(right, left);
    expect(forward).toBeCloseTo(backward, 10);
  });

  it("is scale-invariant", () => {
    const base = vec(1, 2, 3);
    const doubled = vec(2, 4, 6); // every component of `base` times 2
    const score = cosineSimilarity(base, doubled);
    expect(score).toBeCloseTo(1.0, 5);
  });

  it("computes correct value for known vectors", () => {
    // Hand-computed: (3*4 + 4*3) / (5 * 5) = 24 / 25 = 0.96,
    // since both [3, 4] and [4, 3] have length 5.
    const score = cosineSimilarity(vec(3, 4), vec(4, 3));
    expect(score).toBeCloseTo(0.96, 2);
  });
});

// ---------------------------------------------------------------------------
// vectorSearch
// ---------------------------------------------------------------------------
describe("vectorSearch", () => {
  // All fixtures are compared against this single query vector; the expected
  // cosine of each fixture relative to it is noted beside the fixture.
  const query = new Float32Array([1, 0, 0]);

  // Deliberately listed out of rank order so that ordering assertions cannot
  // pass merely because the input order was preserved.
  const chunks: EmbeddedChunk[] = [
    makeEmbeddedChunk([0, 1, 0], "orthogonal", 0), // cos = 0
    makeEmbeddedChunk([1, 0, 0], "identical", 0), // cos = 1
    makeEmbeddedChunk([0.7, 0.7, 0], "similar", 0), // cos ≈ 0.707
    makeEmbeddedChunk([-1, 0, 0], "opposite", 0), // cos = -1
    makeEmbeddedChunk([0.9, 0.1, 0], "very-similar", 0), // cos ≈ 0.994
  ];

  it("returns results sorted by descending score", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass: with fewer than two results the loop
    // below performs no comparisons at all.
    expect(results.length).toBeGreaterThan(1);
    for (let i = 1; i < results.length; i++) {
      expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
    }
  });

  it("ranks identical vector highest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[0].chunk.docId).toBe("identical");
  });

  it("ranks opposite vector lowest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[results.length - 1].chunk.docId).toBe("opposite");
  });

  it("all results have source 'vector'", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass: an empty result set would skip the loop.
    expect(results.length).toBeGreaterThan(0);
    for (const r of results) {
      expect(r.source).toBe("vector");
    }
  });

  it("respects topK parameter", () => {
    const results = vectorSearch(query, chunks, 2);
    expect(results.length).toBe(2);
    // The two fixtures with the highest expected cosine, in rank order.
    expect(results[0].chunk.docId).toBe("identical");
    expect(results[1].chunk.docId).toBe("very-similar");
  });

  it("returns all when topK exceeds chunk count", () => {
    const results = vectorSearch(query, chunks, 100);
    expect(results.length).toBe(chunks.length);
  });

  it("handles empty chunks array", () => {
    const results = vectorSearch(query, []);
    expect(results).toEqual([]);
  });

  it("handles single chunk", () => {
    const single = [makeEmbeddedChunk([0.5, 0.5, 0], "only", 0)];
    const results = vectorSearch(query, single);
    expect(results.length).toBe(1);
    expect(results[0].source).toBe("vector");
    // The only chunk has a positive x-component, matching the query direction.
    expect(results[0].score).toBeGreaterThan(0);
  });
});