shreyask Claude Opus 4.6 committed on
Commit
997db9a
·
verified ·
1 Parent(s): 5f4a715

feat: add RRF fusion and position-aware score blending

Browse files

Implement reciprocal rank fusion (rrf.ts) to merge BM25 and vector
search ranked lists with weighted contributions and rank bonuses.
Add position-aware score blending (blend.ts) that combines RRF scores
with reranker scores using tiered weights (top3/mid/tail).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

src/pipeline/blend.test.ts ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, it, expect } from "vitest";
2
+ import { blendScores } from "./blend";
3
+ import type { RRFResult } from "../types";
4
+ import { BLEND_TOP3_RRF, BLEND_MID_RRF, BLEND_TAIL_RRF } from "../constants";
5
+
6
+ // ---------------------------------------------------------------------------
7
+ // Helpers: makeRRFResult builds an RRFResult fixture for one document. filepath mirrors docId, bestChunk is `chunk from <docId>`, and contributions is empty.
8
+ // ---------------------------------------------------------------------------
9
+ function makeRRFResult(
10
+ docId: string,
11
+ score: number,
12
+ title = `Title ${docId}`, // default title is derived from docId
13
+ ): RRFResult {
14
+ return {
15
+ docId,
16
+ filepath: docId,
17
+ title,
18
+ bestChunk: `chunk from ${docId}`,
19
+ score,
20
+ contributions: [],
21
+ };
22
+ }
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // blendScores: position-aware blending of RRF scores with reranker scores
26
+ // ---------------------------------------------------------------------------
27
+ describe("blendScores", () => {
28
+ it("returns empty array for empty input", () => {
29
+ expect(blendScores([], new Map())).toEqual([]);
30
+ });
31
+
32
+ it("uses top3 weight for rank 1-3", () => {
33
+ const rrfResults = [
34
+ makeRRFResult("doc1", 0.5),
35
+ makeRRFResult("doc2", 0.4),
36
+ makeRRFResult("doc3", 0.3),
37
+ ];
38
+ const rerankScores = new Map([
39
+ ["doc1", 0.9],
40
+ ["doc2", 0.8],
41
+ ["doc3", 0.7],
42
+ ]);
43
+ const results = blendScores(rrfResults, rerankScores);
44
+ // All three inputs sit in the top-3 tier (BLEND_TOP3_RRF); only doc1 is asserted below
45
+ // doc1 (assuming BLEND_TOP3_RRF = 0.75): 0.75*0.5 + 0.25*0.9 = 0.375 + 0.225 = 0.600
46
+ const doc1 = results.find((r) => r.docId === "doc1")!;
47
+ const expected1 = BLEND_TOP3_RRF * 0.5 + (1 - BLEND_TOP3_RRF) * 0.9;
48
+ expect(doc1.score).toBeCloseTo(expected1, 10);
49
+ });
50
+
51
+ it("uses mid weight for rank 4-10", () => {
52
+ // Create 5 results (doc0..doc4); input positions 4 and 5 should use mid weight
53
+ const rrfResults = Array.from({ length: 5 }, (_, i) =>
54
+ makeRRFResult(`doc${i}`, 0.5 - i * 0.05),
55
+ );
56
+ const rerankScores = new Map<string, number>([["doc3", 0.9]]);
57
+
58
+ const results = blendScores(rrfResults, rerankScores);
59
+ // doc3 is at index 3, i.e. rank 4 in the input ordering, with RRF score 0.5 - 3*0.05 = 0.35 -> uses BLEND_MID_RRF
60
+ const doc3 = results.find((r) => r.docId === "doc3")!;
61
+ const expected = BLEND_MID_RRF * 0.35 + (1 - BLEND_MID_RRF) * 0.9;
62
+ expect(doc3.score).toBeCloseTo(expected, 10);
63
+ });
64
+
65
+ it("uses tail weight for rank 11+", () => {
66
+ const rrfResults = Array.from({ length: 12 }, (_, i) =>
67
+ makeRRFResult(`doc${i}`, 0.5 - i * 0.03),
68
+ );
69
+ const rerankScores = new Map<string, number>([["doc11", 0.95]]);
70
+
71
+ const results = blendScores(rrfResults, rerankScores);
72
+ // doc11 is at index 11, i.e. rank 12 -> uses BLEND_TAIL_RRF
73
+ const doc11 = results.find((r) => r.docId === "doc11")!;
74
+ const rrfScore = 0.5 - 11 * 0.03; // 0.17
75
+ const expected = BLEND_TAIL_RRF * rrfScore + (1 - BLEND_TAIL_RRF) * 0.95;
76
+ expect(doc11.score).toBeCloseTo(expected, 10);
77
+ });
78
+
79
+ it("defaults missing rerank scores to 0", () => {
80
+ const rrfResults = [makeRRFResult("doc1", 0.5)];
81
+ const rerankScores = new Map<string, number>(); // no rerank score for doc1
82
+ const results = blendScores(rrfResults, rerankScores);
83
+ // score = BLEND_TOP3_RRF * 0.5 + (1 - BLEND_TOP3_RRF) * 0, i.e. 0.375 if BLEND_TOP3_RRF = 0.75
84
+ expect(results[0].score).toBeCloseTo(BLEND_TOP3_RRF * 0.5, 10);
85
+ });
86
+
87
+ it("sorts by blended score descending", () => {
88
+ const rrfResults = [
89
+ makeRRFResult("doc1", 0.5),
90
+ makeRRFResult("doc2", 0.4),
91
+ makeRRFResult("doc3", 0.3),
92
+ ];
93
+ // High rerank score on doc3 should push it up; the loop below only checks that scores are non-increasing
94
+ const rerankScores = new Map([
95
+ ["doc1", 0.1],
96
+ ["doc2", 0.2],
97
+ ["doc3", 0.99],
98
+ ]);
99
+ const results = blendScores(rrfResults, rerankScores);
100
+ for (let i = 1; i < results.length; i++) {
101
+ expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
102
+ }
103
+ });
104
+
105
+ it("can reorder results when reranker disagrees with RRF", () => {
106
+ const rrfResults = [
107
+ makeRRFResult("doc1", 0.5),
108
+ makeRRFResult("doc2", 0.49),
109
+ ];
110
+ // doc2 gets much higher rerank score
111
+ const rerankScores = new Map([
112
+ ["doc1", 0.0],
113
+ ["doc2", 1.0],
114
+ ]);
115
+ const results = blendScores(rrfResults, rerankScores);
116
+ // Assuming BLEND_TOP3_RRF = 0.75: doc1: 0.75*0.5 + 0.25*0.0 = 0.375
117
+ // doc2: 0.75*0.49 + 0.25*1.0 = 0.3675 + 0.25 = 0.6175
118
+ expect(results[0].docId).toBe("doc2");
119
+ });
120
+
121
+ it("preserves filepath, title, bestChunk in final results", () => {
122
+ const rrfResults = [makeRRFResult("doc1", 0.5, "My Title")];
123
+ const results = blendScores(rrfResults, new Map());
124
+ expect(results[0].filepath).toBe("doc1");
125
+ expect(results[0].title).toBe("My Title");
126
+ expect(results[0].bestChunk).toBe("chunk from doc1");
127
+ expect(results[0].docId).toBe("doc1");
128
+ });
129
+
130
+ it("deduplicates by docId, keeping highest blended score", () => {
131
+ // This shouldn't normally happen since RRF already deduplicates,
132
+ // but the function handles it defensively (only length and docId are asserted here, not which score survives)
133
+ const rrfResults = [
134
+ makeRRFResult("doc1", 0.5),
135
+ makeRRFResult("doc1", 0.3), // duplicate
136
+ ];
137
+ const rerankScores = new Map([["doc1", 0.8]]);
138
+ const results = blendScores(rrfResults, rerankScores);
139
+ expect(results).toHaveLength(1);
140
+ expect(results[0].docId).toBe("doc1");
141
+ });
142
+
143
+ it("handles single result correctly", () => {
144
+ const rrfResults = [makeRRFResult("doc1", 0.42)];
145
+ const rerankScores = new Map([["doc1", 0.88]]);
146
+ const results = blendScores(rrfResults, rerankScores);
147
+ expect(results).toHaveLength(1);
148
+ const expected = BLEND_TOP3_RRF * 0.42 + (1 - BLEND_TOP3_RRF) * 0.88;
149
+ expect(results[0].score).toBeCloseTo(expected, 10);
150
+ });
151
+
152
+ it("weight transitions are correct at boundaries", () => {
153
+ // Create exactly 11 results so all three tiers (ranks 1-3, 4-10, 11) are exercised
154
+ const rrfResults = Array.from({ length: 11 }, (_, i) =>
155
+ makeRRFResult(`doc${i}`, 1.0), // same RRF score
156
+ );
157
+ const rerankScores = new Map<string, number>();
158
+ // Give every doc the same rerank score as its RRF score
159
+ for (let i = 0; i < 11; i++) {
160
+ rerankScores.set(`doc${i}`, 1.0);
161
+ }
162
+
163
+ const results = blendScores(rrfResults, rerankScores);
164
+ // All should have blended score = weight * 1.0 + (1-weight) * 1.0 = 1.0
165
+ // regardless of weight, since both inputs are 1.0. NOTE(review): this only shows the two weights sum to 1 in every tier; it cannot detect a misplaced tier boundary.
166
+ for (const r of results) {
167
+ expect(r.score).toBeCloseTo(1.0, 10);
168
+ }
169
+ });
170
+
171
+ it("correctly applies different weights when scores differ", () => {
172
+ // 11 results with identical RRF=0.5, rerank=1.0 (RRF and rerank differ from each other, so the tier weight matters)
173
+ const rrfResults = Array.from({ length: 11 }, (_, i) =>
174
+ makeRRFResult(`doc${i}`, 0.5),
175
+ );
176
+ const rerankScores = new Map<string, number>();
177
+ for (let i = 0; i < 11; i++) {
178
+ rerankScores.set(`doc${i}`, 1.0);
179
+ }
180
+
181
+ const results = blendScores(rrfResults, rerankScores);
182
+ // Assuming BLEND_TOP3_RRF/MID/TAIL = 0.75/0.60/0.40: Rank 1-3: 0.75*0.5 + 0.25*1.0 = 0.625
183
+ // Rank 4-10: 0.60*0.5 + 0.40*1.0 = 0.700
184
+ // Rank 11: 0.40*0.5 + 0.60*1.0 = 0.800
185
+ // So rank 11 should have highest score (tail weight favors reranker more)
186
+ const topScore = results[0].score;
187
+ expect(topScore).toBeCloseTo(0.8, 10); // rank 11 doc; literal 0.8 depends on BLEND_TAIL_RRF being 0.40
188
+ });
189
+ });
src/pipeline/blend.ts ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { RRFResult, RerankedResult, FinalResult } from "../types";
2
+ import { BLEND_TOP3_RRF, BLEND_MID_RRF, BLEND_TAIL_RRF } from "../constants";
3
+
4
+ // Blend each RRF score with its reranker score using position-aware weights. Returns one FinalResult per docId, sorted by blended score descending; docs absent from rerankScores get a rerank score of 0.
5
+ export function blendScores(
6
+ rrfResults: RRFResult[],
7
+ rerankScores: Map<string, number>, // docId -> rerank score
8
+ ): FinalResult[] {
9
+ const blended: RerankedResult[] = rrfResults.map((result, index) => {
10
+ const rank = index + 1; // 1-indexed position in the input array (assumes rrfResults is already in RRF order — TODO confirm with callers)
11
+ const rerankScore = rerankScores.get(result.docId) ?? 0;
12
+
13
+ // Position-aware RRF weight (from QMD):
14
+ // Rank 1-3: 75% RRF + 25% reranker
15
+ // Rank 4-10: 60% RRF + 40% reranker
16
+ // Rank 11+: 40% RRF + 60% reranker
17
+ let rrfWeight: number;
18
+ if (rank <= 3) {
19
+ rrfWeight = BLEND_TOP3_RRF; // 0.75
20
+ } else if (rank <= 10) {
21
+ rrfWeight = BLEND_MID_RRF; // 0.60
22
+ } else {
23
+ rrfWeight = BLEND_TAIL_RRF; // 0.40
24
+ }
25
+
26
+ // Linear blend of the RRF score and the rerank score; the two weights sum to 1.
27
+ // NOTE: result.score is used as-is. No normalization to [0,1] (e.g. dividing by the max RRF score) is performed here.
28
+ const blendedScore =
29
+ rrfWeight * result.score + (1 - rrfWeight) * rerankScore;
30
+
31
+ return {
32
+ ...result,
33
+ rerankScore,
34
+ blendedScore,
35
+ };
36
+ });
37
+
38
+ // Sort by blended score descending
39
+ blended.sort((a, b) => b.blendedScore - a.blendedScore);
40
+
41
+ // Dedup by docId (keep highest blended score): the list is already sorted, so the first occurrence of a docId is its best
42
+ const seen = new Set<string>();
43
+ const final: FinalResult[] = [];
44
+ for (const result of blended) {
45
+ if (seen.has(result.docId)) continue;
46
+ seen.add(result.docId);
47
+ final.push({
48
+ filepath: result.filepath,
49
+ title: result.title,
50
+ bestChunk: result.bestChunk,
51
+ score: result.blendedScore,
52
+ docId: result.docId,
53
+ });
54
+ }
55
+
56
+ return final;
57
+ }
src/pipeline/rrf.test.ts ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, it, expect } from "vitest";
2
+ import { reciprocalRankFusion } from "./rrf";
3
+ import type { ScoredChunk } from "../types";
4
+ import {
5
+ RRF_K,
6
+ RRF_PRIMARY_WEIGHT,
7
+ RRF_SECONDARY_WEIGHT,
8
+ RRF_RANK1_BONUS,
9
+ RRF_RANK2_BONUS,
10
+ } from "../constants";
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Helpers: makeScoredChunk builds a ScoredChunk fixture for a document (chunkIndex 0, startChar 0, title `Title <docId>`).
14
+ // ---------------------------------------------------------------------------
15
+ function makeScoredChunk(
16
+ docId: string,
17
+ score: number,
18
+ source: "bm25" | "vector" = "bm25",
19
+ text = `chunk from ${docId}`, // default chunk text is derived from docId
20
+ ): ScoredChunk {
21
+ return {
22
+ chunk: { docId, chunkIndex: 0, text, startChar: 0, title: `Title ${docId}` },
23
+ score,
24
+ source,
25
+ };
26
+ }
27
+
28
+ function makeList(
29
+ results: ScoredChunk[],
30
+ queryType: "original" | "lex" | "vec" | "hyde" = "original",
31
+ query = "test query",
32
+ ) {
33
+ return { results, queryType, query }; // bundles the chunks with their query metadata into the list object passed to reciprocalRankFusion
34
+ }
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // reciprocalRankFusion: weighted RRF merge of ranked lists, grouped by docId
38
+ // ---------------------------------------------------------------------------
39
+ describe("reciprocalRankFusion", () => {
40
+ it("returns empty array for empty input", () => {
41
+ expect(reciprocalRankFusion([])).toEqual([]);
42
+ });
43
+
44
+ it("returns empty array for lists with no results", () => {
45
+ const result = reciprocalRankFusion([
46
+ makeList([]),
47
+ makeList([]),
48
+ ]);
49
+ expect(result).toEqual([]);
50
+ });
51
+
52
+ it("computes correct RRF score for a single list with one result", () => {
53
+ const results = reciprocalRankFusion([
54
+ makeList([makeScoredChunk("doc1", 0.9)]),
55
+ ]);
56
+ expect(results).toHaveLength(1);
57
+ // rank=1, first list -> PRIMARY weight; score = PRIMARY/(K+1) + rank1 bonus + rank2 bonus (e.g. 2/(60+1) + bonuses if RRF_K=60, RRF_PRIMARY_WEIGHT=2.0)
58
+ const expected = RRF_PRIMARY_WEIGHT / (RRF_K + 1) + RRF_RANK1_BONUS + RRF_RANK2_BONUS;
59
+ expect(results[0].score).toBeCloseTo(expected, 10);
60
+ expect(results[0].docId).toBe("doc1");
61
+ });
62
+
63
+ it("uses primary weight for first two lists and secondary for rest", () => {
64
+ // doc1 appears at rank 1 in all three lists
65
+ const results = reciprocalRankFusion([
66
+ makeList([makeScoredChunk("doc1", 0.9)], "original"),
67
+ makeList([makeScoredChunk("doc1", 0.8)], "lex"),
68
+ makeList([makeScoredChunk("doc1", 0.7)], "vec"),
69
+ ]);
70
+ expect(results).toHaveLength(1);
71
+ // Lists 0,1 use PRIMARY weight, list 2 uses SECONDARY weight; the rank bonuses are added once per doc, not once per list
72
+ const rrfBase =
73
+ RRF_PRIMARY_WEIGHT / (RRF_K + 1) +
74
+ RRF_PRIMARY_WEIGHT / (RRF_K + 1) +
75
+ RRF_SECONDARY_WEIGHT / (RRF_K + 1);
76
+ const expected = rrfBase + RRF_RANK1_BONUS + RRF_RANK2_BONUS;
77
+ expect(results[0].score).toBeCloseTo(expected, 10);
78
+ });
79
+
80
+ it("merges results from different lists for the same docId", () => {
81
+ const results = reciprocalRankFusion([
82
+ makeList([makeScoredChunk("doc1", 0.9)], "original"),
83
+ makeList([makeScoredChunk("doc1", 0.8, "vector")], "vec"),
84
+ ]);
85
+ expect(results).toHaveLength(1);
86
+ expect(results[0].contributions).toHaveLength(2);
87
+ expect(results[0].contributions[0].source).toBe("bm25");
88
+ expect(results[0].contributions[1].source).toBe("vector");
89
+ });
90
+
91
+ it("keeps the best chunk (highest individual score) per doc", () => {
92
+ const results = reciprocalRankFusion([
93
+ makeList([makeScoredChunk("doc1", 0.5, "bm25", "low score chunk")], "original"),
94
+ makeList([makeScoredChunk("doc1", 0.9, "vector", "high score chunk")], "vec"),
95
+ ]);
96
+ expect(results[0].bestChunk).toBe("high score chunk");
97
+ });
98
+
99
+ it("ranks documents by total RRF score descending", () => {
100
+ // doc1 appears in both lists, doc2 only in one
101
+ const results = reciprocalRankFusion([
102
+ makeList(
103
+ [makeScoredChunk("doc1", 0.9), makeScoredChunk("doc2", 0.8)],
104
+ "original",
105
+ ),
106
+ makeList([makeScoredChunk("doc1", 0.7, "vector")], "vec"),
107
+ ]);
108
+ expect(results[0].docId).toBe("doc1");
109
+ expect(results[1].docId).toBe("doc2");
110
+ expect(results[0].score).toBeGreaterThan(results[1].score);
111
+ });
112
+
113
+ it("applies rank 1 bonus only to docs that are rank 1 in some list", () => {
114
+ const results = reciprocalRankFusion([
115
+ makeList(
116
+ [makeScoredChunk("doc1", 0.9), makeScoredChunk("doc2", 0.8)],
117
+ "original",
118
+ ),
119
+ ]);
120
+ // doc1 is rank 1 -> gets both bonuses (the rank-2 bonus applies to any best rank <= 2)
121
+ // doc2 is rank 2 -> gets only rank2 bonus
122
+ const doc1Score = RRF_PRIMARY_WEIGHT / (RRF_K + 1) + RRF_RANK1_BONUS + RRF_RANK2_BONUS;
123
+ const doc2Score = RRF_PRIMARY_WEIGHT / (RRF_K + 2) + RRF_RANK2_BONUS;
124
+ expect(results[0].score).toBeCloseTo(doc1Score, 10);
125
+ expect(results[1].score).toBeCloseTo(doc2Score, 10);
126
+ });
127
+
128
+ it("does not apply rank bonuses to docs ranked 3 or lower", () => {
129
+ const results = reciprocalRankFusion([
130
+ makeList(
131
+ [
132
+ makeScoredChunk("doc1", 0.9),
133
+ makeScoredChunk("doc2", 0.8),
134
+ makeScoredChunk("doc3", 0.7),
135
+ ],
136
+ "original",
137
+ ),
138
+ ]);
139
+ const doc3Score = RRF_PRIMARY_WEIGHT / (RRF_K + 3); // no bonuses
140
+ expect(results[2].score).toBeCloseTo(doc3Score, 10);
141
+ });
142
+
143
+ it("respects candidateLimit", () => {
144
+ const chunks = Array.from({ length: 10 }, (_, i) =>
145
+ makeScoredChunk(`doc${i}`, 1 - i * 0.1),
146
+ );
147
+ const results = reciprocalRankFusion([makeList(chunks)], 3);
148
+ expect(results).toHaveLength(3);
149
+ });
150
+
151
+ it("tracks contributions correctly with queryType and query", () => {
152
+ const results = reciprocalRankFusion([
153
+ makeList([makeScoredChunk("doc1", 0.9)], "original", "hello world"),
154
+ makeList([makeScoredChunk("doc1", 0.8, "vector")], "hyde", "hypothetical doc"),
155
+ ]);
156
+ const contribs = results[0].contributions;
157
+ expect(contribs[0].queryType).toBe("original");
158
+ expect(contribs[0].query).toBe("hello world");
159
+ expect(contribs[1].queryType).toBe("hyde");
160
+ expect(contribs[1].query).toBe("hypothetical doc");
161
+ });
162
+
163
+ it("uses correct rank within each list independently", () => {
164
+ // doc1 is rank 2 in list 0, rank 1 in list 1
165
+ const results = reciprocalRankFusion([
166
+ makeList(
167
+ [makeScoredChunk("doc0", 0.9), makeScoredChunk("doc1", 0.8)],
168
+ "original",
169
+ ),
170
+ makeList([makeScoredChunk("doc1", 0.95, "vector")], "vec"),
171
+ ]);
172
+ const doc1 = results.find((r) => r.docId === "doc1")!;
173
+ // From list 0: rank=2, from list 1: rank=1 -> topRank=1; contributions are recorded in list order
174
+ expect(doc1.contributions).toHaveLength(2);
175
+ expect(doc1.contributions[0].rank).toBe(2);
176
+ expect(doc1.contributions[1].rank).toBe(1);
177
+ // Should get rank1 bonus since topRank=1 (both lists are among the first two, so both use PRIMARY weight)
178
+ const expectedScore =
179
+ RRF_PRIMARY_WEIGHT / (RRF_K + 2) +
180
+ RRF_PRIMARY_WEIGHT / (RRF_K + 1) +
181
+ RRF_RANK1_BONUS +
182
+ RRF_RANK2_BONUS;
183
+ expect(doc1.score).toBeCloseTo(expectedScore, 10);
184
+ });
185
+
186
+ it("sets filepath equal to docId", () => {
187
+ const results = reciprocalRankFusion([
188
+ makeList([makeScoredChunk("my-file.md", 0.9)]),
189
+ ]);
190
+ expect(results[0].filepath).toBe("my-file.md");
191
+ });
192
+
193
+ it("preserves title from the chunk", () => {
194
+ const results = reciprocalRankFusion([
195
+ makeList([makeScoredChunk("doc1", 0.9)]),
196
+ ]);
197
+ expect(results[0].title).toBe("Title doc1");
198
+ });
199
+
200
+ it("handles many lists (>2) with correct weight assignment", () => {
201
+ const results = reciprocalRankFusion([
202
+ makeList([makeScoredChunk("doc1", 0.9)], "original"),
203
+ makeList([makeScoredChunk("doc1", 0.8)], "lex"),
204
+ makeList([makeScoredChunk("doc1", 0.7)], "vec"),
205
+ makeList([makeScoredChunk("doc1", 0.6)], "hyde"),
206
+ ]);
207
+ expect(results[0].contributions).toHaveLength(4);
208
+ // First two lists (by position, not by queryType) use primary weight
209
+ expect(results[0].contributions[0].weight).toBe(RRF_PRIMARY_WEIGHT);
210
+ expect(results[0].contributions[1].weight).toBe(RRF_PRIMARY_WEIGHT);
211
+ // Rest use secondary weight
212
+ expect(results[0].contributions[2].weight).toBe(RRF_SECONDARY_WEIGHT);
213
+ expect(results[0].contributions[3].weight).toBe(RRF_SECONDARY_WEIGHT);
214
+ });
215
+
216
+ it("default candidateLimit is RERANK_CANDIDATE_LIMIT (40)", () => {
217
+ const chunks = Array.from({ length: 50 }, (_, i) =>
218
+ makeScoredChunk(`doc${i}`, 1 - i * 0.01),
219
+ );
220
+ const results = reciprocalRankFusion([makeList(chunks)]);
221
+ expect(results).toHaveLength(40);
222
+ });
223
+ });
src/pipeline/rrf.ts ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ScoredChunk, RRFResult, RRFContribution } from "../types";
2
+ import {
3
+ RRF_K,
4
+ RRF_PRIMARY_WEIGHT,
5
+ RRF_SECONDARY_WEIGHT,
6
+ RRF_RANK1_BONUS,
7
+ RRF_RANK2_BONUS,
8
+ RERANK_CANDIDATE_LIMIT,
9
+ } from "../constants";
10
+
11
+ interface RankedList { // one ranked result list, as consumed by reciprocalRankFusion
12
+ results: ScoredChunk[]; // in rank order: index 0 is treated as rank 1
13
+ queryType: "original" | "lex" | "vec" | "hyde"; // copied verbatim into each contribution record
14
+ query: string; // query text, copied verbatim into each contribution record
15
+ }
16
+
17
+ // Merge multiple ranked lists using Reciprocal Rank Fusion. Returns at most candidateLimit documents (one entry per docId), sorted by fused score descending.
18
+ export function reciprocalRankFusion(
19
+ lists: RankedList[],
20
+ candidateLimit: number = RERANK_CANDIDATE_LIMIT,
21
+ ): RRFResult[] {
22
+ // For each document across all lists, compute RRF score:
23
+ // score = Σ (weight / (k + rank))
24
+ //
25
+ // Weight rules (from QMD), decided by list position rather than queryType:
26
+ // - First 2 lists: weight = RRF_PRIMARY_WEIGHT (2.0)
27
+ // - Remaining lists: weight = RRF_SECONDARY_WEIGHT (1.0)
28
+ //
29
+ // Rank bonuses (added once per document, based on its best rank across all lists; cumulative, so a rank-1 doc receives both):
30
+ // - Rank 1 in any list: +RRF_RANK1_BONUS (0.05)
31
+ // - Rank 1 or 2 in any list: +RRF_RANK2_BONUS (0.02)
32
+ //
33
+ // Group by docId (not chunk), keep the best chunk per doc.
34
+ // Sort by total score descending, return top candidateLimit.
35
+
36
+ const docScores = new Map<
37
+ string,
38
+ {
39
+ docId: string;
40
+ filepath: string;
41
+ title: string;
42
+ bestChunk: string;
43
+ bestChunkScore: number;
44
+ totalScore: number;
45
+ topRank: number;
46
+ contributions: RRFContribution[];
47
+ }
48
+ >();
49
+
50
+ lists.forEach((list, listIndex) => {
51
+ const weight = listIndex < 2 ? RRF_PRIMARY_WEIGHT : RRF_SECONDARY_WEIGHT;
52
+
53
+ list.results.forEach((result, rankIndex) => {
54
+ const rank = rankIndex + 1; // 1-indexed
55
+ const rrfContribution = weight / (RRF_K + rank);
56
+
57
+ const existing = docScores.get(result.chunk.docId);
58
+ if (existing) {
59
+ existing.totalScore += rrfContribution;
60
+ existing.topRank = Math.min(existing.topRank, rank);
61
+ existing.contributions.push({
62
+ source: result.source,
63
+ queryType: list.queryType,
64
+ query: list.query,
65
+ rank,
66
+ weight,
67
+ rrfContribution,
68
+ });
69
+ // Keep the chunk with the highest individual score (raw scores are compared as-is across lists and sources; the title stays that of the first chunk seen)
70
+ if (result.score > existing.bestChunkScore) {
71
+ existing.bestChunk = result.chunk.text;
72
+ existing.bestChunkScore = result.score;
73
+ }
74
+ } else {
75
+ docScores.set(result.chunk.docId, {
76
+ docId: result.chunk.docId,
77
+ filepath: result.chunk.docId, // In browser demo, docId = filepath
78
+ title: result.chunk.title,
79
+ bestChunk: result.chunk.text,
80
+ bestChunkScore: result.score,
81
+ totalScore: rrfContribution,
82
+ topRank: rank,
83
+ contributions: [
84
+ {
85
+ source: result.source,
86
+ queryType: list.queryType,
87
+ query: list.query,
88
+ rank,
89
+ weight,
90
+ rrfContribution,
91
+ },
92
+ ],
93
+ });
94
+ }
95
+ });
96
+ });
97
+
98
+ // Apply rank bonuses once per document, using its best (lowest) rank seen in any list
99
+ for (const doc of docScores.values()) {
100
+ if (doc.topRank === 1) doc.totalScore += RRF_RANK1_BONUS;
101
+ if (doc.topRank <= 2) doc.totalScore += RRF_RANK2_BONUS;
102
+ }
103
+
104
+ // Sort by total score descending, keep the top candidateLimit, and drop the internal bookkeeping fields (bestChunkScore, topRank)
105
+ const results = Array.from(docScores.values())
106
+ .sort((a, b) => b.totalScore - a.totalScore)
107
+ .slice(0, candidateLimit)
108
+ .map((doc) => ({
109
+ docId: doc.docId,
110
+ filepath: doc.filepath,
111
+ title: doc.title,
112
+ bestChunk: doc.bestChunk,
113
+ score: doc.totalScore,
114
+ contributions: doc.contributions,
115
+ }));
116
+
117
+ return results;
118
+ }