| import type { ScoredChunk, RRFResult, RRFContribution } from "../types"; |
| import { |
| RRF_K, |
| RRF_PRIMARY_WEIGHT, |
| RRF_SECONDARY_WEIGHT, |
| RRF_RANK1_BONUS, |
| RRF_RANK2_BONUS, |
| RERANK_CANDIDATE_LIMIT, |
| } from "../constants"; |
|
|
/**
 * One ranked result list that is fed into the fusion step, together with a
 * description of the query that produced it.
 */
interface RankedList {
  /** Query text this list was retrieved for; copied into each contribution record. */
  query: string;
  /** Label for the kind of query behind this list; copied into each contribution record. */
  queryType: "original" | "lex" | "vec" | "hyde";
  /** Hits for the query, best first: the array position (plus one) is used as the rank. */
  results: Array<ScoredChunk>;
}
|
|
| |
/**
 * Fuses several ranked result lists into a single document-level ranking using
 * weighted Reciprocal Rank Fusion (RRF).
 *
 * Every hit adds `weight / (RRF_K + rank)` (rank is 1-based) to the document
 * identified by `result.chunk.docId`. The first two lists are weighted with
 * `RRF_PRIMARY_WEIGHT`, every later list with `RRF_SECONDARY_WEIGHT`. Once all
 * lists are accumulated, a tiered bonus is applied based on the best rank the
 * document reached in any list: `RRF_RANK1_BONUS` for rank 1, otherwise
 * `RRF_RANK2_BONUS` for rank 2. The two bonuses are mutually exclusive.
 *
 * @param lists - Ranked lists to fuse. Order matters: the position of a list in
 *   this array decides which weight it gets.
 * @param candidateLimit - Maximum number of fused documents to return. Values
 *   below zero are treated as zero.
 * @returns Fused documents sorted by descending score. Each entry carries the
 *   text of the document's highest-scoring chunk and one contribution record
 *   per hit that fed into its score.
 */
export function reciprocalRankFusion(
  lists: RankedList[],
  candidateLimit: number = RERANK_CANDIDATE_LIMIT,
): RRFResult[] {
  /** Running per-document state while the lists are being accumulated. */
  interface DocAccumulator {
    docId: string;
    filepath: string;
    title: string;
    bestChunk: string;
    bestChunkScore: number;
    totalScore: number;
    topRank: number;
    contributions: RRFContribution[];
  }

  const docScores = new Map<string, DocAccumulator>();

  for (const [listIndex, list] of lists.entries()) {
    const weight = listIndex < 2 ? RRF_PRIMARY_WEIGHT : RRF_SECONDARY_WEIGHT;

    // NOTE(review): every hit contributes, so a document with several chunks
    // in the same list is counted several times — confirm that callers
    // de-duplicate per document if one contribution per list is intended.
    for (const [rankIndex, result] of list.results.entries()) {
      const rank = rankIndex + 1;
      const rrfContribution = weight / (RRF_K + rank);
      const contribution: RRFContribution = {
        source: result.source,
        queryType: list.queryType,
        query: list.query,
        rank,
        weight,
        rrfContribution,
      };

      const docId = result.chunk.docId;
      const existing = docScores.get(docId);

      if (existing === undefined) {
        docScores.set(docId, {
          docId,
          // NOTE(review): filepath mirrors docId here — presumably doc ids are
          // file paths; verify against the chunk type.
          filepath: docId,
          title: result.chunk.title,
          bestChunk: result.chunk.text,
          bestChunkScore: result.score,
          totalScore: rrfContribution,
          topRank: rank,
          contributions: [contribution],
        });
        continue;
      }

      existing.totalScore += rrfContribution;
      existing.topRank = Math.min(existing.topRank, rank);
      existing.contributions.push(contribution);

      // Keep the text of the strictly highest-scoring chunk seen so far.
      if (result.score > existing.bestChunkScore) {
        existing.bestChunk = result.chunk.text;
        existing.bestChunkScore = result.score;
      }
    }
  }

  // Tiered top-rank bonus: exactly one tier applies, so a rank-1 document does
  // not additionally collect the rank-2 bonus.
  for (const doc of docScores.values()) {
    if (doc.topRank === 1) {
      doc.totalScore += RRF_RANK1_BONUS;
    } else if (doc.topRank === 2) {
      doc.totalScore += RRF_RANK2_BONUS;
    }
  }

  // A negative end index would make slice() drop items from the tail instead
  // of returning nothing, so clamp the limit at zero.
  const limit = Math.max(0, candidateLimit);

  return Array.from(docScores.values())
    .sort((a, b) => b.totalScore - a.totalScore)
    .slice(0, limit)
    .map((doc) => ({
      docId: doc.docId,
      filepath: doc.filepath,
      title: doc.title,
      bestChunk: doc.bestChunk,
      score: doc.totalScore,
      contributions: doc.contributions,
    }));
}
|
|