Spaces:
Running
Running
File size: 2,563 Bytes
export type HybridSource = string;
/**
 * One search hit that carries a `vectorScore`. This is the element type of the
 * `vector` input accepted by `mergeHybridResults`.
 */
export type HybridVectorResult = {
// Key `mergeHybridResults` uses to pair this hit with a keyword hit of the same id.
id: string;
path: string;
// NOTE(review): presumably the line range of `snippet` inside `path`; confirm
// whether the bounds are 0- or 1-based and inclusive.
startLine: number;
endLine: number;
source: HybridSource;
snippet: string;
// Multiplied by `vectorWeight` in `mergeHybridResults`; the expected scale is
// not established in this file.
vectorScore: number;
};
/**
 * One search hit that carries a `textScore`. This is the element type of the
 * `keyword` input accepted by `mergeHybridResults`.
 */
export type HybridKeywordResult = {
// Key `mergeHybridResults` uses to pair this hit with a vector hit of the same id.
id: string;
path: string;
// NOTE(review): presumably the line range of `snippet` inside `path`; confirm
// whether the bounds are 0- or 1-based and inclusive.
startLine: number;
endLine: number;
source: HybridSource;
// When non-empty, this replaces the vector hit's snippet during merging.
snippet: string;
// Multiplied by `textWeight` in `mergeHybridResults`.
// NOTE(review): presumably produced by `bm25RankToScore` — confirm against the caller.
textScore: number;
};
/**
 * Converts free-form text into a query made of double-quoted terms joined by
 * ` AND `, e.g. `foo-bar baz` becomes `"foo" AND "bar" AND "baz"`.
 *
 * Tokenization is Unicode-aware: a term is a run of letters, digits and
 * underscores from any script, so `café` or `日本語` survive intact instead of
 * being truncated or dropped. Everything else (whitespace, punctuation,
 * quotes, operators) only separates terms and never reaches the output.
 *
 * NOTE(review): the output looks like an SQLite FTS `MATCH` expression; confirm
 * the consuming index uses a Unicode-capable tokenizer.
 *
 * @param raw - Unprocessed user input.
 * @returns The conjunctive query, or `null` when `raw` contains no term.
 */
export function buildFtsQuery(raw: string): string | null {
  // A term starts with a letter/digit/underscore and may continue with
  // combining marks, so decomposed accents and vowel signs stay attached to
  // their base character rather than splitting the word.
  const tokens = raw.match(/[\p{L}\p{N}_][\p{L}\p{N}\p{M}_]*/gu);
  if (tokens === null) {
    return null;
  }
  // The pattern cannot match `"` or whitespace, so each term is safe to wrap
  // in quotes verbatim without any further escaping or trimming.
  return tokens.map((token) => `"${token}"`).join(" AND ");
}
/**
 * Maps a BM25 rank onto a score between 0 and 1 where larger means more relevant.
 *
 * - Negative ranks: SQLite FTS5's `bm25()` reports better matches as more
 *   negative numbers, so the magnitude is treated as relevance and mapped to
 *   `relevance / (1 + relevance)`. Previously every negative rank was clamped
 *   to 0 and therefore scored exactly 1, which erased the ordering between
 *   keyword hits.
 *   NOTE(review): presumably callers pass the raw `bm25()` value — confirm.
 * - Non-negative ranks: lower is better, mapped to `1 / (1 + rank)`.
 * - `NaN` / `±Infinity`: treated as a very poor rank of 999.
 *
 * @param rank - Rank value to convert.
 * @returns A finite score in the range [0, 1].
 */
export function bm25RankToScore(rank: number): number {
  if (!Number.isFinite(rank)) {
    // Unusable input is pushed to the bottom instead of propagating NaN.
    return 1 / (1 + 999);
  }
  if (rank < 0) {
    const relevance = -rank;
    return relevance / (1 + relevance);
  }
  return 1 / (1 + rank);
}
/**
 * Combines vector hits and keyword hits into one list ranked by a weighted score.
 *
 * Hits are paired by `id`. A hit found by only one of the two searches gets a
 * score of 0 for the other. When both searches return the same `id`, the
 * location fields come from the vector hit, and the keyword hit contributes
 * its `textScore` and, if non-empty, its `snippet`.
 *
 * @param params.vector - Hits carrying a `vectorScore`.
 * @param params.keyword - Hits carrying a `textScore`.
 * @param params.vectorWeight - Multiplier applied to each `vectorScore`.
 * @param params.textWeight - Multiplier applied to each `textScore`.
 * @returns A new array sorted by descending `score`; the inputs are not modified.
 */
export function mergeHybridResults(params: {
  vector: HybridVectorResult[];
  keyword: HybridKeywordResult[];
  vectorWeight: number;
  textWeight: number;
}): Array<{
  path: string;
  startLine: number;
  endLine: number;
  score: number;
  snippet: string;
  source: HybridSource;
}> {
  type Combined = HybridVectorResult & { textScore: number };

  const { vector, keyword, vectorWeight, textWeight } = params;
  const combined = new Map<string, Combined>();

  // Seed the table with the vector hits; a repeated id replaces the earlier entry.
  vector.forEach(({ id, path, startLine, endLine, source, snippet, vectorScore }) => {
    combined.set(id, { id, path, startLine, endLine, source, snippet, vectorScore, textScore: 0 });
  });

  // Fold the keyword hits in, either enriching a known id or adding a new one.
  keyword.forEach((hit) => {
    const known = combined.get(hit.id);
    if (!known) {
      const { id, path, startLine, endLine, source, snippet, textScore } = hit;
      combined.set(id, { id, path, startLine, endLine, source, snippet, vectorScore: 0, textScore });
      return;
    }
    known.textScore = hit.textScore;
    // An empty keyword snippet must not wipe out the one already stored.
    if (hit.snippet) {
      known.snippet = hit.snippet;
    }
  });

  const ranked = [...combined.values()].map(
    ({ path, startLine, endLine, snippet, source, vectorScore, textScore }) => ({
      path,
      startLine,
      endLine,
      score: vectorWeight * vectorScore + textWeight * textScore,
      snippet,
      source,
    }),
  );

  // `ranked` is a fresh local array, so sorting it in place is invisible to callers.
  ranked.sort((left, right) => right.score - left.score);
  return ranked;
}
|