File size: 1,088 Bytes
cdc50ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/**
 * Dot product of two numeric vectors, used as their cosine similarity.
 *
 * No normalization happens here: the result is only a true cosine
 * similarity when both inputs are already unit-length.
 *
 * @param {ArrayLike<number>} a - First vector; its length decides how many positions are read.
 * @param {ArrayLike<number>} b - Second vector, read at the same indices as `a`.
 * @returns {number} Sum of the element-wise products.
 */
export function cosineSimilarity( a, b ) {
	const size = a.length;
	let total = 0;
	let index = 0;

	// Accumulate front to back so floating-point rounding stays stable.
	while ( index < size ) {
		total += a[ index ] * b[ index ];
		index += 1;
	}

	return total;
}

/**
 * Rank chunks against a query embedding and return the best matches.
 *
 * Every chunk is scored with `cosineSimilarity` against `chunk.embedding`,
 * the scored list is ordered from highest to lowest score, and the first
 * `topK` entries are returned. Results are NOT filtered by `threshold`;
 * the threshold only drives the `belowThreshold` flag.
 *
 * @param {ArrayLike<number>} queryEmbedding - Embedding of the query.
 * @param {Array<Object>} chunks - Chunks to search; each is read for `embedding`,
 *     `id`, `text`, `sectionId`, `sectionTitle`, `selector` and `paragraphIndex`.
 * @param {number} [topK=3] - End index passed to `slice` when picking the top entries.
 * @param {number} [threshold=0.4] - Score the best result is compared against.
 * @returns {{results: Array<Object>, belowThreshold: boolean}} The selected results
 *     (each with `chunkId`, `text`, `score`, `confidence` and the location fields),
 *     plus a flag that is true when there are no results or the best score is
 *     lower than `threshold`.
 */
export function search( queryEmbedding, chunks, topK = 3, threshold = 0.4 ) {
	// Shallow-copy the chunk and attach its score (overrides any existing `score`).
	const attachScore = ( chunk ) => {
		return { ...chunk, score: cosineSimilarity( queryEmbedding, chunk.embedding ) };
	};

	const byScoreDescending = ( left, right ) => right.score - left.score;

	// Reduce a scored chunk to the public result shape.
	const toResult = ( { id, text, score, sectionId, sectionTitle, selector, paragraphIndex } ) => ( {
		chunkId: id,
		text,
		score,
		confidence: scoreToConfidence( score ),
		sectionId,
		sectionTitle,
		selector,
		paragraphIndex
	} );

	const ranked = chunks.map( attachScore );
	ranked.sort( byScoreDescending );

	const results = ranked.slice( 0, topK ).map( toResult );

	// Only the best hit decides the flag; an empty result set counts as below.
	const best = results[ 0 ];
	const belowThreshold = best === undefined || best.score < threshold;

	return { results, belowThreshold };
}

/**
 * Translate a similarity score into a coarse confidence label.
 *
 * @param {number} score - Similarity score to classify.
 * @returns {string} 'high' for scores of at least 0.6, 'medium' for scores of
 *     at least 0.45, and 'low' for everything else.
 */
function scoreToConfidence( score ) {
	// Ordered from the strictest cut-off down; the first one met wins.
	const tiers = [
		[ 0.6, 'high' ],
		[ 0.45, 'medium' ]
	];

	for ( const [ minimum, label ] of tiers ) {
		if ( score >= minimum ) {
			return label;
		}
	}

	return 'low';
}