| |
|
|
| export function cosineSimilarity(a: number[], b: number[]): number { |
| if (a.length !== b.length) return 0; |
|
|
| let dotProduct = 0; |
| let normA = 0; |
| let normB = 0; |
|
|
| for (let i = 0; i < a.length; i++) { |
| dotProduct += a[i] * b[i]; |
| normA += a[i] * a[i]; |
| normB += b[i] * b[i]; |
| } |
|
|
| if (normA === 0 || normB === 0) return 0; |
| return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); |
| } |
|
|
| export function computeMarginalGainDiversity( |
| newIdx: number, |
| currentCoverage: number[], |
| similarityMatrix: number[][] |
| ): number { |
| const n = similarityMatrix.length; |
| let marginalGain = 0; |
| const row = similarityMatrix[newIdx]; |
| for (let i = 0; i < n; i++) { |
| const newCoverage = row[i] > currentCoverage[i] ? row[i] : currentCoverage[i]; |
| marginalGain += newCoverage - currentCoverage[i]; |
| } |
| return marginalGain; |
| } |
|
|
| export function lazyGreedySelection(embeddings: number[][], k: number): number[] { |
| const n = embeddings.length; |
| if (k >= n) return Array.from({ length: n }, (_, i) => i); |
|
|
| const selected: number[] = []; |
| const remaining = new Set(Array.from({ length: n }, (_, i) => i)); |
|
|
| |
| const similarityMatrix: number[][] = []; |
| for (let i = 0; i < n; i++) { |
| similarityMatrix[i] = []; |
| for (let j = 0; j < n; j++) { |
| |
| const sim = cosineSimilarity(embeddings[i], embeddings[j]); |
| similarityMatrix[i][j] = sim > 0 ? sim : 0; |
| } |
| } |
|
|
| |
| const currentCoverage = new Array(n).fill(0); |
|
|
| |
| const pq: Array<[number, number, number]> = []; |
|
|
| |
| for (let i = 0; i < n; i++) { |
| const gain = computeMarginalGainDiversity(i, currentCoverage, similarityMatrix); |
| pq.push([-gain, 0, i]); |
| } |
|
|
| |
| pq.sort((a, b) => a[0] - b[0]); |
|
|
| for (let iteration = 0; iteration < k; iteration++) { |
| while (pq.length > 0) { |
| const [negGain, lastUpdated, bestIdx] = pq.shift()!; |
|
|
| if (!remaining.has(bestIdx)) continue; |
|
|
| if (lastUpdated === iteration) { |
| selected.push(bestIdx); |
| remaining.delete(bestIdx); |
| |
| const row = similarityMatrix[bestIdx]; |
| for (let i = 0; i < n; i++) { |
| if (row[i] > currentCoverage[i]) currentCoverage[i] = row[i]; |
| } |
| break; |
| } |
|
|
| const currentGain = computeMarginalGainDiversity(bestIdx, currentCoverage, similarityMatrix); |
| pq.push([-currentGain, iteration, bestIdx]); |
| pq.sort((a, b) => a[0] - b[0]); |
| } |
| } |
|
|
| return selected; |
| } |
|
|
| export function lazyGreedySelectionWithSaturation( |
| embeddings: number[][], |
| threshold: number = 1e-2 |
| ): { selected: number[], optimalK: number, values: number[] } { |
| const n = embeddings.length; |
|
|
| const selected: number[] = []; |
| const remaining = new Set(Array.from({ length: n }, (_, i) => i)); |
| const values: number[] = []; |
|
|
| |
| const similarityMatrix: number[][] = []; |
| for (let i = 0; i < n; i++) { |
| similarityMatrix[i] = []; |
| for (let j = 0; j < n; j++) { |
| const sim = cosineSimilarity(embeddings[i], embeddings[j]); |
| similarityMatrix[i][j] = sim > 0 ? sim : 0; |
| } |
| } |
|
|
| const currentCoverage = new Array(n).fill(0); |
|
|
| |
| const pq: Array<[number, number, number]> = []; |
|
|
| |
| for (let i = 0; i < n; i++) { |
| const gain = computeMarginalGainDiversity(i, currentCoverage, similarityMatrix); |
| pq.push([-gain, 0, i]); |
| } |
|
|
| |
| pq.sort((a, b) => a[0] - b[0]); |
|
|
| let earlyStopK: number | null = null; |
| for (let iteration = 0; iteration < n; iteration++) { |
| while (pq.length > 0) { |
| const [negGain, lastUpdated, bestIdx] = pq.shift()!; |
|
|
| if (!remaining.has(bestIdx)) continue; |
|
|
| if (lastUpdated === iteration) { |
| selected.push(bestIdx); |
| remaining.delete(bestIdx); |
|
|
| |
| const row = similarityMatrix[bestIdx]; |
| for (let i = 0; i < n; i++) { |
| if (row[i] > currentCoverage[i]) currentCoverage[i] = row[i]; |
| } |
| const functionValue = currentCoverage.reduce((sum, val) => sum + val, 0) / n; |
| values.push(functionValue); |
|
|
| |
| if (values.length >= 2) { |
| const delta = values[values.length - 1] - values[values.length - 2]; |
| if (delta < threshold) { |
| earlyStopK = values.length; |
| } |
| } |
|
|
| break; |
| } |
|
|
| const currentGain = computeMarginalGainDiversity(bestIdx, currentCoverage, similarityMatrix); |
| pq.push([-currentGain, iteration, bestIdx]); |
| pq.sort((a, b) => a[0] - b[0]); |
| } |
| if (earlyStopK !== null) break; |
| } |
|
|
| |
| const optimalK = earlyStopK ?? values.length; |
| const finalSelected = selected.slice(0, optimalK); |
|
|
| return { selected: finalSelected, optimalK, values }; |
| } |
|
|