|
|
import { VectorEmbedding } from '../entities/VectorEmbedding.js'; |
|
|
import BaseRepository from './BaseRepository.js'; |
|
|
import { getAppDataSource } from '../connection.js'; |
|
|
|
|
|
/**
 * Repository for {@link VectorEmbedding} entities.
 *
 * On top of what `BaseRepository` provides, this class offers lookup by
 * content identity, create-or-update of an embedding, and similarity search
 * (by raw vector or by text via a caller-supplied embedding function).
 */
export class VectorEmbeddingRepository extends BaseRepository<VectorEmbedding> {
  /**
   * Placeholder similarity reported for every row returned by the fallback
   * search, which does not compute a real score.
   */
  private static readonly FALLBACK_SIMILARITY = 0.5;

  constructor() {
    super(VectorEmbedding);
  }

  /**
   * Finds the embedding stored for a `(contentType, contentId)` pair.
   *
   * @param contentType - Value matched against the `content_type` column.
   * @param contentId - Value matched against the `content_id` column.
   * @returns The matching entity, or `null` when there is none.
   */
  async findByContentIdentity(
    contentType: string,
    contentId: string,
  ): Promise<VectorEmbedding | null> {
    return this.repository.findOneBy({
      content_type: contentType,
      content_id: contentId,
    });
  }

  /**
   * Creates or updates the embedding of a piece of content.
   *
   * An existing row for `(contentType, contentId)` is updated in place;
   * otherwise a new entity is created. `dimensions` is always set to
   * `embedding.length`.
   *
   * NOTE(review): the lookup and the save are two separate calls, so two
   * concurrent callers could presumably both take the "create" path — confirm
   * whether a unique constraint guards this.
   *
   * @param contentType - Type of the content the embedding belongs to.
   * @param contentId - Identifier of the content within its type.
   * @param textContent - Text the embedding was computed from.
   * @param embedding - Embedding vector.
   * @param metadata - Metadata stored with the row (defaults to `{}`).
   * @param model - Name of the embedding model (defaults to `'default'`).
   * @returns The entity as returned by the base repository's `save`.
   */
  async saveEmbedding(
    contentType: string,
    contentId: string,
    textContent: string,
    embedding: number[],
    metadata: Record<string, any> = {},
    model = 'default',
  ): Promise<VectorEmbedding> {
    let vectorEmbedding = await this.findByContentIdentity(contentType, contentId);

    if (!vectorEmbedding) {
      vectorEmbedding = new VectorEmbedding();
      vectorEmbedding.content_type = contentType;
      vectorEmbedding.content_id = contentId;
    }

    vectorEmbedding.text_content = textContent;

    // The value handed to the database is the bracketed text form
    // ("[1,2,3]"), which is presumably not the declared type of the entity
    // property — hence Object.assign instead of a typed assignment. The raw
    // array is kept only if it cannot be serialized.
    Object.assign(vectorEmbedding, {
      embedding: this.formatEmbeddingForPgVector(embedding) ?? embedding,
    });

    vectorEmbedding.dimensions = embedding.length;
    vectorEmbedding.metadata = metadata;
    vectorEmbedding.model = model;

    return this.save(vectorEmbedding);
  }

  /**
   * Searches for stored embeddings similar to `embedding`.
   *
   * The vector query is tried first. If it throws, a warning is logged and an
   * unranked fallback query is run instead: it filters by `contentTypes` only,
   * ignores `threshold`, and reports a fixed similarity of 0.5 for every row.
   * If the fallback throws as well, the error is logged and an empty array is
   * returned, so this method never rejects.
   *
   * @param embedding - Query vector.
   * @param limit - Maximum number of results (defaults to 10).
   * @param threshold - Minimum similarity, exclusive; applied by the vector
   *   query only (defaults to 0.7).
   * @param contentTypes - When non-empty, restricts results to these content types.
   * @returns Matches paired with their similarity.
   */
  async searchSimilar(
    embedding: number[],
    limit = 10,
    threshold = 0.7,
    contentTypes?: string[],
  ): Promise<Array<{ embedding: VectorEmbedding; similarity: number }>> {
    try {
      try {
        return await this.searchByVectorDistance(embedding, limit, threshold, contentTypes);
      } catch (vectorError) {
        console.warn(
          'Vector similarity search failed, falling back to basic filtering:',
          vectorError,
        );
        return await this.searchWithoutVectorSupport(limit, contentTypes);
      }
    } catch (error) {
      console.error('Error during vector search:', error);
      return [];
    }
  }

  /**
   * Embeds `text` with `getEmbeddingFunc` and delegates to {@link searchSimilar}.
   *
   * @param text - Text to search for.
   * @param getEmbeddingFunc - Produces the embedding vector for a text.
   * @param limit - Maximum number of results (defaults to 10).
   * @param threshold - Minimum similarity (defaults to 0.7).
   * @param contentTypes - When non-empty, restricts results to these content types.
   * @returns The search results, or an empty array if embedding or searching
   *   fails (the error is logged).
   */
  async searchByText(
    text: string,
    getEmbeddingFunc: (text: string) => Promise<number[]>,
    limit = 10,
    threshold = 0.7,
    contentTypes?: string[],
  ): Promise<Array<{ embedding: VectorEmbedding; similarity: number }>> {
    try {
      const embedding = await getEmbeddingFunc(text);

      // `await` keeps a rejection from the search inside this try block;
      // a bare `return promise` would bypass the catch below.
      return await this.searchSimilar(embedding, limit, threshold, contentTypes);
    } catch (error) {
      console.error('Error searching by text:', error);
      return [];
    }
  }

  /**
   * Runs the ranked vector query: rows whose similarity to `embedding` exceeds
   * `threshold`, best match first, at most `limit` rows.
   */
  private async searchByVectorDistance(
    embedding: number[],
    limit: number,
    threshold: number,
    contentTypes?: string[],
  ): Promise<Array<{ embedding: VectorEmbedding; similarity: number }>> {
    // `<=>` is a distance operator (cosine distance in pgvector), so the
    // similarity is expressed as 1 - distance. The same expression is needed
    // in both the select list and the filter.
    const similaritySql = '1 - (vector_embedding.embedding <=> :embedding)';

    let query = getAppDataSource()
      .createQueryBuilder()
      .select('vector_embedding.*')
      .addSelect(`${similaritySql} AS similarity`)
      .from(VectorEmbedding, 'vector_embedding')
      .where(`${similaritySql} > :threshold`)
      .orderBy('similarity', 'DESC')
      .limit(limit)
      .setParameter('embedding', this.formatEmbeddingForPgVector(embedding) ?? embedding)
      .setParameter('threshold', threshold);

    if (contentTypes && contentTypes.length > 0) {
      query = query
        .andWhere('vector_embedding.content_type IN (:...contentTypes)')
        .setParameter('contentTypes', contentTypes);
    }

    const rows = await query.getRawMany();

    return rows.map((row) => ({
      embedding: this.mapRawToEntity(row),
      similarity: parseFloat(row.similarity),
    }));
  }

  /**
   * Fallback used when the vector query fails: returns up to `limit` rows,
   * optionally filtered by content type, in no particular order and each with
   * the placeholder similarity.
   */
  private async searchWithoutVectorSupport(
    limit: number,
    contentTypes?: string[],
  ): Promise<Array<{ embedding: VectorEmbedding; similarity: number }>> {
    let query = this.repository.createQueryBuilder('vector_embedding');

    if (contentTypes && contentTypes.length > 0) {
      query = query
        .where('vector_embedding.content_type IN (:...contentTypes)')
        .setParameter('contentTypes', contentTypes);
    }

    const entities = await query.take(limit).getMany();

    return entities.map((entity) => ({
      embedding: entity,
      similarity: VectorEmbeddingRepository.FALLBACK_SIMILARITY,
    }));
  }

  /**
   * Builds a {@link VectorEmbedding} from a raw result row.
   *
   * The timestamps are read from the row's `created_at` / `updated_at` keys;
   * all other fields use the same name on the row and on the entity.
   */
  private mapRawToEntity(raw: any): VectorEmbedding {
    const entity = new VectorEmbedding();
    entity.id = raw.id;
    entity.content_type = raw.content_type;
    entity.content_id = raw.content_id;
    entity.text_content = raw.text_content;
    entity.metadata = raw.metadata;
    entity.embedding = raw.embedding;
    entity.dimensions = raw.dimensions;
    entity.model = raw.model;
    entity.createdAt = raw.created_at;
    entity.updatedAt = raw.updated_at;
    return entity;
  }

  /**
   * Serializes an embedding to bracketed text form, e.g. `[0.1,0.2,0.3]`.
   *
   * @param embedding - A numeric array, or a string that is either already
   *   bracketed or a bare comma-separated list.
   * @returns The bracketed string, or `null` when `embedding` is falsy or is
   *   neither a string nor an array.
   */
  private formatEmbeddingForPgVector(embedding: number[] | string): string | null {
    if (!embedding) return null;

    if (typeof embedding === 'string') {
      const alreadyBracketed = embedding.startsWith('[') && embedding.endsWith(']');
      return alreadyBracketed ? embedding : `[${embedding}]`;
    }

    return Array.isArray(embedding) ? `[${embedding.join(',')}]` : null;
  }
}
|
|
|
|
|
// Default export kept alongside the named export of the class.
export default VectorEmbeddingRepository;
|
|
|