import { prisma } from './prisma';
import { Prisma } from '@repo/database';
import { logger } from '../logger';
import { randomUUID } from 'crypto';

/**
 * Embeds text with the OpenAI embeddings API and stores the resulting
 * vectors as "KnowledgeBaseEntry" rows.
 */
export class IndexingService {
  /** Number of chunks sent to the embeddings endpoint per request. */
  private static readonly EMBEDDING_BATCH_SIZE = 100;

  /**
   * Embeds an array of text chunks and inserts them into KnowledgeBaseEntry.
   *
   * Chunks are embedded in batches; each chunk is then inserted as its own
   * row, sequentially. If a request or insert fails part-way through, rows
   * inserted before the failure remain in the table.
   *
   * @param organizationId - Value written to the "organizationId" column of every row.
   * @param chunks - Text chunks to embed and store. An empty array is a no-op.
   * @param metadata - Optional object stored as JSONB on every row (defaults to `{}`).
   * @returns The number of chunks indexed.
   * @throws Error if `OPENAI_API_KEY` is not set, if the embeddings request
   *   returns a non-OK status, or if the response does not contain exactly one
   *   well-formed vector per chunk.
   */
  static async indexTextChunks(
    organizationId: string,
    chunks: string[],
    metadata?: Record<string, unknown>
  ): Promise<number> {
    if (chunks.length === 0) return 0;

    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) throw new Error('OPENAI_API_KEY not configured');

    // Identical for every row, so serialize it once instead of per insert.
    const metadataJson = JSON.stringify(metadata ?? {});
    const batchSize = IndexingService.EMBEDDING_BATCH_SIZE;
    let indexed = 0;

    for (let start = 0; start < chunks.length; start += batchSize) {
      const batch = chunks.slice(start, start + batchSize);
      // The whole batch is validated before any of its rows are written.
      const embedded = await IndexingService.embedBatch(apiKey, batch);

      for (const { content, embedding } of embedded) {
        // The vector travels as a bound text parameter and is cast by
        // Postgres; nothing from the API response is spliced into the SQL.
        const vectorLiteral = `[${embedding.join(',')}]`;

        await prisma.$executeRaw(Prisma.sql`
          INSERT INTO "KnowledgeBaseEntry"
            ("id", "organizationId", "content", "embedding", "metadata", "createdAt")
          VALUES (
            ${randomUUID()},
            ${organizationId},
            ${content},
            ${vectorLiteral}::vector,
            ${metadataJson}::jsonb,
            NOW()
          )
        `);
        indexed++;
      }
    }

    logger.info({ organizationId, indexed }, '[INDEXING-SERVICE] Text chunks indexed');
    return indexed;
  }

  /**
   * Requests embeddings for one batch of chunks and pairs each chunk with
   * its vector.
   *
   * @param apiKey - OpenAI API key sent as a bearer token.
   * @param batch - Chunks to embed in a single request.
   * @returns One `{ content, embedding }` pair per chunk, in `batch` order.
   * @throws Error if the request fails or the response is malformed.
   */
  private static async embedBatch(
    apiKey: string,
    batch: string[]
  ): Promise<Array<{ content: string; embedding: number[] }>> {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
      body: JSON.stringify({ input: batch, model: 'text-embedding-3-small' }),
    });

    if (!response.ok) {
      const err = await response.text();
      throw new Error(`OpenAI embeddings failed: ${err}`);
    }

    // Treat the body as untrusted JSON and narrow it instead of asserting a shape.
    const payload: unknown = await response.json();
    const items = (payload as { data?: unknown } | null)?.data;
    if (!Array.isArray(items) || items.length !== batch.length) {
      const received = Array.isArray(items) ? items.length : 0;
      throw new Error(
        `OpenAI embeddings response has ${received} vectors for ${batch.length} inputs`
      );
    }

    // Pair vectors with chunks by the reported `index` when present, falling
    // back to array position, so a reordered response cannot mislabel a chunk.
    const vectorsByIndex = new Map<number, number[]>();
    items.forEach((item: unknown, position: number) => {
      const { embedding, index } = (item ?? {}) as { embedding?: unknown; index?: unknown };
      if (
        !Array.isArray(embedding) ||
        embedding.length === 0 ||
        !embedding.every((value) => typeof value === 'number' && Number.isFinite(value))
      ) {
        throw new Error(`OpenAI embeddings response has an invalid vector at position ${position}`);
      }
      vectorsByIndex.set(typeof index === 'number' ? index : position, embedding);
    });

    return batch.map((content, position) => {
      const embedding = vectorsByIndex.get(position);
      if (embedding === undefined) {
        throw new Error(`OpenAI embeddings response is missing a vector for input ${position}`);
      }
      return { content, embedding };
    });
  }
}