File size: 2,417 Bytes
b438786 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | import { prisma } from './prisma';
import { Prisma } from '@repo/database';
import { logger } from '../logger';
import { randomUUID } from 'crypto';
/**
 * Turns raw text chunks into `KnowledgeBaseEntry` rows: each chunk is embedded
 * through the OpenAI embeddings endpoint and inserted together with its vector.
 */
export class IndexingService {
  /** Number of chunks sent to the embeddings endpoint per request. */
  private static readonly BATCH_SIZE = 100;

  /** Embedding model requested from OpenAI. */
  private static readonly EMBEDDING_MODEL = 'text-embedding-3-small';

  /**
   * Embeds an array of text chunks and inserts them into KnowledgeBaseEntry.
   *
   * Chunks are processed in batches of {@link IndexingService.BATCH_SIZE}. A
   * batch is only written once every embedding in it has been validated, but
   * batches are not wrapped in a transaction: if a later batch fails, rows from
   * earlier batches remain in the table.
   *
   * @param organizationId - Stored in the `organizationId` column of every row.
   * @param chunks - Text chunks to embed; an empty array is a no-op.
   * @param metadata - Optional JSON-serialisable object stored as `jsonb` on
   *   every row (defaults to `{}`).
   * @returns The number of chunks indexed.
   * @throws Error if `OPENAI_API_KEY` is not set, if the embeddings request
   *   fails, or if the response does not contain one valid vector per chunk.
   */
  static async indexTextChunks(
    organizationId: string,
    chunks: string[],
    metadata?: Record<string, unknown>
  ): Promise<number> {
    if (chunks.length === 0) return 0;

    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) throw new Error('OPENAI_API_KEY not configured');

    // Identical for every row, so serialise it once.
    const metadataJson = JSON.stringify(metadata ?? {});

    let indexed = 0;
    for (let start = 0; start < chunks.length; start += IndexingService.BATCH_SIZE) {
      const batch = chunks.slice(start, start + IndexingService.BATCH_SIZE);
      const rows = await IndexingService.embedBatch(apiKey, batch);

      for (const { content, vectorLiteral } of rows) {
        // Every value is a bound parameter; the vector is sent as text and cast
        // server-side, so nothing from the API response reaches the SQL text.
        await prisma.$executeRaw`
          INSERT INTO "KnowledgeBaseEntry" ("id", "organizationId", "content", "embedding", "metadata", "createdAt")
          VALUES (
            ${randomUUID()},
            ${organizationId},
            ${content},
            ${vectorLiteral}::vector,
            ${metadataJson}::jsonb,
            NOW()
          )
        `;
        indexed++;
      }
    }

    logger.info({ organizationId, indexed }, '[INDEXING-SERVICE] Text chunks indexed');
    return indexed;
  }

  /**
   * Requests embeddings for one batch and pairs each chunk with its vector,
   * formatted as a `[v1,v2,...]` literal.
   *
   * @throws Error if the request fails or the response is missing, misordered
   *   beyond repair, or contains non-numeric vector components.
   */
  private static async embedBatch(
    apiKey: string,
    batch: readonly string[]
  ): Promise<Array<{ content: string; vectorLiteral: string }>> {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
      body: JSON.stringify({ input: batch, model: IndexingService.EMBEDDING_MODEL }),
    });
    if (!response.ok) {
      const err = await response.text();
      throw new Error(`OpenAI embeddings failed: ${err}`);
    }

    const payload = (await response.json()) as {
      data?: Array<{ embedding?: unknown; index?: unknown } | null>;
    } | null;
    const items = payload?.data;
    if (!Array.isArray(items) || items.length !== batch.length) {
      const received = Array.isArray(items) ? items.length : 0;
      throw new Error(
        `OpenAI embeddings response malformed: expected ${batch.length} embeddings, received ${received}`
      );
    }

    // Place each embedding by the `index` the API reports (falling back to its
    // position), so a reordered response cannot attach a vector to the wrong chunk.
    const slots: unknown[] = new Array<unknown>(batch.length).fill(undefined);
    items.forEach((item, position) => {
      const target = typeof item?.index === 'number' ? item.index : position;
      if (Number.isInteger(target) && target >= 0 && target < batch.length) {
        slots[target] = item?.embedding;
      }
    });

    return batch.map((content, position) => {
      const embedding = slots[position];
      if (!IndexingService.isFiniteVector(embedding)) {
        throw new Error(
          `OpenAI embeddings response malformed: missing or non-numeric embedding for input ${position}`
        );
      }
      return { content, vectorLiteral: `[${embedding.join(',')}]` };
    });
  }

  /** True when `value` is a non-empty array made only of finite numbers. */
  private static isFiniteVector(value: unknown): value is number[] {
    return (
      Array.isArray(value) &&
      value.length > 0 &&
      value.every((component) => typeof component === 'number' && Number.isFinite(component))
    );
  }
}
|