edtech / apps /api /src /services /indexing-service.ts
CognxSafeTrack
feat: complete agentic audit roadmap — retry, KB generate, real costs, traceId, rate-limit
b438786
import { prisma } from './prisma';
import { Prisma } from '@repo/database';
import { logger } from '../logger';
import { randomUUID } from 'crypto';
export class IndexingService {
  /** Number of chunks sent to the embeddings endpoint per request (and per INSERT). */
  private static readonly EMBEDDING_BATCH_SIZE = 100;

  /**
   * Embeds an array of text chunks and inserts them into KnowledgeBaseEntry.
   *
   * Chunks are processed in batches: each batch is embedded with one call to the
   * OpenAI embeddings endpoint and then written with a single multi-row INSERT,
   * so a batch is either stored completely or not at all.
   *
   * @param organizationId - Value stored in the "organizationId" column of every row.
   * @param chunks - Text chunks to embed; an empty array is a no-op.
   * @param metadata - Optional JSON-serializable object stored on every row (defaults to `{}`).
   * @returns The number of chunks indexed.
   * @throws Error when OPENAI_API_KEY is not set, when the embeddings request
   *   fails, or when the response does not contain one well-formed embedding
   *   per chunk.
   */
  static async indexTextChunks(
    organizationId: string,
    chunks: string[],
    metadata?: Record<string, unknown>
  ): Promise<number> {
    if (chunks.length === 0) return 0;

    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) throw new Error('OPENAI_API_KEY not configured');

    // Identical for every row, so serialize it once.
    const metadataJson = JSON.stringify(metadata ?? {});
    const batchSize = IndexingService.EMBEDDING_BATCH_SIZE;
    let indexed = 0;

    for (let start = 0; start < chunks.length; start += batchSize) {
      const batch = chunks.slice(start, start + batchSize);
      const embedded = await IndexingService.embedBatch(apiKey, batch);

      // Every value, including the vector literal, is a bound parameter; the
      // text parameter is cast to `vector` by the database rather than being
      // spliced into the SQL string.
      const rows = embedded.map(({ content, embedding }) => {
        const vectorLiteral = `[${embedding.join(',')}]`;
        return Prisma.sql`(${randomUUID()}, ${organizationId}, ${content}, ${vectorLiteral}::vector, ${metadataJson}::jsonb, NOW())`;
      });

      const inserted = await prisma.$executeRaw`
        INSERT INTO "KnowledgeBaseEntry" ("id", "organizationId", "content", "embedding", "metadata", "createdAt")
        VALUES ${Prisma.join(rows)}
      `;
      indexed += inserted;
    }

    logger.info({ organizationId, indexed }, '[INDEXING-SERVICE] Text chunks indexed');
    return indexed;
  }

  /**
   * Requests embeddings for one batch and pairs each chunk with its vector.
   * Items are matched by the `index` field of the response when present
   * (falling back to array position), and the result is validated so callers
   * never see a missing or malformed embedding.
   */
  private static async embedBatch(
    apiKey: string,
    batch: string[]
  ): Promise<Array<{ content: string; embedding: number[] }>> {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
      body: JSON.stringify({ input: batch, model: 'text-embedding-3-small' }),
    });
    if (!response.ok) {
      const err = await response.text();
      throw new Error(`OpenAI embeddings failed: ${err}`);
    }

    const payload = (await response.json()) as {
      data?: Array<{ index?: number; embedding?: unknown }>;
    } | null;
    const items = Array.isArray(payload?.data) ? payload.data : [];
    if (items.length !== batch.length) {
      throw new Error(
        `OpenAI embeddings failed: expected ${batch.length} embeddings, received ${items.length}`
      );
    }

    const byIndex = new Map<number, unknown>();
    items.forEach((item, position) => {
      byIndex.set(typeof item.index === 'number' ? item.index : position, item.embedding);
    });

    return batch.map((content, position) => {
      const embedding = byIndex.get(position);
      if (!IndexingService.isFiniteVector(embedding)) {
        throw new Error(
          `OpenAI embeddings failed: missing or malformed embedding for chunk ${position}`
        );
      }
      return { content, embedding };
    });
  }

  /** True when `value` is a non-empty array made only of finite numbers. */
  private static isFiniteVector(value: unknown): value is number[] {
    return (
      Array.isArray(value) &&
      value.length > 0 &&
      value.every((component) => typeof component === 'number' && Number.isFinite(component))
    );
  }
}