Spaces:

safetrack
/

edtech

Running

File size: 2,417 Bytes

b438786

import { prisma } from './prisma';
import { Prisma } from '@repo/database';
import { logger } from '../logger';
import { randomUUID } from 'crypto';

/**
 * Embeds text chunks with the OpenAI embeddings endpoint and stores each chunk,
 * its vector and optional metadata as a row of "KnowledgeBaseEntry".
 */
export class IndexingService {
    /**
     * Chunks sent per embeddings request, and therefore rows written per INSERT.
     * NOTE(review): the original code described 100 as the OpenAI limit — confirm
     * against the current API documentation before raising it.
     */
    private static readonly BATCH_SIZE = 100;

    /**
     * Embeds an array of text chunks and inserts them into KnowledgeBaseEntry.
     *
     * Chunks are processed in batches: one embeddings request followed by one
     * multi-row INSERT per batch, so a failing batch writes none of its rows.
     * Batches that completed before a failure stay in the table.
     *
     * @param organizationId - Written to the "organizationId" column of every row.
     * @param chunks - Texts to embed and store; an empty array is a no-op.
     * @param metadata - Optional object stored as JSONB on every row (defaults to `{}`).
     * @returns The number of chunks indexed.
     * @throws Error if `OPENAI_API_KEY` is not set, if the embeddings request is
     *   not OK, or if the response does not contain exactly one well-formed
     *   numeric embedding per chunk.
     */
    static async indexTextChunks(
        organizationId: string,
        chunks: string[],
        metadata?: Record<string, unknown>
    ): Promise<number> {
        if (chunks.length === 0) return 0;

        const apiKey = process.env.OPENAI_API_KEY;
        if (!apiKey) throw new Error('OPENAI_API_KEY not configured');

        // Serialize once, up front: it is identical for every row, and metadata
        // that cannot be serialized should fail before any API call is made.
        const metadataJson = JSON.stringify(metadata ?? {});

        let indexed = 0;
        for (let start = 0; start < chunks.length; start += IndexingService.BATCH_SIZE) {
            const batch = chunks.slice(start, start + IndexingService.BATCH_SIZE);
            const embeddings = await IndexingService.embedBatch(apiKey, batch);
            indexed += await IndexingService.insertBatch(organizationId, batch, embeddings, metadataJson);
        }

        logger.info({ organizationId, indexed }, '[INDEXING-SERVICE] Text chunks indexed');
        return indexed;
    }

    /** Requests embeddings for one batch and returns them validated and ordered to match `batch`. */
    private static async embedBatch(apiKey: string, batch: string[]): Promise<number[][]> {
        const response = await fetch('https://api.openai.com/v1/embeddings', {
            method: 'POST',
            headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
            body: JSON.stringify({ input: batch, model: 'text-embedding-3-small' }),
        });

        if (!response.ok) {
            const err = await response.text();
            throw new Error(`OpenAI embeddings failed: ${err}`);
        }

        // The payload is external input: treat it as unknown and narrow it.
        const payload = (await response.json()) as { data?: unknown } | null;
        const rawItems = payload?.data;
        if (!Array.isArray(rawItems) || rawItems.length !== batch.length) {
            const received = Array.isArray(rawItems) ? rawItems.length : 0;
            throw new Error(
                `OpenAI embeddings failed: expected ${batch.length} embeddings, received ${received}`
            );
        }

        // Place each vector by the `index` the API reports (falling back to array
        // position) so a vector can never be paired with the wrong chunk.
        const ordered = new Array<number[] | undefined>(batch.length).fill(undefined);
        rawItems.forEach((rawItem: unknown, position: number) => {
            const item = (rawItem ?? {}) as { embedding?: unknown; index?: unknown };
            const slot = typeof item.index === 'number' ? item.index : position;
            const embedding = item.embedding;

            if (
                !Array.isArray(embedding) ||
                !embedding.every((value) => typeof value === 'number' && Number.isFinite(value))
            ) {
                throw new Error('OpenAI embeddings failed: response contained a non-numeric embedding');
            }
            if (!Number.isInteger(slot) || slot < 0 || slot >= batch.length || ordered[slot] !== undefined) {
                throw new Error('OpenAI embeddings failed: response contained an invalid embedding index');
            }
            ordered[slot] = embedding;
        });

        return ordered.map((vector) => {
            if (vector === undefined) {
                throw new Error('OpenAI embeddings failed: response is missing an embedding');
            }
            return vector;
        });
    }

    /** Writes one batch with a single multi-row INSERT and returns the number of rows written. */
    private static async insertBatch(
        organizationId: string,
        batch: string[],
        embeddings: number[][],
        metadataJson: string
    ): Promise<number> {
        const rows = batch.map((content, position) => {
            const embedding = embeddings[position];
            if (embedding === undefined) {
                throw new Error('OpenAI embeddings failed: response is missing an embedding');
            }
            // Bound as a parameter and cast server-side; never spliced into the SQL text.
            const vectorLiteral = `[${embedding.join(',')}]`;
            return Prisma.sql`(
                ${randomUUID()},
                ${organizationId},
                ${content},
                ${vectorLiteral}::vector,
                ${metadataJson}::jsonb,
                NOW()
            )`;
        });

        await prisma.$executeRaw`
            INSERT INTO "KnowledgeBaseEntry" ("id", "organizationId", "content", "embedding", "metadata", "createdAt")
            VALUES ${Prisma.join(rows)}
        `;
        return rows.length;
    }
}