Spaces:

safetrack
/

edtech

Running

edtech / apps /api /src /services /indexing-service.ts

CognxSafeTrack

feat: complete agentic audit roadmap — retry, KB generate, real costs, traceId, rate-limit

b438786 13 days ago

2.42 kB

	import { prisma } from './prisma';
	import { Prisma } from '@repo/database';
	import { logger } from '../logger';
	import { randomUUID } from 'crypto';

	export class IndexingService {
	    /** Number of chunks sent to the embeddings endpoint per HTTP request. */
	    private static readonly EMBEDDING_BATCH_SIZE = 100;

	    /**
	     * Embeds an array of text chunks and inserts them into KnowledgeBaseEntry.
	     *
	     * Chunks are sent to the OpenAI embeddings endpoint in batches, and each
	     * chunk is then inserted as its own row together with its vector. Rows are
	     * inserted one statement at a time (no surrounding transaction is opened
	     * here), so a failure part-way through leaves earlier rows in place.
	     *
	     * @param organizationId - Value stored in the "organizationId" column of every inserted row.
	     * @param chunks - Text chunks to embed and store; an empty array is a no-op.
	     * @param metadata - Optional JSON-serialisable object stored as jsonb on every row (defaults to `{}`).
	     * @returns The number of chunks indexed.
	     * @throws Error if OPENAI_API_KEY is not set, if the embeddings request
	     *   returns a non-OK status, or if the response does not contain exactly
	     *   one valid numeric embedding per chunk in the batch.
	     */
	    static async indexTextChunks(
	        organizationId: string,
	        chunks: string[],
	        metadata?: Record<string, unknown>
	    ): Promise<number> {
	        if (chunks.length === 0) return 0;

	        const apiKey = process.env.OPENAI_API_KEY;
	        if (!apiKey) throw new Error('OPENAI_API_KEY not configured');

	        // Loop-invariant: the same metadata document is attached to every row.
	        const metadataJson = JSON.stringify(metadata ?? {});
	        const batchSize = IndexingService.EMBEDDING_BATCH_SIZE;
	        let indexed = 0;

	        for (let start = 0; start < chunks.length; start += batchSize) {
	            const batch = chunks.slice(start, start + batchSize);

	            const response = await fetch('https://api.openai.com/v1/embeddings', {
	                method: 'POST',
	                headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
	                body: JSON.stringify({ input: batch, model: 'text-embedding-3-small' }),
	            });

	            if (!response.ok) {
	                const err = await response.text();
	                throw new Error(`OpenAI embeddings failed: ${err}`);
	            }

	            const payload: unknown = await response.json();
	            const embeddings = IndexingService.extractEmbeddings(payload, batch.length);

	            for (let j = 0; j < batch.length; j++) {
	                const content = batch[j];
	                const embedding = embeddings[j];
	                // The vector literal is spliced into the statement as raw SQL, which is
	                // only safe because extractEmbeddings() guarantees finite numbers.
	                const vecRaw = Prisma.raw(`'[${embedding.join(',')}]'::vector`);

	                await prisma.$executeRaw`
	                    INSERT INTO "KnowledgeBaseEntry" ("id", "organizationId", "content", "embedding", "metadata", "createdAt")
	                    VALUES (
	                        ${randomUUID()},
	                        ${organizationId},
	                        ${content},
	                        ${vecRaw},
	                        ${metadataJson}::jsonb,
	                        NOW()
	                    )
	                `;
	                indexed++;
	            }
	        }

	        logger.info({ organizationId, indexed }, '[INDEXING-SERVICE] Text chunks indexed');
	        return indexed;
	    }

	    /**
	     * Validates an embeddings response body and returns its vectors ordered so
	     * that element `i` belongs to input `i` of the request.
	     *
	     * Each item is placed by its own `index` field when that is a number, and
	     * by its position in the `data` array otherwise.
	     *
	     * @param payload - Parsed JSON body of the embeddings response.
	     * @param expectedCount - Number of inputs that were sent in the request.
	     * @returns Exactly `expectedCount` vectors of finite numbers, in input order.
	     * @throws Error if the item count differs from `expectedCount`, if an index
	     *   is out of range or duplicated, or if a vector is empty or non-numeric.
	     */
	    private static extractEmbeddings(payload: unknown, expectedCount: number): number[][] {
	        const items = (payload as { data?: unknown } | null | undefined)?.data;
	        if (!Array.isArray(items) || items.length !== expectedCount) {
	            const received = Array.isArray(items) ? String(items.length) : 'none';
	            throw new Error(
	                `OpenAI embeddings response mismatch: expected ${expectedCount} embeddings, received ${received}`
	            );
	        }

	        const ordered: Array<number[] | undefined> = new Array<number[] | undefined>(expectedCount).fill(undefined);

	        items.forEach((item: unknown, position: number) => {
	            const { index, embedding } = (item ?? {}) as { index?: unknown; embedding?: unknown };
	            const slot = typeof index === 'number' ? index : position;

	            if (!Number.isInteger(slot) || slot < 0 || slot >= expectedCount || ordered[slot] !== undefined) {
	                throw new Error(`OpenAI embeddings response has an invalid or duplicate index: ${String(slot)}`);
	            }

	            const isNumericVector =
	                Array.isArray(embedding) &&
	                embedding.length > 0 &&
	                embedding.every((value: unknown) => typeof value === 'number' && Number.isFinite(value));
	            if (!isNumericVector) {
	                throw new Error(`OpenAI embeddings response has a malformed embedding at index ${slot}`);
	            }

	            ordered[slot] = embedding as number[];
	        });

	        // expectedCount items were placed into expectedCount distinct slots, so none is left undefined.
	        return ordered as number[][];
	    }
	}