Spaces:
Runtime error
Runtime error
| import { api } from "encore.dev/api"; | |
| import { secret } from "encore.dev/config"; | |
| import log from "encore.dev/log"; | |
| import { LLMProviderService } from "../lib/llm-provider"; | |
| import { ragCache } from "../lib/cache"; | |
| import type { LLMProvider } from "../lib/types"; | |
// Secret accessors for the LLM backend configuration. Each constant is a
// function; the value is obtained by calling it (as getProvider() does below).

/** Passed to LLMProviderService as `provider`. */
const llmProvider = secret("LLMProvider");

/** Passed to LLMProviderService as `ollamaBaseURL`. */
const ollamaBaseURL = secret("OllamaBaseURL");

/** Passed to LLMProviderService as `huggingFaceAPIKey`. */
const huggingFaceAPIKey = secret("HuggingFaceAPIKey");

/** Passed to LLMProviderService as `defaultModel`. */
const defaultModel = secret("DefaultModel");
/**
 * Creates an LLMProviderService configured from the secret accessors
 * declared in this module.
 *
 * Every call reads the secrets again and constructs a new service instance.
 *
 * @returns A newly constructed LLMProviderService.
 */
function getProvider(): LLMProviderService {
  // The secret value is asserted (not validated) to be an LLMProvider.
  const provider = llmProvider() as LLMProvider;

  const settings = {
    provider,
    ollamaBaseURL: ollamaBaseURL(),
    huggingFaceAPIKey: huggingFaceAPIKey(),
    defaultModel: defaultModel(),
  };

  return new LLMProviderService(settings);
}
/** Request body accepted by the `rag` endpoint. */
export interface RAGRequest {
  /** The question to answer; inserted into the prompt after "Query:". */
  query: string;

  /**
   * Context documents. The handler numbers them from 0 in array order and
   * embeds them in the prompt as "[index] text".
   */
  context: string[];

  /** Optional model name, forwarded to the provider unchanged. */
  model?: string;

  /** Optional sampling temperature; the handler falls back to 0.5 when omitted. */
  temperature?: number;

  /** Optional token limit, forwarded to the provider unchanged. */
  maxTokens?: number;
}
/** Response body returned by the `rag` endpoint. */
export interface RAGResponse {
  /** The text produced by the provider. */
  response: string;

  /** The model name reported by the provider for this generation. */
  model: string;

  /** Token usage as reported by the provider, when available. */
  tokensUsed?: number;

  /**
   * Distinct context indices cited in the response text as "[n]", limited to
   * indices smaller than the number of context documents supplied.
   */
  sources: number[];
}
/** Sampling temperature used when the request does not specify one. */
const DEFAULT_RAG_TEMPERATURE = 0.5;

/** Matches bracketed numeric citations such as "[0]" or "[12]"; group 1 holds the digits. */
const SOURCE_REF_PATTERN = /\[(\d+)\]/g;

/** Builds the user prompt: the numbered context documents followed by the query. */
function buildRAGPrompt(query: string, context: readonly string[]): string {
  const contextText = context
    .map((doc, idx) => `[${idx}] ${doc}`)
    .join("\n\n");

  return `Based on the following context, answer the query. Include source numbers [0], [1], etc. when referencing specific documents.
Context:
${contextText}
Query: ${query}
Answer:`;
}

/**
 * Collects the distinct context indices cited in `text` as "[n]", in order of
 * first appearance, discarding any index that is not below `contextCount`.
 */
function extractSources(text: string, contextCount: number): number[] {
  const cited = new Set<number>();
  for (const match of text.matchAll(SOURCE_REF_PATTERN)) {
    const index = Number.parseInt(match[1] ?? "", 10);
    // NaN fails this comparison too, so only valid in-range indices are kept.
    if (index < contextCount) {
      cited.add(index);
    }
  }
  return Array.from(cited);
}

/**
 * Performs retrieval-augmented generation with provided context.
 *
 * Exposed as `POST /rag`. The handler looks up a cached answer first; on a
 * miss it builds a prompt from the numbered context documents and the query,
 * asks the configured provider to generate a reply, extracts the cited source
 * indices from the reply, caches the result and returns it.
 *
 * @param req - The query, context documents and optional generation settings.
 * @returns The generated text, the model that produced it, the reported token
 *   usage and the context indices cited in the text.
 */
export const rag = api<RAGRequest, RAGResponse>(
  { expose: true, method: "POST", path: "/rag" },
  async (req) => {
    log.info("RAG request received", {
      queryLength: req.query.length,
      contextCount: req.context.length,
      model: req.model,
    });

    // Lower temperature for more accurate retrieval. Resolved once so the
    // cache key and the generation call agree on the effective value.
    const temperature = req.temperature ?? DEFAULT_RAG_TEMPERATURE;

    // The key covers every input that influences the generated text.
    // maxTokens is included so that an answer produced under one token limit
    // is not served to a request that asked for a different limit.
    const cacheKey = ragCache.generateKey({
      query: req.query,
      context: req.context,
      model: req.model,
      temperature,
      maxTokens: req.maxTokens,
    });

    const cached = ragCache.get(cacheKey);
    if (cached) {
      // Entries are written below as JSON-serialized RAGResponse objects.
      const hit: RAGResponse = JSON.parse(cached);
      log.info("RAG cache hit", { model: hit.model });
      return hit;
    }

    const provider = getProvider();
    const result = await provider.generate({
      prompt: buildRAGPrompt(req.query, req.context),
      model: req.model,
      temperature,
      maxTokens: req.maxTokens,
      systemPrompt: "You are a helpful assistant that answers questions based on provided context. Always cite sources using [0], [1], etc.",
    });

    const sources = extractSources(result.text, req.context.length);

    const response: RAGResponse = {
      response: result.text,
      model: result.model,
      tokensUsed: result.tokensUsed,
      sources,
    };

    ragCache.set(cacheKey, JSON.stringify(response));

    log.info("RAG response generated", {
      model: result.model,
      sourcesUsed: sources.length,
      tokensUsed: result.tokensUsed,
    });

    return response;
  }
);