cygon
initial commit
86042ad
import { api } from "encore.dev/api";
import { secret } from "encore.dev/config";
import log from "encore.dev/log";
import { LLMProviderService } from "../lib/llm-provider";
import { ragCache } from "../lib/cache";
import type { LLMProvider } from "../lib/types";
// Secrets declared through Encore's `secret()` helper. Each constant is an
// accessor that is invoked (e.g. `llmProvider()`) inside getProvider() to
// obtain the value used for the provider configuration.
// Backend selector; getProvider() casts the value to `LLMProvider` without validating it.
const llmProvider = secret("LLMProvider");
// Supplied to the provider service as its `ollamaBaseURL` option.
const ollamaBaseURL = secret("OllamaBaseURL");
// Supplied to the provider service as its `huggingFaceAPIKey` option.
const huggingFaceAPIKey = secret("HuggingFaceAPIKey");
// Supplied to the provider service as its `defaultModel` option.
const defaultModel = secret("DefaultModel");
/**
 * Creates an LLM provider service configured from the module's secrets.
 *
 * A fresh `LLMProviderService` is constructed on every call. The provider
 * secret is cast to `LLMProvider` as-is; no validation is performed here.
 *
 * @returns A newly constructed provider service.
 */
function getProvider(): LLMProviderService {
  // Secrets are read in the same order as the option fields are declared.
  const providerKind = llmProvider() as LLMProvider;
  const baseURL = ollamaBaseURL();
  const apiKey = huggingFaceAPIKey();
  const fallbackModel = defaultModel();

  const service = new LLMProviderService({
    provider: providerKind,
    ollamaBaseURL: baseURL,
    huggingFaceAPIKey: apiKey,
    defaultModel: fallbackModel,
  });
  return service;
}
// Request body accepted by the POST /rag endpoint.
export interface RAGRequest {
// The user's question; placed in the prompt after the context documents.
query: string;
// Context documents. Each one is labelled `[i]` in the prompt, where `i` is its
// zero-based position in this array, so the answer can cite it by number.
context: string[];
// Optional model name, forwarded unchanged to the provider's generate() call.
model?: string;
// Optional sampling temperature; the handler falls back to 0.5 when omitted.
temperature?: number;
// Optional token limit, forwarded unchanged to the provider's generate() call.
maxTokens?: number;
}
// Response body returned by the POST /rag endpoint.
export interface RAGResponse {
// The generated answer text, as returned by the provider.
response: string;
// The model name reported by the provider for this generation.
model: string;
// Token count reported by the provider; may be absent.
tokensUsed?: number;
// Distinct indices into the request's `context` array that appear as `[n]`
// citations in `response`. Numbers at or beyond the context length are dropped.
sources: number[];
}
/**
 * Builds the RAG prompt: the numbered context documents followed by the query.
 * Documents are labelled with their zero-based array index so the model can
 * cite them as `[0]`, `[1]`, and so on.
 */
function buildRAGPrompt(query: string, context: string[]): string {
  const contextText = context
    .map((doc, idx) => `[${idx}] ${doc}`)
    .join("\n\n");

  return [
    "Based on the following context, answer the query. Include source numbers [0], [1], etc. when referencing specific documents.",
    "Context:",
    contextText,
    `Query: ${query}`,
    "Answer:",
  ].join("\n");
}

/**
 * Returns the distinct `[n]` citation numbers found in `text`, in order of
 * first appearance, keeping only those that index an existing context document.
 */
function extractCitedSources(text: string, contextCount: number): number[] {
  const citations = text.match(/\[(\d+)\]/g) ?? [];
  // Strip the surrounding brackets and parse explicitly as base 10.
  const indices = citations.map((c) => Number.parseInt(c.slice(1, -1), 10));
  // The model may cite numbers that do not exist; discard those.
  return Array.from(new Set(indices)).filter((i) => i < contextCount);
}

/**
 * Performs retrieval-augmented generation with provided context.
 *
 * The handler looks up a cached answer first. On a miss it builds a prompt
 * from the numbered context documents and the query, asks the configured LLM
 * provider for an answer, extracts the cited source indices from the answer,
 * caches the result, and returns it.
 *
 * @param req - Query, context documents, and optional generation settings.
 * @returns The generated answer, the model that produced it, the reported
 *   token usage, and the indices of the context documents the answer cites.
 */
export const rag = api<RAGRequest, RAGResponse>(
  { expose: true, method: "POST", path: "/rag" },
  async (req) => {
    log.info("RAG request received", {
      queryLength: req.query.length,
      contextCount: req.context.length,
      model: req.model
    });

    // Resolve the effective temperature once so the cache key and the
    // generation call agree (omitted and explicit 0.5 are the same request).
    // Lower temp for more accurate retrieval.
    const temperature = req.temperature ?? 0.5;

    // Every parameter that influences the generated text must be part of the
    // key; maxTokens is included so a response produced under one token limit
    // is never served for a request with a different limit.
    const cacheKey = ragCache.generateKey({
      query: req.query,
      context: req.context,
      model: req.model,
      temperature,
      maxTokens: req.maxTokens,
    });

    const cached = ragCache.get(cacheKey);
    if (cached) {
      log.info("RAG cache hit", { model: req.model });
      // Entries under this key are written below as serialized RAGResponse values.
      return JSON.parse(cached) as RAGResponse;
    }

    const prompt = buildRAGPrompt(req.query, req.context);

    const provider = getProvider();
    const result = await provider.generate({
      prompt,
      model: req.model,
      temperature,
      maxTokens: req.maxTokens,
      systemPrompt: "You are a helpful assistant that answers questions based on provided context. Always cite sources using [0], [1], etc.",
    });

    const sources = extractCitedSources(result.text, req.context.length);

    const response: RAGResponse = {
      response: result.text,
      model: result.model,
      tokensUsed: result.tokensUsed,
      sources,
    };

    // Cache the response
    ragCache.set(cacheKey, JSON.stringify(response));

    log.info("RAG response generated", {
      model: result.model,
      sourcesUsed: sources.length,
      tokensUsed: result.tokensUsed
    });

    return response;
  }
);