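// RAG endpoints: run queries against the retrieval-augmented generation
// service and list the models exposed by the AI service.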
import { api, APIError } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { ragService } from "../services/rag_service";
import { aiService } from "../services/ai_service";
import type { RAGQueryRequest, RAGQueryResponse } from "../types/models";
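
/**
 * RAG query endpoint.
 *
 * Authenticates the caller with an API key, enforces per-tier rate limits,
 * validates the request, and delegates to the RAG service. Request counts,
 * error counts, and response times are recorded as metrics.
 */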
export const ragQuery = api<RAGQueryRequest, RAGQueryResponse>(
  { expose: true, method: "POST", path: "/rag/query", auth: false },
  async (req) => {
    const startTime = Date.now();
    try {
      // Authenticate via API key and enforce the caller's rate-limit tier.
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/rag/query");

      if (!req.query) {
        throw APIError.invalidArgument("query is required");
      }

      const response = await ragService.query(req);
      metrics.recordResponseTime(Date.now() - startTime);
      return response;
    } catch (error) {
      metrics.incrementErrors();

      // Rate-limit errors carry a 429 status code; surface them with their
      // limit metadata so clients can back off appropriately.
      if (error && typeof error === "object" && "statusCode" in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface ModelsResponse {
  models: string[];
  default_model: string;
}
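
/**
 * Lists the models available from the AI service.
 *
 * Requires a valid API key. Returns the available model names and a default
 * model (the first available model, falling back to "gpt-3.5-turbo").
 */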
export const getModels = api<void, ModelsResponse>(
  { expose: true, method: "GET", path: "/rag/models", auth: false },
  async () => {
    try {
      // Authentication is still required even though the endpoint takes no parameters.
      const authHeader = auth();
      validateApiKey(authHeader);

      metrics.incrementRequests("/rag/models");

      const models = aiService.getAvailableModels();
      return {
        models,
        default_model: models[0] || "gpt-3.5-turbo",
      };
    } catch (error) {
      metrics.incrementErrors();
      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);