/**
 * Backend-based inference using FastAPI (Python) for local GPU inference.
 * This allows using the full Turkish fine-tuned models with PEFT/LoRA adapters.
 *
 * Backend: backend/python_backend/main.py
 * - FastAPI server
 * - PyTorch + Transformers for model loading
 * - PEFT for LoRA adapters (Gemma models)
 * - Direct loading for Qwen/Llama merged models
 */

// Backend API configuration
const BACKEND_URL = import.meta.env.VITE_BACKEND_URL || 'http://localhost:3000';

/** Options accepted by {@link generateResponse}. */
interface GenerateResponseParams {
  modelPath: string;
  systemPrompt: string;
  userInput: string;
  /** Accepted but currently not forwarded to the backend (see TODO in generateResponse). */
  image?: string;
  temperature?: number;
  maxTokens?: number;
  topP?: number;
  topK?: number;
  /** Receives the generated text; invoked once with the full response. */
  onToken?: (content: string) => void;
  /** Receives status objects (`loading`, `generating`, `done`, `error`). */
  onProgress?: (progress: any) => void;
}

/** Narrows a parsed JSON value to a plain key/value object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Sends a single generation request to `${BACKEND_URL}/api/inference/generate`
 * and reports the outcome through the optional callbacks.
 *
 * The generated text is delivered via `onToken` (one call with the whole
 * response), not via the return value.
 *
 * @param params - Model path, prompts, sampling settings and callbacks.
 *   Sampling settings default to temperature 0.7, maxTokens 512, topP 0.95, topK 50.
 * @throws Error when the HTTP status is not OK, when the backend payload does
 *   not report success, or when it carries no response text. Any error is also
 *   logged and reported through `onProgress` with status `error` before being rethrown.
 */
export async function generateResponse(params: GenerateResponseParams): Promise<void> {
  const {
    modelPath,
    systemPrompt,
    userInput,
    // image, // TODO: Implement image support for multimodal models
    temperature = 0.7,
    maxTokens = 512,
    topP = 0.95,
    topK = 50,
    onToken,
    onProgress,
  } = params;

  try {
    // Notify progress: loading model
    onProgress?.({ status: 'loading', progress: 0 });

    console.log(`Starting inference with model: ${modelPath}`);

    // Call backend API
    const response = await fetch(`${BACKEND_URL}/api/inference/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        modelPath,
        systemPrompt,
        userInput,
        temperature,
        maxTokens,
        topP,
        topK,
      }),
    });

    if (!response.ok) {
      // statusText may be empty (e.g. over HTTP/2), so always include the numeric status.
      const statusLine = `${response.status} ${response.statusText}`.trim();
      throw new Error(`Backend error: ${statusLine}`);
    }

    // Treat the payload as untrusted until its shape has been checked.
    const result: unknown = await response.json();

    if (!isJsonObject(result) || !result.success) {
      const reason =
        isJsonObject(result) && typeof result.error === 'string' ? result.error : '';
      throw new Error(reason || 'Inference failed');
    }

    if (typeof result.response !== 'string') {
      // Without this guard `undefined` would be handed to onToken as if it were text.
      throw new Error('Backend returned no response text');
    }

    // Update progress: generating
    onProgress?.({ status: 'generating' });

    // Send the response via onToken callback
    onToken?.(result.response);

    onProgress?.({ status: 'done' });
  } catch (error: unknown) {
    console.error('Error in generateResponse:', error);
    onProgress?.({ status: 'error', error });
    throw error;
  }
}

/**
 * Check backend health and Python environment.
 *
 * Queries `${BACKEND_URL}/api/inference/health` and returns the backend's own
 * report when it has the expected shape. Never throws: a network failure, an
 * unparsable body, or a malformed payload all resolve to `healthy: false`.
 *
 * @returns The health flag and a human-readable message.
 */
export async function checkBackendHealth(): Promise<{ healthy: boolean; message: string }> {
  try {
    const response = await fetch(`${BACKEND_URL}/api/inference/health`);
    const result: unknown = await response.json();

    if (isJsonObject(result) && typeof result.healthy === 'boolean') {
      return {
        // Keep any additional fields the backend sent alongside the two required ones.
        ...result,
        healthy: result.healthy,
        message: typeof result.message === 'string' ? result.message : '',
      };
    }

    // JSON parsed, but it is not a health report (e.g. an error body).
    return {
      healthy: false,
      message: `Unexpected health response from backend (HTTP ${response.status}).`,
    };
  } catch {
    return {
      healthy: false,
      message: `Cannot connect to backend at ${BACKEND_URL}. Make sure the backend is running.`,
    };
  }
}