// frontend/utils/huggingface.ts
// Initial commit for HF Space (commit a94ab76, by Chan-Y)
/**
* Backend-based inference using FastAPI (Python) for local GPU inference
* This allows using the full Turkish fine-tuned models with PEFT/LoRA adapters
*
* Backend: backend/python_backend/main.py
* - FastAPI server
* - PyTorch + Transformers for model loading
* - PEFT for LoRA adapters (Gemma models)
* - Direct loading for Qwen/Llama merged models
*/
// Backend API configuration.
// VITE_BACKEND_URL is presumably populated by Vite at build time (`VITE_`-prefixed
// env vars are exposed on import.meta.env) — verify against the project's Vite config.
// `||` (not `??`) means an empty-string env var also falls back to localhost.
// NOTE(review): default port 3000 — confirm it matches the FastAPI server's bind port.
const BACKEND_URL = import.meta.env.VITE_BACKEND_URL || 'http://localhost:3000';
/**
 * Progress events emitted by {@link generateResponse}.
 * Modeled as a discriminated union on `status` (matches the shapes actually
 * emitted at the call sites) instead of `any`.
 */
type GenerationProgress =
  | { status: 'loading'; progress: number }
  | { status: 'generating' }
  | { status: 'done' }
  | { status: 'error'; error: unknown };

/** Parameters for a single backend inference request. */
interface GenerateResponseParams {
  /** Model identifier/path forwarded verbatim to the backend. */
  modelPath: string;
  /** System prompt prepended to the conversation. */
  systemPrompt: string;
  /** User message to generate a response for. */
  userInput: string;
  /** Optional image payload — currently unused (multimodal support is TODO). */
  image?: string;
  /** Sampling temperature (backend default applied via destructuring: 0.7). */
  temperature?: number;
  /** Maximum tokens to generate (default 512). */
  maxTokens?: number;
  /** Nucleus sampling threshold (default 0.95). */
  topP?: number;
  /** Top-k sampling cutoff (default 50). */
  topK?: number;
  /** Called once with the complete response text (not streamed token-by-token). */
  onToken?: (content: string) => void;
  /** Called on lifecycle transitions: loading → generating → done, or error. */
  onProgress?: (progress: GenerationProgress) => void;
}
/**
 * Run one inference request against the local FastAPI backend.
 *
 * Note: despite the `onToken` name, the backend endpoint used here is not
 * streaming — `onToken` is invoked exactly once with the full response text.
 *
 * @param params - Model path, prompts, sampling options, and callbacks.
 * @returns Resolves when the response has been delivered via `onToken`.
 * @throws Error if the HTTP call fails or the backend reports `success: false`;
 *         the same error is also surfaced through `onProgress({ status: 'error' })`.
 */
export async function generateResponse(params: GenerateResponseParams): Promise<void> {
  const {
    modelPath,
    systemPrompt,
    userInput,
    // image, // TODO: Implement image support for multimodal models
    temperature = 0.7,
    maxTokens = 512,
    topP = 0.95,
    topK = 50,
    onToken,
    onProgress,
  } = params;
  try {
    // Notify progress: loading model
    onProgress?.({ status: 'loading', progress: 0 });
    console.log(`Starting inference with model: ${modelPath}`);

    // Call backend API
    const response = await fetch(`${BACKEND_URL}/api/inference/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        modelPath,
        systemPrompt,
        userInput,
        temperature,
        maxTokens,
        topP,
        topK,
      }),
    });

    if (!response.ok) {
      // statusText is often empty over HTTP/2 — always include the numeric code
      // so the error is actionable ("Backend error: 500" vs "Backend error: ").
      throw new Error(`Backend error: ${response.status} ${response.statusText}`.trim());
    }

    // Minimal local typing of the backend payload; only these three fields are read.
    const result: { success?: boolean; response?: string; error?: string } =
      await response.json();
    if (!result.success) {
      throw new Error(result.error || 'Inference failed');
    }

    // Update progress: generating
    onProgress?.({ status: 'generating' });

    // Non-streaming backend: deliver the entire response in one callback.
    onToken?.(result.response ?? '');
    onProgress?.({ status: 'done' });
  } catch (error) {
    console.error('Error in generateResponse:', error);
    onProgress?.({ status: 'error', error });
    throw error; // rethrow so callers can also handle the failure
  }
}
/**
 * Check backend health and Python environment.
 *
 * Never throws: any connection failure, non-OK HTTP status, or malformed
 * payload is reported as `{ healthy: false }` with a human-readable message.
 *
 * @returns The backend's health payload, or a synthesized unhealthy result.
 */
export async function checkBackendHealth(): Promise<{ healthy: boolean; message: string }> {
  try {
    const response = await fetch(`${BACKEND_URL}/api/inference/health`);
    if (!response.ok) {
      // Previously a non-OK JSON body (e.g. FastAPI's 404 {detail: "Not Found"})
      // was returned verbatim, yielding `healthy: undefined`. Report it explicitly.
      return {
        healthy: false,
        message: `Backend health check failed with HTTP ${response.status}.`,
      };
    }
    const result: unknown = await response.json();
    // Validate the payload shape before trusting it as a health object.
    if (
      result !== null &&
      typeof result === 'object' &&
      'healthy' in result &&
      'message' in result
    ) {
      return result as { healthy: boolean; message: string };
    }
    return { healthy: false, message: 'Backend returned an unexpected health payload.' };
  } catch (error) {
    return {
      healthy: false,
      message: `Cannot connect to backend at ${BACKEND_URL}. Make sure the backend is running.`,
    };
  }
}