// frontend/utils/huggingface.ts
// Initial commit for HF Space (commit a94ab76, by Chan-Y)
/**
* Backend-based inference using FastAPI (Python) for local GPU inference
* This allows using the full Turkish fine-tuned models with PEFT/LoRA adapters
*
* Backend: backend/python_backend/main.py
* - FastAPI server
* - PyTorch + Transformers for model loading
* - PEFT for LoRA adapters (Gemma models)
* - Direct loading for Qwen/Llama merged models
*/
// Backend API configuration.
// VITE_BACKEND_URL is presumably populated by Vite at build time (`VITE_`-prefixed
// env vars are exposed on import.meta.env) — verify against the project's Vite config.
// `||` (not `??`) means an empty-string env var also falls back to localhost.
// NOTE(review): default port 3000 — confirm it matches the FastAPI server's bind port.
const BACKEND_URL = import.meta.env.VITE_BACKEND_URL || 'http://localhost:3000';
/**
 * Progress events emitted by {@link generateResponse}.
 * Modeled as a discriminated union on `status` (matches the shapes actually
 * emitted at the call sites) instead of `any`.
 */
type GenerationProgress =
  | { status: 'loading'; progress: number }
  | { status: 'generating' }
  | { status: 'done' }
  | { status: 'error'; error: unknown };

/** Parameters for a single backend inference request. */
interface GenerateResponseParams {
  /** Model identifier/path forwarded verbatim to the backend. */
  modelPath: string;
  /** System prompt prepended to the conversation. */
  systemPrompt: string;
  /** User message to generate a response for. */
  userInput: string;
  /** Optional image payload — currently unused (multimodal support is TODO). */
  image?: string;
  /** Sampling temperature (backend default applied via destructuring: 0.7). */
  temperature?: number;
  /** Maximum tokens to generate (default 512). */
  maxTokens?: number;
  /** Nucleus sampling threshold (default 0.95). */
  topP?: number;
  /** Top-k sampling cutoff (default 50). */
  topK?: number;
  /** Called once with the complete response text (not streamed token-by-token). */
  onToken?: (content: string) => void;
  /** Called on lifecycle transitions: loading → generating → done, or error. */
  onProgress?: (progress: GenerationProgress) => void;
}
/**
 * Run one inference request against the local FastAPI backend.
 *
 * Note: despite the `onToken` name, the backend endpoint used here is not
 * streaming — `onToken` is invoked exactly once with the full response text.
 *
 * @param params - Model path, prompts, sampling options, and callbacks.
 * @returns Resolves when the response has been delivered via `onToken`.
 * @throws Error if the HTTP call fails or the backend reports `success: false`;
 *         the same error is also surfaced through `onProgress({ status: 'error' })`.
 */
export async function generateResponse(params: GenerateResponseParams): Promise<void> {
  const {
    modelPath,
    systemPrompt,
    userInput,
    // image, // TODO: Implement image support for multimodal models
    temperature = 0.7,
    maxTokens = 512,
    topP = 0.95,
    topK = 50,
    onToken,
    onProgress,
  } = params;
  try {
    // Notify progress: loading model
    onProgress?.({ status: 'loading', progress: 0 });
    console.log(`Starting inference with model: ${modelPath}`);

    // Call backend API
    const response = await fetch(`${BACKEND_URL}/api/inference/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        modelPath,
        systemPrompt,
        userInput,
        temperature,
        maxTokens,
        topP,
        topK,
      }),
    });

    if (!response.ok) {
      // statusText is often empty over HTTP/2 — always include the numeric code
      // so the error is actionable ("Backend error: 500" vs "Backend error: ").
      throw new Error(`Backend error: ${response.status} ${response.statusText}`.trim());
    }

    // Minimal local typing of the backend payload; only these three fields are read.
    const result: { success?: boolean; response?: string; error?: string } =
      await response.json();
    if (!result.success) {
      throw new Error(result.error || 'Inference failed');
    }

    // Update progress: generating
    onProgress?.({ status: 'generating' });

    // Non-streaming backend: deliver the entire response in one callback.
    onToken?.(result.response ?? '');
    onProgress?.({ status: 'done' });
  } catch (error) {
    console.error('Error in generateResponse:', error);
    onProgress?.({ status: 'error', error });
    throw error; // rethrow so callers can also handle the failure
  }
}
/**
 * Check backend health and Python environment.
 *
 * Never throws: any connection failure, non-OK HTTP status, or malformed
 * payload is reported as `{ healthy: false }` with a human-readable message.
 *
 * @returns The backend's health payload, or a synthesized unhealthy result.
 */
export async function checkBackendHealth(): Promise<{ healthy: boolean; message: string }> {
  try {
    const response = await fetch(`${BACKEND_URL}/api/inference/health`);
    if (!response.ok) {
      // Previously a non-OK JSON body (e.g. FastAPI's 404 {detail: "Not Found"})
      // was returned verbatim, yielding `healthy: undefined`. Report it explicitly.
      return {
        healthy: false,
        message: `Backend health check failed with HTTP ${response.status}.`,
      };
    }
    const result: unknown = await response.json();
    // Validate the payload shape before trusting it as a health object.
    if (
      result !== null &&
      typeof result === 'object' &&
      'healthy' in result &&
      'message' in result
    ) {
      return result as { healthy: boolean; message: string };
    }
    return { healthy: false, message: 'Backend returned an unexpected health payload.' };
  } catch (error) {
    return {
      healthy: false,
      message: `Cannot connect to backend at ${BACKEND_URL}. Make sure the backend is running.`,
    };
  }
}