llm-api-backend / backend /lib /huggingface-client.ts
cygon
initial commit
86042ad
import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";
/**
 * Thin client for the Hugging Face Inference API
 * (https://api-inference.huggingface.co). Authenticates every request with
 * a bearer token and falls back to a configurable default model.
 */
export class HuggingFaceClient {
  constructor(
    // Parameter properties replace the hand-written field declarations and
    // assignments; both values are never reassigned, hence readonly.
    private readonly apiKey: string,
    private readonly defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2"
  ) {}

  /**
   * Sends a JSON POST to the inference endpoint for `model`.
   * Shared by generate() and checkHealth() so the URL, auth header, and
   * content type are built in exactly one place.
   */
  private post(model: string, payload: unknown) {
    return fetch(
      `https://api-inference.huggingface.co/models/${model}`,
      {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${this.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
      }
    );
  }

  /**
   * Extracts `generated_text` from either response shape the inference API
   * returns: an array of candidate objects, or a bare object.
   *
   * Uses a `typeof` check rather than truthiness so that a legitimate
   * empty-string generation is accepted instead of being mistaken for a
   * malformed response (the original falsy check threw on `""`).
   *
   * @returns The generated text, or undefined when the payload matches
   *          neither shape.
   */
  private static extractGeneratedText(data: unknown): string | undefined {
    const candidate: unknown = Array.isArray(data) ? data[0] : data;
    // typeof null === "object", so the null check must come first.
    if (candidate !== null && typeof candidate === "object" && "generated_text" in candidate) {
      const text = (candidate as { generated_text?: unknown }).generated_text;
      if (typeof text === "string") {
        return text;
      }
    }
    return undefined;
  }

  /**
   * Generates a completion for `request.prompt`, optionally prefixed with
   * `request.systemPrompt`, using `request.model` or the client default.
   *
   * @returns The generated text and the model used. `tokensUsed` is always
   *          undefined because the basic HF inference API reports no usage.
   * @throws APIError.internal on any network, HTTP, or response-shape failure.
   */
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Fold an optional system prompt into a single text prompt: the basic
    // inference API takes one `inputs` string, not a chat-message array.
    const fullPrompt = request.systemPrompt
      ? `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`
      : request.prompt;

    try {
      const response = await this.post(model, {
        inputs: fullPrompt,
        parameters: {
          temperature: request.temperature ?? 0.7,
          max_new_tokens: request.maxTokens ?? 500,
          // Return only the completion, not prompt + completion.
          return_full_text: false,
        },
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      // Parse as `unknown` and narrow explicitly instead of `as any`.
      const data: unknown = await response.json();
      const text = HuggingFaceClient.extractGeneratedText(data);
      if (text === undefined) {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // HF doesn't return token count in basic API
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }

  /**
   * Returns a curated, hard-coded list of popular hosted models.
   * In production this could be fetched from the HF hub API instead.
   */
  async listModels(): Promise<ModelInfo[]> {
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }

  /**
   * Probes the default model with a minimal one-token request.
   *
   * @returns true when the endpoint answers OK — or 503, which HF uses to
   *          signal "model is loading" (reachable but warming up);
   *          false on any other status or on a network error.
   */
  async checkHealth(): Promise<boolean> {
    try {
      const response = await this.post(this.defaultModel, {
        inputs: "test",
        parameters: { max_new_tokens: 1 },
      });
      return response.ok || response.status === 503; // 503 means model is loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}