import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";

export class HuggingFaceClient {
  private apiKey: string;
  private defaultModel: string;

  constructor(apiKey: string, defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2") {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
  }

  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Build the full prompt, prepending the system message if provided.
    let fullPrompt = request.prompt;
    if (request.systemPrompt) {
      fullPrompt = `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`;
    }

    try {
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${model}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: fullPrompt,
            parameters: {
              temperature: request.temperature ?? 0.7,
              max_new_tokens: request.maxTokens ?? 500,
              return_full_text: false,
            },
          }),
        }
      );

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json() as any;

      // The Inference API returns either an array of generations or a single
      // object depending on the model/task, so handle both shapes.
      let text: string;
      if (Array.isArray(data) && data[0]?.generated_text) {
        text = data[0].generated_text;
      } else if (data.generated_text) {
        text = data.generated_text;
      } else {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // HF doesn't return a token count in the basic Inference API
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }

  async listModels(): Promise<ModelInfo[]> {
    // Return a curated list of popular models.
    // In production, this could be fetched from the HF API instead.
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }

  async checkHealth(): Promise<boolean> {
    try {
      // Probe the default model with a minimal request.
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${this.defaultModel}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: "test",
            parameters: { max_new_tokens: 1 },
          }),
        }
      );
      // 503 means the model is still loading, which counts as reachable.
      return response.ok || response.status === 503;
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}
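
// Usage sketch (an assumption, not part of this module): one way to wire the
// client into an Encore endpoint. The secret name "HuggingFaceAPIKey" and the
// "/llm/generate" path are hypothetical placeholders; Encore's `secret` helper
// and `api` definition are used as documented.
//
// import { api } from "encore.dev/api";
// import { secret } from "encore.dev/config";
// import { HuggingFaceClient } from "./huggingface";
//
// const hfApiKey = secret("HuggingFaceAPIKey");
// const client = new HuggingFaceClient(hfApiKey());
//
// export const generate = api(
//   { expose: true, method: "POST", path: "/llm/generate" },
//   async (req: LLMRequest): Promise<LLMResponse> => client.generate(req)
// );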