import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";
export class HuggingFaceClient {
  private apiKey: string;
  private defaultModel: string;

  constructor(apiKey: string, defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2") {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
  }
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Build the full prompt, prepending the system message if one was provided.
    let fullPrompt = request.prompt;
    if (request.systemPrompt) {
      fullPrompt = `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`;
    }

    try {
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${model}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: fullPrompt,
            parameters: {
              temperature: request.temperature ?? 0.7,
              max_new_tokens: request.maxTokens ?? 500,
              return_full_text: false,
            },
          }),
        }
      );

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json() as any;

      // The Inference API returns either an array of generations or a single
      // object, depending on the model; handle both shapes.
      let text: string;
      if (Array.isArray(data) && data[0]?.generated_text) {
        text = data[0].generated_text;
      } else if (data.generated_text) {
        text = data.generated_text;
      } else {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // HF doesn't return token counts in the basic Inference API
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }
  async listModels(): Promise<ModelInfo[]> {
    // Return a curated list of popular models.
    // In production, you could fetch this from the HF API instead.
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/Phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }
  async checkHealth(): Promise<boolean> {
    try {
      // Probe the default model with a minimal one-token request.
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${this.defaultModel}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: "test",
            parameters: { max_new_tokens: 1 },
          }),
        }
      );
      return response.ok || response.status === 503; // 503 means the model is still loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}
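
// Usage sketch (an assumption, not part of the original file): one way this
// client might be wired into an Encore.ts endpoint. The secret name
// "HuggingFaceAPIKey" and the "/llm/generate" path are hypothetical; the
// LLMRequest/LLMResponse types come from ./types above.
//
// import { api } from "encore.dev/api";
// import { secret } from "encore.dev/config";
//
// const hfKey = secret("HuggingFaceAPIKey");
// const client = new HuggingFaceClient(hfKey());
//
// export const generate = api(
//   { expose: true, method: "POST", path: "/llm/generate" },
//   async (req: LLMRequest): Promise<LLMResponse> => client.generate(req)
// );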