llm-api-backend / backend /lib /huggingface-client.ts
cygon
initial commit
86042ad
import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";
/**
 * Thin client for the Hugging Face Inference API
 * (https://api-inference.huggingface.co). Authenticates every request with
 * a bearer token and falls back to a configurable default model.
 */
export class HuggingFaceClient {
  constructor(
    // Parameter properties replace the hand-written field declarations and
    // assignments; both values are never reassigned, hence readonly.
    private readonly apiKey: string,
    private readonly defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2"
  ) {}

  /**
   * Sends a JSON POST to the inference endpoint for `model`.
   * Shared by generate() and checkHealth() so the URL, auth header, and
   * content type are built in exactly one place.
   */
  private post(model: string, payload: unknown) {
    return fetch(
      `https://api-inference.huggingface.co/models/${model}`,
      {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${this.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
      }
    );
  }

  /**
   * Extracts `generated_text` from either response shape the inference API
   * returns: an array of candidate objects, or a bare object.
   *
   * Uses a `typeof` check rather than truthiness so that a legitimate
   * empty-string generation is accepted instead of being mistaken for a
   * malformed response (the original falsy check threw on `""`).
   *
   * @returns The generated text, or undefined when the payload matches
   *          neither shape.
   */
  private static extractGeneratedText(data: unknown): string | undefined {
    const candidate: unknown = Array.isArray(data) ? data[0] : data;
    // typeof null === "object", so the null check must come first.
    if (candidate !== null && typeof candidate === "object" && "generated_text" in candidate) {
      const text = (candidate as { generated_text?: unknown }).generated_text;
      if (typeof text === "string") {
        return text;
      }
    }
    return undefined;
  }

  /**
   * Generates a completion for `request.prompt`, optionally prefixed with
   * `request.systemPrompt`, using `request.model` or the client default.
   *
   * @returns The generated text and the model used. `tokensUsed` is always
   *          undefined because the basic HF inference API reports no usage.
   * @throws APIError.internal on any network, HTTP, or response-shape failure.
   */
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Fold an optional system prompt into a single text prompt: the basic
    // inference API takes one `inputs` string, not a chat-message array.
    const fullPrompt = request.systemPrompt
      ? `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`
      : request.prompt;

    try {
      const response = await this.post(model, {
        inputs: fullPrompt,
        parameters: {
          temperature: request.temperature ?? 0.7,
          max_new_tokens: request.maxTokens ?? 500,
          // Return only the completion, not prompt + completion.
          return_full_text: false,
        },
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      // Parse as `unknown` and narrow explicitly instead of `as any`.
      const data: unknown = await response.json();
      const text = HuggingFaceClient.extractGeneratedText(data);
      if (text === undefined) {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // HF doesn't return token count in basic API
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }

  /**
   * Returns a curated, hard-coded list of popular hosted models.
   * In production this could be fetched from the HF hub API instead.
   */
  async listModels(): Promise<ModelInfo[]> {
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }

  /**
   * Probes the default model with a minimal one-token request.
   *
   * @returns true when the endpoint answers OK — or 503, which HF uses to
   *          signal "model is loading" (reachable but warming up);
   *          false on any other status or on a network error.
   */
  async checkHealth(): Promise<boolean> {
    try {
      const response = await this.post(this.defaultModel, {
        inputs: "test",
        parameters: { max_new_tokens: 1 },
      });
      return response.ok || response.status === 503; // 503 means model is loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}