import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";
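
/**
 * Minimal client for Hugging Face's serverless Inference API
 * (https://api-inference.huggingface.co). Wraps text generation,
 * a static model catalog, and a health probe.
 */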
export class HuggingFaceClient {
  private apiKey: string;
  private defaultModel: string;

  constructor(apiKey: string, defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2") {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
  }
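
  /**
   * Runs a single text-generation request. A system prompt, when present, is
   * folded into the prompt text, since the basic inference endpoint takes a
   * flat `inputs` string rather than structured chat messages.
   */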
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Build the full prompt, prepending the system message if one is provided
    let fullPrompt = request.prompt;
    if (request.systemPrompt) {
      fullPrompt = `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`;
    }

    try {
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${model}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: fullPrompt,
            parameters: {
              temperature: request.temperature ?? 0.7,
              max_new_tokens: request.maxTokens ?? 500,
              return_full_text: false,
            },
          }),
        }
      );

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json() as any;

      // The API returns either an array of generations or a single object
      let text: string;
      if (Array.isArray(data) && data[0]?.generated_text) {
        text = data[0].generated_text;
      } else if (data.generated_text) {
        text = data.generated_text;
      } else {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // the basic HF Inference API doesn't return token counts
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }
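
  /**
   * Curated catalog of instruct models exposed by this provider; kept static
   * rather than queried from the hub on every call.
   */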
  async listModels(): Promise<ModelInfo[]> {
    // Return a curated list of popular models.
    // In production, this list could be fetched from the HF hub API instead.
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/Phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }
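
  /**
   * Probes the default model with a minimal 1-token request. A 503 still
   * counts as healthy: it means the model is loading, not that the API is down.
   */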
  async checkHealth(): Promise<boolean> {
    try {
      // Test with a minimal request
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${this.defaultModel}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: "test",
            parameters: { max_new_tokens: 1 },
          }),
        }
      );
      return response.ok || response.status === 503; // 503 means the model is loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}
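
// Example usage (an illustrative sketch, not part of the original file). It
// assumes the key is stored as an Encore secret named "HuggingFaceApiKey"
// (a hypothetical name) and that LLMRequest/LLMResponse in ./types carry the
// fields referenced above (prompt, systemPrompt, model, temperature, maxTokens).
//
//   import { api } from "encore.dev/api";
//   import { secret } from "encore.dev/config";
//
//   const hfApiKey = secret("HuggingFaceApiKey");
//   const client = new HuggingFaceClient(hfApiKey());
//
//   export const generate = api(
//     { expose: true, method: "POST", path: "/llm/generate" },
//     async (req: LLMRequest): Promise<LLMResponse> => client.generate(req)
//   );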