import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";
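
/**
 * Minimal client for Hugging Face's serverless Inference API
 * (https://api-inference.huggingface.co). Wraps text generation,
 * a static model catalog, and a health probe.
 */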
export class HuggingFaceClient {
  private apiKey: string;
  private defaultModel: string;

  constructor(apiKey: string, defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2") {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
  }
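
  /**
   * Runs a single text-generation request. A system prompt, when present, is
   * folded into the prompt text, since the basic inference endpoint takes a
   * flat `inputs` string rather than structured chat messages.
   */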
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Build the full prompt, prepending the system message if one is provided
    let fullPrompt = request.prompt;
    if (request.systemPrompt) {
      fullPrompt = `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`;
    }

    try {
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${model}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: fullPrompt,
            parameters: {
              temperature: request.temperature ?? 0.7,
              max_new_tokens: request.maxTokens ?? 500,
              return_full_text: false,
            },
          }),
        }
      );

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json() as any;

      // The API returns either an array of generations or a single object
      let text: string;
      if (Array.isArray(data) && data[0]?.generated_text) {
        text = data[0].generated_text;
      } else if (data.generated_text) {
        text = data.generated_text;
      } else {
        throw new Error("Unexpected response format from Hugging Face");
      }

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // the basic HF Inference API doesn't return token counts
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      throw APIError.internal("Failed to generate response from Hugging Face", error as Error);
    }
  }
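
  /**
   * Curated catalog of instruct models exposed by this provider; kept static
   * rather than queried from the hub on every call.
   */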
  async listModels(): Promise<ModelInfo[]> {
    // Return a curated list of popular models.
    // In production, this list could be fetched from the HF hub API instead.
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/Phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }
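
  /**
   * Probes the default model with a minimal 1-token request. A 503 still
   * counts as healthy: it means the model is loading, not that the API is down.
   */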
  async checkHealth(): Promise<boolean> {
    try {
      // Test with a minimal request
      const response = await fetch(
        `https://api-inference.huggingface.co/models/${this.defaultModel}`,
        {
          method: "POST",
          headers: {
            "Authorization": `Bearer ${this.apiKey}`,
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            inputs: "test",
            parameters: { max_new_tokens: 1 },
          }),
        }
      );
      return response.ok || response.status === 503; // 503 means the model is loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}
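
// Example usage (an illustrative sketch, not part of the original file). It
// assumes the key is stored as an Encore secret named "HuggingFaceApiKey"
// (a hypothetical name) and that LLMRequest/LLMResponse in ./types carry the
// fields referenced above (prompt, systemPrompt, model, temperature, maxTokens).
//
//   import { api } from "encore.dev/api";
//   import { secret } from "encore.dev/config";
//
//   const hfApiKey = secret("HuggingFaceApiKey");
//   const client = new HuggingFaceClient(hfApiKey());
//
//   export const generate = api(
//     { expose: true, method: "POST", path: "/llm/generate" },
//     async (req: LLMRequest): Promise<LLMResponse> => client.generate(req)
//   );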