File size: 3,913 Bytes
86042ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import log from "encore.dev/log";
import { APIError } from "encore.dev/api";
import { LLMRequest, LLMResponse, ModelInfo } from "./types";

/**
 * Client for the Hugging Face hosted Inference API.
 *
 * Wraps text generation, a curated model catalog, and a lightweight health
 * probe. API failures are logged and surfaced as `APIError.internal`.
 */
export class HuggingFaceClient {
  /** Base URL for the hosted inference endpoints. */
  private static readonly BASE_URL = "https://api-inference.huggingface.co/models";

  private readonly apiKey: string;
  private readonly defaultModel: string;

  /**
   * @param apiKey       Hugging Face API token, sent as a Bearer credential.
   * @param defaultModel Model used when a request does not name one.
   */
  constructor(apiKey: string, defaultModel: string = "mistralai/Mistral-7B-Instruct-v0.2") {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
  }

  /** Shared headers for every request to the inference API. */
  private buildHeaders(): Record<string, string> {
    return {
      "Authorization": `Bearer ${this.apiKey}`,
      "Content-Type": "application/json",
    };
  }

  /**
   * Pull `generated_text` out of the two response shapes the basic inference
   * API uses: `[{ generated_text }]` or `{ generated_text }`. Narrows the
   * untyped JSON instead of trusting it via `any`; accepts an empty string
   * as a valid (if unhelpful) generation.
   *
   * @throws Error when neither shape matches.
   */
  private static extractGeneratedText(data: unknown): string {
    const candidate: unknown = Array.isArray(data) ? data[0] : data;
    if (
      typeof candidate === "object" &&
      candidate !== null &&
      "generated_text" in candidate &&
      typeof (candidate as { generated_text: unknown }).generated_text === "string"
    ) {
      return (candidate as { generated_text: string }).generated_text;
    }
    throw new Error("Unexpected response format from Hugging Face");
  }

  /**
   * Generate a completion for the given request.
   *
   * @param request Prompt, optional system prompt, and optional overrides for
   *                model, temperature (default 0.7) and max tokens (default 500).
   * @returns The generated text and the model used. `tokensUsed` is always
   *          undefined — the basic HF API does not report token counts.
   * @throws APIError.internal on any network or API failure.
   */
  async generate(request: LLMRequest): Promise<LLMResponse> {
    const model = request.model || this.defaultModel;

    // Fold the optional system prompt into a single prompt string: the basic
    // inference API takes one "inputs" field rather than a message list.
    const fullPrompt = request.systemPrompt
      ? `System: ${request.systemPrompt}\n\nUser: ${request.prompt}`
      : request.prompt;

    try {
      const response = await fetch(`${HuggingFaceClient.BASE_URL}/${model}`, {
        method: "POST",
        headers: this.buildHeaders(),
        body: JSON.stringify({
          inputs: fullPrompt,
          parameters: {
            temperature: request.temperature ?? 0.7,
            max_new_tokens: request.maxTokens ?? 500,
            return_full_text: false,
          },
        }),
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Hugging Face API error: ${response.status} - ${errorText}`);
      }

      const data: unknown = await response.json();
      const text = HuggingFaceClient.extractGeneratedText(data);

      log.info("Hugging Face generation complete", {
        model,
        promptLength: fullPrompt.length,
        responseLength: text.length,
      });

      return {
        text,
        model,
        tokensUsed: undefined, // HF doesn't return token count in basic API
      };
    } catch (error) {
      log.error("Hugging Face generation failed", { error, model });
      // Narrow the unknown catch variable instead of blindly asserting Error.
      const cause = error instanceof Error ? error : new Error(String(error));
      throw APIError.internal("Failed to generate response from Hugging Face", cause);
    }
  }

  /**
   * List a curated set of popular instruction-tuned models.
   * In production this could be fetched from the HF API instead.
   */
  async listModels(): Promise<ModelInfo[]> {
    return [
      {
        name: "mistralai/Mistral-7B-Instruct-v0.2",
        size: "7B",
        description: "Mistral 7B Instruct - Fast and efficient",
        provider: "huggingface",
      },
      {
        name: "meta-llama/Meta-Llama-3-8B-Instruct",
        size: "8B",
        description: "Meta Llama 3 - High quality responses",
        provider: "huggingface",
      },
      {
        name: "microsoft/phi-3-mini-4k-instruct",
        size: "3.8B",
        description: "Phi-3 Mini - Compact and fast",
        provider: "huggingface",
      },
      {
        name: "google/gemma-7b-it",
        size: "7B",
        description: "Google Gemma - Versatile model",
        provider: "huggingface",
      },
    ];
  }

  /**
   * Probe the default model with a 1-token request.
   *
   * @returns true when the API responds OK, or 503 (model still loading —
   *          the service itself is reachable); false on any other failure.
   */
  async checkHealth(): Promise<boolean> {
    try {
      const response = await fetch(`${HuggingFaceClient.BASE_URL}/${this.defaultModel}`, {
        method: "POST",
        headers: this.buildHeaders(),
        body: JSON.stringify({
          inputs: "test",
          parameters: { max_new_tokens: 1 },
        }),
      });
      return response.ok || response.status === 503; // 503 means model is loading
    } catch (error) {
      log.error("Hugging Face health check failed", { error });
      return false;
    }
  }
}