File size: 6,605 Bytes
56f66cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// Qwen Embedding Service using Docker-based Hugging Face Space
// This version uses direct HTTP calls instead of Gradio client for better stability

// Base URL of the Qwen embedding service, read once at module load from the
// QWEN_API_URL environment variable. Because `||` is used (not `??`), an unset
// OR empty value falls back to the literal below.
// NOTE(review): the fallback host contains "your-username" and looks like a
// template placeholder rather than a working endpoint — confirm before relying on it.
const QWEN_API_URL = process.env.QWEN_API_URL || 'https://your-username-qwen-embedding-api.hf.space';

/**
 * Requests embeddings for a batch of texts from the Qwen service over HTTP.
 *
 * POSTs `{ data: [texts] }` to `${QWEN_API_URL}/api/predict` and reads the
 * embedding matrix from element 0 of the response's `data` array.
 *
 * @param texts - Texts to embed. An empty array resolves to `[]` without
 *   issuing a request.
 * @returns One non-empty numeric vector per input text, in response order.
 * @throws Error when the HTTP status is not OK, when the payload carries a
 *   truthy `error` field, or when the payload is not a matrix with exactly one
 *   non-empty numeric row per input text. Failures are logged, then rethrown.
 */
export async function generateQwenEmbeddings(texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip the round trip entirely.
  if (texts.length === 0) {
    return [];
  }

  try {
    console.log(`Calling Qwen API for ${texts.length} texts...`);

    const response = await fetch(`${QWEN_API_URL}/api/predict`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        data: [texts] // Wrap in array for batch processing
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Treat the body as untrusted until its shape has been checked.
    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null) {
      throw new Error('Invalid embeddings format received from Qwen API');
    }

    const { error: apiError, data: outputs } = payload as { error?: unknown; data?: unknown };

    if (apiError) {
      // Stringify non-string errors so they do not render as "[object Object]".
      const detail = typeof apiError === 'string' ? apiError : JSON.stringify(apiError);
      throw new Error(`API Error: ${detail}`);
    }

    // The response should be in the format: { data: [embeddings] }
    const embeddings: unknown = Array.isArray(outputs) ? outputs[0] : undefined;

    if (!Array.isArray(embeddings)) {
      throw new Error('Invalid embeddings format received from Qwen API');
    }

    // Callers pair embeddings with texts by position, so the counts must agree.
    if (embeddings.length !== texts.length) {
      throw new Error(
        `Expected ${texts.length} embeddings from Qwen API but received ${embeddings.length}`
      );
    }

    // Validate each row: must be a non-empty array of numbers.
    embeddings.forEach((row: unknown, i: number) => {
      if (!Array.isArray(row)) {
        throw new Error(`Embedding ${i} is not an array`);
      }
      if (row.length === 0) {
        throw new Error(`Embedding ${i} is empty`);
      }
      if (!row.every((value: unknown) => typeof value === 'number')) {
        throw new Error(`Embedding ${i} contains non-numeric values`);
      }
    });

    console.log(`Successfully generated ${embeddings.length} embeddings`);
    return embeddings;

  } catch (error) {
    console.error('Error calling Qwen embeddings API:', error);
    throw error;
  }
}

/**
 * Requests the embedding of a single text from the Qwen service.
 *
 * POSTs `{ data: [text] }` to `${QWEN_API_URL}/api/predict` and reads the
 * vector from element 0 of the response's `data` array. If the request or any
 * validation step fails, the error is logged and the text is retried through
 * `generateQwenEmbeddings([text])`.
 *
 * @param text - The text to embed.
 * @returns A non-empty numeric embedding vector.
 * @throws Error when the batch fallback also fails or yields no embedding.
 */
export async function generateSingleQwenEmbedding(text: string): Promise<number[]> {
  try {
    console.log('Calling Qwen API for single text...');

    const response = await fetch(`${QWEN_API_URL}/api/predict`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        data: [text] // Single text
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Treat the body as untrusted until its shape has been checked.
    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null) {
      throw new Error('Invalid embedding format received from Qwen API');
    }

    const { error: apiError, data: outputs } = payload as { error?: unknown; data?: unknown };

    if (apiError) {
      const detail = typeof apiError === 'string' ? apiError : JSON.stringify(apiError);
      throw new Error(`API Error: ${detail}`);
    }

    // The response should be in the format: { data: [embedding] }
    const embedding: unknown = Array.isArray(outputs) ? outputs[0] : undefined;

    if (!Array.isArray(embedding)) {
      throw new Error('Invalid embedding format received from Qwen API');
    }

    if (embedding.length === 0) {
      throw new Error('Empty embedding received from Qwen API');
    }

    // A batch-shaped reply (array of arrays) would otherwise be returned as if
    // it were a flat vector; reject it so the batch fallback below handles it.
    if (!embedding.every((value: unknown) => typeof value === 'number')) {
      throw new Error('Invalid embedding format received from Qwen API');
    }

    console.log('Successfully generated single embedding');
    return embedding;

  } catch (error) {
    console.error('Error calling Qwen single embedding API:', error);
    // Fallback to batch processing
    const embeddings = await generateQwenEmbeddings([text]);
    const first = embeddings[0];
    // Guard against an empty batch result instead of returning `undefined`
    // under a `number[]` return type.
    if (first === undefined) {
      throw new Error('Qwen batch fallback returned no embedding');
    }
    return first;
  }
}

/**
 * Probes the Qwen service's `/health` endpoint.
 *
 * @returns `true` only when the response status is OK and the JSON body has
 *   `status === 'healthy'` and `model_loaded === true`; `false` otherwise,
 *   including when the request or body parsing throws (the error is logged).
 */
export async function checkQwenAPIHealth(): Promise<boolean> {
  try {
    const healthResponse = await fetch(`${QWEN_API_URL}/health`, { method: 'GET' });

    if (healthResponse.ok) {
      const report = await healthResponse.json();
      const reportsHealthy = report.status === 'healthy';
      // Both conditions are required: the service is up AND the model is loaded.
      return reportsHealthy && report.model_loaded === true;
    }

    return false;
  } catch (healthError) {
    console.error('Health check failed:', healthError);
    return false;
  }
}

/**
 * Calls `generateQwenEmbeddings` up to `maxRetries` times, sleeping between
 * failed attempts with exponential backoff (2^attempt seconds: 2s, 4s, ...).
 *
 * @param texts - Texts to embed.
 * @param maxRetries - Maximum number of attempts (default 3).
 * @returns The embeddings from the first successful attempt.
 * @throws The error from the last failed attempt, or a generic error when no
 *   attempt recorded one (for example when `maxRetries` is less than 1).
 */
async function generateQwenEmbeddingsWithRetry(texts: string[], maxRetries: number = 3): Promise<number[][]> {
  let failure: Error | null = null;
  let attempt = 1;

  while (attempt <= maxRetries) {
    try {
      console.log(`Attempt ${attempt}/${maxRetries} to generate embeddings...`);
      return await generateQwenEmbeddings(texts);
    } catch (error) {
      failure = error as Error;
      console.warn(`Attempt ${attempt} failed:`, error);

      // No point sleeping after the final attempt.
      const attemptsRemain = attempt < maxRetries;
      if (attemptsRemain) {
        const delay = (2 ** attempt) * 1000; // Exponential backoff
        console.log(`Waiting ${delay}ms before retry...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }

    attempt += 1;
  }

  if (failure) {
    throw failure;
  }
  throw new Error('Qwen API failed after all retries');
}

/**
 * Generates embeddings with Qwen, falling back to the Jina embeddings API.
 *
 * Qwen is used only if `checkQwenAPIHealth()` reports healthy, and is called
 * through `generateQwenEmbeddingsWithRetry`. Any failure on that path is
 * logged and the same texts are sent to `https://api.jina.ai/v1/embeddings`,
 * authenticated with `JINA_API_KEY` and using `JINA_EMBEDDINGS_MODEL`
 * (default `'jina-embeddings-v3'`).
 *
 * @param texts - Texts to embed.
 * @returns One embedding per input text.
 * @throws Error when Qwen fails and `JINA_API_KEY` is not set, when Jina
 *   responds with a non-OK status, or when the Jina payload does not contain
 *   one array-valued `embedding` per input text.
 */
export async function generateEmbeddingsWithFallback(texts: string[]): Promise<number[][]> {
  try {
    // Check API health first
    const isHealthy = await checkQwenAPIHealth();
    if (!isHealthy) {
      throw new Error('Qwen API is not healthy');
    }

    // Try Qwen first with retry
    return await generateQwenEmbeddingsWithRetry(texts);
  } catch (qwenError) {
    console.warn('Qwen API failed after retries, falling back to Jina:', qwenError);

    // Fallback to Jina
    const JINA_API_KEY = process.env.JINA_API_KEY;
    const JINA_EMBEDDINGS_MODEL = process.env.JINA_EMBEDDINGS_MODEL || 'jina-embeddings-v3';

    if (!JINA_API_KEY) {
      throw new Error('Both Qwen and Jina APIs failed. JINA_API_KEY not available for fallback.');
    }

    const response = await fetch('https://api.jina.ai/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${JINA_API_KEY}`,
      },
      body: JSON.stringify({
        model: JINA_EMBEDDINGS_MODEL,
        input: texts,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Jina API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    // Treat the body as untrusted: narrow it step by step instead of using `any`.
    const payload: unknown = await response.json();
    const items: unknown =
      typeof payload === 'object' && payload !== null
        ? (payload as { data?: unknown }).data
        : undefined;

    if (!Array.isArray(items)) {
      throw new Error('Invalid embeddings format received from Jina API');
    }

    // Callers pair embeddings with texts by position, so the counts must agree.
    if (items.length !== texts.length) {
      throw new Error(
        `Expected ${texts.length} embeddings from Jina API but received ${items.length}`
      );
    }

    return items.map((item: unknown, i: number): number[] => {
      const embedding: unknown =
        typeof item === 'object' && item !== null
          ? (item as { embedding?: unknown }).embedding
          : undefined;

      if (!Array.isArray(embedding)) {
        throw new Error(`Jina embedding ${i} is not an array`);
      }
      return embedding;
    });
  }
}

/**
 * Main entry point: embeds texts with Qwen, with Jina as the fallback.
 *
 * When exactly one text is given, `generateSingleQwenEmbedding` is tried
 * first. If it throws, a warning is logged and the text goes through the same
 * batch path as every other input size: `generateEmbeddingsWithFallback`.
 *
 * @param texts - Texts to embed.
 * @returns One embedding per input text.
 */
export async function generateEmbeddings(texts: string[]): Promise<number[][]> {
  const isSingleText = texts.length === 1;

  if (isSingleText) {
    try {
      const vector = await generateSingleQwenEmbedding(texts[0]);
      return [vector];
    } catch (singleError) {
      // Deliberately not rethrown: the batch path below gets a chance instead.
      console.warn('Single embedding failed, falling back to batch processing:', singleError);
    }
  }

  // Batch path (also reached when the single-text attempt failed).
  const vectors = await generateEmbeddingsWithFallback(texts);
  return vectors;
}

// Export the single embedding function for compatibility.
// This is an alias, not a wrapper: `generateSingleEmbedding` refers to the same
// function as `generateSingleQwenEmbedding`, so both names behave identically.
export const generateSingleEmbedding = generateSingleQwenEmbedding;