Spaces:
Running
Running
// Qwen Embedding Service using Docker-based Hugging Face Space
// This version uses direct HTTP calls instead of Gradio client for better stability

/**
 * Normalises a configured base URL so that it can be joined with a path that
 * starts with `/` without producing a double slash.
 *
 * @param raw - The configured value, possibly `undefined`, blank, padded with
 *   whitespace, or ending in one or more slashes.
 * @param fallback - Value returned when `raw` is missing or blank after cleanup.
 * @returns `raw` with surrounding whitespace and trailing slashes removed, or
 *   `fallback` when nothing is left.
 */
function normalizeQwenBaseUrl(raw: string | undefined, fallback: string): string {
  const cleaned = (raw ?? '').trim().replace(/\/+$/, '');
  return cleaned || fallback;
}

/**
 * Base URL of the Qwen embedding service, taken from the `QWEN_API_URL`
 * environment variable.
 *
 * NOTE(review): the default contains `your-username` and looks like a
 * placeholder that has to be overridden per deployment — confirm.
 */
const QWEN_API_URL = normalizeQwenBaseUrl(
  process.env.QWEN_API_URL,
  'https://your-username-qwen-embedding-api.hf.space',
);
/**
 * Generates embeddings for a batch of texts through the Qwen HTTP API.
 *
 * Sends `{ data: [texts] }` as a JSON POST to `${QWEN_API_URL}/api/predict`
 * and reads the embeddings from element `0` of the reply's `data` array.
 *
 * @param texts - Texts to embed. An empty array resolves to `[]` without any
 *   network call.
 * @returns One numeric vector per input text, in the order the API returned them.
 * @throws Error when the HTTP status is not OK, when the payload carries a
 *   truthy `error` field, or when the payload does not contain exactly one
 *   non-empty numeric array per input text. Every failure is logged before
 *   being rethrown.
 */
export async function generateQwenEmbeddings(texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip the round trip entirely.
  if (texts.length === 0) {
    return [];
  }

  try {
    console.log(`Calling Qwen API for ${texts.length} texts...`);

    const response = await fetch(`${QWEN_API_URL}/api/predict`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        data: [texts], // Wrap in array for batch processing
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Treat the body as untrusted until its shape has been checked.
    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null) {
      throw new Error('Invalid embeddings format received from Qwen API');
    }

    const { error, data } = payload as { error?: unknown; data?: unknown };
    if (error) {
      // Serialise non-string errors so they do not render as "[object Object]".
      const detail = typeof error === 'string' ? error : JSON.stringify(error);
      throw new Error(`API Error: ${detail}`);
    }

    // The response should be in the format: { data: [embeddings] }
    const embeddings: unknown = Array.isArray(data) ? data[0] : undefined;
    if (!Array.isArray(embeddings)) {
      throw new Error('Invalid embeddings format received from Qwen API');
    }

    embeddings.forEach((embedding: unknown, i: number) => {
      if (!Array.isArray(embedding)) {
        throw new Error(`Embedding ${i} is not an array`);
      }
      if (embedding.length === 0) {
        throw new Error(`Embedding ${i} is empty`);
      }
      if (!embedding.every((value: unknown) => typeof value === 'number')) {
        throw new Error(`Embedding ${i} contains non-numeric values`);
      }
    });

    // Callers pair results with inputs by position, so a count mismatch must
    // fail loudly instead of silently misaligning texts and vectors.
    if (embeddings.length !== texts.length) {
      throw new Error(
        `Qwen API returned ${embeddings.length} embeddings for ${texts.length} texts`,
      );
    }

    console.log(`Successfully generated ${embeddings.length} embeddings`);
    return embeddings;
  } catch (error) {
    console.error('Error calling Qwen embeddings API:', error);
    throw error;
  }
}
/**
 * Generates the embedding for a single text through the Qwen HTTP API.
 *
 * Sends `{ data: [text] }` as a JSON POST to `${QWEN_API_URL}/api/predict`
 * and expects element `0` of the reply's `data` array to be one flat numeric
 * vector. Any failure on that path (HTTP error, `error` field, unexpected
 * shape) is logged and then retried through `generateQwenEmbeddings([text])`.
 *
 * @param text - The text to embed.
 * @returns The embedding vector for `text`.
 * @throws Error when the batch fallback fails as well, or when it yields no
 *   embedding for the text.
 */
export async function generateSingleQwenEmbedding(text: string): Promise<number[]> {
  try {
    console.log('Calling Qwen API for single text...');

    const response = await fetch(`${QWEN_API_URL}/api/predict`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        data: [text], // Single text
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Treat the body as untrusted until its shape has been checked.
    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null) {
      throw new Error('Invalid embedding format received from Qwen API');
    }

    const { error, data } = payload as { error?: unknown; data?: unknown };
    if (error) {
      // Serialise non-string errors so they do not render as "[object Object]".
      const detail = typeof error === 'string' ? error : JSON.stringify(error);
      throw new Error(`API Error: ${detail}`);
    }

    // The response should be in the format: { data: [embedding] }
    const embedding: unknown = Array.isArray(data) ? data[0] : undefined;
    if (!Array.isArray(embedding)) {
      throw new Error('Invalid embedding format received from Qwen API');
    }
    if (embedding.length === 0) {
      throw new Error('Empty embedding received from Qwen API');
    }
    // A batch-shaped reply (array of arrays) would pass the checks above; it
    // must not be handed back as if it were a flat vector.
    if (!embedding.every((value: unknown) => typeof value === 'number')) {
      throw new Error('Invalid embedding format received from Qwen API');
    }

    console.log('Successfully generated single embedding');
    return embedding;
  } catch (error) {
    console.error('Error calling Qwen single embedding API:', error);

    // Fallback to batch processing
    const [firstEmbedding] = await generateQwenEmbeddings([text]);
    if (firstEmbedding === undefined) {
      throw new Error('Qwen batch fallback returned no embedding');
    }
    return firstEmbedding;
  }
}
/**
 * Probes the Qwen service's `/health` endpoint.
 *
 * @returns `true` only when the endpoint answers with an OK status and a JSON
 *   body whose `status` is `'healthy'` and whose `model_loaded` is `true`.
 *   Every other outcome, including a failed request or unparsable body,
 *   resolves to `false`; thrown failures are logged.
 */
export async function checkQwenAPIHealth(): Promise<boolean> {
  try {
    const healthResponse = await fetch(`${QWEN_API_URL}/health`, { method: 'GET' });

    if (healthResponse.ok) {
      const report = await healthResponse.json();
      if (report.status !== 'healthy') {
        return false;
      }
      return report.model_loaded === true;
    }

    return false;
  } catch (failure) {
    console.error('Health check failed:', failure);
    return false;
  }
}
/**
 * Calls `generateQwenEmbeddings` up to `maxRetries` times.
 *
 * After a failed attempt that is not the last one, waits `2^attempt` seconds
 * (2s, 4s, ...) before trying again.
 *
 * @param texts - Texts to embed; passed through unchanged on every attempt.
 * @param maxRetries - Total number of attempts (defaults to 3).
 * @returns The embeddings from the first attempt that succeeds.
 * @throws The error of the last failed attempt, or a generic error when no
 *   attempt recorded one (for example when `maxRetries` is below 1).
 */
async function generateQwenEmbeddingsWithRetry(texts: string[], maxRetries: number = 3): Promise<number[][]> {
  let mostRecentFailure: Error | null = null;
  let attempt = 1;

  while (attempt <= maxRetries) {
    try {
      console.log(`Attempt ${attempt}/${maxRetries} to generate embeddings...`);
      const embeddings = await generateQwenEmbeddings(texts);
      return embeddings;
    } catch (failure) {
      mostRecentFailure = failure as Error;
      console.warn(`Attempt ${attempt} failed:`, failure);

      const isFinalAttempt = attempt >= maxRetries;
      if (!isFinalAttempt) {
        // Exponential backoff
        const backoffMs = 2 ** attempt * 1000;
        console.log(`Waiting ${backoffMs}ms before retry...`);
        await new Promise((wake) => setTimeout(wake, backoffMs));
      }
    }
    attempt += 1;
  }

  throw mostRecentFailure || new Error('Qwen API failed after all retries');
}
/**
 * Generates embeddings with Qwen and falls back to the Jina embeddings API
 * when Qwen is unavailable.
 *
 * Qwen is used only when `checkQwenAPIHealth()` reports healthy, and is
 * called through `generateQwenEmbeddingsWithRetry`. If the health check fails
 * or Qwen throws, the texts are sent to `https://api.jina.ai/v1/embeddings`
 * using `JINA_API_KEY` and `JINA_EMBEDDINGS_MODEL` (default
 * `jina-embeddings-v3`) from the environment.
 *
 * @param texts - Texts to embed. An empty array resolves to `[]` without any
 *   network call.
 * @returns One embedding per input text, aligned with `texts`.
 * @throws Error when Qwen fails and `JINA_API_KEY` is not set, when Jina
 *   answers with a non-OK status, or when the Jina payload is not a `data`
 *   array holding exactly one array-valued `embedding` per input text.
 */
export async function generateEmbeddingsWithFallback(texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip the health check and both providers.
  if (texts.length === 0) {
    return [];
  }

  try {
    // Check API health first
    const isHealthy = await checkQwenAPIHealth();
    if (!isHealthy) {
      throw new Error('Qwen API is not healthy');
    }

    // Try Qwen first with retry
    return await generateQwenEmbeddingsWithRetry(texts);
  } catch (qwenError) {
    console.warn('Qwen API failed after retries, falling back to Jina:', qwenError);

    // Fallback to Jina
    const JINA_API_KEY = process.env.JINA_API_KEY;
    const JINA_EMBEDDINGS_MODEL = process.env.JINA_EMBEDDINGS_MODEL || 'jina-embeddings-v3';

    if (!JINA_API_KEY) {
      throw new Error('Both Qwen and Jina APIs failed. JINA_API_KEY not available for fallback.');
    }

    const response = await fetch('https://api.jina.ai/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${JINA_API_KEY}`,
      },
      body: JSON.stringify({
        model: JINA_EMBEDDINGS_MODEL,
        input: texts,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Jina API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    // Treat the body as untrusted until its shape has been checked.
    const payload: unknown = await response.json();
    const items: unknown =
      typeof payload === 'object' && payload !== null
        ? (payload as { data?: unknown }).data
        : undefined;
    if (!Array.isArray(items)) {
      throw new Error('Invalid response format received from Jina API');
    }
    if (items.length !== texts.length) {
      throw new Error(`Jina API returned ${items.length} embeddings for ${texts.length} texts`);
    }

    const entries = items.map((item: unknown, position: number) => {
      const { index, embedding } = (item ?? {}) as { index?: unknown; embedding?: unknown };
      if (!Array.isArray(embedding)) {
        throw new Error(`Jina embedding ${position} is not an array`);
      }
      const vector: number[] = embedding;
      return { index: typeof index === 'number' ? index : undefined, vector };
    });

    // When every item carries a numeric `index`, use it to restore input
    // order; otherwise keep the order in which the API returned the items.
    if (entries.every((entry) => entry.index !== undefined)) {
      entries.sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    }

    return entries.map((entry) => entry.vector);
  }
}
/**
 * Main entry point: embeds `texts`, preferring Qwen and relying on
 * `generateEmbeddingsWithFallback` for everything else.
 *
 * A one-element input is first sent through `generateSingleQwenEmbedding`.
 * If that throws, the failure is logged as a warning and the same input is
 * processed by the batch path instead. All other input sizes go straight to
 * the batch path.
 *
 * @param texts - Texts to embed.
 * @returns The embeddings produced by whichever path succeeded.
 */
export async function generateEmbeddings(texts: string[]): Promise<number[][]> {
  const isSingleText = texts.length === 1;

  if (isSingleText) {
    const [onlyText] = texts;
    try {
      return [await generateSingleQwenEmbedding(onlyText)];
    } catch (singleFailure) {
      // Deliberately not rethrown: the batch path below takes over.
      console.warn('Single embedding failed, falling back to batch processing:', singleFailure);
    }
  }

  // Use batch processing with fallback
  const batchEmbeddings = await generateEmbeddingsWithFallback(texts);
  return batchEmbeddings;
}
/**
 * Compatibility alias: the same function object as
 * `generateSingleQwenEmbedding`, exported under a provider-neutral name.
 */
export const generateSingleEmbedding: (text: string) => Promise<number[]> =
  generateSingleQwenEmbedding;