/**
 * vision.js — Análisis de Imágenes v1.1
 * FIX: notifica al usuario cuando todos los modelos fallan en vez de devolver null silencioso
 */
| import { readConfig } from './utils.js'; | |
| import { analyzeUserImage as visionAgentAnalyze } from './vision-agent.js'; | |
| import { sanitizeOutput } from './security.js'; | |
// Project configuration, read once when this module is evaluated.
const config = readConfig();

// OpenRouter model identifiers; analyzeImage iterates them in this order
// until one returns content.
const VISION_MODELS = [
  'google/gemini-2.0-flash-001',
  'google/gemini-2.5-pro-preview',
  'meta-llama/llama-4-maverick:free',
];
/**
 * Extracts a URL string from a single image entry.
 *
 * @param {unknown} entry - A URL string, or an object exposing `url` / `proxyURL`.
 * @returns {string} The URL, or '' when the entry carries no string URL.
 */
function resolveImageUrl(entry) {
  if (typeof entry === 'string') return entry;
  const candidate = entry?.url ?? entry?.proxyURL;
  return typeof candidate === 'string' ? candidate : '';
}

/**
 * Analyzes one or more images and returns a textual answer to `prompt`.
 *
 * Strategy:
 *  1. Download the first image and hand it (base64) to the vision agent.
 *  2. If that yields nothing usable, send up to 4 image URLs to OpenRouter,
 *     trying each entry of VISION_MODELS in order.
 *  3. If everything fails, return a user-facing error message (never null).
 *
 * @param {string|object|Array<string|object>} imageUrls - A URL, an object
 *   exposing `url` / `proxyURL`, or an array of either.
 * @param {string} [prompt] - Instruction sent along with the image(s).
 * @returns {Promise<string>} The vision agent's result, the sanitized model
 *   output, or the failure message.
 */
export async function analyzeImage(imageUrls, prompt = 'Describe esta imagen en español de forma breve.') {
  const failureMessage = 'no pude analizar la imagen ahora mismo, inténtalo de nuevo';

  // Normalize every entry to a plain URL string once, so both the vision-agent
  // path and the OpenRouter fallback see the same values.
  const entries = Array.isArray(imageUrls) ? imageUrls : [imageUrls];
  const urls = entries.map(resolveImageUrl).filter(Boolean);

  // Without a usable image there is nothing to analyze; avoid spending
  // model calls on a text-only request.
  if (urls.length === 0) {
    console.warn('[Vision] No se recibió ninguna URL de imagen válida');
    return failureMessage;
  }

  // FIRST CHOICE: vision-agent (original author's note: direct Gemini Vision,
  // faster and does not consume the text API).
  // NOTE(review): only the first image is analyzed on this path, and the result
  // is returned without sanitizeOutput — confirm both are intended.
  const [firstUrl] = urls;
  if (firstUrl.startsWith('http')) {
    try {
      const imgRes = await fetch(firstUrl, { signal: AbortSignal.timeout(12000) });
      if (imgRes.ok) {
        const b64 = Buffer.from(await imgRes.arrayBuffer()).toString('base64');
        const result = await visionAgentAnalyze(b64, prompt);
        // Reject very short answers and the agent's own failure text.
        if (result && result.length > 15 && !result.includes('No se pudo')) {
          return result;
        }
      }
    } catch (err) {
      // Best effort: log and fall through to the OpenRouter path.
      console.warn('[Vision] vision-agent error:', err?.message ?? err);
    }
  }

  // FALLBACK: OpenRouter chat completions, at most 4 images per request.
  const imageContent = urls.slice(0, 4).map((url) => ({
    type: 'image_url',
    image_url: { url, detail: 'auto' },
  }));
  const sentCount = imageContent.length;

  for (const model of VISION_MODELS) {
    try {
      const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${config.ai.openrouter.apiKey}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://tomatesmp.pw',
          'X-Title': 'Zelin',
        },
        body: JSON.stringify({
          model,
          messages: [{
            role: 'user',
            content: [{ type: 'text', text: prompt }, ...imageContent],
          }],
          max_tokens: 500,
        }),
        signal: AbortSignal.timeout(30000),
      });
      if (!res.ok) {
        console.warn(`[Vision] ${model}: ${res.status}`);
        continue;
      }
      const data = await res.json();
      const content = data.choices?.[0]?.message?.content;
      if (!content) continue;
      // Report the number of images actually sent (capped at 4).
      console.log(`[Vision] ✅ ${model} (${sentCount} imagen${sentCount > 1 ? 'es' : ''})`);
      return sanitizeOutput(content);
    } catch (err) {
      console.warn(`[Vision] ${model} error:`, err?.message ?? err);
    }
  }

  // Every model failed: return an error message instead of a silent null.
  console.error('[Vision] Todos los modelos de visión fallaron');
  return failureMessage;
}
/**
 * Collects image URLs from a message's attachments and embeds.
 *
 * An attachment counts as an image when its `contentType` starts with
 * `image/` or its `name` ends in png/jpg/jpeg/gif/webp. For attachments the
 * `proxyURL` is preferred over `url`. Embed images contribute `embed.image.url`.
 *
 * @param {object} message - Object with an optional `attachments` collection
 *   (anything exposing `values()`) and an optional iterable `embeds`.
 * @returns {string[]} Image URLs, attachments first, then embeds.
 */
export function getImageAttachments(message) {
  const imageNamePattern = /\.(png|jpg|jpeg|gif|webp)$/i;
  const images = [];

  // `attachments` is guarded the same way as `embeds`, so a message lacking
  // it produces no attachment URLs instead of throwing.
  for (const att of message.attachments?.values() ?? []) {
    const isImage = att.contentType?.startsWith('image/') || imageNamePattern.test(att.name ?? '');
    const url = att.proxyURL ?? att.url;
    // Skip image attachments that carry no URL rather than pushing undefined.
    if (isImage && url) images.push(url);
  }

  for (const embed of message.embeds ?? []) {
    if (embed.image?.url) images.push(embed.image.url);
  }

  return images;
}
/**
 * Returns the truthy `url` of every sticker on a message.
 *
 * @param {object} message - Object with an optional `stickers` collection
 *   exposing `values()`.
 * @returns {string[]} Sticker URLs; empty when there are no stickers.
 */
export function getStickerUrls(message) {
  const stickerIterable = message.stickers?.values() ?? [];
  const found = [];
  for (const sticker of stickerIterable) {
    const link = sticker.url;
    if (link) {
      found.push(link);
    }
  }
  return found;
}
/**
 * Tells whether a message text looks like a request about an image.
 *
 * The check is a case-insensitive substring match against a fixed keyword list.
 *
 * @param {string} content - Message text; falsy values yield false.
 * @returns {boolean} True when any keyword occurs in the text.
 */
export function isImageRequest(content) {
  if (!content) return false;

  const keywords = [
    'qué hay', 'que hay', 'analiza',
    'describe', 'qué dice', 'que dice',
    'lee', 'texto', 'imagen',
    'foto', 'screenshot', 'captura',
    'qué ves', 'que ves', 'ocr',
  ];
  const lowered = content.toLowerCase();
  return keywords.some((keyword) => lowered.includes(keyword));
}
/**
 * Maps a user message to the prompt sent to the vision model.
 *
 * - Text/OCR-style requests ('texto', 'dice', 'lee', 'ocr') get a transcription prompt.
 * - Description requests ('describe', 'qué hay'/'que hay', 'qué ves'/'que ves')
 *   get a detailed-description prompt.
 * - Anything else is returned unchanged so the user's own wording is the prompt.
 *
 * @param {string} userMessage - Raw message text.
 * @returns {string} Prompt for the vision model; '' when `userMessage` is not a string.
 */
export function buildVisionPrompt(userMessage) {
  // Guard non-string input instead of throwing on `.toLowerCase()`.
  if (typeof userMessage !== 'string') return '';

  const lowered = userMessage.toLowerCase();
  const mentions = (keywords) => keywords.some((keyword) => lowered.includes(keyword));

  if (mentions(['texto', 'dice', 'lee', 'ocr'])) {
    return 'Extrae y transcribe todo el texto visible en esta imagen. Si no hay texto, descríbela brevemente.';
  }
  // The unaccented spellings are accepted as well, matching the keyword list
  // used by isImageRequest.
  if (mentions(['describe', 'qué hay', 'que hay', 'qué ves', 'que ves'])) {
    return 'Describe detalladamente qué hay en esta imagen en español.';
  }
  return userMessage;
}