Spaces:

Tyb7654
/

cursor2api

Sleeping

App Files Files Community

cursor2api / src /vision.ts

Tyb7654

Upload 9 files

02d34ae verified 25 days ago

raw

history blame contribute delete

5.11 kB

	import { getConfig } from './config.js';
	import type { AnthropicMessage, AnthropicContentBlock } from './types.js';
	import { createWorker } from 'tesseract.js';

	/**
	 * Replaces image blocks in the given messages with a text block describing them.
	 *
	 * Does nothing unless `config.vision.enabled` is truthy. For every message whose
	 * `content` is an array and contains at least one `image` block, the image
	 * blocks are removed and a single text block is appended after the remaining
	 * blocks. That text block carries either the OCR / vision-API output or, if
	 * processing throws, a notice containing the failure reason. Messages without
	 * images are left untouched.
	 *
	 * @param messages - Conversation messages; `content` of affected entries is
	 *   reassigned in place.
	 * @returns A promise that resolves once every message has been processed.
	 *   Processing errors are logged and reported in the message, never rethrown.
	 */
	export async function applyVisionInterceptor(messages: AnthropicMessage[]): Promise<void> {
	    const config = getConfig();
	    if (!config.vision?.enabled) return;

	    for (const msg of messages) {
	        if (!Array.isArray(msg.content)) continue;

	        // Split the message into image blocks and everything else.
	        const newContent: AnthropicContentBlock[] = [];
	        const imagesToAnalyze: AnthropicContentBlock[] = [];
	        for (const block of msg.content) {
	            if (block.type === 'image') {
	                imagesToAnalyze.push(block);
	            } else {
	                newContent.push(block);
	            }
	        }

	        if (imagesToAnalyze.length === 0) continue;

	        try {
	            let descriptions = '';
	            if (config.vision.mode === 'ocr') {
	                console.log(`[Vision] 启用纯本地 OCR 模式，正在提取 ${imagesToAnalyze.length} 张图片上的文字... (无需 API Key)`);
	                descriptions = await processWithLocalOCR(imagesToAnalyze);
	            } else {
	                console.log(`[Vision] 启用外部 API 模式，正在分析 ${imagesToAnalyze.length} 张图片...`);
	                descriptions = await callVisionAPI(imagesToAnalyze);
	            }

	            // Add descriptions as a simulated system text block
	            newContent.push({
	                type: 'text',
	                text: `\n\n[System: The user attached ${imagesToAnalyze.length} image(s). Visual analysis/OCR extracted the following context:\n${descriptions}]\n\n`
	            });
	        } catch (e) {
	            console.error("[Vision API Error]", e);
	            // Anything can be thrown; only Error instances have a usable `.message`.
	            const reason = e instanceof Error ? e.message : String(e);
	            newContent.push({
	                type: 'text',
	                text: `\n\n[System: The user attached image(s), but the Vision interceptor failed to process them. Error: ${reason}]\n\n`
	            });
	        }

	        // On both success and failure the image blocks are dropped from the message.
	        msg.content = newContent;
	    }
	}

	/**
	 * Runs local Tesseract OCR over each image block and concatenates the results.
	 *
	 * A single worker is created for the `eng+chi_sim` languages and reused for
	 * all images, which are processed one after another. Base64 sources are
	 * converted to a data URL (defaulting the MIME type to `image/jpeg`); `url`
	 * sources are handed to the worker as-is. Blocks without a usable source are
	 * skipped without producing output. A failure on one image is logged and
	 * recorded in the output; it does not abort the remaining images.
	 *
	 * @param imageBlocks - Image content blocks to recognise.
	 * @returns One labelled section per processed image, numbered by the block's
	 *   1-based position in `imageBlocks`; an empty string for empty input.
	 */
	async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promise<string> {
	    // Avoid spinning up a worker when there is nothing to recognise.
	    if (imageBlocks.length === 0) return '';

	    const worker = await createWorker('eng+chi_sim');
	    let combinedText = '';

	    try {
	        for (const [i, img] of imageBlocks.entries()) {
	            let imageSource = '';

	            if (img.type === 'image' && img.source?.data) {
	                if (img.source.type === 'base64') {
	                    const mime = img.source.media_type || 'image/jpeg';
	                    imageSource = `data:${mime};base64,${img.source.data}`;
	                } else if (img.source.type === 'url') {
	                    imageSource = img.source.data;
	                }
	            }

	            if (!imageSource) continue;

	            try {
	                const { data: { text } } = await worker.recognize(imageSource);
	                combinedText += `--- Image ${i + 1} OCR Text ---\n${text.trim() || '(No text detected in this image)'}\n\n`;
	            } catch (err) {
	                console.error(`[Vision OCR] Failed to parse image ${i + 1}:`, err);
	                combinedText += `--- Image ${i + 1} ---\n(Failed to parse image with local OCR)\n\n`;
	            }
	        }
	    } finally {
	        // Always release the worker, even if something unexpected throws above.
	        await worker.terminate();
	    }

	    return combinedText;
	}

	/**
	 * Asks the configured external vision model to describe the given images.
	 *
	 * Builds one OpenAI-format user message consisting of a fixed text
	 * instruction followed by an `image_url` part per usable image, then POSTs it
	 * as JSON to `config.baseUrl` with a Bearer token taken from `config.apiKey`.
	 * Base64 sources become data URLs (MIME type defaults to `image/jpeg`); `url`
	 * sources are forwarded unchanged. Blocks without a usable source are omitted.
	 *
	 * @param imageBlocks - Image content blocks to describe.
	 * @returns The content of the first choice's message, or
	 *   `'No description returned.'` when the response carries none.
	 * @throws Error if the vision configuration is missing or the endpoint
	 *   responds with a non-OK status (the response body is included in the message).
	 */
	async function callVisionAPI(imageBlocks: AnthropicContentBlock[]): Promise<string> {
	    const config = getConfig().vision;
	    if (!config) {
	        throw new Error('Vision API is not configured.');
	    }

	    type VisionPart =
	        | { type: 'text'; text: string }
	        | { type: 'image_url'; image_url: { url: string } };

	    // Construct an array of OpenAI format message parts
	    const parts: VisionPart[] = [
	        { type: 'text', text: 'Please describe the attached images in detail. If they contain code, UI elements, or error messages, explicitly write them out.' }
	    ];

	    for (const img of imageBlocks) {
	        if (img.type !== 'image' || !img.source?.data) continue;

	        let url = '';
	        if (img.source.type === 'base64') {
	            // Raw base64 payload: wrap it in a data URL.
	            const mime = img.source.media_type || 'image/jpeg';
	            url = `data:${mime};base64,${img.source.data}`;
	        } else if (img.source.type === 'url') {
	            // Handle remote URLs natively mapped from OpenAI payloads
	            url = img.source.data;
	        }

	        if (url) {
	            parts.push({ type: 'image_url', image_url: { url } });
	        }
	    }

	    const payload = {
	        model: config.model,
	        messages: [{ role: 'user', content: parts }],
	        max_tokens: 1500
	    };

	    const res = await fetch(config.baseUrl, {
	        method: 'POST',
	        headers: {
	            'Content-Type': 'application/json',
	            'Authorization': `Bearer ${config.apiKey}`
	        },
	        body: JSON.stringify(payload)
	    });

	    if (!res.ok) {
	        throw new Error(`Vision API returned status ${res.status}: ${await res.text()}`);
	    }

	    // Only the fields read below are described; the rest of the body is ignored.
	    const data = (await res.json()) as
	        | { choices?: Array<{ message?: { content?: string } }> }
	        | null;
	    return data?.choices?.[0]?.message?.content || 'No description returned.';
	}