Spaces:
Running
Running
| /** | |
| * Service for interacting with OpenRouter API. | |
| */ | |
| const fileToBase64 = (file: File): Promise<string> => { | |
| return new Promise((resolve, reject) => { | |
| const reader = new FileReader(); | |
| reader.readAsDataURL(file); | |
| reader.onload = () => { | |
| if (typeof reader.result === 'string') { | |
| resolve(reader.result); | |
| } else { | |
| reject(new Error('Failed to convert file to base64')); | |
| } | |
| }; | |
| reader.onerror = error => reject(error); | |
| }); | |
| }; | |
| const extractFramesFromVideo = async (videoFile: File, numberOfFrames: number, signal?: AbortSignal): Promise<string[]> => { | |
| return new Promise((resolve, reject) => { | |
| const video = document.createElement('video'); | |
| video.preload = 'metadata'; | |
| video.muted = true; | |
| video.playsInline = true; | |
| const url = URL.createObjectURL(videoFile); | |
| const frames: string[] = []; | |
| const onAbort = () => { | |
| URL.revokeObjectURL(url); | |
| video.src = ""; | |
| reject(new Error("AbortError")); | |
| }; | |
| if (signal) signal.addEventListener('abort', onAbort); | |
| const timeout = setTimeout(() => { | |
| if (signal) signal.removeEventListener('abort', onAbort); | |
| URL.revokeObjectURL(url); | |
| video.src = ""; | |
| reject(new Error("Video processing timed out")); | |
| }, 60000); | |
| video.onloadeddata = async () => { | |
| const duration = video.duration; | |
| const canvas = document.createElement('canvas'); | |
| const ctx = canvas.getContext('2d'); | |
| if (!ctx) { | |
| if (signal) signal.removeEventListener('abort', onAbort); | |
| clearTimeout(timeout); | |
| URL.revokeObjectURL(url); | |
| reject(new Error("Could not create canvas context")); | |
| return; | |
| } | |
| canvas.width = video.videoWidth; | |
| canvas.height = video.videoHeight; | |
| const step = duration / numberOfFrames; | |
| try { | |
| for (let i = 0; i < numberOfFrames; i++) { | |
| if (signal?.aborted) throw new Error("AbortError"); | |
| const time = (step * i) + (step / 2); | |
| await new Promise<void>((frameResolve) => { | |
| const onSeeked = () => { | |
| video.removeEventListener('seeked', onSeeked); | |
| frameResolve(); | |
| }; | |
| video.addEventListener('seeked', onSeeked); | |
| video.currentTime = Math.min(time, duration - 0.1); | |
| }); | |
| ctx.drawImage(video, 0, 0); | |
| frames.push(canvas.toDataURL('image/jpeg', 0.8)); | |
| } | |
| if (signal) signal.removeEventListener('abort', onAbort); | |
| clearTimeout(timeout); | |
| URL.revokeObjectURL(url); | |
| video.src = ""; | |
| resolve(frames); | |
| } catch (e) { | |
| if (signal) signal.removeEventListener('abort', onAbort); | |
| clearTimeout(timeout); | |
| URL.revokeObjectURL(url); | |
| reject(e); | |
| } | |
| }; | |
| video.onerror = () => { | |
| if (signal) signal.removeEventListener('abort', onAbort); | |
| clearTimeout(timeout); | |
| URL.revokeObjectURL(url); | |
| reject(new Error("Failed to load video file")); | |
| }; | |
| video.src = url; | |
| }); | |
| }; | |
| const constructPrompt = ( | |
| triggerWord: string, | |
| customInstructions?: string, | |
| isCharacterTaggingEnabled?: boolean, | |
| characterShowName?: string | |
| ): string => { | |
| let basePrompt = `You are an expert captioner for AI model training data. Your task is to describe the provided image/video in detail for a style LoRA. Follow these rules strictly: | |
| 1. Start the caption with the trigger word: "${triggerWord}". | |
| 2. Describe EVERYTHING visible: characters, clothing, actions, background, objects, lighting, and camera angle. | |
| 3. Be objective and factual. | |
| 4. DO NOT mention art styles or generic animation terms like "anime" or "cartoon". | |
| 5. Write as a single, continuous paragraph.`; | |
| if (isCharacterTaggingEnabled && characterShowName && characterShowName.trim() !== '') { | |
| basePrompt += `\n6. Identify characters from the show/series "${characterShowName}" and append tags at the end of the caption, separated by commas. The format for each tag must be "char_[charactername]" (e.g., ", char_simon, char_kamina"). If no characters are recognized, do not add tags.`; | |
| } | |
| if (customInstructions) { | |
| return `${basePrompt}\n\nAdditional instructions: ${customInstructions}`; | |
| } | |
| return basePrompt; | |
| }; | |
| export const generateCaptionOpenRouter = async ( | |
| apiKey: string, | |
| model: string, | |
| file: File, | |
| triggerWord: string, | |
| customInstructions?: string, | |
| isCharacterTaggingEnabled?: boolean, | |
| characterShowName?: string, | |
| videoFrameCount: number = 8, | |
| maxTokens: number = 4096, | |
| temperature: number = 0.7, | |
| useFullVideo: boolean = false, | |
| signal?: AbortSignal | |
| ): Promise<string> => { | |
| if (!apiKey) throw new Error("OpenRouter API Key is required."); | |
| const endpoint = 'https://openrouter.ai/api/v1/chat/completions'; | |
| const prompt = constructPrompt(triggerWord, customInstructions, isCharacterTaggingEnabled, characterShowName); | |
| // Extract model ID from URL if provided | |
| let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model; | |
| // Handle /models/ prefix if it exists in the URL | |
| if (modelId.startsWith('models/')) { | |
| modelId = modelId.replace('models/', ''); | |
| } | |
| // Remove any trailing slashes or query params | |
| modelId = modelId.split('?')[0].replace(/\/+$/, ''); | |
| let contentParts: any[] = [{ type: "text", text: prompt }]; | |
| if (file.type.startsWith('video/')) { | |
| if (useFullVideo) { | |
| const base64Video = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Video } }); | |
| } else { | |
| const frames = await extractFramesFromVideo(file, videoFrameCount, signal); | |
| frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } })); | |
| } | |
| } else { | |
| const base64Image = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Image } }); | |
| } | |
| const payload = { | |
| model: modelId || 'openai/gpt-4o-mini', | |
| messages: [{ role: "user", content: contentParts }], | |
| max_tokens: maxTokens, | |
| temperature: temperature | |
| }; | |
| const response = await fetch(endpoint, { | |
| method: "POST", | |
| headers: { | |
| "Content-Type": "application/json", | |
| "Authorization": `Bearer ${apiKey}`, | |
| "HTTP-Referer": window.location.origin, | |
| "X-Title": "LoRA Caption Assistant" | |
| }, | |
| body: JSON.stringify(payload), | |
| signal | |
| }); | |
| if (!response.ok) { | |
| let errorMessage = response.statusText; | |
| try { | |
| const errData = await response.json(); | |
| errorMessage = errData.error?.message || errData.message || JSON.stringify(errData) || errorMessage; | |
| } catch (e) { | |
| const errText = await response.text().catch(() => ""); | |
| if (errText) errorMessage = errText; | |
| } | |
| throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`); | |
| } | |
| const data = await response.json(); | |
| console.log('OpenRouter Generate Response:', data); | |
| const message = data.choices?.[0]?.message; | |
| let content = ""; | |
| if (message) { | |
| if (typeof message.content === 'string') { | |
| content = message.content.trim(); | |
| } else if (Array.isArray(message.content)) { | |
| // Handle cases where content might be returned as an array of parts | |
| content = message.content | |
| .filter((part: any) => part.type === 'text') | |
| .map((part: any) => part.text) | |
| .join('\n') | |
| .trim(); | |
| } | |
| } | |
| const refusal = message?.refusal; | |
| const reasoning = message?.reasoning; | |
| const finishReason = data.choices?.[0]?.finish_reason; | |
| if (!content && refusal) { | |
| throw new Error(`OpenRouter Refusal: ${refusal}`); | |
| } | |
| if (!content && finishReason === 'length') { | |
| if (reasoning) { | |
| // If we only have reasoning and it hit the length limit, the model likely | |
| // spent all tokens "thinking" and never got to the output. | |
| throw new Error("OpenRouter model hit token limit during reasoning. Try increasing max tokens or using a non-reasoning model."); | |
| } | |
| throw new Error("OpenRouter response was cut off (hit token limit)."); | |
| } | |
| if (!content && finishReason === 'content_filter') { | |
| throw new Error("OpenRouter response was blocked by content filter."); | |
| } | |
| // Some models might put the result in reasoning if content is null, | |
| // though rare for standard chat completions. | |
| return content || (reasoning ? `[Reasoning Only]: ${reasoning}` : ""); | |
| }; | |
| export const refineCaptionOpenRouter = async ( | |
| apiKey: string, | |
| model: string, | |
| file: File, | |
| currentCaption: string, | |
| refinementInstructions: string, | |
| videoFrameCount: number = 4, | |
| maxTokens: number = 4096, | |
| temperature: number = 0.7, | |
| useFullVideo: boolean = false, | |
| signal?: AbortSignal | |
| ): Promise<string> => { | |
| if (!apiKey) throw new Error("OpenRouter API Key is required."); | |
| const endpoint = 'https://openrouter.ai/api/v1/chat/completions'; | |
| const prompt = `Refine the following caption based on the visual information and the instructions. Output ONLY the refined text. | |
| CURRENT CAPTION: "${currentCaption}" | |
| INSTRUCTIONS: "${refinementInstructions}"`; | |
| let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model; | |
| if (modelId.startsWith('models/')) modelId = modelId.replace('models/', ''); | |
| modelId = modelId.split('?')[0].replace(/\/+$/, ''); | |
| let contentParts: any[] = [{ type: "text", text: prompt }]; | |
| if (file.type.startsWith('video/')) { | |
| if (useFullVideo) { | |
| const base64Video = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Video } }); | |
| } else { | |
| const frames = await extractFramesFromVideo(file, videoFrameCount, signal); | |
| frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } })); | |
| } | |
| } else { | |
| const base64Image = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Image } }); | |
| } | |
| const payload = { | |
| model: modelId || 'openai/gpt-4o-mini', | |
| messages: [{ role: "user", content: contentParts }], | |
| max_tokens: maxTokens, | |
| temperature: temperature | |
| }; | |
| const response = await fetch(endpoint, { | |
| method: "POST", | |
| headers: { | |
| "Content-Type": "application/json", | |
| "Authorization": `Bearer ${apiKey}`, | |
| "HTTP-Referer": window.location.origin, | |
| "X-Title": "LoRA Caption Assistant" | |
| }, | |
| body: JSON.stringify(payload), | |
| signal | |
| }); | |
| if (!response.ok) { | |
| let errorMessage = response.statusText; | |
| try { | |
| const errData = await response.json(); | |
| errorMessage = errData.error?.message || errData.message || JSON.stringify(errData) || errorMessage; | |
| } catch (e) { | |
| const errText = await response.text().catch(() => ""); | |
| if (errText) errorMessage = errText; | |
| } | |
| throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`); | |
| } | |
| const data = await response.json(); | |
| console.log('OpenRouter Refine Response:', data); | |
| const content = data.choices?.[0]?.message?.content?.trim(); | |
| const refusal = data.choices?.[0]?.message?.refusal; | |
| if (!content && refusal) throw new Error(`OpenRouter Refusal: ${refusal}`); | |
| return content || ""; | |
| }; | |
| export const checkQualityOpenRouter = async ( | |
| apiKey: string, | |
| model: string, | |
| file: File, | |
| caption: string, | |
| videoFrameCount: number = 4, | |
| temperature: number = 0.7, | |
| useFullVideo: boolean = false, | |
| signal?: AbortSignal | |
| ): Promise<number> => { | |
| if (!apiKey) throw new Error("OpenRouter API Key is required."); | |
| const endpoint = 'https://openrouter.ai/api/v1/chat/completions'; | |
| const prompt = `Evaluate the caption quality. Respond with ONLY an integer from 1 to 5.\nCaption: "${caption}"`; | |
| let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model; | |
| if (modelId.startsWith('models/')) modelId = modelId.replace('models/', ''); | |
| modelId = modelId.split('?')[0].replace(/\/+$/, ''); | |
| let contentParts: any[] = [{ type: "text", text: prompt }]; | |
| if (file.type.startsWith('video/')) { | |
| if (useFullVideo) { | |
| const base64Video = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Video } }); | |
| } else { | |
| const frames = await extractFramesFromVideo(file, videoFrameCount, signal); | |
| frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } })); | |
| } | |
| } else { | |
| const base64Image = await fileToBase64(file); | |
| contentParts.push({ type: "image_url", image_url: { url: base64Image } }); | |
| } | |
| const payload = { | |
| model: modelId || 'openai/gpt-4o-mini', | |
| messages: [{ role: "user", content: contentParts }], | |
| max_tokens: 10, | |
| temperature: temperature | |
| }; | |
| const response = await fetch(endpoint, { | |
| method: "POST", | |
| headers: { | |
| "Content-Type": "application/json", | |
| "Authorization": `Bearer ${apiKey}`, | |
| "HTTP-Referer": window.location.origin, | |
| "X-Title": "LoRA Caption Assistant" | |
| }, | |
| body: JSON.stringify(payload), | |
| signal | |
| }); | |
| if (!response.ok) { | |
| let errorMessage = response.statusText; | |
| try { | |
| const errData = await response.json(); | |
| errorMessage = errData.error?.message || errData.message || JSON.stringify(errData) || errorMessage; | |
| } catch (e) { | |
| const errText = await response.text().catch(() => ""); | |
| if (errText) errorMessage = errText; | |
| } | |
| throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`); | |
| } | |
| const data = await response.json(); | |
| console.log('OpenRouter Quality Response:', data); | |
| const text = data.choices?.[0]?.message?.content?.trim(); | |
| const refusal = data.choices?.[0]?.message?.refusal; | |
| if (!text && refusal) throw new Error(`OpenRouter Refusal: ${refusal}`); | |
| return parseInt(text?.match(/\d+/)?.[0] || '0', 10); | |
| }; | |