| import { ensureImageFormatSupported, getBase64Async, getFileExtension, isTrueBoolean, saveBase64AsFile } from '../../utils.js'; |
| import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js'; |
| import { appendMediaToMessage, chat_metadata, eventSource, event_types, getRequestHeaders, saveChatConditional, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js'; |
| import { getMessageTimeStamp } from '../../RossAscends-mods.js'; |
| import { SECRET_KEYS, secret_state } from '../../secrets.js'; |
| import { getMultimodalCaption } from '../shared.js'; |
| import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js'; |
| import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js'; |
| import { SlashCommand } from '../../slash-commands/SlashCommand.js'; |
| import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js'; |
| import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnumsProvider.js'; |
| import { callGenericPopup, Popup, POPUP_TYPE } from '../../popup.js'; |
| import { debounce_timeout, MEDIA_DISPLAY, MEDIA_SOURCE, MEDIA_TYPE, SCROLL_BEHAVIOR } from '../../constants.js'; |
/** Identifier of this extension module; exported from this file. */
export const MODULE_NAME = 'caption';

/** Prompt used for multimodal captioning when no other prompt is configured or supplied. */
const PROMPT_DEFAULT = "What's in this image?";

/** Message template used when none is configured; `{{caption}}` receives the generated caption. */
const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]';
|
|
| |
| |
| |
| |
/**
 * Upgrades the stored caption settings to the current shape and fills in
 * defaults for every missing (falsy) value. Mutates `extension_settings.caption`.
 */
function migrateSettings() {
    const settings = extension_settings.caption;

    // The legacy boolean "local" flag is translated into a "source" value and then removed.
    if (settings.local !== undefined) {
        settings.source = settings.local ? 'local' : 'extras';
    }
    delete settings.local;

    if (!settings.source) {
        settings.source = 'extras';
    }

    // The former standalone "openai" source is now an API of the "multimodal" source.
    if (settings.source === 'openai') {
        settings.source = 'multimodal';
        settings.multimodal_api = 'openai';
        settings.multimodal_model = 'gpt-4-turbo';
    }

    // Remaining keys only receive a default when their current value is falsy.
    const fallbacks = [
        ['multimodal_api', 'openai'],
        ['multimodal_model', 'gpt-4-turbo'],
        ['prompt', PROMPT_DEFAULT],
        ['template', TEMPLATE_DEFAULT],
        ['show_in_chat', false],
    ];

    for (const [key, fallback] of fallbacks) {
        if (!settings[key]) {
            settings[key] = fallback;
        }
    }
}
|
|
| |
| |
| |
/**
 * Shows the idle (image) icon on the caption button by swapping the
 * hourglass class for the image class. Errors are logged, never rethrown.
 * @returns {Promise<void>}
 */
async function setImageIcon() {
    try {
        $('#send_picture .extensionsMenuExtensionButton')
            .addClass('fa-image')
            .removeClass('fa-hourglass-half');
    } catch (problem) {
        console.log(problem);
    }
}
|
|
| |
| |
| |
/**
 * Shows the busy (hourglass) icon on the caption button by swapping the
 * image class for the hourglass class. Errors are logged, never rethrown.
 * @returns {Promise<void>}
 */
async function setSpinnerIcon() {
    try {
        $('#send_picture .extensionsMenuExtensionButton')
            .removeClass('fa-image')
            .addClass('fa-hourglass-half');
    } catch (problem) {
        console.log(problem);
    }
}
|
|
| |
| |
| |
| |
| |
/**
 * Places a caption into the configured message template.
 * If the template lacks a `{{caption}}` placeholder, one is appended.
 * In refine mode the result is shown in an input popup for editing.
 * @param {string} caption Raw caption text
 * @returns {Promise<string>} Message text to use
 * @throws {Error} When refine mode is on and the popup returns an empty value
 */
async function wrapCaptionTemplate(caption) {
    const configured = extension_settings.caption.template || TEMPLATE_DEFAULT;
    const hasPlaceholder = /{{caption}}/i.test(configured);

    if (!hasPlaceholder) {
        console.warn('Poka-yoke: Caption template does not contain {{caption}}. Appending it.');
    }

    const template = hasPlaceholder ? configured : `${configured} {{caption}}`;
    const filled = substituteParamsExtended(template, { caption });

    if (!extension_settings.caption.refine_mode) {
        return filled;
    }

    const popupOptions = { rows: 8, okButton: 'Send' };
    const edited = await Popup.show.input(
        'Review and edit the generated caption:',
        'Press "Cancel" to abort the caption sending.',
        filled,
        popupOptions,
    );

    // A cancelled popup and an emptied input both abort the send.
    if (!edited) {
        throw new Error('User aborted the caption sending.');
    }

    return edited;
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Generates a caption for a media attachment that already belongs to a chat
 * message and stores it on that attachment (and on the message text when the
 * message has no text of its own). The message object is mutated in place.
 *
 * Returns without doing anything when the message has no media, the selected
 * attachment has no URL or is audio, or no caption could be produced.
 *
 * @param {object} message Chat message object
 * @param {number} [mediaIndex] Index into `message.extra.media`; anything that is
 *   not an in-range integer selects the first attachment
 * @returns {Promise<void>}
 * @throws {Error} When the attachment is a video and the current source cannot
 *   caption videos, when the media URL answers with a non-OK HTTP status, or
 *   when the caption template step throws
 */
async function captionExistingMessage(message, mediaIndex) {
    const mediaList = message?.extra?.media;
    if (!Array.isArray(mediaList) || mediaList.length === 0) {
        return;
    }

    // Fall back to the first attachment for missing, non-integer or out-of-range indices.
    const requestedIndex = Number(mediaIndex);
    const isUsableIndex = Number.isInteger(requestedIndex) && requestedIndex >= 0 && requestedIndex < mediaList.length;
    const mediaAttachment = mediaList[isUsableIndex ? requestedIndex : 0];

    if (!mediaAttachment?.url || mediaAttachment.type === MEDIA_TYPE.AUDIO) {
        return;
    }

    if (mediaAttachment.type === MEDIA_TYPE.VIDEO && !isVideoCaptioningAvailable()) {
        throw new Error('Captioning videos is not supported for the current source.');
    }

    // fetch() resolves for HTTP error statuses too; without this check an error
    // page would be wrapped into a File and sent off for captioning.
    const response = await fetch(mediaAttachment.url);
    if (!response.ok) {
        throw new Error(`Failed to load the media file (HTTP ${response.status}).`);
    }

    const blob = await response.blob();
    const fileName = mediaAttachment.url.split('/').pop().split('?')[0] || 'image.jpg';
    const file = new File([blob], fileName, { type: blob.type });
    const caption = await getCaptionForFile(file, null, true);

    if (!caption) {
        console.warn('Failed to generate a caption for the image.');
        return;
    }

    const wrappedCaption = await wrapCaptionTemplate(caption);

    // A missing `mes` counts as empty text instead of becoming the string "undefined".
    const hasText = String(message.mes ?? '').trim().length > 0;

    message.extra.inline_image = hasText;
    if (hasText) {
        // Keep the existing text and flag the title to be appended instead.
        mediaAttachment.append_title = true;
    } else {
        message.mes = wrappedCaption;
    }

    mediaAttachment.title = wrappedCaption;
    mediaAttachment.captioned = true;
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Adds a new user message carrying the captioned media to the current chat,
 * emits the sent/rendered events around rendering it, and saves the chat.
 * @param {string} caption Caption text (passed through the message template)
 * @param {string} image Value stored as the media attachment's `url`
 * @param {string} mimeType MIME type used to pick the media type; image when unrecognised
 * @returns {Promise<void>}
 */
async function sendCaptionedMessage(caption, image, mimeType) {
    const text = await wrapCaptionTemplate(caption);
    const context = getContext();

    const attachment = {
        url: image,
        type: MEDIA_TYPE.getFromMime(mimeType) || MEDIA_TYPE.IMAGE,
        title: text,
        captioned: true,
        source: MEDIA_SOURCE.CAPTIONED,
    };

    const extra = {
        media: [attachment],
        media_display: MEDIA_DISPLAY.GALLERY,
        media_index: 0,
        inline_image: Boolean(extension_settings.caption.show_in_chat),
    };

    const message = {
        name: context.name1,
        is_user: true,
        send_date: getMessageTimeStamp(),
        mes: text,
        extra,
    };

    chat_metadata['tainted'] = true;

    // push() returns the new length, so the new message sits at length - 1.
    const messageId = context.chat.push(message) - 1;

    await eventSource.emit(event_types.MESSAGE_SENT, messageId);
    context.addOneMessage(message);
    await eventSource.emit(event_types.USER_MESSAGE_RENDERED, messageId);
    await context.saveChat();

    setTimeout(() => context.scrollOnMediaLoad(), debounce_timeout.short);
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Routes a caption request to the backend selected in the extension settings.
 * @param {string} base64Img Base64 payload, passed to the local, extras and horde backends
 * @param {string} fileData Value passed to the multimodal backend instead of `base64Img`
 * @param {string} externalPrompt Prompt forwarded to the multimodal backend
 * @returns {Promise<object>} Whatever the selected backend returns
 * @throws {Error} When the configured source is not one of the known sources
 */
async function doCaptionRequest(base64Img, fileData, externalPrompt) {
    const backends = new Map([
        ['local', () => captionLocal(base64Img)],
        ['extras', () => captionExtras(base64Img)],
        ['horde', () => captionHorde(base64Img)],
        ['multimodal', () => captionMultimodal(fileData, externalPrompt)],
    ]);

    const runBackend = backends.get(extension_settings.caption.source);
    if (!runBackend) {
        throw new Error('Unknown caption source.');
    }

    return await runBackend();
}
|
|
| |
| |
| |
| |
| |
/**
 * Requests a caption from the Extras API (`/api/caption` on the Extras URL).
 * @param {string} base64Img Base64 image payload sent as `image`
 * @returns {Promise<object>} Parsed JSON body of the response
 * @throws {Error} When the "caption" module is not listed in `modules`, or the response is not OK
 */
async function captionExtras(base64Img) {
    const hasCaptionModule = modules.includes('caption');
    if (!hasCaptionModule) {
        throw new Error('No captioning module is available.');
    }

    const endpoint = new URL(getApiUrl());
    endpoint.pathname = '/api/caption';

    const request = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Bypass-Tunnel-Reminder': 'bypass',
        },
        body: JSON.stringify({ image: base64Img }),
    };

    const response = await doExtrasFetch(endpoint, request);
    if (!response.ok) {
        throw new Error('Failed to caption image via Extras.');
    }

    return await response.json();
}
|
|
| |
| |
| |
| |
| |
/**
 * Requests a caption from the server's `/api/extra/caption` endpoint.
 * @param {string} base64Img Base64 image payload sent as `image`
 * @returns {Promise<object>} Parsed JSON body of the response
 * @throws {Error} When the response is not OK
 */
async function captionLocal(base64Img) {
    const request = {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({ image: base64Img }),
    };

    const response = await fetch('/api/extra/caption', request);
    if (!response.ok) {
        throw new Error('Failed to caption image via local pipeline.');
    }

    return await response.json();
}
|
|
| |
| |
| |
| |
| |
/**
 * Requests a caption from the server's `/api/horde/caption-image` endpoint.
 * @param {string} base64Img Base64 image payload sent as `image`
 * @returns {Promise<object>} Parsed JSON body of the response
 * @throws {Error} When the response is not OK
 */
async function captionHorde(base64Img) {
    const payload = JSON.stringify({ image: base64Img });
    const response = await fetch('/api/horde/caption-image', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: payload,
    });

    if (response.ok) {
        return await response.json();
    }

    throw new Error('Failed to caption image via Horde.');
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Requests a caption from the configured multimodal API.
 * The prompt is, in order of preference: the external prompt, the configured
 * prompt, the default prompt. When no external prompt was given and
 * "prompt_ask" is enabled, the user is asked for a prompt in a popup first.
 * @param {string} base64Img Image data forwarded to `getMultimodalCaption`
 * @param {string} externalPrompt Prompt supplied by the caller, if any
 * @returns {Promise<{caption: string}>} Object holding the generated caption
 * @throws {Error} When the prompt popup returns an empty value
 */
async function captionMultimodal(base64Img, externalPrompt) {
    const basePrompt = externalPrompt || extension_settings.caption.prompt || PROMPT_DEFAULT;
    const shouldAskUser = !externalPrompt && extension_settings.caption.prompt_ask;

    let finalPrompt = basePrompt;
    if (shouldAskUser) {
        const answer = await callGenericPopup('Enter a comment or question:', POPUP_TYPE.INPUT, basePrompt, { rows: 4 });
        if (!answer) {
            throw new Error('User aborted the caption sending.');
        }
        finalPrompt = String(answer).trim();
    }

    return { caption: await getMultimodalCaption(base64Img, finalPrompt) };
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Handles a file input change: captions the first selected file and resets
 * the input's form (when it has one) afterwards.
 * @param {Event} e Change event whose target should be a file input
 * @param {string} prompt Prompt forwarded to `getCaptionForFile`
 * @param {boolean} quiet Forwarded to `getCaptionForFile`
 * @returns {Promise<string>} The caption, or an empty string when the target
 *   is not an input element or no file was selected
 */
async function onSelectImage(e, prompt, quiet) {
    const input = e.target;
    if (!(input instanceof HTMLInputElement)) {
        return '';
    }

    const selected = input.files[0];
    const parentForm = input.form;
    const resetForm = () => {
        if (parentForm) {
            parentForm.reset();
        }
    };

    // `instanceof` is false for undefined/null, so this also covers "nothing selected".
    if (!(selected instanceof File)) {
        resetForm();
        return '';
    }

    const caption = await getCaptionForFile(selected, prompt, quiet);
    resetForm();
    return caption;
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Produces a caption for a file and, unless `quiet` is set, saves the file
 * and sends a captioned chat message. The caption button shows the busy icon
 * while working and always returns to the idle icon afterwards.
 * Failures are reported through a toast and the console, not thrown.
 * @param {File} file Media file to caption
 * @param {string} prompt Prompt forwarded to the caption request
 * @param {boolean} quiet When truthy, no file is saved and no message is sent
 * @returns {Promise<string>} The caption, or an empty string on failure
 */
async function getCaptionForFile(file, prompt, quiet) {
    try {
        const isVideo = file.type.startsWith('video/');
        if (isVideo && !isVideoCaptioningAvailable()) {
            throw new Error('Video captioning is not available for the current source.');
        }

        setSpinnerIcon();

        const context = getContext();
        const supportedFile = await ensureImageFormatSupported(file);
        const dataUrl = await getBase64Async(supportedFile);
        const extension = getFileExtension(file);
        // Keep only the part after the first comma of the encoded data.
        const [, base64Data] = dataUrl.split(',');
        const { caption } = await doCaptionRequest(base64Data, dataUrl, prompt);

        if (quiet) {
            return caption;
        }

        const imagePath = await saveBase64AsFile(base64Data, context.name2, '', extension);
        await sendCaptionedMessage(caption, imagePath, file.type);
        return caption;
    } catch (error) {
        toastr.error(error.message || 'Unknown error', 'Failed to caption');
        console.error(error);
        return '';
    } finally {
        setImageIcon();
    }
}
|
|
/**
 * Copies the state of the "refine mode" checkbox into the caption settings
 * and schedules a settings save.
 */
function onRefineModeInput() {
    const isChecked = $('#caption_refine_mode').prop('checked');
    extension_settings.caption.refine_mode = isChecked;
    saveSettingsDebounced();
}
|
|
| |
| |
| |
| |
| |
/**
 * Callback of the `/caption` slash command.
 *
 * When a numeric message ID is given (`mesId`, or `id` as a fallback) and that
 * message has media, the selected attachment is downloaded and captioned.
 * Otherwise a file picker is opened and the chosen file is captioned.
 *
 * @param {object} args Named arguments: `quiet`, `mesId` / `id`, `index`
 * @param {string} prompt Optional prompt forwarded to the captioning call
 * @returns {Promise<string>} The caption, or an empty string on failure or cancellation
 */
async function captionCommandCallback(args, prompt) {
    const quiet = isTrueBoolean(args?.quiet);
    const rawMessageId = args?.mesId ?? args?.id;
    const index = Number(args?.index ?? 0);

    // Number('') and Number(null) are 0, so blank values must be excluded
    // explicitly or they would silently select the first chat message.
    const hasMessageId = rawMessageId !== undefined
        && rawMessageId !== null
        && String(rawMessageId).trim() !== ''
        && !Number.isNaN(Number(rawMessageId));

    if (hasMessageId) {
        const message = getContext().chat[Number(rawMessageId)];
        const mediaList = message?.extra?.media;

        if (Array.isArray(mediaList) && mediaList.length > 0) {
            try {
                // An index that selects nothing falls back to the first attachment.
                const mediaAttachment = mediaList[index] || mediaList[0];
                if (!mediaAttachment || !mediaAttachment.url) {
                    toastr.error('The specified message does not contain an image.');
                    return '';
                }
                if (mediaAttachment.type === MEDIA_TYPE.AUDIO) {
                    toastr.error('The specified media is an audio file. Captioning audio files is not supported.');
                    return '';
                }
                if (mediaAttachment.type === MEDIA_TYPE.VIDEO && !isVideoCaptioningAvailable()) {
                    toastr.error('The specified media is a video. Captioning videos is not supported for the current source.');
                    return '';
                }

                // fetch() resolves for HTTP error statuses as well; reject them here
                // so an error page is never captioned as if it were the media.
                const fetchResult = await fetch(mediaAttachment.url);
                if (!fetchResult.ok) {
                    throw new Error(`Media request failed with HTTP ${fetchResult.status}.`);
                }

                const blob = await fetchResult.blob();
                const fileName = mediaAttachment.url.split('/').pop().split('?')[0] || 'image.jpg';
                const file = new File([blob], fileName, { type: blob.type });
                return await getCaptionForFile(file, prompt, quiet);
            } catch (error) {
                console.error('Failed to get image from the message', error);
                toastr.error('Failed to get image from the message. Make sure the image is accessible.');
                return '';
            }
        }
    }

    // No usable message media: let the user pick a file instead.
    return new Promise(resolve => {
        const input = document.createElement('input');
        input.type = 'file';
        input.accept = 'image/*,video/*';
        input.onchange = async (e) => {
            const caption = await onSelectImage(e, prompt, quiet);
            resolve(caption);
        };
        input.oncancel = () => resolve('');
        input.click();
    });
}
|
|
| |
| |
| |
| |
/**
 * Tells whether the current caption settings allow captioning videos:
 * the source must be "multimodal" and the API one of google, vertexai or zai.
 * @returns {boolean}
 */
function isVideoCaptioningAvailable() {
    const { source, multimodal_api: api } = extension_settings.caption;
    const videoCapableApis = ['google', 'vertexai', 'zai'];
    return source === 'multimodal' && videoCapableApis.includes(api);
}
|
|
| jQuery(async function () { |
/**
 * Adds the "Generate Caption" button to `#caption_wand_container`.
 * Clicking it opens the hidden `#img_file` input, but only when the
 * configured caption source looks usable; otherwise an error toast is shown.
 */
function addSendPictureButton() {
    /**
     * Checks whether the selected multimodal API has what it needs
     * (a stored secret, an allowed reverse proxy, or a server URL).
     * @param {object} settings Current caption settings
     * @returns {boolean}
     */
    const isMultimodalReady = (settings) => {
        const api = settings.multimodal_api;

        // APIs that work with either a stored key or the reverse-proxy allowance.
        const proxySecret = {
            'openai': SECRET_KEYS.OPENAI,
            'mistral': SECRET_KEYS.MISTRALAI,
            'google': SECRET_KEYS.MAKERSUITE,
            'vertexai': SECRET_KEYS.VERTEXAI,
            'anthropic': SECRET_KEYS.CLAUDE,
            'xai': SECRET_KEYS.XAI,
        }[api];
        if (proxySecret && (secret_state[proxySecret] || settings.allow_reverse_proxy)) {
            return true;
        }

        // APIs that strictly require a stored key.
        const requiredSecret = {
            'openrouter': SECRET_KEYS.OPENROUTER,
            'groq': SECRET_KEYS.GROQ,
            'cohere': SECRET_KEYS.COHERE,
            'aimlapi': SECRET_KEYS.AIMLAPI,
            'moonshot': SECRET_KEYS.MOONSHOT,
            'nanogpt': SECRET_KEYS.NANOGPT,
            'chutes': SECRET_KEYS.CHUTES,
            'electronhub': SECRET_KEYS.ELECTRONHUB,
            'zai': SECRET_KEYS.ZAI,
        }[api];
        if (requiredSecret && secret_state[requiredSecret]) {
            return true;
        }

        // Text completion backends need a URL: the alternative endpoint when it
        // is enabled, otherwise the server URL stored for that backend type.
        const textgenType = {
            'ollama': textgen_types.OLLAMA,
            'llamacpp': textgen_types.LLAMACPP,
            'ooba': textgen_types.OOBA,
            'koboldcpp': textgen_types.KOBOLDCPP,
            'vllm': textgen_types.VLLM,
        }[api];
        if (textgenType) {
            const endpointUrl = settings.alt_endpoint_enabled
                ? settings.alt_endpoint_url
                : textgenerationwebui_settings.server_urls[textgenType];
            if (endpointUrl) {
                return true;
            }
        }

        return api === 'custom' || api === 'pollinations';
    };

    /**
     * Checks whether the configured caption source can be used right now.
     * @returns {boolean}
     */
    const isCaptioningAvailable = () => {
        const settings = extension_settings.caption;
        switch (settings.source) {
            case 'extras':
                return modules.includes('caption');
            case 'local':
            case 'horde':
                return true;
            case 'multimodal':
                return isMultimodalReady(settings);
            default:
                return false;
        }
    };

    const button = $(`
    <div id="send_picture" class="list-group-item flex-container flexGap5">
        <div class="fa-solid fa-image extensionsMenuExtensionButton"></div>
        <span data-i18n="Generate Caption">Generate Caption</span>
    </div>`);

    $('#caption_wand_container').append(button);
    $(button).on('click', () => {
        if (!isCaptioningAvailable()) {
            toastr.error('Choose other captioning source in the extension settings.', 'Captioning is not available');
            return;
        }

        $('#img_file').trigger('click');
    });
}
/**
 * Creates the hidden `#img_form` form with its hidden `#img_file` file input
 * (images and videos) and appends the form to `#form_sheld`. Selecting a file
 * captions it with no custom prompt and `quiet` off.
 */
function addPictureSendForm() {
    const fileInput = Object.assign(document.createElement('input'), {
        type: 'file',
        id: 'img_file',
        accept: 'image/*,video/*',
        hidden: true,
    });
    fileInput.addEventListener('change', (event) => onSelectImage(event, '', false));

    const form = Object.assign(document.createElement('form'), {
        id: 'img_form',
        hidden: true,
    });
    form.appendChild(fileInput);

    $('#form_sheld').append(form);
}
/**
 * Synchronises the multimodal part of the settings UI with the current
 * caption settings: refreshes remotely loaded models, picks a model when none
 * is selected, toggles the multimodal/prompt blocks and shows only the
 * elements whose `data-type` lists the selected API.
 * @returns {Promise<void>}
 */
async function switchMultimodalBlocks() {
    await addRemoteEndpointModels();

    const settings = extension_settings.caption;
    const isMultimodal = settings.source === 'multimodal';

    if (!settings.multimodal_model) {
        const firstValue = $('#caption_multimodal_model')
            .find(`option[data-type="${settings.multimodal_api}"]`)
            .first()
            .val();
        // .val() yields undefined when no option matches. Storing String(undefined)
        // would persist the bogus model name "undefined" and, being truthy, stop any
        // later automatic selection; keep the model empty instead.
        settings.multimodal_model = firstValue == null ? '' : String(firstValue);
    }

    $('#caption_multimodal_block').toggle(isMultimodal);
    $('#caption_prompt_block').toggle(isMultimodal);
    $('#caption_multimodal_api').val(settings.multimodal_api);
    $('#caption_multimodal_model').val(settings.multimodal_model);

    $('#caption_multimodal_block [data-type]').each(function () {
        // data-type may hold a comma-separated list of APIs.
        const types = String($(this).data('type')).split(',');
        $(this).toggle(types.includes(settings.multimodal_api));
    });
}
/**
 * Renders the extension's "settings" template (given the default template and
 * prompt) and appends the result to `#caption_container`.
 * @returns {Promise<void>}
 */
async function addSettings() {
    const templateData = { TEMPLATE_DEFAULT, PROMPT_DEFAULT };
    const markup = await renderExtensionTemplateAsync('caption', 'settings', templateData);
    $('#caption_container').append(markup);
}
|
|
/**
 * Adds remotely listed models to the `#caption_multimodal_model` dropdown.
 * Only the endpoint that belongs to the currently selected multimodal API is
 * queried. A failed or malformed model list is logged and skipped so that it
 * cannot interrupt the settings UI that awaits this function.
 * @returns {Promise<void>}
 */
async function addRemoteEndpointModels() {
    const endpoints = [
        ['openrouter', '/api/openrouter/models/multimodal'],
        ['aimlapi', '/api/backends/chat-completions/multimodal-models/aimlapi'],
        ['pollinations', '/api/backends/chat-completions/multimodal-models/pollinations'],
        ['nanogpt', '/api/backends/chat-completions/multimodal-models/nanogpt'],
        ['chutes', '/api/backends/chat-completions/multimodal-models/chutes'],
        ['electronhub', '/api/backends/chat-completions/multimodal-models/electronhub'],
        ['mistral', '/api/backends/chat-completions/multimodal-models/mistral'],
        ['xai', '/api/backends/chat-completions/multimodal-models/xai'],
    ];

    /**
     * Loads the model list of one API and appends the models not yet listed.
     * @param {string} api API identifier stored in each new option's `data-type`
     * @param {string} url Endpoint that returns an array of model IDs
     * @returns {Promise<void>}
     */
    async function processEndpoint(api, url) {
        const dropdown = document.getElementById('caption_multimodal_model');
        if (!(dropdown instanceof HTMLSelectElement)) {
            return;
        }

        const { source, multimodal_api: selectedApi } = extension_settings.caption;
        if (source !== 'multimodal' || selectedApi !== api) {
            return;
        }

        let modelIds;
        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: getRequestHeaders({ omitContentType: true }),
            });
            if (!response.ok) {
                return;
            }
            modelIds = await response.json();
        } catch (error) {
            console.warn(`Failed to load multimodal models for ${api}`, error);
            return;
        }

        if (!Array.isArray(modelIds) || modelIds.length === 0) {
            return;
        }

        // Track listed values, including the ones added below, so an ID that
        // appears twice in the response is only added once.
        const knownValues = new Set(Array.from(dropdown.options, (option) => option.value));

        for (const modelId of [...modelIds].sort()) {
            if (!modelId || typeof modelId !== 'string' || knownValues.has(modelId)) {
                continue;
            }

            knownValues.add(modelId);
            const option = document.createElement('option');
            option.value = modelId;
            option.textContent = modelId;
            option.dataset.type = api;
            dropdown.add(option);
        }
    }

    // Sequential on purpose: at most one endpoint matches the selected API.
    for (const [api, url] of endpoints) {
        await processEndpoint(api, url);
    }
}
|
|
// Build the settings markup and supporting DOM first, then bring the stored
// settings up to date before any control reads from them.
await addSettings();
addPictureSendForm();
addSendPictureButton();
setImageIcon();
migrateSettings();
await switchMultimodalBlocks();

/**
 * Initialises a checkbox from a caption setting and stores changes on "input".
 * @param {string} selector Checkbox selector
 * @param {string} key Property of `extension_settings.caption`
 * @param {boolean} coerce Whether the stored value is forced to a boolean
 */
const bindCheckbox = (selector, key, coerce) => {
    $(selector)
        .prop('checked', Boolean(extension_settings.caption[key]))
        .on('input', () => {
            const checked = $(selector).prop('checked');
            extension_settings.caption[key] = coerce ? Boolean(checked) : checked;
            saveSettingsDebounced();
        });
};

/**
 * Initialises a text control from a caption setting and stores changes on "input".
 * @param {string} selector Control selector
 * @param {string} key Property of `extension_settings.caption`
 */
const bindTextInput = (selector, key) => {
    $(selector)
        .val(extension_settings.caption[key])
        .on('input', () => {
            extension_settings.caption[key] = String($(selector).val());
            saveSettingsDebounced();
        });
};

/** Re-synchronises the multimodal blocks, then schedules a settings save. */
const refreshBlocksAndSave = async () => {
    await switchMultimodalBlocks();
    saveSettingsDebounced();
};

// Checkboxes
$('#caption_refine_mode')
    .prop('checked', Boolean(extension_settings.caption.refine_mode))
    .on('input', onRefineModeInput);
bindCheckbox('#caption_allow_reverse_proxy', 'allow_reverse_proxy', false);
bindCheckbox('#caption_prompt_ask', 'prompt_ask', false);
bindCheckbox('#caption_auto_mode', 'auto_mode', true);
bindCheckbox('#caption_altEndpoint_enabled', 'alt_endpoint_enabled', true);
bindCheckbox('#caption_show_in_chat', 'show_in_chat', true);

// Text fields
bindTextInput('#caption_prompt', 'prompt');
bindTextInput('#caption_template', 'template');
bindTextInput('#caption_altEndpoint_url', 'alt_endpoint_url');
$('#caption_ollama_custom_model')
    .val(extension_settings.caption.ollama_custom_model || '')
    .on('input', () => {
        const typed = String($('#caption_ollama_custom_model').val());
        extension_settings.caption.ollama_custom_model = typed.trim();
        saveSettingsDebounced();
    });

// Source, API and model selection
$('#caption_source')
    .val(extension_settings.caption.source)
    .on('change', async () => {
        extension_settings.caption.source = String($('#caption_source').val());
        await refreshBlocksAndSave();
    });
$('#caption_multimodal_api').on('change', async () => {
    extension_settings.caption.multimodal_api = String($('#caption_multimodal_api').val());
    // Clearing the model makes switchMultimodalBlocks pick one for the new API.
    extension_settings.caption.multimodal_model = '';
    await refreshBlocksAndSave();
});
$('#caption_multimodal_model').on('change', () => {
    extension_settings.caption.multimodal_model = String($('#caption_multimodal_model').val());
    saveSettingsDebounced();
});
$('#caption_refresh_models').on('click', async () => {
    extension_settings.caption.multimodal_model = '';
    await refreshBlocksAndSave();
});

// Triggers the Ollama download control and fills the popup input with the model name.
$('#caption_ollama_pull').on('click', (event) => {
    const selectedModel = extension_settings.caption.multimodal_model;
    // Placeholder entries stand for a model name stored elsewhere in the settings.
    const placeholderModels = {
        'ollama_current': textgenerationwebui_settings.ollama_model,
        'ollama_custom': extension_settings.caption.ollama_custom_model,
    };
    const modelToPull = placeholderModels[selectedModel] || selectedModel;
    event.preventDefault();
    $('#ollama_download_model').trigger('click');
    $('.popup .popup-input').val(modelToPull);
});
|
|
/**
 * Auto-captions the uploaded media of a chat message when auto mode is on.
 * Skips audio, videos the current source cannot caption, attachments that
 * were not uploaded and attachments already captioned. A failure on one
 * attachment is logged and does not stop the remaining ones.
 * @param {number} messageId Index of the message in the current chat
 * @returns {Promise<void>}
 */
const onMessageEvent = async (messageId) => {
    if (!extension_settings.caption.auto_mode) {
        return;
    }

    const message = getContext().chat[messageId];
    const hasMedia = Array.isArray(message?.extra?.media) && message.extra.media.length > 0;
    if (!hasMedia) {
        return;
    }

    /** Decides whether an attachment is left out of auto-captioning. */
    const shouldSkip = (attachment) => {
        const isUnsupportedVideo = attachment.type === MEDIA_TYPE.VIDEO && !isVideoCaptioningAvailable();
        const isAudio = attachment.type === MEDIA_TYPE.AUDIO;
        const isNotUpload = attachment.source !== MEDIA_SOURCE.UPLOAD;
        return isUnsupportedVideo || isAudio || isNotUpload || Boolean(attachment.captioned);
    };

    for (let mediaIndex = 0; mediaIndex < message.extra.media.length; mediaIndex += 1) {
        if (shouldSkip(message.extra.media[mediaIndex])) {
            continue;
        }

        try {
            await captionExistingMessage(message, mediaIndex);
        } catch (e) {
            console.error(`Auto-captioning failed for message ID ${messageId}, media index ${mediaIndex}`, e);
        }
    }
};
|
|
// Auto-captioning runs when a message is sent and when a file is embedded into a message.
for (const eventType of [event_types.MESSAGE_SENT, event_types.MESSAGE_FILE_EMBEDDED]) {
    eventSource.on(eventType, onMessageEvent);
}

// Caption control on message media: captions that attachment, re-renders the
// message media and saves the chat. The media element carries the fade class
// for as long as the request is running.
$(document).on('click', '.mes_img_caption', async function () {
    const busyClass = 'fa-fade';
    const clicked = $(this);
    const messageElement = clicked.closest('.mes');
    const mediaContainer = clicked.closest('.mes_media_container');
    const mediaElement = mediaContainer.find('.mes_img, .mes_video');

    // The fade class doubles as an "already in progress" marker.
    if (mediaElement.hasClass(busyClass)) {
        return;
    }

    mediaElement.addClass(busyClass);
    try {
        const messageId = Number(messageElement.attr('mesid'));
        const mediaIndex = Number(mediaContainer.attr('data-index'));
        const message = getContext().chat[messageId];
        await captionExistingMessage(message, mediaIndex);
        appendMediaToMessage(message, messageElement, SCROLL_BEHAVIOR.KEEP);
        await saveChatConditional();
    } catch (e) {
        console.error('Message image recaption failed', e);
        toastr.error(e.message || 'Unknown error', 'Failed to caption');
    } finally {
        mediaElement.removeClass(busyClass);
    }
});
|
|
// Arguments of the /caption slash command.
const quietArgument = new SlashCommandNamedArgument(
    'quiet', 'suppress sending a captioned message', [ARGUMENT_TYPE.BOOLEAN], false, false, 'false',
);

const messageIdArgument = SlashCommandNamedArgument.fromProps({
    name: 'mesId',
    description: 'get image from a message with this ID',
    typeList: [ARGUMENT_TYPE.NUMBER],
    enumProvider: commonEnumProviders.messages(),
});

const mediaIndexArgument = SlashCommandNamedArgument.fromProps({
    name: 'index',
    description: 'index of the image in the message to caption (starting from 0)',
    typeList: [ARGUMENT_TYPE.NUMBER],
    enumProvider: commonEnumProviders.messageMedia(),
});

const promptArgument = new SlashCommandArgument(
    'prompt', [ARGUMENT_TYPE.STRING], false,
);

const captionCommandHelp = `
    <div>
        Caption an image with an optional prompt and passes the caption down the pipe.
    </div>
    <div>
        Only multimodal sources support custom prompts.
    </div>
    <div>
        Provide a message ID to get an image from a message instead of uploading one.
    </div>
    <div>
        Set the "quiet" argument to true to suppress sending a captioned message, default: false.
    </div>
    `;

// Register /caption; captionCommandCallback produces the value passed down the pipe.
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'caption',
    callback: captionCommandCallback,
    returns: 'caption',
    namedArgumentList: [quietArgument, messageIdArgument, mediaIndexArgument],
    unnamedArgumentList: [promptArgument],
    helpString: captionCommandHelp,
}));

// Add the "caption" class to <body>.
document.body.classList.add('caption');
| }); |
|
|