// (removed file-hosting page residue: avatar alt text / "lfs" / commit hash 1e3b872)
import { app } from '../../../scripts/app.js'
// import { api } from '../../../scripts/api.js'
import { ComfyWidgets } from '../../../scripts/widgets.js'
import { $el } from '../../../scripts/ui.js'
// Compute CSS properties for an absolutely-positioned HTML overlay so it
// stays aligned with a canvas widget through LiteGraph's pan/zoom.
function get_position_style (ctx, widget_width, y, node_height) {
  const MARGIN = 4 // gap between the element and the widget edge

  // Map canvas-space coordinates into page space: first compensate for the
  // canvas element being displayed at a different size than its backing
  // store, then apply the current graph transform, then offset to the
  // widget's position inside the node.
  const rect = ctx.canvas.getBoundingClientRect()
  const scaleX = rect.width / ctx.canvas.width
  const scaleY = rect.height / ctx.canvas.height
  const transform = new DOMMatrix()
    .scaleSelf(scaleX, scaleY)
    .multiplySelf(ctx.getTransform())
    .translateSelf(MARGIN, MARGIN + y)

  const innerWidth = widget_width - MARGIN * 2
  return {
    transformOrigin: '0 0',
    transform: transform,
    left: `0`,
    top: `0`,
    cursor: 'pointer',
    position: 'absolute',
    maxWidth: `${innerWidth}px`,
    width: `${innerWidth}px`,
    display: 'flex',
    flexDirection: 'column',
    justifyContent: 'space-around'
  }
}
// Read a JSON object persisted under `key` in localStorage.
// Returns {} when the key is missing, holds null, or contains invalid JSON.
const getLocalData = key => {
  try {
    const parsed = JSON.parse(localStorage.getItem(key))
    return parsed || {}
  } catch (error) {
    return {}
  }
}
// Speak `text` aloud using the browser's default speech synthesizer.
// When playback finishes, window._mixlab_speech_synthesis_onend is set to
// true so the recognition restart loop (see start()) knows it may resume.
function speakText (text) {
  const utterance = new SpeechSynthesisUtterance()
  utterance.text = text

  // Fired when synthesis finishes playing.
  utterance.onend = function (event) {
    console.log('语音播放结束')
    window._mixlab_speech_synthesis_onend = true
  }

  // Fired when synthesis fails.
  utterance.onerror = function (event) {
    console.error('语音播放错误:', event.error)
  }

  speechSynthesis.speak(utterance)
}
// Usage: convert text to speech and play it, e.g.
// speakText('Hello, how are you?');
// #MixCopilot
// Start a continuous speech-recognition session. Interim transcripts are
// mirrored into `element` (the node's textarea) and persisted to
// localStorage under the node's `id`. The single active session lives on
// window.recognition, which also serves as the "currently listening" flag.
// `startBtn` shows a spinner (loading_mixlab class) while listening.
const start = (element, id, startBtn, node) => {
  startBtn.className = 'loading_mixlab'
  // NOTE(review): only the prefixed webkitSpeechRecognition is used, so this
  // requires a Chromium-based browser — confirm other targets aren't needed.
  window.recognition = new webkitSpeechRecognition()
  window.recognition.continuous = true // keep listening across phrases
  window.recognition.interimResults = true // deliver partial transcripts too
  window.recognition.lang = navigator.language

  let timeoutId, intervalId

  window.recognition.onstart = () => {
    console.log('开始语音输入', window._mixlab_speech_synthesis_onend)
    // Clear the "synthesis finished" flag; the restart poll below waits for
    // speakText() to set it back to true before listening again.
    window._mixlab_speech_synthesis_onend = false
  }

  window.recognition.onresult = function (event) {
    // Latest (possibly interim) transcript from the recognizer.
    const result = event.results[event.results.length - 1][0].transcript
    console.log('识别结果:', result)
    element.value = result

    // Persist per node id so the AUDIOINPUTMIX widget's serializeValue()
    // can return it when the prompt is queued.
    let data = getLocalData('_mixlab_speech_recognition')
    data[id] = result.trim()
    localStorage.setItem('_mixlab_speech_recognition', JSON.stringify(data))

    // Debounce: 2s of silence after the last result means dictation is done.
    if (timeoutId) clearTimeout(timeoutId)
    if (!window.recognition) return
    timeoutId = setTimeout(function () {
      console.log('结果传递::', result)
      // Forward the transcript into the 'prompt' widget of the node chosen
      // via the 'Send to ChatGPT #' widget, then queue the graph.
      try {
        const sendToId = node.widgets.filter(
          w => w.name === 'Send to ChatGPT #'
        )[0].value
        app.graph
          .getNodeById(sendToId)
          .widgets.filter(w => w.name === 'prompt')[0].value = result
      } catch (error) {}
      setTimeout(() => app.queuePrompt(0, 1), 100)

      // Stop listening while the prompt executes.
      window.recognition?.stop()
      window.recognition = null
      startBtn.className = ''
      startBtn.innerText = 'START'
      timeoutId = null

      // Poll until the queue is idle, no session is active, and speech
      // synthesis has finished — then restart recognition for the next
      // round of dictation and stop polling.
      intervalId = setInterval(() => {
        if (
          app.ui.lastQueueSize === 0 &&
          !window.recognition &&
          window._mixlab_speech_synthesis_onend
        ) {
          start(element, id, startBtn, node)
          startBtn.innerText = 'STOP'
          if (intervalId) {
            clearInterval(intervalId)
          }
        }
      }, 2200)
    }, 2000)
  }

  window.recognition.onend = function () {
    console.log('语音输入结束')
  }
  window.recognition.onspeechend = function () {
    console.log('onspeechend')
  }
  window.recognition.onerror = function (event) {
    console.log('Error occurred in recognition: ' + event.error)
  }

  window.recognition.start()
}
app.registerExtension({
  name: 'Mixlab.audio.SpeechRecognition',

  // Provide the AUDIOINPUTMIX input type: an invisible widget whose
  // serializeValue() returns the latest transcript stored for this node
  // in localStorage when the prompt is queued.
  async getCustomWidgets (app) {
    return {
      AUDIOINPUTMIX (node, inputName, inputData, app) {
        // console.log('##node', node)
        const widget = {
          type: inputData[0], // the type, CHEESE
          name: inputName, // the name, slice
          size: [128, 32], // a default size
          draw (ctx, node, width, y) {}, // nothing to draw; UI lives in a DOM overlay
          computeSize (...args) {
            return [128, 32] // a method to compute the current size of the widget
          },
          async serializeValue (nodeId, widgetIndex) {
            // Transcripts are keyed by node id (written in start()).
            let data = getLocalData('_mixlab_speech_recognition')
            return data[node.id] || 'Hello Mixlab'
          }
        }
        // widget.something = something; // maybe adds stuff to it
        node.addCustomWidget(widget) // adds it to the node
        return widget // and returns it.
      }
    }
  },

  async beforeRegisterNodeDef (nodeType, nodeData, app) {
    if (nodeType.comfyClass == 'SpeechRecognition') {
      const orig_nodeCreated = nodeType.prototype.onNodeCreated
      nodeType.prototype.onNodeCreated = function () {
        orig_nodeCreated?.apply(this, arguments)

        // INT widget selecting the node id whose 'prompt' widget receives
        // the recognized text (consumed in start()).
        const sendTo = ComfyWidgets.INT(
          this,
          'Send to ChatGPT #',
          ['INT', { default: 0 }],
          app
        )
        // console.log('sendTo',sendTo)

        // DOM overlay widget: draw() re-positions the floating div so it
        // tracks the node through pan/zoom (see get_position_style).
        const widget = {
          type: 'div',
          name: 'chatgptdiv',
          draw (ctx, node, widget_width, y, widget_height) {
            Object.assign(
              this.div.style,
              get_position_style(ctx, widget_width, 78, node.size[1])
            )
          }
        }
        widget.div = $el('div', {})
        document.body.appendChild(widget.div)

        // Build the overlay UI: a START/STOP toggle button plus a textarea
        // that mirrors the live transcript.
        const inputDiv = (key, placeholder) => {
          let div = document.createElement('div')
          const startBtn = document.createElement('button')
          const textArea = document.createElement('textarea')
          textArea.placeholder = 'speak text'
          // sendTo.type='range';
          // sendTo.min=0;
          // sendTo.max=2000;
          // sendTo.step=1;
          // sendTo.className='comfy-multiline-input'
          textArea.className = `${'comfy-multiline-input'} ${placeholder}`
          textArea.style = `margin-top: 14px;
          height: 44px;`
          div.style = `flex-direction: column;
          display: flex;
          margin: 0px 8px 6px;`
          startBtn.style = `
          margin-top:48px;
          background-color: var(--comfy-input-bg);
          border-radius: 8px;
          border-color: var(--border-color);
          border-style: solid;
          color: var(--descrip-text);
          `
          startBtn.innerText = 'START'

          div.appendChild(startBtn)
          // div.appendChild(sendTo);
          div.appendChild(textArea)

          // Toggle recognition on click; window.recognition doubles as the
          // "currently listening" flag.
          startBtn.addEventListener('click', () => {
            if (window.recognition) {
              window.recognition.stop()
              window.recognition = null
              startBtn.innerText = 'START'
              startBtn.className = ''
            } else {
              start(textArea, this.id, startBtn, this)
              startBtn.innerText = 'STOP'
            }
          })
          // sendTo.addEventListener('change',()=>{
          //   console.log(sendTo.value)
          // })
          return div
        }

        let inputAudio = inputDiv('_mixlab_speech_recognition', 'audio')
        widget.div.appendChild(inputAudio)
        this.addCustomWidget(widget)

        // Remove the floating DOM when the node is deleted.
        const onRemoved = this.onRemoved
        this.onRemoved = () => {
          inputAudio.remove()
          widget.div.remove()
          return onRemoved?.()
        }
        this.serialize_widgets = true // persist widget values with the workflow
      }

      // const onGraphConfigured=nodeType.prototype.onGraphConfigured;
      // nodeType.prototype.onGraphConfigured = function (message) {
      //   onGraphConfigured?.apply(this, arguments)
      //   console.log('###SpeechRecognition onGraphConfigured',this,message)
      // }

      const onExecuted = nodeType.prototype.onExecuted
      nodeType.prototype.onExecuted = function (message) {
        onExecuted?.apply(this, arguments)
        // console.log('this.widgets', this.widgets)
        try {
          // Auto-start listening based on the node's 'start_by' output.
          let open = message.start_by[0] > 0
          if (open) {
            const div = this.widgets.filter(w => w.name == 'chatgptdiv')[0].div
            const startBtn = div.querySelector('button')
            let textArea = div.querySelector('textarea')
            if (open && !window.recognition) {
              start(textArea, this.id, startBtn, this)
              startBtn.innerText = 'STOP'
            } else if (!open && window.recognition) {
              // NOTE(review): unreachable — nested inside `if (open)`, so
              // `!open` can never hold here.
              window.recognition.stop()
              window.recognition = null
              startBtn.innerText = 'START'
              startBtn.className = ''
            }
          }
        } catch (error) {
          console.log('###SpeechRecognition', error)
        }
      }
    }
  },

  // Restore UI state when a saved workflow is loaded: refill the textarea
  // with the last transcript and optionally auto-start listening.
  async loadedGraphNode (node, app) {
    if (node.type === 'SpeechRecognition') {
      let data = getLocalData('_mixlab_speech_recognition')
      // console.log('_mixlab_speech_recognition', node )
      let div = node.widgets.filter(f => f.type === 'div')[0]
      if (div && data[node.id]) {
        div.div.querySelector('textarea').value = data[node.id]
      }
      try {
        // Presumably widgets_values[1] holds the saved 'start by' value —
        // verify against the node definition.
        let open = node.widgets_values[1] > 0
        if (open) {
          const div = node.widgets.filter(w => w.name == 'chatgptdiv')[0].div
          const startBtn = div.querySelector('button')
          let textArea = div.querySelector('textarea')
          if (open && !window.recognition) {
            start(textArea, node.id, startBtn, node)
            startBtn.innerText = 'STOP'
          } else if (!open && window.recognition) {
            // NOTE(review): unreachable — nested inside `if (open)`.
            window.recognition.stop()
            window.recognition = null
            startBtn.innerText = 'START'
            startBtn.className = ''
          }
        }
      } catch (error) {
        console.log('###SpeechRecognition', error)
      }
    }
  }
})
app.registerExtension({
  name: 'Mixlab.audio.SpeechSynthesis',
  async beforeRegisterNodeDef (nodeType, nodeData, app) {
    if (nodeData.name === 'SpeechSynthesis') {
      // Rebuild the node's read-only 'text' display widgets from the
      // executed output, then speak the text aloud. Called with `this`
      // bound to the node instance.
      function populate (text) {
        if (this.widgets) {
          // Remove widgets added by a previous execution so the display
          // doesn't grow on every run.
          const pos = this.widgets.findIndex(w => w.name === 'text')
          if (pos !== -1) {
            for (let i = pos; i < this.widgets.length; i++) {
              this.widgets[i].onRemove?.()
            }
            this.widgets.length = pos
          }
        }

        // One read-only multiline STRING widget per output line.
        for (let list of text) {
          const w = ComfyWidgets['STRING'](
            this,
            'text',
            ['STRING', { multiline: true }],
            app
          ).widget
          w.inputEl.readOnly = true
          w.inputEl.style.opacity = 0.6
          w.value = list
        }

        // Speak all lines via the browser's speech synthesizer.
        speakText(text.join('\n'))

        // Grow the node (never shrink) to fit the new widgets.
        requestAnimationFrame(() => {
          const sz = this.computeSize()
          if (sz[0] < this.size[0]) {
            sz[0] = this.size[0]
          }
          if (sz[1] < this.size[1]) {
            sz[1] = this.size[1]
          }
          this.onResize?.(sz)
          app.graph.setDirtyCanvas(true, false)
        })
      }

      // When the node is executed we will be sent the input text, display this in the widget
      const onExecuted = nodeType.prototype.onExecuted
      nodeType.prototype.onExecuted = function (message) {
        onExecuted?.apply(this, arguments)
        populate.call(this, message.text)
      }

      // Fix: the original set `this.serialize_widgets = true` here, but in
      // beforeRegisterNodeDef `this` is the extension object, not a node —
      // the flag never reached node instances. Set it on the prototype so
      // every SpeechSynthesis node persists its widget values.
      nodeType.prototype.serialize_widgets = true
    }
  }
})
// Upload an audio file and convert it to a base64 data URL
// Validate that the picked file is a WAV and return its contents encoded as
// a base64 data URL. Returns undefined (after alerting the user) when no
// file was chosen, the type is not WAV, or reading fails.
async function uploadAndConvertAudio (file) {
  if (!file) {
    alert('Please select a WAV file.')
    return
  }
  // Browsers report several MIME variants for WAV files; the original
  // strict `!== 'audio/wav'` check wrongly rejected e.g. 'audio/x-wav'.
  const wavTypes = ['audio/wav', 'audio/x-wav', 'audio/wave', 'audio/vnd.wave']
  if (!wavTypes.includes(file.type)) {
    alert('Only WAV files are supported.')
    return
  }
  try {
    const base64Audio = await readFileAsDataURL(file)
    return base64Audio
  } catch (error) {
    console.error('Error reading file:', error)
    alert('Error reading file.')
  }
}
// Promisified FileReader: resolves with the file's contents as a data URL,
// rejects with the reader's error on failure.
function readFileAsDataURL (file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader()
    reader.onload = event => resolve(event.target.result)
    reader.onerror = error => reject(error)
    reader.readAsDataURL(file)
  })
}
// Build one preview row (an <audio> player plus a Delete button) for a
// base64-encoded clip. Clicking Delete removes the clip from
// widget.value.base64 and removes the row from the DOM.
const createInputAudioForBatch = (base64, widget) => {
  const player = document.createElement('audio')
  player.src = base64
  player.controls = true
  player.style = 'width: 120px; display: block'

  const removeBtn = document.createElement('button')
  removeBtn.textContent = 'Delete'
  removeBtn.style = `cursor: pointer;
  font-weight: 300;
  margin: 2px;
  margin-left: 10px;
  color: var(--descrip-text);
  background-color: var(--comfy-input-bg);
  border-radius: 8px;
  border-color: var(--border-color);
  border-style: solid;height: 30px;min-width: 122px;
  `

  // Row container holding the player and its delete button.
  const row = document.createElement('div')
  row.appendChild(player)
  row.appendChild(removeBtn)
  row.style = `display: flex;margin-top: 12px;`

  removeBtn.addEventListener('click', e => {
    // Drop this clip from the widget's value, keeping all others
    // (loose != matches the original's comparison semantics).
    const kept = []
    for (const v of widget.value?.base64 || []) {
      if (v != base64) kept.push(v)
    }
    widget.value.base64 = kept
    row.remove()
  })

  return row
}
app.registerExtension({
  name: 'Mixlab.Comfy.LoadAndCombinedAudio_',

  // Provide the AUDIOBASE64 input type: its value holds an array of base64
  // data-URL clips, shared with the DOM preview UI built below.
  async getCustomWidgets (app) {
    return {
      AUDIOBASE64 (node, inputName, inputData, app) {
        // console.log('##node', node)
        const widget = {
          value: {
            base64: []
          }, // must not be a bare array [x, x, x]
          type: inputData[0], // the type
          name: inputName, // the name, slice
          size: [128, 32], // a default size
          draw (ctx, node, width, y) {}, // nothing to draw; UI lives in a DOM overlay
          computeSize (...args) {
            return [128, 122] // a method to compute the current size of the widget
          }
          // serializeValue (nodeId, widgetIndex) {
          //   return widget.value
          // },
        }
        // widget.something = something; // maybe adds stuff to it
        node.addCustomWidget(widget) // adds it to the node
        return widget // and returns it.
      }
    }
  },

  async beforeRegisterNodeDef (nodeType, nodeData, app) {
    if (nodeType.comfyClass == 'LoadAndCombinedAudio_') {
      const orig_nodeCreated = nodeType.prototype.onNodeCreated
      nodeType.prototype.onNodeCreated = function () {
        orig_nodeCreated?.apply(this, arguments)

        // The AUDIOBASE64 widget created in getCustomWidgets; uploads are
        // appended to its value.base64 array.
        let audiosWidget = this.widgets.filter(w => w.name == 'audios')[0]

        // DOM overlay widget kept aligned with the node on each draw
        // (see get_position_style). Not serialized itself.
        const widget = {
          type: 'div',
          name: 'audio_base64',
          draw (ctx, node, widget_width, y, widget_height) {
            Object.assign(
              this.div.style,
              get_position_style(ctx, widget_width, 44, node.size[1])
            )
          },
          serialize: false
        }
        widget.div = $el('div', {})
        document.body.appendChild(widget.div)

        let audioPreview = document.createElement('div')
        let audiosDiv = document.createElement('div') // shows the uploaded clips
        audiosDiv.className = 'audios_preview'
        audiosDiv.style = `width: calc(100% - 14px);
        display: flex;
        flex-wrap: wrap;
        padding: 7px; justify-content: space-between;
        align-items: center;`

        const btn = document.createElement('button')
        btn.innerText = 'Upload Audio'
        btn.style = `cursor: pointer;
        font-weight: 300;
        margin: 2px;
        color: var(--descrip-text);
        background-color: var(--comfy-input-bg);
        border-radius: 8px;
        border-color: var(--border-color);
        border-style: solid;height: 30px;min-width: 122px;
        `

        // Open a transient hidden file picker; convert the chosen WAV to a
        // base64 data URL, store it on the widget, and add a preview row.
        btn.addEventListener('click', e => {
          e.preventDefault()
          let inputAudio = document.createElement('input')
          inputAudio.type = 'file'
          inputAudio.style.display = 'none'
          inputAudio.addEventListener('change', async e => {
            e.preventDefault()
            const file = e.target.files[0]
            let base64 = await uploadAndConvertAudio(file)
            // NOTE(review): base64 may be undefined when validation or
            // reading fails, yet it is still pushed below — confirm intended.
            if (!audiosWidget.value) audiosWidget.value = { base64: [] }
            audiosWidget.value.base64.push(base64)
            let a = createInputAudioForBatch(base64, audiosWidget)
            audiosDiv.appendChild(a)
          })
          inputAudio.click()
          inputAudio.remove()
        })

        widget.div.appendChild(audioPreview)
        audioPreview.appendChild(audiosDiv)
        audioPreview.appendChild(btn)
        // audioPreview.appendChild(inputAudio)
        this.addCustomWidget(widget)
        // document.addEventListener('wheel', handleMouseWheel)

        // Remove the floating DOM when the node is deleted.
        const onRemoved = this.onRemoved
        this.onRemoved = () => {
          widget.div.remove()
          try {
            // document.removeEventListener('wheel', handleMouseWheel)
          } catch (error) {
            console.log(error)
          }
          return onRemoved?.()
        }
        this.serialize_widgets = true // persist widget values with the workflow
      }
    }
  },

  // Rebuild the audio preview rows from the serialized widget value when a
  // saved workflow is loaded.
  async loadedGraphNode (node, app) {
    if (node.type === 'LoadAndCombinedAudio_') {
      // await sleep(0)
      let audiosWidget = node.widgets.filter(w => w.name === 'audios')[0]
      let audioPreview = node.widgets.filter(w => w.name == 'audio_base64')[0]
      let pre = audioPreview.div.querySelector('.audios_preview')
      for (const d of audiosWidget.value?.base64 || []) {
        let im = createInputAudioForBatch(d, audiosWidget)
        pre.appendChild(im)
      }
    }
  }
})