| import { getPreviewString, saveTtsProviderSettings } from './index.js'; |
| import { getBase64Async } from '../../utils.js'; |
| import { getRequestHeaders } from '../../../script.js'; |
|
|
| export { SpeechT5TtsProvider }; |
|
|
/**
 * TTS provider that posts text plus a user-uploaded speaker file to the
 * `/api/speech/synthesize` endpoint, requesting the `Xenova/speecht5_tts` model.
 *
 * Speakers live in `settings.speakers` as
 * `{ voice_id, name, data, lang, preview_url }` objects, where `data` is the
 * value returned by `getBase64Async` for the uploaded file.
 */
class SpeechT5TtsProvider {
    /** Required size of an uploaded speaker file, in bytes. */
    static #speakerFileSize = 2048;

    /** Settings currently in effect; assigned by {@link SpeechT5TtsProvider#loadSettings}. */
    settings;
    /** Never modified by this class. NOTE(review): presumably read by the TTS host code - confirm. */
    ready = false;
    /** Never modified by this class; speakers are kept in `settings.speakers` instead. */
    voices = [];
    /** Not used inside this class. NOTE(review): presumably a text separator consumed by the host - confirm. */
    separator = ' .. ';
    /** Audio element used to play voice previews. */
    audioElement = document.createElement('audio');

    /** Baseline settings; copied (never aliased) into `settings` on load. */
    defaultSettings = {
        speakers: [],
        speaker: '',
        voiceMap: {},
    };

    /** Object URL of the preview clip currently held by `audioElement`, or `null`. */
    #previewUrl = null;

    /**
     * Markup for the provider's settings panel: speaker dropdown, upload and
     * delete buttons, and the hidden file input the upload button triggers.
     * @returns {string} HTML string.
     */
    get settingsHtml() {
        return `
        <label for="speecht5_tts_speaker">Speaker:</label>
        <div class="flex-container">
            <select id="speecht5_tts_speaker" class="text_pole flex1">
            </select>
            <div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker">
                <i class="fa-solid fa-upload"></i>
            </div>
            <div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker">
                <i class="fa-solid fa-trash"></i>
            </div>
        </div>
        <input type="file" id="speecht5_tts_speaker_upload" class="displayNone">
        <div><i>Loading model for the first time may take a while!</i></div>
        `;
    }

    /**
     * Copies the dropdown's current value into `settings.speaker` and persists
     * the provider settings.
     */
    onSettingsChange() {
        this.settings.speaker = $('#speecht5_tts_speaker').val();
        saveTtsProviderSettings();
    }

    /**
     * Synthesizes the preview sentence with the given voice and plays it.
     * @param {string} voiceId `voice_id` of the speaker to preview.
     * @returns {Promise<void>}
     * @throws {Error} If the speaker is unknown or the synthesis request fails
     *   (both raised by `fetchTtsGeneration`).
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        // fetchTtsGeneration already throws on a non-OK response.
        const response = await this.fetchTtsGeneration(text, voiceId);
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        // A preview that was interrupted never fires `ended`, so release its
        // URL here instead of leaking the blob.
        this.#releasePreviewUrl();
        this.#previewUrl = url;

        // Register the cleanup before playback starts.
        this.audioElement.onended = () => {
            if (this.#previewUrl === url) {
                this.#releasePreviewUrl();
            }
        };
        this.audioElement.src = url;

        // play() returns a promise in current browsers; it rejects when the
        // clip is interrupted by the next pause(). Log instead of leaving an
        // unhandled rejection, without delaying the caller until playback starts.
        Promise.resolve(this.audioElement.play()).catch((error) => {
            console.warn('SpeechT5: preview playback did not start', error);
        });
    }

    /**
     * Applies saved settings on top of a copy of the defaults, fills the
     * speaker dropdown and wires up the settings panel controls. Safe to call
     * more than once: options and handlers are replaced, not duplicated.
     * @param {object} [settings] Saved provider settings; `null`/`undefined` is treated as empty.
     * @returns {Promise<void>}
     * @throws {Error} If `settings` contains a key that is not a known setting.
     */
    async loadSettings(settings) {
        const incoming = settings ?? {};
        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Copy the defaults (including the nested containers) so that later
        // edits to the active settings never leak back into defaultSettings.
        this.settings = {
            ...this.defaultSettings,
            speakers: [...this.defaultSettings.speakers],
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        for (const [key, value] of Object.entries(incoming)) {
            // Own-property check: `in` would also accept inherited names such as "constructor".
            if (!Object.hasOwn(this.settings, key)) {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
            this.settings[key] = value;
        }

        const speakerSelect = $('#speecht5_tts_speaker');
        speakerSelect.empty();
        for (const speaker of this.settings.speakers) {
            this.#appendSpeakerOption(speaker);
        }
        speakerSelect.val(this.settings.speaker);

        // Namespaced events let a repeated load drop only this provider's handlers.
        speakerSelect
            .off('.speecht5')
            .on('change.speecht5', () => this.onSettingsChange());
        $('#speecht5_tts_speaker_upload_button')
            .off('.speecht5')
            .on('click.speecht5', () => {
                $('#speecht5_tts_speaker_upload').trigger('click');
            });
        $('#speecht5_tts_speaker_upload')
            .off('.speecht5')
            .on('change.speecht5', (event) => this.#onSpeakerFileSelected(event));
        $('#speecht5_tts_delete_speaker_button')
            .off('.speecht5')
            .on('click.speecht5', () => this.#onDeleteSpeakerClicked());

        await this.checkReady();

        console.debug('SpeechT5: Settings loaded');
    }

    /**
     * Readiness hook; this provider has nothing to check.
     * @returns {Promise<void>}
     */
    async checkReady() {}

    /**
     * Looks up a stored speaker.
     * @param {string} voiceName `voice_id` to look for.
     * @returns {Promise<object|undefined>} The speaker object, or `undefined` if absent.
     */
    async getVoice(voiceName) {
        return this.settings.speakers.find((speaker) => speaker.voice_id === voiceName);
    }

    /**
     * Synthesizes speech for `text` with the given voice.
     * @param {string} text Text to speak.
     * @param {string} voiceId `voice_id` of the speaker.
     * @returns {Promise<Response>} The successful fetch response.
     * @throws {Error} See `fetchTtsGeneration`.
     */
    async generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * @returns {Promise<object[]>} The stored speaker objects.
     */
    async fetchTtsVoiceObjects() {
        return this.settings.speakers;
    }

    /**
     * Sends the synthesis request for `inputText` using the stored data of the
     * given speaker.
     * @param {string} inputText Text to speak.
     * @param {string} voiceId `voice_id` of the speaker.
     * @returns {Promise<Response>} The successful fetch response.
     * @throws {Error} If the speaker is unknown or the server answers with a
     *   non-OK status; an error toast is shown in both cases.
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const speaker = await this.getVoice(voiceId);

        if (!speaker) {
            toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed');
            throw new Error(`Speaker not found: ${voiceId}`);
        }

        const response = await fetch('/api/speech/synthesize', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: inputText,
                speaker: speaker.data,
                model: 'Xenova/speecht5_tts',
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    /**
     * History is not implemented for this provider; the id is echoed back.
     * @param {*} history_item_id
     * @returns {Promise<*>} The same `history_item_id`.
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }

    /** Revokes the object URL of the current preview clip, if there is one. */
    #releasePreviewUrl() {
        if (this.#previewUrl !== null) {
            URL.revokeObjectURL(this.#previewUrl);
            this.#previewUrl = null;
        }
    }

    /** Adds one `<option>` for `speaker` to the speaker dropdown. */
    #appendSpeakerOption(speaker) {
        $('#speecht5_tts_speaker').append($('<option>', {
            value: speaker.voice_id,
            text: speaker.name,
        }));
    }

    /** Validates an uploaded speaker file, stores it as a new speaker and selects it. */
    async #onSpeakerFileSelected(event) {
        const input = event.target;
        const file = input.files[0];
        // Clear the input so choosing the same file again still fires `change`.
        input.value = '';

        // The file dialog was cancelled.
        if (!file) {
            return;
        }

        const expectedSize = SpeechT5TtsProvider.#speakerFileSize;
        if (file.size !== expectedSize) {
            toastr.error(`Invalid speaker file size, expected ${expectedSize} bytes`);
            return;
        }

        // The file name doubles as voice_id, so it has to stay unique.
        if (this.settings.speakers.some((existing) => existing.voice_id === file.name)) {
            toastr.error(`Speaker already exists: ${file.name}`);
            return;
        }

        let data;
        try {
            data = await getBase64Async(file);
        } catch (error) {
            console.error('SpeechT5: failed to read speaker file', error);
            toastr.error('Failed to read speaker file');
            return;
        }

        const speaker = {
            voice_id: file.name,
            name: file.name,
            data,
            lang: 'en-US',
            preview_url: false,
        };
        this.settings.speakers.push(speaker);
        this.#appendSpeakerOption(speaker);
        $('#speecht5_tts_speaker').val(speaker.voice_id);
        this.onSettingsChange();
    }

    /** Deletes the selected speaker after confirmation and selects the next one, if any. */
    #onDeleteSpeakerClicked() {
        const index = this.settings.speakers.findIndex(
            (candidate) => candidate.voice_id === this.settings.speaker,
        );
        if (index === -1) {
            toastr.error('Speaker not found');
            return;
        }

        if (!confirm('Are you sure you want to delete this speaker?')) {
            return;
        }

        const [removed] = this.settings.speakers.splice(index, 1);
        // Compare option values directly: a file name may contain characters
        // that would break an attribute selector.
        $('#speecht5_tts_speaker option')
            .filter((_, option) => option.value === removed.voice_id)
            .remove();

        const nextSpeaker = this.settings.speakers[0];
        if (!nextSpeaker) {
            console.log('No speakers left');
            // Still persist the deletion and drop the stale selection.
            this.settings.speaker = '';
            saveTtsProviderSettings();
            return;
        }

        $('#speecht5_tts_speaker').val(nextSpeaker.voice_id);
        this.onSettingsChange();
    }
}
|
|