Spaces:
Paused
Paused
| <script lang="ts"> | |
| import { toast } from 'svelte-sonner'; | |
| import { createEventDispatcher, onMount, getContext } from 'svelte'; | |
| import { KokoroTTS } from 'kokoro-js'; | |
| import { user, settings, config } from '$lib/stores'; | |
| import { getVoices as _getVoices } from '$lib/apis/audio'; | |
| import Switch from '$lib/components/common/Switch.svelte'; | |
| import { round } from '@huggingface/transformers'; | |
| import Spinner from '$lib/components/common/Spinner.svelte'; | |
// Emits the `save` event after the form has been submitted.
const dispatch = createEventDispatcher();
// Translation store provided through Svelte context under the key 'i18n'.
const i18n = getContext('i18n');

/** Callback prop; called with the fragment of user settings that should be saved. */
export let saveSettings: Function;

// Audio
// Local working copies of the user's audio preferences; filled from `$settings` in onMount.
let conversationMode = false;
let speechAutoSend = false;
let responseAutoPlayback = false;
let nonLocalVoices = false;

// '' means "use the default engine".
let STTEngine = '';
let TTSEngine = '';
// Typed explicitly: a bare `{}` initialiser infers the type `{}`, which makes every
// `TTSEngineConfig.dtype` read/bind in this component a type error.
let TTSEngineConfig: { dtype?: string } = {};

// In-browser Kokoro model state (only used when TTSEngine === 'browser-kokoro').
let TTSModel = null;
let TTSModelProgress = null;
let TTSModelLoading = false;

// Voices offered by the active engine, and the id/name of the chosen one ('' = default).
let voices = [];
let voice = '';

// Audio speed control
let playbackRate = 1;
const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];
// How often, and for how long, to poll the browser for speech-synthesis voices.
const VOICES_POLL_INTERVAL_MS = 100;
const VOICES_POLL_MAX_ATTEMPTS = 100;

// Handle of the speech-synthesis poll started by the most recent getVoices() call, if any.
let voicesPollTimer: ReturnType<typeof setInterval> | null = null;

/**
 * Fill `voices` for the engine that is currently in effect.
 *
 * - `browser-kokoro`: voices come from the loaded Kokoro model (loading it first if needed).
 * - server TTS engine is '' (browser speech synthesis): voices come from `speechSynthesis`,
 *   polled for a bounded time because the list may not be available immediately.
 * - any other server engine: voices are fetched through the audio API.
 */
const getVoices = async () => {
	// A poll left over from an earlier call must not overwrite the result of this one.
	if (voicesPollTimer !== null) {
		clearInterval(voicesPollTimer);
		voicesPollTimer = null;
	}

	if (TTSEngine === 'browser-kokoro') {
		if (!TTSModel) {
			await loadKokoro();
		}

		// loadKokoro() does nothing until a dtype has been chosen, so the model can still
		// be missing here; dereferencing it would throw.
		if (!TTSModel) {
			voices = [];
			return;
		}

		voices = Object.entries(TTSModel.voices).map(([id, meta]) => ({
			id,
			name: meta.name,
			localService: false
		}));
		return;
	}

	if ($config.audio.tts.engine === '') {
		if (typeof speechSynthesis === 'undefined') {
			voices = [];
			return;
		}

		// speechSynthesis.getVoices() is synchronous; try once before falling back to polling.
		voices = speechSynthesis.getVoices();
		if (voices.length > 0) {
			return;
		}

		let attemptsLeft = VOICES_POLL_MAX_ATTEMPTS;
		const timer = setInterval(() => {
			voices = speechSynthesis.getVoices();
			attemptsLeft -= 1;

			// Stop once voices arrive, or give up so the timer cannot run forever on
			// systems that have no voices at all.
			if (voices.length > 0 || attemptsLeft <= 0) {
				clearInterval(timer);
				if (voicesPollTimer === timer) {
					voicesPollTimer = null;
				}
			}
		}, VOICES_POLL_INTERVAL_MS);
		voicesPollTimer = timer;
		return;
	}

	try {
		const res = await _getVoices(localStorage.token);
		if (res) {
			// Keep `voices` an array even if the response carries no list.
			voices = res.voices ?? [];
		}
	} catch (e) {
		toast.error(`${e}`);
	}
};
/** Flip the "auto-playback response" preference and hand the new value to saveSettings. */
const toggleResponseAutoPlayback = async () => {
	const enabled = !responseAutoPlayback;
	responseAutoPlayback = enabled;
	saveSettings({ responseAutoPlayback: enabled });
};
/** Flip the "auto-send after transcription" preference and hand the new value to saveSettings. */
const toggleSpeechAutoSend = async () => {
	const enabled = !speechAutoSend;
	speechAutoSend = enabled;
	saveSettings({ speechAutoSend: enabled });
};
// On mount: copy the stored user preferences into local state, then load the voice list.
onMount(async () => {
	const userAudio = $settings.audio;
	const userTTS = userAudio?.tts;

	playbackRate = userTTS?.playbackRate ?? 1;
	conversationMode = $settings.conversationMode ?? false;
	speechAutoSend = $settings.speechAutoSend ?? false;
	responseAutoPlayback = $settings.responseAutoPlayback ?? false;

	STTEngine = userAudio?.stt?.engine ?? '';
	TTSEngine = userTTS?.engine ?? '';
	TTSEngineConfig = userTTS?.engineConfig ?? {};

	// The user's saved voice is only honoured while the server default it was saved
	// against is still the server default; otherwise fall back to the server's voice.
	const serverVoice = $config.audio.tts.voice;
	const savedAgainstCurrentDefault = userTTS?.defaultVoice === serverVoice;
	voice = savedAgainstCurrentDefault ? (userTTS?.voice ?? serverVoice ?? '') : (serverVoice ?? '');

	nonLocalVoices = userTTS?.nonLocalVoices ?? false;

	await getVoices();
});
// Re-run the engine-change hook whenever the selected engine or its config changes.
$: {
	const engineSelected = Boolean(TTSEngine) && Boolean(TTSEngineConfig);
	if (engineSelected) {
		onTTSEngineChange();
	}
}
/** React to an engine/config change: only the in-browser Kokoro engine needs a model load. */
const onTTSEngineChange = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}
	await loadKokoro();
};
// The Kokoro load currently in flight, if any. Used to (a) share one load between
// callers that ask for the same dtype at the same time and (b) let a load that has
// been superseded by a newer request discard its result.
let kokoroLoad: { dtype: string; promise: Promise<void> } | null = null;

/**
 * Load the in-browser Kokoro TTS model for the configured dtype and refresh `voices`.
 *
 * Does nothing unless the selected engine is `browser-kokoro`; clears `voices` and
 * returns without loading when no dtype has been chosen yet. Resolves once no load
 * is in flight any more. Load failures are reported with a toast instead of being
 * thrown, and `TTSModel` then stays null.
 */
const loadKokoro = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	voices = [];

	const dtype = TTSEngineConfig?.dtype;
	if (!dtype) {
		return;
	}

	// Start a new load unless one for this exact dtype is already running (on mount the
	// reactive engine-change hook and getVoices() both end up here).
	if (kokoroLoad?.dtype !== dtype) {
		TTSModel = null;
		TTSModelProgress = null;
		TTSModelLoading = true;

		const request = { dtype, promise: Promise.resolve() };
		kokoroLoad = request;

		request.promise = (async () => {
			try {
				const model = await KokoroTTS.from_pretrained('onnx-community/Kokoro-82M-v1.0-ONNX', {
					dtype, // Options: "fp32", "fp16", "q8", "q4", "q4f16"
					device: navigator?.gpu ? 'webgpu' : 'wasm', // Detect WebGPU
					progress_callback: (e) => {
						// Ignore progress from a load that has been superseded.
						if (kokoroLoad === request) {
							TTSModelProgress = e;
						}
					}
				});

				// A newer request replaced this one while it was loading: drop the result.
				if (kokoroLoad !== request) {
					return;
				}

				TTSModel = model;
				await getVoices();
			} catch (e) {
				if (kokoroLoad === request) {
					console.error(e);
					toast.error(`${e}`);
				}
			} finally {
				if (kokoroLoad === request) {
					TTSModelLoading = false;
					kokoroLoad = null;
				}
			}
		})();
	}

	// Wait for whichever load is current, including one that superseded ours, so that
	// callers only continue once loading has settled.
	while (kokoroLoad) {
		await kokoroLoad.promise;
	}
};
| </script> | |
<!-- Audio settings form: STT options, TTS options, and an engine-specific voice picker.
     Submitting saves the whole `audio` settings object and emits `save`. -->
<form
	class="flex flex-col h-full justify-between space-y-3 text-sm"
	on:submit|preventDefault={async () => {
		saveSettings({
			audio: {
				stt: {
					// '' means "default", which is stored as "not set"
					engine: STTEngine !== '' ? STTEngine : undefined
				},
				tts: {
					engine: TTSEngine !== '' ? TTSEngine : undefined,
					engineConfig: TTSEngineConfig,
					playbackRate: playbackRate,
					voice: voice !== '' ? voice : undefined,
					// Remember which server default the voice was chosen against
					defaultVoice: $config?.audio?.tts?.voice ?? '',
					// Only meaningful for browser speech synthesis (server engine '')
					nonLocalVoices: $config.audio.tts.engine === '' ? nonLocalVoices : undefined
				}
			}
		});
		dispatch('save');
	}}
>
	<div class=" space-y-3 overflow-y-scroll max-h-[28rem] lg:max-h-full">
		<!-- Speech-to-text -->
		<div>
			<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

			{#if $config.audio.stt.engine !== 'web'}
				<div class=" py-0.5 flex w-full justify-between">
					<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
					<div class="flex items-center relative">
						<select
							class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
							bind:value={STTEngine}
							placeholder="Select an engine"
						>
							<option value="">{$i18n.t('Default')}</option>
							<option value="web">{$i18n.t('Web API')}</option>
						</select>
					</div>
				</div>
			{/if}

			<div class=" py-0.5 flex w-full justify-between">
				<div class=" self-center text-xs font-medium">
					{$i18n.t('Instant Auto-Send After Voice Transcription')}
				</div>

				<button
					class="p-1 px-3 text-xs flex rounded-sm transition"
					on:click={() => {
						toggleSpeechAutoSend();
					}}
					type="button"
				>
					{#if speechAutoSend === true}
						<span class="ml-2 self-center">{$i18n.t('On')}</span>
					{:else}
						<span class="ml-2 self-center">{$i18n.t('Off')}</span>
					{/if}
				</button>
			</div>
		</div>

		<!-- Text-to-speech -->
		<div>
			<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>

			<div class=" py-0.5 flex w-full justify-between">
				<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
				<div class="flex items-center relative">
					<select
						class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
						bind:value={TTSEngine}
						placeholder="Select an engine"
					>
						<option value="">{$i18n.t('Default')}</option>
						<option value="browser-kokoro">{$i18n.t('Kokoro.js (Browser)')}</option>
					</select>
				</div>
			</div>

			{#if TTSEngine === 'browser-kokoro'}
				<div class=" py-0.5 flex w-full justify-between">
					<div class=" self-center text-xs font-medium">{$i18n.t('Kokoro.js Dtype')}</div>
					<div class="flex items-center relative">
						<select
							class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
							bind:value={TTSEngineConfig.dtype}
							placeholder="Select dtype"
						>
							<option value="" disabled selected>Select dtype</option>
							<option value="fp32">fp32</option>
							<option value="fp16">fp16</option>
							<option value="q8">q8</option>
							<option value="q4">q4</option>
						</select>
					</div>
				</div>
			{/if}

			<div class=" py-0.5 flex w-full justify-between">
				<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>

				<button
					class="p-1 px-3 text-xs flex rounded-sm transition"
					on:click={() => {
						toggleResponseAutoPlayback();
					}}
					type="button"
				>
					{#if responseAutoPlayback === true}
						<span class="ml-2 self-center">{$i18n.t('On')}</span>
					{:else}
						<span class="ml-2 self-center">{$i18n.t('Off')}</span>
					{/if}
				</button>
			</div>

			<div class=" py-0.5 flex w-full justify-between">
				<div class=" self-center text-xs font-medium">{$i18n.t('Speech Playback Speed')}</div>
				<div class="flex items-center relative">
					<select
						class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
						bind:value={playbackRate}
					>
						{#each speedOptions as option}
							<option value={option} selected={playbackRate === option}>{option}x</option>
						{/each}
					</select>
				</div>
			</div>
		</div>

		<hr class=" border-gray-100 dark:border-gray-850" />

		<!-- Voice picker; its form depends on which engine is in effect -->
		{#if TTSEngine === 'browser-kokoro'}
			{#if TTSModel}
				<!-- Kokoro model is loaded: free-text input with suggestions from the model's voices -->
				<div>
					<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
					<div class="flex w-full">
						<div class="flex-1">
							<input
								list="voice-list"
								class="w-full text-sm bg-white dark:text-gray-300 dark:bg-gray-850 outline-hidden"
								bind:value={voice}
								placeholder="Select a voice"
							/>

							<datalist id="voice-list">
								{#each voices as _voice}
									<option value={_voice.id}>{_voice.name}</option>
								{/each}
							</datalist>
						</div>
					</div>
				</div>
			{:else}
				<!-- No Kokoro model yet: show the loading indicator with download progress -->
				<div>
					<div class=" mb-2.5 text-sm font-medium flex gap-2 items-center">
						<Spinner className="size-4" />

						<div class=" text-sm font-medium shimmer">
							{$i18n.t('Loading Kokoro.js...')}
							{TTSModelProgress && TTSModelProgress.status === 'progress'
								? `(${Math.round(TTSModelProgress.progress * 10) / 10}%)`
								: ''}
						</div>
					</div>

					<div class="text-xs text-gray-500">
						{$i18n.t('Please do not close the settings page while loading the model.')}
					</div>
				</div>
			{/if}
		{:else if $config.audio.tts.engine === ''}
			<!-- Browser speech synthesis: pick from the browser's voices -->
			<div>
				<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
				<div class="flex w-full">
					<div class="flex-1">
						<select
							class="w-full text-sm bg-white dark:text-gray-300 dark:bg-gray-850 outline-hidden"
							bind:value={voice}
						>
							<!-- "Default" is the selected entry exactly when no voice has been chosen -->
							<option value="" selected={voice === ''}>{$i18n.t('Default')}</option>
							{#each voices.filter((v) => nonLocalVoices || v.localService === true) as _voice}
								<option
									value={_voice.name}
									class="bg-gray-100 dark:bg-gray-700"
									selected={voice === _voice.name}>{_voice.name}</option
								>
							{/each}
						</select>
					</div>
				</div>

				<div class="flex items-center justify-between my-1.5">
					<div class="text-xs">
						{$i18n.t('Allow non-local voices')}
					</div>

					<div class="mt-1">
						<Switch bind:state={nonLocalVoices} />
					</div>
				</div>
			</div>
		{:else}
			<!-- Server-side TTS engine: free-text input with suggestions from the fetched voices -->
			<div>
				<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
				<div class="flex w-full">
					<div class="flex-1">
						<input
							list="voice-list"
							class="w-full text-sm bg-white dark:text-gray-300 dark:bg-gray-850 outline-hidden"
							bind:value={voice}
							placeholder="Select a voice"
						/>

						<datalist id="voice-list">
							{#each voices as _voice}
								<option value={_voice.id}>{_voice.name}</option>
							{/each}
						</datalist>
					</div>
				</div>
			</div>
		{/if}
	</div>

	<div class="flex justify-end text-sm font-medium">
		<button
			class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
			type="submit"
		>
			{$i18n.t('Save')}
		</button>
	</div>
</form>