// Source path: arena-learning/studyArena/components/settings/audio-settings.tsx
// Uploaded by: Nitish kumar
// Commit message: "Upload folder using huggingface_hub"
// Revision: c20f20c (verified)
'use client';
import { useState, useRef, useEffect, useMemo, useCallback } from 'react';
import { Label } from '@/components/ui/label';
import { Input } from '@/components/ui/input';
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select';
import { Slider } from '@/components/ui/slider';
import { Switch } from '@/components/ui/switch';
import { Button } from '@/components/ui/button';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import {
TTS_PROVIDERS,
getTTSVoices,
ASR_PROVIDERS,
getASRSupportedLanguages,
} from '@/lib/audio/constants';
import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types';
import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
import { cn } from '@/lib/utils';
import azureVoicesData from '@/lib/audio/azure.json';
import { createLogger } from '@/lib/logger';
import {
ensureVoicesLoaded,
isBrowserTTSAbortError,
playBrowserTTSPreview,
} from '@/lib/audio/browser-tts-preview';
// Module-level logger for this file, created with the 'AudioSettings' label.
// NOTE(review): how createLogger uses the label (prefix, namespace, ...) is defined in '@/lib/logger'.
const log = createLogger('AudioSettings');
/**
 * Resolve the localized display name of a TTS provider.
 *
 * Only the i18n key of the requested provider is passed to `t`, so rendering a
 * list of providers costs one translation per item instead of translating the
 * whole table for every item. An id that has no entry in the table at runtime
 * falls back to the raw id rather than returning `undefined` from a function
 * typed to return `string`.
 *
 * @param providerId - Provider whose display name is wanted.
 * @param t - Translation function mapping an i18n key to display text.
 * @returns The translated provider name, or `providerId` itself when it is unknown.
 */
function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string {
  // Typed as a complete Record so a new provider id without a label fails to compile.
  const nameKeys: Record<TTSProviderId, string> = {
    'openai-tts': 'settings.providerOpenAITTS',
    'azure-tts': 'settings.providerAzureTTS',
    'glm-tts': 'settings.providerGLMTTS',
    'qwen-tts': 'settings.providerQwenTTS',
    'browser-native-tts': 'settings.providerBrowserNativeTTS',
  };
  // Own-property check keeps inherited keys such as "toString" from being treated as labels.
  if (!Object.prototype.hasOwnProperty.call(nameKeys, providerId)) {
    return providerId;
  }
  return t(nameKeys[providerId]);
}
/**
 * Resolve the localized display name of an ASR provider.
 *
 * Only the i18n key of the requested provider is passed to `t`, and an id that
 * has no entry in the table at runtime falls back to the raw id rather than
 * returning `undefined` from a function typed to return `string`.
 *
 * @param providerId - Provider whose display name is wanted.
 * @param t - Translation function mapping an i18n key to display text.
 * @returns The translated provider name, or `providerId` itself when it is unknown.
 */
function getASRProviderName(providerId: ASRProviderId, t: (key: string) => string): string {
  // Typed as a complete Record so a new provider id without a label fails to compile.
  const nameKeys: Record<ASRProviderId, string> = {
    'openai-whisper': 'settings.providerOpenAIWhisper',
    'browser-native': 'settings.providerBrowserNative',
    'qwen-asr': 'settings.providerQwenASR',
  };
  // Own-property check keeps inherited keys such as "toString" from being treated as labels.
  if (!Object.prototype.hasOwnProperty.call(nameKeys, providerId)) {
    return providerId;
  }
  return t(nameKeys[providerId]);
}
/**
 * Look up the display name of a language code.
 *
 * The i18n key is `settings.lang_` followed by the code. When `t` hands the key
 * back unchanged, that is taken to mean no translation exists and the bare code
 * is returned instead.
 *
 * @param code - Language code to label (for example the values offered by the ASR language list).
 * @param t - Translation function mapping an i18n key to display text.
 * @returns The translated language name, or `code` when `t` returned the key itself.
 */
function getLanguageName(code: string, t: (key: string) => string): string {
  const i18nKey = 'settings.lang_' + code;
  const label = t(i18nKey);
  if (label !== i18nKey) {
    return label;
  }
  return code;
}
/** Props accepted by the AudioSettings component. */
interface AudioSettingsProps {
/**
 * Optional callback invoked after a setting is changed through one of the
 * panel's controls (provider, voice, speed, credentials, language, enable switches).
 */
onSave?: () => void;
}
export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
// i18n translation function used for the panel's labels and messages.
const { t } = useI18n();
// TTS state
// Each value/setter is read through its own selector on the settings store.
const ttsProviderId = useSettingsStore((state) => state.ttsProviderId);
const ttsVoice = useSettingsStore((state) => state.ttsVoice);
const ttsSpeed = useSettingsStore((state) => state.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig);
const setTTSProvider = useSettingsStore((state) => state.setTTSProvider);
const setTTSVoice = useSettingsStore((state) => state.setTTSVoice);
const setTTSSpeed = useSettingsStore((state) => state.setTTSSpeed);
const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig);
// ASR state
const asrProviderId = useSettingsStore((state) => state.asrProviderId);
const asrLanguage = useSettingsStore((state) => state.asrLanguage);
const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig);
const setASRProvider = useSettingsStore((state) => state.setASRProvider);
const setASRLanguage = useSettingsStore((state) => state.setASRLanguage);
const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig);
// Enable flags (and their setters) bound to the TTS and ASR section switches.
const ttsEnabled = useSettingsStore((state) => state.ttsEnabled);
const asrEnabled = useSettingsStore((state) => state.asrEnabled);
const setTTSEnabled = useSettingsStore((state) => state.setTTSEnabled);
const setASREnabled = useSettingsStore((state) => state.setASREnabled);
// Metadata of the selected TTS provider; falls back to the 'openai-tts' entry
// when the stored id has no entry in TTS_PROVIDERS.
const ttsProvider = TTS_PROVIDERS[ttsProviderId] ?? TTS_PROVIDERS['openai-tts'];
// Azure voices - load from static JSON
// Memoized with an empty dependency list, so the same array reference is reused across renders.
const azureVoices = useMemo(() => azureVoicesData.voices, []);
// Change handlers used by the form controls: each one forwards to the matching
// store setter and then fires the optional onSave callback.
type ProviderConfigPatch = Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>;
const notifySaved = (): void => {
  onSave?.();
};
const handleTTSProviderChange = (providerId: TTSProviderId) => {
  setTTSProvider(providerId);
  notifySaved();
};
const handleTTSVoiceChange = (voice: string) => {
  setTTSVoice(voice);
  notifySaved();
};
const handleTTSSpeedChange = (speed: number) => {
  setTTSSpeed(speed);
  notifySaved();
};
const handleTTSProviderConfigChange = (providerId: TTSProviderId, config: ProviderConfigPatch) => {
  setTTSProviderConfig(providerId, config);
  notifySaved();
};
const handleASRProviderChange = (providerId: ASRProviderId) => {
  setASRProvider(providerId);
  notifySaved();
};
const handleASRLanguageChange = (language: string) => {
  setASRLanguage(language);
  notifySaved();
};
const handleASRProviderConfigChange = (providerId: ASRProviderId, config: ProviderConfigPatch) => {
  setASRProviderConfig(providerId, config);
  notifySaved();
};
// Password visibility state
// When true the corresponding API key input is rendered as plain text instead of a password field.
const [showTTSApiKey, setShowTTSApiKey] = useState(false);
const [showASRApiKey, setShowASRApiKey] = useState(false);
// Language filter state
// 'all' means no locale filter; otherwise holds an Azure voice Locale value.
const [selectedLocale, setSelectedLocale] = useState<string>('all');
// Test state
// testingTTS drives the spinner / disabled state of the TTS test button.
const [testingTTS, setTestingTTS] = useState(false);
const [testText, setTestText] = useState(t('settings.ttsTestTextDefault'));
const [ttsTestStatus, setTTSTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>(
'idle',
);
const [ttsTestMessage, setTTSTestMessage] = useState('');
// isRecording switches the ASR button between its start and stop labels.
const [isRecording, setIsRecording] = useState(false);
// Transcript shown in the read-only ASR result input.
const [asrResult, setASRResult] = useState('');
const [asrTestStatus, setASRTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>(
'idle',
);
const [asrTestMessage, setASRTestMessage] = useState('');
// Hidden <audio> element used to play server-generated TTS previews.
const audioRef = useRef<HTMLAudioElement>(null);
// Object URL currently assigned to the audio element, kept so it can be revoked.
const audioUrlRef = useRef<string | null>(null);
// Cancel function of the running browser-native TTS preview, if any.
const browserPreviewCancelRef = useRef<(() => void) | null>(null);
// Monotonic counter; a TTS test whose id no longer matches has been superseded or stopped.
const ttsTestRequestIdRef = useRef(0);
// MediaRecorder of the current ASR test recording (non-browser-native providers).
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
// Metadata of the selected ASR provider; falls back to the 'openai-whisper' entry
// when the stored id has no entry in ASR_PROVIDERS.
const asrProvider = ASR_PROVIDERS[asrProviderId] ?? ASR_PROVIDERS['openai-whisper'];
// Update test text when language changes (derived state pattern)
// The lazy initializer `() => t` stores the function itself; passing `t` directly
// would make React call it as an initializer.
// NOTE(review): this assumes useI18n returns a new `t` identity when the language
// changes; it also replaces any text the user typed into the test input.
const [prevT, setPrevT] = useState(() => t);
if (t !== prevT) {
setPrevT(t);
setTestText(t('settings.ttsTestTextDefault'));
}
// Reset locale filter when provider changes (derived state pattern)
// The filter is kept only when the new provider is 'azure-tts'.
const [prevTTSProviderId, setPrevTTSProviderId] = useState(ttsProviderId);
if (ttsProviderId !== prevTTSProviderId) {
setPrevTTSProviderId(ttsProviderId);
if (ttsProviderId !== 'azure-tts') {
setSelectedLocale('all');
}
}
/**
 * Halt whatever TTS preview is running and release its resources.
 *
 * Bumping the request counter marks any in-flight test as superseded; the
 * browser-native preview is cancelled, the audio element is paused and cleared,
 * and the current object URL is revoked.
 *
 * @param resetState - When true (default) also clears the `testingTTS` flag.
 *   Callers pass false where no state update is wanted (effects / unmount).
 */
const stopTTSPreview = useCallback((resetState = true) => {
  ttsTestRequestIdRef.current = ttsTestRequestIdRef.current + 1;

  browserPreviewCancelRef.current?.();
  browserPreviewCancelRef.current = null;

  const player = audioRef.current;
  if (player) {
    player.pause();
    player.src = '';
  }

  const objectUrl = audioUrlRef.current;
  if (objectUrl) {
    URL.revokeObjectURL(objectUrl);
    audioUrlRef.current = null;
  }

  if (!resetState) {
    return;
  }
  setTestingTTS(false);
}, []);
// Update voice selection when locale filter changes
// Only applies to Azure TTS with a specific locale selected. The voice is written
// straight to the store via setTTSVoice, so onSave is not called for this automatic change.
useEffect(() => {
if (ttsProviderId === 'azure-tts' && selectedLocale !== 'all') {
// Filter Azure voices by selected locale
const filteredVoices = azureVoices.filter((voice) => voice.Locale === selectedLocale);
// Check if current voice is in the filtered list
const currentVoiceInFilter = filteredVoices.some((voice) => voice.ShortName === ttsVoice);
// If current voice is not in filtered list, select the first voice in the filtered list
if (!currentVoiceInFilter && filteredVoices.length > 0) {
setTTSVoice(filteredVoices[0].ShortName);
}
}
// Intentionally exclude ttsVoice from dependencies to avoid infinite loop
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [selectedLocale, ttsProviderId, azureVoices, setTTSVoice]);
// When the TTS provider changes: stop any running preview (without touching
// testingTTS) and clear the previous test result banner.
useEffect(() => {
stopTTSPreview(false);
setTTSTestStatus('idle');
setTTSTestMessage('');
}, [ttsProviderId, stopTTSPreview]);
// Initialize and reset TTS voice when provider changes
// Also re-runs when ttsVoice changes, so an empty or unavailable voice is always
// replaced by the first voice of the current provider.
useEffect(() => {
let availableVoices: Array<{ id: string; name: string }> = [];
if (ttsProviderId === 'azure-tts') {
// Use Azure voices from JSON
availableVoices = azureVoices.map((voice) => ({
id: voice.ShortName,
name: voice.LocalName,
}));
} else {
// Use static voices from constants
availableVoices = getTTSVoices(ttsProviderId);
}
if (availableVoices.length > 0) {
// Initialize default voice if not set
if (!ttsVoice) {
setTTSVoice(availableVoices[0].id);
} else {
// Check if current voice is available in new provider
const currentVoiceExists = availableVoices.some((v) => v.id === ttsVoice);
if (!currentVoiceExists) {
setTTSVoice(availableVoices[0].id);
}
}
}
}, [ttsProviderId, ttsVoice, azureVoices, setTTSVoice]);
// Initialize and reset ASR language when provider changes
// Providers that report no supported languages leave asrLanguage untouched.
useEffect(() => {
const availableLanguages = getASRSupportedLanguages(asrProviderId);
if (availableLanguages.length > 0) {
// Initialize default language if not set
if (!asrLanguage) {
setASRLanguage(availableLanguages[0]);
} else {
// Check if current language is available in new provider
const currentLanguageExists = availableLanguages.includes(asrLanguage);
if (!currentLanguageExists) {
setASRLanguage(availableLanguages[0]);
}
}
}
}, [asrProviderId, asrLanguage, setASRLanguage]);
// Stop any TTS preview when the component unmounts. stopTTSPreview is memoized
// with an empty dependency list, so this cleanup is registered once.
useEffect(() => {
return () => {
stopTTSPreview(false);
};
}, [stopTTSPreview]);
// Clear ASR test status when provider changes (derived state pattern)
// Runs during render: the previous provider id is tracked in state and compared
// against the current one, then the ASR result, status and message are reset.
const [prevASRProviderId, setPrevASRProviderId] = useState(asrProviderId);
if (asrProviderId !== prevASRProviderId) {
setPrevASRProviderId(asrProviderId);
setASRTestStatus('idle');
setASRTestMessage('');
setASRResult('');
}
// Test TTS
/**
 * Preview the current TTS settings using the text in the test input.
 *
 * Browser-native TTS is spoken directly through the browser preview helper;
 * every other provider is requested from `/api/generate/tts` and the returned
 * base64 audio is played through the hidden audio element. Each run claims a
 * new request id; once that id no longer matches the ref (the preview was
 * stopped or the provider changed) the run stops updating state.
 */
const handleTestTTS = async () => {
  if (testText.trim() === '') {
    return;
  }

  const requestId = ttsTestRequestIdRef.current + 1;
  ttsTestRequestIdRef.current = requestId;

  // True once stopTTSPreview (or a newer test) has bumped the counter.
  const isSuperseded = (): boolean => ttsTestRequestIdRef.current !== requestId;
  const reportFailure = (message: string): void => {
    setTTSTestStatus('error');
    setTTSTestMessage(message);
  };
  const reportSuccess = (): void => {
    setTTSTestStatus('success');
    setTTSTestMessage(t('settings.ttsTestSuccess'));
  };

  setTestingTTS(true);
  setTTSTestStatus('testing');
  setTTSTestMessage('');

  try {
    if (ttsProviderId === 'browser-native-tts') {
      if (!('speechSynthesis' in window)) {
        reportFailure(t('settings.browserTTSNotSupported'));
        return;
      }

      const voices = await ensureVoicesLoaded();
      if (isSuperseded()) {
        return;
      }
      if (voices.length === 0) {
        reportFailure(t('settings.browserTTSNoVoices'));
        return;
      }

      const controller = playBrowserTTSPreview({
        text: testText,
        voice: ttsVoice,
        rate: ttsSpeed,
        voices,
      });
      // Expose the cancel handle so stopTTSPreview can interrupt the utterance.
      browserPreviewCancelRef.current = controller.cancel;
      await controller.promise;
      if (isSuperseded()) {
        return;
      }
      reportSuccess();
      return;
    }

    // Server-side providers: credentials are sent only when the user filled them in.
    const providerConfig = ttsProvidersConfig[ttsProviderId];
    const requestBody: Record<string, unknown> = {
      text: testText,
      audioId: 'tts-test',
      ttsProviderId,
      ttsVoice,
      ttsSpeed,
    };
    const apiKeyValue = providerConfig?.apiKey;
    if (apiKeyValue?.trim()) {
      requestBody.ttsApiKey = apiKeyValue;
    }
    const baseUrlValue = providerConfig?.baseUrl;
    if (baseUrlValue?.trim()) {
      requestBody.ttsBaseUrl = baseUrlValue;
    }

    const response = await fetch('/api/generate/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    // A non-JSON body is reported as a failure carrying the HTTP status text.
    const data = await response
      .json()
      .catch(() => ({ success: false, error: response.statusText }));
    if (isSuperseded()) {
      return;
    }

    if (!response.ok || !data.success) {
      reportFailure(data.error || t('settings.ttsTestFailed'));
      return;
    }

    // Decode the base64 payload into bytes and wrap it in a playable Blob.
    const decoded = atob(data.base64);
    const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
    const audioBlob = new Blob([bytes], { type: `audio/${data.format}` });

    // Release the previous preview's object URL before replacing it.
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current);
    }
    const audioUrl = URL.createObjectURL(audioBlob);
    audioUrlRef.current = audioUrl;

    const player = audioRef.current;
    if (player) {
      player.src = audioUrl;
      await player.play();
    }
    reportSuccess();
  } catch (error) {
    // Superseded runs and deliberate browser-TTS cancellations are not failures.
    if (isSuperseded() || isBrowserTTSAbortError(error)) {
      return;
    }
    log.error('TTS test failed:', error);
    const failureText =
      error instanceof Error && error.message
        ? `${t('settings.ttsTestFailed')}: ${error.message}`
        : t('settings.ttsTestFailed');
    reportFailure(failureText);
  } finally {
    // Only the run that still owns the request id may reset the shared flags.
    if (!isSuperseded()) {
      browserPreviewCancelRef.current = null;
      setTestingTTS(false);
    }
  }
};
// Test ASR
// Handle on the running browser SpeechRecognition session. It is kept in a ref so
// the stop button and the unmount cleanup can reach the session that a previous
// click started.
const speechRecognitionRef = useRef<{
  stop: () => void;
  abort: () => void;
  onresult: unknown;
  onerror: unknown;
  onend: unknown;
} | null>(null);
// On unmount, shut down whichever capture is still active so the microphone is
// released and no handler fires afterwards.
useEffect(() => {
  return () => {
    const recognition = speechRecognitionRef.current;
    speechRecognitionRef.current = null;
    if (recognition) {
      recognition.onresult = null;
      recognition.onerror = null;
      recognition.onend = null;
      recognition.abort();
    }
    const recorder = mediaRecorderRef.current;
    mediaRecorderRef.current = null;
    if (recorder) {
      // Detach handlers first: no transcription upload for an abandoned recording.
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== 'inactive') {
        recorder.stop();
      }
      recorder.stream.getTracks().forEach((track) => track.stop());
    }
  };
}, []);
/**
 * Start or stop the ASR test recording.
 *
 * While recording, a click stops the active capture (browser speech recognition
 * and/or the MediaRecorder). Otherwise a new capture starts:
 * - 'browser-native' uses the browser's SpeechRecognition API and shows the
 *   first transcript it returns;
 * - every other provider records from the microphone and, when the recording
 *   stops, posts the audio to `/api/transcription`.
 * Failures are logged and surfaced through the ASR status banner.
 */
const handleToggleASRRecording = async () => {
  if (isRecording) {
    // stop() asks the recognizer to finish with the audio captured so far;
    // its onend handler then clears the ref.
    speechRecognitionRef.current?.stop();
    const activeRecorder = mediaRecorderRef.current;
    if (activeRecorder && activeRecorder.state === 'recording') {
      activeRecorder.stop();
    }
    setIsRecording(false);
    return;
  }

  setASRResult('');
  setASRTestStatus('testing');
  setASRTestMessage('');

  if (asrProviderId === 'browser-native') {
    const speechWindow = window as unknown as Record<string, unknown>;
    const SpeechRecognitionCtor =
      speechWindow.SpeechRecognition || speechWindow.webkitSpeechRecognition;
    if (!SpeechRecognitionCtor) {
      setASRTestStatus('error');
      setASRTestMessage(t('settings.asrNotSupported'));
      return;
    }
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Vendor-prefixed API without standard typings
      const recognition = new (SpeechRecognitionCtor as new () => any)();
      recognition.lang = asrLanguage || 'zh-CN';
      recognition.onresult = (event: {
        results: {
          [index: number]: { [index: number]: { transcript: string } };
        };
      }) => {
        const transcript = event.results[0]?.[0]?.transcript ?? '';
        setASRResult(transcript);
        setASRTestStatus('success');
        setASRTestMessage(t('settings.asrTestSuccess'));
      };
      recognition.onerror = (event: { error: string }) => {
        log.error('Speech recognition error:', event.error);
        setASRTestStatus('error');
        setASRTestMessage(t('settings.asrTestFailed') + ': ' + event.error);
      };
      recognition.onend = () => {
        // Ignore the end event of a session that has already been replaced.
        if (speechRecognitionRef.current !== recognition) {
          return;
        }
        speechRecognitionRef.current = null;
        setIsRecording(false);
      };
      // Publish the session before starting so its events always find it.
      speechRecognitionRef.current = recognition;
      recognition.start();
      setIsRecording(true);
    } catch (error) {
      // Construction or start() threw: do not leave the status on 'testing'.
      speechRecognitionRef.current = null;
      log.error('Failed to start speech recognition:', error);
      setASRTestStatus('error');
      setASRTestMessage(
        error instanceof Error && error.message
          ? `${t('settings.asrTestFailed')}: ${error.message}`
          : t('settings.asrTestFailed'),
      );
    }
    return;
  }

  let stream: MediaStream | null = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      audio: true,
    });
    const activeStream = stream;
    const mediaRecorder = new MediaRecorder(activeStream);
    mediaRecorderRef.current = mediaRecorder;
    const audioChunks: Blob[] = [];
    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data);
      }
    };
    mediaRecorder.onstop = async () => {
      activeStream.getTracks().forEach((track) => track.stop());
      if (mediaRecorderRef.current === mediaRecorder) {
        mediaRecorderRef.current = null;
        // Keeps the button in sync when the recorder stops on its own.
        setIsRecording(false);
      }
      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      const formData = new FormData();
      formData.append('audio', audioBlob, 'recording.webm');
      formData.append('providerId', asrProviderId);
      formData.append('language', asrLanguage);
      // Only append non-empty values
      const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey;
      if (apiKeyValue && apiKeyValue.trim()) {
        formData.append('apiKey', apiKeyValue);
      }
      const baseUrlValue = asrProvidersConfig[asrProviderId]?.baseUrl;
      if (baseUrlValue && baseUrlValue.trim()) {
        formData.append('baseUrl', baseUrlValue);
      }
      try {
        const response = await fetch('/api/transcription', {
          method: 'POST',
          body: formData,
        });
        if (response.ok) {
          const data = await response.json();
          setASRResult(data.text);
          setASRTestStatus('success');
          setASRTestMessage(t('settings.asrTestSuccess'));
        } else {
          setASRTestStatus('error');
          const errorData = await response
            .json()
            .catch(() => ({ error: response.statusText }));
          // Show details if available, otherwise show error message
          setASRTestMessage(
            errorData.details || errorData.error || t('settings.asrTestFailed'),
          );
        }
      } catch (error) {
        log.error('ASR test failed:', error);
        setASRTestStatus('error');
        setASRTestMessage(t('settings.asrTestFailed'));
      }
    };
    mediaRecorder.start();
    setIsRecording(true);
  } catch (error) {
    // If permission was granted but the recorder could not be created or
    // started, release the microphone instead of leaving the stream open.
    stream?.getTracks().forEach((track) => track.stop());
    mediaRecorderRef.current = null;
    log.error('Failed to access microphone:', error);
    setASRTestStatus('error');
    setASRTestMessage(t('settings.microphoneAccessFailed'));
  }
};
// Options for the Azure language filter, computed once per voice list instead of
// on every render. Each locale is labelled with the LocaleName of the first voice
// that uses it (falling back to the locale code). Order: Chinese locales first
// (by a fixed priority list), then a fixed list of other major languages, then
// alphabetical by locale code.
const azureLocaleOptions = useMemo(() => {
  const localeNames = new Map<string, string>();
  for (const voice of azureVoices) {
    if (!localeNames.has(voice.Locale)) {
      localeNames.set(voice.Locale, voice.LocaleName || voice.Locale);
    }
  }
  const chinesePriority = [
    'zh-CN', // Chinese (Simplified, China)
    'zh-CN-liaoning', // Chinese (Northeastern Mandarin, Liaoning)
    'zh-CN-shaanxi', // Chinese (Shaanxi dialect)
    'wuu-CN', // Chinese (Wu, China)
    'zh-HK', // Chinese (Cantonese, Hong Kong)
    'yue-CN', // Chinese (Cantonese, China)
    'zh-CN-shandong', // Chinese (Jinan dialect, Shandong)
    'zh-CN-sichuan', // Chinese (Sichuan dialect)
    'zh-TW', // Chinese (Taiwanese Mandarin)
  ];
  const otherPriority = [
    'en-US',
    'en-GB',
    'ja-JP',
    'ko-KR',
    'es-ES',
    'fr-FR',
    'de-DE',
    'ru-RU',
    'ar-SA',
    'pt-BR',
    'it-IT',
  ];
  // Listed locales keep their list order and come before unlisted ones;
  // returns null when neither locale is listed so the caller can apply its tie-break.
  const compareByPriority = (priority: string[], a: string, b: string): number | null => {
    const aIndex = priority.indexOf(a);
    const bIndex = priority.indexOf(b);
    if (aIndex !== -1 && bIndex !== -1) return aIndex - bIndex;
    if (aIndex !== -1) return -1;
    if (bIndex !== -1) return 1;
    return null;
  };
  const sortedLocales = Array.from(localeNames.keys()).sort((a, b) => {
    const localeNameA = localeNames.get(a) ?? a;
    const localeNameB = localeNames.get(b) ?? b;
    // A locale counts as Chinese when its LocaleName contains "Chinese" (case-insensitive)
    const aIsChinese = /chinese/i.test(localeNameA);
    const bIsChinese = /chinese/i.test(localeNameB);
    if (aIsChinese && bIsChinese) {
      return (
        compareByPriority(chinesePriority, a, b) ?? localeNameA.localeCompare(localeNameB)
      );
    }
    if (aIsChinese) return -1;
    if (bIsChinese) return 1;
    return compareByPriority(otherPriority, a, b) ?? a.localeCompare(b);
  });
  return sortedLocales.map((locale) => ({
    locale,
    localeName: localeNames.get(locale) ?? locale,
  }));
}, [azureVoices]);
// Render: a TTS section and an ASR section, each with an enable switch header and
// a body (provider, credentials, options, test controls) that collapses when disabled.
return (
  <div className="space-y-6 max-w-4xl">
    {/* TTS Section */}
    <div className="space-y-4">
      <div
        className={cn(
          'relative flex items-center gap-3 rounded-lg border px-4 py-3 transition-all duration-300',
          ttsEnabled ? 'bg-background border-border' : 'bg-muted/30 border-transparent',
        )}
      >
        <div
          className={cn(
            'absolute left-0 top-2.5 bottom-2.5 w-[3px] rounded-full transition-colors duration-300',
            ttsEnabled ? 'bg-primary' : 'bg-muted-foreground/20',
          )}
        />
        <div
          className={cn(
            'flex h-8 w-8 shrink-0 items-center justify-center rounded-md transition-colors duration-300',
            ttsEnabled ? 'bg-primary/10 text-primary' : 'bg-muted text-muted-foreground',
          )}
        >
          <Volume2 className="h-4 w-4" />
        </div>
        <div className="flex-1 min-w-0">
          <h3
            className={cn(
              'text-sm font-medium transition-colors duration-300',
              !ttsEnabled && 'text-muted-foreground',
            )}
          >
            {t('settings.ttsSection')}
          </h3>
          <p className="text-xs text-muted-foreground">{t('settings.ttsEnabledDescription')}</p>
        </div>
        <Switch
          checked={ttsEnabled}
          onCheckedChange={(checked) => {
            setTTSEnabled(checked);
            onSave?.();
          }}
        />
      </div>
      <div
        className={cn(
          'space-y-4 transition-all duration-300 overflow-hidden',
          ttsEnabled ? 'opacity-100' : 'opacity-40 max-h-0 pointer-events-none',
        )}
      >
        <div className="space-y-2">
          <Label className="text-sm">{t('settings.ttsProvider')}</Label>
          <Select
            value={ttsProviderId}
            onValueChange={(value) => handleTTSProviderChange(value as TTSProviderId)}
          >
            <SelectTrigger>
              <SelectValue />
            </SelectTrigger>
            <SelectContent>
              {Object.values(TTS_PROVIDERS).map((provider) => (
                <SelectItem key={provider.id} value={provider.id}>
                  <div className="flex items-center gap-2">
                    {provider.icon && (
                      <img src={provider.icon} alt={provider.name} className="w-4 h-4" />
                    )}
                    {getTTSProviderName(provider.id, t)}
                    {ttsProvidersConfig[provider.id]?.isServerConfigured && (
                      <span className="text-[10px] px-1 py-0.5 rounded border text-muted-foreground">
                        {t('settings.serverConfigured')}
                      </span>
                    )}
                  </div>
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
        {(ttsProvider.requiresApiKey ||
          ttsProvidersConfig[ttsProviderId]?.isServerConfigured) && (
          <>
            <div className="grid grid-cols-2 gap-4">
              <div className="space-y-2">
                <Label className="text-sm">{t('settings.ttsApiKey')}</Label>
                <div className="relative">
                  <Input
                    type={showTTSApiKey ? 'text' : 'password'}
                    placeholder={
                      ttsProvidersConfig[ttsProviderId]?.isServerConfigured
                        ? t('settings.optionalOverride')
                        : t('settings.enterApiKey')
                    }
                    value={ttsProvidersConfig[ttsProviderId]?.apiKey || ''}
                    onChange={(e) =>
                      handleTTSProviderConfigChange(ttsProviderId, {
                        apiKey: e.target.value,
                      })
                    }
                    className="font-mono text-sm pr-10"
                  />
                  <button
                    type="button"
                    onClick={() => setShowTTSApiKey(!showTTSApiKey)}
                    className="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
                  >
                    {showTTSApiKey ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
                  </button>
                </div>
              </div>
              <div className="space-y-2">
                <Label className="text-sm">{t('settings.ttsBaseUrl')}</Label>
                <Input
                  placeholder={ttsProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')}
                  value={ttsProvidersConfig[ttsProviderId]?.baseUrl || ''}
                  onChange={(e) =>
                    handleTTSProviderConfigChange(ttsProviderId, {
                      baseUrl: e.target.value,
                    })
                  }
                  className="text-sm"
                />
              </div>
            </div>
            {(() => {
              // Preview of the URL built from the effective base URL and the provider's endpoint path.
              const effectiveBaseUrl =
                ttsProvidersConfig[ttsProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || '';
              if (!effectiveBaseUrl) return null;
              // Get endpoint path based on provider
              let endpointPath = '';
              switch (ttsProviderId) {
                case 'openai-tts':
                case 'glm-tts':
                  endpointPath = '/audio/speech';
                  break;
                case 'azure-tts':
                  endpointPath = '/cognitiveservices/v1';
                  break;
                case 'qwen-tts':
                  endpointPath = '/services/aigc/multimodal-generation/generation';
                  break;
                default:
                  endpointPath = '';
              }
              if (!endpointPath) return null;
              const fullUrl = effectiveBaseUrl + endpointPath;
              return (
                <p className="text-xs text-muted-foreground break-all">
                  {t('settings.requestUrl')}: {fullUrl}
                </p>
              );
            })()}
          </>
        )}
        {/* Voice Selection Row */}
        <div
          className="grid gap-4"
          style={{
            gridTemplateColumns:
              ttsProviderId === 'azure-tts' ? '280px 280px 200px' : '280px 200px',
          }}
        >
          {/* Language Filter for Azure TTS */}
          {ttsProviderId === 'azure-tts' && (
            <div className="space-y-2">
              <Label className="text-sm">{t('settings.ttsLanguageFilter')}</Label>
              <Select value={selectedLocale} onValueChange={setSelectedLocale}>
                <SelectTrigger className="w-full">
                  <SelectValue />
                </SelectTrigger>
                <SelectContent>
                  <SelectItem value="all">{t('settings.allLanguages')}</SelectItem>
                  {azureLocaleOptions.map(({ locale, localeName }) => (
                    <SelectItem key={locale} value={locale}>
                      {localeName}
                    </SelectItem>
                  ))}
                </SelectContent>
              </Select>
            </div>
          )}
          <div className="space-y-2">
            <Label className="text-sm">{t('settings.ttsVoice')}</Label>
            <Select value={ttsVoice} onValueChange={handleTTSVoiceChange}>
              <SelectTrigger className="w-full">
                <SelectValue />
              </SelectTrigger>
              <SelectContent>
                {(() => {
                  // For Azure TTS, use JSON data
                  if (ttsProviderId === 'azure-tts') {
                    // Filter voices by selected locale
                    const filteredVoices =
                      selectedLocale === 'all'
                        ? azureVoices
                        : azureVoices.filter((voice) => voice.Locale === selectedLocale);
                    return filteredVoices.map((voice) => (
                      <SelectItem key={voice.ShortName} value={voice.ShortName}>
                        {voice.LocalName} ({voice.DisplayName})
                      </SelectItem>
                    ));
                  }
                  // For other providers, use static voices
                  const allVoices = getTTSVoices(ttsProviderId);
                  return allVoices.map((voice) => (
                    <SelectItem key={voice.id} value={voice.id}>
                      {voice.name}
                      {voice.description && ` - ${t(`settings.${voice.description}`)}`}
                    </SelectItem>
                  ));
                })()}
              </SelectContent>
            </Select>
          </div>
          {ttsProvider.speedRange && (
            <div className="space-y-2">
              <Label className="text-sm">{t('settings.ttsSpeed')}</Label>
              <div className="flex items-center gap-3">
                <Slider
                  value={[ttsSpeed]}
                  onValueChange={(value) => handleTTSSpeedChange(value[0])}
                  min={ttsProvider.speedRange.min}
                  max={ttsProvider.speedRange.max}
                  step={0.1}
                  className="flex-1"
                />
                <span className="text-xs text-muted-foreground min-w-[3rem] text-right">
                  {ttsSpeed.toFixed(1)}x
                </span>
              </div>
            </div>
          )}
        </div>
        {/* Test TTS Section */}
        <div className="space-y-2">
          <Label className="text-sm">{t('settings.testTTS')}</Label>
          <div className="flex gap-2">
            <Input
              placeholder={t('settings.ttsTestTextPlaceholder')}
              value={testText}
              onChange={(e) => setTestText(e.target.value)}
              className="flex-1"
            />
            <Button
              onClick={handleTestTTS}
              disabled={
                testingTTS ||
                !testText.trim() ||
                (ttsProvider.requiresApiKey &&
                  !ttsProvidersConfig[ttsProviderId]?.apiKey?.trim() &&
                  !ttsProvidersConfig[ttsProviderId]?.isServerConfigured)
              }
              size="default"
              className="gap-2 w-32"
            >
              {testingTTS ? (
                <Loader2 className="h-4 w-4 animate-spin" />
              ) : (
                <Volume2 className="h-4 w-4" />
              )}
              {t('settings.testTTS')}
            </Button>
          </div>
        </div>
        {ttsTestMessage && (
          <div
            className={cn(
              'rounded-lg p-3 text-sm overflow-hidden',
              ttsTestStatus === 'success' &&
                'bg-green-50 text-green-700 border border-green-200 dark:bg-green-950/50 dark:text-green-400 dark:border-green-800',
              ttsTestStatus === 'error' &&
                'bg-red-50 text-red-700 border border-red-200 dark:bg-red-950/50 dark:text-red-400 dark:border-red-800',
            )}
          >
            <div className="flex items-start gap-2 min-w-0">
              {ttsTestStatus === 'success' && (
                <CheckCircle2 className="h-4 w-4 mt-0.5 shrink-0" />
              )}
              {ttsTestStatus === 'error' && <XCircle className="h-4 w-4 mt-0.5 shrink-0" />}
              <p className="flex-1 min-w-0 break-all">{ttsTestMessage}</p>
            </div>
          </div>
        )}
        <audio ref={audioRef} className="hidden" />
      </div>
    </div>
    {/* ASR Section */}
    <div className="space-y-4 pt-4 border-t">
      <div
        className={cn(
          'relative flex items-center gap-3 rounded-lg border px-4 py-3 transition-all duration-300',
          asrEnabled ? 'bg-background border-border' : 'bg-muted/30 border-transparent',
        )}
      >
        <div
          className={cn(
            'absolute left-0 top-2.5 bottom-2.5 w-[3px] rounded-full transition-colors duration-300',
            asrEnabled ? 'bg-primary' : 'bg-muted-foreground/20',
          )}
        />
        <div
          className={cn(
            'flex h-8 w-8 shrink-0 items-center justify-center rounded-md transition-colors duration-300',
            asrEnabled ? 'bg-primary/10 text-primary' : 'bg-muted text-muted-foreground',
          )}
        >
          <Mic className="h-4 w-4" />
        </div>
        <div className="flex-1 min-w-0">
          <h3
            className={cn(
              'text-sm font-medium transition-colors duration-300',
              !asrEnabled && 'text-muted-foreground',
            )}
          >
            {t('settings.asrSection')}
          </h3>
          <p className="text-xs text-muted-foreground">{t('settings.asrEnabledDescription')}</p>
        </div>
        <Switch
          checked={asrEnabled}
          onCheckedChange={(checked) => {
            setASREnabled(checked);
            onSave?.();
          }}
        />
      </div>
      <div
        className={cn(
          'space-y-4 transition-all duration-300 overflow-hidden',
          asrEnabled ? 'opacity-100' : 'opacity-40 max-h-0 pointer-events-none',
        )}
      >
        <div className="space-y-2">
          <Label className="text-sm">{t('settings.asrProvider')}</Label>
          <Select
            value={asrProviderId}
            onValueChange={(value) => handleASRProviderChange(value as ASRProviderId)}
          >
            <SelectTrigger>
              <SelectValue />
            </SelectTrigger>
            <SelectContent>
              {Object.values(ASR_PROVIDERS).map((provider) => (
                <SelectItem key={provider.id} value={provider.id}>
                  <div className="flex items-center gap-2">
                    {provider.icon && (
                      <img src={provider.icon} alt={provider.name} className="w-4 h-4" />
                    )}
                    {getASRProviderName(provider.id, t)}
                    {asrProvidersConfig[provider.id]?.isServerConfigured && (
                      <span className="text-[10px] px-1 py-0.5 rounded border text-muted-foreground">
                        {t('settings.serverConfigured')}
                      </span>
                    )}
                  </div>
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
        {(asrProvider.requiresApiKey ||
          asrProvidersConfig[asrProviderId]?.isServerConfigured) && (
          <>
            <div className="grid grid-cols-2 gap-4">
              <div className="space-y-2">
                <Label className="text-sm">{t('settings.asrApiKey')}</Label>
                <div className="relative">
                  <Input
                    type={showASRApiKey ? 'text' : 'password'}
                    placeholder={
                      asrProvidersConfig[asrProviderId]?.isServerConfigured
                        ? t('settings.optionalOverride')
                        : t('settings.enterApiKey')
                    }
                    value={asrProvidersConfig[asrProviderId]?.apiKey || ''}
                    onChange={(e) =>
                      handleASRProviderConfigChange(asrProviderId, {
                        apiKey: e.target.value,
                      })
                    }
                    className="font-mono text-sm pr-10"
                  />
                  <button
                    type="button"
                    onClick={() => setShowASRApiKey(!showASRApiKey)}
                    className="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
                  >
                    {showASRApiKey ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
                  </button>
                </div>
              </div>
              <div className="space-y-2">
                <Label className="text-sm">{t('settings.asrBaseUrl')}</Label>
                <Input
                  placeholder={asrProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')}
                  value={asrProvidersConfig[asrProviderId]?.baseUrl || ''}
                  onChange={(e) =>
                    handleASRProviderConfigChange(asrProviderId, {
                      baseUrl: e.target.value,
                    })
                  }
                  className="text-sm"
                />
              </div>
            </div>
            {(() => {
              // Preview of the URL built from the effective base URL and the provider's endpoint path.
              const effectiveBaseUrl =
                asrProvidersConfig[asrProviderId]?.baseUrl || asrProvider.defaultBaseUrl || '';
              if (!effectiveBaseUrl) return null;
              // Get endpoint path based on provider
              let endpointPath = '';
              switch (asrProviderId) {
                case 'openai-whisper':
                  endpointPath = '/audio/transcriptions';
                  break;
                case 'qwen-asr':
                  endpointPath = '/services/aigc/multimodal-generation/generation';
                  break;
                default:
                  endpointPath = '';
              }
              if (!endpointPath) return null;
              const fullUrl = effectiveBaseUrl + endpointPath;
              return (
                <p className="text-xs text-muted-foreground break-all">
                  {t('settings.requestUrl')}: {fullUrl}
                </p>
              );
            })()}
          </>
        )}
        {(() => {
          // The language column is rendered only for providers that list supported languages.
          const supportedLanguages = getASRSupportedLanguages(asrProviderId);
          const hasLanguageSelection = supportedLanguages.length > 0;
          return (
            <div
              className="grid gap-4"
              style={{
                gridTemplateColumns: hasLanguageSelection ? '160px 1fr' : '1fr',
              }}
            >
              {hasLanguageSelection && (
                <div className="space-y-2">
                  <Label className="text-sm">{t('settings.asrLanguage')}</Label>
                  <Select value={asrLanguage} onValueChange={handleASRLanguageChange}>
                    <SelectTrigger className="w-full">
                      <SelectValue />
                    </SelectTrigger>
                    <SelectContent>
                      {supportedLanguages.map((lang) => (
                        <SelectItem key={lang} value={lang}>
                          {getLanguageName(lang, t)}
                        </SelectItem>
                      ))}
                    </SelectContent>
                  </Select>
                </div>
              )}
              <div className="space-y-2">
                <Label className="text-sm">{t('settings.testASR')}</Label>
                <div className="flex gap-2">
                  <Input
                    value={asrResult}
                    readOnly
                    placeholder={t('settings.asrResultPlaceholder')}
                    className="flex-1 bg-muted/50"
                  />
                  <Button
                    onClick={handleToggleASRRecording}
                    disabled={
                      asrProvider.requiresApiKey &&
                      !asrProvidersConfig[asrProviderId]?.apiKey?.trim() &&
                      !asrProvidersConfig[asrProviderId]?.isServerConfigured
                    }
                    className="gap-2 w-[140px]"
                  >
                    {isRecording ? (
                      <>
                        <MicOff className="h-4 w-4" />
                        {t('settings.stopRecording')}
                      </>
                    ) : (
                      <>
                        <Mic className="h-4 w-4" />
                        {t('settings.startRecording')}
                      </>
                    )}
                  </Button>
                </div>
              </div>
            </div>
          );
        })()}
        {asrTestMessage && (
          <div
            className={cn(
              'rounded-lg p-3 text-sm overflow-hidden',
              asrTestStatus === 'success' &&
                'bg-green-50 text-green-700 border border-green-200 dark:bg-green-950/50 dark:text-green-400 dark:border-green-800',
              asrTestStatus === 'error' &&
                'bg-red-50 text-red-700 border border-red-200 dark:bg-red-950/50 dark:text-red-400 dark:border-red-800',
            )}
          >
            <div className="flex items-start gap-2 min-w-0">
              {asrTestStatus === 'success' && (
                <CheckCircle2 className="h-4 w-4 mt-0.5 shrink-0" />
              )}
              {asrTestStatus === 'error' && <XCircle className="h-4 w-4 mt-0.5 shrink-0" />}
              <p className="flex-1 min-w-0 break-all">{asrTestMessage}</p>
            </div>
          </div>
        )}
      </div>
    </div>
  </div>
);
}