Spaces:
Paused
Paused
| // import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react'; | |
| // import Button from '@mui/material/Button'; | |
| // import Typography from '@mui/material/Typography'; | |
| // import InputLabel from '@mui/material/InputLabel'; | |
| // import FormControl from '@mui/material/FormControl'; | |
| // import Select, {SelectChangeEvent} from '@mui/material/Select'; | |
| // import MenuItem from '@mui/material/MenuItem'; | |
| // import Stack from '@mui/material/Stack'; | |
| // import seamlessLogoUrl from './assets/seamless.svg'; | |
| // import { | |
| // AgentCapabilities, | |
| // BaseResponse, | |
| // BrowserAudioStreamConfig, | |
| // DynamicConfig, | |
| // PartialDynamicConfig, | |
| // SUPPORTED_INPUT_SOURCES, | |
| // SUPPORTED_OUTPUT_MODES, | |
| // ServerExceptionData, | |
| // ServerSpeechData, | |
| // ServerState, | |
| // ServerTextData, | |
| // StartStreamEventConfig, | |
| // StreamingStatus, | |
| // SupportedInputSource, | |
| // SupportedOutputMode, | |
| // TranslationSentences, | |
| // } from './types/StreamingTypes'; | |
| // import FormLabel from '@mui/material/FormLabel'; | |
| // import RadioGroup from '@mui/material/RadioGroup'; | |
| // import FormControlLabel from '@mui/material/FormControlLabel'; | |
| // import Radio from '@mui/material/Radio'; | |
| // import './StreamingInterface.css'; | |
| // import RoomConfig from './RoomConfig'; | |
| // import Divider from '@mui/material/Divider'; | |
| // import {useSocket} from './useSocket'; | |
| // import {RoomState} from './types/RoomState'; | |
| // import useStable from './useStable'; | |
| // import float32To16BitPCM from './float32To16BitPCM'; | |
| // import createBufferedSpeechPlayer from './createBufferedSpeechPlayer'; | |
| // import Checkbox from '@mui/material/Checkbox'; | |
| // import Alert from '@mui/material/Alert'; | |
| // import isScrolledToDocumentBottom from './isScrolledToDocumentBottom'; | |
| // import Box from '@mui/material/Box'; | |
| // import Slider from '@mui/material/Slider'; | |
| // import VolumeDown from '@mui/icons-material/VolumeDown'; | |
| // import VolumeUp from '@mui/icons-material/VolumeUp'; | |
| // import Mic from '@mui/icons-material/Mic'; | |
| // import MicOff from '@mui/icons-material/MicOff'; | |
| // import XRDialog from './react-xr/XRDialog'; | |
| // import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData'; | |
| // import { | |
| // sliceTranslationSentencesUpToIndex, | |
| // getTotalSentencesLength, | |
| // } from './sliceTranslationSentencesUtils'; | |
| // import Blink from './Blink'; | |
| // import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval'; | |
| // import {getURLParams} from './URLParams'; | |
| // import debug from './debug'; | |
| // import DebugSection from './DebugSection'; | |
| // import Switch from '@mui/material/Switch'; | |
| // import Grid from '@mui/material/Grid'; | |
| // import {getLanguageFromThreeLetterCode} from './languageLookup'; | |
| // import HeadphonesIcon from '@mui/icons-material/Headphones'; | |
| // const AUDIO_STREAM_DEFAULTS = { | |
| // userMedia: { | |
| // echoCancellation: false, | |
| // noiseSuppression: true, | |
| // }, | |
| // displayMedia: { | |
| // echoCancellation: false, | |
| // noiseSuppression: false, | |
| // }, | |
| // } as const; | |
| // async function requestUserMediaAudioStream( | |
| // config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'], | |
| // ) { | |
| // const stream = await navigator.mediaDevices.getUserMedia({ | |
| // audio: {...config, channelCount: 1}, | |
| // }); | |
| // console.debug( | |
| // '[requestUserMediaAudioStream] stream created with settings:', | |
| // stream.getAudioTracks()?.[0]?.getSettings(), | |
| // ); | |
| // return stream; | |
| // } | |
| // async function requestDisplayMediaAudioStream( | |
| // config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'], | |
| // ) { | |
| // const stream = await navigator.mediaDevices.getDisplayMedia({ | |
| // audio: {...config, channelCount: 1}, | |
| // }); | |
| // console.debug( | |
| // '[requestDisplayMediaAudioStream] stream created with settings:', | |
| // stream.getAudioTracks()?.[0]?.getSettings(), | |
| // ); | |
| // return stream; | |
| // } | |
// Label shown on the start/stop button for each streaming status.
// 'starting' is the transient state while the stream is being configured.
const buttonLabelMap: {[key in StreamingStatus]: string} = {
  stopped: 'Start Streaming',
  running: 'Stop Streaming',
  starting: 'Starting...',
};
| // const BUFFER_LIMIT = 1; | |
| // const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36; | |
| // const GAIN_MULTIPLIER_OVER_1 = 3; | |
| // const getGainScaledValue = (value) => | |
| // value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value; | |
| // const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2; | |
| // const MAX_SERVER_EXCEPTIONS_TRACKED = 500; | |
| // export const TYPING_ANIMATION_DELAY_MS = 6; | |
| // export default function StreamingInterface() { | |
| // const urlParams = getURLParams(); | |
| // const debugParam = urlParams.debug; | |
| // const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>( | |
| // urlParams.animateTextDisplay, | |
| // ); | |
| // const socketObject = useSocket(); | |
| // const {socket, clientID} = socketObject; | |
| // const [serverState, setServerState] = useState<ServerState | null>(null); | |
| // const [agent, setAgent] = useState<AgentCapabilities | null>(null); | |
| // const model = agent?.name ?? null; | |
| // const agentsCapabilities: Array<AgentCapabilities> = | |
| // serverState?.agentsCapabilities ?? []; | |
| // const currentAgent: AgentCapabilities | null = | |
| // agentsCapabilities.find((agent) => agent.name === model) ?? null; | |
| // const [serverExceptions, setServerExceptions] = useState< | |
| // Array<ServerExceptionData> | |
| // >([]); | |
| // const [roomState, setRoomState] = useState<RoomState | null>(null); | |
| // const roomID = roomState?.room_id ?? null; | |
| // const isSpeaker = | |
| // (clientID != null && roomState?.speakers.includes(clientID)) ?? false; | |
| // const isListener = | |
| // (clientID != null && roomState?.listeners.includes(clientID)) ?? false; | |
| // const [streamingStatus, setStreamingStatus] = | |
| // useState<StreamingStatus>('stopped'); | |
| // const isStreamConfiguredRef = useRef<boolean>(false); | |
| // const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false); | |
| // const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t'); | |
| // const [inputSource, setInputSource] = | |
| // useState<SupportedInputSource>('userMedia'); | |
| // const [enableNoiseSuppression, setEnableNoiseSuppression] = useState< | |
| // boolean | null | |
| // >(null); | |
| // const [enableEchoCancellation, setEnableEchoCancellation] = useState< | |
| // boolean | null | |
| // >(null); | |
| // // Dynamic Params: | |
| // const [targetLang, setTargetLang] = useState<string | null>(null); | |
| // const [enableExpressive, setEnableExpressive] = useState<boolean | null>( | |
| // null, | |
| // ); | |
| // const [serverDebugFlag, setServerDebugFlag] = useState<boolean>( | |
| // debugParam ?? false, | |
| // ); | |
| // const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]); | |
| // const [ | |
| // translationSentencesAnimatedIndex, | |
| // setTranslationSentencesAnimatedIndex, | |
| // ] = useState<number>(0); | |
| // const lastTranslationResultRef = useRef<HTMLDivElement | null>(null); | |
| // const [inputStream, setInputStream] = useState<MediaStream | null>(null); | |
| // const [inputStreamSource, setInputStreamSource] = | |
| // useState<MediaStreamAudioSourceNode | null>(null); | |
| // const audioContext = useStable<AudioContext>(() => new AudioContext()); | |
| // const [scriptNodeProcessor, setScriptNodeProcessor] = | |
| // useState<ScriptProcessorNode | null>(null); | |
| // const [muted, setMuted] = useState<boolean>(false); | |
| // // The onaudioprocess script needs an up-to-date reference to the muted state, so | |
| // // we use a ref here and keep it in sync via useEffect | |
| // const mutedRef = useRef<boolean>(muted); | |
| // useEffect(() => { | |
| // mutedRef.current = muted; | |
| // }, [muted]); | |
| // const [gain, setGain] = useState<number>(1); | |
| // const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom()); | |
| // // Some config options must be set when starting streaming and cannot be chaned dynamically. | |
| // // This controls whether they are disabled or not | |
| // const streamFixedConfigOptionsDisabled = | |
| // streamingStatus !== 'stopped' || roomID == null; | |
| // const bufferedSpeechPlayer = useStable(() => { | |
| // const player = createBufferedSpeechPlayer({ | |
| // onStarted: () => { | |
| // console.debug('📢 PLAYBACK STARTED 📢'); | |
| // }, | |
| // onEnded: () => { | |
| // console.debug('🛑 PLAYBACK ENDED 🛑'); | |
| // }, | |
| // }); | |
| // // Start the player now so it eagerly plays audio when it arrives | |
| // player.start(); | |
| // return player; | |
| // }); | |
| // const translationSentencesBase: TranslationSentences = | |
| // getTranslationSentencesFromReceivedData(receivedData); | |
| // const translationSentencesBaseTotalLength = getTotalSentencesLength( | |
| // translationSentencesBase, | |
| // ); | |
| // const translationSentences: TranslationSentences = animateTextDisplay | |
| // ? sliceTranslationSentencesUpToIndex( | |
| // translationSentencesBase, | |
| // translationSentencesAnimatedIndex, | |
| // ) | |
| // : translationSentencesBase; | |
| // // We want the blinking cursor to show before any text has arrived, so let's add an empty string so that the cursor shows up | |
| // const translationSentencesWithEmptyStartingString = | |
| // streamingStatus === 'running' && translationSentences.length === 0 | |
| // ? [''] | |
| // : translationSentences; | |
| // /****************************************** | |
| // * Event Handlers | |
| // ******************************************/ | |
| // const setAgentAndUpdateParams = useCallback( | |
| // (newAgent: AgentCapabilities | null) => { | |
| // setAgent((prevAgent) => { | |
| // if (prevAgent?.name !== newAgent?.name) { | |
| // setTargetLang(newAgent?.targetLangs[0] ?? null); | |
| // setEnableExpressive(null); | |
| // } | |
| // return newAgent; | |
| // }); | |
| // }, | |
| // [], | |
| // ); | |
| // const onSetDynamicConfig = useCallback( | |
| // async (partialConfig: PartialDynamicConfig) => { | |
| // return new Promise<void>((resolve, reject) => { | |
| // if (socket == null) { | |
| // reject(new Error('[onSetDynamicConfig] socket is null ')); | |
| // return; | |
| // } | |
| // socket.emit( | |
| // 'set_dynamic_config', | |
| // partialConfig, | |
| // (result: BaseResponse) => { | |
| // console.log('[emit result: set_dynamic_config]', result); | |
| // if (result.status === 'ok') { | |
| // resolve(); | |
| // } else { | |
| // reject(); | |
| // } | |
| // }, | |
| // ); | |
| // }); | |
| // }, | |
| // [socket], | |
| // ); | |
| // const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => { | |
| // return new Promise<void>((resolve, reject) => { | |
| // if (socket == null) { | |
| // reject(new Error('[configureStreamAsync] socket is null ')); | |
| // return; | |
| // } | |
| // const modelName = agent?.name ?? null; | |
| // if (modelName == null) { | |
| // reject(new Error('[configureStreamAsync] modelName is null ')); | |
| // return; | |
| // } | |
| // const config: StartStreamEventConfig = { | |
| // event: 'config', | |
| // rate: sampleRate, | |
| // model_name: modelName, | |
| // debug: serverDebugFlag, | |
| // // synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true | |
| // async_processing: true, | |
| // buffer_limit: BUFFER_LIMIT, | |
| // model_type: outputMode, | |
| // }; | |
| // console.log('[configureStreamAsync] sending config', config); | |
| // socket.emit('configure_stream', config, (statusObject) => { | |
| // setHasMaxSpeakers(statusObject.message === 'max_speakers') | |
| // if (statusObject.status === 'ok') { | |
| // isStreamConfiguredRef.current = true; | |
| // console.debug( | |
| // '[configureStreamAsync] stream configured!', | |
| // statusObject, | |
| // ); | |
| // resolve(); | |
| // } else { | |
| // isStreamConfiguredRef.current = false; | |
| // reject( | |
| // new Error( | |
| // `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`, | |
| // ), | |
| // ); | |
| // return; | |
| // } | |
| // }); | |
| // }); | |
| // }; | |
| // const startStreaming = async () => { | |
| // if (streamingStatus !== 'stopped') { | |
| // console.warn( | |
| // `Attempting to start stream when status is ${streamingStatus}`, | |
| // ); | |
| // return; | |
| // } | |
| // setStreamingStatus('starting'); | |
| // if (audioContext.state === 'suspended') { | |
| // console.warn('audioContext was suspended! resuming...'); | |
| // await audioContext.resume(); | |
| // } | |
| // let stream: MediaStream | null = null; | |
| // try { | |
| // if (inputSource === 'userMedia') { | |
| // stream = await requestUserMediaAudioStream({ | |
| // noiseSuppression: | |
| // enableNoiseSuppression ?? | |
| // AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression, | |
| // echoCancellation: | |
| // enableEchoCancellation ?? | |
| // AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation, | |
| // }); | |
| // } else if (inputSource === 'displayMedia') { | |
| // stream = await requestDisplayMediaAudioStream({ | |
| // noiseSuppression: | |
| // enableNoiseSuppression ?? | |
| // AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression, | |
| // echoCancellation: | |
| // enableEchoCancellation ?? | |
| // AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation, | |
| // }); | |
| // } else { | |
| // throw new Error(`Unsupported input source requested: ${inputSource}`); | |
| // } | |
| // setInputStream(stream); | |
| // } catch (e) { | |
| // console.error('[startStreaming] media stream request failed:', e); | |
| // setStreamingStatus('stopped'); | |
| // return; | |
| // } | |
| // const mediaStreamSource = audioContext.createMediaStreamSource(stream); | |
| // setInputStreamSource(mediaStreamSource); | |
| // /** | |
| // * NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but | |
| // * which is easy and convenient for our purposes. | |
| // * | |
| // * Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor | |
| // * | |
| // * In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287 | |
| // */ | |
| // const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1); | |
| // setScriptNodeProcessor(scriptProcessor); | |
| // scriptProcessor.onaudioprocess = (event) => { | |
| // if (isStreamConfiguredRef.current === false) { | |
| // console.debug('[onaudioprocess] stream is not configured yet!'); | |
| // return; | |
| // } | |
| // if (socket == null) { | |
| // console.warn('[onaudioprocess] socket is null in onaudioprocess'); | |
| // return; | |
| // } | |
| // if (mutedRef.current) { | |
| // // We still want to send audio to the server when we're muted to ensure we | |
| // // get any remaining audio back from the server, so let's pass an array length 1 with a value of 0 | |
| // const mostlyEmptyInt16Array = new Int16Array(1); | |
| // socket.emit('incoming_audio', mostlyEmptyInt16Array); | |
| // } else { | |
| // const float32Audio = event.inputBuffer.getChannelData(0); | |
| // const pcm16Audio = float32To16BitPCM(float32Audio); | |
| // socket.emit('incoming_audio', pcm16Audio); | |
| // } | |
| // debug()?.sentAudio(event); | |
| // }; | |
| // mediaStreamSource.connect(scriptProcessor); | |
| // scriptProcessor.connect(audioContext.destination); | |
| // bufferedSpeechPlayer.start(); | |
| // try { | |
| // if (targetLang == null) { | |
| // throw new Error('[startStreaming] targetLang cannot be nullish'); | |
| // } | |
| // // When we are starting the stream we want to pass all the dynamic config values | |
| // // available before actually configuring and starting the stream | |
| // const fullDynamicConfig: DynamicConfig = { | |
| // targetLanguage: targetLang, | |
| // expressive: enableExpressive, | |
| // }; | |
| // await onSetDynamicConfig(fullDynamicConfig); | |
| // // NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why. | |
| // await configureStreamAsync({ | |
| // sampleRate: audioContext.sampleRate, | |
| // }); | |
| // } catch (e) { | |
| // console.error('configureStreamAsync failed', e); | |
| // setStreamingStatus('stopped'); | |
| // return; | |
| // } | |
| // setStreamingStatus('running'); | |
| // }; | |
| // const stopStreaming = useCallback(async () => { | |
| // if (streamingStatus === 'stopped') { | |
| // console.warn( | |
| // `Attempting to stop stream when status is ${streamingStatus}`, | |
| // ); | |
| // return; | |
| // } | |
| // // Stop the speech playback right away | |
| // bufferedSpeechPlayer.stop(); | |
| // if (inputStreamSource == null || scriptNodeProcessor == null) { | |
| // console.error( | |
| // 'inputStreamSource || scriptNodeProcessor is null in stopStreaming', | |
| // ); | |
| // } else { | |
| // inputStreamSource.disconnect(scriptNodeProcessor); | |
| // scriptNodeProcessor.disconnect(audioContext.destination); | |
| // // Release the mic input so we stop showing the red recording icon in the browser | |
| // inputStream?.getTracks().forEach((track) => track.stop()); | |
| // } | |
| // if (socket == null) { | |
| // console.warn('Unable to emit stop_stream because socket is null'); | |
| // } else { | |
| // socket.emit('stop_stream', (result) => { | |
| // console.debug('[emit result: stop_stream]', result); | |
| // }); | |
| // } | |
| // setStreamingStatus('stopped'); | |
| // }, [ | |
| // audioContext.destination, | |
| // bufferedSpeechPlayer, | |
| // inputStream, | |
| // inputStreamSource, | |
| // scriptNodeProcessor, | |
| // socket, | |
| // streamingStatus, | |
| // ]); | |
| // const onClearTranscriptForAll = useCallback(() => { | |
| // if (socket != null) { | |
| // socket.emit('clear_transcript_for_all'); | |
| // } | |
| // }, [socket]); | |
| // /****************************************** | |
| // * Effects | |
| // ******************************************/ | |
| // useEffect(() => { | |
| // if (socket == null) { | |
| // return; | |
| // } | |
| // const onRoomStateUpdate = (roomState: RoomState) => { | |
| // setRoomState(roomState); | |
| // }; | |
| // socket.on('room_state_update', onRoomStateUpdate); | |
| // return () => { | |
| // socket.off('room_state_update', onRoomStateUpdate); | |
| // }; | |
| // }, [socket]); | |
| // useEffect(() => { | |
| // if (socket != null) { | |
| // const onTranslationText = (data: ServerTextData) => { | |
| // setReceivedData((prev) => [...prev, data]); | |
| // debug()?.receivedText(data.payload); | |
| // }; | |
| // const onTranslationSpeech = (data: ServerSpeechData) => { | |
| // bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate); | |
| // }; | |
| // socket.on('translation_text', onTranslationText); | |
| // socket.on('translation_speech', onTranslationSpeech); | |
| // return () => { | |
| // socket.off('translation_text', onTranslationText); | |
| // socket.off('translation_speech', onTranslationSpeech); | |
| // }; | |
| // } | |
| // }, [bufferedSpeechPlayer, socket]); | |
| // useEffect(() => { | |
| // if (socket != null) { | |
| // const onServerStateUpdate = (newServerState: ServerState) => { | |
| // setServerState(newServerState); | |
| // // If a client creates a server lock, we want to stop streaming if we're not them | |
| // if ( | |
| // newServerState.serverLock?.isActive === true && | |
| // newServerState.serverLock?.clientID !== clientID && | |
| // streamingStatus === 'running' | |
| // ) { | |
| // stopStreaming(); | |
| // } | |
| // const firstAgentNullable = newServerState.agentsCapabilities[0]; | |
| // if (agent == null && firstAgentNullable != null) { | |
| // setAgentAndUpdateParams(firstAgentNullable); | |
| // } | |
| // }; | |
| // socket.on('server_state_update', onServerStateUpdate); | |
| // return () => { | |
| // socket.off('server_state_update', onServerStateUpdate); | |
| // }; | |
| // } | |
| // }, [ | |
| // agent, | |
| // clientID, | |
| // setAgentAndUpdateParams, | |
| // socket, | |
| // stopStreaming, | |
| // streamingStatus, | |
| // ]); | |
| // useEffect(() => { | |
| // if (socket != null) { | |
| // const onServerException = ( | |
| // exceptionDataWithoutClientTime: ServerExceptionData, | |
| // ) => { | |
| // const exceptionData = { | |
| // ...exceptionDataWithoutClientTime, | |
| // timeStringClient: new Date( | |
| // exceptionDataWithoutClientTime['timeEpochMs'], | |
| // ).toLocaleString(), | |
| // }; | |
| // setServerExceptions((prev) => | |
| // [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED), | |
| // ); | |
| // console.error( | |
| // `[server_exception] The server encountered an exception: ${exceptionData['message']}`, | |
| // exceptionData, | |
| // ); | |
| // }; | |
| // socket.on('server_exception', onServerException); | |
| // return () => { | |
| // socket.off('server_exception', onServerException); | |
| // }; | |
| // } | |
| // }, [socket]); | |
| // useEffect(() => { | |
| // if (socket != null) { | |
| // const onClearTranscript = () => { | |
| // setReceivedData([]); | |
| // setTranslationSentencesAnimatedIndex(0); | |
| // }; | |
| // socket.on('clear_transcript', onClearTranscript); | |
| // return () => { | |
| // socket.off('clear_transcript', onClearTranscript); | |
| // }; | |
| // } | |
| // }, [socket]); | |
| // useEffect(() => { | |
| // const onScroll = () => { | |
| // if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) { | |
| // isScrolledToBottomRef.current = true; | |
| // return; | |
| // } | |
| // isScrolledToBottomRef.current = false; | |
| // return; | |
| // }; | |
| // document.addEventListener('scroll', onScroll); | |
| // return () => { | |
| // document.removeEventListener('scroll', onScroll); | |
| // }; | |
| // }, []); | |
| // useLayoutEffect(() => { | |
| // if ( | |
| // lastTranslationResultRef.current != null && | |
| // isScrolledToBottomRef.current | |
| // ) { | |
| // // Scroll the div to the most recent entry | |
| // lastTranslationResultRef.current.scrollIntoView(); | |
| // } | |
| // // Run the effect every time data is received, so that | |
| // // we scroll to the bottom even if we're just adding text to | |
| // // a pre-existing chunk | |
| // }, [receivedData]); | |
| // useEffect(() => { | |
| // if (!animateTextDisplay) { | |
| // return; | |
| // } | |
| // if ( | |
| // translationSentencesAnimatedIndex < translationSentencesBaseTotalLength | |
| // ) { | |
| // const timeout = setTimeout(() => { | |
| // setTranslationSentencesAnimatedIndex((prev) => prev + 1); | |
| // debug()?.startRenderText(); | |
| // }, TYPING_ANIMATION_DELAY_MS); | |
| // return () => clearTimeout(timeout); | |
| // } else { | |
| // debug()?.endRenderText(); | |
| // } | |
| // }, [ | |
| // animateTextDisplay, | |
| // translationSentencesAnimatedIndex, | |
| // translationSentencesBaseTotalLength, | |
| // ]); | |
| // /****************************************** | |
| // * Sub-components | |
| // ******************************************/ | |
| // const volumeSliderNode = ( | |
| // <Stack | |
| // spacing={2} | |
| // direction="row" | |
| // sx={{mb: 1, width: '100%'}} | |
| // alignItems="center"> | |
| // <VolumeDown color="primary" /> | |
| // <Slider | |
| // aria-label="Volume" | |
| // defaultValue={1} | |
| // scale={getGainScaledValue} | |
| // min={0} | |
| // max={3} | |
| // step={0.1} | |
| // marks={[ | |
| // {value: 0, label: '0%'}, | |
| // {value: 1, label: '100%'}, | |
| // {value: 2, label: '400%'}, | |
| // {value: 3, label: '700%'}, | |
| // ]} | |
| // valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`} | |
| // valueLabelDisplay="auto" | |
| // value={gain} | |
| // onChange={(_event: Event, newValue: number | number[]) => { | |
| // if (typeof newValue === 'number') { | |
| // const scaledGain = getGainScaledValue(newValue); | |
| // // We want the actual gain node to use the scaled value | |
| // bufferedSpeechPlayer.setGain(scaledGain); | |
| // // But we want react state to keep track of the non-scaled value | |
| // setGain(newValue); | |
| // } else { | |
| // console.error( | |
| // `[volume slider] Unexpected non-number value: ${newValue}`, | |
| // ); | |
| // } | |
| // }} | |
| // /> | |
| // <VolumeUp color="primary" /> | |
| // </Stack> | |
| // ); | |
| // const xrDialogComponent = ( | |
| // <XRDialog | |
| // animateTextDisplay={ | |
| // animateTextDisplay && | |
| // translationSentencesAnimatedIndex == translationSentencesBaseTotalLength | |
| // } | |
| // bufferedSpeechPlayer={bufferedSpeechPlayer} | |
| // translationSentences={translationSentences} | |
| // roomState={roomState} | |
| // roomID={roomID} | |
| // startStreaming={startStreaming} | |
| // stopStreaming={stopStreaming} | |
| // debugParam={debugParam} | |
| // onARHidden={() => { | |
| // setAnimateTextDisplay(urlParams.animateTextDisplay); | |
| // }} | |
| // onARVisible={() => setAnimateTextDisplay(false)} | |
| // /> | |
| // ); | |
| // return ( | |
| // <div className="app-wrapper-sra"> | |
| // <Box | |
| // // eslint-disable-next-line @typescript-eslint/ban-ts-comment | |
| // // @ts-ignore Not sure why it's complaining about complexity here | |
| // sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}> | |
| // <div className="main-container-sra"> | |
| // <div className="top-section-sra horizontal-padding-sra"> | |
| // <div className="header-container-sra"> | |
| // <img | |
| // src={seamlessLogoUrl} | |
| // className="header-icon-sra" | |
| // alt="Seamless Translation Logo" | |
| // height={24} | |
| // width={24} | |
| // /> | |
| // <div> | |
| // <Typography variant="h1" sx={{color: '#65676B'}}> | |
| // Seamless Translation | |
| // </Typography> | |
| // </div> | |
| // </div> | |
| // <div className="header-container-sra"> | |
| // <div> | |
| // <Typography variant="body2" sx={{color: '#65676B'}}> | |
| // Welcome! This space is limited to one speaker at a time. | |
| // If using the live HF space, sharing room code to listeners on another | |
| // IP address may not work because it's running on different replicas. | |
| // Use headphones if you are both speaker and listener to prevent feedback. | |
| // <br/> | |
| // If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>. | |
| // In your duplicated space, join a room as speaker or listener (or both), | |
| // and share the room code to invite listeners. | |
| // <br/> | |
| // Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information. | |
| // <br/> | |
| // SeamlessStreaming model is a research model and is not released | |
| // for production deployment. It is important to use a microphone with | |
| // noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises. | |
| // It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold | |
| // in the model config. The real-time performance will degrade | |
| // if you try streaming multiple speakers at the same time. | |
| // </Typography> | |
| // </div> | |
| // </div> | |
| // <Stack spacing="22px" direction="column"> | |
| // <Box> | |
| // <RoomConfig | |
| // roomState={roomState} | |
| // serverState={serverState} | |
| // streamingStatus={streamingStatus} | |
| // onJoinRoomOrUpdateRoles={() => { | |
| // // If the user has switched from speaker to listener we need to tell the | |
| // // player to play eagerly, since currently the listener doesn't have any stop/start controls | |
| // bufferedSpeechPlayer.start(); | |
| // }} | |
| // /> | |
| // {isListener && !isSpeaker && ( | |
| // <Box | |
| // sx={{ | |
| // paddingX: 6, | |
| // paddingBottom: 2, | |
| // marginY: 2, | |
| // display: 'flex', | |
| // flexDirection: 'column', | |
| // alignItems: 'center', | |
| // }}> | |
| // {volumeSliderNode} | |
| // </Box> | |
| // )} | |
| // </Box> | |
| // {isSpeaker && ( | |
| // <> | |
| // <Divider /> | |
| // <Stack spacing="12px" direction="column"> | |
| // <FormLabel id="output-modes-radio-group-label"> | |
| // Model | |
| // </FormLabel> | |
| // <FormControl | |
| // disabled={ | |
| // streamFixedConfigOptionsDisabled || | |
| // agentsCapabilities.length === 0 | |
| // } | |
| // fullWidth | |
| // sx={{minWidth: '14em'}}> | |
| // <InputLabel id="model-selector-input-label"> | |
| // Model | |
| // </InputLabel> | |
| // <Select | |
| // labelId="model-selector-input-label" | |
| // label="Model" | |
| // onChange={(e: SelectChangeEvent) => { | |
| // const newAgent = | |
| // agentsCapabilities.find( | |
| // (agent) => e.target.value === agent.name, | |
| // ) ?? null; | |
| // if (newAgent == null) { | |
| // console.error( | |
| // 'Unable to find agent with name', | |
| // e.target.value, | |
| // ); | |
| // } | |
| // setAgentAndUpdateParams(newAgent); | |
| // }} | |
| // value={model ?? ''}> | |
| // {agentsCapabilities.map((agent) => ( | |
| // <MenuItem value={agent.name} key={agent.name}> | |
| // {agent.name} | |
| // </MenuItem> | |
| // ))} | |
| // </Select> | |
| // </FormControl> | |
| // </Stack> | |
| // <Stack spacing={0.5}> | |
| // <FormLabel id="output-modes-radio-group-label"> | |
| // Output | |
| // </FormLabel> | |
| // <Box sx={{paddingTop: 2, paddingBottom: 1}}> | |
| // <FormControl fullWidth sx={{minWidth: '14em'}}> | |
| // <InputLabel id="target-selector-input-label"> | |
| // Target Language | |
| // </InputLabel> | |
| // <Select | |
| // labelId="target-selector-input-label" | |
| // label="Target Language" | |
| // onChange={(e: SelectChangeEvent) => { | |
| // setTargetLang(e.target.value); | |
| // onSetDynamicConfig({ | |
| // targetLanguage: e.target.value, | |
| // }); | |
| // }} | |
| // value={targetLang ?? ''}> | |
| // {currentAgent?.targetLangs.map((langCode) => ( | |
| // <MenuItem value={langCode} key={langCode}> | |
| // {getLanguageFromThreeLetterCode(langCode) != null | |
| // ? `${getLanguageFromThreeLetterCode( | |
| // langCode, | |
| // )} (${langCode})` | |
| // : langCode} | |
| // </MenuItem> | |
| // ))} | |
| // </Select> | |
| // </FormControl> | |
| // </Box> | |
| // <Grid container> | |
| // <Grid item xs={12} sm={4}> | |
| // <FormControl | |
| // disabled={streamFixedConfigOptionsDisabled}> | |
| // <RadioGroup | |
| // aria-labelledby="output-modes-radio-group-label" | |
| // value={outputMode} | |
| // onChange={(e) => | |
| // setOutputMode( | |
| // e.target.value as SupportedOutputMode, | |
| // ) | |
| // } | |
| // name="output-modes-radio-buttons-group"> | |
| // { | |
| // // TODO: Use supported modalities from agentCapabilities | |
| // SUPPORTED_OUTPUT_MODES.map(({value, label}) => ( | |
| // <FormControlLabel | |
| // key={value} | |
| // value={value} | |
| // control={<Radio />} | |
| // label={label} | |
| // /> | |
| // )) | |
| // } | |
| // </RadioGroup> | |
| // </FormControl> | |
| // </Grid> | |
| // <Grid item xs={12} sm={8}> | |
| // <Stack | |
| // direction="column" | |
| // spacing={1} | |
| // alignItems="flex-start" | |
| // sx={{flexGrow: 1}}> | |
| // {currentAgent?.dynamicParams?.includes( | |
| // 'expressive', | |
| // ) && ( | |
| // <FormControlLabel | |
| // control={ | |
| // <Switch | |
| // checked={enableExpressive ?? false} | |
| // onChange={( | |
| // event: React.ChangeEvent<HTMLInputElement>, | |
| // ) => { | |
| // const newValue = event.target.checked; | |
| // setEnableExpressive(newValue); | |
| // onSetDynamicConfig({ | |
| // expressive: newValue, | |
| // }); | |
| // }} | |
| // /> | |
| // } | |
| // label="Expressive" | |
| // /> | |
| // )} | |
| // {isListener && ( | |
| // <Box | |
| // sx={{ | |
| // flexGrow: 1, | |
| // paddingX: 1.5, | |
| // paddingY: 1.5, | |
| // width: '100%', | |
| // }}> | |
| // {volumeSliderNode} | |
| // </Box> | |
| // )} | |
| // </Stack> | |
| // </Grid> | |
| // </Grid> | |
| // </Stack> | |
| // <Stack | |
| // direction="row" | |
| // spacing={2} | |
| // justifyContent="space-between"> | |
| // <Box sx={{flex: 1}}> | |
| // <FormControl disabled={streamFixedConfigOptionsDisabled}> | |
| // <FormLabel id="input-source-radio-group-label"> | |
| // Input Source | |
| // </FormLabel> | |
| // <RadioGroup | |
| // aria-labelledby="input-source-radio-group-label" | |
| // value={inputSource} | |
| // onChange={(e: React.ChangeEvent<HTMLInputElement>) => | |
| // setInputSource( | |
| // e.target.value as SupportedInputSource, | |
| // ) | |
| // } | |
| // name="input-source-radio-buttons-group"> | |
| // {SUPPORTED_INPUT_SOURCES.map(({label, value}) => ( | |
| // <FormControlLabel | |
| // key={value} | |
| // value={value} | |
| // control={<Radio />} | |
| // label={label} | |
| // /> | |
| // ))} | |
| // </RadioGroup> | |
| // </FormControl> | |
| // </Box> | |
| // <Box sx={{flex: 1, flexGrow: 2}}> | |
| // <FormControl disabled={streamFixedConfigOptionsDisabled}> | |
| // <FormLabel>Options</FormLabel> | |
| // <FormControlLabel | |
| // control={ | |
| // <Checkbox | |
| // checked={ | |
| // enableNoiseSuppression ?? | |
| // AUDIO_STREAM_DEFAULTS[inputSource] | |
| // .noiseSuppression | |
| // } | |
| // onChange={( | |
| // event: React.ChangeEvent<HTMLInputElement>, | |
| // ) => | |
| // setEnableNoiseSuppression(event.target.checked) | |
| // } | |
| // /> | |
| // } | |
| // label="Noise Suppression" | |
| // /> | |
| // <FormControlLabel | |
| // control={ | |
| // <Checkbox | |
| // checked={ | |
| // enableEchoCancellation ?? | |
| // AUDIO_STREAM_DEFAULTS[inputSource] | |
| // .echoCancellation | |
| // } | |
| // onChange={( | |
| // event: React.ChangeEvent<HTMLInputElement>, | |
| // ) => | |
| // setEnableEchoCancellation(event.target.checked) | |
| // } | |
| // /> | |
| // } | |
| // label="Echo Cancellation (not recommended)" | |
| // /> | |
| // <FormControlLabel | |
| // control={ | |
| // <Checkbox | |
| // checked={serverDebugFlag} | |
| // onChange={( | |
| // event: React.ChangeEvent<HTMLInputElement>, | |
| // ) => setServerDebugFlag(event.target.checked)} | |
| // /> | |
| // } | |
| // label="Enable Server Debugging" | |
| // /> | |
| // </FormControl> | |
| // </Box> | |
| // </Stack> | |
| // {isSpeaker && | |
| // isListener && | |
| // inputSource === 'userMedia' && | |
| // !enableEchoCancellation && | |
| // gain !== 0 && ( | |
| // <div> | |
| // <Alert severity="warning" icon={<HeadphonesIcon />}> | |
| // Headphones required to prevent feedback. | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // {isSpeaker && enableEchoCancellation && ( | |
| // <div> | |
| // <Alert severity="warning"> | |
| // We don't recommend using echo cancellation as it may | |
| // distort the input audio. If possible, use headphones and | |
| // disable echo cancellation instead. | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // <Stack direction="row" spacing={2}> | |
| // {streamingStatus === 'stopped' ? ( | |
| // <Button | |
| // variant="contained" | |
| // onClick={startStreaming} | |
| // disabled={ | |
| // roomID == null || | |
| // // Prevent users from starting streaming if there is a server lock with an active session | |
| // (serverState?.serverLock?.isActive === true && | |
| // serverState.serverLock.clientID !== clientID) | |
| // }> | |
| // {buttonLabelMap[streamingStatus]} | |
| // </Button> | |
| // ) : ( | |
| // <Button | |
| // variant="contained" | |
| // color={ | |
| // streamingStatus === 'running' ? 'error' : 'primary' | |
| // } | |
| // disabled={ | |
| // streamingStatus === 'starting' || roomID == null | |
| // } | |
| // onClick={stopStreaming}> | |
| // {buttonLabelMap[streamingStatus]} | |
| // </Button> | |
| // )} | |
| // <Box> | |
| // <Button | |
| // variant="contained" | |
| // aria-label={muted ? 'Unmute' : 'Mute'} | |
| // color={muted ? 'info' : 'primary'} | |
| // onClick={() => setMuted((prev) => !prev)} | |
| // sx={{ | |
| // borderRadius: 100, | |
| // paddingX: 0, | |
| // minWidth: '36px', | |
| // }}> | |
| // {muted ? <MicOff /> : <Mic />} | |
| // </Button> | |
| // </Box> | |
| // {roomID == null ? null : ( | |
| // <Box | |
| // sx={{ | |
| // flexGrow: 1, | |
| // display: 'flex', | |
| // justifyContent: 'flex-end', | |
| // }}> | |
| // {xrDialogComponent} | |
| // </Box> | |
| // )} | |
| // </Stack> | |
| // {serverExceptions.length > 0 && ( | |
| // <div> | |
| // <Alert severity="error"> | |
| // {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`} | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // {serverState != null && hasMaxSpeakers && ( | |
| // <div> | |
| // <Alert severity="error"> | |
| // {`Maximum number of speakers reached. Please try again at a later time.`} | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // {serverState != null && | |
| // serverState.totalActiveTranscoders >= | |
| // TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && ( | |
| // <div> | |
| // <Alert severity="warning"> | |
| // {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`} | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // {serverState?.serverLock != null && | |
| // serverState.serverLock.clientID !== clientID && ( | |
| // <div> | |
| // <Alert severity="warning"> | |
| // {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`} | |
| // </Alert> | |
| // </div> | |
| // )} | |
| // </> | |
| // )} | |
| // </Stack> | |
| // {isListener && !isSpeaker && ( | |
| // <Box sx={{marginBottom: 1, marginTop: 2}}> | |
| // {xrDialogComponent} | |
| // </Box> | |
| // )} | |
| // </div> | |
| // {debugParam && roomID != null && <DebugSection />} | |
| // <div className="translation-text-container-sra horizontal-padding-sra"> | |
| // <Stack | |
| // direction="row" | |
| // spacing={2} | |
| // sx={{mb: '16px', alignItems: 'center'}}> | |
| // <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}> | |
| // Transcript | |
| // </Typography> | |
| // {isSpeaker && ( | |
| // <Button | |
| // variant="text" | |
| // size="small" | |
| // onClick={onClearTranscriptForAll}> | |
| // Clear Transcript for All | |
| // </Button> | |
| // )} | |
| // </Stack> | |
| // <Stack direction="row"> | |
| // <div className="translation-text-sra"> | |
| // {translationSentencesWithEmptyStartingString.map( | |
| // (sentence, index, arr) => { | |
| // const isLast = index === arr.length - 1; | |
| // const maybeRef = isLast | |
| // ? {ref: lastTranslationResultRef} | |
| // : {}; | |
| // return ( | |
| // <div className="text-chunk-sra" key={index} {...maybeRef}> | |
| // <Typography variant="body1"> | |
| // {sentence} | |
| // {animateTextDisplay && isLast && ( | |
| // <Blink | |
| // intervalMs={CURSOR_BLINK_INTERVAL_MS} | |
| // shouldBlink={ | |
| // (roomState?.activeTranscoders ?? 0) > 0 | |
| // }> | |
| // <Typography | |
| // component="span" | |
| // variant="body1" | |
| // sx={{ | |
| // display: 'inline-block', | |
| // transform: 'scaleY(1.25) translateY(-1px)', | |
| // }}> | |
| // {'|'} | |
| // </Typography> | |
| // </Blink> | |
| // )} | |
| // </Typography> | |
| // </div> | |
| // ); | |
| // }, | |
| // )} | |
| // </div> | |
| // </Stack> | |
| // </div> | |
| // </div> | |
| // </Box> | |
| // </div> | |
| // ); | |
| // } | |
| import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react'; | |
| import Button from '@mui/material/Button'; | |
| import Typography from '@mui/material/Typography'; | |
| import InputLabel from '@mui/material/InputLabel'; | |
| import FormControl from '@mui/material/FormControl'; | |
| import Select, {SelectChangeEvent} from '@mui/material/Select'; | |
| import MenuItem from '@mui/material/MenuItem'; | |
| import Stack from '@mui/material/Stack'; | |
| import seamlessLogoUrl from './assets/DSC_4281.svg'; | |
| import { | |
| AgentCapabilities, | |
| BaseResponse, | |
| BrowserAudioStreamConfig, | |
| DynamicConfig, | |
| PartialDynamicConfig, | |
| SUPPORTED_INPUT_SOURCES, | |
| SUPPORTED_OUTPUT_MODES, | |
| ServerExceptionData, | |
| ServerSpeechData, | |
| ServerState, | |
| ServerTextData, | |
| StartStreamEventConfig, | |
| StreamingStatus, | |
| SupportedInputSource, | |
| SupportedOutputMode, | |
| TranslationSentences, | |
| } from './types/StreamingTypes'; | |
| import FormLabel from '@mui/material/FormLabel'; | |
| import RadioGroup from '@mui/material/RadioGroup'; | |
| import FormControlLabel from '@mui/material/FormControlLabel'; | |
| import Radio from '@mui/material/Radio'; | |
| import './StreamingInterface.css'; | |
| import RoomConfig from './RoomConfig'; | |
| import Divider from '@mui/material/Divider'; | |
| import {useSocket} from './useSocket'; | |
| import {RoomState} from './types/RoomState'; | |
| import useStable from './useStable'; | |
| import float32To16BitPCM from './float32To16BitPCM'; | |
| import createBufferedSpeechPlayer from './createBufferedSpeechPlayer'; | |
| import Checkbox from '@mui/material/Checkbox'; | |
| import Alert from '@mui/material/Alert'; | |
| import isScrolledToDocumentBottom from './isScrolledToDocumentBottom'; | |
| import Box from '@mui/material/Box'; | |
| import Slider from '@mui/material/Slider'; | |
| import VolumeDown from '@mui/icons-material/VolumeDown'; | |
| import VolumeUp from '@mui/icons-material/VolumeUp'; | |
| import Mic from '@mui/icons-material/Mic'; | |
| import MicOff from '@mui/icons-material/MicOff'; | |
| import XRDialog from './react-xr/XRDialog'; | |
| import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData'; | |
| import { | |
| sliceTranslationSentencesUpToIndex, | |
| getTotalSentencesLength, | |
| } from './sliceTranslationSentencesUtils'; | |
| import Blink from './Blink'; | |
| import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval'; | |
| import {getURLParams} from './URLParams'; | |
| import debug from './debug'; | |
| import DebugSection from './DebugSection'; | |
| import Switch from '@mui/material/Switch'; | |
| import Grid from '@mui/material/Grid'; | |
| import {getLanguageFromThreeLetterCode} from './languageLookup'; | |
| import HeadphonesIcon from '@mui/icons-material/Headphones'; | |
// Default browser audio-processing settings, keyed by input source.
// Echo cancellation defaults off for both sources (the UI warns that it
// may distort the input audio); noise suppression defaults on only for
// the microphone ('userMedia') input. Users can override both via the
// "Options" checkboxes, which fall back to these values when unset.
const AUDIO_STREAM_DEFAULTS = {
  userMedia: {
    echoCancellation: false,
    noiseSuppression: true,
  },
  displayMedia: {
    echoCancellation: false,
    noiseSuppression: false,
  },
} as const;
| async function requestUserMediaAudioStream( | |
| config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'], | |
| ) { | |
| const stream = await navigator.mediaDevices.getUserMedia({ | |
| audio: {...config, channelCount: 1}, | |
| }); | |
| console.debug( | |
| '[requestUserMediaAudioStream] stream created with settings:', | |
| stream.getAudioTracks()?.[0]?.getSettings(), | |
| ); | |
| return stream; | |
| } | |
| async function requestDisplayMediaAudioStream( | |
| config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'], | |
| ) { | |
| const stream = await navigator.mediaDevices.getDisplayMedia({ | |
| audio: {...config, channelCount: 1}, | |
| }); | |
| console.debug( | |
| '[requestDisplayMediaAudioStream] stream created with settings:', | |
| stream.getAudioTracks()?.[0]?.getSettings(), | |
| ); | |
| return stream; | |
| } | |
| const buttonLabelMap: {[key in StreamingStatus]: string} = { | |
| stopped: 'Start Streaming', | |
| running: 'Stop Streaming', | |
| starting: 'Starting...', | |
| }; | |
// Sent to the server as `buffer_limit` in the stream configuration.
const BUFFER_LIMIT = 1;
// Distance (px) from the document bottom within which the user is still
// considered "scrolled to bottom", so the transcript keeps auto-scrolling.
const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;
| const GAIN_MULTIPLIER_OVER_1 = 3; | |
| const getGainScaledValue = (value) => | |
| value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value; | |
// Show a degraded-performance warning once the server reports at least
// this many active streaming sessions.
const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;
// Cap on how many server exceptions are kept in state (newest first).
const MAX_SERVER_EXCEPTIONS_TRACKED = 500;
// Delay (ms) between revealing successive transcript pieces when the
// typing animation is enabled.
export const TYPING_ANIMATION_DELAY_MS = 6;
| export default function StreamingInterface() { | |
| const urlParams = getURLParams(); | |
| const debugParam = urlParams.debug; | |
| const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>( | |
| urlParams.animateTextDisplay, | |
| ); | |
| const socketObject = useSocket(); | |
| const {socket, clientID} = socketObject; | |
| const [serverState, setServerState] = useState<ServerState | null>(null); | |
| const [agent, setAgent] = useState<AgentCapabilities | null>(null); | |
| const model = agent?.name ?? null; | |
| const agentsCapabilities: Array<AgentCapabilities> = | |
| serverState?.agentsCapabilities ?? []; | |
| const currentAgent: AgentCapabilities | null = | |
| agentsCapabilities.find((agent) => agent.name === model) ?? null; | |
| const [serverExceptions, setServerExceptions] = useState< | |
| Array<ServerExceptionData> | |
| >([]); | |
| const [roomState, setRoomState] = useState<RoomState | null>(null); | |
| const roomID = roomState?.room_id ?? null; | |
| const isSpeaker = | |
| (clientID != null && roomState?.speakers.includes(clientID)) ?? false; | |
| const isListener = | |
| (clientID != null && roomState?.listeners.includes(clientID)) ?? false; | |
| const [streamingStatus, setStreamingStatus] = | |
| useState<StreamingStatus>('stopped'); | |
| const isStreamConfiguredRef = useRef<boolean>(false); | |
| const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false); | |
| const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t'); | |
| const [inputSource, setInputSource] = | |
| useState<SupportedInputSource>('userMedia'); | |
| const [enableNoiseSuppression, setEnableNoiseSuppression] = useState< | |
| boolean | null | |
| >(null); | |
| const [enableEchoCancellation, setEnableEchoCancellation] = useState< | |
| boolean | null | |
| >(null); | |
| // Dynamic Params: | |
| const [targetLang, setTargetLang] = useState<string | null>(null); | |
| const [enableExpressive, setEnableExpressive] = useState<boolean | null>( | |
| null, | |
| ); | |
| const [serverDebugFlag, setServerDebugFlag] = useState<boolean>( | |
| debugParam ?? false, | |
| ); | |
| const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]); | |
| const [ | |
| translationSentencesAnimatedIndex, | |
| setTranslationSentencesAnimatedIndex, | |
| ] = useState<number>(0); | |
| const lastTranslationResultRef = useRef<HTMLDivElement | null>(null); | |
| const [inputStream, setInputStream] = useState<MediaStream | null>(null); | |
| const [inputStreamSource, setInputStreamSource] = | |
| useState<MediaStreamAudioSourceNode | null>(null); | |
| const audioContext = useStable<AudioContext>(() => new AudioContext()); | |
| const [scriptNodeProcessor, setScriptNodeProcessor] = | |
| useState<ScriptProcessorNode | null>(null); | |
| const [muted, setMuted] = useState<boolean>(false); | |
| // The onaudioprocess script needs an up-to-date reference to the muted state, so | |
| // we use a ref here and keep it in sync via useEffect | |
| const mutedRef = useRef<boolean>(muted); | |
| useEffect(() => { | |
| mutedRef.current = muted; | |
| }, [muted]); | |
| const [gain, setGain] = useState<number>(1); | |
| const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom()); | |
| // Some config options must be set when starting streaming and cannot be chaned dynamically. | |
| // This controls whether they are disabled or not | |
| const streamFixedConfigOptionsDisabled = | |
| streamingStatus !== 'stopped' || roomID == null; | |
| const bufferedSpeechPlayer = useStable(() => { | |
| const player = createBufferedSpeechPlayer({ | |
| onStarted: () => { | |
| console.debug('📢 PLAYBACK STARTED 📢'); | |
| }, | |
| onEnded: () => { | |
| console.debug('🛑 PLAYBACK ENDED 🛑'); | |
| }, | |
| }); | |
| // Start the player now so it eagerly plays audio when it arrives | |
| player.start(); | |
| return player; | |
| }); | |
| const translationSentencesBase: TranslationSentences = | |
| getTranslationSentencesFromReceivedData(receivedData); | |
| const translationSentencesBaseTotalLength = getTotalSentencesLength( | |
| translationSentencesBase, | |
| ); | |
| const translationSentences: TranslationSentences = animateTextDisplay | |
| ? sliceTranslationSentencesUpToIndex( | |
| translationSentencesBase, | |
| translationSentencesAnimatedIndex, | |
| ) | |
| : translationSentencesBase; | |
| // We want the blinking cursor to show before any text has arrived, so let's add an empty string so that the cursor shows up | |
| const translationSentencesWithEmptyStartingString = | |
| streamingStatus === 'running' && translationSentences.length === 0 | |
| ? [''] | |
| : translationSentences; | |
| /****************************************** | |
| * Event Handlers | |
| ******************************************/ | |
| const setAgentAndUpdateParams = useCallback( | |
| (newAgent: AgentCapabilities | null) => { | |
| setAgent((prevAgent) => { | |
| if (prevAgent?.name !== newAgent?.name) { | |
| setTargetLang(newAgent?.targetLangs[0] ?? null); | |
| setEnableExpressive(null); | |
| } | |
| return newAgent; | |
| }); | |
| }, | |
| [], | |
| ); | |
| const onSetDynamicConfig = useCallback( | |
| async (partialConfig: PartialDynamicConfig) => { | |
| return new Promise<void>((resolve, reject) => { | |
| if (socket == null) { | |
| reject(new Error('[onSetDynamicConfig] socket is null ')); | |
| return; | |
| } | |
| socket.emit( | |
| 'set_dynamic_config', | |
| partialConfig, | |
| (result: BaseResponse) => { | |
| console.log('[emit result: set_dynamic_config]', result); | |
| if (result.status === 'ok') { | |
| resolve(); | |
| } else { | |
| reject(); | |
| } | |
| }, | |
| ); | |
| }); | |
| }, | |
| [socket], | |
| ); | |
| const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => { | |
| return new Promise<void>((resolve, reject) => { | |
| if (socket == null) { | |
| reject(new Error('[configureStreamAsync] socket is null ')); | |
| return; | |
| } | |
| const modelName = agent?.name ?? null; | |
| if (modelName == null) { | |
| reject(new Error('[configureStreamAsync] modelName is null ')); | |
| return; | |
| } | |
| const config: StartStreamEventConfig = { | |
| event: 'config', | |
| rate: sampleRate, | |
| model_name: modelName, | |
| debug: serverDebugFlag, | |
| // synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true | |
| async_processing: true, | |
| buffer_limit: BUFFER_LIMIT, | |
| model_type: outputMode, | |
| }; | |
| console.log('[configureStreamAsync] sending config', config); | |
| socket.emit('configure_stream', config, (statusObject) => { | |
| setHasMaxSpeakers(statusObject.message === 'max_speakers') | |
| if (statusObject.status === 'ok') { | |
| isStreamConfiguredRef.current = true; | |
| console.debug( | |
| '[configureStreamAsync] stream configured!', | |
| statusObject, | |
| ); | |
| resolve(); | |
| } else { | |
| isStreamConfiguredRef.current = false; | |
| reject( | |
| new Error( | |
| `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`, | |
| ), | |
| ); | |
| return; | |
| } | |
| }); | |
| }); | |
| }; | |
| const startStreaming = async () => { | |
| if (streamingStatus !== 'stopped') { | |
| console.warn( | |
| `Attempting to start stream when status is ${streamingStatus}`, | |
| ); | |
| return; | |
| } | |
| setStreamingStatus('starting'); | |
| if (audioContext.state === 'suspended') { | |
| console.warn('audioContext was suspended! resuming...'); | |
| await audioContext.resume(); | |
| } | |
| let stream: MediaStream | null = null; | |
| try { | |
| if (inputSource === 'userMedia') { | |
| stream = await requestUserMediaAudioStream({ | |
| noiseSuppression: | |
| enableNoiseSuppression ?? | |
| AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression, | |
| echoCancellation: | |
| enableEchoCancellation ?? | |
| AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation, | |
| }); | |
| } else if (inputSource === 'displayMedia') { | |
| stream = await requestDisplayMediaAudioStream({ | |
| noiseSuppression: | |
| enableNoiseSuppression ?? | |
| AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression, | |
| echoCancellation: | |
| enableEchoCancellation ?? | |
| AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation, | |
| }); | |
| } else { | |
| throw new Error(`Unsupported input source requested: ${inputSource}`); | |
| } | |
| setInputStream(stream); | |
| } catch (e) { | |
| console.error('[startStreaming] media stream request failed:', e); | |
| setStreamingStatus('stopped'); | |
| return; | |
| } | |
| const mediaStreamSource = audioContext.createMediaStreamSource(stream); | |
| setInputStreamSource(mediaStreamSource); | |
| /** | |
| * NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but | |
| * which is easy and convenient for our purposes. | |
| * | |
| * Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor | |
| * | |
| * In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287 | |
| */ | |
| const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1); | |
| setScriptNodeProcessor(scriptProcessor); | |
| scriptProcessor.onaudioprocess = (event) => { | |
| if (isStreamConfiguredRef.current === false) { | |
| console.debug('[onaudioprocess] stream is not configured yet!'); | |
| return; | |
| } | |
| if (socket == null) { | |
| console.warn('[onaudioprocess] socket is null in onaudioprocess'); | |
| return; | |
| } | |
| if (mutedRef.current) { | |
| // We still want to send audio to the server when we're muted to ensure we | |
| // get any remaining audio back from the server, so let's pass an array length 1 with a value of 0 | |
| const mostlyEmptyInt16Array = new Int16Array(1); | |
| socket.emit('incoming_audio', mostlyEmptyInt16Array); | |
| } else { | |
| const float32Audio = event.inputBuffer.getChannelData(0); | |
| const pcm16Audio = float32To16BitPCM(float32Audio); | |
| socket.emit('incoming_audio', pcm16Audio); | |
| } | |
| debug()?.sentAudio(event); | |
| }; | |
| mediaStreamSource.connect(scriptProcessor); | |
| scriptProcessor.connect(audioContext.destination); | |
| bufferedSpeechPlayer.start(); | |
| try { | |
| if (targetLang == null) { | |
| throw new Error('[startStreaming] targetLang cannot be nullish'); | |
| } | |
| // When we are starting the stream we want to pass all the dynamic config values | |
| // available before actually configuring and starting the stream | |
| const fullDynamicConfig: DynamicConfig = { | |
| targetLanguage: targetLang, | |
| expressive: enableExpressive, | |
| }; | |
| await onSetDynamicConfig(fullDynamicConfig); | |
| // NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why. | |
| await configureStreamAsync({ | |
| sampleRate: audioContext.sampleRate, | |
| }); | |
| } catch (e) { | |
| console.error('configureStreamAsync failed', e); | |
| setStreamingStatus('stopped'); | |
| return; | |
| } | |
| setStreamingStatus('running'); | |
| }; | |
| const stopStreaming = useCallback(async () => { | |
| if (streamingStatus === 'stopped') { | |
| console.warn( | |
| `Attempting to stop stream when status is ${streamingStatus}`, | |
| ); | |
| return; | |
| } | |
| // Stop the speech playback right away | |
| bufferedSpeechPlayer.stop(); | |
| if (inputStreamSource == null || scriptNodeProcessor == null) { | |
| console.error( | |
| 'inputStreamSource || scriptNodeProcessor is null in stopStreaming', | |
| ); | |
| } else { | |
| inputStreamSource.disconnect(scriptNodeProcessor); | |
| scriptNodeProcessor.disconnect(audioContext.destination); | |
| // Release the mic input so we stop showing the red recording icon in the browser | |
| inputStream?.getTracks().forEach((track) => track.stop()); | |
| } | |
| if (socket == null) { | |
| console.warn('Unable to emit stop_stream because socket is null'); | |
| } else { | |
| socket.emit('stop_stream', (result) => { | |
| console.debug('[emit result: stop_stream]', result); | |
| }); | |
| } | |
| setStreamingStatus('stopped'); | |
| }, [ | |
| audioContext.destination, | |
| bufferedSpeechPlayer, | |
| inputStream, | |
| inputStreamSource, | |
| scriptNodeProcessor, | |
| socket, | |
| streamingStatus, | |
| ]); | |
| const onClearTranscriptForAll = useCallback(() => { | |
| if (socket != null) { | |
| socket.emit('clear_transcript_for_all'); | |
| } | |
| }, [socket]); | |
| /****************************************** | |
| * Effects | |
| ******************************************/ | |
| useEffect(() => { | |
| if (socket == null) { | |
| return; | |
| } | |
| const onRoomStateUpdate = (roomState: RoomState) => { | |
| setRoomState(roomState); | |
| }; | |
| socket.on('room_state_update', onRoomStateUpdate); | |
| return () => { | |
| socket.off('room_state_update', onRoomStateUpdate); | |
| }; | |
| }, [socket]); | |
| useEffect(() => { | |
| if (socket != null) { | |
| const onTranslationText = (data: ServerTextData) => { | |
| setReceivedData((prev) => [...prev, data]); | |
| debug()?.receivedText(data.payload); | |
| }; | |
| const onTranslationSpeech = (data: ServerSpeechData) => { | |
| bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate); | |
| }; | |
| socket.on('translation_text', onTranslationText); | |
| socket.on('translation_speech', onTranslationSpeech); | |
| return () => { | |
| socket.off('translation_text', onTranslationText); | |
| socket.off('translation_speech', onTranslationSpeech); | |
| }; | |
| } | |
| }, [bufferedSpeechPlayer, socket]); | |
| useEffect(() => { | |
| if (socket != null) { | |
| const onServerStateUpdate = (newServerState: ServerState) => { | |
| setServerState(newServerState); | |
| // If a client creates a server lock, we want to stop streaming if we're not them | |
| if ( | |
| newServerState.serverLock?.isActive === true && | |
| newServerState.serverLock?.clientID !== clientID && | |
| streamingStatus === 'running' | |
| ) { | |
| stopStreaming(); | |
| } | |
| const firstAgentNullable = newServerState.agentsCapabilities[0]; | |
| if (agent == null && firstAgentNullable != null) { | |
| setAgentAndUpdateParams(firstAgentNullable); | |
| } | |
| }; | |
| socket.on('server_state_update', onServerStateUpdate); | |
| return () => { | |
| socket.off('server_state_update', onServerStateUpdate); | |
| }; | |
| } | |
| }, [ | |
| agent, | |
| clientID, | |
| setAgentAndUpdateParams, | |
| socket, | |
| stopStreaming, | |
| streamingStatus, | |
| ]); | |
| useEffect(() => { | |
| if (socket != null) { | |
| const onServerException = ( | |
| exceptionDataWithoutClientTime: ServerExceptionData, | |
| ) => { | |
| const exceptionData = { | |
| ...exceptionDataWithoutClientTime, | |
| timeStringClient: new Date( | |
| exceptionDataWithoutClientTime['timeEpochMs'], | |
| ).toLocaleString(), | |
| }; | |
| setServerExceptions((prev) => | |
| [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED), | |
| ); | |
| console.error( | |
| `[server_exception] The server encountered an exception: ${exceptionData['message']}`, | |
| exceptionData, | |
| ); | |
| }; | |
| socket.on('server_exception', onServerException); | |
| return () => { | |
| socket.off('server_exception', onServerException); | |
| }; | |
| } | |
| }, [socket]); | |
| useEffect(() => { | |
| if (socket != null) { | |
| const onClearTranscript = () => { | |
| setReceivedData([]); | |
| setTranslationSentencesAnimatedIndex(0); | |
| }; | |
| socket.on('clear_transcript', onClearTranscript); | |
| return () => { | |
| socket.off('clear_transcript', onClearTranscript); | |
| }; | |
| } | |
| }, [socket]); | |
| useEffect(() => { | |
| const onScroll = () => { | |
| if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) { | |
| isScrolledToBottomRef.current = true; | |
| return; | |
| } | |
| isScrolledToBottomRef.current = false; | |
| return; | |
| }; | |
| document.addEventListener('scroll', onScroll); | |
| return () => { | |
| document.removeEventListener('scroll', onScroll); | |
| }; | |
| }, []); | |
| useLayoutEffect(() => { | |
| if ( | |
| lastTranslationResultRef.current != null && | |
| isScrolledToBottomRef.current | |
| ) { | |
| // Scroll the div to the most recent entry | |
| lastTranslationResultRef.current.scrollIntoView(); | |
| } | |
| // Run the effect every time data is received, so that | |
| // we scroll to the bottom even if we're just adding text to | |
| // a pre-existing chunk | |
| }, [receivedData]); | |
| useEffect(() => { | |
| if (!animateTextDisplay) { | |
| return; | |
| } | |
| if ( | |
| translationSentencesAnimatedIndex < translationSentencesBaseTotalLength | |
| ) { | |
| const timeout = setTimeout(() => { | |
| setTranslationSentencesAnimatedIndex((prev) => prev + 1); | |
| debug()?.startRenderText(); | |
| }, TYPING_ANIMATION_DELAY_MS); | |
| return () => clearTimeout(timeout); | |
| } else { | |
| debug()?.endRenderText(); | |
| } | |
| }, [ | |
| animateTextDisplay, | |
| translationSentencesAnimatedIndex, | |
| translationSentencesBaseTotalLength, | |
| ]); | |
| /****************************************** | |
| * Sub-components | |
| ******************************************/ | |
| const volumeSliderNode = ( | |
| <Stack | |
| spacing={2} | |
| direction="row" | |
| sx={{mb: 1, width: '100%'}} | |
| alignItems="center"> | |
| <VolumeDown color="primary" /> | |
| <Slider | |
| aria-label="Volume" | |
| defaultValue={1} | |
| scale={getGainScaledValue} | |
| min={0} | |
| max={3} | |
| step={0.1} | |
| marks={[ | |
| {value: 0, label: '0%'}, | |
| {value: 1, label: '100%'}, | |
| {value: 2, label: '400%'}, | |
| {value: 3, label: '700%'}, | |
| ]} | |
| valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`} | |
| valueLabelDisplay="auto" | |
| value={gain} | |
| onChange={(_event: Event, newValue: number | number[]) => { | |
| if (typeof newValue === 'number') { | |
| const scaledGain = getGainScaledValue(newValue); | |
| // We want the actual gain node to use the scaled value | |
| bufferedSpeechPlayer.setGain(scaledGain); | |
| // But we want react state to keep track of the non-scaled value | |
| setGain(newValue); | |
| } else { | |
| console.error( | |
| `[volume slider] Unexpected non-number value: ${newValue}`, | |
| ); | |
| } | |
| }} | |
| /> | |
| <VolumeUp color="primary" /> | |
| </Stack> | |
| ); | |
| const xrDialogComponent = ( | |
| <XRDialog | |
| animateTextDisplay={ | |
| animateTextDisplay && | |
| translationSentencesAnimatedIndex == translationSentencesBaseTotalLength | |
| } | |
| bufferedSpeechPlayer={bufferedSpeechPlayer} | |
| translationSentences={translationSentences} | |
| roomState={roomState} | |
| roomID={roomID} | |
| startStreaming={startStreaming} | |
| stopStreaming={stopStreaming} | |
| debugParam={debugParam} | |
| onARHidden={() => { | |
| setAnimateTextDisplay(urlParams.animateTextDisplay); | |
| }} | |
| onARVisible={() => setAnimateTextDisplay(false)} | |
| /> | |
| ); | |
| return ( | |
| <div className="app-wrapper-sra"> | |
| <Box | |
| // eslint-disable-next-line @typescript-eslint/ban-ts-comment | |
| // @ts-ignore Not sure why it's complaining about complexity here | |
| sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}> | |
| <div className="main-container-sra"> | |
| <div className="top-section-sra horizontal-padding-sra"> | |
| <div className="header-container-sra"> | |
| <img | |
| src={seamlessLogoUrl} | |
| className="header-icon-sra" | |
| alt="Seamless Translation Logo" | |
| height={150} | |
| width={225} | |
| /> | |
| <div> | |
| <Typography variant="h1" sx={{color: '#800020'}}> | |
| Pietro's translator | |
| </Typography> | |
| <Typography variant="body2" sx={{color: '#800020'}}> | |
| <span style={{ fontStyle: 'italic' }}> | |
| Making communication easier | |
| </span> | |
| </Typography> | |
| </div> | |
| </div> | |
| <div className="header-container-sra"> | |
| <div> | |
| <Typography variant="body2" sx={{color: '#65676B'}}> | |
| Hey <strong style={{ fontWeight: 'bold' }}>Pietro</strong>, <strong style={{ fontWeight: 'bold' }}>it's good to see you!</strong> | |
| <br/> | |
| You can use this platform to translate from/to Italian and many some other languages. | |
| <br/> | |
| Use headphones if you are both speaker and listener to prevent feedback. | |
| <br/> | |
| <br/> | |
| <a target="_blank" rel="noopener noreferrer" href="https://ai.meta.com/research/seamless-communication/">SeamlessStreaming</a> is | |
| a research model and streaming quality works best if you pause | |
| every couple of sentences. The real-time performance will degrade | |
| if you try streaming multiple speakers at the same time. | |
| <br/> | |
| <br/> | |
| Let's try! | |
| </Typography> | |
| </div> | |
| </div> | |
| <Stack spacing="22px" direction="column"> | |
| <Box> | |
| { <RoomConfig | |
| roomState={roomState} | |
| serverState={serverState} | |
| streamingStatus={streamingStatus} | |
| onJoinRoomOrUpdateRoles={() => { | |
| // If the user has switched from speaker to listener we need to tell the | |
| // player to play eagerly, since currently the listener doesn't have any stop/start controls | |
| bufferedSpeechPlayer.start(); | |
| }} | |
| /> } | |
| {isListener && !isSpeaker && ( | |
| <Box | |
| sx={{ | |
| paddingX: 6, | |
| paddingBottom: 2, | |
| marginY: 2, | |
| display: 'flex', | |
| flexDirection: 'column', | |
| alignItems: 'center', | |
| }}> | |
| {volumeSliderNode} | |
| </Box> | |
| )} | |
| </Box> | |
| {isSpeaker && ( | |
| <> | |
| <Divider /> | |
| <Stack spacing="12px" direction="column"> | |
| {/* <FormLabel id="output-modes-radio-group-label"> | |
| Model | |
| </FormLabel> */} | |
| <FormControl | |
| disabled={ | |
| streamFixedConfigOptionsDisabled || | |
| agentsCapabilities.length === 0 | |
| } | |
| fullWidth | |
| sx={{minWidth: '14em'}}> | |
| {/* <InputLabel id="model-selector-input-label"> | |
| Model | |
| </InputLabel> */} | |
| {/* <Select | |
| labelId="model-selector-input-label" | |
| label="Model" | |
| onChange={(e: SelectChangeEvent) => { | |
| const newAgent = | |
| agentsCapabilities.find( | |
| (agent) => e.target.value === agent.name, | |
| ) ?? null; | |
| if (newAgent == null) { | |
| console.error( | |
| 'Unable to find agent with name', | |
| e.target.value, | |
| ); | |
| } | |
| setAgentAndUpdateParams(newAgent); | |
| }} | |
| value={model ?? ''}> | |
| {agentsCapabilities.map((agent) => ( | |
| <MenuItem value={agent.name} key={agent.name}> | |
| {agent.name} | |
| </MenuItem> | |
| ))} | |
| </Select> */} | |
| </FormControl> | |
| </Stack> | |
| <Stack spacing={0.5}> | |
| <FormLabel id="output-modes-radio-group-label"> | |
| Pietro, can you please select the target language? | |
| </FormLabel> | |
| <Box sx={{paddingTop: 2, paddingBottom: 1}}> | |
| <FormControl fullWidth sx={{minWidth: '14em'}}> | |
| <InputLabel id="target-selector-input-label"> | |
| Target Language | |
| </InputLabel> | |
| <Select | |
| labelId="target-selector-input-label" | |
| label="Target Language" | |
| onChange={(e: SelectChangeEvent) => { | |
| setTargetLang(e.target.value); | |
| onSetDynamicConfig({ | |
| targetLanguage: e.target.value, | |
| }); | |
| }} | |
| value={targetLang ?? ''}> | |
| {currentAgent?.targetLangs.map((langCode) => ( | |
| <MenuItem value={langCode} key={langCode}> | |
| {getLanguageFromThreeLetterCode(langCode) != null | |
| ? `${getLanguageFromThreeLetterCode( | |
| langCode, | |
| )} (${langCode})` | |
| : langCode} | |
| </MenuItem> | |
| ))} | |
| </Select> | |
| </FormControl> | |
| </Box> | |
| <Grid container> | |
| <Grid item xs={12} sm={4}> | |
| <FormControl | |
| disabled={streamFixedConfigOptionsDisabled}> | |
| <RadioGroup | |
| aria-labelledby="output-modes-radio-group-label" | |
| value={outputMode} | |
| onChange={(e) => | |
| setOutputMode( | |
| e.target.value as SupportedOutputMode, | |
| ) | |
| } | |
| name="output-modes-radio-buttons-group"> | |
| { | |
| // TODO: Use supported modalities from agentCapabilities | |
| SUPPORTED_OUTPUT_MODES.map(({value, label}) => ( | |
| <FormControlLabel | |
| key={value} | |
| value={value} | |
| control={<Radio />} | |
| label={label} | |
| /> | |
| )) | |
| } | |
| </RadioGroup> | |
| </FormControl> | |
| </Grid> | |
| <Grid item xs={12} sm={8}> | |
| <Stack | |
| direction="column" | |
| spacing={1} | |
| alignItems="flex-start" | |
| sx={{flexGrow: 1}}> | |
| {/* {currentAgent?.dynamicParams?.includes( | |
| 'expressive', | |
| ) && ( | |
| <FormControlLabel | |
| control={ | |
| <Switch | |
| checked={enableExpressive ?? false} | |
| onChange={( | |
| event: React.ChangeEvent<HTMLInputElement>, | |
| ) => { | |
| const newValue = event.target.checked; | |
| setEnableExpressive(newValue); | |
| onSetDynamicConfig({ | |
| expressive: newValue, | |
| }); | |
| }} | |
| /> | |
| } | |
| label="Expressive" | |
| /> | |
| )} */} | |
| {isListener && ( | |
| <Box | |
| sx={{ | |
| flexGrow: 1, | |
| paddingX: 1.5, | |
| paddingY: 1.5, | |
| width: '100%', | |
| }}> | |
| {volumeSliderNode} | |
| </Box> | |
| )} | |
| </Stack> | |
| </Grid> | |
| </Grid> | |
| </Stack> | |
| <Stack | |
| direction="row" | |
| spacing={2} | |
| justifyContent="space-between"> | |
| <Box sx={{flex: 1}}> | |
| <FormControl disabled={streamFixedConfigOptionsDisabled}> | |
| {/* <FormLabel id="input-source-radio-group-label"> | |
| Input Source | |
| </FormLabel> */} | |
| {/* <RadioGroup | |
| aria-labelledby="input-source-radio-group-label" | |
| value={inputSource} | |
| onChange={(e: React.ChangeEvent<HTMLInputElement>) => | |
| setInputSource( | |
| e.target.value as SupportedInputSource, | |
| ) | |
| } | |
| name="input-source-radio-buttons-group"> | |
| {SUPPORTED_INPUT_SOURCES.map(({label, value}) => ( | |
| <FormControlLabel | |
| key={value} | |
| value={value} | |
| control={<Radio />} | |
| label={label} | |
| /> | |
| ))} */} | |
| {/* </RadioGroup> */} | |
| </FormControl> | |
| </Box> | |
| <Box sx={{flex: 1, flexGrow: 2}}> | |
| <FormControl disabled={streamFixedConfigOptionsDisabled}> | |
| <FormLabel>Options</FormLabel> | |
| <FormControlLabel | |
| control={ | |
| <Checkbox | |
| checked={ | |
| enableNoiseSuppression ?? | |
| AUDIO_STREAM_DEFAULTS[inputSource] | |
| .noiseSuppression | |
| } | |
| onChange={( | |
| event: React.ChangeEvent<HTMLInputElement>, | |
| ) => | |
| setEnableNoiseSuppression(event.target.checked) | |
| } | |
| /> | |
| } | |
| label="Noise Suppression" | |
| /> | |
| <FormControlLabel | |
| control={ | |
| <Checkbox | |
| checked={ | |
| enableEchoCancellation ?? | |
| AUDIO_STREAM_DEFAULTS[inputSource] | |
| .echoCancellation | |
| } | |
| onChange={( | |
| event: React.ChangeEvent<HTMLInputElement>, | |
| ) => | |
| setEnableEchoCancellation(event.target.checked) | |
| } | |
| /> | |
| } | |
| label="Echo Cancellation (not recommended)" | |
| /> | |
| <FormControlLabel | |
| control={ | |
| <Checkbox | |
| checked={serverDebugFlag} | |
| onChange={( | |
| event: React.ChangeEvent<HTMLInputElement>, | |
| ) => setServerDebugFlag(event.target.checked)} | |
| /> | |
| } | |
| label="Enable Server Debugging" | |
| /> | |
| </FormControl> | |
| </Box> | |
| </Stack> | |
| {isSpeaker && | |
| isListener && | |
| inputSource === 'userMedia' && | |
| !enableEchoCancellation && | |
| gain !== 0 && ( | |
| <div> | |
| <Alert severity="warning" icon={<HeadphonesIcon />}> | |
| Headphones required to prevent feedback. | |
| </Alert> | |
| </div> | |
| )} | |
| {isSpeaker && enableEchoCancellation && ( | |
| <div> | |
| <Alert severity="warning"> | |
| We don't recommend using echo cancellation as it may | |
| distort the input audio. If possible, use headphones and | |
| disable echo cancellation instead. | |
| </Alert> | |
| </div> | |
| )} | |
| <Stack direction="row" spacing={2}> | |
| {streamingStatus === 'stopped' ? ( | |
| <Button | |
| variant="contained" | |
| onClick={startStreaming} | |
| disabled={ | |
| roomID == null || | |
| // Prevent users from starting streaming if there is a server lock with an active session | |
| (serverState?.serverLock?.isActive === true && | |
| serverState.serverLock.clientID !== clientID) | |
| }> | |
| {buttonLabelMap[streamingStatus]} | |
| </Button> | |
| ) : ( | |
| <Button | |
| variant="contained" | |
| color={ | |
| streamingStatus === 'running' ? 'error' : 'primary' | |
| } | |
| disabled={ | |
| streamingStatus === 'starting' || roomID == null | |
| } | |
| onClick={stopStreaming}> | |
| {buttonLabelMap[streamingStatus]} | |
| </Button> | |
| )} | |
| <Box> | |
| <Button | |
| variant="contained" | |
| aria-label={muted ? 'Unmute' : 'Mute'} | |
| color={muted ? 'info' : 'primary'} | |
| onClick={() => setMuted((prev) => !prev)} | |
| sx={{ | |
| borderRadius: 100, | |
| paddingX: 0, | |
| minWidth: '36px', | |
| }}> | |
| {muted ? <MicOff /> : <Mic />} | |
| </Button> | |
| </Box> | |
| {roomID == null ? null : ( | |
| <Box | |
| sx={{ | |
| flexGrow: 1, | |
| display: 'flex', | |
| justifyContent: 'flex-end', | |
| }}> | |
| {xrDialogComponent} | |
| </Box> | |
| )} | |
| </Stack> | |
| {serverExceptions.length > 0 && ( | |
| <div> | |
| <Alert severity="error"> | |
| {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`} | |
| </Alert> | |
| </div> | |
| )} | |
| {serverState != null && hasMaxSpeakers && ( | |
| <div> | |
| <Alert severity="error"> | |
| {`Maximum number of speakers reached. Please try again at a later time.`} | |
| </Alert> | |
| </div> | |
| )} | |
| {serverState != null && | |
| serverState.totalActiveTranscoders >= | |
| TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && ( | |
| <div> | |
| <Alert severity="warning"> | |
| {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`} | |
| </Alert> | |
| </div> | |
| )} | |
| {serverState?.serverLock != null && | |
| serverState.serverLock.clientID !== clientID && ( | |
| <div> | |
| <Alert severity="warning"> | |
| {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`} | |
| </Alert> | |
| </div> | |
| )} | |
| </> | |
| )} | |
| </Stack> | |
| {isListener && !isSpeaker && ( | |
| <Box sx={{marginBottom: 1, marginTop: 2}}> | |
| {xrDialogComponent} | |
| </Box> | |
| )} | |
| </div> | |
| {debugParam && roomID != null && <DebugSection />} | |
| <div className="translation-text-container-sra horizontal-padding-sra"> | |
| <Stack | |
| direction="row" | |
| spacing={2} | |
| sx={{mb: '16px', alignItems: 'center'}}> | |
| <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}> | |
| Transcript | |
| </Typography> | |
| {isSpeaker && ( | |
| <Button | |
| variant="text" | |
| size="small" | |
| onClick={onClearTranscriptForAll}> | |
| Clear Transcript for All | |
| </Button> | |
| )} | |
| </Stack> | |
| <Stack direction="row"> | |
| <div className="translation-text-sra"> | |
| {translationSentencesWithEmptyStartingString.map( | |
| (sentence, index, arr) => { | |
| const isLast = index === arr.length - 1; | |
| const maybeRef = isLast | |
| ? {ref: lastTranslationResultRef} | |
| : {}; | |
| return ( | |
| <div className="text-chunk-sra" key={index} {...maybeRef}> | |
| <Typography variant="body1"> | |
| {sentence} | |
| {animateTextDisplay && isLast && ( | |
| <Blink | |
| intervalMs={CURSOR_BLINK_INTERVAL_MS} | |
| shouldBlink={ | |
| (roomState?.activeTranscoders ?? 0) > 0 | |
| }> | |
| <Typography | |
| component="span" | |
| variant="body1" | |
| sx={{ | |
| display: 'inline-block', | |
| transform: 'scaleY(1.25) translateY(-1px)', | |
| }}> | |
| {'|'} | |
| </Typography> | |
| </Blink> | |
| )} | |
| </Typography> | |
| </div> | |
| ); | |
| }, | |
| )} | |
| </div> | |
| </Stack> | |
| </div> | |
| </div> | |
| </Box> | |
| </div> | |
| ); | |
| } | |