import React, { useEffect, useState } from "react";
import axios from "axios";

import Modal from "./modal/Modal";
import { UrlInput } from "./modal/UrlInput";
import AudioPlayer from "./AudioPlayer";
import { TranscribeButton } from "./TranscribeButton";
import Constants from "../utils/Constants";
import { Transcriber } from "../hooks/useTranscriber";
import Progress from "./Progress";
import AudioRecorder from "./AudioRecorder";

function titleCase(str: string) {
    str = str.toLowerCase();
    return (str.match(/\w+.?/g) || [])
        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
        .join("");
}

// List of supported languages:
// https://help.openai.com/en/articles/7031512-whisper-api-faq
// https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79
const LANGUAGES = {
    en: "english", zh: "chinese", de: "german", es: "spanish/castilian",
    ru: "russian", ko: "korean", fr: "french", ja: "japanese",
    pt: "portuguese", tr: "turkish", pl: "polish", ca: "catalan/valencian",
    nl: "dutch/flemish", ar: "arabic", sv: "swedish", it: "italian",
    id: "indonesian", hi: "hindi", fi: "finnish", vi: "vietnamese",
    he: "hebrew", uk: "ukrainian", el: "greek", ms: "malay",
    cs: "czech", ro: "romanian/moldavian/moldovan", da: "danish", hu: "hungarian",
    ta: "tamil", no: "norwegian", th: "thai", ur: "urdu",
    hr: "croatian", bg: "bulgarian", lt: "lithuanian", la: "latin",
    mi: "maori", ml: "malayalam", cy: "welsh", sk: "slovak",
    te: "telugu", fa: "persian", lv: "latvian", bn: "bengali",
    sr: "serbian", az: "azerbaijani", sl: "slovenian", kn: "kannada",
    et: "estonian", mk: "macedonian", br: "breton", eu: "basque",
    is: "icelandic", hy: "armenian", ne: "nepali", mn: "mongolian",
    bs: "bosnian", kk: "kazakh", sq: "albanian", sw: "swahili",
    gl: "galician", mr: "marathi", pa: "punjabi/panjabi", si: "sinhala/sinhalese",
    km: "khmer", sn: "shona", yo: "yoruba", so: "somali",
    af: "afrikaans", oc: "occitan", ka: "georgian", be: "belarusian",
    tg: "tajik", sd: "sindhi", gu: "gujarati", am: "amharic",
    yi: "yiddish", lo: "lao", uz: "uzbek", fo: "faroese",
    ht: "haitian creole/haitian", ps: "pashto/pushto", tk: "turkmen", nn: "nynorsk",
    mt: "maltese", sa: "sanskrit", lb: "luxembourgish/letzeburgesch", my: "myanmar/burmese",
    bo: "tibetan", tl: "tagalog", mg: "malagasy", as: "assamese",
    tt: "tatar", haw: "hawaiian", ln: "lingala", ha: "hausa",
    ba: "bashkir", jw: "javanese", su: "sundanese",
};

export enum AudioSource {
    URL = "URL",
    FILE = "FILE",
    RECORDING = "RECORDING",
}

export function AudioManager(props: { transcriber: Transcriber }) {
    const [progress, setProgress] = useState<number | undefined>(undefined);
    const [audioData, setAudioData] = useState<
        | {
              buffer: AudioBuffer;
              url: string;
              source: AudioSource;
              mimeType: string;
          }
        | undefined
    >(undefined);
    const [audioDownloadUrl, setAudioDownloadUrl] = useState<
        string | undefined
    >(undefined);

    const isAudioLoading = progress !== undefined;

    const resetAudio = () => {
        setAudioData(undefined);
        setAudioDownloadUrl(undefined);
    };

    const setAudioFromDownload = async (
        data: ArrayBuffer,
        mimeType: string,
    ) => {
        const audioCTX = new AudioContext({
            sampleRate: Constants.SAMPLING_RATE,
        });
        const blobUrl = URL.createObjectURL(
            new Blob([data], { type: "audio/*" }),
        );
        const decoded = await audioCTX.decodeAudioData(data);
        setAudioData({
            buffer: decoded,
            url: blobUrl,
            source: AudioSource.URL,
            mimeType: mimeType,
        });
    };

    const setAudioFromRecording = async (data: Blob) => {
        resetAudio();
        setProgress(0);
        const blobUrl = URL.createObjectURL(data);
        const fileReader = new FileReader();
        fileReader.onprogress = (event) => {
            setProgress(event.loaded / event.total || 0);
        };
        fileReader.onloadend = async () => {
            const audioCTX = new AudioContext({
                sampleRate: Constants.SAMPLING_RATE,
            });
            const arrayBuffer = fileReader.result as ArrayBuffer;
            const decoded = await audioCTX.decodeAudioData(arrayBuffer);
            setProgress(undefined);
            setAudioData({
                buffer: decoded,
                url: blobUrl,
                source: AudioSource.RECORDING,
                mimeType: data.type,
            });
        };
        fileReader.readAsArrayBuffer(data);
    };

    const downloadAudioFromUrl = async (
        requestAbortController: AbortController,
    ) => {
        if (audioDownloadUrl) {
            try {
                setAudioData(undefined);
                setProgress(0);
                const { data, headers } = (await axios.get(audioDownloadUrl, {
                    signal: requestAbortController.signal,
                    responseType: "arraybuffer",
                    onDownloadProgress(progressEvent) {
                        setProgress(progressEvent.progress || 0);
                    },
                })) as {
                    data: ArrayBuffer;
                    headers: { "content-type": string };
                };

                let mimeType = headers["content-type"];
                if (!mimeType || mimeType === "audio/wave") {
                    mimeType = "audio/wav";
                }
                await setAudioFromDownload(data, mimeType);
            } catch (error) {
                console.log("Request failed or aborted", error);
            } finally {
                setProgress(undefined);
            }
        }
    };

    // When URL changes, download audio
    useEffect(() => {
        if (audioDownloadUrl) {
            const requestAbortController = new AbortController();
            downloadAudioFromUrl(requestAbortController);
            return () => {
                requestAbortController.abort();
            };
        }
        // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [audioDownloadUrl]);

    return (
        <>
} text={"From URL"} onUrlUpdate={(e) => { props.transcriber.onInputChange(); setAudioDownloadUrl(e); }} /> } text={"From file"} onFileUpdate={(decoded, blobUrl, mimeType) => { props.transcriber.onInputChange(); setAudioData({ buffer: decoded, url: blobUrl, source: AudioSource.FILE, mimeType: mimeType, }); }} /> {navigator.mediaDevices && ( <> } text={"Record"} setAudioData={(e) => { props.transcriber.onInputChange(); setAudioFromRecording(e); }} /> )}
{ }
{audioData && ( <>
{ props.transcriber.start(audioData.buffer); }} isModelLoading={props.transcriber.isModelLoading} // isAudioLoading || isTranscribing={props.transcriber.isBusy} /> } />
{props.transcriber.progressItems.length > 0 && (
{props.transcriber.progressItems.map((data) => (
))}
)} )} ); } function SettingsTile(props: { icon: JSX.Element; className?: string; transcriber: Transcriber; }) { const [showModal, setShowModal] = useState(false); const onClick = () => { setShowModal(true); }; const onClose = () => { setShowModal(false); }; const onSubmit = (url: string) => { onClose(); }; return (
function SettingsModal(props: {
    show: boolean;
    onSubmit: (url: string) => void;
    onClose: () => void;
    transcriber: Transcriber;
}) {
    const names = Object.values(LANGUAGES).map(titleCase);

    const models = {
        // Original checkpoints
        "Xenova/whisper-tiny": [41, 152],
        "Xenova/whisper-base": [77, 291],
        "Xenova/whisper-small": [249],
        "Xenova/whisper-medium": [776],

        // Distil Whisper (English-only)
        "distil-whisper/distil-medium.en": [402],
        "distil-whisper/distil-large-v2": [767],
    };
    return (
        <Modal
            show={props.show}
            content={
                <>
                    {/* Model picker; the `setModel` setter on the Transcriber hook is
                        assumed here (only setMultilingual/setQuantized survive in this copy). */}
                    <label>Select the model to use.</label>
                    <select
                        onChange={(e) => {
                            props.transcriber.setModel(e.target.value);
                        }}
                    >
                        {Object.keys(models).map((key) => (
                            <option key={key} value={key}>
                                {key}
                            </option>
                        ))}
                    </select>

                    <div>
                        <input
                            id="multilingual"
                            type="checkbox"
                            checked={props.transcriber.multilingual}
                            onChange={(e) => {
                                props.transcriber.setMultilingual(
                                    e.target.checked,
                                );
                            }}
                        />
                        <label htmlFor={"multilingual"}>Multilingual</label>

                        <input
                            id="quantize"
                            type="checkbox"
                            checked={props.transcriber.quantized}
                            onChange={(e) => {
                                props.transcriber.setQuantized(
                                    e.target.checked,
                                );
                            }}
                        />
                        <label htmlFor={"quantize"}>Quantized</label>
                    </div>

                    {props.transcriber.multilingual && (
                        <>
                            {/* Language and task pickers; the `setLanguage` and `setSubtask`
                                setters on the Transcriber hook are assumed here. */}
                            <label>Select the source language.</label>
                            <select
                                onChange={(e) => {
                                    props.transcriber.setLanguage(
                                        e.target.value,
                                    );
                                }}
                            >
                                {Object.keys(LANGUAGES).map((key, i) => (
                                    <option key={key} value={key}>
                                        {names[i]}
                                    </option>
                                ))}
                            </select>

                            <label>Select the task to perform.</label>
                            <select
                                onChange={(e) => {
                                    props.transcriber.setSubtask(
                                        e.target.value,
                                    );
                                }}
                            >
                                <option value={"transcribe"}>Transcribe</option>
                                <option value={"translate"}>
                                    Translate (to English)
                                </option>
                            </select>
                        </>
                    )}
                </>
            }
            onClose={props.onClose}
            onSubmit={() => {}}
        />
    );
}

function VerticalBar() {
    // Thin vertical divider between the input tiles.
    return <div className="w-[1px] bg-slate-200"></div>;
}
function AudioDataBar(props: { progress: number }) {
    // Expects a fractional progress value (0-1) and renders it as a percentage bar.
    return <ProgressBar progress={`${Math.round(props.progress * 100)}%`} />;
}

function ProgressBar(props: { progress: string }) {
    // Horizontal bar whose filled width is the given CSS percentage string.
    return (
        <div className="w-full bg-gray-200 rounded-full h-1">
            <div
                className="bg-blue-600 h-1 rounded-full transition-all"
                style={{ width: props.progress }}
            ></div>
        </div>
    );
}
function UrlTile(props: {
    icon: JSX.Element;
    text: string;
    onUrlUpdate: (url: string) => void;
}) {
    const [showModal, setShowModal] = useState(false);

    const onClick = () => {
        setShowModal(true);
    };

    const onClose = () => {
        setShowModal(false);
    };

    const onSubmit = (url: string) => {
        props.onUrlUpdate(url);
        onClose();
    };

    return (
        <>
            <Tile icon={props.icon} text={props.text} onClick={onClick} />
            <UrlModal show={showModal} onSubmit={onSubmit} onClose={onClose} />
        </>
    );
}

function UrlModal(props: {
    show: boolean;
    onSubmit: (url: string) => void;
    onClose: () => void;
}) {
    const [url, setUrl] = useState(Constants.DEFAULT_AUDIO_URL);

    const onChange = (event: React.ChangeEvent<HTMLInputElement>) => {
        setUrl(event.target.value);
    };

    const onSubmit = () => {
        props.onSubmit(url);
    };

    return (
        <Modal
            show={props.show}
            content={
                <>
                    {"Enter the URL of the audio file you want to load."}
                    {/* UrlInput prop names (onChange, value) are assumed here. */}
                    <UrlInput onChange={onChange} value={url} />
                </>
            }
            onClose={props.onClose}
            submitText={"Load"}
            onSubmit={onSubmit}
        />
    );
}

function FileTile(props: {
    icon: JSX.Element;
    text: string;
    onFileUpdate: (
        decoded: AudioBuffer,
        blobUrl: string,
        mimeType: string,
    ) => void;
}) {
    // const audioPlayer = useRef(null);

    // Create hidden input element
    const elem = document.createElement("input");
    elem.type = "file";
    elem.oninput = (event) => {
        // Make sure we have files to use
        const files = (event.target as HTMLInputElement).files;
        if (!files) return;

        // Create a blob that we can use as an src for our audio element
        const urlObj = URL.createObjectURL(files[0]);
        const mimeType = files[0].type;

        const reader = new FileReader();
        reader.addEventListener("load", async (e) => {
            const arrayBuffer = e.target?.result as ArrayBuffer; // Get the ArrayBuffer
            if (!arrayBuffer) return;

            const audioCTX = new AudioContext({
                sampleRate: Constants.SAMPLING_RATE,
            });

            const decoded = await audioCTX.decodeAudioData(arrayBuffer);

            props.onFileUpdate(decoded, urlObj, mimeType);
        });
        reader.readAsArrayBuffer(files[0]);

        // Reset files
        elem.value = "";
    };

    return (
        <>
            <Tile
                icon={props.icon}
                text={props.text}
                onClick={() => elem.click()}
            />
        </>
    );
}

function RecordTile(props: {
    icon: JSX.Element;
    text: string;
    setAudioData: (data: Blob) => void;
}) {
    const [showModal, setShowModal] = useState(false);

    const onClick = () => {
        setShowModal(true);
    };

    const onClose = () => {
        setShowModal(false);
    };

    const onSubmit = (data: Blob | undefined) => {
        if (data) {
            props.setAudioData(data);
            onClose();
        }
    };

    return (
        <>
            <Tile icon={props.icon} text={props.text} onClick={onClick} />
            <RecordModal
                show={showModal}
                onSubmit={onSubmit}
                onClose={onClose}
            />
        </>
    );
}

function RecordModal(props: {
    show: boolean;
    onSubmit: (data: Blob | undefined) => void;
    onClose: () => void;
}) {
    const [audioBlob, setAudioBlob] = useState<Blob>();

    const onRecordingComplete = (blob: Blob) => {
        setAudioBlob(blob);
    };

    const onSubmit = () => {
        props.onSubmit(audioBlob);
        setAudioBlob(undefined);
    };

    const onClose = () => {
        props.onClose();
        setAudioBlob(undefined);
    };

    return (
        <Modal
            show={props.show}
            content={
                <>
                    {"Record audio using your microphone"}
                    {/* AudioRecorder prop name (onRecordingComplete) is assumed here. */}
                    <AudioRecorder onRecordingComplete={onRecordingComplete} />
                </>
            }
            onClose={onClose}
            submitText={"Load"}
            submitEnabled={audioBlob !== undefined}
            onSubmit={onSubmit}
        />
    );
}

function Tile(props: {
    icon: JSX.Element;
    text?: string;
    onClick?: () => void;
}) {
    // Clickable tile with an icon and an optional caption.
    return (
        <button onClick={props.onClick}>
            <div>{props.icon}</div>
            {props.text && <div>{props.text}</div>}
        </button>
    );
}

// The four icon components below originally returned inline SVG markup; the path
// data did not survive in this copy, so only empty placeholder SVGs remain.
function AnchorIcon() {
    return <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" />;
}

function FolderIcon() {
    return <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" />;
}

function SettingsIcon() {
    return <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" />;
}

function MicrophoneIcon() {
    return <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" />;
}