Spaces:
Running
Running
<!DOCTYPE html> <!-- Fix: declare standards mode; without it browsers render in quirks mode -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Whisper WAV Transcription</title>
    <style>
        body {
            font-family: sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            padding: 20px;
        }
        #container {
            width: 80%;
            border: 1px solid #ccc;
            padding: 20px;
            border-radius: 5px;
        }
        input[type="text"], input[type="file"], button {
            padding: 10px;
            margin: 10px 0;
            width: calc(100% - 22px);
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 3px;
        }
        button {
            background-color: #4CAF50;
            color: white;
            cursor: pointer;
        }
        button:disabled {
            background-color: #ccc;
            cursor: not-allowed;
        }
        #transcription {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #ccc;
            border-radius: 5px;
            white-space: pre-wrap;
            width: calc(100% - 32px); /* Adjust width for padding */
            box-sizing: border-box;
        }
    </style>
</head>
<body>
    <div id="container">
        <h1>Whisper WAV Transcription</h1>
        <h3>API Key</h3>
        <input type="text" id="apiKey"> <!-- API key, sent as a Bearer token (note: visible on screen) -->
        <h3>API base url (with '.../v1/' at the end)</h3>
        <input type="text" id="apiBaseUrl"> <!-- OpenAI-compatible base URL, e.g. https://api.openai.com/v1/ -->
        <label for="audioFile">Select WAV File:</label>
        <input type="file" id="audioFile" accept=".wav">
        <button id="transcribeButton" onclick="transcribeAudio()">Transcribe</button>
        <div id="transcription"></div>
        <button id="copyButton" onclick="copyToClipboard()" style="display: none;">Copy to Clipboard</button>
    </div>
| <script> | |
/**
 * Parse key=value pairs from a URL hash fragment into a plain object.
 *
 * Fixes over the original: removes a leftover debug console.log, keeps '='
 * characters inside values (the original destructured split('=') and silently
 * truncated them), and percent-decodes keys and values.
 *
 * @param {string} [hash] - Fragment to parse without the leading '#'.
 *   Defaults to the current page's hash (evaluated lazily, so tests can pass
 *   an explicit string without a `window` object).
 * @returns {Object<string, string>} Decoded key/value pairs; entries with an
 *   empty key or empty value are skipped, matching the original behavior.
 */
function getHashParams(hash = window.location.hash.substring(1)) {
    const params = {};
    for (const part of hash.split('&')) {
        const eq = part.indexOf('=');
        // Require a non-empty key AND a non-empty value (original `key && value`).
        if (eq > 0 && eq < part.length - 1) {
            const key = decodeURIComponent(part.slice(0, eq));
            params[key] = decodeURIComponent(part.slice(eq + 1));
        }
    }
    return params;
}
/**
 * Read the form inputs, split the selected WAV file into API-sized chunks,
 * and POST each chunk to `{apiBaseUrl}audio/transcriptions`, accumulating
 * the transcription text into the #transcription div.
 *
 * Fixes over the original:
 * - `document.getElementByID` is not a DOM API (TypeError at runtime);
 *   the correct method is `getElementById`.
 * - Removed `console.log(apiKey)` — it leaked the secret to the console.
 * - Validation now also requires the base URL, which the fetch below needs.
 * - Untrusted API text is written via `textContent`, not `innerHTML` (XSS).
 * - Non-JSON error bodies no longer mask the real HTTP error.
 */
async function transcribeAudio() {
    const apiKey = document.getElementById('apiKey').value;
    const apiBaseUrl = document.getElementById('apiBaseUrl').value;
    const audioFile = document.getElementById('audioFile').files[0];
    const transcriptionDiv = document.getElementById('transcription');

    if (!apiKey || !apiBaseUrl || !audioFile) {
        alert('Please provide the API key, the API base URL, and a WAV file.');
        return;
    }

    const transcribeButton = document.getElementById('transcribeButton');
    transcribeButton.disabled = true;
    transcribeButton.textContent = 'Transcribing...';
    transcriptionDiv.textContent = 'Transcribing... Please wait.';

    try {
        const chunks = await splitAudioFile(audioFile);
        let fullTranscription = '';
        // Chunks must be sent sequentially so the transcript stays in order.
        for (let i = 0; i < chunks.length; i++) {
            const formData = new FormData();
            formData.append('file', chunks[i], `chunk_${i + 1}.wav`);
            formData.append('model', 'whisper');
            const response = await fetch(`${apiBaseUrl}audio/transcriptions`, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${apiKey}`
                },
                body: formData
            });
            if (!response.ok) {
                // The error body may not be JSON (e.g. an HTML gateway page);
                // fall back to the HTTP status text instead of throwing here.
                const errorData = await response.json().catch(() => null);
                throw new Error(`API error: ${errorData?.error?.message || response.statusText}`);
            }
            const data = await response.json();
            fullTranscription += (fullTranscription ? ' ' : '') + (data.text ?? '').trim();
            transcriptionDiv.textContent = fullTranscription; // progressive update per chunk
        }
        transcriptionDiv.textContent = fullTranscription;
        document.getElementById('copyButton').style.display = 'block'; // Show copy button
    } catch (error) {
        console.error('Error during transcription:', error);
        transcriptionDiv.textContent = `Error: ${error.message}`;
    } finally {
        transcribeButton.disabled = false;
        transcribeButton.textContent = 'Transcribe';
    }
}
| // Audio Chunking Functions (from original code) | |
/**
 * Decode a WAV File and split it into chunks small enough for the
 * transcription API, re-encoding each chunk as a 16-bit PCM WAV File.
 *
 * Fixes over the original:
 * - Uses `file.arrayBuffer()` instead of wrapping a FileReader in a manual
 *   `new Promise` (explicit-construction anti-pattern); rejection semantics
 *   are unchanged — decode/read failures reject the returned promise.
 * - The size comment now matches the constant (24 MiB, deliberately under
 *   the API's 25 MB upload limit).
 * - Dropped the unused `chunkDuration` local.
 *
 * @param {File} file - WAV file selected by the user.
 * @returns {Promise<File[]>} One or more `chunk_N.wav` Files in order.
 */
async function splitAudioFile(file) {
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const arrayBuffer = await file.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    const chunkSize = 24 * 1024 * 1024; // 24 MiB, safely under the 25 MB API limit
    const sampleRate = audioBuffer.sampleRate;
    const bytesPerSample = 4; // decoded buffers are 32-bit float per sample
    const samplesPerChunk = Math.floor(chunkSize / bytesPerSample);
    const chunksCount = Math.ceil(audioBuffer.length / samplesPerChunk);

    const chunks = [];
    for (let i = 0; i < chunksCount; i++) {
        const startSample = i * samplesPerChunk;
        const endSample = Math.min((i + 1) * samplesPerChunk, audioBuffer.length);
        const chunkBuffer = audioContext.createBuffer(
            audioBuffer.numberOfChannels,
            endSample - startSample,
            sampleRate
        );
        // Copy each channel's slice [startSample, endSample) into the chunk buffer.
        for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
            const chunkChannelData = chunkBuffer.getChannelData(channel);
            audioBuffer.copyFromChannel(chunkChannelData, channel, startSample);
        }
        const wavBlob = await bufferToWav(chunkBuffer);
        chunks.push(new File([wavBlob], `chunk_${i + 1}.wav`, { type: 'audio/wav' }));
    }
    return chunks;
}
/**
 * Encode an AudioBuffer-like object as a 16-bit PCM WAV Blob.
 *
 * Fix over the original: the byte-rate field (offset 28) was hard-coded to
 * `sampleRate * 4`, which is only correct for 2-channel 16-bit audio; the WAV
 * spec requires `sampleRate * numChannels * bytesPerSample` (wrong for mono).
 * Also inlines the ASCII header writer so the function is self-contained.
 *
 * @param {{length:number, numberOfChannels:number, sampleRate:number,
 *          getChannelData(ch:number):Float32Array}} buffer
 * @returns {Blob} `audio/wav` blob: 44-byte RIFF header + interleaved PCM.
 */
function bufferToWav(buffer) {
    const numChannels = buffer.numberOfChannels;
    const numSamples = buffer.length;
    const bytesPerSample = 2;                      // 16-bit PCM output
    const blockAlign = numChannels * bytesPerSample;
    const dataSize = numSamples * blockAlign;

    // Interleave: sample i of channel c lands at index i*numChannels + c.
    const interleaved = new Float32Array(numSamples * numChannels);
    for (let channel = 0; channel < numChannels; channel++) {
        const channelData = buffer.getChannelData(channel);
        for (let i = 0; i < numSamples; i++) {
            interleaved[i * numChannels + channel] = channelData[i];
        }
    }

    const wavBuffer = new ArrayBuffer(44 + dataSize);
    const view = new DataView(wavBuffer);

    // Local ASCII writer keeps this function independent of siblings.
    const writeAscii = (offset, str) => {
        for (let i = 0; i < str.length; i++) {
            view.setUint8(offset + i, str.charCodeAt(i));
        }
    };

    writeAscii(0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true);        // RIFF chunk size
    writeAscii(8, 'WAVE');
    writeAscii(12, 'fmt ');
    view.setUint32(16, 16, true);                  // fmt chunk size
    view.setUint16(20, 1, true);                   // audio format: PCM
    view.setUint16(22, numChannels, true);
    view.setUint32(24, buffer.sampleRate, true);
    // Fixed byte rate: sampleRate * blockAlign (was hard-coded `* 4`).
    view.setUint32(28, buffer.sampleRate * blockAlign, true);
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, 16, true);                  // bits per sample
    writeAscii(36, 'data');
    view.setUint32(40, dataSize, true);

    // Clamp each float to [-1, 1] and scale to signed 16-bit little-endian.
    let offset = 44;
    for (let i = 0; i < interleaved.length; i++, offset += 2) {
        const s = Math.max(-1, Math.min(1, interleaved[i]));
        view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }

    return new Blob([wavBuffer], { type: 'audio/wav' });
}
// Write an ASCII string into a DataView one byte per UTF-16 code unit,
// starting at the given byte offset. Used for the fixed RIFF header tags.
function writeString(view, offset, string) {
    let i = 0;
    while (i < string.length) {
        view.setUint8(offset + i, string.charCodeAt(i));
        i += 1;
    }
}
// Copy the visible transcription text to the system clipboard,
// notifying the user on success and logging any failure to the console.
function copyToClipboard() {
    const text = document.getElementById('transcription').innerText;
    const pending = navigator.clipboard.writeText(text);
    pending
        .then(() => {
            alert('Transcription copied to clipboard!');
        })
        .catch((err) => {
            console.error('Failed to copy transcription: ', err);
        });
}
| </script> | |
| </body> | |
| </html> |