<!-- Hugging Face Space: prthm11 / AudioTransDiar (status: Sleeping; file size: 15,880 bytes) -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Transcription Studio</title>
    <!-- Tailwind Play CDN: supplies the utility classes used throughout the markup. -->
    <script src="https://cdn.tailwindcss.com"></script>
    <!-- Inter web font. Weights 500 and 800 are requested alongside 400/600/700 because the
         page uses the font-medium (500) and font-extrabold (800) utilities; without them the
         browser substitutes the nearest loaded weight. -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
    <!-- Font Awesome 6 icon font (the "fas fa-..." classes). -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
    <style>
        /* Page base: Inter typeface, white text on a dark purple backdrop. */
        body {
            color: #ffffff;
            background-color: #1a1a2e;
            font-family: 'Inter', sans-serif;
        }

        /* Container surface: slightly lighter purple than the page. */
        .container-bg { background-color: #2c2c44; }

        /* Panel surface: darker than the container surface. */
        .panel-bg { background-color: #22223b; }

        /* Shared look for the select and number controls. */
        .input-field {
            color: #e0e0e0;
            border: 1px solid #4a4a6b;
            background-color: #3b3b55;
        }

        /* Purple halo around a button. */
        .button-glow { box-shadow: 0 0 10px 2px #6a1b9a; }

        /* Soft lavender glow behind text. */
        .glow-text { text-shadow: 0 0 8px #d1c4e9; }
    </style>
</head>
<body class="flex items-center justify-center min-h-screen p-8">
    <div class="w-full max-w-6xl">
        <!-- Page title and tagline -->
        <header class="text-center mb-10">
            <h1 class="text-5xl font-extrabold text-[#d1c4e9] glow-text mb-2">
                Audio Transcription Studio
            </h1>
            <p class="text-lg text-gray-400">
                Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
            </p>
        </header>

        <!-- Main Content Grid -->
        <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
            <!-- Left Panel: Live Transcription -->
            <div class="lg:col-span-2 panel-bg p-8 rounded-2xl shadow-xl">
                <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-file-alt mr-2" aria-hidden="true"></i> Live Transcription</h2>

                <!-- Recording Status & Button -->
                <div id="recording-status-area" class="flex flex-col items-center justify-center p-6 mb-8">
                    <!-- Pulsing microphone shown while recording; the script toggles its "hidden" class. -->
                    <div id="status-spinner" class="relative w-32 h-32 hidden">
                        <!-- The rings are staggered with arbitrary animation-delay properties: Tailwind's
                             delay-* utilities set transition-delay, which the ping animation ignores. -->
                        <div class="absolute inset-0 border-4 border-purple-500 rounded-full animate-ping"></div>
                        <div class="absolute inset-4 border-4 border-purple-400 rounded-full animate-ping [animation-delay:200ms]"></div>
                        <div class="absolute inset-8 border-4 border-purple-300 rounded-full animate-ping [animation-delay:400ms]"></div>
                        <div class="flex items-center justify-center h-full w-full">
                            <i class="fas fa-microphone text-4xl text-white" aria-hidden="true"></i>
                        </div>
                    </div>
                    <!-- Static microphone badge shown while idle; the script toggles its "hidden" class. -->
                    <div id="status-icon" class="relative w-32 h-32 flex items-center justify-center bg-purple-600 rounded-full">
                        <i class="fas fa-microphone text-4xl text-white" aria-hidden="true"></i>
                    </div>
                    <p id="status-text" class="mt-4 text-green-400 font-semibold text-lg">Ready to record</p>
                    <!-- Only one of the two buttons is visible at a time; explicit type="button" keeps
                         them from acting as submit buttons if they are ever placed inside a form. -->
                    <div id="start-stop-buttons" class="mt-4">
                        <button type="button" id="start-btn" class="bg-purple-600 hover:bg-purple-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 button-glow disabled:opacity-50 disabled:cursor-not-allowed">
                            Start Recording
                        </button>
                        <button type="button" id="stop-btn" class="bg-red-600 hover:bg-red-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 disabled:opacity-50 disabled:cursor-not-allowed hidden">
                            Stop Recording
                        </button>
                    </div>
                </div>

                <!-- Live Transcription Display: the script replaces this content while recording. -->
                <div id="live-transcription" class="bg-[#1b1b2a] p-6 rounded-lg h-96 overflow-y-auto border border-[#3b3b55]">
                    <p class="text-gray-400 text-center text-lg mt-12">Start recording to see live transcription</p>
                </div>
            </div>

            <!-- Right Panel: Recording Settings & Files -->
            <div class="lg:col-span-1 space-y-8">
                <!-- Recording Settings Panel. The icons are decorative (the adjacent text carries the
                     meaning), so each one is hidden from assistive technology. -->
                <div class="panel-bg p-8 rounded-2xl shadow-xl">
                    <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-cogs mr-2" aria-hidden="true"></i> Recording Settings</h2>
                    <div class="space-y-6">
                        <!-- Microphone Device: options are filled in by the script from /api/devices. -->
                        <div>
                            <label for="mic-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-microphone mr-2" aria-hidden="true"></i>Microphone Device</label>
                            <select id="mic-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
                                <option value="">Loading devices...</option>
                            </select>
                        </div>

                        <!-- System Audio: the value "null" is the script's marker for "no system audio device". -->
                        <div>
                            <label for="sys-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-desktop mr-2" aria-hidden="true"></i>System Audio (Optional)</label>
                            <select id="sys-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
                                <option value="null">None</option>
                            </select>
                        </div>

                        <!-- Chunk Length -->
                        <div>
                            <label for="chunk-secs-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-clock mr-2" aria-hidden="true"></i>Chunk Length (seconds)</label>
                            <input type="number" id="chunk-secs-input" value="5" min="1" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
                        </div>

                        <!-- Transcription Model -->
                        <div>
                            <label for="model-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-brain mr-2" aria-hidden="true"></i>Transcription Model</label>
                            <select id="model-input" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
                                <option value="medium">Medium (Balanced)</option>
                                <option value="small">Small</option>
                                <option value="large">Large</option>
                            </select>
                        </div>

                        <!-- Disable Transcription Toggle -->
                        <div class="flex items-center">
                            <input id="no-transcribe-checkbox" type="checkbox" class="h-5 w-5 text-purple-600 focus:ring-purple-500 rounded border-gray-600 bg-gray-700">
                            <label for="no-transcribe-checkbox" class="ml-2 block text-sm text-gray-300">Disable Transcription</label>
                        </div>
                    </div>
                </div>

                <!-- Recording Files Panel: the list is filled in by the script from /api/final-files. -->
                <div class="panel-bg p-8 rounded-2xl shadow-xl">
                    <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-folder-open mr-2" aria-hidden="true"></i> Recording Files</h2>
                    <div id="final-files-list" class="space-y-2 text-gray-300">
                        <p class="text-gray-500">No files yet...</p>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <script>

        // Resolve, once, every control and output region the script drives.
        // The two lists are positional: the n-th name is bound to the element with the n-th id.
        const [
            micSelect,
            sysSelect,
            chunkSecsInput,
            modelInput,
            noTranscribeCheckbox,
            startBtn,
            stopBtn,
            statusText,
            liveTranscription,
            finalFilesList,
            statusIcon,
            statusSpinner,
        ] = [
            'mic-select',
            'sys-select',
            'chunk-secs-input',
            'model-input',
            'no-transcribe-checkbox',
            'start-btn',
            'stop-btn',
            'status-text',
            'live-transcription',
            'final-files-list',
            'status-icon',
            'status-spinner',
        ].map((elementId) => document.getElementById(elementId));

        // Timer handle returned by setInterval for the status poller; undefined until polling first starts.
        let statusPollingInterval;



        /**
         * Fetch the audio device list from GET /api/devices and populate both dropdowns.
         *
         * The microphone dropdown receives one option per device (value = device.index,
         * label = device.name) with the first device selected. The system-audio dropdown
         * receives the same devices after a leading "None" option whose value is the
         * string "null". On any failure both dropdowns show a single error option.
         *
         * @returns {Promise<void>} Resolves once the dropdowns are updated; never rejects.
         */
        async function fetchDevices() {
            try {
                const response = await fetch('/api/devices');
                if (!response.ok) {
                    throw new Error(`Device request failed with HTTP ${response.status}`);
                }

                const data = await response.json();
                if (!Array.isArray(data.devices)) {
                    throw new Error('Device response does not contain a "devices" array');
                }
                const devices = data.devices;

                // Options are created through the DOM rather than an HTML string, so a device
                // name containing markup characters is shown as text instead of being parsed.
                // Each dropdown needs its own nodes, hence a factory rather than a shared list.
                const buildDeviceOptions = () =>
                    devices.map(device => new Option(device.name, device.index));

                if (devices.length > 0) {
                    micSelect.replaceChildren(...buildDeviceOptions());
                    micSelect.value = devices[0].index;
                } else {
                    // Say so explicitly instead of leaving an empty, unexplained dropdown.
                    micSelect.replaceChildren(new Option('No devices found', ''));
                }

                sysSelect.replaceChildren(new Option('None', 'null'), ...buildDeviceOptions());
            } catch (error) {
                console.error('Error fetching devices:', error);
                micSelect.innerHTML = `<option>Error loading devices</option>`;
                sysSelect.innerHTML = `<option>Error loading devices</option>`;
            }
        }



        /**
         * Fetch the finished recording files from GET /api/final-files and render them
         * as a list of links (file.url as the target, file.name as the label).
         *
         * Shows "No files yet..." for an empty list and an error line on any failure.
         *
         * @returns {Promise<void>} Resolves once the list is updated; never rejects.
         */
        async function fetchFinalFiles() {
            try {
                const response = await fetch('/api/final-files');
                if (!response.ok) {
                    throw new Error(`File list request failed with HTTP ${response.status}`);
                }

                const data = await response.json();
                if (!Array.isArray(data.files)) {
                    throw new Error('File list response does not contain a "files" array');
                }

                if (data.files.length === 0) {
                    finalFilesList.innerHTML = `<p class="text-gray-500">No files yet...</p>`;
                    return;
                }

                // Links are assembled node by node so that file names and URLs are never
                // parsed as HTML.
                const links = data.files.map(file => {
                    const link = document.createElement('a');
                    link.href = file.url;
                    link.className = 'flex items-center text-purple-400 hover:text-purple-300 transition-colors duration-200';
                    link.target = '_blank';
                    // The new tab must not get a handle back to this page.
                    link.rel = 'noopener';

                    const icon = document.createElement('i');
                    icon.className = 'fas fa-file-waveform mr-2';
                    icon.setAttribute('aria-hidden', 'true'); // decorative; the name is the label

                    const label = document.createElement('span');
                    label.textContent = file.name;

                    link.append(icon, label);
                    return link;
                });
                finalFilesList.replaceChildren(...links);
            } catch (error) {
                console.error('Error fetching final files:', error);
                finalFilesList.innerHTML = `<p class="text-red-500">Error loading files.</p>`;
            }
        }



        /**
         * Start polling GET /api/recording-status once per second and mirror the result
         * in the UI.
         *
         * While the server reports `recording`, the status line, spinner, and live
         * transcript (one line per entry of `live_segments`, as "speaker: text") are
         * refreshed and the file list is re-fetched. When the server reports that
         * recording has ended, the idle UI is restored, the Start button is shown again,
         * and polling stops.
         *
         * Any timer from an earlier call is cancelled first, so at most one poller runs.
         * Polling survives isolated request failures and gives up only after several
         * consecutive ones.
         */
        function startStatusPolling() {
            const MAX_CONSECUTIVE_FAILURES = 5;
            let consecutiveFailures = 0;
            let requestInFlight = false;

            // Never leave a previous poller running next to the new one.
            clearInterval(statusPollingInterval);

            const intervalId = setInterval(async () => {
                // Skip this tick if the previous request has not finished, so slow
                // responses cannot pile up or be applied out of order.
                if (requestInFlight) {
                    return;
                }
                requestInFlight = true;

                try {
                    const response = await fetch('/api/recording-status');
                    if (!response.ok) {
                        throw new Error(`Status request failed with HTTP ${response.status}`);
                    }
                    const data = await response.json();

                    // A newer poller replaced this one while the request was pending;
                    // its results are the ones that should drive the UI.
                    if (statusPollingInterval !== intervalId) {
                        return;
                    }
                    consecutiveFailures = 0;

                    if (data.recording) {
                        statusText.textContent = 'Recording...';
                        statusText.classList.remove('text-green-400');
                        statusText.classList.add('text-purple-400');
                        statusIcon.classList.add('hidden');
                        statusSpinner.classList.remove('hidden');

                        const segments = Array.isArray(data.live_segments) ? data.live_segments : [];
                        if (segments.length === 0) {
                            liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Recording started. Waiting for transcription...</p>`;
                        } else {
                            // Speaker labels and transcript text are inserted as text nodes,
                            // never parsed as HTML.
                            const lines = segments.map(segment => {
                                const line = document.createElement('p');
                                line.className = 'text-gray-200 mb-1 leading-snug';

                                const speaker = document.createElement('span');
                                speaker.className = 'font-semibold text-purple-300';
                                speaker.textContent = `${segment.speaker}:`;

                                line.append(speaker, ` ${segment.text}`);
                                return line;
                            });
                            liveTranscription.replaceChildren(...lines);
                            // Keep the newest line in view.
                            liveTranscription.scrollTop = liveTranscription.scrollHeight;
                        }
                        fetchFinalFiles();
                    } else {
                        statusText.textContent = 'Ready to record';
                        statusText.classList.remove('text-purple-400');
                        statusText.classList.add('text-green-400');
                        statusIcon.classList.remove('hidden');
                        statusSpinner.classList.add('hidden');
                        clearInterval(intervalId);
                        startBtn.classList.remove('hidden');
                        stopBtn.classList.add('hidden');
                        fetchFinalFiles();
                    }
                } catch (error) {
                    console.error('Error polling status:', error);
                    // One failed request should not freeze the UI in the recording state;
                    // stop only when the server stays unreachable.
                    consecutiveFailures += 1;
                    if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                        clearInterval(intervalId);
                    }
                } finally {
                    requestInFlight = false;
                }
            }, 1000);

            statusPollingInterval = intervalId;
        }



        // Start recording: POST the current settings to /api/start-recording as JSON
        // ({ mic, sys, chunk_secs, model, no_transcribe }). On success, swap the Start
        // button for the Stop button and begin status polling; on failure, show the
        // server's error message.
        startBtn.addEventListener('click', async () => {
            const mic = micSelect.value;
            // The "None" option carries the string "null"; send a real null instead.
            const sys = sysSelect.value === 'null' ? null : sysSelect.value;
            const chunk_secs = chunkSecsInput.value;
            const model = modelInput.value;
            const no_transcribe = noTranscribeCheckbox.checked;

            // An emptied field yields "" (Number("") is 0), and the input declares min="1",
            // so reject anything below 1 before bothering the server.
            if (!(Number(chunk_secs) >= 1)) {
                alert('Chunk length must be a number of seconds, 1 or greater.');
                chunkSecsInput.focus();
                return;
            }

            // Block repeat clicks while the request is pending so only one start is sent.
            startBtn.disabled = true;
            try {
                const response = await fetch('/api/start-recording', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
                });

                if (response.ok) {
                    startBtn.classList.add('hidden');
                    stopBtn.classList.remove('hidden');
                    liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Starting recording...</p>`;
                    startStatusPolling();
                } else {
                    // The error body may not be JSON (for example an HTML error page);
                    // fall back to the HTTP status rather than reporting a connection failure.
                    let message = `HTTP ${response.status}`;
                    try {
                        const errorBody = await response.json();
                        if (errorBody && errorBody.error) {
                            message = errorBody.error;
                        }
                    } catch (parseError) {
                        console.error('Could not parse start-recording error response:', parseError);
                    }
                    alert(`Error: ${message}`);
                }
            } catch (error) {
                console.error('Failed to start recording:', error);
                alert('Failed to start recording. Check server connection.');
            } finally {
                startBtn.disabled = false;
            }
        });



        // Stop recording: POST to /api/stop-recording. The handler does not change the
        // status display itself; the status poller restores the idle UI once the server
        // reports that recording has ended.
        stopBtn.addEventListener('click', async () => {
            // Block repeat clicks while the request is pending.
            stopBtn.disabled = true;
            try {
                const response = await fetch('/api/stop-recording', {
                    method: 'POST'
                });

                if (response.ok) {
                    // Make sure a poller is running to pick up the stopped state, even if
                    // the previous one ended after a failed status request. The old timer
                    // is cleared first so two pollers never run side by side.
                    clearInterval(statusPollingInterval);
                    startStatusPolling();
                } else {
                    console.error('Failed to stop recording: HTTP', response.status);
                    alert(`Failed to stop recording (HTTP ${response.status}).`);
                }
            } catch (error) {
                console.error('Failed to stop recording:', error);
                alert('Failed to stop recording. Check server connection.');
            } finally {
                stopBtn.disabled = false;
            }
        });



        // Initial setup on page load: fill the device dropdowns and the file list, then
        // check whether the server is already recording (for example after a page reload
        // in the middle of a session) so the UI does not wrongly offer "Start Recording".
        window.onload = async () => {
            fetchDevices();
            fetchFinalFiles();

            try {
                const response = await fetch('/api/recording-status');
                if (!response.ok) {
                    return;
                }
                const data = await response.json();
                if (data.recording) {
                    // Same UI transition the Start button performs after a successful start.
                    startBtn.classList.add('hidden');
                    stopBtn.classList.remove('hidden');
                    startStatusPolling();
                }
            } catch (error) {
                // Best effort: the page still works from its idle state.
                console.error('Error checking recording status:', error);
            }
        };



    </script>
</body>
</html>