Spaces:

omar1232
/

Advanced_Audio_Visualizer

Sleeping

App Files Files Community

omar1232 committed on Apr 23, 2025

Commit

4c13763

verified ·

1 Parent(s): a85a269

Update app.py

Browse files

Changed the app from an audio visualizer to an audio transcriber.

Files changed (1) hide show

app.py +56 -211

app.py CHANGED Viewed

@@ -1,240 +1,85 @@
 import gradio as gr
-import numpy as np
-import librosa
-import soundfile as sf
 import tempfile
 import os
-# Process audio file or recording
-def process_audio(audio_input, sample_rate=44100):
-    # Handle Gradio audio input (tuple of (sample_rate, numpy_array))
-    if isinstance(audio_input, tuple):
-        sr, audio_data = audio_input
-    else:
-        # Load audio file
-        audio_data, sr = librosa.load(audio_input, sr=sample_rate)
-    # Extract frequency data (spectrogram)
-    fft = np.abs(librosa.stft(audio_data))
-    freq_data = np.mean(fft, axis=1)[:200]  # Average across time, take first 200 bins
-    # Beat detection
-    tempo, beats = librosa.beat.beat_track(y=audio_data, sr=sr)
-    beat_times = librosa.frames_to_time(beats, sr=sr)
-    # Prepare visualization data
-    vis_data = {
-        "frequencies": freq_data.tolist(),
-        "beat_times": beat_times.tolist(),
-        "volume": float(np.mean(np.abs(audio_data)) * 100)
-    }
-    # Save audio to a temporary file on disk
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
-        sf.write(temp_file.name, audio_data, sr, format='wav')
-        temp_file_path = temp_file.name
-    return vis_data, temp_file_path
 # Gradio interface function
-def audio_visualizer(audio_file, audio_record):
     if audio_file:
-        vis_data, audio_output = process_audio(audio_file)
     elif audio_record:
-        vis_data, audio_output = process_audio(audio_record)
     else:
-        return "Please upload an audio file or record audio.", None
-    return vis_data, audio_output
-# Custom CSS and JavaScript for the visualizer
-visualizer_html = """
-<canvas id="visualizerCanvas" style="width: 100%; height: 500px; background: #1a1a2e; border-radius: 16px; box-shadow: 0 15px 40px rgba(0, 0, 0, 0.4);"></canvas>
-<style>
-    canvas {
-        display: block;
-        max-width: 800px;
-        margin: 0 auto;
-    }
-</style>
-<script>
-    document.addEventListener('DOMContentLoaded', () => {
-        const canvas = document.getElementById('visualizerCanvas');
-        const ctx = canvas.getContext('2d');
-        let audioElement = null;
-        let data = { frequencies: [], beat_times: [], volume: 0 };
-        let particles = [];
-        let lastBeatIndex = 0;
-        // Set canvas size to match its CSS size
-        function resizeCanvas() {
-            canvas.width = canvas.offsetWidth;
-            canvas.height = canvas.offsetHeight;
-        }
-        resizeCanvas();
-        window.addEventListener('resize', resizeCanvas);
-        // Particle class for beat effects
-        class Particle {
-            constructor(x, y, radius, speedX, speedY) {
-                this.x = x;
-                this.y = y;
-                this.radius = radius;
-                this.speedX = speedX;
-                this.speedY = speedY;
-                this.alpha = 1;
-            }
-            update() {
-                this.x += this.speedX;
-                this.y += this.speedY;
-                this.alpha -= 0.02;
-            }
-            draw() {
-                ctx.beginPath();
-                ctx.arc(this.x, this.y, this.radius, 0, Math.PI * 2);
-                ctx.fillStyle = `rgba(0, 180, 219, ${this.alpha})`;
-                ctx.fill();
-            }
-        }
-        // Spawn particles on beats
-        function spawnParticles(volume) {
-            const centerX = canvas.width / 2;
-            const centerY = canvas.height / 2;
-            const particleCount = Math.floor(volume / 2) + 5; // More particles for higher volume
-            for (let i = 0; i < particleCount; i++) {
-                const angle = Math.random() * Math.PI * 2;
-                const speed = Math.random() * 5 + 2;
-                const speedX = Math.cos(angle) * speed;
-                const speedY = Math.sin(angle) * speed;
-                const radius = Math.random() * 5 + 2;
-                particles.push(new Particle(centerX, centerY, radius, speedX, speedY));
-            }
-        }
-        // Check for beats based on audio playback time
-        function checkBeats() {
-            if (!audioElement || !data.beat_times) return;
-            const currentTime = audioElement.currentTime;
-            for (let i = lastBeatIndex; i < data.beat_times.length; i++) {
-                if (currentTime >= data.beat_times[i]) {
-                    spawnParticles(data.volume);
-                    lastBeatIndex = i + 1;
-                } else {
-                    break;
-                }
-            }
-        }
-        // Animation loop
-        function animate() {
-            requestAnimationFrame(animate);
-            // Clear canvas
-            ctx.fillStyle = 'rgba(26, 26, 46, 0.8)';
-            ctx.fillRect(0, 0, canvas.width, canvas.height);
-            // Center of the canvas
-            const centerX = canvas.width / 2;
-            const centerY = canvas.height / 2;
-            const radius = Math.min(canvas.width, canvas.height) * 0.2;
-            // Draw glowing center circle (pulsing with volume)
-            const glowRadius = radius * (1 + data.volume / 100);
-            const gradient = ctx.createRadialGradient(centerX, centerY, 0, centerX, centerY, glowRadius);
-            gradient.addColorStop(0, `rgba(0, 180, 219, ${0.5 + data.volume / 200})`);
-            gradient.addColorStop(1, 'rgba(0, 180, 219, 0)');
-            ctx.beginPath();
-            ctx.arc(centerX, centerY, glowRadius, 0, Math.PI * 2);
-            ctx.fillStyle = gradient;
-            ctx.fill();
-            // Draw circular spectrum
-            const freqCount = data.frequencies.length;
-            const barCount = 100; // Number of bars in the circle
-            const angleStep = (Math.PI * 2) / barCount;
-            for (let i = 0; i < barCount; i++) {
-                const freqIndex = Math.floor((i / barCount) * freqCount);
-                const freqValue = freqIndex < freqCount ? data.frequencies[freqIndex] : 0;
-                const maxFreq = Math.max(...data.frequencies) || 1;
-                const barLength = (freqValue / maxFreq) * 100 + 20; // Scale bar length
-                const angle = i * angleStep;
-                const x1 = centerX + Math.cos(angle) * radius;
-                const y1 = centerY + Math.sin(angle) * radius;
-                const x2 = centerX + Math.cos(angle) * (radius + barLength);
-                const y2 = centerY + Math.sin(angle) * (radius + barLength);
-                ctx.beginPath();
-                ctx.moveTo(x1, y1);
-                ctx.lineTo(x2, y2);
-                ctx.strokeStyle = `hsl(${i * (360 / barCount)}, 80%, 50%)`;
-                ctx.lineWidth = 2;
-                ctx.stroke();
-            }
-            // Update and draw particles
-            particles = particles.filter(p => p.alpha > 0);
-            particles.forEach(particle => {
-                particle.update();
-                particle.draw();
-            });
-            // Check for beats
-            checkBeats();
-        }
-        // Start animation
-        animate();
-        // Poll the visible JSON output for updates
-        setInterval(() => {
-            const visDataOutput = document.querySelector('div[label="Visualization Data"] textarea');
-            audioElement = document.querySelector('audio'); // Get the audio player
-            if (visDataOutput && visDataOutput.value) {
-                try {
-                    data = JSON.parse(visDataOutput.value);
-                } catch (e) {
-                    console.error('Error parsing visualization data:', e);
-                    data = { frequencies: [], beat_times: [], volume: 0 };
-                }
-            } else {
-                data = { frequencies: [], beat_times: [], volume: 0 };
-                lastBeatIndex = 0; // Reset beat index
-            }
-        }, 100); // Poll more frequently for smoother animations
-    });
-</script>
-"""
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Advanced Audio Visualizer")
-    gr.Markdown("Upload an audio file or record audio to visualize frequencies and beats with dynamic effects.")
     with gr.Row():
         audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
         audio_record = gr.Audio(sources=["microphone"], type="numpy", label="Record Audio")
     with gr.Row():
-        vis_output = gr.JSON(label="Visualization Data")
-        audio_output = gr.Audio(label="Audio Playback", type="filepath")
     with gr.Row():
-        submit = gr.Button("Visualize")
         clear = gr.Button("Clear")
-    # Visualizer section
-    gr.HTML(visualizer_html)
     submit.click(
-        fn=audio_visualizer,
         inputs=[audio_file, audio_record],
-        outputs=[vis_output, audio_output]
     )
     clear.click(
         fn=lambda: (None, None),

 import gradio as gr
+import speech_recognition as sr
+from pydub import AudioSegment
 import tempfile
+from langdetect import detect
 import os
+# Process audio and transcribe
+def process_audio(audio_input):
+    """Transcribe speech from a Gradio audio input and detect its language.
+
+    Args:
+        audio_input: Either a filesystem path to an uploaded audio file, or a
+            ``(sample_rate, numpy_array)`` tuple as produced by a Gradio
+            microphone component configured with ``type="numpy"``.
+
+    Returns:
+        A ``(language, transcription, text_file_path)`` tuple. ``language`` is
+        the code returned by ``langdetect.detect`` or ``"Unknown"``;
+        ``transcription`` is the recognized text or a human-readable failure
+        message; ``text_file_path`` is the path of a temporary ``.txt`` file
+        containing ``transcription``.
+    """
+    recognizer = sr.Recognizer()
+    temp_audio_path = None
+    recognized = False
+    try:
+        if isinstance(audio_input, tuple):  # Recorded audio
+            # Do NOT unpack into a local named ``sr``: that would shadow the
+            # ``speech_recognition`` alias for the whole function and make
+            # every ``sr.*`` reference raise UnboundLocalError.
+            sample_rate, audio_data = audio_input
+            # Reserve a path and close the handle before pydub writes to it.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+                temp_audio_path = temp_file.name
+            # pydub builds a segment from raw PCM bytes and needs
+            # sample_width, frame_rate and channels together.
+            # NOTE(review): assumes integer PCM samples shaped (samples,) or
+            # (samples, channels) - confirm against the Gradio version in use.
+            channels = 1 if audio_data.ndim == 1 else audio_data.shape[1]
+            AudioSegment(
+                audio_data.tobytes(),
+                sample_width=audio_data.dtype.itemsize,
+                frame_rate=sample_rate,
+                channels=channels,
+            ).export(temp_audio_path, format="wav")
+            audio_file_path = temp_audio_path
+        else:  # Uploaded audio file
+            audio_file_path = audio_input
+        # Transcribe audio
+        with sr.AudioFile(audio_file_path) as source:
+            audio = recognizer.record(source)
+            try:
+                transcription = recognizer.recognize_google(audio)
+                recognized = True
+            except sr.UnknownValueError:
+                transcription = "Could not understand the audio."
+            except sr.RequestError:
+                transcription = "Transcription service unavailable."
+    finally:
+        # Remove the temporary WAV even when conversion or transcription fails.
+        if temp_audio_path is not None and os.path.exists(temp_audio_path):
+            os.remove(temp_audio_path)
+    # Detect language only for real transcripts; running detection on the
+    # English failure messages above would report a misleading language.
+    language = "Unknown"
+    if recognized:
+        try:
+            language = detect(transcription)
+        except Exception:
+            # Best effort: a detection failure must not fail the request.
+            language = "Unknown"
+    # Save transcription to a text file (UTF-8 so non-ASCII text is preserved)
+    with tempfile.NamedTemporaryFile(
+        suffix=".txt", delete=False, mode="w", encoding="utf-8"
+    ) as text_file:
+        text_file.write(transcription)
+        text_file_path = text_file.name
+    return language, transcription, text_file_path
 # Gradio interface function
+def audio_transcriptor(audio_file, audio_record):
+    # Gradio click handler: transcribes whichever input was provided and
+    # returns a (language, transcription, text_file) triple.
+    # An uploaded file takes priority over a microphone recording.
     if audio_file:
+        language, transcription, text_file = process_audio(audio_file)
     elif audio_record:
+        language, transcription, text_file = process_audio(audio_record)
     else:
+        # No input at all: the prompt goes in the first (language) slot,
+        # with an empty transcription and no file.
+        return "Please upload an audio file or record audio.", "", None
+    return language, transcription, text_file
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcriptor")
+    gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language.")
     with gr.Row():
         audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
         audio_record = gr.Audio(sources=["microphone"], type="numpy", label="Record Audio")
     with gr.Row():
+        language_output = gr.Textbox(label="Detected Language")
+        transcription_output = gr.Textbox(label="Transcription")
+        text_file_output = gr.File(label="Download Transcription as Text File")
     with gr.Row():
+        submit = gr.Button("Transcribe")
         clear = gr.Button("Clear")
     submit.click(
+        fn=audio_transcriptor,
         inputs=[audio_file, audio_record],
+        outputs=[language_output, transcription_output, text_file_output]
     )
     clear.click(
         fn=lambda: (None, None),