omar1232 committed on
Commit
4c13763
·
verified ·
1 Parent(s): a85a269

Update app.py

Browse files

Changed the app from an audio visualizer to an audio transcriber.

Files changed (1) hide show
  1. app.py +56 -211
app.py CHANGED
@@ -1,240 +1,85 @@
1
  import gradio as gr
2
- import numpy as np
3
- import librosa
4
- import soundfile as sf
5
  import tempfile
 
6
  import os
7
 
8
# Process audio file or recording
def process_audio(audio_input, sample_rate=44100):
    """Analyse an audio clip for the front-end visualizer.

    Args:
        audio_input: Either a filepath string (uploaded file) or a
            ``(sample_rate, numpy_array)`` tuple (Gradio microphone input).
        sample_rate: Target sample rate used when loading from a file path.

    Returns:
        Tuple of (visualization dict with ``"frequencies"``, ``"beat_times"``
        and ``"volume"`` keys, path to a temporary WAV file of the audio).
    """
    # Handle Gradio audio input (tuple of (sample_rate, numpy_array))
    if isinstance(audio_input, tuple):
        sr, audio_data = audio_input
        audio_data = np.asarray(audio_data)
        # Gradio microphone audio arrives as integer PCM (and may be stereo);
        # librosa.stft requires mono floating-point samples, so normalise
        # to float32 in [-1, 1] and downmix before analysis.
        if np.issubdtype(audio_data.dtype, np.integer):
            audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
        else:
            audio_data = audio_data.astype(np.float32)
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)
    else:
        # Load audio file (librosa returns mono float32 at the requested rate)
        audio_data, sr = librosa.load(audio_input, sr=sample_rate)

    # Extract frequency data (spectrogram)
    fft = np.abs(librosa.stft(audio_data))
    freq_data = np.mean(fft, axis=1)[:200]  # Average across time, take first 200 bins

    # Beat detection
    tempo, beats = librosa.beat.beat_track(y=audio_data, sr=sr)
    beat_times = librosa.frames_to_time(beats, sr=sr)

    # Prepare visualization data
    vis_data = {
        "frequencies": freq_data.tolist(),
        "beat_times": beat_times.tolist(),
        "volume": float(np.mean(np.abs(audio_data)) * 100)
    }

    # Save audio to a temporary file on disk
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        sf.write(temp_file.name, audio_data, sr, format='wav')
        temp_file_path = temp_file.name

    return vis_data, temp_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Gradio interface function
40
def audio_visualizer(audio_file, audio_record):
    """Run the visualizer pipeline on whichever audio source was supplied.

    Prefers the uploaded file over the microphone recording; returns an
    error message (and no audio) when neither is present.
    """
    chosen_input = audio_file or audio_record
    if not chosen_input:
        return "Please upload an audio file or record audio.", None
    vis_data, audio_output = process_audio(chosen_input)
    return vis_data, audio_output
49
-
50
# Custom CSS and JavaScript for the visualizer.
# The embedded script renders a circular frequency spectrum on a <canvas>,
# a volume-driven pulsing glow, and particle bursts synced to beat times.
# It has no direct bridge to Python: every 100 ms it polls the visible
# "Visualization Data" JSON component's textarea, parses its value, and
# reads the page's <audio> element to align beat effects with playback.
# NOTE(review): the `div[label="Visualization Data"]` selector presumably
# depends on Gradio's generated DOM — verify against the Gradio version used.
visualizer_html = """
<canvas id="visualizerCanvas" style="width: 100%; height: 500px; background: #1a1a2e; border-radius: 16px; box-shadow: 0 15px 40px rgba(0, 0, 0, 0.4);"></canvas>

<style>
canvas {
display: block;
max-width: 800px;
margin: 0 auto;
}
</style>

<script>
document.addEventListener('DOMContentLoaded', () => {
const canvas = document.getElementById('visualizerCanvas');
const ctx = canvas.getContext('2d');
let audioElement = null;
let data = { frequencies: [], beat_times: [], volume: 0 };
let particles = [];
let lastBeatIndex = 0;

// Set canvas size to match its CSS size
function resizeCanvas() {
canvas.width = canvas.offsetWidth;
canvas.height = canvas.offsetHeight;
}
resizeCanvas();
window.addEventListener('resize', resizeCanvas);

// Particle class for beat effects
class Particle {
constructor(x, y, radius, speedX, speedY) {
this.x = x;
this.y = y;
this.radius = radius;
this.speedX = speedX;
this.speedY = speedY;
this.alpha = 1;
}

update() {
this.x += this.speedX;
this.y += this.speedY;
this.alpha -= 0.02;
}

draw() {
ctx.beginPath();
ctx.arc(this.x, this.y, this.radius, 0, Math.PI * 2);
ctx.fillStyle = `rgba(0, 180, 219, ${this.alpha})`;
ctx.fill();
}
}

// Spawn particles on beats
function spawnParticles(volume) {
const centerX = canvas.width / 2;
const centerY = canvas.height / 2;
const particleCount = Math.floor(volume / 2) + 5; // More particles for higher volume
for (let i = 0; i < particleCount; i++) {
const angle = Math.random() * Math.PI * 2;
const speed = Math.random() * 5 + 2;
const speedX = Math.cos(angle) * speed;
const speedY = Math.sin(angle) * speed;
const radius = Math.random() * 5 + 2;
particles.push(new Particle(centerX, centerY, radius, speedX, speedY));
}
}

// Check for beats based on audio playback time
function checkBeats() {
if (!audioElement || !data.beat_times) return;
const currentTime = audioElement.currentTime;
for (let i = lastBeatIndex; i < data.beat_times.length; i++) {
if (currentTime >= data.beat_times[i]) {
spawnParticles(data.volume);
lastBeatIndex = i + 1;
} else {
break;
}
}
}

// Animation loop
function animate() {
requestAnimationFrame(animate);

// Clear canvas
ctx.fillStyle = 'rgba(26, 26, 46, 0.8)';
ctx.fillRect(0, 0, canvas.width, canvas.height);

// Center of the canvas
const centerX = canvas.width / 2;
const centerY = canvas.height / 2;
const radius = Math.min(canvas.width, canvas.height) * 0.2;

// Draw glowing center circle (pulsing with volume)
const glowRadius = radius * (1 + data.volume / 100);
const gradient = ctx.createRadialGradient(centerX, centerY, 0, centerX, centerY, glowRadius);
gradient.addColorStop(0, `rgba(0, 180, 219, ${0.5 + data.volume / 200})`);
gradient.addColorStop(1, 'rgba(0, 180, 219, 0)');
ctx.beginPath();
ctx.arc(centerX, centerY, glowRadius, 0, Math.PI * 2);
ctx.fillStyle = gradient;
ctx.fill();

// Draw circular spectrum
const freqCount = data.frequencies.length;
const barCount = 100; // Number of bars in the circle
const angleStep = (Math.PI * 2) / barCount;
for (let i = 0; i < barCount; i++) {
const freqIndex = Math.floor((i / barCount) * freqCount);
const freqValue = freqIndex < freqCount ? data.frequencies[freqIndex] : 0;
const maxFreq = Math.max(...data.frequencies) || 1;
const barLength = (freqValue / maxFreq) * 100 + 20; // Scale bar length
const angle = i * angleStep;

const x1 = centerX + Math.cos(angle) * radius;
const y1 = centerY + Math.sin(angle) * radius;
const x2 = centerX + Math.cos(angle) * (radius + barLength);
const y2 = centerY + Math.sin(angle) * (radius + barLength);

ctx.beginPath();
ctx.moveTo(x1, y1);
ctx.lineTo(x2, y2);
ctx.strokeStyle = `hsl(${i * (360 / barCount)}, 80%, 50%)`;
ctx.lineWidth = 2;
ctx.stroke();
}

// Update and draw particles
particles = particles.filter(p => p.alpha > 0);
particles.forEach(particle => {
particle.update();
particle.draw();
});

// Check for beats
checkBeats();
}

// Start animation
animate();

// Poll the visible JSON output for updates
setInterval(() => {
const visDataOutput = document.querySelector('div[label="Visualization Data"] textarea');
audioElement = document.querySelector('audio'); // Get the audio player
if (visDataOutput && visDataOutput.value) {
try {
data = JSON.parse(visDataOutput.value);
} catch (e) {
console.error('Error parsing visualization data:', e);
data = { frequencies: [], beat_times: [], volume: 0 };
}
} else {
data = { frequencies: [], beat_times: [], volume: 0 };
lastBeatIndex = 0; // Reset beat index
}
}, 100); // Poll more frequently for smoother animations
});
</script>
"""
213
 
214
  # Gradio interface
215
  with gr.Blocks() as demo:
216
- gr.Markdown("# Advanced Audio Visualizer")
217
- gr.Markdown("Upload an audio file or record audio to visualize frequencies and beats with dynamic effects.")
218
 
219
  with gr.Row():
220
  audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
221
  audio_record = gr.Audio(sources=["microphone"], type="numpy", label="Record Audio")
222
 
223
  with gr.Row():
224
- vis_output = gr.JSON(label="Visualization Data")
225
- audio_output = gr.Audio(label="Audio Playback", type="filepath")
 
226
 
227
  with gr.Row():
228
- submit = gr.Button("Visualize")
229
  clear = gr.Button("Clear")
230
 
231
- # Visualizer section
232
- gr.HTML(visualizer_html)
233
-
234
  submit.click(
235
- fn=audio_visualizer,
236
  inputs=[audio_file, audio_record],
237
- outputs=[vis_output, audio_output]
238
  )
239
  clear.click(
240
  fn=lambda: (None, None),
 
1
  import gradio as gr
2
+ import speech_recognition as sr
3
+ from pydub import AudioSegment
 
4
  import tempfile
5
+ from langdetect import detect
6
  import os
7
 
8
# Process audio and transcribe
def process_audio(audio_input):
    """Transcribe an audio clip and detect the transcription's language.

    Args:
        audio_input: Either a filepath string (uploaded file) or a
            ``(sample_rate, numpy_array)`` tuple (Gradio microphone input).

    Returns:
        Tuple of (detected language code or "Unknown", transcription text,
        path to a temporary ``.txt`` file containing the transcription).
    """
    # Initialize recognizer
    recognizer = sr.Recognizer()

    # Handle Gradio audio input
    if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
        # BUG FIX: the local must NOT be named ``sr`` — that would shadow the
        # speech_recognition module imported as ``sr`` and break the
        # sr.AudioFile / sr.UnknownValueError / sr.RequestError uses below.
        sample_rate, audio_data = audio_input
        # Convert numpy samples to a WAV file using pydub.
        # BUG FIX: AudioSegment has no ``sample_rate`` keyword; it takes raw
        # bytes plus sample_width (bytes per sample), frame_rate and channels.
        channels = 1 if audio_data.ndim == 1 else audio_data.shape[1]
        segment = AudioSegment(
            data=audio_data.tobytes(),
            sample_width=audio_data.dtype.itemsize,
            frame_rate=sample_rate,
            channels=channels,
        )
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            segment.export(temp_file.name, format="wav")
            audio_file_path = temp_file.name
    else:  # Uploaded audio file
        audio_file_path = audio_input

    # Transcribe audio (recognize_google uses Google's free web API)
    with sr.AudioFile(audio_file_path) as source:
        audio = recognizer.record(source)
    try:
        transcription = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        transcription = "Could not understand the audio."
    except sr.RequestError:
        transcription = "Transcription service unavailable."

    # Detect language of the transcribed text (best effort)
    try:
        language = detect(transcription)
    except Exception:  # langdetect raises LangDetectException on unusable text
        language = "Unknown"

    # Save transcription to a text file
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode='w') as text_file:
        text_file.write(transcription)
        text_file_path = text_file.name

    # Clean up temporary audio file (if created)
    if isinstance(audio_input, tuple) and os.path.exists(audio_file_path):
        os.remove(audio_file_path)

    return language, transcription, text_file_path
49
 
50
  # Gradio interface function
51
def audio_transcriptor(audio_file, audio_record):
    """Transcribe whichever audio source the user provided.

    The uploaded file takes precedence over the microphone recording.
    Returns (language, transcription, text_file_path); when neither input
    is present, returns an error message with empty/None outputs.
    """
    chosen_source = audio_file or audio_record
    if not chosen_source:
        return "Please upload an audio file or record audio.", "", None
    language, transcription, text_file = process_audio(chosen_source)
    return language, transcription, text_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # Gradio interface
62
  with gr.Blocks() as demo:
63
+ gr.Markdown("# Audio Transcriptor")
64
+ gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language.")
65
 
66
  with gr.Row():
67
  audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
68
  audio_record = gr.Audio(sources=["microphone"], type="numpy", label="Record Audio")
69
 
70
  with gr.Row():
71
+ language_output = gr.Textbox(label="Detected Language")
72
+ transcription_output = gr.Textbox(label="Transcription")
73
+ text_file_output = gr.File(label="Download Transcription as Text File")
74
 
75
  with gr.Row():
76
+ submit = gr.Button("Transcribe")
77
  clear = gr.Button("Clear")
78
 
 
 
 
79
  submit.click(
80
+ fn=audio_transcriptor,
81
  inputs=[audio_file, audio_record],
82
+ outputs=[language_output, transcription_output, text_file_output]
83
  )
84
  clear.click(
85
  fn=lambda: (None, None),