maliahson committed on
Commit
c2dbc7b
·
verified ·
1 Parent(s): 3f2580a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import math
3
+ import time
4
+ import numpy as np
5
+ from pydub import AudioSegment
6
+ import io
7
+ import zipfile
8
+ import os
9
+
10
+
11
def numpy_to_mp3(audio_array, sampling_rate):
    """Encode a NumPy audio buffer as MP3 and return the encoded bytes.

    Floating-point input is peak-normalized to the 16-bit range and converted
    to ``int16``. Integer input is passed through unchanged and its item size
    is used as the sample width.

    Args:
        audio_array: Samples as a 1-D array (mono) or a 2-D array. A 2-D array
            is assumed to be laid out as ``(samples, channels)``, which is
            the layout Gradio documents for ``type="numpy"`` audio.
        sampling_rate: Sample rate in Hz.

    Returns:
        The MP3 data as ``bytes``, exported with a 320k bitrate.
    """
    if np.issubdtype(audio_array.dtype, np.floating):
        # Guard empty and all-zero (silent) input: dividing by a zero peak
        # would fill the buffer with NaN and produce garbage int16 samples,
        # and np.max() raises on an empty array.
        peak = np.max(np.abs(audio_array)) if audio_array.size else 0.0
        if peak > 0:
            audio_array = (audio_array / peak) * 32767  # scale to 16-bit range
        audio_array = audio_array.astype(np.int16)

    # tobytes() emits C order, so a (samples, channels) buffer is already
    # interleaved frame by frame; only the channel count must be declared.
    channels = audio_array.shape[1] if audio_array.ndim == 2 else 1

    audio_segment = AudioSegment(
        audio_array.tobytes(),
        frame_rate=sampling_rate,
        sample_width=audio_array.dtype.itemsize,
        channels=channels,
    )

    # Export to an in-memory buffer; a high bitrate is used to maximize quality.
    with io.BytesIO() as mp3_io:
        audio_segment.export(mp3_io, format="mp3", bitrate="320k")
        return mp3_io.getvalue()
35
+
36
def stream(audio, chunk_length_s):
    """Split an audio clip into MP3 chunks and bundle them into a zip file.

    Args:
        audio: ``(sampling_rate, samples)`` tuple; ``samples`` is sliced along
            its first axis, so its length is taken as the number of frames.
        chunk_length_s: Requested chunk duration in seconds; capped at 30.

    Returns:
        A ``(zip_filename, first_time, run_time)`` tuple. ``zip_filename`` is
        the path of the archive holding ``chunk_1.mp3`` .. ``chunk_N.mp3``.
        ``first_time`` is the elapsed seconds until the first chunk was
        encoded, or ``None`` when the audio is empty and no chunk exists.
        ``run_time`` is the elapsed seconds once all chunks were encoded.

    Raises:
        gr.Error: If no audio was supplied.
        ValueError: If ``chunk_length_s`` is not positive.
    """
    if audio is None:
        # Surface a readable message in the UI instead of an unpacking error.
        raise gr.Error("Please provide an audio clip first.")
    if chunk_length_s <= 0:
        raise ValueError("chunk_length_s must be positive")

    start_time = time.time()
    sampling_rate, array = audio

    # Ensure the chunk length does not exceed 30 seconds.
    chunk_length_s = min(chunk_length_s, 30)

    # At least one sample per chunk, so the batch count below cannot divide
    # by zero for very small durations.
    chunk_length = max(1, int(chunk_length_s * sampling_rate))
    # Pause half a chunk's duration before each chunk (original pacing:
    # "always stream outputs faster than it takes to process").
    time_length = chunk_length_s / 2
    audio_length = len(array)
    num_batches = math.ceil(audio_length / chunk_length)

    first_time = None  # stays None when there is nothing to encode
    encoded_chunks = []

    for idx in range(num_batches):
        time.sleep(time_length)
        start_pos = idx * chunk_length
        chunk = array[start_pos:start_pos + chunk_length]  # slice end is clamped
        chunk_mp3 = numpy_to_mp3(chunk, sampling_rate=sampling_rate)
        encoded_chunks.append((f"chunk_{idx + 1}.mp3", chunk_mp3))

        if idx == 0:
            first_time = round(time.time() - start_time, 2)

    run_time = round(time.time() - start_time, 2)

    # Write the encoded chunks straight into the archive. No intermediate
    # files are created, so nothing is left behind if encoding fails.
    zip_filename = "audio_chunks.zip"
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for name, data in encoded_chunks:
            zipf.writestr(name, data)

    return zip_filename, first_time, run_time
85
+
86
# Two-column layout: inputs on the left, results on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input side: the clip to split and the chunk duration.
            audio_in = gr.Audio(
                value="librispeech.wav",
                sources=["upload"],
                type="numpy",
            )
            chunk_length = gr.Slider(
                minimum=2,
                maximum=30,
                value=2,
                step=2,
                label="Chunk length (s)",
            )
            run_button = gr.Button("Stream audio")
        with gr.Column():
            # Output side: the archive of chunks and the timing read-outs.
            zip_out = gr.File(label="Download Zip of Chunks")
            first_time = gr.Textbox(label="Time to first chunk (s)")
            run_time = gr.Textbox(label="Time to current chunk (s)")

    # Clicking the button runs `stream` on the inputs and fills the outputs.
    run_button.click(
        fn=stream,
        inputs=[audio_in, chunk_length],
        outputs=[zip_out, first_time, run_time],
    )

demo.launch()