Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,72 +1,85 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
-
import os
|
| 4 |
-
import zipfile
|
| 5 |
from pydub import AudioSegment
|
| 6 |
import io
|
| 7 |
-
import tempfile
|
| 8 |
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
#
|
| 16 |
audio_segment = AudioSegment(
|
| 17 |
-
|
| 18 |
-
frame_rate=
|
| 19 |
-
sample_width=
|
| 20 |
channels=1
|
| 21 |
)
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
chunk_length_ms = chunk_length_s * 1000 # Convert chunk length from seconds to milliseconds
|
| 31 |
-
num_chunks = int(np.ceil(audio_length_ms / chunk_length_ms))
|
| 32 |
|
| 33 |
-
for i in range(num_chunks):
|
| 34 |
-
start_ms = i * chunk_length_ms
|
| 35 |
-
end_ms = min((i + 1) * chunk_length_ms, audio_length_ms)
|
| 36 |
-
chunk = audio_segment[start_ms:end_ms]
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
with zipfile.ZipFile(zip_filename, 'w') as zipf:
|
| 46 |
-
for chunk_path in split_audio_paths:
|
| 47 |
-
zipf.write(chunk_path, os.path.basename(chunk_path))
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
-
# Gradio interface
|
| 54 |
with gr.Blocks() as demo:
|
| 55 |
with gr.Row():
|
| 56 |
with gr.Column():
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
run_button = gr.Button("Split and Zip Audio")
|
| 61 |
with gr.Column():
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
run_button.click(
|
| 67 |
-
fn=split_audio_and_zip,
|
| 68 |
-
inputs=[audio_in, chunk_length],
|
| 69 |
-
outputs=output_zip
|
| 70 |
-
)
|
| 71 |
|
| 72 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import math
|
| 3 |
+
import time
|
| 4 |
import numpy as np
|
|
|
|
|
|
|
| 5 |
from pydub import AudioSegment
|
| 6 |
import io
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
+
def numpy_to_mp3(audio_array, sampling_rate):
    """Encode a mono numpy waveform as MP3 bytes.

    Args:
        audio_array: 1-D numpy array of samples. Float arrays are
            peak-normalized into the 16-bit range and cast to int16;
            integer arrays are used as-is.
        sampling_rate: sample rate of the audio in Hz.

    Returns:
        bytes: the MP3-encoded audio (320 kbps, mono).
    """
    # Normalize audio_array if it's floating-point.
    if np.issubdtype(audio_array.dtype, np.floating):
        max_val = np.max(np.abs(audio_array))
        # Guard against an all-silent buffer: dividing by max_val == 0
        # would fill the array with NaNs before the int16 cast.
        if max_val > 0:
            audio_array = (audio_array / max_val) * 32767  # Normalize to 16-bit range
        else:
            audio_array = np.zeros_like(audio_array)
        audio_array = audio_array.astype(np.int16)

    # Create an audio segment from the raw PCM bytes (mono).
    audio_segment = AudioSegment(
        audio_array.tobytes(),
        frame_rate=sampling_rate,
        sample_width=audio_array.dtype.itemsize,
        channels=1
    )

    # Export the audio segment to MP3 bytes - use a high bitrate to
    # maximise quality. The context manager releases the buffer even
    # if export raises (e.g. ffmpeg missing).
    with io.BytesIO() as mp3_io:
        audio_segment.export(mp3_io, format="mp3", bitrate="320k")
        return mp3_io.getvalue()
|
|
|
|
|
|
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
def stream(audio, chunk_length_s):
    """Stream an uploaded waveform back as MP3 chunks.

    Args:
        audio: (sampling_rate, numpy_array) tuple as produced by a
            gr.Audio component with type="numpy".
        chunk_length_s: length of each streamed chunk, in seconds.

    Yields:
        (mp3_bytes, time_to_first_chunk_s, time_to_current_chunk_s)
        for every chunk of the input audio.
    """
    t0 = time.time()
    sampling_rate, samples = audio
    samples_per_chunk = int(chunk_length_s * sampling_rate)
    # Always stream outputs faster than it takes to process: pace the
    # loop at half the chunk duration.
    sleep_s = chunk_length_s / 2
    total_samples = len(samples)
    total_chunks = math.ceil(total_samples / samples_per_chunk)

    # Collected (start, end) pairs for chunks starting inside the
    # first 30 seconds of audio; printed once streaming finishes.
    window_timestamps = []

    first_time = None
    for chunk_idx in range(total_chunks):
        time.sleep(sleep_s)
        lo = chunk_idx * samples_per_chunk
        hi = min(lo + samples_per_chunk, total_samples)
        t_start = lo / sampling_rate
        t_end = hi / sampling_rate

        # Save timestamps for a 30-second window.
        if t_start < 30:
            window_timestamps.append((t_start, t_end))

        mp3_chunk = numpy_to_mp3(samples[lo:hi], sampling_rate=sampling_rate)

        if chunk_idx == 0:
            first_time = round(time.time() - t0, 2)
        run_time = round(time.time() - t0, 2)

        yield mp3_chunk, first_time, run_time

    # Print the timestamps after streaming.
    print("Timestamps for the first 30 seconds:")
    for start, end in window_timestamps:
        print(f"Start: {start:.2f}s, End: {end:.2f}s")
|
| 70 |
|
| 71 |
|
|
|
|
| 72 |
# Gradio UI: upload an audio file, choose a chunk length, and stream
# the audio back as MP3 while showing per-chunk latency readouts.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Inputs: source audio (delivered as a (rate, array) tuple)
            # and the streamed chunk size in seconds.
            audio_in = gr.Audio(value="librispeech.wav", sources=["upload"], type="numpy")
            chunk_length = gr.Slider(minimum=2, maximum=10, value=2, step=2, label="Chunk length (s)")
            run_button = gr.Button("Stream audio")
        with gr.Column():
            # Outputs: streaming MP3 player plus two latency textboxes.
            audio_out = gr.Audio(streaming=True, autoplay=True, format="mp3", label="mp3")
            first_time = gr.Textbox(label="Time to first chunk (s)")
            run_time = gr.Textbox(label="Time to current chunk (s)")

    # `stream` is a generator, so Gradio streams each yielded chunk.
    run_button.click(fn=stream, inputs=[audio_in, chunk_length], outputs=[audio_out, first_time, run_time])

demo.launch()
|