Spaces:

sripathiavinashkumar
/

audio_resample

Sleeping

App Files Community

sripathiavinashkumar committed on Feb 10

Commit

e581869

verified ·

1 Parent(s): fc6f60c

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -83

app.py CHANGED Viewed

@@ -4,121 +4,178 @@ import soundfile as sf
 import torchaudio
 import torch
 import numpy as np
 import tempfile
-import os
-# ---------- Utility Functions ----------
-def process_with_librosa(audio, target_sr):
-    y, sr = audio
-    y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
-    return (target_sr, y_resampled)
-def process_with_soundfile(audio, target_sr):
-    y, sr = audio
-    y = np.asarray(y)
-    # soundfile itself doesn't resample, so we use librosa for resampling
-    y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
-    return (target_sr, y_resampled)
-def process_with_torchaudio(audio, target_sr):
     y, sr = audio
-    waveform = torch.tensor(y).unsqueeze(0)
-    resampler = torchaudio.transforms.Resample(
-        orig_freq=sr, new_freq=target_sr
-    )
-    waveform_resampled = resampler(waveform)
-    return (target_sr, waveform_resampled.squeeze(0).numpy())
-# ---------- Gradio UI ----------
-with gr.Blocks(title="Audio Resampling Playground") as demo:
     gr.Markdown(
         """
-        # 🎧 Audio Resampling Playground
-        Upload an audio file and see how **librosa**, **soundfile**, and **torchaudio**
-        handle different **sample rates**.
         """
     )
-    audio_input = gr.Audio(type="numpy", label="Upload Audio")
     with gr.Tabs():
-        # ===== TAB 1: 16kHz =====
-        with gr.Tab("16 kHz"):
-            gr.Markdown("### 🔊 Target Sample Rate: **16,000 Hz**")
-            with gr.Row():
-                out_librosa_16k = gr.Audio(label="Librosa Output")
-                out_sf_16k = gr.Audio(label="SoundFile Output")
-                out_ta_16k = gr.Audio(label="Torchaudio Output")
-            btn_16k = gr.Button("Process @ 16kHz")
-            btn_16k.click(
-                fn=lambda x: (
-                    process_with_librosa(x, 16000),
-                    process_with_soundfile(x, 16000),
-                    process_with_torchaudio(x, 16000),
-                ),
-                inputs=audio_input,
-                outputs=[out_librosa_16k, out_sf_16k, out_ta_16k],
             )
-        # ===== TAB 2: 22.05kHz =====
-        with gr.Tab("22.05 kHz"):
-            gr.Markdown("### 🔊 Target Sample Rate: **22,050 Hz**")
             with gr.Row():
-                out_librosa_22k = gr.Audio(label="Librosa Output")
-                out_sf_22k = gr.Audio(label="SoundFile Output")
-                out_ta_22k = gr.Audio(label="Torchaudio Output")
-            btn_22k = gr.Button("Process @ 22.05kHz")
-            btn_22k.click(
-                fn=lambda x: (
-                    process_with_librosa(x, 22050),
-                    process_with_soundfile(x, 22050),
-                    process_with_torchaudio(x, 22050),
-                ),
-                inputs=audio_input,
-                outputs=[out_librosa_22k, out_sf_22k, out_ta_22k],
             )
-        # ===== TAB 3: 44.1kHz =====
-        with gr.Tab("44.1 kHz"):
-            gr.Markdown("### 🔊 Target Sample Rate: **44,100 Hz**")
             with gr.Row():
-                out_librosa_44k = gr.Audio(label="Librosa Output")
-                out_sf_44k = gr.Audio(label="SoundFile Output")
-                out_ta_44k = gr.Audio(label="Torchaudio Output")
-            btn_44k = gr.Button("Process @ 44.1kHz")
-            btn_44k.click(
-                fn=lambda x: (
-                    process_with_librosa(x, 44100),
-                    process_with_soundfile(x, 44100),
-                    process_with_torchaudio(x, 44100),
-                ),
-                inputs=audio_input,
-                outputs=[out_librosa_44k, out_sf_44k, out_ta_44k],
             )
     gr.Markdown(
         """
         ---
-        ### 🧠 Notes
-        - **librosa**: great for research & analysis
-        - **soundfile**: focused on I/O (resampling added here for comparison)
-        - **torchaudio**: fast & PyTorch-native
         """
     )

 import torchaudio
 import torch
 import numpy as np
+import zipfile
 import tempfile
+from pathlib import Path
+# =========================================================
+# Core Resampling Logic
+# =========================================================
+def resample_audio(y, sr, target_sr, backend):
+    if backend == "librosa":
+        y_out = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
+    elif backend == "soundfile":
+        # soundfile is I/O only, librosa used for resampling
+        y_out = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
+    elif backend == "torchaudio":
+        waveform = torch.tensor(y).unsqueeze(0)
+        resampler = torchaudio.transforms.Resample(
+            orig_freq=sr,
+            new_freq=target_sr
+        )
+        y_out = resampler(waveform).squeeze(0).numpy()
+    return y_out, target_sr
+# =========================================================
+# Single File Processing
+# =========================================================
+def single_file_process(audio, target_sr, backend):
     y, sr = audio
+    y_out, sr_out = resample_audio(y, sr, target_sr, backend)
+    return (sr_out, y_out)
+# =========================================================
+# Batch ZIP Processing
+# =========================================================
+def batch_process_zip(zip_file, target_sr, backend):
+    output_zip_path = tempfile.NamedTemporaryFile(
+        delete=False, suffix=".zip"
+    ).name
+    with zipfile.ZipFile(zip_file, "r") as zin, \
+         zipfile.ZipFile(output_zip_path, "w") as zout:
+        for file in zin.namelist():
+            if not file.lower().endswith((".wav", ".mp3", ".flac", ".ogg")):
+                continue
+            # Read file from ZIP
+            with zin.open(file) as f:
+                with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
+                    tmp.write(f.read())
+                    tmp.flush()
+                    # Load audio
+                    y, sr = librosa.load(tmp.name, sr=None, mono=True)
+                    # Resample
+                    y_out, sr_out = resample_audio(y, sr, target_sr, backend)
+                    # Save output
+                    out_name = f"{Path(file).stem}_{backend}_{target_sr}.wav"
+                    with tempfile.NamedTemporaryFile(suffix=".wav") as out_tmp:
+                        sf.write(out_tmp.name, y_out, sr_out)
+                        zout.write(out_tmp.name, out_name)
+    return output_zip_path
+# =========================================================
+# Gradio UI
+# =========================================================
+with gr.Blocks(title="Audio Resampling Studio") as demo:
     gr.Markdown(
         """
+        # 🎚 Audio Resampling Studio
+        Resample audio using **librosa**, **soundfile**, or **torchaudio**.
+        **Features**
+        - 🎧 Single-file processing
+        - 📦 Batch ZIP processing
+        - 🎯 Sample rates: 16k, 22.05k, 44.1k, 48k
         """
     )
     with gr.Tabs():
+        # =================================================
+        # Single File Tab
+        # =================================================
+        with gr.Tab("🎧 Single File"):
+            gr.Markdown("### Process a single audio file")
+            audio_input = gr.Audio(
+                type="numpy",
+                label="Upload Audio"
             )
             with gr.Row():
+                backend = gr.Radio(
+                    ["librosa", "soundfile", "torchaudio"],
+                    value="librosa",
+                    label="Backend"
+                )
+                target_sr = gr.Dropdown(
+                    [16000, 22050, 44100, 48000],
+                    value=16000,
+                    label="Target Sample Rate (Hz)"
+                )
+            process_btn = gr.Button("Resample Audio")
+            audio_output = gr.Audio(label="Resampled Output")
+            process_btn.click(
+                fn=single_file_process,
+                inputs=[audio_input, target_sr, backend],
+                outputs=audio_output
             )
+        # =================================================
+        # Batch ZIP Tab
+        # =================================================
+        with gr.Tab("📦 Batch ZIP"):
+            gr.Markdown(
+                """
+                ### Batch ZIP Processing
+                Upload a ZIP file containing audio files.
+                You will receive a ZIP of **resampled WAV files**.
+                """
+            )
+            zip_input = gr.File(
+                label="Upload ZIP",
+                file_types=[".zip"]
+            )
             with gr.Row():
+                backend_zip = gr.Radio(
+                    ["librosa", "soundfile", "torchaudio"],
+                    value="librosa",
+                    label="Backend"
+                )
+                target_sr_zip = gr.Dropdown(
+                    [16000, 22050, 44100, 48000],
+                    value=16000,
+                    label="Target Sample Rate (Hz)"
+                )
+            zip_btn = gr.Button("Process ZIP")
+            zip_output = gr.File(label="Download Resampled ZIP")
+            zip_btn.click(
+                fn=batch_process_zip,
+                inputs=[zip_input, target_sr_zip, backend_zip],
+                outputs=zip_output
             )
     gr.Markdown(
         """
         ---
+        **Output format:** WAV
+        **CPU-safe:** Yes (HF Spaces compatible)
+        **Filename format:**
+        `originalname_backend_samplerate.wav`
         """
     )