sripathiavinashkumar committed on
Commit
e581869
·
verified ·
1 Parent(s): fc6f60c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -83
app.py CHANGED
@@ -4,121 +4,178 @@ import soundfile as sf
4
  import torchaudio
5
  import torch
6
  import numpy as np
 
7
  import tempfile
8
- import os
9
 
10
- # ---------- Utility Functions ----------
 
 
11
 
12
- def process_with_librosa(audio, target_sr):
13
- y, sr = audio
14
- y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
15
- return (target_sr, y_resampled)
16
 
17
- def process_with_soundfile(audio, target_sr):
18
- y, sr = audio
19
- y = np.asarray(y)
 
 
 
 
 
 
 
 
20
 
21
- # soundfile itself doesn't resample, so we use librosa for resampling
22
- y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
23
 
24
- return (target_sr, y_resampled)
25
 
26
- def process_with_torchaudio(audio, target_sr):
 
 
 
 
27
  y, sr = audio
28
- waveform = torch.tensor(y).unsqueeze(0)
 
29
 
30
- resampler = torchaudio.transforms.Resample(
31
- orig_freq=sr, new_freq=target_sr
32
- )
33
- waveform_resampled = resampler(waveform)
34
 
35
- return (target_sr, waveform_resampled.squeeze(0).numpy())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # ---------- Gradio UI ----------
 
 
 
 
38
 
39
- with gr.Blocks(title="Audio Resampling Playground") as demo:
 
 
 
 
 
 
 
40
  gr.Markdown(
41
  """
42
- # 🎧 Audio Resampling Playground
43
- Upload an audio file and see how **librosa**, **soundfile**, and **torchaudio**
44
- handle different **sample rates**.
 
 
 
 
45
  """
46
  )
47
 
48
- audio_input = gr.Audio(type="numpy", label="Upload Audio")
49
-
50
  with gr.Tabs():
51
 
52
- # ===== TAB 1: 16kHz =====
53
- with gr.Tab("16 kHz"):
54
- gr.Markdown("### πŸ”Š Target Sample Rate: **16,000 Hz**")
 
 
55
 
56
- with gr.Row():
57
- out_librosa_16k = gr.Audio(label="Librosa Output")
58
- out_sf_16k = gr.Audio(label="SoundFile Output")
59
- out_ta_16k = gr.Audio(label="Torchaudio Output")
60
-
61
- btn_16k = gr.Button("Process @ 16kHz")
62
-
63
- btn_16k.click(
64
- fn=lambda x: (
65
- process_with_librosa(x, 16000),
66
- process_with_soundfile(x, 16000),
67
- process_with_torchaudio(x, 16000),
68
- ),
69
- inputs=audio_input,
70
- outputs=[out_librosa_16k, out_sf_16k, out_ta_16k],
71
  )
72
 
73
- # ===== TAB 2: 22.05kHz =====
74
- with gr.Tab("22.05 kHz"):
75
- gr.Markdown("### πŸ”Š Target Sample Rate: **22,050 Hz**")
76
-
77
  with gr.Row():
78
- out_librosa_22k = gr.Audio(label="Librosa Output")
79
- out_sf_22k = gr.Audio(label="SoundFile Output")
80
- out_ta_22k = gr.Audio(label="Torchaudio Output")
81
-
82
- btn_22k = gr.Button("Process @ 22.05kHz")
83
-
84
- btn_22k.click(
85
- fn=lambda x: (
86
- process_with_librosa(x, 22050),
87
- process_with_soundfile(x, 22050),
88
- process_with_torchaudio(x, 22050),
89
- ),
90
- inputs=audio_input,
91
- outputs=[out_librosa_22k, out_sf_22k, out_ta_22k],
 
 
 
 
 
92
  )
93
 
94
- # ===== TAB 3: 44.1kHz =====
95
- with gr.Tab("44.1 kHz"):
96
- gr.Markdown("### πŸ”Š Target Sample Rate: **44,100 Hz**")
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  with gr.Row():
99
- out_librosa_44k = gr.Audio(label="Librosa Output")
100
- out_sf_44k = gr.Audio(label="SoundFile Output")
101
- out_ta_44k = gr.Audio(label="Torchaudio Output")
102
-
103
- btn_44k = gr.Button("Process @ 44.1kHz")
104
-
105
- btn_44k.click(
106
- fn=lambda x: (
107
- process_with_librosa(x, 44100),
108
- process_with_soundfile(x, 44100),
109
- process_with_torchaudio(x, 44100),
110
- ),
111
- inputs=audio_input,
112
- outputs=[out_librosa_44k, out_sf_44k, out_ta_44k],
 
 
 
 
 
113
  )
114
 
115
  gr.Markdown(
116
  """
117
  ---
118
- ### 🧠 Notes
119
- - **librosa**: great for research & analysis
120
- - **soundfile**: focused on I/O (resampling added here for comparison)
121
- - **torchaudio**: fast & PyTorch-native
122
  """
123
  )
124
 
 
4
  import torchaudio
5
  import torch
6
  import numpy as np
7
+ import zipfile
8
  import tempfile
9
+ from pathlib import Path
10
 
11
+ # =========================================================
12
+ # Core Resampling Logic
13
+ # =========================================================
14
 
15
def resample_audio(y, sr, target_sr, backend):
    """Resample an audio signal to ``target_sr`` using the selected backend.

    Args:
        y: Audio samples (NumPy array or array-like). The time axis must be
            the last axis, which is the axis both ``librosa.resample`` and
            ``torchaudio.transforms.Resample`` operate on by default.
        sr: Sample rate of ``y`` in Hz.
        target_sr: Desired sample rate in Hz.
        backend: One of ``"librosa"``, ``"soundfile"`` or ``"torchaudio"``.

    Returns:
        A ``(y_out, target_sr)`` tuple where ``y_out`` is a float32 NumPy
        array holding the resampled signal.

    Raises:
        ValueError: If ``backend`` is not a supported name or a sample rate
            is not a positive number.
    """
    # Fail loudly on an unknown backend instead of falling through every
    # branch and hitting an UnboundLocalError at the return statement.
    if backend not in ("librosa", "soundfile", "torchaudio"):
        raise ValueError(
            f"Unknown backend {backend!r}; expected 'librosa', "
            "'soundfile' or 'torchaudio'."
        )

    sr = int(sr)
    target_sr = int(target_sr)
    if sr <= 0 or target_sr <= 0:
        raise ValueError("Sample rates must be positive.")

    samples = np.asarray(y)
    if np.issubdtype(samples.dtype, np.signedinteger):
        # Signed PCM (e.g. int16): scale into [-1.0, 1.0) because the
        # resamplers below only accept floating-point audio.
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    elif samples.dtype != np.float32:
        # torchaudio's cached resampling kernel is float32, so float64 input
        # would otherwise fail with a dtype mismatch.
        samples = samples.astype(np.float32)

    # Nothing to do when the rate already matches or there is no audio.
    if sr == target_sr or samples.size == 0:
        return samples, target_sr

    if backend == "torchaudio":
        waveform = torch.tensor(samples).unsqueeze(0)
        resampler = torchaudio.transforms.Resample(
            orig_freq=sr,
            new_freq=target_sr,
        )
        y_out = resampler(waveform).squeeze(0).numpy()
    else:
        # "soundfile" is an I/O-only library, so it shares librosa's
        # resampler; the option exists purely for side-by-side comparison.
        y_out = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)

    return y_out, target_sr
 
32
 
 
33
 
34
+ # =========================================================
35
+ # Single File Processing
36
+ # =========================================================
37
+
38
def single_file_process(audio, target_sr, backend):
    """Resample the clip uploaded in the "Single File" tab.

    Args:
        audio: Value produced by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, data)`` tuple, or ``None`` when nothing has been
            uploaded.
        target_sr: Desired sample rate in Hz.
        backend: Backend name forwarded to ``resample_audio``.

    Returns:
        A ``(sample_rate, data)`` tuple suitable for a ``gr.Audio`` output,
        or ``None`` when no audio was provided.
    """
    if audio is None:
        return None

    # Gradio's numpy audio format puts the sample rate FIRST; unpacking it as
    # ``y, sr`` would swap the rate and the samples.
    sr, y = audio
    y = np.asarray(y)

    if np.issubdtype(y.dtype, np.signedinteger):
        # Gradio delivers integer PCM; the resamplers need floats in [-1, 1).
        y = y.astype(np.float32) / (float(np.iinfo(y.dtype).max) + 1.0)
    elif y.dtype != np.float32:
        y = y.astype(np.float32)

    # Gradio lays multi-channel audio out as (samples, channels), while the
    # resamplers work along the last axis, so flip to (channels, samples)
    # for the call and flip back afterwards.
    multichannel = y.ndim == 2
    if multichannel:
        y = np.ascontiguousarray(y.T)

    y_out, sr_out = resample_audio(y, int(sr), int(target_sr), backend)

    if multichannel:
        y_out = np.asarray(y_out).T
    return (sr_out, y_out)
42
 
 
 
 
 
43
 
44
+ # =========================================================
45
+ # Batch ZIP Processing
46
+ # =========================================================
47
+
48
def batch_process_zip(zip_file, target_sr, backend):
    """Resample every audio file inside a ZIP archive into a new ZIP of WAVs.

    Members whose names end in ``.wav``, ``.mp3``, ``.flac`` or ``.ogg`` are
    decoded to mono, resampled with ``resample_audio`` and stored as
    ``<stem>_<backend>_<target_sr>.wav``. All other members are ignored.

    Args:
        zip_file: Path to the uploaded archive, or a file-like object exposing
            the path as ``.name`` (``gr.File`` hands over one or the other
            depending on the Gradio version).
        target_sr: Desired sample rate in Hz.
        backend: Backend name forwarded to ``resample_audio``.

    Returns:
        Filesystem path (``str``) of the newly written ZIP archive.

    Raises:
        ValueError: If no archive was supplied.
    """
    if zip_file is None:
        raise ValueError("No ZIP file was uploaded.")

    zip_path = getattr(zip_file, "name", zip_file)
    target_sr = int(target_sr)
    audio_suffixes = (".wav", ".mp3", ".flac", ".ogg")

    # Reserve the output path and close the handle right away so it is not
    # leaked and the path can be reopened for writing on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as handle:
        output_zip_path = handle.name

    used_names = set()
    try:
        with tempfile.TemporaryDirectory() as scratch, \
                zipfile.ZipFile(zip_path, "r") as zin, \
                zipfile.ZipFile(output_zip_path, "w") as zout:
            scratch_dir = Path(scratch)

            for index, member in enumerate(zin.namelist()):
                if not member.lower().endswith(audio_suffixes):
                    continue
                member_path = Path(member)
                # macOS archives carry "._name" resource-fork stubs that look
                # like audio by extension but are not decodable.
                if member_path.name.startswith("._"):
                    continue

                # Stage the member on disk under its real extension so the
                # decoder can pick the right format (not always ".wav").
                staged = scratch_dir / f"in_{index}{member_path.suffix.lower()}"
                staged.write_bytes(zin.read(member))

                y, sr = librosa.load(str(staged), sr=None, mono=True)
                y_out, sr_out = resample_audio(y, sr, target_sr, backend)

                # Members from different folders may share a stem; add a
                # counter so later entries do not shadow earlier ones.
                base_name = f"{member_path.stem}_{backend}_{target_sr}"
                out_name = f"{base_name}.wav"
                duplicate = 1
                while out_name in used_names:
                    out_name = f"{base_name}_{duplicate}.wav"
                    duplicate += 1
                used_names.add(out_name)

                rendered = scratch_dir / f"out_{index}.wav"
                sf.write(str(rendered), y_out, sr_out)
                zout.write(str(rendered), out_name)

                # Keep scratch usage flat for large archives.
                staged.unlink()
                rendered.unlink()
    except Exception:
        # Do not leave a half-written archive behind on failure.
        partial = Path(output_zip_path)
        if partial.exists():
            partial.unlink()
        raise

    return output_zip_path
79
+
80
+
81
+ # =========================================================
82
+ # Gradio UI
83
+ # =========================================================
84
+
85
# Build the two-tab Gradio interface: one tab resamples a single uploaded
# clip, the other resamples every audio file inside an uploaded ZIP archive.
with gr.Blocks(title="Audio Resampling Studio") as demo:
    # Shared option lists so both tabs always offer the same choices.
    backend_choices = ["librosa", "soundfile", "torchaudio"]
    sample_rate_choices = [16000, 22050, 44100, 48000]

    gr.Markdown(
        """
        # 🎚 Audio Resampling Studio
        Resample audio using **librosa**, **soundfile**, or **torchaudio**.

        **Features**
        - 🎧 Single-file processing
        - 📦 Batch ZIP processing
        - 🎯 Sample rates: 16k, 22.05k, 44.1k, 48k
        """
    )

    with gr.Tabs():

        # =================================================
        # Single File Tab
        # =================================================
        with gr.Tab("🎧 Single File"):
            gr.Markdown("### Process a single audio file")

            audio_input = gr.Audio(
                type="numpy",
                label="Upload Audio",
            )

            with gr.Row():
                backend = gr.Radio(
                    backend_choices,
                    value="librosa",
                    label="Backend",
                )

                target_sr = gr.Dropdown(
                    sample_rate_choices,
                    value=16000,
                    label="Target Sample Rate (Hz)",
                )

            process_btn = gr.Button("Resample Audio")
            audio_output = gr.Audio(label="Resampled Output")

            # Input order must match single_file_process(audio, target_sr, backend).
            process_btn.click(
                fn=single_file_process,
                inputs=[audio_input, target_sr, backend],
                outputs=audio_output,
            )

        # =================================================
        # Batch ZIP Tab
        # =================================================
        with gr.Tab("📦 Batch ZIP"):
            gr.Markdown(
                """
                ### Batch ZIP Processing
                Upload a ZIP file containing audio files.
                You will receive a ZIP of **resampled WAV files**.
                """
            )

            zip_input = gr.File(
                label="Upload ZIP",
                file_types=[".zip"],
            )

            with gr.Row():
                backend_zip = gr.Radio(
                    backend_choices,
                    value="librosa",
                    label="Backend",
                )

                target_sr_zip = gr.Dropdown(
                    sample_rate_choices,
                    value=16000,
                    label="Target Sample Rate (Hz)",
                )

            zip_btn = gr.Button("Process ZIP")
            zip_output = gr.File(label="Download Resampled ZIP")

            # Input order must match batch_process_zip(zip_file, target_sr, backend).
            zip_btn.click(
                fn=batch_process_zip,
                inputs=[zip_input, target_sr_zip, backend_zip],
                outputs=zip_output,
            )

    # Footer written as a bullet list: consecutive plain Markdown lines would
    # otherwise be joined into a single run-on paragraph when rendered.
    gr.Markdown(
        """
        ---
        - **Output format:** WAV
        - **CPU-safe:** Yes (HF Spaces compatible)
        - **Filename format:** `originalname_backend_samplerate.wav`
        """
    )
181