Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,10 @@ import gradio as gr
|
|
| 2 |
import whisper
|
| 3 |
import torch
|
| 4 |
import os
|
|
|
|
| 5 |
from pydub import AudioSegment, silence
|
| 6 |
from faster_whisper import WhisperModel # Import faster-whisper
|
|
|
|
| 7 |
from spleeter.separator import Separator # Import Spleeter for music separation
|
| 8 |
|
| 9 |
# Mapping of model names to Whisper model sizes
|
|
@@ -186,6 +188,45 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
| 186 |
|
| 187 |
return output_path
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
def remove_background_music(audio_file):
|
| 190 |
"""
|
| 191 |
Remove background music from the audio file using Spleeter.
|
|
@@ -199,16 +240,13 @@ def remove_background_music(audio_file):
|
|
| 199 |
# Initialize Spleeter separator (2 stems: vocals and accompaniment)
|
| 200 |
separator = Separator('spleeter:2stems')
|
| 201 |
|
| 202 |
-
# Separate
|
| 203 |
-
|
| 204 |
-
separator.separate_to_file(audio_file, output_folder)
|
| 205 |
|
| 206 |
# Load the separated vocals
|
| 207 |
-
|
| 208 |
-
vocals_path = os.path.join(output_folder, base_name, "vocals.wav")
|
| 209 |
|
| 210 |
-
|
| 211 |
-
return vocals_path
|
| 212 |
|
| 213 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
| 214 |
"""Transcribe the audio file."""
|
|
@@ -301,11 +339,22 @@ with gr.Blocks() as demo:
|
|
| 301 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
| 302 |
silence_button = gr.Button("Remove Silence")
|
| 303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
with gr.Tab("Remove Background Music"):
|
| 305 |
gr.Markdown("Upload an audio file to remove background music.")
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
|
| 310 |
# Link buttons to functions
|
| 311 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
|
@@ -319,10 +368,15 @@ with gr.Blocks() as demo:
|
|
| 319 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
| 320 |
outputs=silence_output
|
| 321 |
)
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
remove_background_music,
|
| 324 |
-
inputs=
|
| 325 |
-
outputs=
|
| 326 |
)
|
| 327 |
|
| 328 |
# Launch the Gradio interface
|
|
|
|
| 2 |
import whisper
|
| 3 |
import torch
|
| 4 |
import os
|
| 5 |
+
import numpy as np
|
| 6 |
from pydub import AudioSegment, silence
|
| 7 |
from faster_whisper import WhisperModel # Import faster-whisper
|
| 8 |
+
import noisereduce as nr # Import noisereduce for background noise removal
|
| 9 |
from spleeter.separator import Separator # Import Spleeter for music separation
|
| 10 |
|
| 11 |
# Mapping of model names to Whisper model sizes
|
|
|
|
| 188 |
|
| 189 |
return output_path
|
| 190 |
|
| 191 |
+
def remove_background_noise(audio_file, noise_reduce_level=0.5):
    """
    Remove background noise from the audio file using the noisereduce library.

    The audio is decoded with pydub, denoised per channel, and written back
    out as a WAV file with the original frame rate, sample width and
    channel count.

    Args:
        audio_file (str): Path to the input audio file.
        noise_reduce_level (float): Noise reduction level (0.0 to 1.0).
            Values outside that range are clamped. Default is 0.5.

    Returns:
        str: Path to the output audio file with background noise removed.
    """
    # Keep the level inside the documented range before handing it on.
    prop_decrease = min(max(float(noise_reduce_level), 0.0), 1.0)

    # Load the audio file
    audio = AudioSegment.from_file(audio_file)
    sample_rate = audio.frame_rate
    channels = audio.channels

    # Convert audio to numpy array for noisereduce
    samples = np.array(audio.get_array_of_samples())
    sample_dtype = samples.dtype

    if samples.size == 0:
        # Nothing to denoise; export the (empty) audio unchanged rather
        # than passing a zero-length signal to the noise reducer.
        reduced_audio = audio
    else:
        if channels > 1:
            # pydub returns samples interleaved (L, R, L, R, ...). Denoising
            # that as a single 1-D signal mixes the channels together, so
            # split it into the (channels, frames) layout first.
            samples = samples.reshape((-1, channels)).T

        # Perform noise reduction
        reduced_noise = np.asarray(
            nr.reduce_noise(
                y=samples,
                sr=sample_rate,
                prop_decrease=prop_decrease,
            )
        )

        if channels > 1:
            # Back to (frames, channels) so that C-order bytes are
            # interleaved again, as AudioSegment expects.
            reduced_noise = reduced_noise.T

        if reduced_noise.dtype != sample_dtype:
            # The raw bytes must match sample_width, so restore the original
            # sample type; round and clip to avoid integer wrap-around.
            if np.issubdtype(sample_dtype, np.integer):
                limits = np.iinfo(sample_dtype)
                reduced_noise = np.clip(
                    np.rint(reduced_noise), limits.min, limits.max
                )
            reduced_noise = reduced_noise.astype(sample_dtype)

        # Convert back to AudioSegment
        reduced_audio = AudioSegment(
            reduced_noise.tobytes(),
            frame_rate=sample_rate,
            sample_width=audio.sample_width,
            channels=channels,
        )

    # Export the processed audio
    output_path = "noise_reduced_audio.wav"
    reduced_audio.export(output_path, format="wav")

    return output_path
|
| 229 |
+
|
| 230 |
def remove_background_music(audio_file):
|
| 231 |
"""
|
| 232 |
Remove background music from the audio file using Spleeter.
|
|
|
|
| 240 |
# Initialize Spleeter separator (2 stems: vocals and accompaniment)
|
| 241 |
separator = Separator('spleeter:2stems')
|
| 242 |
|
| 243 |
+
# Separate vocals from background music
|
| 244 |
+
separator.separate_to_file(audio_file, "output")
|
|
|
|
| 245 |
|
| 246 |
# Load the separated vocals
|
| 247 |
+
output_path = os.path.join("output", os.path.basename(audio_file).replace(".wav", ""), "vocals.wav")
|
|
|
|
| 248 |
|
| 249 |
+
return output_path
|
|
|
|
| 250 |
|
| 251 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
| 252 |
"""Transcribe the audio file."""
|
|
|
|
| 339 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
| 340 |
silence_button = gr.Button("Remove Silence")
|
| 341 |
|
| 342 |
+
with gr.Tab("Remove Background Noise"):
|
| 343 |
+
gr.Markdown("Upload an audio file to remove background noise.")
|
| 344 |
+
noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
|
| 345 |
+
noise_reduce_slider = gr.Slider(
|
| 346 |
+
minimum=0.0, maximum=1.0, value=0.5, step=0.1,
|
| 347 |
+
label="Noise Reduction Level",
|
| 348 |
+
info="Higher values remove more noise."
|
| 349 |
+
)
|
| 350 |
+
noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
|
| 351 |
+
noise_button = gr.Button("Remove Background Noise")
|
| 352 |
+
|
| 353 |
with gr.Tab("Remove Background Music"):
|
| 354 |
gr.Markdown("Upload an audio file to remove background music.")
|
| 355 |
+
music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
|
| 356 |
+
music_output = gr.Audio(label="Processed Audio (Music Removed)", type="filepath")
|
| 357 |
+
music_button = gr.Button("Remove Background Music")
|
| 358 |
|
| 359 |
# Link buttons to functions
|
| 360 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
|
|
|
| 368 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
| 369 |
outputs=silence_output
|
| 370 |
)
|
| 371 |
+
noise_button.click(
|
| 372 |
+
remove_background_noise,
|
| 373 |
+
inputs=[noise_audio_input, noise_reduce_slider],
|
| 374 |
+
outputs=noise_output
|
| 375 |
+
)
|
| 376 |
+
music_button.click(
|
| 377 |
remove_background_music,
|
| 378 |
+
inputs=music_audio_input,
|
| 379 |
+
outputs=music_output
|
| 380 |
)
|
| 381 |
|
| 382 |
# Launch the Gradio interface
|