Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
| 3 |
-
from pydub.silence import detect_nonsilent
|
| 4 |
import numpy as np
|
| 5 |
import tempfile
|
| 6 |
import os
|
|
@@ -85,6 +84,38 @@ def apply_bass_boost(audio, gain=10):
|
|
| 85 |
def apply_treble_boost(audio, gain=10):
|
| 86 |
return audio.high_pass_filter(4000).apply_gain(gain)
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
# === Vocal Isolation Helpers ===
|
| 89 |
def load_track_local(path, sample_rate, channels=2):
|
| 90 |
sig, rate = torchaudio.load(path)
|
|
@@ -152,7 +183,12 @@ if not preset_choices:
|
|
| 152 |
preset_choices = {
|
| 153 |
"Default": [],
|
| 154 |
"Clean Podcast": ["Noise Reduction", "Normalize"],
|
| 155 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
}
|
| 157 |
|
| 158 |
preset_names = list(preset_choices.keys())
|
|
@@ -210,6 +246,13 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
| 210 |
"Bass Boost": apply_bass_boost,
|
| 211 |
"Treble Boost": apply_treble_boost,
|
| 212 |
"Normalize": apply_normalize,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
}
|
| 214 |
|
| 215 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
@@ -312,7 +355,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
|
|
| 312 |
if not nonsilent_ranges:
|
| 313 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 314 |
|
| 315 |
-
trimmed = audio[nonsilent_ranges[0][0]:
|
| 316 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 317 |
trimmed.export(out_path, format="wav")
|
| 318 |
return out_path
|
|
@@ -326,7 +369,7 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
| 326 |
mixed.export(out_path, format="wav")
|
| 327 |
return out_path
|
| 328 |
|
| 329 |
-
# === Dummy Voice Cloning Tab – Works on
|
| 330 |
def clone_voice(*args):
|
| 331 |
return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
|
| 332 |
|
|
@@ -382,7 +425,14 @@ effect_options = [
|
|
| 382 |
"Stereo Widening",
|
| 383 |
"Bass Boost",
|
| 384 |
"Treble Boost",
|
| 385 |
-
"Normalize"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
]
|
| 387 |
|
| 388 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
@@ -421,7 +471,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 421 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 422 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
| 423 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 424 |
-
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]
|
| 425 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 426 |
],
|
| 427 |
outputs=[
|
|
@@ -462,7 +512,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 462 |
description="Convert voice to text and edit it before exporting again."
|
| 463 |
)
|
| 464 |
|
| 465 |
-
# --- Voice Cloning (
|
| 466 |
with gr.Tab("🎭 Voice Cloning (Local Only)"):
|
| 467 |
gr.Interface(
|
| 468 |
fn=clone_voice,
|
|
@@ -472,7 +522,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 472 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 473 |
],
|
| 474 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 475 |
-
title="Replace One Voice With Another
|
| 476 |
description="Clone voice from source to target speaker using AI"
|
| 477 |
)
|
| 478 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import tempfile
|
| 5 |
import os
|
|
|
|
| 84 |
def apply_treble_boost(audio, gain=10):
    """Emphasize high frequencies: high-pass at 4 kHz, then add `gain` dB."""
    brightened = audio.high_pass_filter(4000)
    return brightened.apply_gain(gain)
|
| 86 |
|
| 87 |
+
def apply_noise_gate(audio, threshold=-50.0, attack=50, release=100):
    """Mute samples whose amplitude falls below `threshold` dBFS.

    Args:
        audio: pydub AudioSegment to gate.
        threshold: gate threshold in dBFS (negative, e.g. -50.0).
        attack, release: reserved for envelope smoothing — currently unused.

    Returns:
        A new AudioSegment with sub-threshold samples zeroed (peak-normalized,
        matching the original behavior), or the input unchanged when it is
        effectively silent.
    """
    # Cast to float64 before squaring: raw int16 samples overflow numpy's
    # integer types in samples**2, corrupting the RMS estimate.
    samples = np.array(audio.get_array_of_samples()).astype(np.float64)
    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1:  # effectively silent input — nothing to gate
        return audio
    normalized = samples / np.max(np.abs(samples))
    envelope = np.abs(normalized)
    # Convert the dBFS threshold to a linear amplitude ratio (10**(dB/20)).
    # The original compared against threshold/100, which is negative for any
    # sensible dB value, so the gate never engaged at all.
    gate_level = 10.0 ** (threshold / 20.0)
    gated = np.where(envelope > gate_level, normalized, 0)
    return array_to_audiosegment(gated * np.iinfo(np.int16).max, audio.frame_rate, channels=audio.channels)
|
| 96 |
+
|
| 97 |
+
def apply_limiter(audio, limit_dB=-1):
    """Apply a fixed output-gain stage (placeholder 'limiter').

    NOTE(review): this performs no true peak limiting — it copies the segment
    via _spawn and applies `limit_dB` of gain; confirm intended behavior.
    """
    copied = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
    return copied.apply_gain(limit_dB)
|
| 100 |
+
|
| 101 |
+
def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
    """Crude 'phaser' placeholder.

    NOTE(review): overriding the frame rate by `rate` changes playback
    speed/pitch rather than phasing, and depth/feedback/mix are ignored —
    confirm this is the intended effect.
    """
    scaled_rate = int(audio.frame_rate * rate)
    return audio._spawn(audio.raw_data, overrides={"frame_rate": scaled_rate})
|
| 103 |
+
|
| 104 |
+
def apply_bitcrush(audio, bit_depth=8):
    """Reduce effective bit depth for a lo-fi / 8-bit sound.

    Args:
        audio: pydub AudioSegment (16-bit samples assumed).
        bit_depth: target bit depth; smaller values sound crunchier.

    Returns:
        AudioSegment quantized to roughly 2**bit_depth amplitude levels,
        at the original volume.
    """
    samples = np.array(audio.get_array_of_samples())
    max_val = np.iinfo(np.int16).max
    # Step between retained levels; max(1, ...) guards against a zero divisor
    # when bit_depth approaches 16.
    step = max(1, max_val // (2 ** bit_depth))
    # Quantize AND rescale: the original divided by `step` without multiplying
    # back, which collapsed the waveform to near-silence instead of crushing it.
    crushed = ((samples // step) * step).astype(np.int16)
    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
|
| 109 |
+
|
| 110 |
+
def apply_auto_gain(audio, target_dB=-20):
    """Bring the segment's average loudness to `target_dB` dBFS."""
    gain_needed = target_dB - audio.dBFS
    return audio.apply_gain(gain_needed)
|
| 113 |
+
|
| 114 |
+
def apply_vocal_distortion(audio, intensity=0.3):
    """Add sinusoidal waveshaping distortion to a vocal track.

    Args:
        audio: pydub AudioSegment.
        intensity: distortion amount; 0 leaves the signal unchanged.

    Returns:
        A distorted AudioSegment at the original frame rate / channel count.
    """
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
    # Clip before the int16 cast: casting out-of-range floats wraps around,
    # producing loud crackle at waveform peaks.
    info = np.iinfo(np.int16)
    distorted = np.clip(distorted, info.min, info.max)
    return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
|
| 118 |
+
|
| 119 |
# === Vocal Isolation Helpers ===
|
| 120 |
def load_track_local(path, sample_rate, channels=2):
|
| 121 |
sig, rate = torchaudio.load(path)
|
|
|
|
| 183 |
preset_choices = {
|
| 184 |
"Default": [],
|
| 185 |
"Clean Podcast": ["Noise Reduction", "Normalize"],
|
| 186 |
+
"Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
|
| 187 |
+
"Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
|
| 188 |
+
"Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
|
| 189 |
+
"ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
|
| 190 |
+
"Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
|
| 191 |
+
"8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
|
| 192 |
}
|
| 193 |
|
| 194 |
preset_names = list(preset_choices.keys())
|
|
|
|
| 246 |
"Bass Boost": apply_bass_boost,
|
| 247 |
"Treble Boost": apply_treble_boost,
|
| 248 |
"Normalize": apply_normalize,
|
| 249 |
+
"Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
|
| 250 |
+
"Limiter": lambda x: apply_limiter(x, limit_dB=-1),
|
| 251 |
+
"Phaser": lambda x: apply_phaser(x),
|
| 252 |
+
"Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
|
| 253 |
+
"Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
|
| 254 |
+
"Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
|
| 255 |
+
"Vocal Distortion": lambda x: apply_vocal_distortion(x)
|
| 256 |
}
|
| 257 |
|
| 258 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
|
|
| 355 |
if not nonsilent_ranges:
|
| 356 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 357 |
|
| 358 |
+
trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
|
| 359 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 360 |
trimmed.export(out_path, format="wav")
|
| 361 |
return out_path
|
|
|
|
| 369 |
mixed.export(out_path, format="wav")
|
| 370 |
return out_path
|
| 371 |
|
| 372 |
+
# === Dummy Voice Cloning Tab – Works on Local Only ===
|
| 373 |
def clone_voice(*args):
|
| 374 |
return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
|
| 375 |
|
|
|
|
| 425 |
"Stereo Widening",
|
| 426 |
"Bass Boost",
|
| 427 |
"Treble Boost",
|
| 428 |
+
"Normalize",
|
| 429 |
+
"Noise Gate",
|
| 430 |
+
"Limiter",
|
| 431 |
+
"Phaser",
|
| 432 |
+
"Flanger",
|
| 433 |
+
"Bitcrusher",
|
| 434 |
+
"Auto Gain",
|
| 435 |
+
"Vocal Distortion"
|
| 436 |
]
|
| 437 |
|
| 438 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
|
|
| 471 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 472 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
| 473 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 474 |
+
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
|
| 475 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 476 |
],
|
| 477 |
outputs=[
|
|
|
|
| 512 |
description="Convert voice to text and edit it before exporting again."
|
| 513 |
)
|
| 514 |
|
| 515 |
+
# --- Voice Cloning (Local Only) ===
|
| 516 |
with gr.Tab("🎭 Voice Cloning (Local Only)"):
|
| 517 |
gr.Interface(
|
| 518 |
fn=clone_voice,
|
|
|
|
| 522 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 523 |
],
|
| 524 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 525 |
+
title="Replace One Voice With Another",
|
| 526 |
description="Clone voice from source to target speaker using AI"
|
| 527 |
)
|
| 528 |
|