Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import tempfile
|
| 5 |
import os
|
|
@@ -102,9 +103,9 @@ def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
|
|
| 102 |
return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
|
| 103 |
|
| 104 |
def apply_bitcrush(audio, bit_depth=8):
|
| 105 |
-
samples = np.array(audio.get_array_of_samples())
|
| 106 |
max_val = np.iinfo(np.int16).max
|
| 107 |
-
crushed = (samples
|
| 108 |
return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
|
| 109 |
|
| 110 |
def apply_auto_gain(audio, target_dB=-20):
|
|
@@ -116,6 +117,16 @@ def apply_vocal_distortion(audio, intensity=0.3):
|
|
| 116 |
distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
|
| 117 |
return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
# === Vocal Isolation Helpers ===
|
| 120 |
def load_track_local(path, sample_rate, channels=2):
|
| 121 |
sig, rate = torchaudio.load(path)
|
|
@@ -188,7 +199,15 @@ if not preset_choices:
|
|
| 188 |
"Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
|
| 189 |
"ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
|
| 190 |
"Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
|
| 191 |
-
"8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
}
|
| 193 |
|
| 194 |
preset_names = list(preset_choices.keys())
|
|
@@ -252,7 +271,9 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
| 252 |
"Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
|
| 253 |
"Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
|
| 254 |
"Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
|
| 255 |
-
"Vocal Distortion": lambda x: apply_vocal_distortion(x)
|
|
|
|
|
|
|
| 256 |
}
|
| 257 |
|
| 258 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
@@ -317,7 +338,7 @@ def transcribe_audio(audio_path):
|
|
| 317 |
text = " ".join([seg.text for seg in segments])
|
| 318 |
return text
|
| 319 |
|
| 320 |
-
# === TTS
|
| 321 |
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
|
| 322 |
|
| 323 |
def generate_tts(text):
|
|
@@ -355,7 +376,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
|
|
| 355 |
if not nonsilent_ranges:
|
| 356 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 357 |
|
| 358 |
-
trimmed = audio[nonsilent_ranges[0][0]:
|
| 359 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 360 |
trimmed.export(out_path, format="wav")
|
| 361 |
return out_path
|
|
@@ -369,7 +390,7 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
| 369 |
mixed.export(out_path, format="wav")
|
| 370 |
return out_path
|
| 371 |
|
| 372 |
-
# === Dummy Voice Cloning Tab β Works
|
| 373 |
def clone_voice(*args):
|
| 374 |
return "β οΈ Voice cloning requires local install β use Python 3.9 or below"
|
| 375 |
|
|
@@ -432,7 +453,9 @@ effect_options = [
|
|
| 432 |
"Flanger",
|
| 433 |
"Bitcrusher",
|
| 434 |
"Auto Gain",
|
| 435 |
-
"Vocal Distortion"
|
|
|
|
|
|
|
| 436 |
]
|
| 437 |
|
| 438 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
@@ -512,6 +535,47 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 512 |
description="Convert voice to text and edit it before exporting again."
|
| 513 |
)
|
| 514 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
# --- Voice Cloning (Local Only) ===
|
| 516 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
| 517 |
gr.Interface(
|
|
@@ -563,7 +627,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 563 |
return None, None, None, None
|
| 564 |
|
| 565 |
with gr.Tab("π§Ύ Auto-Save & Resume"):
|
| 566 |
-
gr.Markdown("Save your current state and resume
|
| 567 |
|
| 568 |
action_radio = gr.Radio(["save", "load"], label="Action", value="save")
|
| 569 |
audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
|
|
@@ -622,7 +686,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 622 |
)
|
| 623 |
|
| 624 |
# --- Mix Two Tracks ===
|
| 625 |
-
with gr.Tab("
|
| 626 |
gr.Interface(
|
| 627 |
fn=mix_tracks,
|
| 628 |
inputs=[
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
| 3 |
+
from pydub.silence import detect_nonsilent
|
| 4 |
import numpy as np
|
| 5 |
import tempfile
|
| 6 |
import os
|
|
|
|
| 103 |
return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
|
| 104 |
|
| 105 |
def apply_bitcrush(audio, bit_depth=8):
    """Reduce the amplitude resolution of ``audio`` to ``bit_depth`` bits.

    Every sample is snapped down to a multiple of ``2 ** (16 - bit_depth)``,
    so only ``2 ** bit_depth`` distinct levels remain while the signal keeps
    its full-scale amplitude.

    The previous implementation computed ``samples / max_val * 2 ** bit_depth``,
    which squeezed the whole signal into roughly +/-256 at the default depth:
    the audible result was mostly a ~42 dB volume drop, not a bitcrush.

    Args:
        audio: Object exposing ``get_array_of_samples()``, ``frame_rate`` and
            ``channels``.
        bit_depth: Target resolution in bits. Values outside 1..16 are clamped.

    Returns:
        Whatever ``array_to_audiosegment`` builds from the quantized int16
        samples, the source frame rate and the source channel count.
    """
    # NOTE(review): samples are assumed to be 16-bit, as the original int16
    # scaling implied - confirm for sources with a different sample width.
    total_bits = 16
    int16 = np.iinfo(np.int16)

    depth = int(min(max(bit_depth, 1), total_bits))
    step = 1 << (total_bits - depth)

    # Work in int32 so the floor/multiply round trip cannot wrap around.
    samples = np.array(audio.get_array_of_samples()).astype(np.int32)
    crushed = np.floor_divide(samples, step) * step
    crushed = np.clip(crushed, int16.min, int16.max).astype(np.int16)
    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
|
| 110 |
|
| 111 |
def apply_auto_gain(audio, target_dB=-20):
|
|
|
|
| 117 |
distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
|
| 118 |
return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
|
| 119 |
|
| 120 |
+
def apply_harmony(audio, shift_semitones=4):
    """Layer ``audio`` with two pitch-shifted copies of itself.

    One copy is produced by ``apply_pitch_shift(audio, shift_semitones)`` and
    the other by ``apply_pitch_shift(audio, -shift_semitones)``; both are
    overlaid onto the unshifted input, the upward shift first.

    Args:
        audio: Object accepted by ``apply_pitch_shift`` that provides
            ``overlay()``.
        shift_semitones: Interval passed to ``apply_pitch_shift`` (negated
            for the second copy).

    Returns:
        The result of the two chained ``overlay()`` calls.
    """
    voices = (
        apply_pitch_shift(audio, shift_semitones),
        apply_pitch_shift(audio, -shift_semitones),
    )
    layered = audio
    for voice in voices:
        layered = layered.overlay(voice)
    return layered
|
| 124 |
+
|
| 125 |
+
def apply_stage_mode(audio):
    """Run ``audio`` through the fixed "stage" chain.

    The chain is ``apply_reverb`` -> ``apply_bass_boost(gain=6)`` ->
    ``apply_limiter(limit_dB=-2)``, each stage receiving the previous
    stage's output.

    Args:
        audio: Input accepted by ``apply_reverb``.

    Returns:
        The value returned by ``apply_limiter``.
    """
    with_reverb = apply_reverb(audio)
    with_bass = apply_bass_boost(with_reverb, gain=6)
    return apply_limiter(with_bass, limit_dB=-2)
|
| 129 |
+
|
| 130 |
# === Vocal Isolation Helpers ===
|
| 131 |
def load_track_local(path, sample_rate, channels=2):
|
| 132 |
sig, rate = torchaudio.load(path)
|
|
|
|
| 199 |
"Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
|
| 200 |
"ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
|
| 201 |
"Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
|
| 202 |
+
"8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
|
| 203 |
+
|
| 204 |
+
# 🎤 Vocalist Presets
|
| 205 |
+
"π Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
|
| 206 |
+
"π§ͺ Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
|
| 207 |
+
"πΆ Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
|
| 208 |
+
"π« ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
|
| 209 |
+
"πΌ Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
|
| 210 |
+
"π΅ Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"]
|
| 211 |
}
|
| 212 |
|
| 213 |
preset_names = list(preset_choices.keys())
|
|
|
|
| 271 |
"Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
|
| 272 |
"Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
|
| 273 |
"Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
|
| 274 |
+
"Vocal Distortion": lambda x: apply_vocal_distortion(x),
|
| 275 |
+
"Harmony": lambda x: apply_harmony(x),
|
| 276 |
+
"Stage Mode": apply_stage_mode
|
| 277 |
}
|
| 278 |
|
| 279 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
|
|
| 338 |
text = " ".join([seg.text for seg in segments])
|
| 339 |
return text
|
| 340 |
|
| 341 |
+
# === TTS Voice Generator ===
|
| 342 |
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
|
| 343 |
|
| 344 |
def generate_tts(text):
|
|
|
|
| 376 |
if not nonsilent_ranges:
|
| 377 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 378 |
|
| 379 |
+
trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
|
| 380 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 381 |
trimmed.export(out_path, format="wav")
|
| 382 |
return out_path
|
|
|
|
| 390 |
mixed.export(out_path, format="wav")
|
| 391 |
return out_path
|
| 392 |
|
| 393 |
+
# === Dummy Voice Cloning Tab — Works Locally Only ===
|
| 394 |
def clone_voice(*args):
    """Stub for the voice-cloning tab.

    Accepts any positional arguments, ignores them, and returns a fixed
    notice string instead of doing any processing.
    """
    return "β οΈ Voice cloning requires local install β use Python 3.9 or below"
|
| 396 |
|
|
|
|
| 453 |
"Flanger",
|
| 454 |
"Bitcrusher",
|
| 455 |
"Auto Gain",
|
| 456 |
+
"Vocal Distortion",
|
| 457 |
+
"Harmony",
|
| 458 |
+
"Stage Mode"
|
| 459 |
]
|
| 460 |
|
| 461 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
|
|
| 535 |
description="Convert voice to text and edit it before exporting again."
|
| 536 |
)
|
| 537 |
|
| 538 |
+
# --- Vocal Presets for Singers ===
|
| 539 |
+
with gr.Tab("π€ Vocal Presets for Singers"):
    # Tab that feeds a vocal track through process_audio with singer-oriented
    # effect and preset options.
    gr.Interface(
        fn=process_audio,
        inputs=[
            gr.Audio(label="Upload Vocal Track", type="filepath"),
            gr.CheckboxGroup(choices=[
                "Noise Reduction",
                "Normalize",
                "Compress Dynamic Range",
                "Bass Boost",
                "Treble Boost",
                "Reverb",
                "Auto Gain",
                "Vocal Distortion",
                "Harmony",
                "Stage Mode"
            ]),
            gr.Checkbox(label="Isolate Vocals After Effects"),
            # "Default" must be one of the choices: it is the initial value,
            # and a dropdown value outside `choices` is rejected by recent
            # Gradio releases. It is not a key of preset_choices, so
            # process_audio's preset_choices.get(preset_name, selected_effects)
            # lookup falls back to the effects ticked above.
            gr.Dropdown(choices=[
                "Default",
                "π Clean Vocal",
                "π§ͺ Vocal Distortion",
                "πΆ Singer's Harmony",
                "π« ASMR Vocal",
                "πΌ Stage Mode",
                "π΅ Auto-Tune Style"
            ], label="Select Vocal Preset", value="Default"),
            gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
        ],
        outputs=[
            gr.Audio(label="Processed Vocal", type="filepath"),
            gr.Image(label="Waveform Preview"),
            gr.Textbox(label="Session Log (JSON)", lines=5),
            gr.Textbox(label="Detected Genre", lines=1),
            # The literal was split across two lines (unterminated string);
            # rejoined into a single-line value.
            gr.Textbox(label="Status", value="✅ Ready", lines=1)
        ],
        title="Create Studio-Quality Vocal Tracks",
        description="Apply singer-friendly presets and effects to enhance vocals.",
        allow_flagging="never"
    )
|
| 578 |
+
|
| 579 |
# --- Voice Cloning (Local Only) ===
|
| 580 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
| 581 |
gr.Interface(
|
|
|
|
| 627 |
return None, None, None, None
|
| 628 |
|
| 629 |
with gr.Tab("π§Ύ Auto-Save & Resume"):
|
| 630 |
+
gr.Markdown("Save your current state and resume later.")
|
| 631 |
|
| 632 |
action_radio = gr.Radio(["save", "load"], label="Action", value="save")
|
| 633 |
audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
|
|
|
|
| 686 |
)
|
| 687 |
|
| 688 |
# --- Mix Two Tracks ===
|
| 689 |
+
with gr.Tab(" remix mode"),
|
| 690 |
gr.Interface(
|
| 691 |
fn=mix_tracks,
|
| 692 |
inputs=[
|