Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -85,7 +85,7 @@ def apply_bass_boost(audio, gain=10):
|
|
| 85 |
def apply_treble_boost(audio, gain=10):
|
| 86 |
return audio.high_pass_filter(4000).apply_gain(gain)
|
| 87 |
|
| 88 |
-
def apply_noise_gate(audio, threshold=-50.0
|
| 89 |
samples = np.array(audio.get_array_of_samples())
|
| 90 |
rms = np.sqrt(np.mean(samples**2))
|
| 91 |
if rms < 1:
|
|
@@ -99,8 +99,8 @@ def apply_limiter(audio, limit_dB=-1):
|
|
| 99 |
limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
|
| 100 |
return limiter.apply_gain(limit_dB)
|
| 101 |
|
| 102 |
-
def apply_phaser(audio
|
| 103 |
-
return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate *
|
| 104 |
|
| 105 |
def apply_bitcrush(audio, bit_depth=8):
|
| 106 |
samples = np.array(audio.get_array_of_samples()).astype(np.float32)
|
|
@@ -127,6 +127,23 @@ def apply_stage_mode(audio):
|
|
| 127 |
processed = apply_bass_boost(processed, gain=6)
|
| 128 |
return apply_limiter(processed, limit_dB=-2)
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
# === Vocal Isolation Helpers ===
|
| 131 |
def load_track_local(path, sample_rate, channels=2):
|
| 132 |
sig, rate = torchaudio.load(path)
|
|
@@ -390,10 +407,49 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
| 390 |
mixed.export(out_path, format="wav")
|
| 391 |
return out_path
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
# === Dummy Voice Cloning Tab β Works Locally Only ===
|
| 394 |
def clone_voice(*args):
|
| 395 |
return "β οΈ Voice cloning requires local install β use Python 3.9 or below"
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
# === Speaker Diarization ("Who Spoke When?") ===
|
| 398 |
try:
|
| 399 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
|
@@ -417,7 +473,6 @@ def diarize_and_transcribe(audio_path):
|
|
| 417 |
audio.export(temp_wav, format="wav")
|
| 418 |
|
| 419 |
try:
|
| 420 |
-
from pyannote.audio import Pipeline as DiarizationPipeline
|
| 421 |
diarization = diarize_pipeline(temp_wav)
|
| 422 |
|
| 423 |
result = whisper.transcribe(temp_wav)
|
|
@@ -525,6 +580,20 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 525 |
clear_btn=None
|
| 526 |
)
|
| 527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
# --- Transcribe & Edit Tab ===
|
| 529 |
with gr.Tab("π Transcribe & Edit"):
|
| 530 |
gr.Interface(
|
|
@@ -535,40 +604,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 535 |
description="Convert voice to text and edit it before exporting again."
|
| 536 |
)
|
| 537 |
|
| 538 |
-
# --- Vocal Presets for Singers ===
|
| 539 |
-
with gr.Tab("π€ Vocal Presets for Singers"):
|
| 540 |
-
gr.Interface(
|
| 541 |
-
fn=process_audio,
|
| 542 |
-
inputs=[
|
| 543 |
-
gr.Audio(label="Upload Vocal Track", type="filepath"),
|
| 544 |
-
gr.CheckboxGroup(choices=[
|
| 545 |
-
"Noise Reduction",
|
| 546 |
-
"Normalize",
|
| 547 |
-
"Compress Dynamic Range",
|
| 548 |
-
"Bass Boost",
|
| 549 |
-
"Treble Boost",
|
| 550 |
-
"Reverb",
|
| 551 |
-
"Auto Gain",
|
| 552 |
-
"Vocal Distortion",
|
| 553 |
-
"Harmony",
|
| 554 |
-
"Stage Mode"
|
| 555 |
-
]),
|
| 556 |
-
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 557 |
-
gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0] if preset_names else None),
|
| 558 |
-
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 559 |
-
],
|
| 560 |
-
outputs=[
|
| 561 |
-
gr.Audio(label="Processed Vocal", type="filepath"),
|
| 562 |
-
gr.Image(label="Waveform Preview"),
|
| 563 |
-
gr.Textbox(label="Session Log (JSON)", lines=5),
|
| 564 |
-
gr.Textbox(label="Detected Genre", lines=1),
|
| 565 |
-
gr.Textbox(label="Status", value="β
Ready", lines=1)
|
| 566 |
-
],
|
| 567 |
-
title="Create Studio-Quality Vocal Tracks",
|
| 568 |
-
description="Apply singer-friendly presets and effects to enhance vocals.",
|
| 569 |
-
allow_flagging="never"
|
| 570 |
-
)
|
| 571 |
-
|
| 572 |
# --- Voice Cloning (Local Only) ===
|
| 573 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
| 574 |
gr.Interface(
|
|
@@ -679,7 +714,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 679 |
)
|
| 680 |
|
| 681 |
# --- Mix Two Tracks ===
|
| 682 |
-
with gr.Tab("
|
| 683 |
gr.Interface(
|
| 684 |
fn=mix_tracks,
|
| 685 |
inputs=[
|
|
|
|
| 85 |
def apply_treble_boost(audio, gain=10):
    """Emphasize treble: keep content above 4 kHz, then raise it by `gain` dB.

    Args:
        audio: pydub AudioSegment-like object (must support high_pass_filter
            and apply_gain).
        gain: boost in dB applied after filtering (default 10).

    Returns:
        A new audio segment with the treble boosted.
    """
    filtered = audio.high_pass_filter(4000)
    return filtered.apply_gain(gain)
|
| 87 |
|
| 88 |
+
def apply_noise_gate(audio, threshold=-50.0):
|
| 89 |
samples = np.array(audio.get_array_of_samples())
|
| 90 |
rms = np.sqrt(np.mean(samples**2))
|
| 91 |
if rms < 1:
|
|
|
|
| 99 |
limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
|
| 100 |
return limiter.apply_gain(limit_dB)
|
| 101 |
|
| 102 |
+
def apply_phaser(audio):
    """Cheap "phaser" effect: replay the raw samples at a 10% higher frame rate.

    NOTE(review): this is actually a pitch/speed shift, not a true phaser
    (no swept all-pass stages) — confirm the effect name is intentional.

    Args:
        audio: pydub AudioSegment-like object.

    Returns:
        A new segment spawned from the same raw data with frame_rate * 1.1.
    """
    shifted_rate = int(audio.frame_rate * 1.1)
    return audio._spawn(audio.raw_data, overrides={"frame_rate": shifted_rate})
def apply_bitcrush(audio, bit_depth=8):
|
| 106 |
samples = np.array(audio.get_array_of_samples()).astype(np.float32)
|
|
|
|
| 127 |
processed = apply_bass_boost(processed, gain=6)
|
| 128 |
return apply_limiter(processed, limit_dB=-2)
|
| 129 |
|
| 130 |
+
# === Genre Mastering Presets ===
# Maps a genre name to the ordered list of effect labels applied when
# mastering for that genre.  Insertion order matters: the Genre Mastering
# tab builds its dropdown from list(genre_presets.keys()).
genre_presets = {
    "Soul": ["Warmth", "Bass Boost (+6dB)", "Mid Enhance"],
    "Funk": ["Treble Boost (+6dB)", "Compression", "Stereo Widening"],
    "Rock": ["Distortion", "Punchy Mids", "Reverb"],
    "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
    "Acoustic": ["Natural Reverb", "Gentle Compression", "Mid Focus"],
    "Dance": ["Loudness Maximizer", "Bass Emphasis", "Stereo Widen"],
    "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
    "Country": ["Clean Mix", "Subtle Reverb", "Mid Focus"],
    "Disco": ["Rhythmic Echo", "Bass Thump", "Treble Boost (+8dB)"],
    "Metal": ["Distortion", "High Gain", "Crisp Highs"],
    "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"],
    "Trap": ["808 Bass", "Reverb", "Lo-Fi Texture"],
    "LoFi": ["Bitcrusher", "Tape Hiss", "Soft Compression"],
}
| 147 |
# === Vocal Isolation Helpers ===
|
| 148 |
def load_track_local(path, sample_rate, channels=2):
|
| 149 |
sig, rate = torchaudio.load(path)
|
|
|
|
| 407 |
mixed.export(out_path, format="wav")
|
| 408 |
return out_path
|
| 409 |
|
| 410 |
+
# === Genre Mastering Tab ===
def apply_genre_preset(audio, genre):
    """Master `audio` using the effect chain mapped to `genre`.

    Args:
        audio: filepath (or audio object) accepted by process_audio.
        genre: key into the module-level genre_presets table; unknown
            genres fall back to an empty effect list.

    Returns:
        Whatever process_audio returns (processed audio plus metadata tuple).
    """
    # BUG FIX: the lookup table defined above is named `genre_presets`
    # (the tab's dropdown is built from list(genre_presets.keys()));
    # the original read an undefined name `preset_choices`, which would
    # raise NameError at runtime.  The `global` statement was also
    # unnecessary for a read-only module-level lookup and was dropped.
    selected_preset = genre_presets.get(genre, [])
    return process_audio(audio, selected_preset, False, genre, "WAV")
|
| 415 |
+
|
| 416 |
+
with gr.Tab("π§ Genre Mastering"):
|
| 417 |
+
gr.Markdown("Apply pre-tuned mastering settings for different music genres.")
|
| 418 |
+
|
| 419 |
+
genre_dropdown = gr.Dropdown(
|
| 420 |
+
choices=list(genre_presets.keys()),
|
| 421 |
+
label="Select Genre",
|
| 422 |
+
value="Pop"
|
| 423 |
+
)
|
| 424 |
+
|
| 425 |
+
gr.Interface(
|
| 426 |
+
fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
|
| 427 |
+
inputs=[
|
| 428 |
+
gr.Audio(label="Upload Track", type="filepath"),
|
| 429 |
+
genre_dropdown
|
| 430 |
+
],
|
| 431 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
| 432 |
+
title="Genre-Specific Mastering",
|
| 433 |
+
description="Apply professionally tuned presets for popular music genres."
|
| 434 |
+
)
|
| 435 |
+
|
| 436 |
# === Dummy Voice Cloning Tab β Works Locally Only ===
|
| 437 |
def clone_voice(*args):
|
| 438 |
return "β οΈ Voice cloning requires local install β use Python 3.9 or below"
|
| 439 |
|
| 440 |
+
with gr.Tab("π Voice Cloning (Local Only)"):
|
| 441 |
+
gr.Interface(
|
| 442 |
+
fn=clone_voice,
|
| 443 |
+
inputs=[
|
| 444 |
+
gr.File(label="Source Voice Clip"),
|
| 445 |
+
gr.File(label="Target Voice Clip"),
|
| 446 |
+
gr.Textbox(label="Text to Clone", lines=5)
|
| 447 |
+
],
|
| 448 |
+
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 449 |
+
title="Replace One Voice With Another",
|
| 450 |
+
description="Clone voice from source to target speaker using AI"
|
| 451 |
+
)
|
| 452 |
+
|
| 453 |
# === Speaker Diarization ("Who Spoke When?") ===
|
| 454 |
try:
|
| 455 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
|
|
|
| 473 |
audio.export(temp_wav, format="wav")
|
| 474 |
|
| 475 |
try:
|
|
|
|
| 476 |
diarization = diarize_pipeline(temp_wav)
|
| 477 |
|
| 478 |
result = whisper.transcribe(temp_wav)
|
|
|
|
| 580 |
clear_btn=None
|
| 581 |
)
|
| 582 |
|
| 583 |
+
# --- Genre Mastering Tab ===
|
| 584 |
+
with gr.Tab("π§ Genre Mastering"):
|
| 585 |
+
gr.Interface(
|
| 586 |
+
fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
|
| 587 |
+
inputs=[
|
| 588 |
+
gr.Audio(label="Upload Track", type="filepath"),
|
| 589 |
+
gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
|
| 590 |
+
],
|
| 591 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
| 592 |
+
title="Genre-Specific Mastering",
|
| 593 |
+
description="Apply professionally tuned presets for popular music genres.",
|
| 594 |
+
allow_flagging="never"
|
| 595 |
+
)
|
| 596 |
+
|
| 597 |
# --- Transcribe & Edit Tab ===
|
| 598 |
with gr.Tab("π Transcribe & Edit"):
|
| 599 |
gr.Interface(
|
|
|
|
| 604 |
description="Convert voice to text and edit it before exporting again."
|
| 605 |
)
|
| 606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
# --- Voice Cloning (Local Only) ===
|
| 608 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
| 609 |
gr.Interface(
|
|
|
|
| 714 |
)
|
| 715 |
|
| 716 |
# --- Mix Two Tracks ===
|
| 717 |
+
with gr.Tab(" remix mode"),
|
| 718 |
gr.Interface(
|
| 719 |
fn=mix_tracks,
|
| 720 |
inputs=[
|