Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
| 3 |
-
from pydub.silence import detect_nonsilent
|
| 4 |
import numpy as np
|
| 5 |
import tempfile
|
| 6 |
import os
|
|
@@ -85,6 +84,38 @@ def apply_bass_boost(audio, gain=10):
|
|
| 85 |
def apply_treble_boost(audio, gain=10):
|
| 86 |
return audio.high_pass_filter(4000).apply_gain(gain)
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
# === Vocal Isolation Helpers ===
|
| 89 |
def load_track_local(path, sample_rate, channels=2):
|
| 90 |
sig, rate = torchaudio.load(path)
|
|
@@ -152,7 +183,12 @@ if not preset_choices:
|
|
| 152 |
preset_choices = {
|
| 153 |
"Default": [],
|
| 154 |
"Clean Podcast": ["Noise Reduction", "Normalize"],
|
| 155 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
}
|
| 157 |
|
| 158 |
preset_names = list(preset_choices.keys())
|
|
@@ -210,6 +246,13 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
| 210 |
"Bass Boost": apply_bass_boost,
|
| 211 |
"Treble Boost": apply_treble_boost,
|
| 212 |
"Normalize": apply_normalize,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
}
|
| 214 |
|
| 215 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
@@ -312,7 +355,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
|
|
| 312 |
if not nonsilent_ranges:
|
| 313 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 314 |
|
| 315 |
-
trimmed = audio[nonsilent_ranges[0][0]:
|
| 316 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 317 |
trimmed.export(out_path, format="wav")
|
| 318 |
return out_path
|
|
@@ -326,7 +369,7 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
| 326 |
mixed.export(out_path, format="wav")
|
| 327 |
return out_path
|
| 328 |
|
| 329 |
-
# === Dummy Voice Cloning Tab – Works on
|
| 330 |
def clone_voice(*args):
|
| 331 |
return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
|
| 332 |
|
|
@@ -382,7 +425,14 @@ effect_options = [
|
|
| 382 |
"Stereo Widening",
|
| 383 |
"Bass Boost",
|
| 384 |
"Treble Boost",
|
| 385 |
-
"Normalize"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
]
|
| 387 |
|
| 388 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
@@ -421,7 +471,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 421 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 422 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
| 423 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 424 |
-
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]
|
| 425 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 426 |
],
|
| 427 |
outputs=[
|
|
@@ -462,7 +512,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 462 |
description="Convert voice to text and edit it before exporting again."
|
| 463 |
)
|
| 464 |
|
| 465 |
-
# --- Voice Cloning (
|
| 466 |
with gr.Tab("🎭 Voice Cloning (Local Only)"):
|
| 467 |
gr.Interface(
|
| 468 |
fn=clone_voice,
|
|
@@ -472,7 +522,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 472 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 473 |
],
|
| 474 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 475 |
-
title="Replace One Voice With Another
|
| 476 |
description="Clone voice from source to target speaker using AI"
|
| 477 |
)
|
| 478 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import tempfile
|
| 5 |
import os
|
|
|
|
| 84 |
def apply_treble_boost(audio, gain=10):
    """Emphasize high frequencies: high-pass at 4 kHz, then add `gain` dB."""
    brightened = audio.high_pass_filter(4000)
    return brightened.apply_gain(gain)
|
| 86 |
|
| 87 |
+
def apply_noise_gate(audio, threshold=-50.0, attack=50, release=100):
    """Mute samples whose amplitude falls below `threshold` dBFS.

    Args:
        audio: pydub AudioSegment to gate.
        threshold: gate threshold in dBFS (negative, e.g. -50.0).
        attack, release: reserved for envelope smoothing — currently unused.

    Returns:
        A new AudioSegment with sub-threshold samples zeroed (peak-normalized,
        matching the original behavior), or the input unchanged when it is
        effectively silent.
    """
    # Cast to float64 before squaring: raw int16 samples overflow numpy's
    # integer types in samples**2, corrupting the RMS estimate.
    samples = np.array(audio.get_array_of_samples()).astype(np.float64)
    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1:  # effectively silent input — nothing to gate
        return audio
    normalized = samples / np.max(np.abs(samples))
    envelope = np.abs(normalized)
    # Convert the dBFS threshold to a linear amplitude ratio (10**(dB/20)).
    # The original compared against threshold/100, which is negative for any
    # sensible dB value, so the gate never engaged at all.
    gate_level = 10.0 ** (threshold / 20.0)
    gated = np.where(envelope > gate_level, normalized, 0)
    return array_to_audiosegment(gated * np.iinfo(np.int16).max, audio.frame_rate, channels=audio.channels)
|
| 96 |
+
|
| 97 |
+
def apply_limiter(audio, limit_dB=-1):
    """Apply a fixed output-gain stage (placeholder 'limiter').

    NOTE(review): this performs no true peak limiting — it copies the segment
    via _spawn and applies `limit_dB` of gain; confirm intended behavior.
    """
    copied = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
    return copied.apply_gain(limit_dB)
|
| 100 |
+
|
| 101 |
+
def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
    """Crude 'phaser' placeholder.

    NOTE(review): overriding the frame rate by `rate` changes playback
    speed/pitch rather than phasing, and depth/feedback/mix are ignored —
    confirm this is the intended effect.
    """
    scaled_rate = int(audio.frame_rate * rate)
    return audio._spawn(audio.raw_data, overrides={"frame_rate": scaled_rate})
|
| 103 |
+
|
| 104 |
+
def apply_bitcrush(audio, bit_depth=8):
    """Reduce effective bit depth for a lo-fi / 8-bit sound.

    Args:
        audio: pydub AudioSegment (16-bit samples assumed).
        bit_depth: target bit depth; smaller values sound crunchier.

    Returns:
        AudioSegment quantized to roughly 2**bit_depth amplitude levels,
        at the original volume.
    """
    samples = np.array(audio.get_array_of_samples())
    max_val = np.iinfo(np.int16).max
    # Step between retained levels; max(1, ...) guards against a zero divisor
    # when bit_depth approaches 16.
    step = max(1, max_val // (2 ** bit_depth))
    # Quantize AND rescale: the original divided by `step` without multiplying
    # back, which collapsed the waveform to near-silence instead of crushing it.
    crushed = ((samples // step) * step).astype(np.int16)
    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
|
| 109 |
+
|
| 110 |
+
def apply_auto_gain(audio, target_dB=-20):
    """Bring the segment's average loudness to `target_dB` dBFS."""
    gain_needed = target_dB - audio.dBFS
    return audio.apply_gain(gain_needed)
|
| 113 |
+
|
| 114 |
+
def apply_vocal_distortion(audio, intensity=0.3):
    """Add sinusoidal waveshaping distortion to a vocal track.

    Args:
        audio: pydub AudioSegment.
        intensity: distortion amount; 0 leaves the signal unchanged.

    Returns:
        A distorted AudioSegment at the original frame rate / channel count.
    """
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
    # Clip before the int16 cast: casting out-of-range floats wraps around,
    # producing loud crackle at waveform peaks.
    info = np.iinfo(np.int16)
    distorted = np.clip(distorted, info.min, info.max)
    return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
|
| 118 |
+
|
| 119 |
# === Vocal Isolation Helpers ===
|
| 120 |
def load_track_local(path, sample_rate, channels=2):
|
| 121 |
sig, rate = torchaudio.load(path)
|
|
|
|
| 183 |
preset_choices = {
|
| 184 |
"Default": [],
|
| 185 |
"Clean Podcast": ["Noise Reduction", "Normalize"],
|
| 186 |
+
"Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
|
| 187 |
+
"Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
|
| 188 |
+
"Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
|
| 189 |
+
"ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
|
| 190 |
+
"Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
|
| 191 |
+
"8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
|
| 192 |
}
|
| 193 |
|
| 194 |
preset_names = list(preset_choices.keys())
|
|
|
|
| 246 |
"Bass Boost": apply_bass_boost,
|
| 247 |
"Treble Boost": apply_treble_boost,
|
| 248 |
"Normalize": apply_normalize,
|
| 249 |
+
"Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
|
| 250 |
+
"Limiter": lambda x: apply_limiter(x, limit_dB=-1),
|
| 251 |
+
"Phaser": lambda x: apply_phaser(x),
|
| 252 |
+
"Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
|
| 253 |
+
"Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
|
| 254 |
+
"Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
|
| 255 |
+
"Vocal Distortion": lambda x: apply_vocal_distortion(x)
|
| 256 |
}
|
| 257 |
|
| 258 |
effects_to_apply = preset_choices.get(preset_name, selected_effects)
|
|
|
|
| 355 |
if not nonsilent_ranges:
|
| 356 |
return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
|
| 357 |
|
| 358 |
+
trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
|
| 359 |
out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
|
| 360 |
trimmed.export(out_path, format="wav")
|
| 361 |
return out_path
|
|
|
|
| 369 |
mixed.export(out_path, format="wav")
|
| 370 |
return out_path
|
| 371 |
|
| 372 |
+
# === Dummy Voice Cloning Tab – Works on Local Only ===
|
| 373 |
def clone_voice(*args):
|
| 374 |
return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
|
| 375 |
|
|
|
|
| 425 |
"Stereo Widening",
|
| 426 |
"Bass Boost",
|
| 427 |
"Treble Boost",
|
| 428 |
+
"Normalize",
|
| 429 |
+
"Noise Gate",
|
| 430 |
+
"Limiter",
|
| 431 |
+
"Phaser",
|
| 432 |
+
"Flanger",
|
| 433 |
+
"Bitcrusher",
|
| 434 |
+
"Auto Gain",
|
| 435 |
+
"Vocal Distortion"
|
| 436 |
]
|
| 437 |
|
| 438 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
|
|
| 471 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 472 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
| 473 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 474 |
+
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
|
| 475 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 476 |
],
|
| 477 |
outputs=[
|
|
|
|
| 512 |
description="Convert voice to text and edit it before exporting again."
|
| 513 |
)
|
| 514 |
|
| 515 |
+
# --- Voice Cloning (Local Only) ===
|
| 516 |
with gr.Tab("🎭 Voice Cloning (Local Only)"):
|
| 517 |
gr.Interface(
|
| 518 |
fn=clone_voice,
|
|
|
|
| 522 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 523 |
],
|
| 524 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 525 |
+
title="Replace One Voice With Another",
|
| 526 |
description="Clone voice from source to target speaker using AI"
|
| 527 |
)
|
| 528 |
|