Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import torch
|
|
| 9 |
from demucs import pretrained
|
| 10 |
from demucs.apply import apply_model
|
| 11 |
import torchaudio
|
|
|
|
| 12 |
import matplotlib.pyplot as plt
|
| 13 |
from io import BytesIO
|
| 14 |
from PIL import Image
|
|
@@ -18,8 +19,8 @@ import librosa
|
|
| 18 |
import warnings
|
| 19 |
from faster_whisper import WhisperModel
|
| 20 |
from TTS.api import TTS
|
| 21 |
-
import pickle
|
| 22 |
import base64
|
|
|
|
| 23 |
|
| 24 |
# Suppress warnings
|
| 25 |
warnings.filterwarnings("ignore")
|
|
@@ -156,39 +157,47 @@ def auto_eq(audio, genre="Pop"):
|
|
| 156 |
|
| 157 |
return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
|
| 158 |
|
| 159 |
-
# ===
|
| 160 |
-
def
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
return out_path
|
| 175 |
|
| 176 |
-
# ===
|
| 177 |
-
def
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
elif saturation_type == "Console":
|
| 185 |
-
saturated = np.clip(samples, -32768, 32768) * intensity
|
| 186 |
-
elif saturation_type == "Mix Bus":
|
| 187 |
-
saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
|
| 188 |
-
else:
|
| 189 |
-
saturated = samples
|
| 190 |
|
| 191 |
-
return
|
| 192 |
|
| 193 |
# === Process Audio Function ===
|
| 194 |
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
|
|
@@ -246,7 +255,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
| 246 |
status = f"β Error: {str(e)}"
|
| 247 |
return None, None, status, "", status
|
| 248 |
|
| 249 |
-
# === Waveform
|
| 250 |
def show_waveform(audio_file):
|
| 251 |
try:
|
| 252 |
audio = AudioSegment.from_file(audio_file)
|
|
@@ -301,25 +310,60 @@ preset_choices = {
|
|
| 301 |
|
| 302 |
preset_names = list(preset_choices.keys())
|
| 303 |
|
| 304 |
-
# ===
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
# === Main UI ===
|
| 321 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
| 322 |
-
gr.HTML(
|
| 323 |
gr.Markdown("### Upload, edit, export β powered by AI!")
|
| 324 |
|
| 325 |
with gr.Tab("π΅ Single File Studio"):
|
|
@@ -344,6 +388,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 344 |
output_audio, waveform_img, session_log_out, genre_out, status_box
|
| 345 |
])
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
# --- AI Mastering Chain Tab ===
|
| 348 |
with gr.Tab("π§ AI Mastering Chain"):
|
| 349 |
gr.Interface(
|
|
@@ -376,9 +437,19 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 376 |
# --- Preset Cards Gallery ===
|
| 377 |
with gr.Tab("π Preset Gallery"):
|
| 378 |
gr.Markdown("### Select a preset visually")
|
| 379 |
-
preset_gallery = gr.Gallery(value=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
|
| 381 |
-
preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices
|
| 382 |
|
| 383 |
def load_preset_by_card(evt: gr.SelectData):
|
| 384 |
index = evt.index % len(preset_names)
|
|
@@ -397,23 +468,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 397 |
description="Enhance vocals with doubling or harmony"
|
| 398 |
)
|
| 399 |
|
| 400 |
-
# --- Remix Mode ---
|
| 401 |
-
with gr.Tab("π Remix Mode"):
|
| 402 |
-
gr.Interface(
|
| 403 |
-
fn=stem_split,
|
| 404 |
-
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
| 405 |
-
outputs=[
|
| 406 |
-
gr.File(label="Vocals"),
|
| 407 |
-
gr.File(label="Drums"),
|
| 408 |
-
gr.File(label="Bass"),
|
| 409 |
-
gr.File(label="Other")
|
| 410 |
-
],
|
| 411 |
-
title="Split Into Drums, Bass, Vocals, and More",
|
| 412 |
-
description="Use AI to separate musical elements like vocals, drums, and bass.",
|
| 413 |
-
flagging_mode="never",
|
| 414 |
-
clear_btn=None
|
| 415 |
-
)
|
| 416 |
-
|
| 417 |
# --- Batch Processing ---
|
| 418 |
with gr.Tab("π Batch Processing"):
|
| 419 |
gr.Interface(
|
|
@@ -450,23 +504,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 450 |
)
|
| 451 |
|
| 452 |
# --- Real-Time Spectrum Analyzer + Live EQ Preview ===
|
| 453 |
-
def visualize_spectrum(audio_path):
    """Render a spectrogram of an audio file as a PIL image.

    Args:
        audio_path: Path of the audio file, passed to ``torchaudio.load``.

    Returns:
        A ``PIL.Image`` holding a PNG rendering of the dB-scaled STFT magnitude.
    """
    # Must stay the first statement: a function-scope ``import librosa.display``
    # makes ``librosa`` a local name, so it has to be bound before any use.
    # The explicit import is needed because older librosa releases do not
    # load the ``display`` submodule with a bare ``import librosa``.
    import librosa.display

    y, sr = torchaudio.load(audio_path)
    # Downmix by averaging over the first axis (channels, per torchaudio's
    # default layout). Flattening instead would concatenate the channels end
    # to end and stretch the time axis of the plot.
    mono = y.mean(dim=0).numpy()
    stft = librosa.stft(mono)
    db = librosa.amplitude_to_db(abs(stft))

    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure", and always close it so a failed render does not leak a figure.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(
            db, sr=sr, x_axis="time", y_axis="hz", cmap="magma", ax=ax
        )
        fig.colorbar(img, ax=ax, format="%+2.0f dB")
        ax.set_title("Frequency Spectrum")
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
|
| 469 |
-
|
| 470 |
with gr.Tab("π Frequency Spectrum"):
|
| 471 |
gr.Interface(
|
| 472 |
fn=visualize_spectrum,
|
|
@@ -490,6 +527,22 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 490 |
)
|
| 491 |
|
| 492 |
# --- Save/Load Mix Session (.aiproj) ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
with gr.Tab("π Save/Load Project"):
|
| 494 |
gr.Interface(
|
| 495 |
fn=save_project,
|
|
|
|
| 9 |
from demucs import pretrained
|
| 10 |
from demucs.apply import apply_model
|
| 11 |
import torchaudio
|
| 12 |
+
from pathlib import Path
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
from io import BytesIO
|
| 15 |
from PIL import Image
|
|
|
|
| 19 |
import warnings
|
| 20 |
from faster_whisper import WhisperModel
|
| 21 |
from TTS.api import TTS
|
|
|
|
| 22 |
import base64
|
| 23 |
+
import pickle
|
| 24 |
|
| 25 |
# Suppress warnings
|
| 26 |
warnings.filterwarnings("ignore")
|
|
|
|
| 157 |
|
| 158 |
return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
|
| 159 |
|
| 160 |
+
# === Vocal Isolation Helpers ===
|
| 161 |
+
def load_track_local(path, sample_rate, channels=2):
    """Load an audio file, resample it and adapt its channel count.

    Args:
        path: Audio file path, passed to ``torchaudio.load``.
        sample_rate: Target sample rate; the signal is resampled when the
            file's rate differs.
        channels: Requested number of channels (default 2).

    Returns:
        The audio tensor. For ``channels == 1`` the channel axis is averaged
        away (1-D result, unchanged from the previous behaviour); otherwise
        the result has ``channels`` rows.

    Raises:
        ValueError: If the file has more than one channel but fewer than
            requested, since there is no sensible way to invent the rest.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)

    if channels == 1:
        return sig.mean(0)

    src_channels = sig.shape[0]
    if src_channels == channels:
        return sig
    if src_channels == 1:
        # Mono file: duplicate the channel. ``repeat`` (not ``expand``) gives
        # real storage, so callers may still modify the tensor in place.
        return sig.repeat(channels, 1)
    if src_channels > channels:
        # Surround / multi-track file: keep the leading channels only.
        return sig[:channels]
    raise ValueError(
        f"Audio file has {src_channels} channels; cannot convert to {channels}."
    )
|
| 168 |
+
|
| 169 |
+
def save_track(path, wav, sample_rate):
    """Write ``wav`` to ``path`` at ``sample_rate`` using ``torchaudio.save``.

    ``path`` may be a string or any path-like object; it is normalised through
    ``pathlib.Path`` and handed to torchaudio as a plain string.
    """
    destination = str(Path(path))
    torchaudio.save(destination, wav, sample_rate)
|
| 172 |
+
|
| 173 |
+
def apply_vocal_isolation(audio_path):
    """Extract the vocal stem of a track with the ``htdemucs`` model.

    Args:
        audio_path: Path of the input audio file.

    Returns:
        Path of a WAV file containing the separated vocals, written into a
        freshly created temporary directory.
    """
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    # Normalise with scalar statistics of the mono reference, then undo the
    # scaling on the separated stems. The previous per-sample subtraction
    # (``wav -= ref[:, None]``) broadcast a (time, 1) tensor against
    # (channels, time) and raised for any real input.
    ref = wav.mean(0)
    mean = ref.mean()
    std = ref.std() + 1e-8  # epsilon keeps silent input from dividing by zero
    sources = apply_model(model, ((wav - mean) / std)[None])[0]
    sources = sources * std + mean

    # Look the stem up by name instead of relying on a fixed position.
    vocal_track = sources[model.sources.index("vocals")].cpu()

    # A per-call directory keeps the "vocals.wav" file name while preventing
    # concurrent requests from overwriting each other's output.
    out_path = os.path.join(tempfile.mkdtemp(), "vocals.wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
|
| 185 |
|
| 186 |
+
# === Stem Splitting (Drums, Bass, Other, Vocals) — Now Defined! ===
|
| 187 |
+
def stem_split(audio_path):
    """Separate a track into stems with ``htdemucs`` for the Remix Mode tab.

    Args:
        audio_path: Path of the input audio file.

    Returns:
        A list of four ``gr.File`` values ordered Vocals, Drums, Bass, Other.
        This is the order of the output components declared by the Remix Mode
        interface; Gradio assigns return values to outputs by position.
    """
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)
    sources = apply_model(model, wav[None])[0]

    output_dir = tempfile.mkdtemp()

    # Pair every separated tensor with the name the model itself reports, so
    # the file names can never drift from the model's actual stem order.
    paths_by_name = {}
    for name, stem in zip(model.sources, sources):
        path = os.path.join(output_dir, f"{name}.wav")
        save_track(path, stem.cpu(), model.samplerate)
        paths_by_name[name] = path

    # The model emits drums, bass, other, vocals; returning that order as-is
    # put the drums file under the "Vocals" label (and so on) in the UI.
    ui_order = ("vocals", "drums", "bass", "other")
    return [gr.File(value=paths_by_name[name]) for name in ui_order]
|
| 201 |
|
| 202 |
# === Process Audio Function ===
|
| 203 |
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
|
|
|
|
| 255 |
status = f"β Error: {str(e)}"
|
| 256 |
return None, None, status, "", status
|
| 257 |
|
| 258 |
+
# === Visualize Waveform ===
|
| 259 |
def show_waveform(audio_file):
|
| 260 |
try:
|
| 261 |
audio = AudioSegment.from_file(audio_file)
|
|
|
|
| 310 |
|
| 311 |
preset_names = list(preset_choices.keys())
|
| 312 |
|
| 313 |
+
# === Batch Processing Function ===
|
| 314 |
+
def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
    """Run ``process_audio`` over several uploads and bundle the results in a ZIP.

    Args:
        files: Uploaded files; each item is either a path string or an object
            exposing the path as ``.name``.
        selected_effects, isolate_vocals, preset_name, export_format:
            Forwarded unchanged to ``process_audio`` for every file.

    Returns:
        ``(zip_path, status_message)`` on success, ``(None, error_message)``
        when nothing could be produced. Files whose processing failed are
        skipped and counted in the status message.
    """
    try:
        if not files:
            raise ValueError("no files provided")

        extension = export_format.lower()
        processed = []  # (original index, output path, session log)
        failed = 0

        for i, file in enumerate(files):
            source = getattr(file, "name", file)
            processed_path, _, log, _, _ = process_audio(
                source, selected_effects, isolate_vocals, preset_name, export_format
            )
            if processed_path is None:
                # process_audio signals failure with a None path; writing that
                # into the archive would abort the whole batch.
                failed += 1
                continue
            processed.append((i, processed_path, log))

        if not processed:
            raise RuntimeError("none of the files could be processed")

        output_dir = tempfile.mkdtemp()
        zip_path = os.path.join(output_dir, "batch_output.zip")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for i, path, log in processed:
                zipf.write(path, f"processed_{i}.{extension}")
                zipf.writestr(f"session_info_{i}.json", log or "")

        message = "π¦ ZIP created successfully!"
        if failed:
            message += f" ({failed} of {len(files)} file(s) failed and were skipped)"
        return zip_path, message

    except Exception as e:
        return None, f"β Batch processing failed: {str(e)}"
|
| 337 |
+
|
| 338 |
+
# === Vocal Pitch Correction — Auto-Tune Style ===
|
| 339 |
+
def auto_tune_vocal(audio_path, target_key="C"):
    """Placeholder auto-tune: apply a fixed pitch shift to the whole file.

    Args:
        audio_path: Path of the audio file to load.
        target_key: Currently ignored; no pitch detection is performed yet.

    Returns:
        Whatever ``apply_pitch_shift`` returns for a shift argument of 0.2
        (NOTE(review): the unit of that argument is defined by
        ``apply_pitch_shift`` — confirm), or ``None`` when loading or
        shifting fails.
    """
    import logging

    try:
        # Placeholder for real-time pitch detection
        return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
    except Exception:
        # Keep the best-effort contract (None on failure) but leave a
        # traceback in the logs instead of discarding the error silently.
        logging.getLogger(__name__).exception(
            "auto_tune_vocal failed for %r", audio_path
        )
        return None
|
| 345 |
+
|
| 346 |
+
# === Real-Time Spectrum Analyzer + Live EQ Preview ===
|
| 347 |
+
def visualize_spectrum(audio_path):
    """Render a spectrogram of an audio file as a PIL image.

    Args:
        audio_path: Path of the audio file, passed to ``torchaudio.load``.

    Returns:
        A ``PIL.Image`` holding a PNG rendering of the dB-scaled STFT magnitude.
    """
    # Must stay the first statement: a function-scope ``import librosa.display``
    # makes ``librosa`` a local name, so it has to be bound before any use.
    # The explicit import is needed because older librosa releases do not
    # load the ``display`` submodule with a bare ``import librosa``.
    import librosa.display

    y, sr = torchaudio.load(audio_path)
    # Downmix by averaging over the first axis (channels, per torchaudio's
    # default layout). Flattening instead would concatenate the channels end
    # to end and stretch the time axis of the plot.
    mono = y.mean(dim=0).numpy()
    stft = librosa.stft(mono)
    db = librosa.amplitude_to_db(abs(stft))

    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure", and always close it so a failed render does not leak a figure.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(
            db, sr=sr, x_axis="time", y_axis="hz", cmap="magma", ax=ax
        )
        fig.colorbar(img, ax=ax, format="%+2.0f dB")
        ax.set_title("Frequency Spectrum")
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
|
| 363 |
|
| 364 |
# === Main UI ===
|
| 365 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
| 366 |
+
gr.HTML('<div class="studio-header"><img src="logo.png" width="400" /></div>')
|
| 367 |
gr.Markdown("### Upload, edit, export β powered by AI!")
|
| 368 |
|
| 369 |
with gr.Tab("π΅ Single File Studio"):
|
|
|
|
| 388 |
output_audio, waveform_img, session_log_out, genre_out, status_box
|
| 389 |
])
|
| 390 |
|
| 391 |
+
# --- Remix Mode ---
|
| 392 |
+
with gr.Tab("π Remix Mode"):
|
| 393 |
+
gr.Interface(
|
| 394 |
+
fn=stem_split,
|
| 395 |
+
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
| 396 |
+
outputs=[
|
| 397 |
+
gr.File(label="Vocals"),
|
| 398 |
+
gr.File(label="Drums"),
|
| 399 |
+
gr.File(label="Bass"),
|
| 400 |
+
gr.File(label="Other")
|
| 401 |
+
],
|
| 402 |
+
title="Split Into Drums, Bass, Vocals, and More",
|
| 403 |
+
description="Use AI to separate musical elements like vocals, drums, and bass.",
|
| 404 |
+
flagging_mode="never",
|
| 405 |
+
clear_btn=None
|
| 406 |
+
)
|
| 407 |
+
|
| 408 |
# --- AI Mastering Chain Tab ===
|
| 409 |
with gr.Tab("π§ AI Mastering Chain"):
|
| 410 |
gr.Interface(
|
|
|
|
| 437 |
# --- Preset Cards Gallery ===
|
| 438 |
with gr.Tab("π Preset Gallery"):
|
| 439 |
gr.Markdown("### Select a preset visually")
|
| 440 |
+
preset_gallery = gr.Gallery(value=[
|
| 441 |
+
("images/pop_card.png", "Pop"),
|
| 442 |
+
("images/edm_card.png", "EDM"),
|
| 443 |
+
("images/rock_card.png", "Rock"),
|
| 444 |
+
("images/hiphop_card.png", "Hip-Hop"),
|
| 445 |
+
("images/acoustic_card.png", "Acoustic"),
|
| 446 |
+
("images/stage_mode_card.png", "Stage Mode"),
|
| 447 |
+
("images/vocal_distortion_card.png", "Vocal Distortion"),
|
| 448 |
+
("images/tube_saturation_card.png", "Tube Saturation")
|
| 449 |
+
], label="Preset Cards", columns=4, height="auto")
|
| 450 |
+
|
| 451 |
preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
|
| 452 |
+
preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Effects")
|
| 453 |
|
| 454 |
def load_preset_by_card(evt: gr.SelectData):
|
| 455 |
index = evt.index % len(preset_names)
|
|
|
|
| 468 |
description="Enhance vocals with doubling or harmony"
|
| 469 |
)
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
# --- Batch Processing ---
|
| 472 |
with gr.Tab("π Batch Processing"):
|
| 473 |
gr.Interface(
|
|
|
|
| 504 |
)
|
| 505 |
|
| 506 |
# --- Real-Time Spectrum Analyzer + Live EQ Preview ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
with gr.Tab("π Frequency Spectrum"):
|
| 508 |
gr.Interface(
|
| 509 |
fn=visualize_spectrum,
|
|
|
|
| 527 |
)
|
| 528 |
|
| 529 |
# --- Save/Load Mix Session (.aiproj) ===
|
| 530 |
+
def save_project(audio, preset, effects):
    """Serialise a mix session to a ``.aiproj`` file (a pickled dict).

    Args:
        audio: Path (or file-like object) accepted by ``AudioSegment.from_file``.
        preset: Preset value to store as-is.
        effects: Effects value to store as-is.

    Returns:
        Path of the written ``project.aiproj`` file inside a freshly created
        temporary directory.
    """
    segment = AudioSegment.from_file(audio)
    project_data = {
        "audio": segment.raw_data,
        # Raw PCM bytes cannot be decoded without these three values, so they
        # are stored next to the samples. Readers that only use the
        # "preset"/"effects" keys are unaffected by the extra entries.
        "frame_rate": segment.frame_rate,
        "channels": segment.channels,
        "sample_width": segment.sample_width,
        "preset": preset,
        "effects": effects,
    }
    # A per-call directory keeps the file name stable for the download while
    # preventing concurrent sessions from overwriting each other's project.
    out_path = os.path.join(tempfile.mkdtemp(), "project.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
|
| 540 |
+
|
| 541 |
+
def load_project(project_file):
    """Read the preset and effects stored in a ``.aiproj`` project file.

    Args:
        project_file: Either a path string or an upload object exposing the
            path as ``.name``.

    Returns:
        ``(preset, effects)`` as stored under the "preset" and "effects" keys.

    Raises:
        KeyError: If the file does not contain both keys.
    """
    path = getattr(project_file, "name", project_file)
    # SECURITY: pickle.load executes arbitrary code embedded in the file, and
    # this file comes from a user upload. Only open projects from trusted
    # sources, or migrate the format to JSON (with the audio stored
    # separately) before exposing this publicly.
    with open(path, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]
|
| 545 |
+
|
| 546 |
with gr.Tab("π Save/Load Project"):
|
| 547 |
gr.Interface(
|
| 548 |
fn=save_project,
|