Spaces:
Sleeping
Sleeping
Added timer and changed reverb parameters
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import numpy as np
|
|
| 4 |
from inference import generate_drum_kit
|
| 5 |
from audio_utils import play_audio
|
| 6 |
from fx import get_fx
|
|
|
|
| 7 |
|
| 8 |
# Streamlit UI
|
| 9 |
st.title("semantic spaces: kit generator")
|
|
@@ -13,23 +14,38 @@ st.write("hint: turn audio effects on! try weird prompts!")
|
|
| 13 |
|
| 14 |
with st.container(border=True):
|
| 15 |
# User Inputs
|
| 16 |
-
prompt = st.text_input("Describe your drum kit:", "
|
| 17 |
kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
|
| 18 |
use_fx = st.toggle("Apply audio effects?", value=True)
|
| 19 |
if use_fx:
|
| 20 |
if st.toggle("Use a different prompt for audio effects?", value=True):
|
| 21 |
-
fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal
|
| 22 |
else:
|
| 23 |
fx_prompt = prompt
|
| 24 |
|
| 25 |
# Run the inference
|
| 26 |
if st.button("Generate Drum Kit"):
|
|
|
|
| 27 |
drum_kit = generate_drum_kit(prompt, kit_size)
|
|
|
|
| 28 |
st.session_state["dry_kit"] = drum_kit
|
|
|
|
| 29 |
if use_fx:
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
st.session_state["fx_params"] = fx_params
|
| 32 |
st.session_state["drum_kit"] = drum_kit # Store results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Display results
|
| 35 |
if "drum_kit" in st.session_state:
|
|
@@ -43,11 +59,4 @@ if "drum_kit" in st.session_state:
|
|
| 43 |
for i, sound_file in enumerate(sounds):
|
| 44 |
with cols[i]:
|
| 45 |
if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
|
| 46 |
-
play_audio(sound_file)
|
| 47 |
-
|
| 48 |
-
if st.toggle("Show parameters?"):
|
| 49 |
-
if "fx_params" in st.session_state:
|
| 50 |
-
st.subheader("FX Parameters")
|
| 51 |
-
st.write(st.session_state["fx_params"])
|
| 52 |
-
if "dry_kit" in st.session_state:
|
| 53 |
-
st.write(st.session_state["dry_kit"])
|
|
|
|
| 4 |
from inference import generate_drum_kit
|
| 5 |
from audio_utils import play_audio
|
| 6 |
from fx import get_fx
|
| 7 |
+
import time
|
| 8 |
|
| 9 |
# Streamlit UI
|
| 10 |
st.title("semantic spaces: kit generator")
|
|
|
|
# --- Kit-generation UI --------------------------------------------------
# NOTE(review): reconstructed from a diff rendering that strips indentation;
# the placement of the "Run the inference" section at top level (outside the
# input container) is assumed — confirm against the original file.
with st.container(border=True):
    # User inputs
    prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
    kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
    use_fx = st.toggle("Apply audio effects?", value=True)
    if use_fx:
        if st.toggle("Use a different prompt for audio effects?", value=True):
            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")
        else:
            # Reuse the kit prompt to steer the FX search.
            fx_prompt = prompt

# Run the inference
if st.button("Generate Drum Kit"):
    # perf_counter() is the documented monotonic clock for measuring elapsed
    # time; time.time() is a wall clock and can jump (NTP, DST).
    start_drum_time = time.perf_counter()
    drum_kit = generate_drum_kit(prompt, kit_size)
    drum_time = time.perf_counter() - start_drum_time
    st.session_state["dry_kit"] = drum_kit
    # :.2f keeps the UI from showing full float precision (e.g. 12.3847284...).
    st.write(f"Drum kit generated in {drum_time:.2f} seconds.")
    if use_fx:
        start_fx_time = time.perf_counter()
        # get_fx returns the processed kit plus the chosen FX parameters and
        # the pre/post CLAP fitness values (see fx.py in this commit).
        drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
        fx_time = time.perf_counter() - start_fx_time
        st.write(f"Effects generated and applied in {fx_time:.2f} seconds.")
        st.write(f"Pre-effects loss: {pre_fx_fitness}")
        st.write(f"Post-effects loss: {post_fx_fitness}")
        st.session_state["fx_params"] = fx_params
    st.session_state["drum_kit"] = drum_kit  # Store results

# Inspection panel for the most recently generated kit; session_state keeps
# the results across Streamlit reruns.
if "drum_kit" in st.session_state:
    with st.expander("Click to view samples and parameters"):
        if "fx_params" in st.session_state:
            st.write(st.session_state["fx_params"])
        if "dry_kit" in st.session_state:
            st.write(st.session_state["dry_kit"])
|
| 49 |
|
| 50 |
# Display results
|
| 51 |
if "drum_kit" in st.session_state:
|
|
|
|
| 59 |
for i, sound_file in enumerate(sounds):
|
| 60 |
with cols[i]:
|
| 61 |
if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
|
| 62 |
+
play_audio(sound_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fx.py
CHANGED
|
@@ -8,6 +8,8 @@ import librosa
|
|
| 8 |
import numpy as np
|
| 9 |
import os
|
| 10 |
|
|
|
|
|
|
|
| 11 |
def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
|
| 12 |
"""Stitch together all drum sounds into one audio file."""
|
| 13 |
all_audio = []
|
|
@@ -89,7 +91,7 @@ search_space = [
|
|
| 89 |
Real(4000, 20000, name="lowpass"),
|
| 90 |
Real(50, 1000, name="highpass"),
|
| 91 |
Real(0.0, 0.8, name="reverb_size"),
|
| 92 |
-
Real(0.
|
| 93 |
Real(0.0, 10.0, name="drive_db"),
|
| 94 |
Real(4.0, 32.0, name="bit_depth")
|
| 95 |
]
|
|
@@ -106,10 +108,16 @@ def get_fx(drum_kit, fx_prompt):
|
|
| 106 |
def obj_func(params):
|
| 107 |
return objective_function(params, concat_file, text_embedding)
|
| 108 |
|
|
|
|
|
|
|
|
|
|
| 109 |
# Run Bayesian optimization
|
| 110 |
res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
|
| 111 |
best_params = res.x
|
| 112 |
|
|
|
|
|
|
|
|
|
|
| 113 |
# Apply the best FX parameters to each individual sound
|
| 114 |
optimized_kit = {}
|
| 115 |
for instrument, samples in drum_kit.items():
|
|
@@ -122,4 +130,4 @@ def get_fx(drum_kit, fx_prompt):
|
|
| 122 |
"bit_depth": best_params[5]
|
| 123 |
}, write_wav=True) for sample in samples]
|
| 124 |
|
| 125 |
-
return optimized_kit, get_params_dict(best_params)
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
import os
|
| 10 |
|
| 11 |
+
concat_file_path = "temp_concat.wav"
|
| 12 |
+
|
| 13 |
def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
|
| 14 |
"""Stitch together all drum sounds into one audio file."""
|
| 15 |
all_audio = []
|
|
|
|
| 91 |
Real(4000, 20000, name="lowpass"),
|
| 92 |
Real(50, 1000, name="highpass"),
|
| 93 |
Real(0.0, 0.8, name="reverb_size"),
|
| 94 |
+
Real(0.2, 1.0, name="reverb_wet"),
|
| 95 |
Real(0.0, 10.0, name="drive_db"),
|
| 96 |
Real(4.0, 32.0, name="bit_depth")
|
| 97 |
]
|
|
|
|
| 108 |
def obj_func(params):
|
| 109 |
return objective_function(params, concat_file, text_embedding)
|
| 110 |
|
| 111 |
+
# Get CLAP similarity without FX (for evaluation purposes)
|
| 112 |
+
pre_fx_fitness = - evaluate_fitness(concat_file_path, text_embedding)
|
| 113 |
+
|
| 114 |
# Run Bayesian optimization
|
| 115 |
res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
|
| 116 |
best_params = res.x
|
| 117 |
|
| 118 |
+
# Get post-FX fitness (for evaluation purposes)
|
| 119 |
+
post_fx_fitness = obj_func(best_params)
|
| 120 |
+
|
| 121 |
# Apply the best FX parameters to each individual sound
|
| 122 |
optimized_kit = {}
|
| 123 |
for instrument, samples in drum_kit.items():
|
|
|
|
| 130 |
"bit_depth": best_params[5]
|
| 131 |
}, write_wav=True) for sample in samples]
|
| 132 |
|
| 133 |
+
return optimized_kit, get_params_dict(best_params), pre_fx_fitness, post_fx_fitness
|