# sonicase's picture
# go back to previous version
# 5a448ad
"""
Granular Synthesis Demo // Rain Simulation (v3)
================================================
Why v1/v2 sounded like frying bacon:
The old approach generated individual noise-burst "drops" and summed them.
This creates a sparse, clicky texture because:
1. Short noise bursts have flat spectra (white noise = frying sound)
2. Box/naive filters barely shape the spectrum
3. Individual grains are too sparse to fuse into a continuous texture
Real rain is NOT a sum of isolated clicks. Acoustically, rain is a
CONTINUOUS stochastic process with a specific spectral shape:
- Energy concentrated between 1 kHz and 15 kHz
- Peak around 5-8 kHz (research: Nystuen et al., raindrop acoustics)
- Spectral slope that varies with rain intensity
- Slow amplitude modulation (gusts, intensity fluctuation)
- Small drops produce 13-25 kHz (drizzle shimmer)
- Large drops add energy below 2 kHz (heavy rain rumble)
v3 approach: spectral domain synthesis.
1. Generate white noise in the frequency domain (FFT)
2. Sculpt the spectrum to match real rain profiles
3. Add temporal modulation (amplitude envelopes that breathe)
4. Layer: continuous wash + transient drops + optional thunder
5. Stereo decorrelation for spatial width
This is still granular thinking: the "grains" are now overlapping
FFT frames (STFT), each with a shaped spectrum. The overlap-add
reconstruction is the same principle as classic granular synthesis.
"""
import numpy as np
from scipy import signal as sig
from scipy.fft import rfft, irfft
import gradio as gr
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Sample rate in Hz shared by the synthesis functions in this file.
SR = 44100
# Length of each rendered output clip; multiplied by SR to get sample counts.
DURATION = 7.0 # seconds
# ---------------------------------------------------------------------------
# Spectral rain profile
# ---------------------------------------------------------------------------
def rain_spectral_profile(
    n_fft: int,
    brightness: float,
    rain_type: str,
) -> np.ndarray:
    """
    Return the magnitude envelope used to colour each noise frame.

    The curve is a Gaussian bump on a log2-frequency axis (so it is
    symmetric in octaves) whose centre rises with ``brightness`` and whose
    width depends on ``rain_type``. Light rain gets an extra narrow bump
    at 16 kHz, heavy/thunder rain gets an extra bump at 300 Hz, and
    everything below roughly 80 Hz is rolled off.

    Args:
        n_fft: FFT size; the result has ``n_fft // 2 + 1`` bins spanning
            0 Hz to SR / 2.
        brightness: Moves the centre frequency, 1500 * 2**(2.5 * brightness) Hz.
        rain_type: "light", "medium", "heavy" or "thunder"; any other value
            uses the "medium" bandwidth and no extra bumps.

    Returns:
        Array of magnitudes scaled so the largest bin is (almost exactly) 1.
    """
    bin_count = n_fft // 2 + 1
    hz = np.linspace(0, SR / 2, bin_count)
    # Clamp to 1 Hz so the DC bin does not produce log2(0).
    hz_floor = np.maximum(hz, 1.0)
    octaves = np.log2(hz_floor)

    def bump(center_hz, width_octaves):
        # Gaussian in log2-frequency centred on center_hz.
        return np.exp(-0.5 * ((octaves - np.log2(center_hz)) / width_octaves) ** 2)

    # Bandwidth in octaves per rain type; unknown types behave like "medium".
    widths = {"light": 1.8, "medium": 2.2, "heavy": 3.0, "thunder": 3.5}
    width = widths[rain_type] if rain_type in widths else 2.2

    # brightness 0 -> 1.5 kHz, brightness 1 -> about 8.5 kHz.
    profile = bump(1500 * (2.0 ** (brightness * 2.5)), width)

    if rain_type == "light":
        # Drizzle shimmer: narrow extra peak at 16 kHz.
        profile = profile + 0.4 * bump(16000, 0.5)
    if rain_type in ("heavy", "thunder"):
        # Low-frequency body for big drops.
        profile = profile + 0.3 * bump(300, 1.0)

    # Fourth-order style roll-off below 80 Hz.
    profile = profile * (1.0 / (1.0 + (80.0 / hz_floor) ** 4))

    # Peak-normalise; the epsilon avoids dividing by zero.
    return profile / (np.max(profile) + 1e-12)
# ---------------------------------------------------------------------------
# Temporal modulation (rain is not perfectly steady)
# ---------------------------------------------------------------------------
def make_modulation(n_samples: int, speed: float = 0.2) -> np.ndarray:
    """
    Build a slow amplitude envelope that imitates fluctuating rain intensity.

    Four slow sinusoids with random phases and slightly randomised
    frequencies are summed around 1.0, clipped to [0.3, 1.5] and then
    smoothed with a normalised 0.3 s Hann kernel.

    The smoothing pads the signal with its own edge values and uses an
    FFT convolution in "valid" mode. Compared with a zero-padded
    ``np.convolve(..., mode="same")`` this
      * does not pull the envelope toward zero during the first and last
        ~150 ms of the clip,
      * always returns exactly ``n_samples`` values, even when the signal
        is shorter than the smoothing kernel, and
      * avoids a direct O(N * M) convolution over the whole clip.

    Args:
        n_samples: Number of envelope samples to produce. The time axis
            always spans 0..DURATION seconds across these samples.
        speed: Base modulation rate; the four components run at
            ``speed * (0.5 + 0.3 * i)`` plus a random offset of +/-0.05.

    Returns:
        float64 array of length ``n_samples`` (empty if ``n_samples <= 0``).
    """
    if n_samples <= 0:
        return np.zeros(0, dtype=np.float64)

    t = np.linspace(0, DURATION, n_samples)
    mod = np.ones(n_samples, dtype=np.float64)
    # Sum of 4 slow sinusoids with random phases
    for i in range(4):
        freq = speed * (0.5 + i * 0.3) + np.random.uniform(-0.05, 0.05)
        phase = np.random.uniform(0, 2 * np.pi)
        depth = 0.08 + 0.07 * i  # increasing modulation depth
        mod += depth * np.sin(2 * np.pi * freq * t + phase)
    # Keep in a reasonable range
    mod = np.clip(mod, 0.3, 1.5)

    # Smooth with a gentle low-pass to avoid sudden jumps
    window = np.hanning(int(SR * 0.3))
    window /= window.sum()
    # Pad so that a "valid" convolution yields n_samples outputs, aligned
    # the same way as numpy's "same" mode.
    pad_left = len(window) // 2
    pad_right = len(window) - 1 - pad_left
    padded = np.pad(mod, (pad_left, pad_right), mode="edge")
    return sig.fftconvolve(padded, window, mode="valid")
# ---------------------------------------------------------------------------
# Core: spectral rain synthesis via overlap-add STFT
# ---------------------------------------------------------------------------
def synthesize_rain(
    rain_type: str,
    brightness: float,
    density: float,
    modulation_speed: float,
    stereo_width: float,
    highcut: float,
    lowcut: float,
) -> np.ndarray:
    """
    Render DURATION seconds of stereo rain.

    Pipeline:
      1. Build a magnitude spectrum from ``rain_spectral_profile``, scale
         it by density and apply soft low/high roll-offs.
      2. For every frame, pair that magnitude with random phases, inverse
         FFT it, Hann-window the result and overlap-add it (75% overlap).
      3. Multiply both channels by a shared slow modulation envelope.
      4. Add the sparse drop layer and, for "thunder", the rumble layer.
      5. Peak-normalise to 0.85.

    Args:
        rain_type: "light", "medium", "heavy" or "thunder".
        brightness: Forwarded to the spectral profile and the drop layer.
        density: Scales the wash level and the number of drops.
        modulation_speed: Forwarded to ``make_modulation``.
        stereo_width: At or below 0.01 the right channel copies the left;
            above that the right phases are a blend of the left phases
            and fresh random phases weighted by this value.
        highcut: Corner frequency in Hz of the upper roll-off.
        lowcut: Corner frequency in Hz of the lower roll-off.

    Returns:
        float64 array of shape (int(DURATION * SR), 2).
    """
    total = int(DURATION * SR)

    # 2048-sample frames hopped by a quarter frame.
    frame_len = 2048
    step = frame_len // 4
    frame_count = total // step + 1
    bin_count = frame_len // 2 + 1

    # Target magnitude: rain profile, density gain, then slider roll-offs.
    magnitude = rain_spectral_profile(frame_len, brightness, rain_type)
    magnitude *= 0.3 + density * 0.7
    hz = np.linspace(0, SR / 2, bin_count)
    low_rolloff = 1.0 / (1.0 + (lowcut / (hz + 1e-6)) ** 6)
    high_rolloff = 1.0 / (1.0 + (hz / highcut) ** 6)
    magnitude *= low_rolloff * high_rolloff

    hann = np.hanning(frame_len)
    # One spare frame of headroom so the final grain always fits.
    left = np.zeros(total + frame_len, dtype=np.float64)
    right = np.zeros(total + frame_len, dtype=np.float64)
    decorrelate = stereo_width > 0.01

    for idx in range(frame_count):
        # Random phase + shaped magnitude = coloured noise grain.
        theta_left = np.random.uniform(0, 2 * np.pi, bin_count)
        grain_left = irfft(magnitude * np.exp(1j * theta_left), n=frame_len).real * hann

        if decorrelate:
            fresh = np.random.uniform(0, 2 * np.pi, bin_count)
            theta_right = theta_left * (1 - stereo_width) + fresh * stereo_width
            grain_right = irfft(magnitude * np.exp(1j * theta_right), n=frame_len).real * hann
        else:
            grain_right = grain_left.copy()

        start = idx * step
        stop = start + frame_len
        if stop > len(left):
            continue
        left[start:stop] += grain_left
        right[start:stop] += grain_right

    # Drop the headroom, then apply the shared intensity envelope.
    envelope = make_modulation(total, speed=modulation_speed)
    left = left[:total] * envelope
    right = right[:total] * envelope

    # Transient drops sit well below the wash; heavier rain masks them more.
    drops_left, drops_right = make_drop_layer(total, rain_type, brightness, density)
    drop_levels = {"light": 0.5, "medium": 0.3, "heavy": 0.15, "thunder": 0.1}
    drop_gain = drop_levels.get(rain_type, 0.3)
    left = left + drops_left * drop_gain
    right = right + drops_right * drop_gain

    if rain_type == "thunder":
        thunder_left, thunder_right = make_thunder(total)
        left = left + thunder_left
        right = right + thunder_right

    # Interleave as (samples, channels) and peak-normalise.
    stereo = np.stack([left, right], axis=1)
    peak = np.max(np.abs(stereo))
    if peak > 0:
        stereo *= 0.85 / peak
    return stereo
# ---------------------------------------------------------------------------
# Transient drop layer (sparse individual drops for texture)
# ---------------------------------------------------------------------------
def make_drop_layer(
    n_out: int,
    rain_type: str,
    brightness: float,
    density: float,
) -> tuple:
    """
    Render sparse individual drops to layer on top of the continuous wash.

    Each drop is a short burst of Gaussian noise with an exponential decay,
    band-pass filtered around a brightness-dependent cutoff, then placed at
    a random position with a random level and a constant-power pan.

    Args:
        n_out: Length in samples of each returned channel.
        rain_type: "light", "medium", "heavy" or "thunder"; selects the
            drops-per-second rate (unknown values use 20).
        brightness: Sets the base cutoff, 1000 * 2**(3 * brightness) Hz.
        density: Multiplier on the number of drops.

    Returns:
        ``(left, right)`` float64 arrays of length ``n_out``.
    """
    output_L = np.zeros(n_out, dtype=np.float64)
    output_R = np.zeros(n_out, dtype=np.float64)
    # Number of audible drops (not all rain drops are individually heard)
    drops_per_sec = {"light": 8, "medium": 20, "heavy": 40, "thunder": 50}
    n_drops = int(drops_per_sec.get(rain_type, 20) * density * DURATION)
    # Nominal drop duration in samples (20 ms, varied 0.5x-2x per drop)
    base_dur = int(SR * 0.02)
    # Cutoff frequency for drops (matches surface brightness)
    cutoff_base = 1000 * (2.0 ** (brightness * 3.0))  # 1 kHz to 8 kHz

    for _ in range(n_drops):
        pos = np.random.randint(0, max(n_out - base_dur * 3, 1))
        # Each drop varies in duration and brightness
        dur = int(base_dur * np.random.uniform(0.5, 2.0))
        dur = max(dur, 64)
        # Synthesize drop: noise with a sharp exponential decay
        t = np.linspace(0, 1, dur)
        envelope = np.exp(-np.random.uniform(8, 20) * t)
        noise = np.random.randn(dur) * envelope

        # Band-pass each drop; the cutoff varies per drop for realism and
        # is capped safely below Nyquist.
        this_cutoff = cutoff_base * np.random.uniform(0.5, 1.5)
        this_cutoff = min(this_cutoff, SR * 0.45)
        low = max(this_cutoff * 0.3, 100)
        try:
            sos = sig.butter(2, [low, this_cutoff], btype="bandpass", fs=SR, output="sos")
        except ValueError:
            # Invalid band edges (e.g. low >= cutoff for out-of-range
            # brightness): fall back to the unfiltered burst. Only the
            # filter-design error is caught so unrelated bugs still surface.
            drop = noise
        else:
            drop = sig.sosfilt(sos, noise)

        # Random amplitude (distance simulation)
        amp = np.random.uniform(0.1, 1.0) ** 1.3
        # Constant-power stereo position
        pan = np.random.uniform(0, 1)
        L_gain = np.cos(pan * np.pi / 2) * amp
        R_gain = np.sin(pan * np.pi / 2) * amp

        # Truncate drops that would run past the end of the buffer.
        end = min(pos + dur, n_out)
        seg = drop[:end - pos]
        output_L[pos:end] += seg * L_gain
        output_R[pos:end] += seg * R_gain
    return output_L, output_R
# ---------------------------------------------------------------------------
# Thunder
# ---------------------------------------------------------------------------
def make_thunder(n_out: int) -> tuple:
    """
    Render one or two low-frequency rumble events with a slow attack and long tail.

    Each event sums six sine partials (20-100 Hz) with random phases and
    levels, shaped by an attack/decay envelope, and is added to both
    channels with slightly different gains.

    The partials are evaluated on a time axis in seconds so the listed
    values really are Hz. Evaluating them on the 0..1 normalised axis used
    for the envelope would make each partial complete ``f`` cycles per
    event, i.e. sound at ``f / event_seconds`` (about 5-50 Hz for 2-4 s
    events), which is mostly below the audible range.

    Args:
        n_out: Length in samples of each returned channel.

    Returns:
        ``(left, right)`` float64 arrays of length ``n_out``.
    """
    left = np.zeros(n_out, dtype=np.float64)
    right = np.zeros(n_out, dtype=np.float64)
    n_events = np.random.randint(1, 3)  # 1 or 2 events
    for _ in range(n_events):
        # Start somewhere in the first half; max(..., 1) keeps randint
        # valid for very short buffers.
        pos = np.random.randint(0, max(n_out // 2, 1))
        dur = int(SR * np.random.uniform(2.0, 4.0))
        # Normalised 0..1 axis drives the envelope shape only.
        t = np.linspace(0, 1, dur)
        # Real time in seconds drives the oscillators.
        t_sec = np.arange(dur) / SR

        # Sum of low frequencies with random phases
        rumble = np.zeros(dur)
        for f in [20, 30, 45, 60, 80, 100]:
            phase = np.random.uniform(0, 2 * np.pi)
            rumble += np.sin(2 * np.pi * f * t_sec + phase) * np.random.uniform(0.3, 1.0)

        # Envelope: slow build, long decay
        env = np.exp(-1.0 * t) * (1 - np.exp(-6 * t))
        rumble *= env * 0.4

        # Truncate events that run past the end of the buffer.
        end = min(pos + dur, n_out)
        seg = rumble[:end - pos]
        # Slightly different L/R for width
        left[pos:end] += seg * np.random.uniform(0.7, 1.0)
        right[pos:end] += seg * np.random.uniform(0.7, 1.0)
    return left, right
# ---------------------------------------------------------------------------
# Tonal granular engine (unchanged)
# ---------------------------------------------------------------------------
def make_tonal_source(freq: float = 220.0, duration: float = 2.0) -> np.ndarray:
    """
    Build a harmonic test tone: seven partials with 1/k amplitudes.

    Args:
        freq: Fundamental frequency in Hz.
        duration: Length of the tone in seconds.

    Returns:
        float64 array of ``int(SR * duration)`` samples, peak-normalised to
        1.0. A silent or empty result (e.g. ``freq == 0`` or
        ``duration == 0``) is returned as-is instead of being divided by
        a zero peak.
    """
    t = np.linspace(0, duration, int(SR * duration), endpoint=False)
    s = np.zeros_like(t)
    for k in range(1, 8):
        s += (1.0 / k) * np.sin(2 * np.pi * freq * k * t)
    # Guard the normalisation: np.max raises on an empty array and a zero
    # peak would fill the signal with NaN.
    peak = np.max(np.abs(s)) if s.size else 0.0
    if peak > 0:
        s /= peak
    return s
def granular_synthesize(source, grain_size_ms, density, randomness, pitch_shift):
    """
    Classic granular synthesis: overlap-add Hann-windowed grains cut from ``source``.

    Args:
        source: 1-D sequence of samples to cut grains from.
        grain_size_ms: Grain length in milliseconds (at least 64 samples
            are always used).
        density: Overlap factor; the hop between grains is
            ``grain_samples / density`` samples (minimum 1).
        randomness: 0 reads grains sequentially through the source,
            1 picks every grain position at random; values in between
            blend the two positions linearly.
        pitch_shift: Read-rate factor applied by index decimation /
            repetition. Values of 1.0 or <= 0 leave the source untouched.

    Returns:
        float64 array of ``int(DURATION * SR)`` samples, peak-normalised
        to 1.0 (all zeros if nothing was rendered).
    """
    source = np.asarray(source, dtype=np.float64)
    grain_samples = max(int((grain_size_ms / 1000.0) * SR), 64)
    window = np.hanning(grain_samples)
    hop = max(int(grain_samples / density), 1)
    n_out = int(DURATION * SR)
    output = np.zeros(n_out, dtype=np.float64)

    # Crude resampling: step through the source at `pitch_shift` samples
    # per output sample, truncating to integer indices.
    if pitch_shift != 1.0 and pitch_shift > 0:
        indices = np.arange(0, len(source), pitch_shift)
        indices = indices[indices < len(source) - 1].astype(int)
        pitched = source[indices]
    else:
        pitched = source

    # A source shorter than one grain would yield wrong-length slices and
    # an opaque broadcast error; zero-pad it so a full grain always fits.
    if len(pitched) < grain_samples:
        pitched = np.pad(pitched, (0, grain_samples - len(pitched)))

    src_len = len(pitched)
    max_start = src_len - grain_samples  # >= 0 after the padding above
    num_grains = (n_out - grain_samples) // hop
    read_head = 0.0
    # Advance so the sequential read head sweeps the source once.
    step = max_start / max(num_grains, 1)

    for i in range(num_grains):
        seq_pos = int(read_head) % max(max_start, 1)
        rand_pos = np.random.randint(0, max(max_start, 1))
        # Blend the sequential and the random read position.
        start = int(seq_pos * (1 - randomness) + rand_pos * randomness)
        start = np.clip(start, 0, max_start)
        grain = pitched[start: start + grain_samples] * window
        out_pos = i * hop
        if out_pos + grain_samples > n_out:
            break
        output[out_pos: out_pos + grain_samples] += grain
        read_head += step

    peak = np.max(np.abs(output))
    if peak > 0:
        output /= peak
    return output
# ---------------------------------------------------------------------------
# Callbacks
# ---------------------------------------------------------------------------
def cb_rain(rain_type, brightness, density, mod_speed, stereo, highcut, lowcut):
    """Gradio handler: render rain and return ``(sample_rate, float32 samples)``."""
    rendered = synthesize_rain(
        rain_type=rain_type,
        brightness=brightness,
        density=density,
        modulation_speed=mod_speed,
        stereo_width=stereo,
        highcut=highcut,
        lowcut=lowcut,
    )
    samples = rendered.astype(np.float32)
    return (SR, samples)
def cb_tonal(grain_size, density, randomness, pitch_shift, freq):
    """Gradio handler: granulate a harmonic tone and return ``(sample_rate, float32 samples)``."""
    tone = make_tonal_source(freq=freq)
    rendered = granular_synthesize(tone, grain_size, density, randomness, pitch_shift)
    samples = rendered.astype(np.float32)
    return (SR, samples)
# ---------------------------------------------------------------------------
# Theme + CSS
# ---------------------------------------------------------------------------
# Colour overrides for the theme. Every entry is applied twice: once under
# its own name and once under "<name>_dark", so the light and dark variants
# of the theme render identically.
_THEME_COLORS = {
    "body_background_fill": "#0d1117",
    "body_text_color": "#c9d1d9",
    "body_text_color_subdued": "#8b949e",
    "background_fill_primary": "#161b22",
    "background_fill_secondary": "#0d1117",
    "block_background_fill": "#161b22",
    "block_border_color": "#21262d",
    "block_label_text_color": "#8b949e",
    "block_title_text_color": "#e6edf3",
    "border_color_primary": "#21262d",
    "button_primary_background_fill": "#238636",
    "button_primary_background_fill_hover": "#2ea043",
    "button_primary_text_color": "#ffffff",
    "button_secondary_background_fill": "#21262d",
    "button_secondary_text_color": "#c9d1d9",
    "input_background_fill": "#0d1117",
    "input_border_color": "#30363d",
    "slider_color": "#58a6ff",
    "link_text_color": "#58a6ff",
}

dark_theme = gr.themes.Base(
    primary_hue=gr.themes.colors.blue,
    secondary_hue=gr.themes.colors.slate,
    neutral_hue=gr.themes.colors.slate,
    font=gr.themes.GoogleFont("Inter"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
).set(
    **_THEME_COLORS,
    **{f"{name}_dark": color for name, color in _THEME_COLORS.items()},
)
# Extra stylesheet handed to gr.Blocks(css=...): hides the footer and
# restyles headings, tab buttons and Markdown tables.
CUSTOM_CSS = """
footer { display: none !important; }
h1 { letter-spacing: -0.03em !important; font-weight: 600 !important; }
h3 { color: #8b949e !important; font-weight: 400 !important; }
.tab-nav button {
font-weight: 500 !important;
border: none !important;
border-bottom: 2px solid transparent !important;
}
.tab-nav button.selected {
color: #58a6ff !important;
border-bottom-color: #58a6ff !important;
}
table { font-size: 0.82rem !important; }
th { background: #21262d !important; color: #8b949e !important; font-weight: 500 !important; }
td { border-top: 1px solid #21262d !important; }
"""
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Two-tab Gradio interface: "Rain Simulation" drives cb_rain and
# "Tonal Granular" drives cb_tonal.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped - confirm the placement of the Markdown
# panels and the .click() registrations against the running app.
# Text inside the triple-quoted Markdown strings is kept flush-left so the
# string contents are unchanged.
with gr.Blocks(title="Granular Synthesis", css=CUSTOM_CSS, theme=dark_theme) as demo:
    gr.Markdown(
        """
# Granular Synthesis
### micro-sound, grain clouds, texture design
"""
    )
    with gr.Tabs():
        # ======================== RAIN ========================
        with gr.TabItem("Rain Simulation"):
            gr.Markdown(
                """
Spectral-domain rain synthesis. Instead of summing noise clicks,
we sculpt white noise in the frequency domain to match the spectral
profile of real rainfall (energy concentrated 2 to 12 kHz, slope varies
with intensity). Each STFT frame is a "grain" whose spectrum is shaped
by the rain profile, then overlap-added into the output.
"""
            )
            with gr.Row():
                # Left column: controls.
                with gr.Column(scale=1):
                    rain_type = gr.Radio(
                        choices=["light", "medium", "heavy", "thunder"],
                        value="medium",
                        label="Rain type",
                        info="Controls spectral shape, transient density, and optional layers.",
                    )
                    brightness = gr.Slider(
                        0.0, 1.0, 0.45, step=0.01,
                        label="Surface brightness",
                        info="0 = dark (earth, foliage). 1 = bright (glass, tin roof).",
                    )
                    rain_density = gr.Slider(
                        0.2, 3.0, 1.0, step=0.1,
                        label="Density",
                        info="Overall thickness of the rain texture.",
                    )
                    mod_speed = gr.Slider(
                        0.05, 1.0, 0.2, step=0.05,
                        label="Modulation speed",
                        info="How fast the rain intensity fluctuates (gusts).",
                    )
                    stereo = gr.Slider(
                        0.0, 1.0, 0.7, step=0.01,
                        label="Stereo width",
                        info="0 = mono. 1 = fully decorrelated L/R.",
                    )
                    lowcut = gr.Slider(
                        50, 2000, 150, step=10,
                        label="Low cut (Hz)",
                        info="Remove frequencies below this point.",
                    )
                    highcut = gr.Slider(
                        2000, 20000, 14000, step=100,
                        label="High cut (Hz)",
                        info="Remove frequencies above this point.",
                    )
                    rain_btn = gr.Button("Generate rain", variant="primary", size="lg")
                # Right column: audio player and preset table.
                with gr.Column(scale=1):
                    rain_audio = gr.Audio(label="Output", type="numpy")
                    gr.Markdown(
                        """
**Presets**
| Scene | Type | Bright | Dens | Mod | LoCut | HiCut |
|---|---|---|---|---|---|---|
| Drizzle on leaves | light | 0.2 | 0.6 | 0.1 | 200 | 18000 |
| Window at night | medium | 0.5 | 1.0 | 0.2 | 150 | 14000 |
| Tin roof | medium | 0.9 | 1.2 | 0.15 | 300 | 16000 |
| Downpour | heavy | 0.4 | 2.0 | 0.3 | 100 | 12000 |
| Thunderstorm | thunder | 0.35 | 2.5 | 0.4 | 80 | 10000 |
| Forest canopy | light | 0.15 | 0.5 | 0.08 | 200 | 15000 |
"""
                    )
            # Input order must match cb_rain's parameter order
            # (note: highcut comes before lowcut).
            rain_btn.click(
                fn=cb_rain,
                inputs=[rain_type, brightness, rain_density, mod_speed, stereo, highcut, lowcut],
                outputs=rain_audio,
            )
        # ======================== TONAL ========================
        with gr.TabItem("Tonal Granular"):
            gr.Markdown(
                """
Classic granular synthesis: slice a harmonic source into grains,
apply Hann windows, randomize read positions, overlap-add.
"""
            )
            with gr.Row():
                # Left column: controls.
                with gr.Column(scale=1):
                    source_freq = gr.Slider(80, 880, 220, step=1, label="Source frequency (Hz)")
                    grain_size = gr.Slider(5, 200, 50, step=1, label="Grain size (ms)",
                                           info="Smaller = buzzy. Larger = smooth.")
                    tonal_density = gr.Slider(1, 8, 4, step=0.5, label="Density (overlap)")
                    randomness_sl = gr.Slider(0, 1, 0.3, step=0.01, label="Position randomness",
                                              info="0 = sequential. 1 = fully random (freeze/texture).")
                    pitch = gr.Slider(0.25, 4.0, 1.0, step=0.05, label="Pitch shift")
                    tonal_btn = gr.Button("Synthesize", variant="primary", size="lg")
                # Right column: audio player and signal-chain note.
                with gr.Column(scale=1):
                    tonal_audio = gr.Audio(label="Output", type="numpy")
                    gr.Markdown(
                        """
**Signal chain**
Source (additive harmonics) > grain extraction (sequential + random blend)
> Hann window (click-free edges) > overlap-add > normalize
"""
                    )
            # Input order must match cb_tonal's parameter order
            # (source frequency is passed last).
            tonal_btn.click(
                fn=cb_tonal,
                inputs=[grain_size, tonal_density, randomness_sl, pitch, source_freq],
                outputs=tonal_audio,
            )
    gr.Markdown(
        """
---
Built with Python, NumPy, SciPy and Gradio. Everything is synthesized from scratch, no samples.
Part of [Generative Audio Soundscapes Lab](https://my-sonicase.github.io/genaudio-soundscapes/).
"""
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()