Spaces:

sonicase
/

granular-synthesis

Sleeping

App Files Files Community

granular-synthesis / app.py

sonicase

go back to previous version

5a448ad 2 months ago

raw

history blame contribute delete

23.8 kB

	"""
	Granular Synthesis Demo // Rain Simulation (v3)
	================================================

	Why v1/v2 sounded like frying bacon:
	The old approach generated individual noise-burst "drops" and summed them.
	This creates a sparse, clicky texture because:
	1. Short noise bursts have flat spectra (white noise = frying sound)
	2. Box/naive filters barely shape the spectrum
	3. Individual grains are too sparse to fuse into a continuous texture

	Real rain is NOT a sum of isolated clicks. Acoustically, rain is a
	CONTINUOUS stochastic process with a specific spectral shape:
	- Energy concentrated between 1 kHz and 15 kHz
	- Peak around 5-8 kHz (research: Nystuen et al., raindrop acoustics)
	- Spectral slope that varies with rain intensity
	- Slow amplitude modulation (gusts, intensity fluctuation)
	- Small drops produce 13-25 kHz (drizzle shimmer)
	- Large drops add energy below 2 kHz (heavy rain rumble)

	v3 approach: spectral domain synthesis.
	1. Generate white noise in the frequency domain (FFT)
	2. Sculpt the spectrum to match real rain profiles
	3. Add temporal modulation (amplitude envelopes that breathe)
	4. Layer: continuous wash + transient drops + optional thunder
	5. Stereo decorrelation for spatial width

	This is still granular thinking: the "grains" are now overlapping
	FFT frames (STFT), each with a shaped spectrum. The overlap-add
	reconstruction is the same principle as classic granular synthesis.
	"""

	import numpy as np
	from scipy import signal as sig
	from scipy.fft import rfft, irfft
	import gradio as gr

	# ---------------------------------------------------------------------------
	# Constants
	# ---------------------------------------------------------------------------
	# Sample rate in Hz used by every synthesis routine in this file.
	SR = 44100
	# Length of each rendered clip.
	DURATION = 7.0 # seconds

	# ---------------------------------------------------------------------------
	# Spectral rain profile
	# ---------------------------------------------------------------------------

	def rain_spectral_profile(
	    n_fft: int,
	    brightness: float,
	    rain_type: str,
	) -> np.ndarray:
	    """
	    Return the per-bin magnitude envelope that colours the rain noise.

	    The result has ``n_fft // 2 + 1`` entries laid out linearly from 0 Hz
	    to SR / 2. Its backbone is a Gaussian bump on a log2-frequency axis
	    (symmetric in octaves): ``brightness`` moves the centre from 1.5 kHz
	    at 0.0 to roughly 8.5 kHz at 1.0, and ``rain_type`` picks the width in
	    octaves (unrecognised types fall back to the "medium" width).
	    "light" adds a narrow bump at 16 kHz; "heavy" and "thunder" add a
	    broad bump at 300 Hz. The envelope is then attenuated below about
	    80 Hz and scaled so that its largest bin is ~1.0.
	    """
	    bin_hz = np.linspace(0, SR / 2, n_fft // 2 + 1)
	    # Clamp to 1 Hz so the DC bin never reaches log2(0) or divides by zero.
	    bin_hz_safe = np.clip(bin_hz, 1.0, None)
	    octave_axis = np.log2(bin_hz_safe)

	    def _bump(center_hz, width_octaves):
	        # Unit-height Gaussian around center_hz, measured in octaves.
	        distance = (octave_axis - np.log2(center_hz)) / width_octaves
	        return np.exp(-0.5 * distance ** 2)

	    # Width of the main bump in octaves; heavier rain is more broadband.
	    widths = {"light": 1.8, "medium": 2.2, "heavy": 3.0, "thunder": 3.5}
	    main_center_hz = 1500 * (2.0 ** (brightness * 2.5))
	    envelope = _bump(main_center_hz, widths.get(rain_type, 2.2))

	    if rain_type == "light":
	        # Drizzle shimmer in the top octave.
	        envelope = envelope + 0.4 * _bump(16000, 0.5)
	    elif rain_type in ("heavy", "thunder"):
	        # Extra low-end body for large drops.
	        envelope = envelope + 0.3 * _bump(300, 1.0)

	    # Fourth-order style roll-off with its corner at 80 Hz.
	    envelope = envelope * (1.0 / (1.0 + (80.0 / bin_hz_safe) ** 4))

	    # Peak-normalise; the epsilon guards against an all-zero envelope.
	    return envelope / (np.max(envelope) + 1e-12)


	# ---------------------------------------------------------------------------
	# Temporal modulation (rain is not perfectly steady)
	# ---------------------------------------------------------------------------

	def make_modulation(n_samples: int, speed: float = 0.2) -> np.ndarray:
	    """
	    Build a slow gain envelope that mimics natural intensity fluctuation.

	    Four sinusoids with random phase and slightly jittered frequency are
	    summed around 1.0, clipped to [0.3, 1.5] and smoothed with a
	    normalised Hann kernel of about 0.3 s.

	    Args:
	        n_samples: Length of the envelope in samples.
	        speed: Base modulation rate in Hz; the four partials sit at
	            ``speed * (0.5 + 0.3 * i)`` plus up to +/-0.05 Hz of jitter.

	    Returns:
	        A float64 array of exactly ``n_samples`` values in [0.3, 1.5].
	    """
	    if n_samples == 0:
	        return np.ones(0, dtype=np.float64)

	    # The time axis always spans DURATION seconds, whatever n_samples is.
	    t = np.linspace(0, DURATION, n_samples)
	    mod = np.ones(n_samples, dtype=np.float64)

	    for i in range(4):
	        freq = speed * (0.5 + i * 0.3) + np.random.uniform(-0.05, 0.05)
	        phase = np.random.uniform(0, 2 * np.pi)
	        depth = 0.08 + 0.07 * i  # faster partials modulate more deeply
	        mod += depth * np.sin(2 * np.pi * freq * t + phase)

	    mod = np.clip(mod, 0.3, 1.5)

	    # Never let the kernel be longer than the signal, otherwise the
	    # smoothed result could not keep the requested length.
	    win_len = min(int(SR * 0.3), n_samples)
	    if win_len < 3:
	        # A 1- or 2-point Hann window is degenerate (its sum can be zero).
	        return mod

	    kernel = np.hanning(win_len)
	    kernel /= kernel.sum()

	    # Replicate the edge values before smoothing. Zero padding (what
	    # mode="same" does implicitly) drags the envelope towards zero at the
	    # start and end of the clip, producing an unintended fade in/out.
	    pad_left = win_len // 2
	    pad_right = win_len - 1 - pad_left
	    padded = np.pad(mod, (pad_left, pad_right), mode="edge")

	    # FFT convolution gives the same result as direct convolution but
	    # avoids an O(n_samples * win_len) inner product.
	    smoothed = sig.fftconvolve(padded, kernel, mode="valid")

	    # Re-clip to absorb floating-point overshoot from the FFT.
	    return np.clip(smoothed, 0.3, 1.5)


	# ---------------------------------------------------------------------------
	# Core: spectral rain synthesis via overlap-add STFT
	# ---------------------------------------------------------------------------

	def synthesize_rain(
	    rain_type: str,
	    brightness: float,
	    density: float,
	    modulation_speed: float,
	    stereo_width: float,
	    highcut: float,
	    lowcut: float,
	) -> np.ndarray:
	    """
	    Render DURATION seconds of stereo rain by overlap-adding shaped noise.

	    Each frame is a "grain": random phases are combined with a fixed
	    magnitude profile, transformed back to the time domain, Hann-windowed
	    and overlap-added at 75% overlap. The continuous wash is then
	    amplitude-modulated and mixed with a sparse drop layer and, for
	    "thunder", a rumble layer.

	    Args:
	        rain_type: "light", "medium", "heavy" or "thunder". Selects the
	            spectral profile and the drop-layer mix; "thunder" also adds
	            the rumble layer.
	        brightness: Forwarded to the spectral profile and the drop layer.
	        density: Scales the wash magnitude by ``0.3 + 0.7 * density`` and
	            is forwarded to the drop layer. Because the result is
	            peak-normalised, the wash scaling changes the balance between
	            layers rather than the absolute level.
	        modulation_speed: Forwarded to ``make_modulation`` as ``speed``.
	        stereo_width: At or below 0.01 the right channel is a copy of the
	            left. Above that, the right-channel phase is a blend of the
	            left phase and a fresh random phase weighted by this value.
	        highcut: Corner frequency in Hz of the upper roll-off.
	        lowcut: Corner frequency in Hz of the lower roll-off.

	    Returns:
	        A float64 array of shape ``(int(DURATION * SR), 2)`` whose peak
	        absolute value is 0.85 (unless the signal is entirely zero).
	    """
	    n_out = int(DURATION * SR)

	    # 2048 samples at 44.1 kHz is a ~46 ms frame; a hop of a quarter frame
	    # gives the 75% overlap needed for a smooth Hann overlap-add.
	    n_fft = 2048
	    hop = n_fft // 4
	    n_frames = (n_out // hop) + 1
	    n_bins = n_fft // 2 + 1

	    profile = rain_spectral_profile(n_fft, brightness, rain_type)

	    # Density scaling of the continuous wash.
	    profile *= 0.3 + density * 0.7

	    # Slider-controlled band limits. These are gentle roll-offs rather
	    # than brick walls, which would sound unnatural.
	    freqs = np.linspace(0, SR / 2, n_bins)
	    low_rolloff = 1.0 / (1.0 + (lowcut / (freqs + 1e-6)) ** 6)
	    high_rolloff = 1.0 / (1.0 + (freqs / highcut) ** 6)
	    profile *= low_rolloff * high_rolloff

	    window = np.hanning(n_fft)

	    # The buffers carry n_fft extra samples so the last frame (which
	    # starts at or before n_out) always fits; the tail is trimmed below.
	    output_L = np.zeros(n_out + n_fft, dtype=np.float64)
	    output_R = np.zeros(n_out + n_fft, dtype=np.float64)

	    decorrelate = stereo_width > 0.01
	    shared_weight = 1 - stereo_width

	    for frame in range(n_frames):
	        # Random phase with an imposed magnitude yields noise whose
	        # spectrum follows the profile.
	        phase_L = np.random.uniform(0, 2 * np.pi, n_bins)
	        grain_L = irfft(profile * np.exp(1j * phase_L), n=n_fft) * window

	        if decorrelate:
	            fresh_phase = np.random.uniform(0, 2 * np.pi, n_bins)
	            phase_R = phase_L * shared_weight + fresh_phase * stereo_width
	            grain_R = irfft(profile * np.exp(1j * phase_R), n=n_fft) * window
	        else:
	            grain_R = grain_L.copy()

	        pos = frame * hop
	        output_L[pos:pos + n_fft] += grain_L
	        output_R[pos:pos + n_fft] += grain_R

	    output_L = output_L[:n_out]
	    output_R = output_R[:n_out]

	    # The same envelope drives both channels, so gusts stay centred.
	    mod = make_modulation(n_out, speed=modulation_speed)
	    output_L *= mod
	    output_R *= mod

	    # Sparse individual drops on top of the wash; quieter as rain gets
	    # heavier.
	    drop_layer_L, drop_layer_R = make_drop_layer(n_out, rain_type, brightness, density)
	    drop_mix = {"light": 0.5, "medium": 0.3, "heavy": 0.15, "thunder": 0.1}
	    dmix = drop_mix.get(rain_type, 0.3)
	    output_L += drop_layer_L * dmix
	    output_R += drop_layer_R * dmix

	    if rain_type == "thunder":
	        th_L, th_R = make_thunder(n_out)
	        output_L += th_L
	        output_R += th_R

	    # Peak-normalise with some headroom.
	    stereo = np.column_stack([output_L, output_R])
	    peak = np.max(np.abs(stereo))
	    if peak > 0:
	        stereo *= 0.85 / peak

	    return stereo


	# ---------------------------------------------------------------------------
	# Transient drop layer (sparse individual drops for texture)
	# ---------------------------------------------------------------------------

	def make_drop_layer(
	    n_out: int,
	    rain_type: str,
	    brightness: float,
	    density: float,
	) -> tuple:
	    """
	    Render sparse, individually panned drops to layer over the wash.

	    Each drop is a short burst of Gaussian noise with an exponential
	    decay, band-pass filtered around a cutoff derived from ``brightness``
	    and placed at a random time, level and stereo position.

	    Args:
	        n_out: Length of each returned channel in samples.
	        rain_type: Selects the drops-per-second rate (unrecognised types
	            use the "medium" rate of 20).
	        brightness: Sets the base cutoff, ``1000 * 2 ** (3 * brightness)``
	            Hz, i.e. 1 kHz at 0.0 and 8 kHz at 1.0.
	        density: Multiplier on the number of drops.

	    Returns:
	        ``(left, right)``: two float64 arrays of length ``n_out``.
	    """
	    output_L = np.zeros(n_out, dtype=np.float64)
	    output_R = np.zeros(n_out, dtype=np.float64)

	    # Only a fraction of real drops is heard individually.
	    drops_per_sec = {"light": 8, "medium": 20, "heavy": 40, "thunder": 50}
	    n_drops = int(drops_per_sec.get(rain_type, 20) * density * DURATION)

	    # 20 ms nominal length; the per-drop factor below spreads it to 10-40 ms.
	    base_dur = int(SR * 0.02)
	    cutoff_base = 1000 * (2.0 ** (brightness * 3.0))
	    # Keep drop onsets away from the very end of the clip.
	    max_start = max(n_out - base_dur * 3, 1)

	    for _ in range(n_drops):
	        pos = np.random.randint(0, max_start)

	        dur = max(int(base_dur * np.random.uniform(0.5, 2.0)), 64)

	        # Noise burst with a sharp exponential decay.
	        t = np.linspace(0, 1, dur)
	        envelope = np.exp(-np.random.uniform(8, 20) * t)
	        noise = np.random.randn(dur) * envelope

	        # Per-drop band: upper edge jittered around the base cutoff and
	        # capped below Nyquist, lower edge at 30% of it (at least 100 Hz).
	        this_cutoff = min(cutoff_base * np.random.uniform(0.5, 1.5), SR * 0.45)
	        low = max(this_cutoff * 0.3, 100)

	        try:
	            sos = sig.butter(2, [low, this_cutoff], btype="bandpass", fs=SR, output="sos")
	        except ValueError:
	            # butter rejects band edges that are out of range or inverted
	            # (possible for extreme brightness values); keep the drop
	            # unfiltered rather than failing the whole render. Any other
	            # error is a real bug and is allowed to propagate.
	            drop = noise
	        else:
	            drop = sig.sosfilt(sos, noise)

	        # Random level stands in for distance.
	        amp = np.random.uniform(0.1, 1.0) ** 1.3

	        # Constant-power pan between the two channels.
	        pan = np.random.uniform(0, 1)
	        L_gain = np.cos(pan * np.pi / 2) * amp
	        R_gain = np.sin(pan * np.pi / 2) * amp

	        # Truncate drops that would run past the end of the buffer.
	        end = min(pos + dur, n_out)
	        seg = drop[:end - pos]
	        output_L[pos:end] += seg * L_gain
	        output_R[pos:end] += seg * R_gain

	    return output_L, output_R


	# ---------------------------------------------------------------------------
	# Thunder
	# ---------------------------------------------------------------------------

	def make_thunder(n_out: int) -> tuple:
	    """
	    Render one or two low-frequency rumble events.

	    Each event starts somewhere in the first half of the buffer, lasts
	    2-4 seconds and is a sum of six sinusoids (20-100 Hz) with random
	    phase and level, shaped by a fast-rise / slow-decay envelope.

	    Args:
	        n_out: Length of each returned channel in samples.

	    Returns:
	        ``(left, right)``: two float64 arrays of length ``n_out``.
	    """
	    left = np.zeros(n_out, dtype=np.float64)
	    right = np.zeros(n_out, dtype=np.float64)

	    n_events = np.random.randint(1, 3)  # upper bound is exclusive: 1 or 2
	    for _ in range(n_events):
	        # max(..., 1) keeps randint valid for buffers shorter than 2 samples.
	        pos = np.random.randint(0, max(n_out // 2, 1))
	        dur = int(SR * np.random.uniform(2.0, 4.0))

	        # Two time axes: seconds for the oscillators, so the partial
	        # frequencies below really are in Hz, and a normalised 0..1 axis
	        # for the envelope, so its shape is independent of event length.
	        t_sec = np.arange(dur) / SR
	        t_norm = np.linspace(0, 1, dur)

	        rumble = np.zeros(dur)
	        for f in [20, 30, 45, 60, 80, 100]:
	            phase = np.random.uniform(0, 2 * np.pi)
	            rumble += np.sin(2 * np.pi * f * t_sec + phase) * np.random.uniform(0.3, 1.0)

	        # Quick build-up followed by a long decay, then scaled down.
	        env = np.exp(-1.0 * t_norm) * (1 - np.exp(-6 * t_norm))
	        rumble *= env * 0.4

	        # Truncate events that would run past the end of the buffer.
	        end = min(pos + dur, n_out)
	        seg = rumble[:end - pos]

	        # Independent channel gains give a little stereo width.
	        left[pos:end] += seg * np.random.uniform(0.7, 1.0)
	        right[pos:end] += seg * np.random.uniform(0.7, 1.0)

	    return left, right


	# ---------------------------------------------------------------------------
	# Tonal granular engine (unchanged)
	# ---------------------------------------------------------------------------

	def make_tonal_source(freq: float = 220.0, duration: float = 2.0) -> np.ndarray:
	    """
	    Build the harmonic test tone that the tonal granular engine slices up.

	    The tone is the sum of harmonics 1 through 7 of ``freq``, each weighted
	    by the reciprocal of its harmonic number, sampled at SR for
	    ``duration`` seconds and scaled to a peak absolute value of 1.
	    """
	    sample_count = int(SR * duration)
	    time_axis = np.linspace(0, duration, sample_count, endpoint=False)

	    tone = np.zeros_like(time_axis)
	    for harmonic in (1, 2, 3, 4, 5, 6, 7):
	        weight = 1.0 / harmonic
	        tone += weight * np.sin(2 * np.pi * freq * harmonic * time_axis)

	    loudest = np.max(np.abs(tone))
	    return tone / loudest


	def granular_synthesize(source, grain_size_ms, density, randomness, pitch_shift):
	    """
	    Classic time-domain granular synthesis over ``source``.

	    Hann-windowed grains of ``grain_size_ms`` milliseconds (at least 64
	    samples) are written every ``grain_length / density`` samples into a
	    buffer of DURATION seconds. The read position of each grain blends a
	    steadily advancing read head with a uniformly random position,
	    weighted by ``randomness`` (0 = sequential, 1 = random). A positive
	    ``pitch_shift`` other than 1.0 first resamples the source by picking
	    every ``pitch_shift``-th sample (truncated to integer indices).

	    Returns the mixed buffer scaled to a peak absolute value of 1, or the
	    all-zero buffer untouched when nothing was written.
	    """
	    grain_len = max(int((grain_size_ms / 1000.0) * SR), 64)
	    grain_window = np.hanning(grain_len)
	    stride = max(int(grain_len / density), 1)
	    total_len = int(DURATION * SR)
	    mix = np.zeros(total_len, dtype=np.float64)

	    # Optional crude resampling by index decimation / repetition.
	    material = source
	    if pitch_shift > 0 and pitch_shift != 1.0:
	        read_idx = np.arange(0, len(source), pitch_shift)
	        read_idx = read_idx[read_idx < len(source) - 1].astype(int)
	        material = source[read_idx]

	    material_len = len(material)
	    last_start = material_len - grain_len
	    wrap_len = max(last_start, 1)
	    grain_count = (total_len - grain_len) // stride
	    # The read head crosses the usable part of the material exactly once.
	    advance = last_start / max(grain_count, 1)

	    cursor = 0.0
	    for grain_no in range(grain_count):
	        sequential = int(cursor) % wrap_len
	        scattered = np.random.randint(0, wrap_len)
	        begin = int(sequential * (1 - randomness) + scattered * randomness)
	        begin = min(max(begin, 0), last_start)
	        piece = material[begin: begin + grain_len] * grain_window

	        write_at = grain_no * stride
	        if write_at + grain_len > total_len:
	            break
	        mix[write_at: write_at + grain_len] += piece
	        cursor += advance

	    loudest = np.max(np.abs(mix))
	    if loudest > 0:
	        return mix / loudest
	    return mix


	# ---------------------------------------------------------------------------
	# Callbacks
	# ---------------------------------------------------------------------------

	def cb_rain(rain_type, brightness, density, mod_speed, stereo, highcut, lowcut):
	    """Callback for the rain button: render rain and return ``(SR, float32 samples)``."""
	    rendered = synthesize_rain(
	        rain_type=rain_type,
	        brightness=brightness,
	        density=density,
	        modulation_speed=mod_speed,
	        stereo_width=stereo,
	        highcut=highcut,
	        lowcut=lowcut,
	    )
	    return SR, rendered.astype(np.float32)


	def cb_tonal(grain_size, density, randomness, pitch_shift, freq):
	    """Callback for the tonal button: granulate a harmonic tone and return ``(SR, float32 samples)``."""
	    rendered = granular_synthesize(
	        make_tonal_source(freq=freq),
	        grain_size,
	        density,
	        randomness,
	        pitch_shift,
	    )
	    return SR, rendered.astype(np.float32)


	# ---------------------------------------------------------------------------
	# Theme + CSS
	# ---------------------------------------------------------------------------

	# One colour per theme variable. Every entry is applied twice below: once
	# under its own name and once under the "<name>_dark" variant, so the UI
	# looks the same in light and dark mode.
	_THEME_COLORS = {
	    "body_background_fill": "#0d1117",
	    "body_text_color": "#c9d1d9",
	    "body_text_color_subdued": "#8b949e",
	    "background_fill_primary": "#161b22",
	    "background_fill_secondary": "#0d1117",
	    "block_background_fill": "#161b22",
	    "block_border_color": "#21262d",
	    "block_label_text_color": "#8b949e",
	    "block_title_text_color": "#e6edf3",
	    "border_color_primary": "#21262d",
	    "button_primary_background_fill": "#238636",
	    "button_primary_background_fill_hover": "#2ea043",
	    "button_primary_text_color": "#ffffff",
	    "button_secondary_background_fill": "#21262d",
	    "button_secondary_text_color": "#c9d1d9",
	    "input_background_fill": "#0d1117",
	    "input_border_color": "#30363d",
	    "slider_color": "#58a6ff",
	    "link_text_color": "#58a6ff",
	}

	dark_theme = gr.themes.Base(
	    primary_hue=gr.themes.colors.blue,
	    secondary_hue=gr.themes.colors.slate,
	    neutral_hue=gr.themes.colors.slate,
	    font=gr.themes.GoogleFont("Inter"),
	    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
	).set(
	    **{
	        variable: color
	        for base_name, color in _THEME_COLORS.items()
	        for variable in (base_name, f"{base_name}_dark")
	    }
	)

	# Extra stylesheet passed to gr.Blocks(css=...). It hides the footer,
	# tightens the h1/h3 typography, restyles the tab buttons (underline on the
	# selected tab) and gives Markdown tables a compact, dark look.
	CUSTOM_CSS = """
	footer { display: none !important; }
	h1 { letter-spacing: -0.03em !important; font-weight: 600 !important; }
	h3 { color: #8b949e !important; font-weight: 400 !important; }
	.tab-nav button {
	font-weight: 500 !important;
	border: none !important;
	border-bottom: 2px solid transparent !important;
	}
	.tab-nav button.selected {
	color: #58a6ff !important;
	border-bottom-color: #58a6ff !important;
	}
	table { font-size: 0.82rem !important; }
	th { background: #21262d !important; color: #8b949e !important; font-weight: 500 !important; }
	td { border-top: 1px solid #21262d !important; }
	"""

	# ---------------------------------------------------------------------------
	# UI
	# ---------------------------------------------------------------------------

	# Layout: a header, two tabs (rain / tonal) that each pair a column of
	# controls with an output column, and a footer.
	# NOTE(review): the Markdown strings are indented with the code; this
	# relies on Gradio stripping common leading whitespace from Markdown
	# values. Confirm on the installed Gradio version.
	with gr.Blocks(title="Granular Synthesis", css=CUSTOM_CSS, theme=dark_theme) as demo:

	    gr.Markdown(
	        """
	        # Granular Synthesis
	        ### micro-sound, grain clouds, texture design
	        """
	    )

	    with gr.Tabs():

	        # ======================== RAIN ========================
	        with gr.TabItem("Rain Simulation"):
	            gr.Markdown(
	                """
	                Spectral-domain rain synthesis. Instead of summing noise clicks,
	                we sculpt white noise in the frequency domain to match the spectral
	                profile of real rainfall (energy concentrated 2 to 12 kHz, slope varies
	                with intensity). Each STFT frame is a "grain" whose spectrum is shaped
	                by the rain profile, then overlap-added into the output.
	                """
	            )

	            with gr.Row():
	                with gr.Column(scale=1):
	                    rain_type = gr.Radio(
	                        choices=["light", "medium", "heavy", "thunder"],
	                        value="medium",
	                        label="Rain type",
	                        info="Controls spectral shape, transient density, and optional layers.",
	                    )
	                    brightness = gr.Slider(
	                        0.0, 1.0, 0.45, step=0.01,
	                        label="Surface brightness",
	                        info="0 = dark (earth, foliage). 1 = bright (glass, tin roof).",
	                    )
	                    rain_density = gr.Slider(
	                        0.2, 3.0, 1.0, step=0.1,
	                        label="Density",
	                        info="Overall thickness of the rain texture.",
	                    )
	                    mod_speed = gr.Slider(
	                        0.05, 1.0, 0.2, step=0.05,
	                        label="Modulation speed",
	                        info="How fast the rain intensity fluctuates (gusts).",
	                    )
	                    stereo = gr.Slider(
	                        0.0, 1.0, 0.7, step=0.01,
	                        label="Stereo width",
	                        info="0 = mono. 1 = fully decorrelated L/R.",
	                    )
	                    lowcut = gr.Slider(
	                        50, 2000, 150, step=10,
	                        label="Low cut (Hz)",
	                        info="Remove frequencies below this point.",
	                    )
	                    highcut = gr.Slider(
	                        2000, 20000, 14000, step=100,
	                        label="High cut (Hz)",
	                        info="Remove frequencies above this point.",
	                    )
	                    rain_btn = gr.Button("Generate rain", variant="primary", size="lg")

	                with gr.Column(scale=1):
	                    rain_audio = gr.Audio(label="Output", type="numpy")

	                    # Plain "|" delimiters: backslash-escaped pipes are
	                    # treated as literal text, so the rows would not be
	                    # parsed as a table.
	                    gr.Markdown(
	                        """
	                        Presets

	                        | Scene | Type | Bright | Dens | Mod | LoCut | HiCut |
	                        |---|---|---|---|---|---|---|
	                        | Drizzle on leaves | light | 0.2 | 0.6 | 0.1 | 200 | 18000 |
	                        | Window at night | medium | 0.5 | 1.0 | 0.2 | 150 | 14000 |
	                        | Tin roof | medium | 0.9 | 1.2 | 0.15 | 300 | 16000 |
	                        | Downpour | heavy | 0.4 | 2.0 | 0.3 | 100 | 12000 |
	                        | Thunderstorm | thunder | 0.35 | 2.5 | 0.4 | 80 | 10000 |
	                        | Forest canopy | light | 0.15 | 0.5 | 0.08 | 200 | 15000 |
	                        """
	                    )

	            # Input order must match the parameter order of cb_rain.
	            rain_btn.click(
	                fn=cb_rain,
	                inputs=[rain_type, brightness, rain_density, mod_speed, stereo, highcut, lowcut],
	                outputs=rain_audio,
	            )

	        # ======================== TONAL ========================
	        with gr.TabItem("Tonal Granular"):
	            gr.Markdown(
	                """
	                Classic granular synthesis: slice a harmonic source into grains,
	                apply Hann windows, randomize read positions, overlap-add.
	                """
	            )
	            with gr.Row():
	                with gr.Column(scale=1):
	                    source_freq = gr.Slider(80, 880, 220, step=1, label="Source frequency (Hz)")
	                    grain_size = gr.Slider(
	                        5, 200, 50, step=1,
	                        label="Grain size (ms)",
	                        info="Smaller = buzzy. Larger = smooth.",
	                    )
	                    tonal_density = gr.Slider(1, 8, 4, step=0.5, label="Density (overlap)")
	                    randomness_sl = gr.Slider(
	                        0, 1, 0.3, step=0.01,
	                        label="Position randomness",
	                        info="0 = sequential. 1 = fully random (freeze/texture).",
	                    )
	                    pitch = gr.Slider(0.25, 4.0, 1.0, step=0.05, label="Pitch shift")
	                    tonal_btn = gr.Button("Synthesize", variant="primary", size="lg")

	                with gr.Column(scale=1):
	                    tonal_audio = gr.Audio(label="Output", type="numpy")
	                    gr.Markdown(
	                        """
	                        Signal chain

	                        Source (additive harmonics) > grain extraction (sequential + random blend)
	                        > Hann window (click-free edges) > overlap-add > normalize
	                        """
	                    )

	            # Input order must match the parameter order of cb_tonal.
	            tonal_btn.click(
	                fn=cb_tonal,
	                inputs=[grain_size, tonal_density, randomness_sl, pitch, source_freq],
	                outputs=tonal_audio,
	            )

	    gr.Markdown(
	        """
	        ---
	        Built with Python, NumPy, SciPy and Gradio. Everything is synthesized from scratch, no samples.
	        Part of [Generative Audio Soundscapes Lab](https://my-sonicase.github.io/genaudio-soundscapes/).
	        """
	    )


	if __name__ == "__main__":
	    demo.launch()