Spaces:

build-small-hackathon
/

LoFinity

Running on Zero

LoFinity / ambience.py

Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.

722a5d8 13 days ago

Raw

History Blame Contribute Delete

7.16 kB

	"""Ambience beds for LoFinity tapes.

	MusicGen ignores texture words ("vinyl crackle", "ocean waves"), so the
	background layer is mixed in here instead: a bed is rendered at song length
	and summed a few dB under the music. Lofi ambience loops through the whole
	track anyway, so nothing needs to be generated per song.

	vinyl_crackle and tape_hiss are synthesized procedurally (cheap, and the
	result never sounds looped); the other seven are loops in
	assets/ambience/<slug>.wav,
	rendered once by scripts/make_ambience.py and tiled with crossfades. A
	missing asset falls back to vinyl crackle so every tape still has texture.
	"""

	import wave
	from pathlib import Path

	import numpy as np

	# Folder holding the sampled ambience loops, located next to this module.
	ASSETS = Path(__file__).parent.joinpath("assets", "ambience")

	# Target level of each bed: bed RMS relative to the music RMS, in dB.
	# Treat these as starting points and tune by ear, since spiky textures
	# (crackle, fire) read louder than their RMS suggests.
	GAIN_DB = dict(
	    vinyl_crackle=-14.0,
	    tape_hiss=-18.0,
	    soft_rain=-14.0,
	    ocean_waves=-12.0,
	    fireplace_crackle=-14.0,
	    birdsong=-16.0,
	    night_crickets=-16.0,
	    cafe_murmur=-16.0,
	    wind_in_trees=-14.0,
	)
	# Slug substituted when a request cannot be matched to a known bed.
	DEFAULT = "vinyl_crackle"

	# (keyword, slug) pairs scanned in order, first hit wins. The groups below
	# are expanded in sequence, so "fire" is still tried before "crackle" and
	# "fireplace crackle" cannot be claimed for vinyl.
	_KEYWORDS = tuple(
	    (keyword, slug)
	    for slug, keywords in (
	        ("fireplace_crackle", ("fire",)),
	        ("soft_rain", ("rain",)),
	        ("ocean_waves", ("wave", "ocean", "sea")),
	        ("birdsong", ("bird",)),
	        ("night_crickets", ("cricket",)),
	        ("cafe_murmur", ("cafe", "coffee", "murmur", "chatter")),
	        ("wind_in_trees", ("wind", "tree", "leaves")),
	        ("vinyl_crackle", ("vinyl", "crackle", "static", "record")),
	        ("tape_hiss", ("hiss", "tape", "noise")),
	    )
	    for keyword in keywords
	)


	def normalize_slug(value) -> str:
	    """Coerce free-form LLM output into one of the known ambience slugs.

	    The input is stringified, stripped and lowercased. If swapping spaces and
	    hyphens for underscores gives a key of GAIN_DB, that key is returned.
	    Otherwise the first keyword of _KEYWORDS contained in the text decides
	    (e.g. "Ocean waves!" -> "ocean_waves"). Falsy or unrecognizable input
	    yields DEFAULT.
	    """
	    cleaned = str(value or "").strip().lower()
	    candidate = cleaned.translate(str.maketrans(" -", "__"))
	    if candidate in GAIN_DB:
	        return candidate
	    # Plain substring test, so plurals and trailing punctuation still match.
	    # NOTE(review): it also fires inside longer words ("train" contains
	    # "rain", "street" contains "tree").
	    return next(
	        (slug for keyword, slug in _KEYWORDS if keyword in cleaned),
	        DEFAULT,
	    )


	# --- procedural beds ----------------------------------------------------------


	def _lowpassed_noise(n: int, rate: int, cutoff: float, rng) -> np.ndarray:
	    """Return n samples of dull noise without running a real filter.

	    Gaussian noise is drawn on a coarse grid of about 2 * cutoff points per
	    second (never fewer than 200) and linearly interpolated up to `rate`;
	    the interpolation itself acts as the lowpass.
	    """
	    coarse_rate = max(int(cutoff * 2), 200)
	    # Two spare knots keep every interpolation point inside the coarse grid.
	    knot_count = max(int(n * coarse_rate / rate) + 2, 2)
	    knots = rng.standard_normal(knot_count)
	    positions = np.arange(n) * (coarse_rate / rate)
	    return np.interp(positions, np.arange(knot_count), knots)


	def _vinyl_crackle(n: int, rate: int, rng) -> np.ndarray:
	    """Render n samples of dusty surface noise with sparse, short pops.

	    The floor is lowpassed noise (2500 Hz cutoff) scaled by 0.06. On top of
	    it roughly nine pops per second of audio (at least one) are placed at
	    random positions. Each pop lasts 1-4 ms, decays exponentially and has a
	    random sign. An empty array is returned when n <= 0.
	    """
	    if n <= 0:
	        # rng.integers(0, 0, ...) raises ValueError, so bail out before it.
	        return np.zeros(0)
	    out = _lowpassed_noise(n, rate, 2500, rng) * 0.06
	    pop_count = max(int(n / rate * 9), 1)
	    for pos in rng.integers(0, n, pop_count):
	        length = int(rate * rng.uniform(0.001, 0.004))
	        # Squaring the uniform draw skews amplitudes toward quiet pops.
	        amp = rng.uniform(0.15, 1.0) ** 2 * np.sign(rng.standard_normal())
	        pop = amp * np.exp(-np.arange(length) / (length / 5))
	        # Clip a pop that would run past the end of the buffer.
	        end = min(pos + length, n)
	        out[pos:end] += pop[: end - pos]
	    return out


	def _tape_hiss(n: int, rate: int, rng) -> np.ndarray:
	    """Render n samples of bright hiss with a slow level wobble.

	    White noise is blended 35/65 with its own first difference, which tilts
	    the spectrum toward the highs, then amplitude-modulated by +/-8 % at
	    0.3 Hz from a random starting phase so it breathes like a real
	    transport.
	    """
	    wobble_hz = 0.3
	    base = rng.standard_normal(n)
	    # Prepending the first sample makes the leading difference exactly zero.
	    bright = np.diff(base, prepend=base[:1])
	    start_phase = rng.uniform(0, 2 * np.pi)
	    wobble = np.sin(2 * np.pi * wobble_hz * np.arange(n) / rate + start_phase)
	    return (0.35 * base + 0.65 * bright) * (1.0 + 0.08 * wobble)


	_PROCEDURAL = {"vinyl_crackle": _vinyl_crackle, "tape_hiss": _tape_hiss}


	# --- sampled beds ---------------------------------------------------------------


	def _read_wav(path: Path) -> tuple[np.ndarray, int]:
	    """Load a 16-bit PCM wav file as mono float64 samples.

	    Returns (samples, sample_rate). Samples are scaled by 1 / 32768, and
	    multi-channel files are averaged down to one channel.

	    Raises:
	        ValueError: if the file's sample width is not 16 bits.
	    """
	    with wave.open(str(path), "rb") as reader:
	        sample_width = reader.getsampwidth()
	        channel_count = reader.getnchannels()
	        sample_rate = reader.getframerate()
	        frames = reader.readframes(reader.getnframes())
	    if sample_width != 2:
	        raise ValueError(f"{path.name}: expected 16-bit wav, got {sample_width * 8}-bit")
	    samples = np.frombuffer(frames, dtype="<i2").astype(np.float64) / 32768.0
	    if channel_count <= 1:
	        return samples, sample_rate
	    # Interleaved frames -> one row per frame, then average across channels.
	    return samples.reshape(-1, channel_count).mean(axis=1), sample_rate


	def _resample(data: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
	    """Convert `data` from src_rate to dst_rate by linear interpolation.

	    The input is returned unchanged (same object) when the rates already
	    match or when it is empty. Otherwise the result has
	    int(len(data) * dst_rate / src_rate) samples. No anti-aliasing filter is
	    applied when downsampling, and positions past the last source sample
	    take that sample's value.
	    """
	    # np.interp raises ValueError on an empty sample array, so an empty
	    # input is passed through instead of being interpolated.
	    if src_rate == dst_rate or len(data) == 0:
	        return data
	    out_len = int(len(data) * dst_rate / src_rate)
	    src_positions = np.arange(out_len) * (src_rate / dst_rate)
	    return np.interp(src_positions, np.arange(len(data)), data)


	def _tile(loop: np.ndarray, n: int, rate: int) -> np.ndarray:
	    """Repeat `loop` out to n samples, crossfading each seam so it doesn't
	    click. The loop does not need to be seamless.

	    The crossfade lasts half a second or a quarter of the loop, whichever is
	    shorter. It uses equal-power (sqrt) ramps, not linear ones: the tail and
	    head being blended are uncorrelated audio, so linear ramps would dip
	    about 3 dB below the surrounding level at the crossfade midpoint (an
	    audible dip every loop). With sqrt ramps
	    gain_out**2 + gain_in**2 == 1, which holds the power steady.

	    A loop at least n samples long is simply truncated, a loop too short to
	    fade (fewer than 4 samples) is tiled without crossfades, and an empty
	    loop yields n samples of silence. The input array is never modified.
	    """
	    if n <= 0:
	        return loop[:0].copy()
	    if len(loop) == 0:
	        # Nothing to repeat; also avoids dividing by a zero loop length.
	        return np.zeros(n)
	    if len(loop) >= n:
	        return loop[:n].copy()
	    fade = min(int(rate * 0.5), len(loop) // 4)
	    if fade == 0:
	        return np.tile(loop, n // len(loop) + 1)[:n]
	    fade_in = np.sqrt(np.linspace(0.0, 1.0, fade))
	    # Every repeat fades out; all but the first also fade in. Both shapes
	    # are loop-invariant, so they are built once rather than per repeat.
	    first_pass = loop.copy()
	    first_pass[-fade:] *= fade_in[::-1]
	    later_pass = first_pass.copy()
	    later_pass[:fade] *= fade_in
	    # Extra room so the last repeat can overhang n before trimming.
	    out = np.zeros(n + len(loop))
	    hop = len(loop) - fade
	    for pos in range(0, n, hop):
	        out[pos : pos + len(loop)] += later_pass if pos else first_pass
	    return out[:n]


	# --- public API -----------------------------------------------------------------


	def render(slug: str, n: int, rate: int) -> np.ndarray:
	    """Return a peak-normalized bed of n samples at `rate`; the caller sets
	    the level.

	    Slugs listed in _PROCEDURAL are synthesized with a fresh, unseeded
	    random generator, so every call sounds different. Any other slug is
	    read from ASSETS/<slug>.wav, resampled to `rate` and tiled to length;
	    that file has to exist. An all-zero bed is returned unscaled, and an
	    empty array is returned when n <= 0.
	    """
	    if n <= 0:
	        # Neither the crackle generator nor the peak search below accepts a
	        # zero-length request.
	        return np.zeros(0)
	    generator = _PROCEDURAL.get(slug)
	    if generator is not None:
	        bed = generator(n, rate, np.random.default_rng())
	    else:
	        loop, loop_rate = _read_wav(ASSETS / f"{slug}.wav")
	        bed = _tile(_resample(loop, loop_rate, rate), n, rate)
	    peak = float(np.abs(bed).max())
	    return bed / (peak or 1.0)


	def mix(music, rate: int, slug: str) -> np.ndarray:
	    """Sum the ambience bed under the music at its slug's relative RMS level.

	    `music` is converted to float64. Empty or effectively silent input
	    (RMS below 1e-6) is returned as that float64 array with nothing added.
	    Otherwise `slug` is normalized, falling back to DEFAULT with a printed
	    notice when its asset file is missing, and a bed of len(music) samples
	    is rendered. The bed is scaled so its RMS sits GAIN_DB[slug] dB relative
	    to the music RMS, faded in and out at the edges, and added to the music.

	    NOTE(review): assumes `music` is a 1-D mono signal, since the bed is
	    rendered as len(music) samples and added elementwise; confirm against
	    the caller.
	    """
	    music = np.asarray(music, dtype=np.float64)
	    if music.size == 0:
	        # The RMS of an empty array is NaN, which would slip past the
	        # silence test below and request a zero-length bed.
	        return music
	    music_rms = float(np.sqrt(np.mean(music**2)))
	    if music_rms < 1e-6:  # silence in, silence out
	        return music
	    slug = normalize_slug(slug)
	    if slug not in _PROCEDURAL and not (ASSETS / f"{slug}.wav").exists():
	        print(
	            f"[lofinity] no ambience asset for {slug!r} "
	            "(run scripts/make_ambience.py), using vinyl crackle"
	        )
	        slug = DEFAULT
	    bed = render(slug, len(music), rate)
	    bed_rms = float(np.sqrt(np.mean(bed**2))) or 1.0
	    # Convert the dB offset to a linear factor and rescale the bed from its
	    # own RMS to the target RMS.
	    bed = bed * (music_rms * 10 ** (GAIN_DB[slug] / 20) / bed_rms)
	    # Fade the bed in and out over 0.75 s (at most a quarter of the track)
	    # so it does not start or stop abruptly.
	    edge = min(int(rate * 0.75), len(bed) // 4)
	    if edge:
	        ramp = np.linspace(0.0, 1.0, edge)
	        bed[:edge] *= ramp
	        bed[-edge:] *= ramp[::-1]
	    return music + bed