Spaces:

build-small-hackathon
/

LoFinity

Running on Zero

LoFinity / scripts /fetch_ambience.py

Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.

722a5d8 11 days ago

Raw

History Blame Contribute Delete

17.4 kB

	"""Download the sampled ambience beds from Wikimedia Commons.

	A no-GPU alternative to make_ambience.py: instead of generating the seven
	sampled beds, this pulls real field recordings from Wikimedia Commons
	(public-domain / CC-licensed), trims each to a steady ~30 s loop, and writes
	mono 16-bit wavs into assets/ambience/ — the format ambience.py expects.

	It auto-selects: for each slug it searches Commons, drops obvious junk
	(alarms, music, traffic…) by keyword, then downloads candidates in turn and
	measures them, keeping the first that is long enough and not near-silent.
	Provenance + licence for every pick is written to assets/ambience/CREDITS.md
	so attribution can be honored when the Space ships.

	Usage:
	uv pip install soundfile # bundles libsndfile (ogg/mp3/flac/wav)
	python scripts/fetch_ambience.py # fill in what's missing
	python scripts/fetch_ambience.py ocean_waves --force
	"""

	import argparse
	import io
	import json
	import re
	import sys
	import time
	import unicodedata
	import urllib.parse
	import urllib.request
	import wave
	from pathlib import Path

	import numpy as np

	# Repository root: two directory levels above this script.
	ROOT = Path(__file__).resolve().parent.parent
	# Directory the finished beds and their credits are written to.
	OUT_DIR = ROOT / "assets" / "ambience"
	# Wikimedia Commons Action API endpoint.
	API = "https://commons.wikimedia.org/w/api.php"
	# User-Agent header sent with every request this script makes.
	UA = "LoFinity/0.1 (lofi hackathon ambience fetcher; https://huggingface.co/spaces)"

	# Loop length we keep, in seconds.  It equals the default song length, so a
	# bed this long tiles to a 30 s song with zero seams.
	TARGET_S = 30.0
	# Sources shorter than this many seconds are too short to be useful ambience.
	MIN_SRC_DUR = 8.0
	# Sources longer than this many seconds are skipped (podcasts, mixes).
	MAX_SRC_DUR = 400.0
	# Download size cap in bytes: don't pull giant wavs.
	MAX_BYTES = 30_000_000
	# Cap on the stored sample rate (== musicgen rate); keeps files small.
	MAX_RATE = 32000

	# Where candidates for each bed come from.  Every slug maps to a list of
	# probes whose results are unioned.  Commons search ANDs all the words of a
	# probe, so each probe is kept to 1-2 words; adding probes adds fallback
	# candidates.  A ("category", name) probe lists a curated category, a
	# ("search", terms) probe is a File-namespace full-text search.
	SOURCES = {
	    "soft_rain": [
	        ("category", "Sounds of rain"),
	        ("search", "rain ambience"),
	    ],
	    "ocean_waves": [
	        ("search", "ocean waves"),
	        ("search", "sea waves"),
	        ("search", "surf beach"),
	    ],
	    "fireplace_crackle": [
	        ("search", "campfire"),
	        ("search", "fireplace"),
	        ("search", "fire crackling"),
	    ],
	    "birdsong": [
	        ("search", "birdsong"),
	        ("search", "dawn chorus"),
	        ("search", "birds chirping"),
	    ],
	    "night_crickets": [
	        ("search", "crickets"),
	        ("search", "cricket chirping"),
	        ("search", "cicada"),
	    ],
	    "wind_in_trees": [
	        ("search", "wind trees"),
	        ("search", "wind forest"),
	        ("search", "wind leaves"),
	    ],
	    "cafe_murmur": [
	        ("search", "restaurant ambience"),
	        ("search", "cafe ambience"),
	        ("search", "crowd murmur"),
	    ],
	}

	# Pinned, hand-vetted Commons file per slug, tried ahead of the search
	# results.  Auto-selection cannot tell a "continuous dawn chorus" from "one
	# repetitive cuckoo", so the good picks found during development live here.
	# A pinned file still passes through every gate below, so one that has been
	# renamed or deleted simply falls through to search.
	PREFERRED = {
	    "soft_rain": "File:Lluvia en techo de lamina.wav",
	    "ocean_waves": "File:Sea waves.wav",
	    "fireplace_crackle": "File:WWS Fireoftheforge.ogg",
	    "birdsong": "File:Birds singing in Fribourg 01.ogg",
	    "night_crickets": "File:Black-Prince-Cicada- Psaltoda-plaga.wav",
	    "wind_in_trees": "File:Wind in forest (Gravity Sound).wav",
	    "cafe_murmur": "File:Shopping mall less crowded.ogg",
	}

	# A title containing any of these (lowercased) is not ambience and is
	# skipped.  This keeps "fire" from returning fire alarms, "sea" from
	# returning podcasts, and "waves" from returning sine-wave test tones.
	# NOTE(review): find_titles tests these as plain substrings of the
	# normalized title, so short entries also hit longer words - "war" rejects
	# any title containing "warbler", which RELEVANCE lists for birdsong.
	BLOCKLIST = (
	    # speech and alerts
	    "alarm", "podcast", "episode", "interview", "speech", "talk", "lecture",
	    # performed music
	    "music", "song -", "band", "orchestra", "anthem", "hymn", "vocal", "choir",
	    "dance", "ritual", "march",
	    # vehicles and machinery
	    "siren", "horn", "traffic", "tram", "engine", "motor",
	    # weapons, broadcasts, demos
	    "gun", "explosion", "war", "radio", "national", "voice", "demo",
	    # test signals
	    "sine", "tone", "hz", "sweep", "beep", "dtmf", "calibration", "signal",
	    # instruments
	    "woodwind", "clarinet", "flute", "accordion", "instrument", "guitar",
	)

	# The chosen file's title (accent-stripped) has to contain at least one of
	# its slug's keywords, i.e. name a sound actually related to the slug.
	# Keywords are multilingual because Commons is international.
	RELEVANCE = {
	    "soft_rain": (
	        "rain", "lluvia", "regen", "pluie", "pioggia", "chuva",
	        "downpour", "drizzle", "storm",
	    ),
	    "ocean_waves": (
	        "ocean", "wave", "sea", "surf", "beach", "mar", "ola",
	        "vague", "welle", "tide", "shore", "playa", "costa",
	    ),
	    "fireplace_crackle": (
	        "fire", "campfire", "fireplace", "crackl", "crepit",
	        "feu", "fuego", "hoguera", "fogata", "ember", "hearth",
	    ),
	    "birdsong": (
	        "bird", "song", "chorus", "dawn", "chirp", "cuckoo", "wren",
	        "sparrow", "robin", "blackbird", "finch", "warbler", "thrush",
	        "nightingale", "lark", "vogel", "oiseau", "pajaro", "canto",
	    ),
	    "night_crickets": (
	        "cricket", "cicada", "cicad", "cigarra", "grasshopper",
	        "grillo", "grille", "katydid", "locust", "insect", "chirp",
	    ),
	    "wind_in_trees": (
	        "wind", "breeze", "gust", "rustl", "viento", "vent",
	        "howl", "gale", "brisa", "blowing",
	    ),
	    "cafe_murmur": (
	        "cafe", "restaurant", "crowd", "murmur", "coffee", "bar",
	        "pub", "chatter", "ambien", "mall", "station", "people",
	        "plaza", "market", "tunnel", "hall", "lobby", "gente",
	    ),
	}


	def _norm(s):
	    """Return ``str(s)`` lowercased with accents removed.

	    The text is NFKD-decomposed and its combining marks are dropped, so
	    'pájaro' and 'Pajaro' both come out as 'pajaro'.
	    """
	    decomposed = unicodedata.normalize("NFKD", str(s))
	    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
	    return "".join(base_chars).lower()


	def commons_api(params, tries=5):
	    """Call the Wikimedia Commons API and return the decoded JSON reply.

	    ``format=json`` and ``formatversion=2`` are merged over ``params`` and the
	    query is sent as a GET request carrying the script's User-Agent.

	    Rate limiting (HTTP 429) and transient server errors (HTTP 5xx) are
	    retried, up to ``tries`` attempts in total.  The pause before a retry is
	    a linear back-off (2 s, 4 s, 6 s, ...), raised to the server's numeric
	    ``Retry-After`` value when one is sent, and never more than 60 s.  Any
	    other HTTP error, and the error from the last attempt, propagates to the
	    caller.

	    Returns the parsed JSON object; an empty dict only when ``tries`` is
	    zero or negative, because no request is made at all.
	    """
	    params = {**params, "format": "json", "formatversion": "2"}
	    url = API + "?" + urllib.parse.urlencode(params)
	    req = urllib.request.Request(url, headers={"User-Agent": UA})
	    for attempt in range(tries):
	        try:
	            with urllib.request.urlopen(req, timeout=30) as r:
	                return json.load(r)
	        except urllib.error.HTTPError as e:
	            # Callers do not guard this function, so a single transient
	            # failure would otherwise abort the whole run.
	            transient = e.code == 429 or 500 <= e.code < 600
	            if not transient or attempt >= tries - 1:
	                raise
	            wait = 2 * (attempt + 1)
	            retry_after = (e.headers.get("Retry-After") or "").strip() if e.headers else ""
	            if retry_after.isdigit():
	                # Honor the server's hint, but never stall for more than a minute.
	                wait = min(max(wait, int(retry_after)), 60)
	            time.sleep(wait)
	    return {}


	def find_titles(slug):
	    """Collect candidate Commons file titles for ``slug``.

	    Every probe in ``SOURCES[slug]`` is run against the API (category
	    listings capped at 30 members, searches at 15 hits), with a one-second
	    pause after each request.  The pooled titles are then filtered in their
	    original order: repeats are dropped, any title whose normalized form
	    contains a ``BLOCKLIST`` entry is dropped, and a title is kept only when
	    its normalized form contains one of ``RELEVANCE[slug]``.

	    Returns the surviving titles as a list.
	    """
	    gathered = []
	    for kind, value in SOURCES[slug]:
	        if kind == "category":
	            result_key = "categorymembers"
	            query = {"action": "query", "list": "categorymembers",
	                     "cmtitle": f"Category:{value}", "cmtype": "file",
	                     "cmlimit": "30"}
	        else:
	            result_key = "search"
	            query = {"action": "query", "list": "search", "srnamespace": "6",
	                     "srsearch": f"filetype:audio {value}", "srlimit": "15"}
	        rows = commons_api(query).get("query", {}).get(result_key, [])
	        gathered += [row["title"] for row in rows]
	        time.sleep(1)

	    # Dedupe (keeping order), drop junk, then require a slug-relevant word.
	    kept, accepted = [], set()
	    for title in gathered:
	        if title in accepted:
	            continue
	        folded = _norm(title)
	        if any(bad in folded for bad in BLOCKLIST):
	            continue
	        if any(word in folded for word in RELEVANCE[slug]):
	            accepted.add(title)
	            kept.append(title)
	    return kept


	def file_info(titles):
	    """Look up download and attribution metadata for a list of file titles.

	    Titles are queried in batches of 20 (``prop=imageinfo``), with a
	    one-second pause after each batch.

	    Returns a dict mapping title -> dict with keys:
	      url        direct file URL ("" when the page has no imageinfo)
	      dur        duration in seconds as a float (0.0 when not reported)
	      mediatype  the API's mediatype string ("" when absent)
	      license    short licence name from extmetadata, or "?"
	      artist     extmetadata Artist with HTML tags removed, or "Unknown"
	      page       description-page URL ("" when absent)

	    Entries are keyed by the title the API reports.  When the API says it
	    normalized a requested title, the entry is also reachable under the title
	    as requested, so a caller looking up its own spelling still finds it.
	    """
	    out = {}
	    for i in range(0, len(titles), 20):
	        # "|" is the API's multi-value separator for both titles and iiprop.
	        info = commons_api({"action": "query", "titles": "|".join(titles[i:i + 20]),
	                            "prop": "imageinfo",
	                            "iiprop": "url|size|mediatype|extmetadata"})
	        query = info.get("query", {})
	        for page in query.get("pages", []):
	            # Missing pages carry no imageinfo; fall back to an empty record.
	            ii = (page.get("imageinfo") or [{}])[0]
	            ext = ii.get("extmetadata", {})

	            def field(k):
	                return ext.get(k, {}).get("value", "")

	            out[page.get("title", "?")] = {
	                "url": ii.get("url", ""),
	                "dur": float(ii.get("duration") or 0.0),
	                "mediatype": ii.get("mediatype", ""),
	                "license": field("LicenseShortName") or "?",
	                "artist": _strip_html(field("Artist")) or "Unknown",
	                "page": ii.get("descriptionurl", ""),
	            }
	        # Alias each normalized title back to the spelling that was requested.
	        for alias in query.get("normalized", []):
	            requested, canonical = alias.get("from"), alias.get("to")
	            if requested is not None and canonical in out:
	                out.setdefault(requested, out[canonical])
	        time.sleep(1)
	    return out


	def _strip_html(s):
	    """Remove every ``<...>`` tag from ``s`` and trim surrounding whitespace."""
	    without_tags = re.sub(r"<[^>]+>", "", s)
	    return without_tags.strip()


	def spectral_flatness(mono, rate):
	    """Return the spectral flatness of the first four seconds of ``mono``.

	    Flatness is the geometric mean of the power spectrum divided by its
	    arithmetic mean: ~0 for a pure tone, higher for broadband texture.  It
	    catches test tones that slip past the title filter (a 'Sine Wave' file
	    is named like a sea 'wave').

	    Before the FFT the excerpt is detrended and high-passed with a first
	    difference.  Crowd and surf ambience carries heavy low-frequency rumble
	    that otherwise dominates the spectrum and reads as falsely 'tonal'
	    (calibration showed real cafe recordings at 2e-5 raw vs 1e-12 for a true
	    sine - too close; after the high-pass they separate to 2e-3 vs 1e-12).

	    Excerpts shorter than 256 samples are not analyzed and score 1.0.
	    """
	    excerpt = mono[: rate * 4].astype(np.float64)
	    if len(excerpt) < 256:
	        return 1.0
	    highpassed = np.diff(excerpt - excerpt.mean())
	    windowed = highpassed * np.hanning(len(highpassed))
	    # The small floor keeps log() finite on empty bins.
	    power = np.abs(np.fft.rfft(windowed)) ** 2 + 1e-12
	    geometric_mean = np.exp(np.mean(np.log(power)))
	    arithmetic_mean = np.mean(power)
	    return float(geometric_mean / arithmetic_mean)


	def download(url):
	    """Fetch ``url`` and return its body as bytes, refusing oversized files.

	    The request carries the script's User-Agent and a 60 s timeout.

	    Raises:
	        ValueError: when the advertised ``Content-Length`` exceeds
	            ``MAX_BYTES``, or when the body itself turns out to be larger
	            than ``MAX_BYTES`` (no or wrong ``Content-Length``).
	    """
	    req = urllib.request.Request(url, headers={"User-Agent": UA})
	    with urllib.request.urlopen(req, timeout=60) as r:
	        length = int(r.headers.get("Content-Length") or 0)
	        if length and length > MAX_BYTES:
	            raise ValueError(f"too big ({length / 1e6:.0f} MB)")
	        # Read one byte past the cap so an over-long body is detectable.
	        blob = r.read(MAX_BYTES + 1)
	    if len(blob) > MAX_BYTES:
	        # Without this check a truncated file would be handed to the decoder.
	        raise ValueError(f"too big (> {MAX_BYTES / 1e6:.0f} MB)")
	    return blob


	def decode_mono(blob):
	    """Decode an audio file held in ``blob`` to ``(mono_samples, rate)``.

	    soundfile is tried first; its channels are averaged into one float64
	    track.  When soundfile raises ``LibsndfileError`` the blob is handed to
	    the PyAV fallback decoder instead.
	    """
	    import soundfile as sf

	    try:
	        frames, rate = sf.read(io.BytesIO(blob), dtype="float64", always_2d=True)
	    except sf.LibsndfileError:
	        # Opus/other codecs libsndfile can't open
	        return _decode_av(blob)
	    return frames.mean(axis=1), rate


	def _decode_av(blob):
	    """Decode ``blob`` with PyAV and return ``(mono_samples, rate)``.

	    This is the fallback decoder (PyAV bundles ffmpeg): most Commons
	    crowd/cafe recordings are Ogg/Opus, which libsndfile doesn't support.
	    The first audio stream is converted to mono float samples at the
	    stream's own sample rate and returned as one float64 array.

	    Raises:
	        ValueError: when decoding yields no audio frames.
	    """
	    import av

	    pieces = []
	    with av.open(io.BytesIO(blob)) as container:
	        audio = container.streams.audio[0]
	        rate = audio.codec_context.sample_rate
	        to_mono = av.AudioResampler(format="flt", layout="mono", rate=rate)
	        for frame in container.decode(audio):
	            pieces.extend(
	                converted.to_ndarray().reshape(-1)
	                for converted in to_mono.resample(frame)
	            )
	    if not pieces:
	        raise ValueError("no audio frames decoded")
	    return np.concatenate(pieces).astype(np.float64), rate


	def steady_window(mono, rate):
	    """Cut the best ``TARGET_S``-second loop window out of ``mono``.

	    Clips no longer than the target are returned whole (the mixer tiles
	    them).  Otherwise candidate windows are scored on 100 ms RMS frames, and
	    the lowest score wins.  Three things are scored, because the mixer
	    crossfades the loop's tail back into its head:
	      - a steady interior (low RMS variation), so it doesn't swell or drop
	      - head and tail at matched energy, so the crossfade blends like-for-like
	      - neither boundary in a lull, so the loop point doesn't briefly drop out
	    The last two matter for sparse textures (birdsong, fireplace): a window
	    that merely minimizes variance can still start/end in a gap, dipping
	    ~10 dB every loop.

	    Windows whose mean level is under half the clip's median level are not
	    considered.  If no window qualifies, the centered window is returned.
	    """
	    n = int(TARGET_S * rate)
	    if len(mono) <= n:
	        return mono

	    # 100 ms frames: fine enough to see the seam.
	    hop = max(int(rate * 0.1), 1)
	    levels = []
	    for offset in range(0, len(mono) - hop, hop):
	        frame = mono[offset:offset + hop]
	        levels.append(np.sqrt(np.mean(frame ** 2)))
	    frame_rms = np.array(levels)

	    median = float(np.median(frame_rms)) or 1.0
	    win_frames = max(n // hop, 1)
	    # Number of frames spanning one crossfade (~0.5 s).
	    edge = max(int(rate * 0.5) // hop, 1)
	    stride = max(win_frames // 8, 1)

	    best_start, best_score = None, 1e9
	    for first in range(0, len(frame_rms) - win_frames, stride):
	        window = frame_rms[first:first + win_frames]
	        level = float(window.mean())
	        if level < 0.5 * median:
	            # Window sits mostly in a lull.
	            continue
	        head = float(window[:edge].mean())
	        tail = float(window[-edge:].mean())
	        cv = float(window.std()) / (level or 1.0)
	        mismatch = abs(head - tail) / median
	        # Reaches 0 once the quieter boundary is at or above the median.
	        lull = max(0.0, 1.0 - min(head, tail) / median)
	        score = cv + 2.0 * mismatch + 2.0 * lull
	        if score < best_score:
	            best_score, best_start = score, first * hop

	    if best_start is None:
	        best_start = (len(mono) - n) // 2
	    return mono[best_start:best_start + n]


	def resample(mono, src, dst):
	    """Downsample ``mono`` from rate ``src`` to ``dst`` by linear interpolation.

	    Audio already at or below ``dst`` is returned untouched together with
	    its own rate; it is never upsampled.

	    Returns a ``(samples, rate)`` tuple.
	    """
	    if dst >= src:
	        return mono, src
	    out_len = int(len(mono) * dst / src)
	    positions = np.arange(out_len) * (src / dst)
	    source_index = np.arange(len(mono))
	    return np.interp(positions, source_index, mono), dst


	def write_wav(mono, rate, path):
	    """Write ``mono`` to ``path`` as a mono 16-bit little-endian PCM wav.

	    Samples are scaled so the loudest one lands at 0.9 of full scale; an
	    all-zero signal is written unscaled.
	    """
	    loudest = float(np.abs(mono).max() or 1.0)
	    gain = 0.9 / loudest
	    frames = (mono * gain * 32767).astype("<i2").tobytes()
	    with wave.open(str(path), "wb") as out:
	        out.setnchannels(1)
	        out.setsampwidth(2)
	        out.setframerate(rate)
	        out.writeframes(frames)


	def fetch_one(slug):
	    """Find, vet, and save one ambience bed for ``slug``.

	    The pinned ``PREFERRED`` title (if any) is tried first, followed by the
	    search results in the order they were found.  Of the candidates that
	    have a download URL, at most the first eight are considered; each is
	    downloaded, decoded, and must pass the duration, loudness, and
	    tonality gates.  The first one that passes is trimmed, capped at
	    ``MAX_RATE``, and written to ``OUT_DIR/<slug>.wav``.

	    Returns a credit dict (slug, title, license, artist, page) on success,
	    or None if nothing usable was found.
	    """
	    found = find_titles(slug)
	    pinned = PREFERRED.get(slug)
	    # Pinned pick first, search results behind it; repeats keep their first slot.
	    lookup = list(dict.fromkeys(([pinned] if pinned else []) + found))
	    if not lookup:
	        print(f" no candidates found for {slug}")
	        return None

	    info = file_info(lookup)
	    downloadable = [t for t in lookup if info.get(t, {}).get("url")]
	    for title in downloadable[:8]:
	        meta = info[title]
	        # Title without its leading "File:" prefix, for messages and credits.
	        name = title[5:]
	        if meta["dur"] and meta["dur"] > MAX_SRC_DUR:
	            continue
	        try:
	            mono, rate = decode_mono(download(meta["url"]))
	        except Exception as e:  # noqa: BLE001 — try the next candidate
	            print(f" skip {name[:40]!r}: {e}")
	            continue

	        dur = len(mono) / rate
	        rms = float(np.sqrt(np.mean(mono ** 2)))
	        flat = spectral_flatness(mono, rate)
	        unusable = dur < MIN_SRC_DUR or dur > MAX_SRC_DUR or rms < 5e-3
	        if unusable:
	            print(f" skip {name[:40]!r}: dur={dur:.0f}s rms={rms:.3f}")
	            continue
	        if flat < 1e-3:
	            # Essentially a pure tone, not ambience (sines score ~1e-12).
	            print(f" skip {name[:40]!r}: too tonal (flatness {flat:.0e})")
	            continue

	        seg, out_rate = resample(steady_window(mono, rate), rate, MAX_RATE)
	        write_wav(seg, out_rate, OUT_DIR / f"{slug}.wav")
	        if len(seg) / out_rate >= 30:
	            seams = "no seam"
	        else:
	            seams = "1 seam @30s"
	        print(f" {slug} <- {name[:42]!r} "
	              f"({dur:.0f}s src -> {len(seg)/out_rate:.0f}s, {seams}, {meta['license']})")
	        return {"slug": slug, "title": name, "license": meta["license"],
	                "artist": meta["artist"], "page": meta["page"]}

	    print(f" no usable file for {slug} (all candidates failed checks)")
	    return None


	def save_credits(new_credits):
	    """Merge this run's picks into credits.json and re-render CREDITS.md.

	    credits.json is the source of truth, keyed by slug.  Merging means
	    fetching one slug doesn't drop the others' attribution.  Both files are
	    read and written as UTF-8 so that non-ASCII author names behave the same
	    on every platform.

	    Args:
	        new_credits: iterable of credit dicts as returned by ``fetch_one``
	            (keys ``slug``, ``title``, ``license``, ``artist``, ``page``).

	    A credits.json that cannot be parsed, or that does not hold a JSON
	    object, is reported and rebuilt from ``new_credits`` alone.
	    """
	    store = OUT_DIR / "credits.json"
	    merged = {}
	    if store.exists():
	        try:
	            loaded = json.loads(store.read_text(encoding="utf-8"))
	        except ValueError:
	            # Covers both malformed JSON and undecodable bytes.
	            loaded = None
	        if isinstance(loaded, dict):
	            merged = loaded
	        else:
	            print(f" warning: ignoring unreadable {store.name}; rebuilding it")
	    for c in new_credits:
	        merged[c["slug"]] = c
	    store.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")

	    lines = ["# Ambience sample credits", "",
	             "Auto-fetched from Wikimedia Commons by `scripts/fetch_ambience.py`.",
	             "vinyl_crackle and tape_hiss are synthesized in `ambience.py` and not listed.", ""]
	    for slug in sorted(merged):
	        c = merged[slug]
	        lines += [
	            f"## {slug}",
	            f"- {c['title']}",
	            f"- Author: {c['artist']}",
	            f"- Licence: {c['license']}",
	            f"- Source: {c['page']}",
	            "",
	        ]
	    (OUT_DIR / "CREDITS.md").write_text("\n".join(lines), encoding="utf-8")


	def main():
	    """Parse the command line, fetch the requested beds, and record credits.

	    Positional ``slugs`` select beds by name; when any are given, all of
	    them are fetched whether or not their wav already exists.  With no
	    slugs, every key of ``SOURCES`` whose ``<slug>.wav`` is missing from
	    ``OUT_DIR`` is fetched - or every key when ``--force`` is passed.

	    Returns:
	        0 when there was nothing to do or at least one bed was fetched,
	        1 when every attempted bed failed.
	    """
	    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n")[0])
	    # nargs="*" makes the slug list optional.  The empty list in ``choices``
	    # lets the default of an omitted "*" positional pass validation on
	    # argparse versions that check that default against ``choices``.
	    parser.add_argument("slugs", nargs="*", choices=[*SOURCES, []], metavar="slug",
	                        help=f"beds to fetch (default: missing ones). One of: {', '.join(SOURCES)}")
	    parser.add_argument("--force", action="store_true", help="re-fetch even if the wav exists")
	    args = parser.parse_args()

	    # A slug named twice on the command line is fetched only once.
	    requested = list(dict.fromkeys(args.slugs))
	    todo = requested or [s for s in SOURCES if args.force or not (OUT_DIR / f"{s}.wav").exists()]
	    if not todo:
	        print("all sampled beds already present — use --force to refetch")
	        return 0
	    OUT_DIR.mkdir(parents=True, exist_ok=True)

	    credits = []
	    for slug in todo:
	        print(f"\n[{slug}]")
	        c = fetch_one(slug)
	        if c:
	            credits.append(c)
	        time.sleep(1)

	    if credits:
	        save_credits(credits)  # merges into credits.json, won't drop other slugs
	    got = len(credits)
	    print(f"\nfetched {got}/{len(todo)} beds -> {OUT_DIR.relative_to(ROOT)}")
	    return 0 if got else 1


	# Script entry point: main()'s return value becomes the process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())