"""Download the sampled ambience beds from Wikimedia Commons.

A no-GPU alternative to make_ambience.py: instead of generating the seven
sampled beds, this pulls real field recordings from Wikimedia Commons
(public-domain / CC-licensed), trims each to a steady loop of at most
TARGET_S seconds (30 s), and writes mono 16-bit wavs into assets/ambience/ —
the format ambience.py expects.

It auto-selects: for each slug it searches Commons, drops obvious junk
(alarms, music, traffic…) by keyword, then downloads candidates in turn and
measures them, keeping the first that is long enough and not near-silent.
Provenance + licence for every pick is written to
assets/ambience/CREDITS.md so attribution can be honored when the Space
ships.

Usage:
    uv pip install soundfile    # bundles libsndfile (ogg/mp3/flac/wav)
    uv pip install av           # optional: fallback decoder for Ogg/Opus files
    python scripts/fetch_ambience.py                        # fill in what's missing
    python scripts/fetch_ambience.py ocean_waves --force
"""

import argparse
import io
import json
import re
import sys
import time
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import wave
from pathlib import Path

import numpy as np

ROOT = Path(__file__).resolve().parent.parent
OUT_DIR = ROOT / "assets" / "ambience"
API = "https://commons.wikimedia.org/w/api.php"
UA = "LoFinity/0.1 (lofi hackathon ambience fetcher; https://huggingface.co/spaces)"

# Loop length we keep == default song length, so a bed this long tiles to a
# 30 s song with zero seams.
TARGET_S = 30.0
MIN_SRC_DUR = 8.0       # anything shorter is too short to be useful ambience
MAX_SRC_DUR = 400.0     # skip anything longer (podcasts, mixes)
MAX_BYTES = 30_000_000  # don't pull giant wavs
MAX_RATE = 32000        # cap stored rate (== musicgen rate); keeps files small

# How to find each bed: a list of probes whose results are unioned. Commons
# search ANDs every word in a probe, so each probe stays 1-2 words; more
# probes = more candidates to fall back through. ("category", name) lists a
# curated category; ("search", terms) is a File-namespace full-text search.
SOURCES = {
    "soft_rain": [("category", "Sounds of rain"), ("search", "rain ambience")],
    "ocean_waves": [("search", "ocean waves"), ("search", "sea waves"),
                    ("search", "surf beach")],
    "fireplace_crackle": [("search", "campfire"), ("search", "fireplace"),
                          ("search", "fire crackling")],
    "birdsong": [("search", "birdsong"), ("search", "dawn chorus"),
                 ("search", "birds chirping")],
    "night_crickets": [("search", "crickets"), ("search", "cricket chirping"),
                       ("search", "cicada")],
    "wind_in_trees": [("search", "wind trees"), ("search", "wind forest"),
                      ("search", "wind leaves")],
    "cafe_murmur": [("search", "restaurant ambience"), ("search", "cafe ambience"),
                    ("search", "crowd murmur")],
}

# Hand-vetted Commons files tried before falling back to search — auto-selection
# can't judge "continuous dawn chorus" vs "one repetitive cuckoo", so the good
# picks found during development are pinned here. Still run through every gate
# below, so a renamed/deleted file just falls through to search.
PREFERRED = {
    "soft_rain": "File:Lluvia en techo de lamina.wav",
    "ocean_waves": "File:Sea waves.wav",
    "fireplace_crackle": "File:WWS Fireoftheforge.ogg",
    "birdsong": "File:Birds singing in Fribourg 01.ogg",
    "night_crickets": "File:Black-Prince-Cicada- Psaltoda-plaga.wav",
    "wind_in_trees": "File:Wind in forest (Gravity Sound).wav",
    "cafe_murmur": "File:Shopping mall less crowded.ogg",
}

# Title contains any of these (lowercased) -> not ambience, skip it. This is
# what keeps "fire" from returning fire *alarms*, "sea" from podcasts, and
# "waves" from sine-wave test tones.
BLOCKLIST = (
    "alarm", "podcast", "episode", "interview", "speech", "talk", "lecture",
    "music", "song -", "band", "orchestra", "anthem", "hymn", "vocal", "choir",
    "dance", "ritual", "march", "siren", "horn", "traffic", "tram", "engine",
    "motor", "gun", "explosion", "war", "radio", "national", "voice", "demo",
    "sine", "tone", "hz", "sweep", "beep", "dtmf", "calibration", "signal",
    "woodwind", "clarinet", "flute", "accordion", "instrument", "guitar",
)

# Chosen file's title must contain one of these (accent-stripped) — a sound
# actually related to the slug. Multilingual because Commons is international.
# NOTE(review): both lists are matched as plain substrings and BLOCKLIST is
# checked first, so e.g. "warbler" can never match (it contains "war").
RELEVANCE = {
    "soft_rain": ("rain", "lluvia", "regen", "pluie", "pioggia", "chuva",
                  "downpour", "drizzle", "storm"),
    "ocean_waves": ("ocean", "wave", "sea", "surf", "beach", "mar", "ola",
                    "vague", "welle", "tide", "shore", "playa", "costa"),
    "fireplace_crackle": ("fire", "campfire", "fireplace", "crackl", "crepit",
                          "feu", "fuego", "hoguera", "fogata", "ember", "hearth"),
    "birdsong": ("bird", "song", "chorus", "dawn", "chirp", "cuckoo", "wren",
                 "sparrow", "robin", "blackbird", "finch", "warbler", "thrush",
                 "nightingale", "lark", "vogel", "oiseau", "pajaro", "canto"),
    "night_crickets": ("cricket", "cicada", "cicad", "cigarra", "grasshopper",
                       "grillo", "grille", "katydid", "locust", "insect", "chirp"),
    "wind_in_trees": ("wind", "breeze", "gust", "rustl", "viento", "vent",
                      "howl", "gale", "brisa", "blowing"),
    "cafe_murmur": ("cafe", "restaurant", "crowd", "murmur", "coffee", "bar",
                    "pub", "chatter", "ambien", "mall", "station", "people",
                    "plaza", "market", "tunnel", "hall", "lobby", "gente"),
}


def _norm(s):
    """Lowercase + strip accents so 'pájaro'/'Pajaro' both match 'pajaro'."""
    decomposed = unicodedata.normalize("NFKD", str(s))
    return "".join(c for c in decomposed if not unicodedata.combining(c)).lower()


def commons_api(params, tries=5):
    """GET the Commons API with `params` and return the decoded JSON body.

    `format=json` and `formatversion=2` are always added. An HTTP 429 is
    retried after a linearly growing sleep (2 s, 4 s, …) for up to `tries`
    attempts; any other HTTP error — or a 429 on the last attempt — is
    re-raised. `{}` is returned only when `tries` is less than 1.
    """
    query = urllib.parse.urlencode({**params, "format": "json", "formatversion": "2"})
    url = API + "?" + query
    for attempt in range(tries):
        try:
            req = urllib.request.Request(url, headers={"User-Agent": UA})
            with urllib.request.urlopen(req, timeout=30) as r:
                return json.load(r)
        except urllib.error.HTTPError as e:
            if e.code == 429 and attempt < tries - 1:
                time.sleep(2 * (attempt + 1))
                continue
            raise
    return {}


def find_titles(slug):
    """Return candidate file titles for `slug`, in probe order.

    Every probe in SOURCES[slug] is run (sleeping 1 s after each one) and
    the results are concatenated. Titles are then de-duplicated keeping first
    occurrence, dropped if their normalized form contains a BLOCKLIST word,
    and kept only if it contains one of RELEVANCE[slug].
    """
    titles = []
    for kind, value in SOURCES[slug]:
        if kind == "category":
            res = commons_api({"action": "query", "list": "categorymembers",
                               "cmtitle": f"Category:{value}",
                               "cmtype": "file", "cmlimit": "30"})
            hits = [m["title"] for m in res.get("query", {}).get("categorymembers", [])]
        else:
            res = commons_api({"action": "query", "list": "search",
                               "srnamespace": "6",
                               "srsearch": f"filetype:audio {value}",
                               "srlimit": "15"})
            hits = [h["title"] for h in res.get("query", {}).get("search", [])]
        titles += hits
        time.sleep(1)

    # dedupe (keep order); drop junk, then require a slug-relevant word
    relevant_words = RELEVANCE[slug]
    seen, kept = set(), []
    for t in titles:
        nt = _norm(t)
        if t in seen or any(b in nt for b in BLOCKLIST):
            continue
        if not any(kw in nt for kw in relevant_words):
            continue
        seen.add(t)
        kept.append(t)
    return kept


def _ext_value(ext, key):
    """Return extmetadata[key]["value"], or "" when the field is absent."""
    return ext.get(key, {}).get("value", "")


def file_info(titles):
    """title -> dict(url, dur, mediatype, license, artist, page) for a batch of titles.

    Titles are queried 20 at a time with a 1 s sleep after each request.
    Results are keyed by the title the API returns for each page; a page
    without imageinfo yields empty/zero fields rather than being omitted.
    """
    out = {}
    for i in range(0, len(titles), 20):
        info = commons_api({"action": "query",
                            "titles": "|".join(titles[i:i + 20]),
                            "prop": "imageinfo",
                            "iiprop": "url|size|mediatype|extmetadata"})
        for page in info.get("query", {}).get("pages", []):
            ii = (page.get("imageinfo") or [{}])[0]
            ext = ii.get("extmetadata", {})
            out[page.get("title", "?")] = {
                "url": ii.get("url", ""),
                "dur": float(ii.get("duration") or 0.0),
                "mediatype": ii.get("mediatype", ""),
                "license": _ext_value(ext, "LicenseShortName") or "?",
                "artist": _strip_html(_ext_value(ext, "Artist")) or "Unknown",
                "page": ii.get("descriptionurl", ""),
            }
        time.sleep(1)
    return out


def _strip_html(s):
    """Remove anything that looks like an HTML tag and trim whitespace."""
    return re.sub(r"<[^>]+>", "", s).strip()


def spectral_flatness(mono, rate):
    """Ratio of geometric to arithmetic mean of the power spectrum.

    ~0 for a pure tone, higher for broadband texture — catches test tones
    that slip past the title filter (a 'Sine Wave' file is named like a sea
    'wave'). Only the first 4 s are analysed; clips under 256 samples return
    1.0 (treated as not tonal).

    The signal is detrended and high-passed (first difference) first: crowd
    and surf ambience carries heavy low-frequency rumble that otherwise
    dominates the spectrum and reads as falsely 'tonal' (calibration showed
    real cafe recordings at 2e-5 raw vs 1e-12 for a true sine — too close;
    after the high-pass they separate to 2e-3 vs 1e-12).
    """
    seg = mono[: rate * 4].astype(np.float64)
    if len(seg) < 256:
        return 1.0
    seg = np.diff(seg - seg.mean())
    # The 1e-12 floor keeps log() finite for exactly-zero bins.
    power = np.abs(np.fft.rfft(seg * np.hanning(len(seg)))) ** 2 + 1e-12
    return float(np.exp(np.mean(np.log(power))) / np.mean(power))


def download(url):
    """Fetch `url` and return its body as bytes.

    Raises ValueError when the body is larger than MAX_BYTES — either as
    announced by Content-Length, or, when that header is missing or wrong,
    as detected by reading one byte past the cap. (Previously the second
    case returned a silently truncated blob.)
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=60) as r:
        length = int(r.headers.get("Content-Length") or 0)
        if length and length > MAX_BYTES:
            raise ValueError(f"too big ({length / 1e6:.0f} MB)")
        blob = r.read(MAX_BYTES + 1)
    if len(blob) > MAX_BYTES:
        raise ValueError(f"too big (> {MAX_BYTES / 1e6:.0f} MB)")
    return blob


def decode_mono(blob):
    """Decode an audio blob to (mono float64 samples, sample rate).

    Tries soundfile first and averages the channels; falls back to PyAV for
    anything libsndfile rejects.
    """
    import soundfile as sf
    try:
        data, rate = sf.read(io.BytesIO(blob), dtype="float64", always_2d=True)
        return data.mean(axis=1), rate
    except sf.LibsndfileError:
        return _decode_av(blob)  # Opus/other codecs libsndfile can't open


def _decode_av(blob):
    """Fallback decoder via PyAV (bundles ffmpeg) — most Commons crowd/cafe
    recordings are Ogg/Opus, which libsndfile doesn't support.

    Returns (mono float64 samples, sample rate) for the first audio stream;
    raises ValueError if no frames decode.
    """
    import av
    with av.open(io.BytesIO(blob)) as container:
        stream = container.streams.audio[0]
        rate = stream.codec_context.sample_rate
        chunks = []
        # Same rate in and out: the resampler is used only to downmix to mono
        # float. Its result is iterated, so this expects a PyAV version whose
        # resample() returns a list of frames.
        resampler = av.AudioResampler(format="flt", layout="mono", rate=rate)
        for frame in container.decode(stream):
            for out in resampler.resample(frame):
                chunks.append(out.to_ndarray().reshape(-1))
    if not chunks:
        raise ValueError("no audio frames decoded")
    return np.concatenate(chunks).astype(np.float64), rate


def steady_window(mono, rate, target_s=None):
    """Pick the best loop window of `target_s` seconds (default TARGET_S).

    Short clips are returned whole (the mixer tiles them). The window is
    scored on three things, because the mixer crossfades the loop's tail back
    into its head:
      - steady interior (low RMS variation) so it doesn't swell or drop
      - head and tail at matched energy, so the crossfade blends like-for-like
      - neither boundary in a lull, so the loop point doesn't briefly drop out
    The last two matter for sparse textures (birdsong, fireplace): a window
    that merely minimizes variance can still start/end in a gap, dipping
    ~10 dB every loop.

    If no candidate qualifies, the centred window is returned.
    """
    n = int((TARGET_S if target_s is None else target_s) * rate)
    if len(mono) <= n:
        return mono

    hop = max(int(rate * 0.1), 1)  # 100 ms frames: fine enough to see the seam
    # Same frames as range(0, len(mono) - hop, hop): whole frames only, and the
    # final one left out.
    n_frames = len(range(0, len(mono) - hop, hop))
    if n_frames == 0:
        start = (len(mono) - n) // 2
        return mono[start:start + n]
    frames = np.asarray(mono[: n_frames * hop]).reshape(n_frames, hop)
    frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))

    median = float(np.median(frame_rms)) or 1.0
    win_frames = max(n // hop, 1)
    edge = max(int(rate * 0.5) // hop, 1)  # frames spanning one crossfade (~0.5 s)
    step = max(win_frames // 8, 1)

    best, best_score = None, 1e9
    # The exclusive stop is deliberate: it keeps start * hop + n inside `mono`,
    # so every candidate yields a full-length window.
    for start in range(0, len(frame_rms) - win_frames, step):
        seg = frame_rms[start:start + win_frames]
        mean = float(seg.mean())
        if mean < 0.5 * median:  # window mostly in a lull
            continue
        head, tail = float(seg[:edge].mean()), float(seg[-edge:].mean())
        cv = float(seg.std()) / (mean or 1.0)
        mismatch = abs(head - tail) / median
        lull = max(0.0, 1.0 - min(head, tail) / median)  # 0 once boundary >= median
        score = cv + 2.0 * mismatch + 2.0 * lull
        if score < best_score:
            best_score, best = score, start * hop

    start = best if best is not None else (len(mono) - n) // 2
    return mono[start:start + n]


def resample(mono, src, dst):
    """Downsample `mono` from `src` to `dst` Hz by linear interpolation.

    Never upsamples: when `src <= dst` the input is returned untouched with
    its original rate. Returns (samples, rate). No low-pass filter is applied
    before decimation.
    """
    if src <= dst:
        return mono, src
    m = int(len(mono) * dst / src)
    positions = np.arange(m) * (src / dst)
    return np.interp(positions, np.arange(len(mono)), mono), dst


def write_wav(mono, rate, path):
    """Write `mono` as a mono 16-bit PCM wav at `path`, peak-normalized to 0.9.

    An all-zero signal is written as silence (the peak falls back to 1.0).
    """
    peak = float(np.abs(mono).max() or 1.0)
    pcm = (mono * (0.9 / peak) * 32767).astype("<i2")
    # NOTE(review): everything after `.astype(` was missing from the source
    # text under review; the lines below are reconstructed from the module
    # docstring ("mono 16-bit wavs") and the `wave` import — confirm against
    # version control.
    with wave.open(str(path), "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(rate)
        w.writeframes(pcm.tobytes())


def fetch_one(slug):
    """Find, vet and store one bed; return its credit dict, or None.

    Candidates are tried in order. Each is downloaded, decoded and measured;
    the first whose decoded duration lies within [MIN_SRC_DUR, MAX_SRC_DUR],
    whose RMS is at least 5e-3 and whose spectral flatness is at least 1e-3
    is trimmed with steady_window, capped to MAX_RATE and written to
    OUT_DIR / "<slug>.wav".
    """
    # NOTE(review): the start of this function — from candidate assembly up to
    # and including the left-hand side of the metadata duration gate — was
    # missing from the source text under review. It is reconstructed from the
    # module docstring and the PREFERRED comment (pinned pick first, then
    # search results). file_info also returns "mediatype", which nothing
    # visible uses; the original may have gated on it here. Confirm against
    # version control before merging.
    titles = find_titles(slug)
    pinned = PREFERRED.get(slug)
    if pinned:
        titles = [pinned] + [t for t in titles if t != pinned]
    info = file_info(titles) if titles else {}

    for title in titles:
        meta = info.get(title)
        if not meta or not meta["url"]:
            continue
        if meta["dur"] and meta["dur"] > MAX_SRC_DUR:
            continue
        name = title[5:]  # drop the "File:" prefix for display / credits
        try:
            blob = download(meta["url"])
            mono, rate = decode_mono(blob)
        except Exception as e:  # noqa: BLE001 — try the next candidate
            print(f" skip {name[:40]!r}: {e}")
            continue

        dur = len(mono) / rate
        rms = float(np.sqrt(np.mean(mono ** 2)))
        if dur < MIN_SRC_DUR or dur > MAX_SRC_DUR or rms < 5e-3:
            print(f" skip {name[:40]!r}: dur={dur:.0f}s rms={rms:.3f}")
            continue
        flat = spectral_flatness(mono, rate)
        if flat < 1e-3:  # essentially a pure tone, not ambience (sines ~1e-12)
            print(f" skip {name[:40]!r}: too tonal (flatness {flat:.0e})")
            continue

        seg = steady_window(mono, rate)
        seg, out_rate = resample(seg, rate, MAX_RATE)
        write_wav(seg, out_rate, OUT_DIR / f"{slug}.wav")
        # Compare against TARGET_S rather than a literal 30 so the report
        # follows the constant if the loop length is retuned.
        kept_s = len(seg) / out_rate
        seams = "no seam" if kept_s >= TARGET_S else f"1 seam @{TARGET_S:.0f}s"
        print(f" {slug} <- {name[:42]!r} "
              f"({dur:.0f}s src -> {kept_s:.0f}s, {seams}, {meta['license']})")
        return {"slug": slug, "title": name, "license": meta["license"],
                "artist": meta["artist"], "page": meta["page"]}

    print(f" no usable file for {slug} (all candidates failed checks)")
    return None


def save_credits(new_credits):
    """Merge this run's picks into credits.json (the source of truth, keyed by
    slug) and re-render CREDITS.md.

    Merging means fetching one slug doesn't drop the others' attribution.
    Both files are read and written as UTF-8 so non-ASCII titles and artist
    names survive regardless of the platform's default encoding. An
    unreadable or malformed credits.json is reported and rebuilt from this
    run's picks instead of aborting.
    """
    store = OUT_DIR / "credits.json"
    merged = {}
    if store.exists():
        try:
            loaded = json.loads(store.read_text(encoding="utf-8"))
        except ValueError as e:  # covers JSONDecodeError and UnicodeDecodeError
            print(f" warning: ignoring unreadable {store.name}: {e}")
        else:
            if isinstance(loaded, dict):
                merged = loaded
            else:
                print(f" warning: ignoring {store.name}: expected a JSON object")
    for c in new_credits:
        merged[c["slug"]] = c
    store.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")

    lines = ["# Ambience sample credits", "",
             "Auto-fetched from Wikimedia Commons by `scripts/fetch_ambience.py`.",
             "vinyl_crackle and tape_hiss are synthesized in `ambience.py` and not listed.",
             ""]
    for slug in sorted(merged):
        c = merged[slug]
        lines += [
            f"## {slug}",
            f"- **{c['title']}**",
            f"- Author: {c['artist']}",
            f"- Licence: {c['license']}",
            f"- Source: {c['page']}",
            "",
        ]
    (OUT_DIR / "CREDITS.md").write_text("\n".join(lines), encoding="utf-8")


def main():
    """CLI entry point; returns the process exit code.

    Fetches the slugs named on the command line, or — when none are given —
    every slug whose wav is missing (all of them with --force). Exits 0 when
    at least one bed was fetched or nothing needed fetching, 1 otherwise.
    """
    # `__doc__` is None under `python -OO`; fall back to an empty description.
    summary = (__doc__ or "").split("\n")[0]
    parser = argparse.ArgumentParser(description=summary)
    # The extra `[]` choice is presumably a workaround for argparse versions
    # that validate the empty nargs="*" result against `choices`.
    parser.add_argument("slugs", nargs="*", choices=[*SOURCES, []], metavar="slug",
                        help=f"beds to fetch (default: missing ones). One of: {', '.join(SOURCES)}")
    parser.add_argument("--force", action="store_true",
                        help="re-fetch even if the wav exists")
    args = parser.parse_args()

    todo = args.slugs or [s for s in SOURCES
                          if args.force or not (OUT_DIR / f"{s}.wav").exists()]
    if not todo:
        print("all sampled beds already present — use --force to refetch")
        return 0

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    credits = []
    for slug in todo:
        print(f"\n[{slug}]")
        c = fetch_one(slug)
        if c:
            credits.append(c)
        time.sleep(1)

    if credits:
        save_credits(credits)  # merges into credits.json, won't drop other slugs
    got = len(credits)
    print(f"\nfetched {got}/{len(todo)} beds -> {OUT_DIR.relative_to(ROOT)}")
    return 0 if got else 1


if __name__ == "__main__":
    sys.exit(main())