Spaces:
Running on Zero
Running on Zero
Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.
722a5d8 | """Download the sampled ambience beds from Wikimedia Commons. | |
| A no-GPU alternative to make_ambience.py: instead of generating the seven | |
| sampled beds, this pulls real field recordings from Wikimedia Commons | |
| (public-domain / CC-licensed), trims each to a steady ~30 s loop, and writes | |
| mono 16-bit wavs into assets/ambience/ — the format ambience.py expects. | |
| It auto-selects: for each slug it searches Commons, drops obvious junk | |
| (alarms, music, traffic…) by keyword, then downloads candidates in turn and | |
| measures them, keeping the first that is long enough and not near-silent. | |
| Provenance + licence for every pick is written to assets/ambience/CREDITS.md | |
| so attribution can be honored when the Space ships. | |
| Usage: | |
| uv pip install soundfile # bundles libsndfile (ogg/mp3/flac/wav) | |
| python scripts/fetch_ambience.py # fill in what's missing | |
| python scripts/fetch_ambience.py ocean_waves --force | |
| """ | |
| import argparse | |
| import io | |
| import json | |
| import re | |
| import sys | |
| import time | |
| import unicodedata | |
| import urllib.parse | |
| import urllib.request | |
| import wave | |
| from pathlib import Path | |
| import numpy as np | |
# Paths: the script sits one directory below the repository root.
ROOT = Path(__file__).resolve().parent.parent
OUT_DIR = ROOT.joinpath("assets", "ambience")

# Wikimedia Commons endpoint and the User-Agent sent with every request.
API = "https://commons.wikimedia.org/w/api.php"
UA = (
    "LoFinity/0.1 "
    "(lofi hackathon ambience fetcher; https://huggingface.co/spaces)"
)

# Loop length we keep == default song length, so a bed this long tiles to a
# 30 s song with zero seams.
TARGET_S = 30.0

# Source-file gates.
MIN_SRC_DUR = 8.0            # too short to be useful ambience
MAX_SRC_DUR = 400.0          # skip anything longer (podcasts, mixes)
MAX_BYTES = 30 * 1_000_000   # don't pull giant wavs
MAX_RATE = 32000             # cap stored rate (== musicgen rate); keeps files small
# Where to look for each bed. Every slug maps to a list of probes whose hits
# are unioned, in order. Commons search ANDs every word of a probe, so probes
# are kept to 1-2 words; adding probes adds fallback candidates.
#   ("category", name) -> members of a curated Commons category
#   ("search", terms)  -> File-namespace full-text search
SOURCES = {
    "soft_rain": [
        ("category", "Sounds of rain"),
        ("search", "rain ambience"),
    ],
    "ocean_waves": [
        ("search", "ocean waves"),
        ("search", "sea waves"),
        ("search", "surf beach"),
    ],
    "fireplace_crackle": [
        ("search", "campfire"),
        ("search", "fireplace"),
        ("search", "fire crackling"),
    ],
    "birdsong": [
        ("search", "birdsong"),
        ("search", "dawn chorus"),
        ("search", "birds chirping"),
    ],
    "night_crickets": [
        ("search", "crickets"),
        ("search", "cricket chirping"),
        ("search", "cicada"),
    ],
    "wind_in_trees": [
        ("search", "wind trees"),
        ("search", "wind forest"),
        ("search", "wind leaves"),
    ],
    "cafe_murmur": [
        ("search", "restaurant ambience"),
        ("search", "cafe ambience"),
        ("search", "crowd murmur"),
    ],
}
# Pinned, hand-vetted Commons files, tried before any search result.
# Auto-selection can't tell a continuous dawn chorus from one repetitive
# cuckoo, so the good picks found during development are recorded here. They
# still pass through every gate below, so a renamed or deleted file simply
# falls through to search.
PREFERRED = {
    "soft_rain":         "File:Lluvia en techo de lamina.wav",
    "ocean_waves":       "File:Sea waves.wav",
    "fireplace_crackle": "File:WWS Fireoftheforge.ogg",
    "birdsong":          "File:Birds singing in Fribourg 01.ogg",
    "night_crickets":    "File:Black-Prince-Cicada- Psaltoda-plaga.wav",
    "wind_in_trees":     "File:Wind in forest (Gravity Sound).wav",
    "cafe_murmur":       "File:Shopping mall less crowded.ogg",
}
# Junk filter: a title containing any of these (compared lowercased) is not
# ambience and is skipped. This is what keeps "fire" from returning fire
# *alarms*, "sea" from podcasts, and "waves" from sine-wave test tones.
BLOCKLIST = (
    # alarms and spoken-word programmes
    "alarm", "podcast", "episode", "interview", "speech", "talk", "lecture",
    # music and performance
    "music", "song -", "band", "orchestra", "anthem", "hymn", "vocal", "choir",
    "dance", "ritual", "march",
    # vehicles, machinery, conflict, broadcast
    "siren", "horn", "traffic", "tram", "engine", "motor", "gun", "explosion",
    "war", "radio", "national", "voice", "demo",
    # test tones and signals
    "sine", "tone", "hz", "sweep", "beep", "dtmf", "calibration", "signal",
    # instruments
    "woodwind", "clarinet", "flute", "accordion", "instrument", "guitar",
)
# Relevance filter: the chosen file's title (accent-stripped) must contain at
# least one of the slug's keywords, i.e. name a sound actually related to the
# slug. The lists are multilingual because Commons is international.
RELEVANCE = {
    "soft_rain": (
        "rain", "lluvia", "regen", "pluie", "pioggia", "chuva",
        "downpour", "drizzle", "storm",
    ),
    "ocean_waves": (
        "ocean", "wave", "sea", "surf", "beach", "mar", "ola",
        "vague", "welle", "tide", "shore", "playa", "costa",
    ),
    "fireplace_crackle": (
        "fire", "campfire", "fireplace", "crackl", "crepit",
        "feu", "fuego", "hoguera", "fogata", "ember", "hearth",
    ),
    "birdsong": (
        "bird", "song", "chorus", "dawn", "chirp", "cuckoo", "wren",
        "sparrow", "robin", "blackbird", "finch", "warbler", "thrush",
        "nightingale", "lark", "vogel", "oiseau", "pajaro", "canto",
    ),
    "night_crickets": (
        "cricket", "cicada", "cicad", "cigarra", "grasshopper",
        "grillo", "grille", "katydid", "locust", "insect", "chirp",
    ),
    "wind_in_trees": (
        "wind", "breeze", "gust", "rustl", "viento", "vent",
        "howl", "gale", "brisa", "blowing",
    ),
    "cafe_murmur": (
        "cafe", "restaurant", "crowd", "murmur", "coffee", "bar",
        "pub", "chatter", "ambien", "mall", "station", "people",
        "plaza", "market", "tunnel", "hall", "lobby", "gente",
    ),
}
| def _norm(s): | |
| """Lowercase + strip accents so 'pájaro'/'Pajaro' both match 'pajaro'.""" | |
| s = unicodedata.normalize("NFKD", str(s)) | |
| return "".join(c for c in s if not unicodedata.combining(c)).lower() | |
def commons_api(params, tries=5):
    """GET the Commons API with ``params`` and return the decoded JSON.

    ``format=json`` and ``formatversion=2`` are always added. An HTTP 429
    (rate limited) response is retried up to ``tries`` attempts in total,
    sleeping between attempts; any other HTTP error, or a 429 on the last
    attempt, is re-raised. Returns ``{}`` only when ``tries`` is zero or
    negative, so no request is made.
    """
    # Imported here because the file only imports urllib.parse and
    # urllib.request; urllib.error would otherwise resolve only as a side
    # effect of urllib.request importing it.
    import urllib.error

    query = {**params, "format": "json", "formatversion": "2"}
    url = API + "?" + urllib.parse.urlencode(query)
    for attempt in range(tries):
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                return json.load(resp)
        except urllib.error.HTTPError as err:
            if err.code != 429 or attempt >= tries - 1:
                raise
            # Linear backoff, stretched to the server's Retry-After hint when
            # it is a plain number of seconds (capped so a bad header can't
            # stall the run).
            delay = 2 * (attempt + 1)
            hinted = str((err.headers or {}).get("Retry-After", "")).strip()
            if hinted.isdigit():
                delay = min(max(delay, int(hinted)), 60)
            time.sleep(delay)
    return {}
def find_titles(slug):
    """Return candidate Commons file titles for ``slug``, in probe order.

    Runs every probe in ``SOURCES[slug]`` (category listing or File-namespace
    audio search), pausing one second after each request, then filters the
    union of hits:

    - duplicates are dropped, keeping the first occurrence;
    - the accent-stripped title must contain a ``RELEVANCE[slug]`` keyword;
    - the title must not contain a ``BLOCKLIST`` term.

    Blocklist matching is by substring, so a wanted keyword that embeds a
    blocked term ("warbler" contains "war") would otherwise always be
    rejected. Such keywords are blanked out of the title before the junk
    check.
    """
    titles = []
    for kind, value in SOURCES[slug]:
        if kind == "category":
            res = commons_api({"action": "query", "list": "categorymembers",
                               "cmtitle": f"Category:{value}", "cmtype": "file",
                               "cmlimit": "30"})
            hits = [m["title"] for m in res.get("query", {}).get("categorymembers", [])]
        else:
            res = commons_api({"action": "query", "list": "search", "srnamespace": "6",
                               "srsearch": f"filetype:audio {value}", "srlimit": "15"})
            hits = [h["title"] for h in res.get("query", {}).get("search", [])]
        titles += hits
        time.sleep(1)  # stay polite between API requests

    relevant = RELEVANCE[slug]
    # Relevance keywords that would trip the blocklist on their own.
    shielded = [kw for kw in relevant if any(b in kw for b in BLOCKLIST)]

    # dedupe (keep order); require a slug-relevant word, then drop junk
    seen, kept = set(), []
    for t in titles:
        if t in seen:
            continue
        nt = _norm(t)
        if not any(kw in nt for kw in relevant):
            continue
        screened = nt
        for kw in shielded:
            screened = screened.replace(kw, " ")
        if any(b in screened for b in BLOCKLIST):
            continue
        seen.add(t)
        kept.append(t)
    return kept
def file_info(titles):
    """Look up file metadata for ``titles``, 20 per API request.

    Returns a dict mapping title -> dict with keys ``url``, ``dur`` (seconds,
    0.0 when the API reports none), ``mediatype``, ``license`` ("?" when
    missing), ``artist`` (HTML stripped, "Unknown" when missing) and ``page``
    (the description-page URL).

    Entries are keyed by the canonical title the API returns. When the API
    reports that it normalized a requested title, the same entry is also
    stored under the title as requested, so callers can look results up with
    the strings they passed in. Sleeps one second after each batch.
    """
    out = {}
    for i in range(0, len(titles), 20):
        batch = titles[i:i + 20]
        info = commons_api({"action": "query", "titles": "|".join(batch),
                            "prop": "imageinfo",
                            "iiprop": "url|size|mediatype|extmetadata"})
        query = info.get("query", {})
        for page in query.get("pages", []):
            # Pages without imageinfo (e.g. missing files) yield an empty url.
            ii = (page.get("imageinfo") or [{}])[0]
            ext = ii.get("extmetadata") or {}
            license_name = (ext.get("LicenseShortName") or {}).get("value", "")
            artist_html = (ext.get("Artist") or {}).get("value", "")
            out[page.get("title", "?")] = {
                "url": ii.get("url", ""),
                "dur": float(ii.get("duration") or 0.0),
                "mediatype": ii.get("mediatype", ""),
                "license": license_name or "?",
                "artist": _strip_html(artist_html) or "Unknown",
                "page": ii.get("descriptionurl", ""),
            }
        # Alias requested-but-normalized titles to their canonical entry.
        for entry in query.get("normalized", []):
            asked, canonical = entry.get("from"), entry.get("to")
            if asked and canonical in out:
                out.setdefault(asked, out[canonical])
        time.sleep(1)
    return out
| def _strip_html(s): | |
| return re.sub(r"<[^>]+>", "", s).strip() | |
def spectral_flatness(mono, rate):
    """Spectral flatness of the first four seconds of ``mono``.

    Flatness is the geometric mean of the power spectrum divided by its
    arithmetic mean: ~0 for a pure tone, higher for broadband texture. It
    catches test tones that slip past the title filter (a 'Sine Wave' file is
    named like a sea 'wave').

    Before the FFT the segment is mean-removed and first-differenced (a crude
    high-pass). Crowd and surf ambience carries heavy low-frequency rumble
    that otherwise dominates the spectrum and reads as falsely 'tonal':
    calibration showed real cafe recordings at 2e-5 raw vs 1e-12 for a true
    sine, too close; after the high-pass they separate to 2e-3 vs 1e-12.

    Segments shorter than 256 samples return 1.0, so they are never flagged
    as tonal.
    """
    head = mono[: rate * 4].astype(np.float64)
    if len(head) < 256:
        return 1.0

    highpassed = np.diff(head - head.mean())
    window = np.hanning(len(highpassed))
    spectrum = np.fft.rfft(highpassed * window)
    power = np.abs(spectrum) ** 2 + 1e-12  # floor keeps log() finite

    geometric = np.exp(np.mean(np.log(power)))
    arithmetic = np.mean(power)
    return float(geometric / arithmetic)
def download(url):
    """Fetch ``url`` and return its body as bytes, refusing oversized files.

    Raises ``ValueError`` when the declared Content-Length exceeds
    ``MAX_BYTES``, or when the body itself turns out to be larger. The second
    check covers responses with no Content-Length: one byte more than the
    limit is requested, so an over-long body is detected rather than returned
    truncated.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=60) as r:
        length = int(r.headers.get("Content-Length") or 0)
        if length and length > MAX_BYTES:
            raise ValueError(f"too big ({length / 1e6:.0f} MB)")
        blob = r.read(MAX_BYTES + 1)
    if len(blob) > MAX_BYTES:
        raise ValueError(f"too big (> {MAX_BYTES / 1e6:.0f} MB)")
    return blob
def decode_mono(blob):
    """Decode audio bytes to ``(mono float64 samples, sample rate)``.

    soundfile is tried first and the channels are averaged to mono. If
    libsndfile cannot open the data (Opus and other codecs it does not
    handle), decoding falls back to ``_decode_av``.
    """
    import soundfile as sf

    buffer = io.BytesIO(blob)
    try:
        frames, rate = sf.read(buffer, dtype="float64", always_2d=True)
    except sf.LibsndfileError:
        # Opus/other codecs libsndfile can't open
        return _decode_av(blob)
    return frames.mean(axis=1), rate
def _decode_av(blob):
    """Decode audio bytes with PyAV (bundles ffmpeg) as a fallback.

    Most Commons crowd/cafe recordings are Ogg/Opus, which libsndfile doesn't
    support. The first audio stream is decoded and converted to mono float at
    the stream's own sample rate.

    Returns ``(float64 samples, rate)``; raises ``ValueError`` when no frames
    could be decoded.
    """
    import av

    pieces = []
    with av.open(io.BytesIO(blob)) as container:
        audio = container.streams.audio[0]
        rate = audio.codec_context.sample_rate
        to_mono = av.AudioResampler(format="flt", layout="mono", rate=rate)
        for frame in container.decode(audio):
            pieces.extend(
                part.to_ndarray().reshape(-1) for part in to_mono.resample(frame)
            )
    if not pieces:
        raise ValueError("no audio frames decoded")
    return np.concatenate(pieces).astype(np.float64), rate
def steady_window(mono, rate):
    """Choose the best ``TARGET_S``-second loop window from ``mono``.

    Clips no longer than the target are returned whole (the mixer tiles
    them). Otherwise candidate windows are scored on 100 ms RMS frames, lower
    being better, on three things that matter because the mixer crossfades
    the loop's tail back into its head:

    - a steady interior (low RMS variation), so the loop doesn't swell or drop;
    - head and tail at matched energy, so the crossfade blends like-for-like;
    - neither boundary in a lull, so the loop point doesn't briefly drop out.

    The last two matter for sparse textures (birdsong, fireplace): a window
    that merely minimizes variance can still start or end in a gap, dipping
    ~10 dB every loop. Windows whose mean level is under half the clip's
    median are skipped; if nothing qualifies, the centred window is used.
    """
    want = int(TARGET_S * rate)
    if len(mono) <= want:
        return mono

    hop = max(int(rate * 0.1), 1)  # 100 ms frames: fine enough to see the seam
    levels = np.array([
        np.sqrt(np.mean(mono[pos:pos + hop] ** 2))
        for pos in range(0, len(mono) - hop, hop)
    ])
    typical = float(np.median(levels)) or 1.0
    span = max(want // hop, 1)               # frames per candidate window
    fade = max(int(rate * 0.5) // hop, 1)    # frames spanning one crossfade (~0.5 s)
    stride = max(span // 8, 1)

    pick, pick_score = None, 1e9
    for first in range(0, len(levels) - span, stride):
        window = levels[first:first + span]
        level = float(window.mean())
        if level < 0.5 * typical:  # window mostly in a lull
            continue
        head = float(window[:fade].mean())
        tail = float(window[-fade:].mean())
        cv = float(window.std()) / (level or 1.0)
        mismatch = abs(head - tail) / typical
        lull = max(0.0, 1.0 - min(head, tail) / typical)  # 0 once boundary >= median
        score = cv + 2.0 * mismatch + 2.0 * lull
        if score < pick_score:
            pick, pick_score = first * hop, score

    if pick is None:
        pick = (len(mono) - want) // 2
    return mono[pick:pick + want]
def resample(mono, src, dst):
    """Downsample ``mono`` from ``src`` Hz to ``dst`` Hz by linear interpolation.

    Never upsamples: when ``src <= dst`` the input is returned untouched with
    its original rate. Returns ``(samples, rate)``.
    """
    if src <= dst:
        return mono, src
    out_len = int(len(mono) * dst / src)
    step = src / dst
    positions = np.arange(out_len) * step  # output sample times in input-sample units
    return np.interp(positions, np.arange(len(mono)), mono), dst
def write_wav(mono, rate, path):
    """Write ``mono`` to ``path`` as a mono 16-bit PCM wav at ``rate`` Hz.

    The signal is peak-normalized to 0.9 of full scale before conversion; an
    all-zero signal is written as silence.
    """
    peak = float(np.abs(mono).max() or 1.0)
    samples = (mono * (0.9 / peak) * 32767).astype("<i2")
    with wave.open(str(path), "wb") as out:
        # (channels, sample width in bytes, rate, frames, compression)
        out.setparams((1, 2, rate, 0, "NONE", "not compressed"))
        out.writeframes(samples.tobytes())
def fetch_one(slug):
    """Find, vet and store one ambience bed.

    The pinned ``PREFERRED`` file (if any) is tried first, followed by the
    search results in relevance order. At most eight candidates with a
    download URL are tried; the first to pass every gate (duration, loudness,
    spectral flatness) is trimmed, rate-capped and written to
    ``OUT_DIR/<slug>.wav``.

    Returns a credit dict (slug, title, license, artist, page) on success, or
    None if nothing usable was found.
    """
    searched = find_titles(slug)
    pinned = PREFERRED.get(slug)
    # pinned pick first, then search hits; order-preserving dedupe
    lookup = list(dict.fromkeys(([pinned] if pinned else []) + searched))
    if not lookup:
        print(f" no candidates found for {slug}")
        return None

    info = file_info(lookup)
    with_url = [t for t in lookup if info.get(t, {}).get("url")]
    for title in with_url[:8]:
        meta = info[title]
        label = title[5:]  # drop the "File:" prefix for display and credits

        # Cheap pre-check on the API-reported duration before downloading.
        if meta["dur"] and meta["dur"] > MAX_SRC_DUR:
            continue
        try:
            mono, rate = decode_mono(download(meta["url"]))
        except Exception as e:  # noqa: BLE001 — try the next candidate
            print(f" skip {label[:40]!r}: {e}")
            continue

        dur = len(mono) / rate
        rms = float(np.sqrt(np.mean(mono ** 2)))
        flat = spectral_flatness(mono, rate)
        if dur < MIN_SRC_DUR or dur > MAX_SRC_DUR or rms < 5e-3:
            print(f" skip {label[:40]!r}: dur={dur:.0f}s rms={rms:.3f}")
            continue
        if flat < 1e-3:  # essentially a pure tone, not ambience (sines ~1e-12)
            print(f" skip {label[:40]!r}: too tonal (flatness {flat:.0e})")
            continue

        seg, out_rate = resample(steady_window(mono, rate), rate, MAX_RATE)
        write_wav(seg, out_rate, OUT_DIR / f"{slug}.wav")
        kept_s = len(seg) / out_rate
        seams = "no seam" if kept_s >= 30 else "1 seam @30s"
        print(f" {slug} <- {label[:42]!r} "
              f"({dur:.0f}s src -> {kept_s:.0f}s, {seams}, {meta['license']})")
        return {
            "slug": slug,
            "title": label,
            "license": meta["license"],
            "artist": meta["artist"],
            "page": meta["page"],
        }

    print(f" no usable file for {slug} (all candidates failed checks)")
    return None
def save_credits(new_credits):
    """Merge this run's picks into credits.json and re-render CREDITS.md.

    credits.json (keyed by slug) is the source of truth; merging means
    fetching one slug doesn't drop the others' attribution. An unreadable
    credits.json, or one that does not hold a JSON object, is treated as
    empty. Both files are read and written as UTF-8 regardless of the
    platform's default encoding, so non-ASCII artist names are safe.

    ``new_credits`` is an iterable of credit dicts as returned by
    ``fetch_one`` (keys: slug, title, license, artist, page).
    """
    store = OUT_DIR / "credits.json"
    merged = {}
    if store.exists():
        try:
            loaded = json.loads(store.read_text(encoding="utf-8"))
        except ValueError:  # covers malformed JSON and undecodable bytes
            loaded = None
        if isinstance(loaded, dict):
            merged = loaded
    for c in new_credits:
        merged[c["slug"]] = c
    store.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")

    lines = ["# Ambience sample credits", "",
             "Auto-fetched from Wikimedia Commons by `scripts/fetch_ambience.py`.",
             "vinyl_crackle and tape_hiss are synthesized in `ambience.py` and not listed.", ""]
    for slug in sorted(merged):
        c = merged[slug]
        lines += [
            f"## {slug}",
            f"- **{c['title']}**",
            f"- Author: {c['artist']}",
            f"- Licence: {c['license']}",
            f"- Source: {c['page']}",
            "",
        ]
    (OUT_DIR / "CREDITS.md").write_text("\n".join(lines), encoding="utf-8")
def main():
    """Command-line entry point; returns the process exit status.

    With no slugs given, fetches every bed whose wav is missing (or every bed
    with ``--force``). Slugs named explicitly are always fetched. Credits for
    the beds fetched in this run are merged into the stored credits. Returns
    0 when there was nothing to do or at least one bed was fetched, 1 when
    every fetch failed.
    """
    parser = argparse.ArgumentParser(description=__doc__.split("\n")[0])
    # The empty list in `choices` lets the nargs="*" default (no slugs) pass
    # argparse's choices validation.
    parser.add_argument(
        "slugs",
        nargs="*",
        choices=[*SOURCES, []],
        metavar="slug",
        help=f"beds to fetch (default: missing ones). One of: {', '.join(SOURCES)}",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="re-fetch even if the wav exists",
    )
    args = parser.parse_args()

    if args.slugs:
        todo = args.slugs
    else:
        todo = [
            s for s in SOURCES
            if args.force or not (OUT_DIR / f"{s}.wav").exists()
        ]
    if not todo:
        print("all sampled beds already present — use --force to refetch")
        return 0

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    credits = []
    for slug in todo:
        print(f"\n[{slug}]")
        credit = fetch_one(slug)
        if credit:
            credits.append(credit)
        time.sleep(1)  # pause between slugs

    if credits:
        save_credits(credits)  # merges into credits.json, won't drop other slugs
    got = len(credits)
    print(f"\nfetched {got}/{len(todo)} beds -> {OUT_DIR.relative_to(ROOT)}")
    return 0 if got else 1


if __name__ == "__main__":
    sys.exit(main())