Spaces:

build-small-hackathon
/

LoFinity

Running on Zero

Add ambience generation features and assets. Introduced ambience.py for procedural and sampled ambience beds, updated app.py to integrate ambience selection into music generation, and modified requirements.txt to include new dependencies. Added scripts for fetching and rendering ambience samples, along with new audio assets and credits for attribution.

722a5d8 11 days ago

Raw

History Blame Contribute Delete

4.44 kB

	"""One-off renderer for the sampled ambience beds.

	vinyl_crackle and tape_hiss are synthesized live in ambience.py; the seven
	beds below only need to exist once on disk. This script fills
	assets/ambience/ with text-to-audio renders from AudioLDM2.

	(AudioGen would also work, but it lives in the unmaintained audiocraft
	package which doesn't install on Python 3.13; AudioLDM2 ships in diffusers
	and runs next to the project's torch/transformers as-is.)

	Usage:
	pip install diffusers
	python scripts/make_ambience.py # render whatever is missing
	python scripts/make_ambience.py ocean_waves --force # redo one (a named slug is always re-rendered, so --force is optional here)

	Each clip is ~12 s; the runtime mixer tiles it with crossfades, so it does
	not need to loop perfectly. Re-run any slug whose render sounds off —
	text-to-audio is a slot machine, two pulls usually land one keeper.
	"""

	import argparse
	import os
	import sys
	import wave
	from pathlib import Path

	# Project root: two levels up from this script's resolved location.
	ROOT = Path(__file__).resolve().parents[1]
	# Directory the rendered ambience wavs are written to.
	OUT_DIR = ROOT.joinpath("assets", "ambience")

	# slug -> text-to-audio prompt. The slug doubles as the wav file stem.
	PROMPTS = {
	    "soft_rain": "gentle steady rain falling on leaves, calm rain ambience, no thunder",
	    "ocean_waves": "calm ocean waves gently rolling onto a sandy beach, soft surf",
	    "fireplace_crackle": "cozy fireplace, fire crackling and popping softly",
	    "birdsong": "soft morning birdsong, small birds chirping in a quiet garden",
	    "night_crickets": "crickets chirping steadily on a calm summer night",
	    "cafe_murmur": (
	        "quiet coffee shop ambience, soft murmur of distant conversation, "
	        "occasional clink of cups"
	    ),
	    "wind_in_trees": "soft wind rustling through tree leaves, gentle breeze",
	}
	# Shared negative prompt passed alongside every entry in PROMPTS.
	NEGATIVE = "music, melody, singing, speech, voice, loud, harsh, low quality, distortion"


	def write_wav(samples, rate: int, path: Path) -> None:
	    """Peak-normalize *samples* and write them as a mono 16-bit PCM wav.

	    The signal is scaled so its largest absolute sample lands at 0.9 of
	    full scale, then truncated to little-endian int16.

	    Args:
	        samples: Array-like of numeric samples (NumPy array, list, ...).
	            The file is written with a single channel, so a 1-D buffer
	            is expected. An empty buffer produces a valid zero-length wav.
	        rate: Sample rate in Hz stored in the wav header.
	        path: Destination file; overwritten if it already exists.
	    """
	    import numpy as np

	    # Accept plain sequences too: ``list * float`` would raise TypeError.
	    data = np.asarray(samples)
	    # ``max()`` of a zero-size array raises, so treat "no samples" as silence.
	    peak = float(np.abs(data).max()) if data.size else 0.0
	    # An all-zero signal keeps unit gain instead of dividing by zero.
	    gain = 0.9 / (peak or 1.0)
	    pcm = (data * gain * 32767).astype("<i2")
	    with wave.open(str(path), "wb") as w:
	        w.setnchannels(1)
	        w.setsampwidth(2)  # bytes per sample -> 16-bit
	        w.setframerate(rate)
	        w.writeframes(pcm.tobytes())


	def _render_clip(pipe, prompt: str, *, steps: int, duration: float, candidates: int):
	    """Run *pipe* once for *prompt* and return the first waveform it yields."""
	    result = pipe(
	        prompt=prompt,
	        negative_prompt=NEGATIVE,
	        num_inference_steps=steps,
	        audio_length_in_s=duration,
	        num_waveforms_per_prompt=candidates,
	    )
	    return result.audios[0]  # audios come back ranked by text alignment


	def main() -> int:
	    """Render the requested (or still-missing) ambience beds into OUT_DIR.

	    With no positional slugs, every entry of PROMPTS whose wav is absent is
	    rendered (all of them with ``--force``). Slugs named on the command
	    line are always rendered, whether or not their wav already exists.

	    The device comes from the ``LOFINITY_DEVICE`` environment variable when
	    set, otherwise "mps" if available, else "cpu". If a render raises on a
	    non-cpu device, the pipeline is moved to cpu and that clip is retried;
	    later clips then stay on cpu.

	    Returns:
	        0 on success. Render failures on cpu propagate as exceptions;
	        invalid command-line values exit through ``parser.error``.
	    """
	    # Guard against a stripped module docstring (e.g. ``python -OO``).
	    summary = (__doc__ or "").split("\n")[0]
	    parser = argparse.ArgumentParser(description=summary or None)
	    parser.add_argument(
	        "slugs",
	        nargs="*",
	        # The trailing [] lets argparse versions that validate the empty
	        # default of a "*" positional against ``choices`` accept a bare run.
	        choices=[*PROMPTS, []],
	        metavar="slug",
	        help=f"which beds to render (default: all missing). One of: {', '.join(PROMPTS)}",
	    )
	    parser.add_argument("--force", action="store_true", help="re-render even if the wav exists")
	    parser.add_argument("--duration", type=float, default=12.0, help="clip length in seconds")
	    parser.add_argument("--steps", type=int, default=200, help="diffusion steps (more = cleaner, slower)")
	    parser.add_argument(
	        "--candidates",
	        type=int,
	        default=2,
	        help="waveforms per prompt; the pipeline keeps the best text match",
	    )
	    args = parser.parse_args()

	    # Reject nonsensical values up front, before the multi-GB model load.
	    if args.duration <= 0:
	        parser.error("--duration must be positive")
	    if args.steps < 1:
	        parser.error("--steps must be at least 1")
	    if args.candidates < 1:
	        parser.error("--candidates must be at least 1")

	    if args.slugs:
	        # Explicit slugs always render; drop repeats (order kept) so the same
	        # file is not rendered and overwritten twice in one run.
	        todo = list(dict.fromkeys(args.slugs))
	    else:
	        todo = [
	            slug
	            for slug in PROMPTS
	            if args.force or not (OUT_DIR / f"{slug}.wav").exists()
	        ]
	    if not todo:
	        print("all ambience beds already rendered — use --force to redo")
	        return 0
	    OUT_DIR.mkdir(parents=True, exist_ok=True)

	    # Heavy imports are deferred so --help and the "nothing to do" path stay fast.
	    import torch
	    from diffusers import AudioLDM2Pipeline

	    device = os.getenv("LOFINITY_DEVICE") or ("mps" if torch.backends.mps.is_available() else "cpu")
	    print(f"first run downloads ~3 GB (cvssp/audioldm2); rendering on {device}")
	    pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2")
	    pipe.to(device)

	    sample_rate = 16000  # rate written to the wav header for every clip
	    render_opts = {
	        "steps": args.steps,
	        "duration": args.duration,
	        "candidates": args.candidates,
	    }

	    for slug in todo:
	        path = OUT_DIR / f"{slug}.wav"
	        prompt = PROMPTS[slug]
	        print(f"rendering {slug}: {prompt!r}")

	        try:
	            audio = _render_clip(pipe, prompt, **render_opts)
	        except Exception as e:  # noqa: BLE001 — mps kernels are still patchy
	            if device == "cpu":
	                raise
	            print(f" {device} failed ({e!r}), retrying on cpu")
	            pipe.to("cpu")
	            device = "cpu"  # stay on cpu for the remaining clips
	            audio = _render_clip(pipe, prompt, **render_opts)
	        write_wav(audio, sample_rate, path)
	        print(f" -> {path.relative_to(ROOT)}")
	    return 0


	# Script entry point: hand main()'s return value to the shell as the exit status.
	if __name__ == "__main__":
	    exit_status = main()
	    sys.exit(exit_status)