Spaces:

build-small-hackathon
/

lolaby

Running

André Oliveira

6472a71 6 days ago

8.91 kB

	"""
	trace.py — opt-in pipeline tracer.

	Records ONE complete Lolaby generation (inputs, every stage, final outputs)
	into a timestamped folder so you can publish an example trace as an HF
	Dataset for the "Sharing is Caring" hackathon badge.

	How to use:

	LULLABY_TRACE=1 python app.py

	Generate a lullaby in the UI as you normally would. When the generation
	finishes, a folder is written under ./traces/<timestamp>/ containing:

	trace.json — the structured pipeline log
	input_drawing.png — the drawing the user gave (if any)
	output_lullaby.wav — the final audio
	README.md — short note pointing at trace.json

	That folder is what you upload to a Hugging Face Dataset.

	Privacy posture:
	- DISABLED by default (does nothing unless LULLABY_TRACE is set to 1, true, or yes).
	- Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
	input drawing (records the trace structure but not the actual image).

	Design note:
	This is deliberately a module-level buffer accumulated by side-effects
	rather than a context manager threaded through every call site. The
	goal is MINIMAL changes to app.py — instrumentation calls are one
	line each at the places that matter.
	"""

	import json
	import os
	import time
	from datetime import datetime
	from pathlib import Path


	# Environment values (compared case-insensitively) that count as "on".
	_TRUTHY = ("1", "true", "yes")


	def _env_flag(name):
	    """Return True when environment variable `name` holds a truthy value."""
	    return os.environ.get(name, "").lower() in _TRUTHY


	# Tracing is opt-in; image capture is opt-out once tracing is on.
	ENABLED = _env_flag("LULLABY_TRACE")
	SAVE_IMAGES = not _env_flag("LULLABY_TRACE_NO_IMAGES")
	# Parent directory under which each run gets its own timestamped folder.
	TRACE_ROOT = Path(os.environ.get("LULLABY_TRACE_DIR", "./traces"))


	# State accumulated for the run in progress: `begin()` replaces both
	# values at the start of a generation and `finalize()` reads them at the
	# end.
	_buf = {}
	_t0 = None


	def is_enabled():
	    """Report whether tracing is switched on (the module-level ENABLED flag)."""
	    return bool(ENABLED)


	def begin():
	    """Start a fresh trace buffer for a new generation.

	    Returns immediately when tracing is disabled, so call sites may invoke
	    it unconditionally. When enabled, it restarts the monotonic clock that
	    stage timings are measured against and replaces the module-level
	    buffer, discarding whatever a previous run left in it.
	    """
	    global _buf, _t0
	    if not ENABLED:
	        return
	    # Function-scope import: the module header only imports `datetime`.
	    from datetime import timezone

	    _t0 = time.monotonic()
	    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
	    # time, then drop tzinfo so isoformat() yields the same text as before
	    # and the historical "Z" suffix can still be appended.
	    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
	    _buf = {
	        "timestamp": now_utc.isoformat() + "Z",
	        "stages": {},
	        "stage_timings_ms": {},
	    }


	def stage(name, **fields):
	    """Record metadata for one pipeline stage.

	    Repeated calls with the same ``name`` merge ``fields`` into the existing
	    entry, later values overwriting earlier ones. The stage's timing entry
	    holds the milliseconds elapsed between ``begin()`` and the most recent
	    call for that stage. No-op when tracing is disabled.
	    """
	    if not ENABLED:
	        return
	    # setdefault (rather than plain indexing) so a stage() call that arrives
	    # before begin() still records its data instead of raising KeyError
	    # inside the pipeline being traced.
	    _buf.setdefault("stages", {}).setdefault(name, {}).update(fields)
	    # Without a begin() there is no reference clock, so no timing is stored.
	    if _t0 is not None:
	        elapsed_ms = round((time.monotonic() - _t0) * 1000)
	        _buf.setdefault("stage_timings_ms", {})[name] = elapsed_ms


	def set_inputs(**kw):
	    """Record the user-facing inputs (name, age, loves, fears, etc.).

	    Values that are ``None`` or the empty string are dropped; everything
	    else is stored as given under the buffer's ``"inputs"`` key, replacing
	    any inputs recorded earlier in the run. Image arrays are NOT meant to
	    be stored here — ``save_input_drawing`` handles them. No-op when
	    tracing is disabled.
	    """
	    if not ENABLED:
	        return
	    # Explicit identity/type checks instead of `v not in (None, "")`: tuple
	    # membership falls back to `==`, which raises for array-like values
	    # whose comparison result has no single truth value.
	    _buf["inputs"] = {
	        key: value
	        for key, value in kw.items()
	        if value is not None and not (isinstance(value, str) and value == "")
	    }


	def finalize(output_audio=None, sample_rate=44100, error=None):
	    """Write the trace folder for the run that is ending.

	    Meant to be called at the end of a generation whether or not it
	    succeeded. NOTE: the name ``finalize`` is rebound further down in this
	    module; this definition stays reachable as ``_orig_finalize``.

	    Args:
	        output_audio: Final audio samples. Written to ``output_lullaby.wav``
	            only when it is a NumPy array; any other non-None value is
	            reported and skipped.
	        sample_rate: Sample rate handed to ``soundfile.write``.
	        error: Optional exception or message; its ``str()`` is stored under
	            the ``"error"`` key of the trace.

	    Returns:
	        The ``Path`` of the folder that was written, or ``None`` when
	        tracing is disabled.
	    """
	    if not ENABLED:
	        return None
	    if _t0 is not None:
	        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
	    if error is not None:
	        _buf["error"] = str(error)

	    # time.gmtime() yields the same UTC stamp as datetime.utcnow(), which is
	    # deprecated since Python 3.12.
	    stamp = time.strftime("%Y-%m-%d_%H-%M-%S", time.gmtime())
	    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
	    folder = TRACE_ROOT / stamp
	    # The stamp has one-second resolution: give a second run finishing in
	    # the same second its own folder instead of overwriting the first.
	    collisions = 0
	    while True:
	        try:
	            folder.mkdir()
	            break
	        except FileExistsError:
	            collisions += 1
	            folder = TRACE_ROOT / f"{stamp}_{collisions}"

	    # The stashed drawing has to leave the buffer before it is serialized,
	    # otherwise it ends up in trace.json and is never written as a PNG.
	    _flush_image(folder)

	    with open(folder / "trace.json", "w", encoding="utf-8") as f:
	        json.dump(_buf, f, indent=2, default=_json_safe)

	    if output_audio is not None:
	        try:
	            import soundfile as sf
	            import numpy as np
	            if isinstance(output_audio, np.ndarray):
	                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
	            else:
	                kind = type(output_audio).__name__
	                print(f"[trace] skipped audio: expected ndarray, got {kind}")
	        except Exception as e:
	            # Best effort: a missing soundfile or a bad array must not
	            # prevent the rest of the trace from being written.
	            print(f"[trace] could not write audio: {e}")

	    # The README template contains non-ASCII characters, so the platform's
	    # default encoding is not safe here.
	    readme = folder / "README.md"
	    readme.write_text(_README_TEMPLATE.format(stamp=stamp), encoding="utf-8")

	    print(f"[trace] wrote {folder}/")
	    return folder


	def save_input_drawing(image_array):
	    """Stash the input drawing so it can be written out later.

	    The trace folder name is not known until finalize(), so nothing is
	    written here: the object is parked in the buffer under
	    ``"_pending_image"``. Nothing is stored when tracing is disabled, when
	    image saving is turned off, or when ``image_array`` is None.
	    """
	    capture_images = ENABLED and SAVE_IMAGES
	    if capture_images and image_array is not None:
	        _buf["_pending_image"] = image_array


	def _flush_image(folder):
	    """Internal: write the stashed drawing to ``folder/input_drawing.png``.

	    Does nothing when no drawing is stashed. 2-D NumPy arrays are turned
	    into a PIL image and converted to RGB; all other NumPy arrays keep at
	    most their first three entries along the last axis, cast to uint8. PIL
	    images are saved as they are; any other type is not written. Exceptions
	    raised along the way are printed rather than propagated, and the
	    stashed object is removed from the buffer afterwards.
	    """
	    pending = _buf.get("_pending_image")
	    if pending is None:
	        return
	    try:
	        from PIL import Image
	        import numpy as np
	        is_array = isinstance(pending, np.ndarray)
	        if is_array and pending.ndim == 2:
	            picture = Image.fromarray(pending).convert("RGB")
	        elif is_array:
	            picture = Image.fromarray(pending[..., :3].astype(np.uint8))
	        elif isinstance(pending, Image.Image):
	            picture = pending
	        else:
	            picture = None
	        if picture is not None:
	            picture.save(folder / "input_drawing.png")
	    except Exception as e:
	        print(f"[trace] could not write input drawing: {e}")
	    # Keep the binary payload out of the buffer that gets dumped to JSON.
	    _buf.pop("_pending_image")


	def _json_safe(o):
	    """Last-resort encoder for objects that aren't JSON-serializable.

	    Intended as the ``default=`` hook of ``json.dump``. NumPy arrays become
	    a short ``<ndarray shape=... dtype=...>`` summary; NumPy booleans,
	    integers and floats become the matching native Python value; anything
	    else is rendered with ``str()``. NumPy is optional: when it cannot be
	    imported every object takes the ``str()`` path.
	    """
	    try:
	        import numpy as np
	    except ImportError:
	        np = None
	    if np is not None:
	        if isinstance(o, np.ndarray):
	            return f"<ndarray shape={o.shape} dtype={o.dtype}>"
	        # np.bool_ is neither np.integer nor np.floating; without this
	        # branch it fell through to str() and was written as the string
	        # "True"/"False" instead of a JSON boolean.
	        if isinstance(o, np.bool_):
	            return bool(o)
	        if isinstance(o, np.integer):
	            return int(o)
	        if isinstance(o, np.floating):
	            return float(o)
	    return str(o)


	# Markdown body of the README.md written into each trace folder. It is
	# rendered with str.format(stamp=...), so `{stamp}` is the only placeholder
	# and any literal braces added to this text must be doubled.
	_README_TEMPLATE = """# Lolaby — example generation trace

	Captured: {stamp} (UTC)

	## Files

	- `trace.json` — full pipeline log: inputs, every stage's prompts and
	outputs, stage timings, and the parsed lyric structure.
	- `input_drawing.png` — the drawing the user gave Lola at the start of
	this generation (omitted if no drawing was provided, or if traces
	were captured with `LULLABY_TRACE_NO_IMAGES=1`).
	- `output_lullaby.wav` — the audio Lola produced. WAV, 44.1 kHz mono.

	## Note on reproducibility

	This trace records one real generation end-to-end. The deterministic
	stages (vision-model output with `do_sample=False`, DSP synth rendering
	from parsed lyrics) will reproduce given the same inputs. The sampling
	stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
	similar but not identical output on re-run — they're inherently
	stochastic. The trace is a witness of one run, not a recipe.

	## Pipeline at a glance

	```
	drawing ─▶ MiniCPM-V 4.6 ─▶ "what Lola saw"
	│
	+ name, fears, mood, instruments
	▼
	Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
	│
	parsed lyric + chords + tempo
	│
	┌─────────────────┴─────────────────┐
	▼ ▼
	Kokoro TTS DSP synths (custom)
	└─────────────────┬─────────────────┘
	▼
	output_lullaby.wav
	```
	"""


	# Patch finalize() to call _flush_image at the right point. We do it this
	# way to keep the begin/stage/finalize public API tiny. The earlier
	# definition stays reachable as _orig_finalize.
	_orig_finalize = finalize
	def finalize(output_audio=None, sample_rate=44100, error=None):  # noqa: F811
	    """Write the trace folder for the run that is ending.

	    Meant to be called at the end of a generation whether or not it
	    succeeded. Writes ``trace.json``, the stashed input drawing (if any),
	    the output audio (if given) and a ``README.md``.

	    Args:
	        output_audio: Final audio samples. Written to ``output_lullaby.wav``
	            only when it is a NumPy array; any other non-None value is
	            reported and skipped.
	        sample_rate: Sample rate handed to ``soundfile.write``.
	        error: Optional exception or message; its ``str()`` is stored under
	            the ``"error"`` key of the trace.

	    Returns:
	        The ``Path`` of the folder that was written, or ``None`` when
	        tracing is disabled.
	    """
	    if not ENABLED:
	        return None
	    if _t0 is not None:
	        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
	    if error is not None:
	        _buf["error"] = str(error)

	    # time.gmtime() yields the same UTC stamp as datetime.utcnow(), which is
	    # deprecated since Python 3.12.
	    stamp = time.strftime("%Y-%m-%d_%H-%M-%S", time.gmtime())
	    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
	    folder = TRACE_ROOT / stamp
	    # The stamp has one-second resolution: give a second run finishing in
	    # the same second its own folder instead of overwriting the first.
	    collisions = 0
	    while True:
	        try:
	            folder.mkdir()
	            break
	        except FileExistsError:
	            collisions += 1
	            folder = TRACE_ROOT / f"{stamp}_{collisions}"

	    # Pull the stashed image out before we serialize the rest to JSON.
	    _flush_image(folder)

	    with open(folder / "trace.json", "w", encoding="utf-8") as f:
	        json.dump(_buf, f, indent=2, default=_json_safe)

	    if output_audio is not None:
	        try:
	            import soundfile as sf
	            import numpy as np
	            if isinstance(output_audio, np.ndarray):
	                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
	            else:
	                kind = type(output_audio).__name__
	                print(f"[trace] skipped audio: expected ndarray, got {kind}")
	        except Exception as e:
	            # Best effort: a missing soundfile or a bad array must not
	            # prevent the rest of the trace from being written.
	            print(f"[trace] could not write audio: {e}")

	    # The README template contains non-ASCII characters, so the platform's
	    # default encoding is not safe here.
	    (folder / "README.md").write_text(
	        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
	    )
	    print(f"[trace] wrote {folder}/")
	    return folder