"""trace.py — opt-in pipeline tracer.

Records ONE complete Lolaby generation (inputs, every stage, final outputs)
into a timestamped folder so you can publish an example trace as an HF
Dataset for the "Sharing is Caring" hackathon badge.

How to use:

    LULLABY_TRACE=1 python app.py

Generate a lullaby in the UI as you normally would. When the generation
finishes, a folder is written under ./traces/<timestamp>/ containing:

    trace.json          — the structured pipeline log
    input_drawing.png   — the drawing the user gave (if any)
    output_lullaby.wav  — the final audio
    README.md           — short note pointing at trace.json

That folder is what you upload to a Hugging Face Dataset. The root directory
can be changed with LULLABY_TRACE_DIR (default: ./traces).

Privacy posture:
  - DISABLED by default (do nothing unless LULLABY_TRACE is set).
  - Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
    input drawing (records the trace structure but not the actual image).

Design note:
  This is deliberately a module-level buffer accumulated by side-effects
  rather than a context manager threaded through every call site. The goal
  is MINIMAL changes to app.py — instrumentation calls are one line each at
  the places that matter.
"""

import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path

_TRUTHY = ("1", "true", "yes")

ENABLED = os.environ.get("LULLABY_TRACE", "").lower() in _TRUTHY
SAVE_IMAGES = os.environ.get("LULLABY_TRACE_NO_IMAGES", "").lower() not in _TRUTHY
TRACE_ROOT = Path(os.environ.get("LULLABY_TRACE_DIR", "./traces"))

# Current run's accumulated state. Reset at the start of each generation
# via `begin()`. Read by `finalize()` at the end.
_buf = {}
_t0 = None


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def _elapsed_ms():
    """Milliseconds since `begin()`; only valid while `_t0` is set."""
    return round((time.monotonic() - _t0) * 1000)


def is_enabled() -> bool:
    """Return True when tracing was switched on via LULLABY_TRACE."""
    return ENABLED


def begin() -> None:
    """Reset the buffer at the start of a new generation.

    Cheap no-op if tracing is disabled, so it's safe to call unconditionally.
    """
    global _buf, _t0
    if not ENABLED:
        return
    _t0 = time.monotonic()
    _buf = {
        # Naive-looking ISO string with an explicit "Z" suffix (UTC).
        "timestamp": _utc_now().replace(tzinfo=None).isoformat() + "Z",
        "stages": {},
        "stage_timings_ms": {},
    }


def stage(name, **fields) -> None:
    """Record a stage's metadata. Each call merges into stages[name].

    Tolerates being called before `begin()`: the containers are created on
    demand, and the timing entry is skipped because there is no start time.
    """
    if not ENABLED:
        return
    _buf.setdefault("stages", {}).setdefault(name, {}).update(fields)
    # Mark when this stage was last updated, relative to begin().
    if _t0 is not None:
        _buf.setdefault("stage_timings_ms", {})[name] = _elapsed_ms()


def set_inputs(**kw) -> None:
    """Record the user-facing inputs (name, age, loves, fears, etc.).

    Values that are None or the empty string are dropped. Image arrays are
    NOT meant to be stored here — `save_input_drawing` handles them.
    """
    if not ENABLED:
        return
    # Explicit checks instead of `v not in (None, "")`: the membership test
    # falls back to `==`, which raises for multi-element NumPy arrays.
    _buf["inputs"] = {
        key: value
        for key, value in kw.items()
        if value is not None and not (isinstance(value, str) and value == "")
    }


def save_input_drawing(image_array) -> None:
    """Stash the input drawing so `finalize()` can write input_drawing.png.

    Called inside the pipeline as soon as we know the image. Has to be a
    deferred write because we don't know the folder name until finalize().
    No-op when tracing is disabled, when LULLABY_TRACE_NO_IMAGES is set, or
    when `image_array` is None.
    """
    if not ENABLED or not SAVE_IMAGES:
        return
    if image_array is None:
        return
    _buf["_pending_image"] = image_array


def _flush_image(folder) -> None:
    """Internal: write the stashed image (if any) to the trace folder.

    The stashed object is always removed from the buffer, even when writing
    fails, so the binary never reaches trace.json.
    """
    arr = _buf.pop("_pending_image", None)
    if arr is None:
        return
    try:
        from PIL import Image
        import numpy as np

        if isinstance(arr, np.ndarray):
            if arr.ndim == 2:
                img = Image.fromarray(arr).convert("RGB")
            else:
                # Keep the first three channels only (drops e.g. alpha).
                img = Image.fromarray(arr[..., :3].astype(np.uint8))
            img.save(folder / "input_drawing.png")
        elif isinstance(arr, Image.Image):
            arr.save(folder / "input_drawing.png")
    except Exception as e:  # best-effort: tracing must never break the app
        print(f"[trace] could not write input drawing: {e}")


def _write_audio(folder, output_audio, sample_rate) -> None:
    """Internal: write output_lullaby.wav when the audio is a NumPy array.

    Any other type is skipped. Failures are reported and swallowed.
    """
    try:
        import soundfile as sf
        import numpy as np

        if isinstance(output_audio, np.ndarray):
            sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
    except Exception as e:  # best-effort: tracing must never break the app
        print(f"[trace] could not write audio: {e}")


def _json_safe(o):
    """Last-resort encoder for objects that aren't JSON-serializable.

    NumPy scalars become plain int/float, arrays become a short placeholder
    describing shape and dtype, and anything else falls back to `str(o)`.
    """
    try:
        import numpy as np

        if isinstance(o, np.ndarray):
            # NOTE(review): the placeholder text had been reduced to an empty
            # string; a shape/dtype summary is presumably what was intended.
            return f"<ndarray shape={o.shape} dtype={o.dtype}>"
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
    except ImportError:
        pass
    return str(o)


_README_TEMPLATE = """# Lolaby — example generation trace

Captured: {stamp} (UTC)

## Files

- `trace.json` — full pipeline log: inputs, every stage's prompts and
  outputs, stage timings, and the parsed lyric structure.
- `input_drawing.png` — the drawing the user gave Lola at the start of
  this generation (omitted if no drawing was provided, or if traces were
  captured with `LULLABY_TRACE_NO_IMAGES=1`).
- `output_lullaby.wav` — the audio Lola produced. WAV, 44.1 kHz mono.

## Note on reproducibility

This trace records one real generation end-to-end. The deterministic
stages (vision-model output with `do_sample=False`, DSP synth rendering
from parsed lyrics) will reproduce given the same inputs. The sampling
stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
*similar but not identical* output on re-run — they're inherently
stochastic. The trace is a witness of one run, not a recipe.

## Pipeline at a glance

```
drawing ─▶ MiniCPM-V 4.6 ─▶ "what Lola saw"
                          │
                          + name, fears, mood, instruments
                          ▼
          Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
                          │
                parsed lyric + chords + tempo
                          │
        ┌─────────────────┴─────────────────┐
        ▼                                   ▼
   Kokoro TTS                      DSP synths (custom)
        └─────────────────┬─────────────────┘
                          ▼
                 output_lullaby.wav
```
"""


def finalize(output_audio=None, sample_rate=44100, error=None):
    """Write the trace folder for the current generation.

    Called at the end of a generation, whether or not it succeeded.

    Args:
        output_audio: final audio; written to output_lullaby.wav only when
            it is a NumPy array.
        sample_rate: sample rate passed to the WAV writer.
        error: optional exception/message; stored as ``str(error)``.

    Returns:
        The trace folder path, or None if tracing is disabled.
    """
    if not ENABLED:
        return None

    if _t0 is not None:
        _buf["total_wall_time_ms"] = _elapsed_ms()
    if error is not None:
        _buf["error"] = str(error)

    stamp = _utc_now().strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(parents=True, exist_ok=True)

    # Pull the stashed image out before we serialize the rest to JSON.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    if output_audio is not None:
        _write_audio(folder, output_audio, sample_rate)

    # The template contains non-ASCII characters, so the encoding must be
    # explicit rather than the platform default.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )

    print(f"[trace] wrote {folder}/")
    return folder