# lolaby/utils/trace.py
# André Oliveira
# v0
# 6472a71
"""
trace.py — opt-in pipeline tracer.
Records ONE complete Lolaby generation (inputs, every stage, final outputs)
into a timestamped folder so you can publish an example trace as an HF
Dataset for the "Sharing is Caring" hackathon badge.
How to use:
    LULLABY_TRACE=1 python app.py

Generate a lullaby in the UI as you normally would. When the generation
finishes, a folder is written under ./traces/<timestamp>/ containing:
    trace.json          — the structured pipeline log
    input_drawing.png   — the drawing the user gave (if any)
    output_lullaby.wav  — the final audio
    README.md           — short note pointing at trace.json
That folder is what you upload to a Hugging Face Dataset.
Privacy posture:
    - DISABLED by default (does nothing unless LULLABY_TRACE is set).
    - Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
      input drawing (records the trace structure but not the actual image).
Design note:
    This is deliberately a module-level buffer accumulated by side-effects
    rather than a context manager threaded through every call site. The
    goal is MINIMAL changes to app.py — instrumentation calls are one
    line each at the places that matter.
"""
import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path
def _env_flag(var_name):
    """Return True when env var *var_name* is "1", "true" or "yes" (any case)."""
    return os.environ.get(var_name, "").lower() in ("1", "true", "yes")


# Master switch: every public function returns early unless LULLABY_TRACE is truthy.
ENABLED = _env_flag("LULLABY_TRACE")
# The input drawing is kept unless LULLABY_TRACE_NO_IMAGES is truthy.
SAVE_IMAGES = not _env_flag("LULLABY_TRACE_NO_IMAGES")
# Parent directory under which one folder per traced generation is created.
TRACE_ROOT = Path(os.environ.get("LULLABY_TRACE_DIR", "./traces"))

# State accumulated for the run in progress. `begin()` replaces it at the
# start of each generation and `finalize()` reads it at the end.
_buf = {}
# time.monotonic() reading taken by `begin()`; stays None until a run starts.
_t0 = None
def is_enabled():
    """Return the module-level ENABLED flag (True when tracing is switched on)."""
    return ENABLED
def begin():
    """Start a fresh trace buffer for a new generation.

    Replaces the module-level buffer with an empty skeleton (UTC timestamp,
    empty ``stages`` and ``stage_timings_ms`` maps) and records the monotonic
    start time used for later timing calculations. Returns immediately when
    tracing is disabled, so it is safe to call unconditionally.
    """
    global _buf, _t0
    if not ENABLED:
        return
    _t0 = time.monotonic()
    # datetime.utcnow() is deprecated; take an aware UTC time and drop tzinfo
    # so isoformat() does not emit "+00:00" ahead of the explicit "Z" suffix.
    started_at = datetime.now(timezone.utc).replace(tzinfo=None)
    _buf = {
        "timestamp": started_at.isoformat() + "Z",
        "stages": {},
        "stage_timings_ms": {},
    }
def stage(name, **fields):
    """Record metadata for one pipeline stage.

    Each call merges ``fields`` into ``stages[name]``, so a stage may be
    reported incrementally from several call sites. When a start time is
    known, ``stage_timings_ms[name]`` is set to the milliseconds elapsed
    since ``begin()`` at the moment of this (latest) update.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return
    # setdefault instead of plain indexing: the buffer starts out as {} and
    # only gains "stages"/"stage_timings_ms" in begin(), so a stage reported
    # before begin() must not raise KeyError inside the host pipeline.
    _buf.setdefault("stages", {}).setdefault(name, {}).update(fields)
    if _t0 is not None:
        elapsed_ms = round((time.monotonic() - _t0) * 1000)
        _buf.setdefault("stage_timings_ms", {})[name] = elapsed_ms
def set_inputs(**kw):
    """Record the user-facing inputs (name, age, loves, fears, etc.).

    Keyword arguments whose value is ``None`` or the empty string are
    dropped; everything else is stored under ``inputs`` in the trace buffer,
    replacing any previously recorded inputs. Image arrays are NOT meant to
    be stored here — `save_input_drawing` handles them.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return
    # Identity / type checks rather than `v not in (None, "")`: the latter
    # evaluates `v == None`, which raises ValueError for multi-element numpy
    # arrays if one is ever passed through.
    _buf["inputs"] = {
        key: value
        for key, value in kw.items()
        if value is not None and not (isinstance(value, str) and value == "")
    }
def finalize(output_audio=None, sample_rate=44100, error=None):
    """Write the trace folder for the current generation.

    Meant to be called at the end of a generation whether or not it
    succeeded. NOTE: a later ``def finalize`` in this module rebinds the
    public name; this definition stays reachable as ``_orig_finalize``.

    Args:
        output_audio: final audio; written to ``output_lullaby.wav`` only
            when it is a numpy ndarray.
        sample_rate: sample rate passed to the WAV writer.
        error: optional exception/message; stored as ``str(error)`` under
            ``error`` in the trace.

    Returns:
        The trace folder path, or None when tracing is disabled.

    Errors while creating the folder or writing ``trace.json``/``README.md``
    propagate; audio and image writing are best-effort and only print.
    """
    if not ENABLED:
        return None

    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # datetime.utcnow() is deprecated; strftime output is unchanged.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    suffix = 0
    while True:
        # The stamp has one-second resolution: never reuse (and overwrite)
        # a folder written by another run within the same second.
        try:
            folder.mkdir()
            break
        except FileExistsError:
            suffix += 1
            folder = TRACE_ROOT / f"{stamp}_{suffix}"

    # Write the stashed drawing and strip it from the buffer before the
    # buffer is serialised; otherwise the array ends up in trace.json.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np

            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                print(
                    "[trace] skipped audio: expected ndarray, got "
                    f"{type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # Explicit UTF-8: the README template contains non-ASCII characters.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )
    print(f"[trace] wrote {folder}/")
    return folder
def save_input_drawing(image_array):
    """Stash the input drawing so it can be written as input_drawing.png.

    The trace folder name is not known until finalize(), so nothing is
    written here: the image is parked in the buffer under ``_pending_image``
    and written out later. Does nothing when tracing is disabled, when image
    saving is switched off, or when ``image_array`` is None.
    """
    if ENABLED and SAVE_IMAGES and image_array is not None:
        _buf["_pending_image"] = image_array
def _flush_image(folder):
    """Internal: write the stashed drawing into ``folder`` and forget it.

    Numpy arrays are converted through PIL (2-D arrays via an RGB
    conversion, anything else by keeping the first three channels as uint8);
    PIL images are saved directly. Failures are printed, not raised. The
    stashed entry is always removed afterwards so it never reaches the JSON
    dump.
    """
    pending = _buf.get("_pending_image")
    if pending is None:
        return

    try:
        from PIL import Image
        import numpy as np

        destination = folder / "input_drawing.png"
        if isinstance(pending, np.ndarray):
            picture = (
                Image.fromarray(pending).convert("RGB")
                if pending.ndim == 2
                else Image.fromarray(pending[..., :3].astype(np.uint8))
            )
            picture.save(destination)
        elif isinstance(pending, Image.Image):
            pending.save(destination)
    except Exception as e:
        print(f"[trace] could not write input drawing: {e}")

    # Drop the binary payload from the JSON-bound buffer.
    del _buf["_pending_image"]
def _json_safe(o):
    """Last-resort ``json.dump(default=...)`` encoder.

    Called by the JSON encoder only for objects it cannot serialise itself.

    Returns:
        - a short ``<ndarray shape=... dtype=...>`` summary for numpy arrays
          (the data itself is deliberately not dumped),
        - a native ``bool``/``int``/``float`` for numpy scalar types,
        - ``str(o)`` for anything else, or for everything when numpy is not
          installed.
    """
    try:
        import numpy as np
    except ImportError:
        return str(o)

    if isinstance(o, np.ndarray):
        return f"<ndarray shape={o.shape} dtype={o.dtype}>"
    # np.bool_ is not a subclass of bool, so without this branch it would be
    # stringified to "True"/"False" instead of a JSON boolean.
    if isinstance(o, np.bool_):
        return bool(o)
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    return str(o)
# README.md written into every trace folder. `{stamp}` is the only format
# field, so the text must not contain any other curly braces. Non-ASCII
# characters (em dashes, box drawing) are intentional: write it as UTF-8.
_README_TEMPLATE = """# Lolaby — example generation trace

Captured: {stamp} (UTC)

## Files

- `trace.json` — full pipeline log: inputs, every stage's prompts and
  outputs, stage timings, and the parsed lyric structure.
- `input_drawing.png` — the drawing the user gave Lola at the start of
  this generation (omitted if no drawing was provided, or if traces
  were captured with `LULLABY_TRACE_NO_IMAGES=1`).
- `output_lullaby.wav` — the audio Lola produced. WAV, 44.1 kHz mono.

## Note on reproducibility

This trace records one real generation end-to-end. The deterministic
stages (vision-model output with `do_sample=False`, DSP synth rendering
from parsed lyrics) will reproduce given the same inputs. The sampling
stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
*similar but not identical* output on re-run — they're inherently
stochastic. The trace is a witness of one run, not a recipe.

## Pipeline at a glance

```
drawing ─▶ MiniCPM-V 4.6 ─▶ "what Lola saw"
                    │
     + name, fears, mood, instruments
                    ▼
  Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
                    │
        parsed lyric + chords + tempo
                    │
  ┌─────────────────┴─────────────────┐
  ▼                                   ▼
Kokoro TTS                   DSP synths (custom)
  └─────────────────┬─────────────────┘
                    ▼
           output_lullaby.wav
```
"""
# Patch finalize() to call _flush_image at the right point. We do it this
# way to keep the begin/stage/finalize public API tiny.
_orig_finalize = finalize
def finalize(output_audio=None, sample_rate=44100, error=None):  # noqa: F811
    """Write the trace folder for the current generation.

    This is the definition bound to the public name ``finalize``. Call it
    at the end of a generation whether or not it succeeded.

    Args:
        output_audio: final audio; written to ``output_lullaby.wav`` only
            when it is a numpy ndarray.
        sample_rate: sample rate passed to the WAV writer.
        error: optional exception/message; stored as ``str(error)`` under
            ``error`` in the trace.

    Returns:
        The trace folder path, or None when tracing is disabled.

    Errors while creating the folder or writing ``trace.json``/``README.md``
    propagate; audio and image writing are best-effort and only print.
    """
    if not ENABLED:
        return None

    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # datetime.utcnow() is deprecated; strftime output is unchanged.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    suffix = 0
    while True:
        # The stamp has one-second resolution: never reuse (and overwrite)
        # a folder written by another run within the same second.
        try:
            folder.mkdir()
            break
        except FileExistsError:
            suffix += 1
            folder = TRACE_ROOT / f"{stamp}_{suffix}"

    # Pull the stashed image out before we serialize the rest to JSON.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np

            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                print(
                    "[trace] skipped audio: expected ndarray, got "
                    f"{type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # Explicit UTF-8: the README template contains non-ASCII characters.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )
    print(f"[trace] wrote {folder}/")
    return folder