"""
trace.py — opt-in pipeline tracer.
Records ONE complete Lolaby generation (inputs, every stage, final outputs)
into a timestamped folder so you can publish an example trace as an HF
Dataset for the "Sharing is Caring" hackathon badge.
How to use:
LULLABY_TRACE=1 python app.py
Generate a lullaby in the UI as you normally would. When the generation
finishes, a folder is written under ./traces/<timestamp>/ containing:
trace.json — the structured pipeline log
input_drawing.png — the drawing the user gave (if any)
output_lullaby.wav — the final audio
README.md — short note pointing at trace.json
That folder is what you upload to a Hugging Face Dataset.
Privacy posture:
- DISABLED by default (do nothing unless LULLABY_TRACE is set).
- Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
input drawing (records the trace structure but not the actual image).
Design note:
This is deliberately a module-level buffer accumulated by side-effects
rather than a context manager threaded through every call site. The
goal is MINIMAL changes to app.py — instrumentation calls are one
line each at the places that matter.
"""
import json
import os
import time
from datetime import datetime
from pathlib import Path
# Environment-driven configuration, read once at import time.
# A flag counts as "on" when its lowercased value is 1 / true / yes.
ENABLED = os.getenv("LULLABY_TRACE", "").lower() in {"1", "true", "yes"}
# Images are saved unless the NO_IMAGES opt-out flag is switched on.
SAVE_IMAGES = not (
    os.getenv("LULLABY_TRACE_NO_IMAGES", "").lower() in {"1", "true", "yes"}
)
TRACE_ROOT = Path(os.getenv("LULLABY_TRACE_DIR", "./traces"))

# Accumulated state for the generation currently being traced. `begin()`
# replaces both values at the start of a run; `finalize()` reads them at
# the end.
_buf = dict()
_t0 = None
def is_enabled():
    """Return the module-level ENABLED flag (True when tracing is on)."""
    tracing_on = ENABLED
    return tracing_on
def begin():
    """Reset the trace buffer at the start of a new generation.

    Replaces the module-level buffer with a fresh dict holding the UTC start
    timestamp and empty ``stages`` / ``stage_timings_ms`` maps, and restarts
    the monotonic clock used for relative timings. Anything recorded in the
    previous buffer is discarded.

    Returns immediately when tracing is disabled, so it is safe to call
    unconditionally.
    """
    global _buf, _t0
    if not ENABLED:
        return
    # Function-scope import: the module only imports `datetime` itself.
    from datetime import timezone

    _t0 = time.monotonic()
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() emits no "+00:00" offset and the
    # recorded string keeps its "<iso>Z" shape.
    started_at = datetime.now(timezone.utc).replace(tzinfo=None)
    _buf = {
        "timestamp": started_at.isoformat() + "Z",
        "stages": {},
        "stage_timings_ms": {},
    }
def stage(name, **fields):
    """Record metadata for one pipeline stage.

    Each call merges ``fields`` into ``stages[name]``; later calls for the
    same stage overwrite keys they share with earlier calls. The stage's
    entry in ``stage_timings_ms`` is set to the time of this (latest) update,
    in milliseconds relative to ``begin()``.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return
    # setdefault instead of plain indexing: the buffer starts out as an empty
    # dict, so a stage() call that arrives before begin() must not raise
    # KeyError and take the pipeline down with it.
    _buf.setdefault("stages", {}).setdefault(name, {}).update(fields)
    # Mark when this stage was last updated, relative to begin().
    if _t0 is not None:
        elapsed_ms = round((time.monotonic() - _t0) * 1000)
        _buf.setdefault("stage_timings_ms", {})[name] = elapsed_ms
def set_inputs(**kw):
    """Record the user-facing inputs (name, age, loves, fears, etc.).

    Entries whose value is ``None`` or the empty string are dropped; every
    other value is stored as given under ``inputs``. Each call replaces any
    previously recorded inputs. Image arrays are not meant to be stored
    here — `save_input_drawing` handles them.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return

    def _is_blank(value):
        # Identity / type checks rather than `value in (None, "")`: the
        # membership test falls back to `==`, which raises ValueError for
        # numpy arrays and can misfire on objects with a custom __eq__.
        return value is None or (isinstance(value, str) and value == "")

    _buf["inputs"] = {k: v for k, v in kw.items() if not _is_blank(v)}
def finalize(output_audio=None, sample_rate=44100, error=None):
    """Write the trace folder for the current generation.

    Meant to be called at the end of a generation, whether or not it
    succeeded. NOTE: this name is rebound by a second ``finalize`` definition
    further down the module; this version stays reachable as
    ``_orig_finalize``.

    Args:
        output_audio: final audio; written to ``output_lullaby.wav`` only
            when it is a numpy array, otherwise skipped.
        sample_rate: sample rate passed to ``soundfile.write``.
        error: optional exception or message, stored as ``str(error)``
            under ``"error"``.

    Returns:
        The trace folder path, or ``None`` if tracing is disabled.

    The folder name has one-second resolution and is created with
    ``exist_ok=True``, so two calls within the same second share a folder.
    """
    if not ENABLED:
        return None
    # Function-scope import: the module only imports `datetime` itself.
    from datetime import timezone

    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Aware UTC clock instead of the deprecated datetime.utcnow(); the
    # formatted stamp is identical.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Write the stashed drawing and drop it from the buffer before the
    # buffer is serialized, matching the later finalize() definition.
    _flush_image(folder)

    # Write trace.json
    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Write output audio if provided (best effort: failures are only logged).
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np

            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # The README template contains non-ASCII characters, so pin UTF-8 rather
    # than relying on the locale's default encoding.
    readme = folder / "README.md"
    readme.write_text(_README_TEMPLATE.format(stamp=stamp), encoding="utf-8")
    print(f"[trace] wrote {folder}/")
    return folder
def save_input_drawing(image_array):
    """Stash the input drawing so it can be saved as input_drawing.png.

    The write is deferred: the trace folder name is not known until
    finalize() runs, so the image is only kept in the buffer here and is
    written out later.

    Does nothing when tracing is disabled, when image saving is switched
    off, or when ``image_array`` is None.
    """
    if image_array is None or not (ENABLED and SAVE_IMAGES):
        return
    _buf["_pending_image"] = image_array
def _flush_image(folder):
    """Internal: write the stashed drawing into ``folder`` and forget it.

    Numpy arrays are converted to a PIL image first (2-D arrays via
    ``convert("RGB")``, anything else by keeping the first three channels as
    uint8); PIL images are saved directly; other types are not written.
    Failures are printed, never raised.
    """
    pending = _buf.get("_pending_image")
    if pending is None:
        return
    try:
        from PIL import Image
        import numpy as np

        if isinstance(pending, np.ndarray):
            picture = (
                Image.fromarray(pending).convert("RGB")
                if pending.ndim == 2
                else Image.fromarray(pending[..., :3].astype(np.uint8))
            )
            picture.save(folder / "input_drawing.png")
        elif isinstance(pending, Image.Image):
            pending.save(folder / "input_drawing.png")
    except Exception as e:
        print(f"[trace] could not write input drawing: {e}")
    # The raw image must not end up in the JSON-bound buffer.
    _buf.pop("_pending_image")
def _json_safe(o):
    """Last-resort encoder for objects ``json`` cannot serialize natively.

    Intended as the ``default=`` hook of ``json.dump``.

    Returns:
        - a short ``"<ndarray shape=... dtype=...>"`` summary for numpy arrays
          (the data itself is not embedded),
        - a native ``bool`` / ``int`` / ``float`` for numpy scalar types,
        - ``str(o)`` for everything else, or for any object when numpy is
          not installed.
    """
    try:
        import numpy as np
    except ImportError:
        return str(o)

    if isinstance(o, np.ndarray):
        return f"<ndarray shape={o.shape} dtype={o.dtype}>"
    # np.bool_ is neither np.integer nor np.floating; without this branch it
    # would fall through to str() and land in the JSON as "True"/"False".
    if isinstance(o, np.bool_):
        return bool(o)
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    return str(o)
# README written into every trace folder. The text is passed through
# str.format(stamp=...), so `{stamp}` is the only placeholder and any literal
# brace added here would have to be doubled. It contains non-ASCII characters
# (em dashes, box-drawing glyphs).
_README_TEMPLATE = """# Lolaby — example generation trace

Captured: {stamp} (UTC)

## Files

- `trace.json` — full pipeline log: inputs, every stage's prompts and
  outputs, stage timings, and the parsed lyric structure.
- `input_drawing.png` — the drawing the user gave Lola at the start of
  this generation (omitted if no drawing was provided, or if traces
  were captured with `LULLABY_TRACE_NO_IMAGES=1`).
- `output_lullaby.wav` — the audio Lola produced. WAV, 44.1 kHz mono.

## Note on reproducibility

This trace records one real generation end-to-end. The deterministic
stages (vision-model output with `do_sample=False`, DSP synth rendering
from parsed lyrics) will reproduce given the same inputs. The sampling
stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
*similar but not identical* output on re-run — they're inherently
stochastic. The trace is a witness of one run, not a recipe.

## Pipeline at a glance

```
drawing ─▶ MiniCPM-V 4.6 ─▶ "what Lola saw"
                          │
           + name, fears, mood, instruments
                          ▼
      Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
                          │
            parsed lyric + chords + tempo
                          │
        ┌─────────────────┴─────────────────┐
        ▼                                   ▼
    Kokoro TTS                     DSP synths (custom)
        └─────────────────┬─────────────────┘
                          ▼
                  output_lullaby.wav
```
"""
# Final definition of finalize(). It rebinds the name defined earlier in this
# module with a version that flushes the stashed input drawing before the
# buffer is serialized; it does not delegate to the earlier definition, which
# only stays reachable as `_orig_finalize`. Done this way to keep the
# begin/stage/finalize public API tiny.
_orig_finalize = finalize


def finalize(output_audio=None, sample_rate=44100, error=None):  # noqa: F811
    """Write the trace folder for the current generation.

    Meant to be called at the end of a generation, whether or not it
    succeeded. Writes ``trace.json``, the stashed input drawing (if any),
    the output audio (if given) and a ``README.md`` into
    ``TRACE_ROOT/<UTC timestamp>/``.

    Args:
        output_audio: final audio; written to ``output_lullaby.wav`` only
            when it is a numpy array, otherwise skipped.
        sample_rate: sample rate passed to ``soundfile.write``.
        error: optional exception or message, stored as ``str(error)``
            under ``"error"``.

    Returns:
        The trace folder path, or ``None`` if tracing is disabled.

    The folder name has one-second resolution and is created with
    ``exist_ok=True``, so two calls within the same second share a folder.
    """
    if not ENABLED:
        return None
    # Function-scope import: the module only imports `datetime` itself.
    from datetime import timezone

    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Aware UTC clock instead of the deprecated datetime.utcnow(); the
    # formatted stamp is identical.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Pull the stashed image out before we serialize the rest to JSON.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Audio is best effort: failures are only logged.
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np

            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # The README template contains non-ASCII characters, so pin UTF-8 rather
    # than relying on the locale's default encoding.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )
    print(f"[trace] wrote {folder}/")
    return folder
|