File size: 8,907 Bytes
6472a71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""
trace.py — opt-in pipeline tracer.

Records ONE complete Lolaby generation (inputs, every stage, final outputs)
into a timestamped folder so you can publish an example trace as an HF
Dataset for the "Sharing is Caring" hackathon badge.

How to use:

    LULLABY_TRACE=1 python app.py

Generate a lullaby in the UI as you normally would. When the generation
finishes, a folder is written under ./traces/<timestamp>/ containing:

    trace.json            — the structured pipeline log
    input_drawing.png     — the drawing the user gave (if any)
    output_lullaby.wav    — the final audio
    README.md             — short note pointing at trace.json

That folder is what you upload to a Hugging Face Dataset.

Privacy posture:
  - DISABLED by default (do nothing unless LULLABY_TRACE is set).
  - Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
    input drawing (records the trace structure but not the actual image).

Design note:
  This is deliberately a module-level buffer accumulated by side-effects
  rather than a context manager threaded through every call site. The
  goal is MINIMAL changes to app.py — instrumentation calls are one
  line each at the places that matter.
"""

import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path


# Environment values (compared case-insensitively) that count as "on".
_TRUTHY = ("1", "true", "yes")

# Master switch: every public function is a no-op unless LULLABY_TRACE is truthy.
ENABLED = os.getenv("LULLABY_TRACE", "").lower() in _TRUTHY
# Drawings are kept unless LULLABY_TRACE_NO_IMAGES explicitly opts out.
SAVE_IMAGES = os.getenv("LULLABY_TRACE_NO_IMAGES", "").lower() not in _TRUTHY
# Parent directory of the per-run trace folders; override with LULLABY_TRACE_DIR.
TRACE_ROOT = Path(os.getenv("LULLABY_TRACE_DIR", "./traces"))


# Module-level state for the generation currently being traced.
# `begin()` rebinds both names when tracing is enabled; `stage()`,
# `set_inputs()` and `save_input_drawing()` add entries to `_buf`;
# `finalize()` serializes `_buf` to trace.json.
# `_buf` is the accumulated trace data for the current run.
_buf = {}
# `_t0` holds the time.monotonic() reading taken by begin(); it stays None
# until begin() has run with tracing enabled.
_t0 = None


def is_enabled():
    """Report whether tracing is switched on for this process.

    Simply exposes the module-level ``ENABLED`` flag, which is computed once
    at import time from the ``LULLABY_TRACE`` environment variable.
    """
    tracing_on = ENABLED
    return tracing_on


def begin():
    """Start a fresh trace for a new generation.

    Rebinds the module-level buffer to an empty trace skeleton (UTC start
    timestamp, empty ``stages`` and ``stage_timings_ms`` maps) and restarts
    the monotonic clock that stage timings are measured against.

    Does nothing when tracing is disabled, so it is safe to call
    unconditionally.
    """
    global _buf, _t0
    if not ENABLED:
        return
    _t0 = time.monotonic()
    # `datetime.utcnow()` is deprecated since Python 3.12. Take an aware UTC
    # time and drop the tzinfo so the recorded string keeps its original
    # "<naive ISO timestamp>Z" shape.
    started_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    _buf = {
        "timestamp": started_utc.isoformat() + "Z",
        "stages": {},
        "stage_timings_ms": {},
    }


def stage(name, **fields):
    """Record metadata for one pipeline stage.

    Repeated calls with the same ``name`` merge their keyword fields into a
    single ``stages[name]`` entry; a later value for the same key replaces
    the earlier one. ``stage_timings_ms[name]`` is set to the milliseconds
    elapsed since ``begin()`` at the time of the most recent call.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return
    # Use setdefault rather than direct indexing: if an instrumentation call
    # fires before begin() has built the skeleton, the tracer must not raise
    # KeyError into the pipeline it is observing.
    _buf.setdefault("stages", {}).setdefault(name, {}).update(fields)
    # Mark when this stage was last updated, relative to begin().
    if _t0 is not None:
        elapsed_ms = round((time.monotonic() - _t0) * 1000)
        _buf.setdefault("stage_timings_ms", {})[name] = elapsed_ms


def set_inputs(**kw):
    """Record the user-facing inputs (name, age, loves, fears, etc.).

    Every keyword argument is stored under ``inputs`` in the trace buffer,
    except those whose value is ``None`` or the empty string. Each call
    replaces the previously recorded inputs.

    Image arrays are NOT meant to be stored here — ``save_input_drawing``
    handles them.

    No-op when tracing is disabled.
    """
    if not ENABLED:
        return
    # Test for None / "" explicitly instead of `v not in (None, "")`: the
    # membership test runs `==` against every value, which raises ValueError
    # for array-like values (e.g. a NumPy array passed by mistake).
    _buf["inputs"] = {
        key: value
        for key, value in kw.items()
        if value is not None and not (isinstance(value, str) and value == "")
    }


def finalize(output_audio=None, sample_rate=44100, error=None):
    """Write the trace folder for the generation that just ended.

    NOTE: the name ``finalize`` is rebound further down this module; this
    definition stays reachable as ``_orig_finalize``.

    Meant to be called at the end of a generation whether or not it
    succeeded. Creates ``TRACE_ROOT/<UTC timestamp>/`` and writes:

    - ``input_drawing.png`` if a drawing was stashed by ``save_input_drawing``
    - ``trace.json`` with the accumulated buffer
    - ``output_lullaby.wav`` if ``output_audio`` is a NumPy array
    - ``README.md`` rendered from ``_README_TEMPLATE``

    ``sample_rate`` is passed to the audio writer. ``error``, when given, is
    recorded as a string under ``error`` in the trace.

    Returns the trace folder path, or ``None`` if tracing is disabled.
    """
    if not ENABLED:
        return None
    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Aware UTC time instead of the deprecated datetime.utcnow(); the
    # formatted string is identical.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Write the stashed drawing and drop it from the buffer BEFORE the JSON
    # dump, otherwise the image object itself would be serialized.
    _flush_image(folder)

    # Write trace.json
    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Write output audio if provided (best effort: failures are reported,
    # never raised).
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np
            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                # Make the skip visible instead of silently producing a
                # trace folder without the audio file.
                print(
                    "[trace] could not write audio: expected a numpy array, "
                    f"got {type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # Write a small README pointing at the trace file. The template contains
    # non-ASCII characters, so the encoding must not depend on the locale.
    readme = folder / "README.md"
    readme.write_text(_README_TEMPLATE.format(stamp=stamp), encoding="utf-8")

    print(f"[trace] wrote {folder}/")
    return folder


def save_input_drawing(image_array):
    """Remember the user's drawing so it can be written with the trace.

    The destination folder name is only decided inside ``finalize()``, so
    nothing touches the disk here: the image is parked in the run buffer
    under ``_pending_image`` and written out later as ``input_drawing.png``.

    Ignored when tracing is off, when image saving has been disabled, or
    when ``image_array`` is ``None``.
    """
    capture_images = ENABLED and SAVE_IMAGES
    if capture_images and image_array is not None:
        _buf["_pending_image"] = image_array


def _flush_image(folder):
    """Internal: write the stashed drawing into ``folder`` and forget it.

    A 2-D NumPy array is converted to RGB; any other NumPy array has its
    first three channels cast to uint8. A PIL image is saved unchanged.
    Values of any other type are dropped without being written. Errors while
    writing are printed rather than raised. Whenever a drawing was stashed,
    its entry is removed from the buffer so it never reaches trace.json.
    """
    pending = _buf.get("_pending_image")
    if pending is None:
        return
    try:
        from PIL import Image
        import numpy as np

        picture = None
        if isinstance(pending, np.ndarray):
            picture = (
                Image.fromarray(pending).convert("RGB")
                if pending.ndim == 2
                else Image.fromarray(pending[..., :3].astype(np.uint8))
            )
        elif isinstance(pending, Image.Image):
            picture = pending
        if picture is not None:
            picture.save(folder / "input_drawing.png")
    except Exception as exc:
        print(f"[trace] could not write input drawing: {exc}")
    # The binary payload must not end up in the JSON-bound buffer.
    del _buf["_pending_image"]


def _json_safe(o):
    """Last-resort encoder for objects that aren't JSON-serializable."""
    try:
        import numpy as np
        if isinstance(o, np.ndarray):
            return f"<ndarray shape={o.shape} dtype={o.dtype}>"
        if isinstance(o, (np.integer,)):
            return int(o)
        if isinstance(o, (np.floating,)):
            return float(o)
    except ImportError:
        pass
    return str(o)


_README_TEMPLATE = """# Lolaby β€” example generation trace

Captured: {stamp} (UTC)

## Files

- `trace.json` β€” full pipeline log: inputs, every stage's prompts and
  outputs, stage timings, and the parsed lyric structure.
- `input_drawing.png` β€” the drawing the user gave Lola at the start of
  this generation (omitted if no drawing was provided, or if traces
  were captured with `LULLABY_TRACE_NO_IMAGES=1`).
- `output_lullaby.wav` β€” the audio Lola produced. WAV, 44.1 kHz mono.

## Note on reproducibility

This trace records one real generation end-to-end. The deterministic
stages (vision-model output with `do_sample=False`, DSP synth rendering
from parsed lyrics) will reproduce given the same inputs. The sampling
stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
*similar but not identical* output on re-run β€” they're inherently
stochastic. The trace is a witness of one run, not a recipe.

## Pipeline at a glance

```
drawing ─▢ MiniCPM-V 4.6 ─▢ "what Lola saw"
                                  β”‚
                + name, fears, mood, instruments
                                  β–Ό
                Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
                                  β”‚
                          parsed lyric + chords + tempo
                                  β”‚
                β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
                β–Ό                                   β–Ό
          Kokoro TTS                       DSP synths (custom)
                β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
                                  β–Ό
                            output_lullaby.wav
```
"""


# Redefine finalize() so the stashed drawing is flushed at the right point
# (before the buffer is serialized). Done this way to keep the
# begin/stage/finalize public API tiny; the earlier definition stays
# reachable as `_orig_finalize`.
_orig_finalize = finalize
def finalize(output_audio=None, sample_rate=44100, error=None):  # noqa: F811
    """Write the trace folder for the generation that just ended.

    Meant to be called at the end of a generation whether or not it
    succeeded. Creates ``TRACE_ROOT/<UTC timestamp>/`` and writes:

    - ``input_drawing.png`` if a drawing was stashed by ``save_input_drawing``
    - ``trace.json`` with the accumulated buffer
    - ``output_lullaby.wav`` if ``output_audio`` is a NumPy array
    - ``README.md`` rendered from ``_README_TEMPLATE``

    ``sample_rate`` is passed to the audio writer. ``error``, when given, is
    recorded as a string under ``error`` in the trace.

    Returns the trace folder path, or ``None`` if tracing is disabled.
    """
    if not ENABLED:
        return None
    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Aware UTC time instead of the deprecated datetime.utcnow(); the
    # formatted string is identical.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Pull the stashed image out before we serialize the rest to JSON.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Audio is best effort: failures are reported, never raised.
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np
            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                # Make the skip visible instead of silently producing a
                # trace folder without the audio file.
                print(
                    "[trace] could not write audio: expected a numpy array, "
                    f"got {type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # The template contains non-ASCII characters, so the encoding must not
    # depend on the platform locale.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )
    print(f"[trace] wrote {folder}/")
    return folder