Spaces:

build-small-hackathon
/

lolaby

Running

File size: 8,907 Bytes

6472a71

"""
trace.py — opt-in pipeline tracer.

Records ONE complete Lolaby generation (inputs, every stage, final outputs)
into a timestamped folder so you can publish an example trace as an HF
Dataset for the "Sharing is Caring" hackathon badge.

How to use:

    LULLABY_TRACE=1 python app.py

Generate a lullaby in the UI as you normally would. When the generation
finishes, a folder is written under ./traces/<timestamp>/ containing:

    trace.json            — the structured pipeline log
    input_drawing.png     — the drawing the user gave (if any)
    output_lullaby.wav    — the final audio
    README.md             — short note pointing at trace.json

That folder is what you upload to a Hugging Face Dataset.

Privacy posture:
  - DISABLED by default (does nothing unless LULLABY_TRACE is set to 1, true, or yes).
  - Even when enabled, set LULLABY_TRACE_NO_IMAGES=1 to skip saving the
    input drawing (records the trace structure but not the actual image).

Design note:
  This is deliberately a module-level buffer accumulated by side-effects
  rather than a context manager threaded through every call site. The
  goal is MINIMAL changes to app.py — instrumentation calls are one
  line each at the places that matter.
"""

import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path


# Environment values (compared case-insensitively) that switch a flag on.
_TRUTHY = ("1", "true", "yes")


def _env_flag(name):
    """Return True when environment variable *name* holds a truthy value."""
    return os.environ.get(name, "").lower() in _TRUTHY


# Tracing is opt-in: nothing is recorded unless LULLABY_TRACE is truthy.
ENABLED = _env_flag("LULLABY_TRACE")
# The input drawing is kept unless LULLABY_TRACE_NO_IMAGES is truthy.
SAVE_IMAGES = not _env_flag("LULLABY_TRACE_NO_IMAGES")
# Parent directory that receives one sub-folder per traced generation.
TRACE_ROOT = Path(os.environ.get("LULLABY_TRACE_DIR", "./traces"))


# Per-run state. `begin()` rebinds both names at the start of a generation
# and `finalize()` reads them when the generation ends.
_buf = {}
_t0 = None


def is_enabled():
    """Report whether tracing is switched on for this process."""
    tracing_on = ENABLED
    return tracing_on


def begin():
    """Start a fresh trace for a new generation.

    Rebinds the module-level buffer to an empty record (timestamp, stages,
    stage timings) and restarts the monotonic clock that `stage()` and
    `finalize()` measure against. Returns immediately when tracing is
    disabled, so it is safe to call unconditionally.
    """
    global _buf, _t0
    if not ENABLED:
        return
    _t0 = time.monotonic()
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop the tzinfo so the string keeps its "...Z" form instead
    # of gaining a "+00:00" offset.
    started_at = datetime.now(timezone.utc).replace(tzinfo=None)
    _buf = {
        "timestamp": started_at.isoformat() + "Z",
        "stages": {},
        "stage_timings_ms": {},
    }


def stage(name, **fields):
    """Record metadata for one pipeline stage.

    Repeated calls with the same *name* merge into a single entry; a later
    value replaces an earlier one stored under the same key. No-op when
    tracing is disabled.

    Args:
        name: Key of the stage inside the trace's "stages" mapping.
        **fields: Arbitrary metadata to merge into that stage's entry.
    """
    if not ENABLED:
        return
    # setdefault (rather than plain indexing) keeps instrumentation from
    # raising KeyError when a stage is reported before begin() has built
    # the buffer skeleton.
    stages = _buf.setdefault("stages", {})
    stages.setdefault(name, {}).update(fields)
    # Mark when this stage was last updated, relative to begin().
    if _t0 is not None:
        timings = _buf.setdefault("stage_timings_ms", {})
        timings[name] = round((time.monotonic() - _t0) * 1000)


def set_inputs(**kw):
    """Record the user-facing inputs (name, age, loves, fears, etc.).

    Entries whose value is None or the empty string are dropped; every other
    value (including 0 and False) is kept. Image arrays are not meant to be
    stored here (`save_input_drawing` handles them), but passing one no
    longer breaks the call. No-op when tracing is disabled.

    Args:
        **kw: Input name -> value pairs to store under the trace's "inputs".
    """
    if not ENABLED:
        return
    # Identity/type checks instead of `v not in (None, "")`: the membership
    # test compares with ==, which raises ValueError for numpy arrays.
    _buf["inputs"] = {
        key: value
        for key, value in kw.items()
        if value is not None and not (isinstance(value, str) and value == "")
    }


def finalize(output_audio=None, sample_rate=44100, error=None):
    """Write the trace folder for the generation that just ended.

    NOTE: the name `finalize` is rebound by a second definition further down
    in this module; this version stays reachable as `_orig_finalize`.

    Args:
        output_audio: Final audio. Only a numpy array is written; any other
            type is skipped with a console message.
        sample_rate: Sample rate handed to soundfile when writing the WAV.
        error: Exception (or message) to record under "error" when the
            generation failed.

    Returns:
        The trace folder path, or None if tracing is disabled.
    """
    if not ENABLED:
        return None
    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Write the stashed drawing and remove it from the buffer first, so the
    # raw array is not pushed through the JSON encoder.
    _flush_image(folder)

    # Write trace.json
    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Write output audio if provided (best effort: failures are reported,
    # never raised).
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np
            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                print(
                    "[trace] skipped audio: expected a numpy array, got "
                    f"{type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # The README contains non-ASCII characters (dashes, box drawing), so the
    # encoding is pinned instead of relying on the locale default.
    readme = folder / "README.md"
    readme.write_text(_README_TEMPLATE.format(stamp=stamp), encoding="utf-8")

    print(f"[trace] wrote {folder}/")
    return folder


def save_input_drawing(image_array):
    """Stash the user's drawing so it can be written as input_drawing.png.

    The trace folder name is only chosen in finalize(), so nothing is written
    to disk here: the image is parked in the run buffer under
    "_pending_image" and written out later. Does nothing when tracing is
    off, when image saving is off, or when no image was supplied.
    """
    nothing_to_do = (not ENABLED) or (not SAVE_IMAGES) or image_array is None
    if nothing_to_do:
        return
    _buf["_pending_image"] = image_array


def _flush_image(folder):
    """Internal: write the stashed drawing into *folder*, then forget it.

    A numpy array is converted to a PIL image first (2-D arrays via an RGB
    conversion, anything else by keeping the first three channels as uint8);
    a PIL image is saved as-is. Any failure is printed, never raised. The
    stash is always removed afterwards so it does not reach trace.json.
    """
    pending = _buf.get("_pending_image")
    if pending is None:
        return
    try:
        from PIL import Image
        import numpy as np
        if isinstance(pending, np.ndarray):
            is_grayscale = pending.ndim == 2
            pixels = pending if is_grayscale else pending[..., :3].astype(np.uint8)
            picture = Image.fromarray(pixels)
            if is_grayscale:
                picture = picture.convert("RGB")
            picture.save(folder / "input_drawing.png")
        elif isinstance(pending, Image.Image):
            pending.save(folder / "input_drawing.png")
    except Exception as e:
        print(f"[trace] could not write input drawing: {e}")
    # Strip the binary out of the JSON-bound buffer.
    _buf.pop("_pending_image")


def _json_safe(o):
    """Last-resort encoder for objects that aren't JSON-serializable.

    Intended as the `default=` hook of `json.dump`. numpy arrays become a
    short "<ndarray shape=... dtype=...>" placeholder, numpy scalars become
    the matching Python bool/int/float, and everything else (or everything,
    when numpy is not installed) falls back to `str(o)`.
    """
    try:
        import numpy as np
    except ImportError:
        return str(o)
    if isinstance(o, np.ndarray):
        return f"<ndarray shape={o.shape} dtype={o.dtype}>"
    # numpy.bool_ is neither np.integer nor a Python bool; without this
    # branch it would be written as the string "True"/"False".
    if isinstance(o, np.bool_):
        return bool(o)
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    return str(o)


# Markdown README written into every trace folder by finalize(). It is
# rendered with str.format(), and {stamp} is its only placeholder, so any
# literal brace added to the text must be doubled.
_README_TEMPLATE = """# Lolaby — example generation trace

Captured: {stamp} (UTC)

## Files

- `trace.json` — full pipeline log: inputs, every stage's prompts and
  outputs, stage timings, and the parsed lyric structure.
- `input_drawing.png` — the drawing the user gave Lola at the start of
  this generation (omitted if no drawing was provided, or if traces
  were captured with `LULLABY_TRACE_NO_IMAGES=1`).
- `output_lullaby.wav` — the audio Lola produced. WAV, 44.1 kHz mono.

## Note on reproducibility

This trace records one real generation end-to-end. The deterministic
stages (vision-model output with `do_sample=False`, DSP synth rendering
from parsed lyrics) will reproduce given the same inputs. The sampling
stages (the lyric model at `temperature=0.85`, Kokoro TTS) will produce
*similar but not identical* output on re-run — they're inherently
stochastic. The trace is a witness of one run, not a recipe.

## Pipeline at a glance

```
drawing ─▶ MiniCPM-V 4.6 ─▶ "what Lola saw"
                                  │
                + name, fears, mood, instruments
                                  ▼
                Fine-tuned Llama 3.2 3B (GGUF, llama.cpp)
                                  │
                          parsed lyric + chords + tempo
                                  │
                ┌─────────────────┴─────────────────┐
                ▼                                   ▼
          Kokoro TTS                       DSP synths (custom)
                └─────────────────┬─────────────────┘
                                  ▼
                            output_lullaby.wav
```
"""


# Patch finalize() to call _flush_image at the right point. We do it this
# way to keep the begin/stage/finalize public API tiny. The earlier
# definition stays reachable as `_orig_finalize`.
_orig_finalize = finalize


def finalize(output_audio=None, sample_rate=44100, error=None):  # noqa: F811
    """Write the trace folder for the generation that just ended.

    Call this at the end of a generation whether or not it succeeded. The
    folder is created under TRACE_ROOT, named after the current UTC time
    (second resolution), and receives trace.json, README.md, the stashed
    input drawing (if any) and the output audio (if given).

    Args:
        output_audio: Final audio. Only a numpy array is written; any other
            type is skipped with a console message.
        sample_rate: Sample rate handed to soundfile when writing the WAV.
        error: Exception (or message) to record under "error" when the
            generation failed.

    Returns:
        The trace folder path, or None if tracing is disabled.
    """
    if not ENABLED:
        return None
    if _t0 is not None:
        _buf["total_wall_time_ms"] = round((time.monotonic() - _t0) * 1000)
    if error is not None:
        _buf["error"] = str(error)

    TRACE_ROOT.mkdir(parents=True, exist_ok=True)
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
    folder = TRACE_ROOT / stamp
    folder.mkdir(exist_ok=True)

    # Pull the stashed image out before we serialize the rest to JSON.
    _flush_image(folder)

    with open(folder / "trace.json", "w", encoding="utf-8") as f:
        json.dump(_buf, f, indent=2, default=_json_safe)

    # Audio is best effort: failures are reported, never raised.
    if output_audio is not None:
        try:
            import soundfile as sf
            import numpy as np
            if isinstance(output_audio, np.ndarray):
                sf.write(folder / "output_lullaby.wav", output_audio, sample_rate)
            else:
                print(
                    "[trace] skipped audio: expected a numpy array, got "
                    f"{type(output_audio).__name__}"
                )
        except Exception as e:
            print(f"[trace] could not write audio: {e}")

    # The README contains non-ASCII characters (dashes, box drawing), so the
    # encoding is pinned; the locale default can raise UnicodeEncodeError.
    (folder / "README.md").write_text(
        _README_TEMPLATE.format(stamp=stamp), encoding="utf-8"
    )
    print(f"[trace] wrote {folder}/")
    return folder