Spaces:

Simo76
/

MelodyDeterminism-Demo

Build error

File size: 24,868 Bytes

ed7d2bb
 
a114e62
296a8c2
 
d8dc6f8
296a8c2
 
ed7d2bb
 
296a8c2
 
 
 
cd785c2
3e19efd
 
daf6fa4
 
3e19efd
ed7d2bb
3e19efd
 
296a8c2
3e19efd
ed7d2bb
 
3e19efd
a114e62
 
 
 
 
 
 
 
 
296a8c2
 
 
 
 
 
 
 
 
3e19efd
ed7d2bb
3e19efd
ed7d2bb
daf6fa4
296a8c2
 
 
daf6fa4
296a8c2
 
 
 
ed7d2bb
 
 
 
 
 
 
 
 
 
 
 
 
 
3e19efd
d8dc6f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e19efd
ed7d2bb
d8dc6f8
ed7d2bb
 
 
d8dc6f8
 
 
 
 
 
 
3e19efd
 
ed7d2bb
 
 
 
 
 
 
 
3e19efd
 
 
296a8c2
 
daf6fa4
296a8c2
d8dc6f8
 
 
daf6fa4
ed7d2bb
296a8c2
 
ed7d2bb
 
296a8c2
d8dc6f8
 
 
 
 
 
ed7d2bb
daf6fa4
d8dc6f8
296a8c2
ed7d2bb
3e19efd
296a8c2
 
 
 
 
 
 
 
 
 
 
 
ed7d2bb
296a8c2
 
 
ed7d2bb
3e19efd
 
296a8c2
daf6fa4
296a8c2
d8dc6f8
296a8c2
 
d8dc6f8
daf6fa4
ed7d2bb
daf6fa4
ed7d2bb
 
daf6fa4
3e19efd
daf6fa4
296a8c2
d8dc6f8
 
 
 
daf6fa4
 
d8dc6f8
daf6fa4
296a8c2
daf6fa4
3e19efd
daf6fa4
3e19efd
296a8c2
3e19efd
ed7d2bb
3e19efd
ed7d2bb
3e19efd
ed7d2bb
3e19efd
ed7d2bb
3e19efd
ed7d2bb
 
 
 
 
3e19efd
ed7d2bb
3e19efd
ed7d2bb
 
3e19efd
ed7d2bb
 
296a8c2
ed7d2bb
 
 
 
 
 
3e19efd
ed7d2bb
3e19efd
296a8c2
3e19efd
296a8c2
 
 
3e19efd
ed7d2bb
 
3e19efd
296a8c2
3e19efd
 
296a8c2
 
ed7d2bb
 
 
3e19efd
ed7d2bb
 
a114e62
 
 
 
ed7d2bb
 
 
 
 
 
 
 
3e19efd
 
 
296a8c2
 
ed7d2bb
 
296a8c2
ed7d2bb
3e19efd
ed7d2bb
 
a114e62
ed7d2bb
296a8c2
3e19efd
 
296a8c2
 
ed7d2bb
 
296a8c2
ed7d2bb
296a8c2
3e19efd
ed7d2bb
 
 
 
 
 
 
 
 
296a8c2
3e19efd
 
296a8c2
 
 
 
 
 
d8dc6f8
296a8c2
3e19efd
 
296a8c2
 
 
 
 
 
d8dc6f8
296a8c2
 
 
d8dc6f8
296a8c2
 
 
 
d8dc6f8
296a8c2
 
 
d8dc6f8
296a8c2
 
d8dc6f8
 
296a8c2
d8dc6f8
296a8c2
 
 
 
 
 
 
 
 
3e19efd
296a8c2
 
 
 
3e19efd
 
296a8c2
 
ed7d2bb
296a8c2
ed7d2bb
 
296a8c2
 
 
ed7d2bb
 
296a8c2
 
 
ed7d2bb
 
296a8c2
 
 
ed7d2bb
296a8c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e19efd
296a8c2
ed7d2bb
 
 
296a8c2
ed7d2bb
 
a114e62
 
 
 
296a8c2
3e19efd
296a8c2
 
 
 
 
 
 
 
 
3e19efd
296a8c2
ed7d2bb
296a8c2
ed7d2bb
296a8c2
 
ed7d2bb
a114e62
ed7d2bb
3e19efd
296a8c2
 
 
 
 
 
 
 
3e19efd
296a8c2
ed7d2bb
296a8c2
ed7d2bb
296a8c2
 
 
ed7d2bb
 
296a8c2
 
3e19efd
296a8c2
 
 
 
 
 
 
 
 
3e19efd
296a8c2
 
3e19efd
296a8c2
b342f7f
 
 
296a8c2
b342f7f
 
 
296a8c2
3e19efd
296a8c2
3e19efd
a114e62
 
 
 
 
 
 
 
 
296a8c2
a114e62
 
 
296a8c2
a114e62
 
 
 
296a8c2
a114e62
 
 
296a8c2
a114e62
296a8c2
a114e62
296a8c2
a114e62
 
 
296a8c2
a114e62
 
 
296a8c2
 
 
 
 
a114e62
 
 
 
 
 
 
 
 
 
 
daf6fa4
a114e62
 
 
 
 
 
 
daf6fa4
a114e62
 
296a8c2
a114e62
 
 
3e19efd
d8dc6f8
296a8c2
 
3e19efd
 
a114e62
 
 
 
 
 
 
 
 
 
296a8c2
a114e62
 
 
 
 
 
 
296a8c2
d8dc6f8
296a8c2
a114e62
 
 
296a8c2
a114e62
 
296a8c2
 
 
 
 
a114e62
 
296a8c2
 
d8dc6f8
296a8c2
a114e62
 
296a8c2
a114e62
 
 
 
 
296a8c2
ed7d2bb
 
ed3acd1

# -*- coding: utf-8 -*-
"""
MelodyDeterminism - Canonical Determinism Demo (NumPy-only, CPU)
Esteso con:
- PRNG switch: philox (veloce, GPU-like) / sha256 (indipendente)
- Softmax canonica con riduzioni pairwise: max tree + sum (kahan/tree)
- Edge test: maschere, ±inf, nan, invariance a shift, idempotenza
- dtype selezionabile, benchmark parametrico
"""

import os
# Default OpenMP and MKL to one thread unless the environment already sets
# these variables. This runs before NumPy is imported below; presumably the
# intent is to keep BLAS/OpenMP reductions single-threaded for reproducible
# results -- TODO confirm.
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

import hashlib
import json
import time
import tempfile
from typing import Any, Dict, List, Tuple

import numpy as np
import gradio as gr

# ========================== Utils ==========================

def sha256_ndarray(a: np.ndarray) -> str:
    """Return the hex SHA-256 digest of the array's C-contiguous raw bytes.

    Only the element bytes are hashed; shape and dtype are not part of the
    digest.
    """
    raw = np.ascontiguousarray(a).tobytes()
    hasher = hashlib.sha256()
    hasher.update(raw)
    return hasher.hexdigest()

def tol_stats(ref: np.ndarray, got: np.ndarray, eps: float = 1e-12) -> Dict[str, float]:
    """Return the maximum absolute and relative error of ``got`` against ``ref``.

    Both inputs are compared in float64. The relative error divides every
    absolute error by a single denominator: the largest ``|ref|`` value,
    floored at ``eps`` so an all-zero reference does not divide by zero.

    Args:
        ref: Reference values.
        got: Values under test (broadcast against ``ref``).
        eps: Lower bound for the relative-error denominator.

    Returns:
        ``{"max_abs_err": float, "max_rel_err": float}``. Empty input yields
        zeros for both entries instead of raising.
    """
    ref64 = np.asarray(ref, dtype=np.float64)
    got64 = np.asarray(got, dtype=np.float64)
    abs_diff = np.abs(got64 - ref64)
    if abs_diff.size == 0:
        # np.max raises ValueError on zero-size arrays; nothing to compare
        # means no error.
        return {"max_abs_err": 0.0, "max_rel_err": 0.0}
    mae = float(np.max(abs_diff))
    denom = float(max(np.max(np.abs(ref64)), eps))
    mre = float(np.max(abs_diff / denom))
    return {"max_abs_err": mae, "max_rel_err": mre}

# ========================== PRNG config ==========================

# Active PRNG backend. D.counter_prng reads it on every call and the Gradio
# callbacks reassign it from the user's radio selection.
PRNG_MODE = "philox"  # "philox" (recommended) or "sha256"

def _philox_random(seed: int, shape):
    g = np.random.Generator(np.random.Philox(int(seed)))
    return g.random(shape, dtype=np.float64).astype(np.float32, copy=False)

# ====================== Deterministic ops ========================

class D:
    """Namespace of deterministic ("canonical") numeric primitives.

    Every method is static. Reductions use a fixed evaluation order
    (pairwise tree or sequential Kahan), so identical input bits produce
    identical output bits regardless of how NumPy would order a plain
    ``np.sum`` / ``np.max``.
    """

    @staticmethod
    def counter_prng(seed: int, counter: int, shape: Tuple[int, ...]) -> np.ndarray:
        """Return float32 uniforms of the given shape from the selected PRNG.

        The backend is chosen by the module-level ``PRNG_MODE``:

        - ``"philox"``: NumPy's counter-based Philox bit generator (fast).
        - anything else: SHA-256 of ``"seed:counter:block"`` strings
          (independent of NumPy's generators, but slow).

        Args:
            seed: Stream seed.
            counter: Stream counter.
            shape: Output shape.

        Returns:
            float32 array of ``shape``.
        """
        if PRNG_MODE == "philox":
            # NOTE(review): seed and counter are merged by addition, so two
            # (seed, counter) pairs with the same sum share one stream. Kept
            # unchanged so existing outputs and hashes stay stable.
            return _philox_random(seed + counter, shape)

        # SHA-256 fallback: each 32-byte digest yields four 64-bit words,
        # each reduced to 53 bits and scaled into [0, 1).
        total = int(np.prod(shape)) if len(shape) else 1
        vals: List[float] = []
        block = 0
        while len(vals) < total:
            payload = f"{seed}:{counter}:{block}".encode("utf-8")
            digest = hashlib.sha256(payload).digest()  # 32 bytes
            for k in range(0, 32, 8):
                word = int.from_bytes(digest[k:k + 8], "big", signed=False)
                vals.append((word % (1 << 53)) / float(1 << 53))
                if len(vals) >= total:
                    break
            block += 1
        arr = np.array(vals, dtype=np.float64).reshape(shape)
        return arr.astype(np.float32, copy=False)

    @staticmethod
    def _tree_reduce_rows(rows: np.ndarray, op, fill: float) -> np.ndarray:
        """Pairwise-reduce each row of a 2-D array with ``op``.

        Rows are padded on the right with ``fill`` (the identity of ``op``)
        up to the next power of two, then adjacent pairs are combined until
        one column remains. All rows are processed at once; the pairing is
        the same as reducing each row on its own, so results are
        bit-identical to a per-row loop.

        Args:
            rows: Array of shape ``(R, L)``.
            op: Binary NumPy ufunc such as ``np.add`` or ``np.maximum``.
            fill: Padding value.

        Returns:
            float64 array of shape ``(R,)``.
        """
        v = np.asarray(rows, dtype=np.float64)
        n = v.shape[-1]
        m = 1 << (n - 1).bit_length()
        if m != n:
            v = np.pad(v, ((0, 0), (0, m - n)), constant_values=fill)
        while v.shape[-1] > 1:
            v = op(v[:, 0::2], v[:, 1::2])
        return v[:, 0]

    @staticmethod
    def _tree_sum_row(vec64: np.ndarray) -> float:
        """Pairwise (tree) sum of a single vector, zero-padded to a power of two."""
        row = np.asarray(vec64, dtype=np.float64).reshape(1, -1)
        return float(D._tree_reduce_rows(row, np.add, 0.0)[0])

    @staticmethod
    def _tree_max_row(vec64: np.ndarray) -> float:
        """Pairwise (tree) maximum of a single vector, padded with ``-inf``."""
        row = np.asarray(vec64, dtype=np.float64).reshape(1, -1)
        return float(D._tree_reduce_rows(row, np.maximum, -np.inf)[0])

    @staticmethod
    def tree_fixed_reduce(x: np.ndarray) -> np.float64:
        """Pairwise deterministic sum over all elements of ``x`` (flattened).

        Returns ``0.0`` for empty input.
        """
        flat = np.asarray(x, dtype=np.float64).reshape(1, -1)
        if flat.size == 0:
            return np.float64(0.0)
        return np.float64(D._tree_reduce_rows(flat, np.add, 0.0)[0])

    @staticmethod
    def kahan_sum(x: np.ndarray) -> np.float64:
        """Sequential Kahan (compensated) sum over all elements of ``x``."""
        flat = np.asarray(x, dtype=np.float64).reshape(-1)
        total = np.float64(0.0)
        comp = np.float64(0.0)  # running compensation for lost low-order bits
        for value in flat:
            adjusted = value - comp
            new_total = total + adjusted
            comp = (new_total - total) - adjusted
            total = new_total
        return total

    @staticmethod
    def deterministic_softmax(x: np.ndarray, axis: int = -1, mask: np.ndarray = None, sum_mode: str = "kahan") -> np.ndarray:
        """Numerically stable softmax with fixed-order reductions.

        The row maximum is computed with a pairwise tree; the normalising
        sum uses either Kahan summation or a pairwise tree. All arithmetic
        is done in float64.

        Args:
            x: Logits (any array-like with at least one dimension).
            axis: Axis along which to normalise.
            mask: Optional boolean array-like broadcastable to ``x``;
                ``True`` keeps a position, ``False`` sets it to ``-inf``.
            sum_mode: ``"tree"`` for the pairwise sum; any other value
                selects Kahan summation.

        Returns:
            Array shaped like ``x``. Floating inputs keep their dtype;
            integer, boolean, or non-ndarray inputs yield float64 (casting
            probabilities back to an integer dtype would truncate them to
            zero). Non-finite results (for example a fully masked row or a
            row containing NaN) are replaced by ``0.0``.
        """
        x_arr = np.asarray(x)
        if np.issubdtype(x_arr.dtype, np.floating):
            out_dtype = x_arr.dtype
        else:
            out_dtype = np.float64

        x64 = x_arr.astype(np.float64)
        if mask is not None:
            x64 = np.where(np.asarray(mask, dtype=bool), x64, -np.inf)
        if axis < 0:
            axis = x64.ndim + axis

        # Work on a 2-D view [rows, L] with the softmax axis last.
        x_move = np.moveaxis(x64, axis, -1)
        rows = x_move.reshape(-1, x_move.shape[-1])

        # inf - inf and 0 / 0 are expected for masked / non-finite rows and
        # are cleaned up below, so silence the corresponding warnings.
        with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
            row_max = D._tree_reduce_rows(rows, np.maximum, -np.inf)
            z = np.exp(rows - row_max[:, None])

            if sum_mode == "tree":
                sums = D._tree_reduce_rows(z, np.add, 0.0)
            else:
                # Column-by-column Kahan summation, vectorised across rows.
                sums = np.zeros((z.shape[0],), dtype=np.float64)
                comp = np.zeros((z.shape[0],), dtype=np.float64)
                for column in z.T:
                    adjusted = column - comp
                    new_sums = sums + adjusted
                    comp = (new_sums - sums) - adjusted
                    sums = new_sums

            out = z / sums[:, None]

        out = np.where(np.isfinite(out), out, 0.0)  # e.g. all-(-inf) rows
        out = out.astype(out_dtype, copy=False).reshape(x_move.shape)
        return np.moveaxis(out, -1, axis)

    @staticmethod
    def deterministic_categorical(logits: np.ndarray, num_samples: int, seed: int, sum_mode: str = "kahan") -> np.ndarray:
        """Draw categorical samples by inverse-CDF lookup with a declared PRNG.

        Steps: canonical softmax, one cumulative sum per row, a block of
        uniforms from ``counter_prng(seed, 0, ...)``, then
        ``searchsorted(..., side="left")`` so ties resolve to the smallest
        index.

        Args:
            logits: 1-D ``(V,)`` or 2-D ``(B, V)`` logits.
            num_samples: Samples to draw per row.
            seed: PRNG seed.
            sum_mode: Passed through to ``deterministic_softmax``.

        Returns:
            int64 array of shape ``(num_samples,)`` for 1-D input or
            ``(B, num_samples)`` for 2-D input.

        Raises:
            ValueError: If ``logits`` is neither 1-D nor 2-D.
        """
        x = np.asarray(logits, dtype=np.float64)
        single = x.ndim == 1
        if single:
            x = x[None, :]
        if x.ndim != 2:
            raise ValueError(f"logits must be 1-D or 2-D, got {x.ndim} dimensions")
        batch = x.shape[0]

        probs = D.deterministic_softmax(x, axis=-1, sum_mode=sum_mode).astype(np.float64)
        cdf = np.cumsum(probs, axis=-1)
        # Clamp into [0, 1] and force the last entry to exactly 1 so every
        # uniform draw lands inside the table.
        np.clip(cdf, 0.0, 1.0, out=cdf)
        cdf[:, -1] = 1.0

        U = D.counter_prng(seed, 0, (batch, num_samples)).astype(np.float64)
        idx_rows = [np.searchsorted(cdf[b], U[b], side="left") for b in range(batch)]
        out = np.stack(idx_rows, axis=0).astype(np.int64, copy=False)
        return out[0] if single else out

# ======================= Standard refs =======================

def standard_sum(x: np.ndarray) -> Tuple[np.float64, float, str]:
    """Reference sum using plain ``np.sum`` in float64.

    Returns:
        ``(total, elapsed_ms, digest)`` where ``digest`` is the SHA-256 hex
        digest of the total stored as a one-element float64 array.
    """
    started = time.perf_counter()
    total = np.sum(x.astype(np.float64))
    elapsed_ms = (time.perf_counter() - started) * 1000.0

    boxed = np.ascontiguousarray(np.array([total], dtype=np.float64))
    digest = hashlib.sha256(boxed.tobytes()).hexdigest()
    return np.float64(total), elapsed_ms, digest

def standard_softmax(x: np.ndarray, axis: int = -1) -> Tuple[np.ndarray, float, str]:
    """Reference softmax built from plain ``np.max`` / ``np.sum`` in float64.

    Args:
        x: Logits.
        axis: Axis along which to normalise.

    Returns:
        ``(probs, elapsed_ms, digest)``. ``probs`` keeps the input dtype when
        it is floating point; integer, boolean, or non-ndarray inputs yield
        float64, because casting probabilities back to an integer dtype would
        truncate them to zero. ``digest`` is ``sha256_ndarray(probs)``.
    """
    t0 = time.perf_counter()
    x_arr = np.asarray(x)
    if np.issubdtype(x_arr.dtype, np.floating):
        out_dtype = x_arr.dtype
    else:
        out_dtype = np.float64
    xx = x_arr.astype(np.float64)
    peak = np.max(xx, axis=axis, keepdims=True)  # subtract for stability
    z = np.exp(xx - peak)
    total = np.sum(z, axis=axis, keepdims=True)
    y = (z / total).astype(out_dtype, copy=False)
    dt = (time.perf_counter() - t0) * 1000.0
    return y, dt, sha256_ndarray(y)

def standard_categorical(logits: np.ndarray, num_samples: int, seed: int) -> Tuple[np.ndarray, float, str]:
    """Reference categorical sampling via ``Generator.choice``.

    Accepts 1-D ``(V,)`` or 2-D ``(B, V)`` logits; 2-D input is sampled row by
    row from a single generator created from ``seed``.

    Returns:
        ``(samples, elapsed_ms, digest)`` with int64 samples and
        ``digest = sha256_ndarray(samples)``.
    """
    rng = np.random.default_rng(seed)
    started = time.perf_counter()
    x = logits.astype(np.float64)

    if x.ndim != 1:
        batch, vocab = x.shape
        weights = np.exp(x - np.max(x, axis=1, keepdims=True))
        weights /= weights.sum(axis=1, keepdims=True)
        per_row = []
        for b in range(batch):
            per_row.append(rng.choice(vocab, size=num_samples, replace=True, p=weights[b]))
        draws = np.stack(per_row, axis=0)
    else:
        weights = np.exp(x - np.max(x))
        weights /= weights.sum()
        draws = rng.choice(len(x), size=num_samples, replace=True, p=weights)

    elapsed_ms = (time.perf_counter() - started) * 1000.0
    draws64 = draws.astype(np.int64, copy=False)
    return draws64, elapsed_ms, sha256_ndarray(draws.astype(np.int64, copy=False))

# ======================== Suite helpers ========================

def gen_demo(seed: int, shape: Tuple[int, ...], dtype: str = "float32") -> np.ndarray:
    """Generate demo input from ``D.counter_prng(seed, 0, shape)``.

    The result is float32 when ``dtype == "float32"`` and float64 for any
    other value.
    """
    target = np.float32 if dtype == "float32" else np.float64
    base = D.counter_prng(seed, 0, shape)
    widened = base.astype(np.float64, copy=False)
    return widened.astype(target, copy=False)

def compare_close(a: np.ndarray, b: np.ndarray, atol=1e-9, rtol=1e-9) -> bool:
    """Return True when ``a`` and ``b`` agree element-wise within tolerance.

    Both arrays are compared in float64; NaNs never compare equal.
    """
    left = a.astype(np.float64)
    right = b.astype(np.float64)
    within = np.isclose(left, right, rtol=rtol, atol=atol)
    return bool(np.all(within))

def run_full_suite(seed: int, n: int, v: int, dtype: str, sum_mode: str) -> Dict[str, Any]:
    """Run the reduce, softmax and sampling comparisons and collect a report.

    Args:
        seed: Base seed; the three sections use ``seed``, ``seed + 1`` and
            ``seed + 2`` for their inputs.
        n: Number of rows.
        v: Row width / vocabulary size.
        dtype: ``"float32"`` or ``"float64"`` (passed to ``gen_demo``).
        sum_mode: Softmax summation mode (``"kahan"`` or ``"tree"``).

    Returns:
        Dict with the sections ``reduce``, ``softmax``, ``sampling``, ``meta``.
    """

    def _scalar_digest(value) -> str:
        # Hash a scalar as a one-element float64 array.
        boxed = np.ascontiguousarray(np.array([value], dtype=np.float64))
        return hashlib.sha256(boxed.tobytes()).hexdigest()

    def _elapsed_ms(since: float) -> float:
        return (time.perf_counter() - since) * 1000.0

    # ---- Reduce: standard vs tree vs Kahan on the flattened input ----
    flat = gen_demo(seed, (n, v), dtype=dtype).reshape(-1)
    sum_std, sum_std_ms, sum_std_hash = standard_sum(flat)
    sum_tree = D.tree_fixed_reduce(flat)
    sum_kahan = D.kahan_sum(flat)
    std_box = np.array([sum_std])
    reduce_section = {
        "ms": {"standard": round(sum_std_ms, 3)},
        "values": {"standard": float(sum_std), "tree": float(sum_tree), "kahan": float(sum_kahan)},
        "tolerance_vs_standard": {
            "tree": tol_stats(std_box, np.array([sum_tree])),
            "kahan": tol_stats(np.array([sum_std]), np.array([sum_kahan])),
        },
        "hash": {
            "standard": sum_std_hash,
            "tree": _scalar_digest(sum_tree),
            "kahan": _scalar_digest(sum_kahan),
        },
        "equalities": {
            "standard_vs_tree_equal": bool(abs(sum_std - sum_tree) < 1e-12),
            "standard_vs_kahan_equal": bool(abs(sum_std - sum_kahan) < 1e-12),
        },
    }

    # ---- Softmax: standard vs canonical ----
    logits_2d = gen_demo(seed + 1, (n, v), dtype=dtype)
    soft_std, soft_std_ms, soft_std_hash = standard_softmax(logits_2d, axis=-1)
    tick = time.perf_counter()
    soft_can = D.deterministic_softmax(logits_2d, axis=-1, sum_mode=sum_mode)
    soft_can_ms = _elapsed_ms(tick)
    softmax_section = {
        "ms": {"standard": round(soft_std_ms, 3), "canonical": round(soft_can_ms, 3)},
        "allclose": bool(compare_close(soft_std, soft_can, 1e-9, 1e-9)),
        "tolerance_vs_standard": tol_stats(soft_std, soft_can),
        "hash": {"standard": soft_std_hash, "canonical": sha256_ndarray(soft_can)},
        "sum_mode": sum_mode,
    }

    # ---- Sampling: standard vs deterministic (run twice for stability) ----
    logits_1d = gen_demo(seed + 2, (v,), dtype=dtype)
    draw_std, draw_std_ms, draw_std_hash = standard_categorical(logits_1d, num_samples=16, seed=seed)
    tick = time.perf_counter()
    draw_det = D.deterministic_categorical(logits_1d, num_samples=16, seed=seed, sum_mode=sum_mode)
    draw_det_ms = _elapsed_ms(tick)
    draw_det_again = D.deterministic_categorical(logits_1d, num_samples=16, seed=seed, sum_mode=sum_mode)
    sampling_section = {
        "ms": {"standard": round(draw_std_ms, 3), "deterministic": round(draw_det_ms, 3)},
        "standard_vs_deterministic_equal": bool(np.array_equal(draw_std, draw_det)),
        "deterministic_stable": bool(np.array_equal(draw_det, draw_det_again)),
        "hash": {
            "standard": draw_std_hash,
            "deterministic": sha256_ndarray(draw_det),
            "deterministic_again": sha256_ndarray(draw_det_again),
        },
        "samples": {"standard": draw_std.tolist(), "deterministic": draw_det.tolist()},
        "sum_mode": sum_mode,
    }

    meta_section = {
        "backend": "numpy",
        "seed": seed,
        "shape": [n, v],
        "dtype": dtype,
        "prng": PRNG_MODE,
        "note": "NumPy-only canonical ops; philox/sha256 PRNG; softmax max/sum pairwise deterministici.",
    }

    return {
        "reduce": reduce_section,
        "softmax": softmax_section,
        "sampling": sampling_section,
        "meta": meta_section,
    }

def run_edge_softmax(seed: int, n: int, v: int, dtype: str, sum_mode: str) -> Dict[str, Any]:
    """Exercise the canonical softmax on edge cases and report the checks.

    The input is standard-normal noise with ``+inf``, ``-inf`` and ``nan``
    injected into the first row, combined with a random mask that keeps
    roughly 80% of the positions. The checks are shift invariance,
    idempotence, probability conservation and finiteness.

    Args:
        seed: Seed for the local NumPy generator.
        n: Number of rows.
        v: Row width. Widths below 3 receive only as many injected extremes
            as fit, instead of raising ``IndexError``.
        dtype: ``"float32"`` selects float32; anything else float64.
        sum_mode: Softmax summation mode (``"kahan"`` or ``"tree"``).

    Returns:
        Dict with the check results and ``tolerance_against_self``
        (``tol_stats`` between the original and shifted outputs).
    """
    rng = np.random.default_rng(seed)
    x = rng.standard_normal((n, v)).astype(np.float64)
    # Extremes in the first row, limited to the columns that actually exist.
    if n > 0:
        for col, extreme in enumerate((np.inf, -np.inf, np.nan)[:v]):
            x[0, col] = extreme
    # Mask: ~80% valid
    mask = rng.random((n, v)) > 0.2
    x = x.astype(np.float32 if dtype == "float32" else np.float64, copy=False)

    p1 = D.deterministic_softmax(x, axis=-1, mask=mask, sum_mode=sum_mode)
    # Shift invariance: adding a constant must not change the result.
    c = 123.456
    p2 = D.deterministic_softmax(x + c, axis=-1, mask=mask, sum_mode=sum_mode)
    inv_shift = bool(np.allclose(p1, p2))
    # Idempotence check: softmax applied to its own output, unmasked.
    p3 = D.deterministic_softmax(p1, axis=-1, mask=np.ones_like(p1, dtype=bool), sum_mode=sum_mode)
    idempotent = bool(np.allclose(p1, p3))
    # Probability conservation: every row should sum to 1.
    conserve = bool(np.allclose(np.sum(p1, axis=-1), 1.0))
    return {
        "sum_mode": sum_mode, "dtype": dtype,
        "mask_ratio": float(np.mean(mask)),
        "invariance_shift": inv_shift,
        "idempotent": idempotent,
        "conserve_prob": conserve,
        "finite": bool(np.isfinite(p1).all()),
        "tolerance_against_self": tol_stats(p1, p2),
    }

# ========================= Gradio callbacks =========================

def run_single_test(test_kind: str, seed: float, n: float, v: float, dtype: str, sum_mode: str, prng_choice: str):
    """Gradio callback: run the selected test and return its report.

    Sets the module-level ``PRNG_MODE`` to ``prng_choice``, casts the numeric
    widget values to ``int`` and dispatches on ``test_kind``.

    Returns:
        ``(text_report, json_path)`` where ``json_path`` points at a
        temporary JSON file written by ``_save_json``.
    """
    global PRNG_MODE
    PRNG_MODE = prng_choice  # update the global PRNG selection
    seed = int(seed)
    n = int(n)
    v = int(v)

    if test_kind == "Full suite":
        report = run_full_suite(seed, n, v, dtype=dtype, sum_mode=sum_mode)
        reduce_rep = report["reduce"]
        softmax_rep = report["softmax"]
        sampling_rep = report["sampling"]
        lines = [
            "== MelodyDeterminism - Full suite (NumPy) ==",
            f"Seed: {seed}  Shape: ({n},{v})  dtype={dtype}  PRNG={PRNG_MODE}  sum={sum_mode}",
            "",
            "[Reduce]",
            f"  values: {reduce_rep['values']}",
            f"  tol(tree): {reduce_rep['tolerance_vs_standard']['tree']}",
            f"  tol(kahan): {reduce_rep['tolerance_vs_standard']['kahan']}",
            "",
            "[Softmax]",
            f"  allclose(standard, canonical): {softmax_rep['allclose']}",
            f"  tol: {softmax_rep['tolerance_vs_standard']}",
            f"  ms: std={softmax_rep['ms']['standard']}  canonical={softmax_rep['ms']['canonical']} (sum={sum_mode})",
            "",
            "[Sampling]",
            f"  deterministic_stable (two runs): {sampling_rep['deterministic_stable']}",
            f"  ms: std={sampling_rep['ms']['standard']}  deterministic={sampling_rep['ms']['deterministic']} (PRNG={PRNG_MODE})",
            f"  samples_deterministic: {sampling_rep['samples']['deterministic']}",
        ]
        return "\n".join(lines), _save_json(report)

    if test_kind == "Softmax (edge: mask ±inf/nan)":
        report = run_edge_softmax(seed, n, v, dtype=dtype, sum_mode=sum_mode)
        lines = [
            "== Softmax Edge ==",
            f"Seed: {seed}  Shape: ({n},{v})  dtype={dtype}  PRNG={PRNG_MODE}  sum={sum_mode}",
            f"invariance_shift: {report['invariance_shift']}",
            f"idempotent: {report['idempotent']}",
            f"conserve_prob (≈1): {report['conserve_prob']}",
            f"finite: {report['finite']}",
            f"mask_ratio: {report['mask_ratio']:.2f}",
            f"tolerance_against_self: {report['tolerance_against_self']}",
        ]
        return "\n".join(lines), _save_json(report)

    if test_kind == "Reduce (tree vs kahan vs standard)":
        flat = gen_demo(seed, (n, v), dtype=dtype).reshape(-1)
        total_std, std_ms, _ = standard_sum(flat)
        total_tree = D.tree_fixed_reduce(flat)
        total_kahan = D.kahan_sum(flat)
        tolerances = {
            "tree": tol_stats(np.array([total_std]), np.array([total_tree])),
            "kahan": tol_stats(np.array([total_std]), np.array([total_kahan])),
        }
        report = {
            "seed": seed, "N": int(flat.size),
            "values": {"standard": float(total_std), "tree": float(total_tree), "kahan": float(total_kahan)},
            "tolerance_vs_standard": tolerances,
            "ms": {"standard": round(std_ms, 3)},
        }
        lines = [
            "== Reduce ==",
            f"seed={seed} len={flat.size} dtype={dtype}",
            f"standard: {float(total_std)}  ms={round(std_ms,3)}",
            f"tree: {float(total_tree)}  tol={tolerances['tree']}",
            f"kahan: {float(total_kahan)}  tol={tolerances['kahan']}",
        ]
        return "\n".join(lines), _save_json(report)

    if test_kind == "Softmax (canonical vs standard)":
        logits = gen_demo(seed, (n, v), dtype=dtype)
        probs_std, std_ms, std_hash = standard_softmax(logits, axis=-1)
        tick = time.perf_counter()
        probs_can = D.deterministic_softmax(logits, axis=-1, sum_mode=sum_mode)
        can_ms = (time.perf_counter() - tick) * 1000.0
        report = {
            "seed": seed, "shape": [n, v], "dtype": dtype, "sum_mode": sum_mode,
            "ms": {"standard": round(std_ms,3), "canonical": round(can_ms,3)},
            "tolerance_vs_standard": tol_stats(probs_std, probs_can),
            "hash": {"standard": std_hash, "canonical": sha256_ndarray(probs_can)},
        }
        lines = [
            "== Softmax ==",
            f"seed={seed} shape=({n},{v}) dtype={dtype} sum={sum_mode}",
            f"tol: {report['tolerance_vs_standard']}",
            f"ms: standard={round(std_ms,3)} canonical={round(can_ms,3)}",
        ]
        return "\n".join(lines), _save_json(report)

    if test_kind == "Categorical sampling (deterministic)":
        logits = gen_demo(seed, (v,), dtype=dtype)
        draws_std, std_ms, _ = standard_categorical(logits, num_samples=16, seed=seed)
        tick = time.perf_counter()
        first = D.deterministic_categorical(logits, num_samples=16, seed=seed, sum_mode=sum_mode)
        det_ms = (time.perf_counter() - tick) * 1000.0
        second = D.deterministic_categorical(logits, num_samples=16, seed=seed, sum_mode=sum_mode)
        report = {
            "seed": seed, "vocab": v, "samples": 16, "dtype": dtype, "prng": PRNG_MODE, "sum_mode": sum_mode,
            "standard_samples": draws_std.tolist(),
            "deterministic_samples": first.tolist(),
            "deterministic_stable": bool(np.array_equal(first, second)),
            "ms": {"standard": round(std_ms,3), "deterministic": round(det_ms,3)},
        }
        lines = [
            "== Categorical sampling ==",
            f"seed={seed} vocab={v} samples=16 dtype={dtype} PRNG={PRNG_MODE} sum={sum_mode}",
            f"deterministic_stable: {report['deterministic_stable']}",
            f"ms: std={report['ms']['standard']} deterministic={report['ms']['deterministic']}",
            f"deterministic samples (first 16): {first.tolist()}",
        ]
        return "\n".join(lines), _save_json(report)

    # No branch matched the dropdown value.
    return "Unknown test.", _save_json({"error": "unknown test"})

def _save_json(payload: Dict[str, Any]) -> str:
    json_bytes = json.dumps(payload, indent=2).encode("utf-8")
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
    try:
        tmp.write(json_bytes); tmp.flush()
        path = tmp.name
    finally:
        tmp.close()
    return path

# =========================== Benchmark ===========================

def _timed(fn, repeats: int = 10, warmup: int = 3) -> float:
    for _ in range(warmup):
        fn()
    t0 = time.perf_counter()
    for _ in range(repeats):
        fn()
    t1 = time.perf_counter()
    return (t1 - t0) / repeats

def bench_suite(ns=(1, 8, 32), vs=(128, 512, 1024), dtype="float32", sum_mode="kahan", seed=0):
    """Time a plain softmax+argmax against deterministic sampling.

    For every ``(n, v)`` pair a standard-normal input of that shape is drawn
    and two closures are timed with ``_timed``: a vectorised NumPy
    softmax + argmax, and ``n`` calls to ``D.deterministic_categorical``
    (one sample per row).

    Args:
        ns: Row counts to benchmark.
        vs: Row widths to benchmark.
        dtype: ``"float32"`` selects float32; anything else float64.
        sum_mode: Softmax summation mode for the deterministic path.
        seed: Seed for the local generator that draws the inputs, so repeated
            benchmarks run on the same data (the global NumPy RNG is left
            untouched).

    Returns:
        List of dicts with the keys ``n``, ``v``, ``t_std_ms``, ``t_can_ms``,
        ``overhead_pct``, ``dtype``, ``prng``, ``sum``.
    """
    np_dtype = np.float32 if dtype == "float32" else np.float64
    rng = np.random.default_rng(seed)
    results = []
    for n in ns:
        for v in vs:
            x = rng.standard_normal((n, v)).astype(np_dtype)

            # Bind the current x / n as defaults so each closure is
            # self-contained.
            def f_std(x=x):
                p = np.exp(x - np.max(x, axis=1, keepdims=True))
                p = p / np.sum(p, axis=1, keepdims=True)
                _ = np.argmax(p, axis=1)

            def f_can(x=x, n=n):
                for i in range(n):
                    _ = D.deterministic_categorical(x[i], num_samples=1, seed=42, sum_mode=sum_mode)

            t_std = _timed(f_std)
            t_can = _timed(f_can)
            results.append({
                "n": int(n), "v": int(v),
                "t_std_ms": round(1000.0 * t_std, 3),
                "t_can_ms": round(1000.0 * t_can, 3),
                # Floor the denominator to avoid dividing by a zero timing.
                "overhead_pct": round(100.0 * (t_can - t_std) / max(t_std, 1e-9), 2),
                "dtype": dtype, "prng": PRNG_MODE, "sum": sum_mode,
            })
    return results

def run_benchmark_and_save(dtype: str, sum_mode: str, prng_choice: str):
    """Gradio callback: run ``bench_suite`` and export the results.

    Sets the module-level ``PRNG_MODE`` to ``prng_choice``, runs the
    benchmark, and writes the rows to a temporary CSV file and a temporary
    JSON file (both created with ``delete=False``).

    Returns:
        ``(table, csv_path, json_path)`` where ``table`` is a list of rows
        ordered like the CSV header.
    """
    global PRNG_MODE
    PRNG_MODE = prng_choice
    results = bench_suite(dtype=dtype, sum_mode=sum_mode)
    headers = ["n","v","t_std_ms","t_can_ms","overhead_pct","dtype","prng","sum"]
    table = []
    for record in results:
        table.append([record[column] for column in headers])

    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") as csv_file:
        csv_file.write(",".join(headers) + "\n")
        for row in table:
            cells = [str(cell) for cell in row]
            csv_file.write(",".join(cells) + "\n")
        csv_file.flush()
        csv_path = csv_file.name

    with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=2)
        json_file.flush()
        json_path = json_file.name

    return table, csv_path, json_path

# =========================== Gradio UI ===========================

# Build the Gradio interface: a "Suite" tab that runs one selected test via
# run_single_test, and a "Benchmark" tab that runs run_benchmark_and_save.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# MelodyDeterminism - Canonical Determinism Demo (NumPy / CPU)")
    gr.Markdown(
        "Deterministic ops: reduce (Kahan/Tree), softmax canonica (max tree + sum kahan/tree), sampling RNG dichiarativo. "
        "PRNG: Philox (GPU-like) o SHA256 (indipendente). Edge: maschera, ±inf, nan, shift, idempotenza. "
        "Benchmark parametrico con overhead%."
    )

    with gr.Tabs():
        with gr.Tab("Suite"):
            with gr.Row():
                # Left column: test selection and parameters.
                with gr.Column(scale=1):
                    # The choice strings must match the literals compared in
                    # run_single_test.
                    test_kind = gr.Dropdown(
                        label="Select test",
                        choices=[
                            "Full suite",
                            "Reduce (tree vs kahan vs standard)",
                            "Softmax (canonical vs standard)",
                            "Softmax (edge: mask ±inf/nan)",
                            "Categorical sampling (deterministic)",
                        ],
                        value="Full suite",
                    )
                    seed = gr.Number(value=42, precision=0, label="Seed")
                    n = gr.Slider(1, 256, step=1, value=8, label="Rows / Batch (n)")
                    v = gr.Slider(2, 4096, step=1, value=32, label="Width / Vocab (v)")
                    dtype = gr.Radio(["float32","float64"], value="float32", label="dtype")
                    sum_mode = gr.Radio(["kahan","tree"], value="tree", label="Softmax sum")  # default GPU-like
                    prng_choice = gr.Radio(["philox","sha256"], value="philox", label="PRNG")
                    run_btn = gr.Button("Run")

                # Right column: text report and downloadable JSON file.
                with gr.Column(scale=2):
                    report = gr.Textbox(label="Report", lines=24)
                    download = gr.File(label="Download JSON report")

            # The input order matches run_single_test's parameter order; the
            # two outputs receive its (text, json_path) return value.
            run_btn.click(
                run_single_test,
                inputs=[test_kind, seed, n, v, dtype, sum_mode, prng_choice],
                outputs=[report, download]
            )

        with gr.Tab("Benchmark"):
            gr.Markdown("Confronto standard vs deterministico (sampling) con le scelte sotto.")
            dtype_b = gr.Radio(["float32","float64"], value="float32", label="dtype")
            sum_mode_b = gr.Radio(["kahan","tree"], value="tree", label="Softmax sum")  # default GPU-like
            prng_b = gr.Radio(["philox","sha256"], value="philox", label="PRNG")
            bench_btn = gr.Button("Esegui benchmark")
            # Column headers mirror the header list in run_benchmark_and_save.
            bench_table = gr.Dataframe(
                headers=["n","v","t_std_ms","t_can_ms","overhead_pct","dtype","prng","sum"],
                label="Latenze (ms) e overhead (%)",
                wrap=True,
            )
            bench_csv = gr.File(label="Scarica CSV")
            bench_json = gr.File(label="Scarica JSON")
            # Outputs receive the (table, csv_path, json_path) return value.
            bench_btn.click(run_benchmark_and_save, inputs=[dtype_b, sum_mode_b, prng_b], outputs=[bench_table, bench_csv, bench_json])

# Launch the app with request queuing enabled when run as a script.
if __name__ == "__main__":
    demo.queue().launch()