"""Serialize an ``AnalysisReport`` into a frontend-ready JSON dict. Everything a UI needs to render the verdict and every chart — forest plot, score-distribution histograms, Bayesian posterior curve, mSPRT trajectory — is pre-computed here so the client needs no numerical libraries. """ from __future__ import annotations import numpy as np def _f(x) -> float | None: """Cast to a JSON-safe float, mapping NaN/None to null.""" try: if x is None: return None xf = float(x) return None if np.isnan(xf) else xf except (TypeError, ValueError): return None def report_to_dict(report, scoring, resume_a=None, resume_b=None) -> dict: v = report.verdict boot = report.bootstrap clusters = [] for _, r in report.per_cluster.iterrows(): clusters.append( { "cluster_id": int(r["cluster_id"]), "label": str(r["label"]), "n": int(r["n"]), "mean_delta": _f(r["mean_delta"]), "ci_low": _f(r["ci_low"]), "ci_high": _f(r["ci_high"]), "p_raw": _f(r["p_raw"]), "p_bonferroni": _f(r["p_bonferroni"]), "p_bh_fdr": _f(r["p_bh_fdr"]), "sig_bonferroni": bool(r["sig_bonferroni"]), "sig_bh": bool(r["sig_bh"]), "winner": str(r["winner"]), } ) lo = float(min(scoring.scores_a.min(), scoring.scores_b.min())) hi = float(max(scoring.scores_a.max(), scoring.scores_b.max())) edges = np.linspace(lo, hi, 41) a_counts, _ = np.histogram(scoring.scores_a, bins=edges) b_counts, _ = np.histogram(scoring.scores_b, bins=edges) centers = (edges[:-1] + edges[1:]) / 2 xs, pdf = report.bayes.posterior_pdf(128) traj = report.sequential.trajectory_p idx = np.unique(np.linspace(0, len(traj) - 1, 200).astype(int)) sequential_points = [{"n": int(i + 1), "p": float(traj[i])} for i in idx] out = { "verdict": { "winner": v.winner, "headline": v.headline, "confidence": v.confidence, "significant": bool(v.significant), "mean_delta": _f(v.mean_delta), "mean_delta_points": _f(v.mean_delta * 100), "ci_points": [_f(boot.bca_low * 100), _f(boot.bca_high * 100)], "p_value": _f(v.p_value), "cohens_d": _f(v.cohens_d), }, "summary": { **{k: _f(val) for k, val in report.scores_summary.items()}, "n_jobs": report.n_jobs, }, "tests": { "primary": { "name": report.primary_test.name, "statistic": _f(report.primary_test.statistic), "pvalue": _f(report.primary_test.pvalue), "ci_low": _f(report.primary_test.ci_low), "ci_high": _f(report.primary_test.ci_high), }, "normality": { "name": report.normality.name, "statistic": _f(report.normality.statistic), "pvalue": _f(report.normality.pvalue), "normal_at_05": bool(report.normality.detail.get("normal_at_05", True)), }, "cuped_test": { "name": report.cuped_test.name, "pvalue": _f(report.cuped_test.pvalue), "ci_low": _f(report.cuped_test.ci_low), "ci_high": _f(report.cuped_test.ci_high), }, }, "effect": { "cohens_d": _f(report.cohens_d), "achieved_power": _f(report.achieved_power), "required_n_80": _f(report.required_n_80), "mde": report.mde.to_dict(orient="records"), }, "bootstrap": { "point": _f(boot.point), "percentile": [_f(boot.pct_low), _f(boot.pct_high)], "bca": [_f(boot.bca_low), _f(boot.bca_high)], "n_resamples": boot.n_resamples, }, "cuped": { "variance_reduction": _f(report.cuped.variance_reduction), "r_squared": _f(report.cuped.r_squared), "effective_n_multiplier": _f(report.cuped.effective_n_multiplier), "n_covariates": report.cuped.n_covariates, }, "sequential": { "always_valid_p": _f(report.sequential.always_valid_p), "reject_h0": bool(report.sequential.reject_h0), "n": report.sequential.n, "trajectory": sequential_points, }, "bayes": { "k": report.bayes.k, "n": report.bayes.n, "posterior_mean": _f(report.bayes.mean), "credible_interval": [_f(report.bayes.ci_low), _f(report.bayes.ci_high)], "prob_b_beats_a": _f(report.bayes.prob_b_beats_a), "posterior_curve": [ {"p": float(x), "density": float(y)} for x, y in zip(xs, pdf, strict=True) ], }, "distributions": { "bin_centers": [float(c) for c in centers], "resume_a": [int(c) for c in a_counts], "resume_b": [int(c) for c in b_counts], }, "clusters": clusters, } if resume_a is not None and resume_b is not None: out["inputs"] = { slot: { "chars": r.char_count, "format": r.source_format, "parser": r.parser_used, "skills": r.skills, "parse_quality": r.parse_quality, "flags": r.quality_flags, } for slot, r in (("resume_a", resume_a), ("resume_b", resume_b)) } return out