"""Phase-7 ceiling-check prep for Interaction + Critical (goal/decomp reuse Phase-6 artifacts). Builds 50-unit stratified samples (positive-enriched so inter-model κ is stable) from the 159-conv validation set, with Kim GT attached, plus a BLIND version (context only) for Opus/Sonnet labelers. Interaction unit: (prev_user, this_user) -> refinement_attempt bool. Critical unit: (prev_assistant, this_user) -> set of CE types (5). Run: python -m eval._ceiling.prep_p7 """ from __future__ import annotations import json import os import random from eval import kappa as K from prompt_card.scoring import observable_axes as OA from eval.step_critical import CE HERE = os.path.dirname(__file__) SEED = 7 N = 50 def _interaction_units(gt, convs): units = [] for r in gt: ut = K.user_turns(convs[r["id"]]) for row in r["interaction"]: i = int(row["turn"][1:]) - 1 if i < 1 or i >= len(ut): continue units.append({"cid": r["id"], "turn": row["turn"], "prev_user": ut[i - 1], "this_user": ut[i], "kim": bool(row["refinement"])}) return units def _critical_units(gt, convs): units = [] for r in gt: conv = convs[r["id"]]; ut = K.user_turns(conv) for row in r["critical"]: i = int(row["turn"][1:]) - 1 if i < 0 or i >= len(ut): continue units.append({"cid": r["id"], "turn": row["turn"], "prev_assistant": OA._prev_assistant(conv, i) or "", "this_user": ut[i], "kim": sorted(row["types"])}) return units def _stratified(units, is_pos, n, rng, pos_frac): pos = [u for u in units if is_pos(u)] neg = [u for u in units if not is_pos(u)] rng.shuffle(pos); rng.shuffle(neg) npos = min(len(pos), int(n * pos_frac)) sample = pos[:npos] + neg[:n - npos] rng.shuffle(sample) for k, u in enumerate(sample): u["idx"] = k return sample def _write(name, sample, blind_fields): json.dump(sample, open(os.path.join(HERE, f"{name}_samples_p7.json"), "w"), ensure_ascii=False, indent=1) blind = [{**{"idx": u["idx"]}, **{f: u[f] for f in blind_fields}} for u in sample] json.dump(blind, open(os.path.join(HERE, f"{name}_blind_p7.json"), "w"), ensure_ascii=False, indent=1) print(f"[prep_p7] {name}: {len(sample)} units ({sum(1 for u in sample if (u['kim'] if isinstance(u['kim'],bool) else u['kim']))} pos-ish)") def main(): rng = random.Random(SEED) gt = K.load_gt(); convs = K.load_convs() inter = _stratified(_interaction_units(gt, convs), lambda u: u["kim"], N, rng, 0.4) _write("interaction", inter, ["prev_user", "this_user"]) crit = _stratified(_critical_units(gt, convs), lambda u: len(u["kim"]) > 0, N, rng, 0.5) _write("critical", crit, ["prev_assistant", "this_user"]) if __name__ == "__main__": main()