Spaces:
Runtime error
Runtime error
| """Phase-7 ceiling-check prep for Interaction + Critical (goal/decomp reuse Phase-6 artifacts). | |
| Builds 50-unit stratified samples (positive-enriched so inter-model κ is stable) from the 159-conv | |
| validation set, with Kim GT attached, plus a BLIND version (context only) for Opus/Sonnet labelers. | |
| Interaction unit: (prev_user, this_user) -> refinement_attempt bool. | |
| Critical unit: (prev_assistant, this_user) -> set of CE types (5). | |
| Run: python -m eval._ceiling.prep_p7 | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import random | |
| from eval import kappa as K | |
| from prompt_card.scoring import observable_axes as OA | |
| from eval.step_critical import CE | |
# Directory containing this module; _write() places its JSON outputs here.
HERE = os.path.dirname(__file__)
# Seed for the random.Random instance created in main(), so sampling is reproducible.
SEED = 7
# Sample size requested from _stratified() for each axis in main().
N = 50
def _interaction_units(gt, convs):
    """Collect Interaction units: (prev_user, this_user) pairs with Kim's label.

    Args:
        gt: iterable of ground-truth records; each is indexed by ``"id"`` and
            ``"interaction"`` (rows carrying ``"turn"`` and ``"refinement"``).
        convs: mapping from conversation id to the conversation object that
            ``K.user_turns`` accepts.

    Returns:
        A list of dicts with keys ``cid``, ``turn``, ``prev_user``,
        ``this_user`` and ``kim`` (the row's ``refinement`` coerced to bool).
    """
    collected = []
    for record in gt:
        cid = record["id"]
        user_msgs = K.user_turns(convs[cid])
        n_user = len(user_msgs)
        for entry in record["interaction"]:
            label = entry["turn"]
            # The label's first character is dropped and the rest parsed as a
            # 1-based index (presumably labels look like "U3" -- confirm).
            pos = int(label[1:]) - 1
            # A unit needs a preceding user turn, so index 0 is excluded too.
            if 1 <= pos < n_user:
                collected.append({
                    "cid": cid,
                    "turn": label,
                    "prev_user": user_msgs[pos - 1],
                    "this_user": user_msgs[pos],
                    "kim": bool(entry["refinement"]),
                })
    return collected
def _critical_units(gt, convs):
    """Collect Critical units: (prev_assistant, this_user) with Kim's CE types.

    Args:
        gt: iterable of ground-truth records; each is indexed by ``"id"`` and
            ``"critical"`` (rows carrying ``"turn"`` and ``"types"``).
        convs: mapping from conversation id to the conversation object passed
            to ``K.user_turns`` and ``OA._prev_assistant``.

    Returns:
        A list of dicts with keys ``cid``, ``turn``, ``prev_assistant``,
        ``this_user`` and ``kim`` (the row's ``types`` as a sorted list).
    """
    collected = []
    for record in gt:
        cid = record["id"]
        conversation = convs[cid]
        user_msgs = K.user_turns(conversation)
        n_user = len(user_msgs)
        for entry in record["critical"]:
            label = entry["turn"]
            # The label's first character is dropped and the rest parsed as a
            # 1-based index (presumably labels look like "U3" -- confirm).
            pos = int(label[1:]) - 1
            if not 0 <= pos < n_user:
                continue
            # A falsy result from the helper is normalised to "".
            preceding = OA._prev_assistant(conversation, pos) or ""
            collected.append({
                "cid": cid,
                "turn": label,
                "prev_assistant": preceding,
                "this_user": user_msgs[pos],
                "kim": sorted(entry["types"]),
            })
    return collected
| def _stratified(units, is_pos, n, rng, pos_frac): | |
| pos = [u for u in units if is_pos(u)] | |
| neg = [u for u in units if not is_pos(u)] | |
| rng.shuffle(pos); rng.shuffle(neg) | |
| npos = min(len(pos), int(n * pos_frac)) | |
| sample = pos[:npos] + neg[:n - npos] | |
| rng.shuffle(sample) | |
| for k, u in enumerate(sample): | |
| u["idx"] = k | |
| return sample | |
def _write(name, sample, blind_fields):
    """Write the full and the blind JSON file for one axis, then print a summary.

    Two files are created in ``HERE``:

    * ``{name}_samples_p7.json`` -- every unit exactly as given.
    * ``{name}_blind_p7.json`` -- per unit only ``idx`` plus ``blind_fields``.

    Args:
        name: axis name used as the file-name prefix and in the summary line.
        sample: list of unit dicts; each must have ``"idx"``, ``"kim"`` and
            every key listed in ``blind_fields``.
        blind_fields: keys copied into the blind version.
    """
    blind = [{"idx": u["idx"], **{f: u[f] for f in blind_fields}} for u in sample]

    outputs = (
        (f"{name}_samples_p7.json", sample),
        (f"{name}_blind_p7.json", blind),
    )
    for filename, payload in outputs:
        # ensure_ascii=False emits raw non-ASCII text, so the encoding is
        # pinned to UTF-8 instead of the platform default; ``with`` makes sure
        # the handle is flushed and closed.
        with open(os.path.join(HERE, filename), "w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=1)

    # "kim" is either a bool or a list of types; truthiness covers both.
    n_pos = sum(1 for u in sample if u["kim"])
    print(f"[prep_p7] {name}: {len(sample)} units ({n_pos} pos-ish)")
def main():
    """Build and write the Interaction and Critical samples.

    A single ``random.Random(SEED)`` instance is shared by both axes, and the
    Interaction axis is sampled first, so the order of the table below
    determines the random draws.
    """
    rng = random.Random(SEED)
    gt = K.load_gt()
    convs = K.load_convs()

    def _refined(unit):
        # Interaction units are positive when Kim marked a refinement.
        return unit["kim"]

    def _has_types(unit):
        # Critical units are positive when Kim assigned at least one type.
        return len(unit["kim"]) > 0

    # (axis name, unit builder, positive predicate, positive share, blind keys)
    axes = (
        ("interaction", _interaction_units, _refined, 0.4, ["prev_user", "this_user"]),
        ("critical", _critical_units, _has_types, 0.5, ["prev_assistant", "this_user"]),
    )
    for axis, build, positive, share, visible in axes:
        drawn = _stratified(build(gt, convs), positive, N, rng, share)
        _write(axis, drawn, visible)
# Run the prep only when executed as a script or via
# `python -m eval._ceiling.prep_p7`, not when the module is imported.
if __name__ == "__main__":
    main()