Spaces:
Running
Running
| """Frozen held-out gold eval set (committed to eval/gold.jsonl). | |
| A FIXED test set so every fine-tune iteration (and generator change) is scored on the | |
| same examples — v1 vs v2 stay comparable. Regenerate intentionally with `build_gold`. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import random | |
| from pathlib import Path | |
| import pandas as pd | |
| from training.generate import make_example | |
| from . import metrics | |
| GOLD_PATH = Path(__file__).resolve().parent / "gold.jsonl" | |
| def build_gold(n: int = 300, seed: int = 4242, path: Path = GOLD_PATH) -> list[dict]: | |
| rng = random.Random(seed) | |
| out = [] | |
| while len(out) < n: | |
| ex = make_example(rng) | |
| if metrics.recovery(ex["clean_df"], ex["dirty_df"], ex["plan"]) >= 0.999: | |
| out.append(ex) | |
| with Path(path).open("w", encoding="utf-8") as f: | |
| for ex in out: | |
| clean = ex["clean_df"].where(pd.notna(ex["clean_df"]), None) | |
| f.write(json.dumps({ | |
| "dirty": ex["dirty_df"].to_dict("records"), | |
| "clean": clean.to_dict("records"), | |
| "dirty_cols": list(ex["dirty_df"].columns), | |
| "clean_cols": list(ex["clean_df"].columns), | |
| "plan": ex["plan"], | |
| }, ensure_ascii=False, default=str) + "\n") | |
| return out | |
| def load_gold(path: Path = GOLD_PATH) -> list[dict]: | |
| p = Path(path) | |
| if not p.exists(): | |
| return build_gold(path=p) | |
| out = [] | |
| for line in p.read_text(encoding="utf-8").splitlines(): | |
| d = json.loads(line) | |
| dirty = (pd.DataFrame(d["dirty"])[d["dirty_cols"]] if d["dirty"] | |
| else pd.DataFrame(columns=d["dirty_cols"])) | |
| clean = (pd.DataFrame(d["clean"])[d["clean_cols"]] if d["clean"] | |
| else pd.DataFrame(columns=d["clean_cols"])) | |
| out.append({"dirty_df": dirty, "clean_df": clean, "plan": d["plan"]}) | |
| return out | |
| if __name__ == "__main__": | |
| import argparse | |
| ap = argparse.ArgumentParser() | |
| ap.add_argument("--n", type=int, default=300) | |
| ap.add_argument("--seed", type=int, default=4242) | |
| args = ap.parse_args() | |
| g = build_gold(args.n, args.seed) | |
| print(f"Wrote {len(g)} frozen gold examples to {GOLD_PATH}") | |