"""Quick verification script for generated dataset""" import json with open("datasets/synthetic_nozzles.json") as f: data = json.load(f) print(f"Total examples: {len(data)}") # Count types types = {} for ex in data: t = ex["id"].split("_")[0] types[t] = types.get(t, 0) + 1 print("\nComposition:") for t, c in sorted(types.items(), key=lambda x: -x[1]): print(f" {t}: {c}") # Check nozzle dimensions print("\nNozzle Spot Checks:") nozzles = [e for e in data if e["id"].startswith("nozzle")] for ex in nozzles[:5]: r = ex["reasoning"] for line in r.split("\n"): line = line.strip() if "D* =" in line and "mm" in line: print(f" [{ex['id']}] {line[:80]}") if "MoS =" in line and "mm" not in line: print(f" [{ex['id']}] {line[:80]}") # Check for negative MoS without failure print("\nMoS Check:") neg_mos = 0 for ex in data: if "MoS = -" in ex.get("reasoning",""): neg_mos += 1 print(f" Examples with negative MoS: {neg_mos}") print(f" Examples with positive MoS: {len(data) - neg_mos}")