"""Quick verification script for generated dataset.

Loads the dataset JSON and prints:

* the total number of examples,
* the composition by type (the text before the first ``_`` in each ``id``),
* selected reasoning lines from the first few ``nozzle`` examples,
* how many examples report a negative MoS, a non-negative MoS, or no MoS.
"""
import json
from collections import Counter

DATASET_PATH = "datasets/synthetic_nozzles.json"


def count_types(examples):
    """Tally examples by type.

    The type is whatever precedes the first ``_`` in the example's ``id``.
    A missing ``id`` raises ``KeyError`` on purpose: this is a verification
    script and a record without an id is a dataset defect worth surfacing.
    """
    return Counter(ex["id"].split("_")[0] for ex in examples)


def spot_check_lines(reasoning, width=80):
    """Return the reasoning lines worth eyeballing, stripped and truncated.

    A line is kept when it contains ``D* =`` together with ``mm``, or when
    it contains ``MoS =`` and does not contain ``mm``. Each kept line is cut
    to at most ``width`` characters.
    """
    picked = []
    for raw in reasoning.split("\n"):
        line = raw.strip()
        has_mm = "mm" in line
        # The two conditions are mutually exclusive on "mm", so a line is
        # reported at most once.
        if ("D* =" in line and has_mm) or ("MoS =" in line and not has_mm):
            picked.append(line[:width])
    return picked


def count_mos(examples):
    """Classify examples by the MoS text found in their reasoning.

    Returns a ``(negative, non_negative, missing)`` tuple where:

    * ``negative``     - reasoning contains ``MoS = -``;
    * ``non_negative`` - reasoning contains ``MoS =`` but never ``MoS = -``;
    * ``missing``      - reasoning has no ``MoS =`` at all (or no reasoning).

    Keeping ``missing`` separate avoids reporting examples that never
    mention MoS as if they had a positive one.
    """
    negative = non_negative = missing = 0
    for ex in examples:
        reasoning = ex.get("reasoning", "")
        if "MoS = -" in reasoning:
            negative += 1
        elif "MoS =" in reasoning:
            non_negative += 1
        else:
            missing += 1
    return negative, non_negative, missing


def main(path=DATASET_PATH):
    """Load the dataset at ``path`` and print the verification report."""
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    print(f"Total examples: {len(data)}")

    # Count types (most frequent first; ties keep first-seen order).
    print("\nComposition:")
    for kind, count in count_types(data).most_common():
        print(f" {kind}: {count}")

    # Check nozzle dimensions on the first few nozzle examples only.
    print("\nNozzle Spot Checks:")
    nozzles = [ex for ex in data if ex["id"].startswith("nozzle")]
    for ex in nozzles[:5]:
        for line in spot_check_lines(ex.get("reasoning", "")):
            print(f" [{ex['id']}] {line}")

    # Summarize the sign of the reported MoS values.
    print("\nMoS Check:")
    negative, non_negative, missing = count_mos(data)
    print(f" Examples with negative MoS: {negative}")
    print(f" Examples with positive MoS: {non_negative}")
    print(f" Examples without MoS: {missing}")


if __name__ == "__main__":
    main()