# File size: 1,115 Bytes (metadata from the file viewer; not part of the script)
"""Quick verification script for generated dataset.

Loads the dataset JSON and prints, in order: the total number of examples,
the composition by id prefix, a few nozzle spot checks, and a summary of the
sign of the "MoS =" values found in each example's reasoning text.
"""
import json
from collections import Counter

# Default dataset location, resolved relative to the current working directory.
DATASET_PATH = "datasets/synthetic_nozzles.json"


def _reasoning_text(example):
    """Return the example's "reasoning" string, or "" when it is absent or null."""
    return example.get("reasoning") or ""


def _print_composition(data):
    """Print how many examples share each id prefix, most frequent first."""
    # The prefix is everything before the first underscore of the "id" field.
    counts = Counter(ex["id"].split("_")[0] for ex in data)
    print("\nComposition:")
    # most_common() orders by descending count; ties keep first-seen order,
    # which matches a stable sort on the negated count.
    for prefix, count in counts.most_common():
        print(f" {prefix}: {count}")


def _print_nozzle_spot_checks(data, limit=5):
    """Print the "D* =" and "MoS =" lines of the first ``limit`` nozzle examples.

    An example is a nozzle example when its "id" starts with "nozzle".
    Each printed line is truncated to 80 characters.
    """
    print("\nNozzle Spot Checks:")
    nozzles = [ex for ex in data if ex["id"].startswith("nozzle")]
    for ex in nozzles[:limit]:
        for raw_line in _reasoning_text(ex).split("\n"):
            line = raw_line.strip()
            has_mm = "mm" in line
            # "D* =" lines are shown only when they mention "mm";
            # "MoS =" lines are shown only when they do not.
            if ("D* =" in line and has_mm) or ("MoS =" in line and not has_mm):
                print(f" [{ex['id']}] {line[:80]}")


def _print_mos_summary(data):
    """Print how many examples report a negative vs. a non-negative MoS.

    An example counts as negative when its reasoning contains "MoS = -"
    anywhere. It counts as positive only when it contains "MoS =" and no
    negative occurrence. Examples with no "MoS =" text at all are reported
    separately instead of being folded into the positive count.
    """
    print("\nMoS Check:")
    negative = positive = missing = 0
    for ex in data:
        text = _reasoning_text(ex)
        if "MoS = -" in text:
            negative += 1
        elif "MoS =" in text:
            positive += 1
        else:
            missing += 1
    print(f" Examples with negative MoS: {negative}")
    print(f" Examples with positive MoS: {positive}")
    # Only mentioned when relevant so the usual output stays unchanged.
    if missing:
        print(f" Examples without MoS: {missing}")


def main(path=DATASET_PATH):
    """Load the dataset at ``path`` and print the verification report.

    Args:
        path: Location of the dataset JSON file; its top level is iterated
            as a sequence of example objects that each carry an "id" string.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an example has no "id" field.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    print(f"Total examples: {len(data)}")
    _print_composition(data)
    _print_nozzle_spot_checks(data)
    _print_mos_summary(data)


if __name__ == "__main__":
    main()