File size: 1,115 Bytes
947d4a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""Quick verification script for generated dataset"""
import json
from collections import Counter

# Location of the generated dataset, relative to the working directory.
DATASET_PATH = "datasets/synthetic_nozzles.json"
# How many nozzle examples the spot check inspects.
SPOT_CHECK_LIMIT = 5
# Maximum number of characters of a reasoning line echoed in the report.
MAX_LINE_WIDTH = 80


def _reasoning(example) -> str:
    """Return the example's "reasoning" text, or "" when missing or null."""
    return example.get("reasoning") or ""


def load_examples(path: str = DATASET_PATH):
    """Load and return the parsed JSON dataset stored at *path*.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def count_types(examples) -> list:
    """Count examples per type, where the type is the id text before the first "_".

    Returns a list of ``(type, count)`` tuples ordered by descending count;
    types with equal counts keep the order in which they were first seen.

    Raises:
        KeyError: if an example has no "id" key.
    """
    tally = Counter(ex["id"].split("_")[0] for ex in examples)
    return tally.most_common()


def spot_check_lines(
    examples, limit: int = SPOT_CHECK_LIMIT, width: int = MAX_LINE_WIDTH
) -> list:
    """Build the report lines for the nozzle spot check.

    Only the first *limit* examples whose id starts with "nozzle" are
    inspected. Within each example's reasoning text, a stripped line is
    reported when it either

    * contains both "D* =" and "mm", or
    * contains "MoS =" and does not contain "mm".

    Each reported line is truncated to *width* characters and prefixed with
    the example id. Examples without reasoning text contribute nothing.
    """
    nozzles = [ex for ex in examples if ex["id"].startswith("nozzle")]
    report = []
    for ex in nozzles[:limit]:
        for raw_line in _reasoning(ex).split("\n"):
            line = raw_line.strip()
            has_mm = "mm" in line
            # The two criteria are mutually exclusive (one requires "mm", the
            # other forbids it), so a line is reported at most once.
            if ("D* =" in line and has_mm) or ("MoS =" in line and not has_mm):
                report.append(f"  [{ex['id']}] {line[:width]}")
    return report


def count_negative_mos(examples) -> int:
    """Return how many examples contain the text "MoS = -" in their reasoning.

    This is a plain substring test; it does not cross-check whether the
    example also reports a failure.
    """
    return sum("MoS = -" in _reasoning(ex) for ex in examples)


def main() -> None:
    """Print the dataset summary: size, composition, spot checks, MoS counts."""
    data = load_examples()
    print(f"Total examples: {len(data)}")

    print("\nComposition:")
    for kind, count in count_types(data):
        print(f"  {kind}: {count}")

    print("\nNozzle Spot Checks:")
    for entry in spot_check_lines(data):
        print(entry)

    print("\nMoS Check:")
    neg_mos = count_negative_mos(data)
    print(f"  Examples with negative MoS: {neg_mos}")
    # The remainder also includes examples that mention no MoS at all, so it
    # is labelled as "without negative" rather than "positive".
    print(f"  Examples without negative MoS: {len(data) - neg_mos}")


if __name__ == "__main__":
    main()