File size: 1,826 Bytes
7735006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

import json, pandas as pd, os

# Input file in JSONL form: each non-blank line is parsed as one JSON value.
src = "/workspace/rl4phyx/RL4Phyx/SFT/sft_eval_footprint/inference_results_base.jsonl"
# Decode explicitly as UTF-8. Without `encoding=` Python uses the locale's
# preferred encoding, which can mis-decode (or fail on) non-ASCII text.
with open(src, encoding="utf-8") as f:
    # Whitespace-only lines are skipped so they never reach json.loads.
    lines = [json.loads(line) for line in f if line.strip()]

# Absolute directory of the test images, so the stored image paths do not
# depend on the working directory of whoever reads the dataset.
IMAGE_BASE = "/workspace/rl4phyx/RL4Phyx/MetaPhyX/test_images"


def _to_row(record):
    """Build one dataset row from a single parsed JSONL record.

    Reads the keys ``index``, ``category``, ``subfield``,
    ``ground_truth_value`` and ``question`` from *record*, falling back to
    ``0``, ``"unknown"``, ``""``, ``""`` and ``""`` respectively when a key
    is absent. The ground truth is converted with ``str()`` and stripped of
    surrounding whitespace. The image path is ``<IMAGE_BASE>/<index>.png``.
    """
    question = record.get("question", "")
    sample_index = record.get("index", 0)
    return {
        "data_source": "metaphyx_physics",
        "prompt": [
            {
                "content": f"Look at the image and answer the physics question.\n\n{question}\n\nPlease reason step by step and put your final answer (with units if applicable) in \\boxed{{}}.",
                "role": "user",
            }
        ],
        "ability": "physics",
        "reward_model": {
            "ground_truth": str(record.get("ground_truth_value", "")).strip(),
            "style": "rule",
        },
        "extra_info": {
            "category": record.get("category", "unknown"),
            "subfield": record.get("subfield", ""),
            "index": sample_index,
            # Absolute path of the image belonging to this record.
            "image_path": os.path.join(IMAGE_BASE, f"{sample_index}.png"),
            "split": "test",
        },
    }


# One row per loaded record, in file order.
rows = [_to_row(record) for record in lines]

# One DataFrame row per entry of `rows`; columns come from the row dict keys.
df = pd.DataFrame(rows)

out_path = "/workspace/rl4phyx/RL4Phyx/oneshot/validation_data/metaphyx_oe_1533.parquet"
# Make sure the destination directory exists before writing; the parquet
# writer does not create missing parent directories, so the write would
# otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# index=False: do not store the DataFrame index in the file.
df.to_parquet(out_path, index=False)

# Verify: read the written file back and report its location and shape.
df2 = pd.read_parquet(out_path)
print(f"Saved: {out_path}")
print(f"Shape: {df2.shape}")

n_rows = len(df2)

# Check first image exists (skipped for an empty file, where iloc[0] would
# raise IndexError).
if n_rows:
    img0 = df2.iloc[0]["extra_info"]["image_path"]
    print(f"First image: {img0}")
    print(f"Exists: {os.path.exists(img0)}")

# Check a few more. The positions are hard-coded; 1532 is presumably the last
# row of a 1533-row set (cf. the output file name) -- TODO confirm. Positions
# beyond the end of the file are skipped instead of raising IndexError.
for i in [0, 100, 500, 1000, 1532]:
    if i >= n_rows:
        continue
    ip = df2.iloc[i]["extra_info"]["image_path"]
    print(f"  [{i}] {os.path.basename(ip)}: exists={os.path.exists(ip)}")