"""Convert physics inference results (JSONL) into a parquet validation set.

Each non-blank line of the source JSONL file is parsed as one JSON record and
turned into a row holding a chat-style prompt, the ground-truth answer, and
bookkeeping info (category, subfield, index, image path).  The rows are written
to a parquet file, which is then read back and spot-checked by printing whether
a handful of the referenced image files exist on disk.
"""

import json
import os
from typing import Any, Dict, Iterable, List

import pandas as pd

# Input JSONL with one inference record per line.
src = "/workspace/rl4phyx/RL4Phyx/SFT/sft_eval_footprint/inference_results_base.jsonl"

# Directory holding the images; each record maps to "<index>.png" inside it.
IMAGE_BASE = "/workspace/rl4phyx/RL4Phyx/MetaPhyX/test_images"

# Destination parquet file.
out_path = "/workspace/rl4phyx/RL4Phyx/oneshot/validation_data/metaphyx_oe_1533.parquet"

# Row positions inspected after the round trip.  Positions beyond the end of
# the dataset are skipped instead of raising IndexError.
SPOT_CHECK_INDICES = (0, 100, 500, 1000, 1532)


def parse_records(raw_lines: Iterable[str]) -> List[Dict[str, Any]]:
    """Parse JSONL text lines into records, skipping blank lines.

    Args:
        raw_lines: Iterable of text lines (e.g. an open text file).

    Returns:
        One decoded JSON value per non-blank line, in input order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    return [json.loads(line) for line in raw_lines if line.strip()]


def build_row(record: Dict[str, Any], image_base: str = IMAGE_BASE) -> Dict[str, Any]:
    """Build one output row from a single inference record.

    Args:
        record: Decoded JSON record.  The keys "index", "category",
            "subfield", "ground_truth_value" and "question" are read; each is
            optional and falls back to a default when absent.
        image_base: Directory that the image file name is joined onto.

    Returns:
        A dict with the keys "data_source", "prompt", "ability",
        "reward_model" and "extra_info".
    """
    # NOTE: a record without "index" falls back to 0 and therefore points at
    # "0.png"; this mirrors the original behaviour.
    idx = record.get("index", 0)
    question = record.get("question", "")
    # The ground truth is always stored as a stripped string, whatever its
    # JSON type was.
    gt_value = str(record.get("ground_truth_value", "")).strip()

    prompt_text = (
        "Look at the image and answer the physics question.\n\n"
        f"{question}\n\n"
        "Please reason step by step and put your final answer "
        "(with units if applicable) in \\boxed{}."
    )

    return {
        "data_source": "metaphyx_physics",
        "prompt": [{"content": prompt_text, "role": "user"}],
        "ability": "physics",
        "reward_model": {"ground_truth": gt_value, "style": "rule"},
        "extra_info": {
            "category": record.get("category", "unknown"),
            "subfield": record.get("subfield", ""),
            "index": idx,
            "image_path": os.path.join(image_base, f"{idx}.png"),
            "split": "test",
        },
    }


def spot_check_indices(n_rows: int) -> List[int]:
    """Return the spot-check positions that are valid for ``n_rows`` rows."""
    return [i for i in SPOT_CHECK_INDICES if i < n_rows]


def main(
    src_path: str = src,
    image_base: str = IMAGE_BASE,
    output_path: str = out_path,
) -> None:
    """Run the conversion and print a short verification report.

    Args:
        src_path: JSONL file to read.
        image_base: Directory that image file names are joined onto.
        output_path: Parquet file to write.
    """
    # JSON text is read as UTF-8 rather than the platform default encoding.
    with open(src_path, encoding="utf-8") as f:
        records = parse_records(f)

    df = pd.DataFrame([build_row(r, image_base) for r in records])

    # Create the destination directory so a fresh checkout does not fail on
    # the write.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_parquet(output_path, index=False)

    # Read the file back to confirm the round trip works.
    df2 = pd.read_parquet(output_path)
    print(f"Saved: {output_path}")
    print(f"Shape: {df2.shape}")

    if df2.empty:
        # Nothing to spot-check; avoid indexing into an empty frame.
        return

    img0 = df2.iloc[0]["extra_info"]["image_path"]
    print(f"First image: {img0}")
    print(f"Exists: {os.path.exists(img0)}")

    for i in spot_check_indices(len(df2)):
        ip = df2.iloc[i]["extra_info"]["image_path"]
        print(f" [{i}] {os.path.basename(ip)}: exists={os.path.exists(ip)}")


if __name__ == "__main__":
    main()