# File size: 1,526 Bytes | 26d786a
import pandas as pd
# Parquet file that is read here and referenced again later in the script.
path = "/workspace/rl4phyx/RL4Phyx/ZeroSearch/One-Shot-RLVR/data/train/physics_vlm/mechanics/mechanics_1_rl_numerical.parquet"

# Open-ended prompt written from scratch, one literal per output line.
# The doubled backslash yields a single literal backslash, i.e. "\boxed{}".
open_ended_prompt = "\n".join(
    (
        "Look at the image and answer the physics question.",
        "A patient with a dislocated shoulder is put into a traction apparatus as shown in figure. The pulls A and B have equal magnitudes and must combine to produce an outward traction force of 12.8 N on the patient's arm.",
        "Question: How large should these pulls be?",
        "Please reason step by step and put your final numerical answer (with units) in \\boxed{}.",
    )
)

# Load the existing rows from the parquet file.
df = pd.read_parquet(path)
# Rebuild all rows with the clean prompt.
# Every row receives the same single-message user prompt; the remaining
# fields are copied unchanged from the source row.
new_rows = [
    {
        "data_source": row["data_source"],
        "prompt": [{"content": open_ended_prompt, "role": "user"}],
        "ability": row["ability"],
        # Per the original author's note this keeps
        # {'ground_truth': '7.55N', 'style': 'rule'}.
        "reward_model": row["reward_model"],
        "extra_info": row["extra_info"],
    }
    for _, row in df.iterrows()
]

# Passing the column list explicitly keeps the schema even when the input
# frame has no rows; without it an empty input would produce a column-less
# frame and the in-place overwrite below would wipe the file's columns.
new_df = pd.DataFrame(
    new_rows,
    columns=["data_source", "prompt", "ability", "reward_model", "extra_info"],
)

# NOTE: this overwrites the file that was just read.
new_df.to_parquet(path, index=False)
# Verify: read the file back and print a summary plus the first row's fields.
df2 = pd.read_parquet(path)
print("Shape:", df2.shape)
print("Columns:", list(df2.columns))
print()
print("Prompt:")
# Look up the first row and its prompt text once and reuse them below.
first_row = df2.iloc[0]
prompt_text = first_row["prompt"][0]["content"]
print(prompt_text)
print()
print("reward_model:", first_row["reward_model"])
print("extra_info:", first_row["extra_info"])
print()
# Reports whether the word "Options" appears in the first row's prompt.
has_options = "Options" in prompt_text
print("Has Options:", has_options)
print("DONE!")
|