import os

import pandas as pd

# One-off maintenance script: give every row of the parquet file below the
# same open-ended prompt, keep a fixed set of the other columns, write the
# result back to the same path, then re-read it and print a summary.
path = "/workspace/rl4phyx/RL4Phyx/ZeroSearch/One-Shot-RLVR/data/train/physics_vlm/mechanics/mechanics_1_rl_numerical.parquet"
df = pd.read_parquet(path)

# Bail out before touching the file: with zero rows the rewrite would replace
# the source with an empty table and the verification below would then fail
# on `iloc[0]` anyway, after the damage is done.
if df.empty:
    raise ValueError(f"No rows found in {path}; refusing to overwrite it.")

# Build a clean open-ended prompt from scratch
open_ended_prompt = """Look at the image and answer the physics question.

A patient with a dislocated shoulder is put into a traction apparatus as shown in figure. The pulls A and B have equal magnitudes and must combine to produce an outward traction force of 12.8 N on the patient's arm.

Question: How large should these pulls be?

Please reason step by step and put your final numerical answer (with units) in \\boxed{}."""

# Columns carried over unchanged from the input. Any column not listed here
# is dropped from the rewritten file. `reward_model` is expected to hold
# {'ground_truth': '7.55N', 'style': 'rule'} (per the original author's note).
kept_columns = ["data_source", "ability", "reward_model", "extra_info"]
new_df = df[kept_columns].copy()

# Every row gets its own single-message chat list; inserted at position 1 so
# the output column order is data_source, prompt, ability, reward_model,
# extra_info.
prompt_cells = [
    [{"content": open_ended_prompt, "role": "user"}] for _ in range(len(new_df))
]
new_df.insert(1, "prompt", prompt_cells)

# Write next to the target and swap it in with a rename, so a failure while
# serialising leaves the original file untouched instead of half-written.
tmp_path = path + ".tmp"
new_df.to_parquet(tmp_path, index=False)
os.replace(tmp_path, path)

# Verify
df2 = pd.read_parquet(path)
first_row = df2.iloc[0]
first_prompt_text = first_row["prompt"][0]["content"]
print("Shape:", df2.shape)
print("Columns:", list(df2.columns))
print()
print("Prompt:")
print(first_prompt_text)
print()
print("reward_model:", first_row["reward_model"])
print("extra_info:", first_row["extra_info"])
print()
# Reports whether the word "Options" appears in the stored prompt text.
has_options = "Options" in first_prompt_text
print("Has Options:", has_options)
print("DONE!")