QWen2.5-eval-NEWA800 / data /trans_parquet.py
Xin-Rui's picture
Upload folder using huggingface_hub
a80200a verified
import pandas as pd
import json
def trans_parquet(
    jsonl_path,
    save_path,
    prompt_prefix="Return your final response within \\boxed{}. ",
):
    """Convert a JSONL question/answer file into a two-column Parquet file.

    Every non-blank line of ``jsonl_path`` is parsed as a JSON object that
    must provide ``"problem"`` and ``"answer"`` keys; all other keys are
    dropped. ``prompt_prefix`` is prepended to each problem, and the rows are
    written to ``save_path`` with the columns ``problem`` and ``answer``
    (no index column).

    Args:
        jsonl_path: Path of the JSONL file to read (decoded as UTF-8).
        save_path: Path of the Parquet file to write.
        prompt_prefix: Instruction text placed in front of each problem.
            Defaults to the ``\\boxed{}`` instruction.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"problem"`` or ``"answer"``.
    """
    records = []
    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and may fail on non-ASCII symbols in the problems.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of crashing inside json.loads.
                continue
            item = json.loads(line)
            records.append(
                {
                    "problem": prompt_prefix + item["problem"],
                    "answer": item["answer"],
                }
            )
    # Naming the columns keeps the output schema stable even with zero rows.
    RL_data = pd.DataFrame(records, columns=["problem", "answer"])
    RL_data.to_parquet(save_path, index=False)
def read_parquet(save_path):
    """Load a Parquet file, print a preview of it, and return the table.

    Args:
        save_path: Path of the Parquet file to load.

    Returns:
        The ``pandas.DataFrame`` read from ``save_path``.
    """
    frame = pd.read_parquet(save_path)
    # Echo the leading rows so the result can be checked by eye.
    preview = frame.head()
    print(preview)
    return frame
if __name__ == "__main__":
    # Script entry point: convert the AMC23 test split to Parquet, then load
    # the written file back and print a preview of it.
    source_jsonl = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/test.jsonl"
    target_parquet = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/amc23.parquet"
    trans_parquet(source_jsonl, target_parquet)
    read_parquet(target_parquet)