import json

import pandas as pd

# Instruction prepended to every problem statement before it is stored.
_PROMPT_PREFIX = "Return your final response within \\boxed{}. "


def _load_rl_records(jsonl_path):
    """Read a JSON Lines file and build the problem/answer records.

    Args:
        jsonl_path: Path to a file holding one JSON object per line. Each
            object must provide the keys ``"problem"`` and ``"answer"``;
            any other keys are ignored.

    Returns:
        A list of dicts with exactly two keys: ``"problem"`` (the original
        problem text prefixed with the boxed-answer instruction) and
        ``"answer"`` (copied through unchanged).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an object lacks ``"problem"`` or ``"answer"``.
    """
    # Explicit UTF-8 so non-ASCII problem text is decoded the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        # Whitespace-only lines (e.g. extra trailing newlines) are not JSON
        # documents and would make json.loads raise, so they are skipped.
        items = (json.loads(line) for line in f if line.strip())
        return [
            {
                "problem": _PROMPT_PREFIX + item["problem"],
                "answer": item["answer"],
            }
            for item in items
        ]


def trans_parquet(jsonl_path, save_path):
    """Convert a JSONL problem set into a two-column parquet file.

    Every object in ``jsonl_path`` becomes one row with the columns
    ``problem`` (instruction prefix + original problem text) and ``answer``.
    The DataFrame index is not written to the output file.

    Args:
        jsonl_path: Path of the JSON Lines file to read.
        save_path: Destination path of the parquet file to write.
    """
    RL_data = pd.DataFrame(_load_rl_records(jsonl_path))
    RL_data.to_parquet(save_path, index=False)


def read_parquet(save_path):
    """Load a parquet file, print its first rows, and return it.

    Args:
        save_path: Path of the parquet file to read.

    Returns:
        The ``pandas.DataFrame`` loaded from ``save_path``.
    """
    data = pd.read_parquet(save_path)
    print(data.head())
    return data


if __name__ == "__main__":
    # Convert the AMC23 test split, then read it back as a quick sanity check.
    parquet_path = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/amc23.parquet"
    trans_parquet(
        "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/test.jsonl",
        parquet_path,
    )
    read_parquet(parquet_path)