| import pandas as pd | |
| import json | |
def trans_parquet(
    jsonl_path,
    save_path,
    *,
    prompt_prefix="Return your final response within \\boxed{}. ",
):
    """Convert a JSONL question/answer file into a two-column parquet file.

    Each non-blank line of ``jsonl_path`` is parsed as a JSON object. The
    object's ``problem`` value is prepended with ``prompt_prefix`` and stored
    in the ``problem`` column; its ``answer`` value is copied unchanged into
    the ``answer`` column. The resulting table is written to ``save_path``
    without the DataFrame index.

    Args:
        jsonl_path: Path of the JSONL file to read (decoded as UTF-8).
        save_path: Path of the parquet file to write.
        prompt_prefix: Text placed in front of every problem statement.

    Raises:
        KeyError: If a record has no ``problem`` or ``answer`` key.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        # Skip blank / whitespace-only lines (e.g. a trailing newline at the
        # end of the file) instead of letting json.loads fail on them.
        records = [json.loads(line) for line in f if line.strip()]

    rl_records = [
        {
            "problem": prompt_prefix + item["problem"],
            "answer": item["answer"],
        }
        for item in records
    ]

    # Naming the columns keeps the schema stable even when the input holds
    # no records at all.
    rl_data = pd.DataFrame(rl_records, columns=["problem", "answer"])
    rl_data.to_parquet(save_path, index=False)
def read_parquet(save_path):
    """Load a parquet file, print a preview of it, and return the table.

    Args:
        save_path: Path of the parquet file to read.

    Returns:
        The DataFrame produced by ``pd.read_parquet(save_path)``.
    """
    frame = pd.read_parquet(save_path)
    # Show the leading rows (DataFrame.head()) as a quick sanity check.
    preview = frame.head()
    print(preview)
    return frame
if __name__ == "__main__":
    # Script entry point: convert the hard-coded amc23 JSONL file to parquet,
    # then read the freshly written file back and print its first rows.
    source_jsonl = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/test.jsonl"
    target_parquet = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/amc23.parquet"
    trans_parquet(source_jsonl, target_parquet)
    read_parquet(target_parquet)