File size: 906 Bytes
a80200a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd 
import json


def trans_parquet(jsonl_path, save_path):
    """Convert a JSONL problem set into a two-column Parquet file.

    Every non-blank line of ``jsonl_path`` is parsed as a JSON object. Only
    its ``problem`` and ``answer`` fields are kept: the problem text is
    prefixed with the boxed-answer instruction and the answer is copied
    unchanged. The result is written to ``save_path`` without an index.

    Args:
        jsonl_path: Path of the input JSONL file (decoded as UTF-8).
        save_path: Destination path of the Parquet file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``problem`` or ``answer`` field.
    """
    prompt_prefix = "Return your final response within \\boxed{}. "

    records = []
    # Explicit UTF-8: the platform default encoding is not guaranteed to
    # decode non-ASCII characters that may appear in the problem text.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (for example an extra trailing newline) are not
            # records and would make json.loads raise.
            if not line.strip():
                continue
            item = json.loads(line)
            records.append(
                {
                    "problem": prompt_prefix + item['problem'],
                    "answer": item['answer'],
                }
            )

    # The file is closed before the DataFrame work starts. Pinning the column
    # list keeps the schema stable even when the input holds no records.
    rl_data = pd.DataFrame(records, columns=["problem", "answer"])
    rl_data.to_parquet(save_path, index=False)


def read_parquet(save_path):
    """Load a Parquet file, print a preview of it, and return the frame.

    Args:
        save_path: Path of the Parquet file to read.

    Returns:
        The DataFrame produced by ``pd.read_parquet``.
    """
    frame = pd.read_parquet(save_path)
    # Show the leading rows as a quick sanity check of what was loaded.
    preview = frame.head()
    print(preview)
    return frame

if __name__ == "__main__":
    # Script entry point: convert the amc23 test.jsonl file to Parquet, then
    # read the written file back and print a preview of it.
    source_jsonl = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/test.jsonl"
    target_parquet = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation/data/amc23/amc23.parquet"

    trans_parquet(source_jsonl, target_parquet)
    read_parquet(target_parquet)