File size: 531 Bytes
d8a76be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import pandas as pd

# Source parquet file and the destination for the filtered copy.
input_path = "/home/data/raw/test/4201_2355_full_label_1000-8192_sys3round.parquet"
output_path = "/home/data/raw/test/4201_2355_full_label_1000-8192_sys3round_chosensafe.parquet"

# Load the dataset into a DataFrame.
df = pd.read_parquet(input_path)

# Build a boolean mask for rows whose "chosen_label" column equals "safe",
# then keep only those rows.
is_safe = df["chosen_label"] == "safe"
df_safe = df[is_safe]

# Write the filtered rows to the new parquet file, omitting the index.
df_safe.to_parquet(output_path, index=False)

# Report how many rows were kept and where they were written.
kept = len(df_safe)
print(f"筛选完成,共保留 {kept} 条样本,已保存到 {output_path}")