# Extraction metadata: file size 796 bytes; revision id 7be6323 (presumably a commit hash).
import pandas as pd
# Extend the labelled dataset in DATA_PATH with a few hand-written examples:
# messages that contain profanity but are labelled 0, and messages labelled 1.
# The script is safe to re-run: examples already present in the CSV are skipped.

# CSV file that is read, extended, and written back in place.
DATA_PATH = 'toxic_data.csv'

# Examples that contain swear words but are labelled safe (label 0).
safe_with_swear = [
    ("damn this is so cool", 0),
    ("holy shit that's amazing", 0),
    ("what the hell is that", 0),
    ("fucking awesome work", 0),
    ("shit this is good", 0),
]

# Additional clearly harmful examples (label 1).
unsafe_examples = [
    ("i want to rape you", 1),
    ("you should kill yourself", 1),
    ("i'll burn your house", 1),
]


def append_new_examples(df: pd.DataFrame, examples) -> tuple:
    """Append the (text, label) pairs from *examples* that *df* lacks.

    Args:
        df: Existing dataset; must have ``text`` and ``label`` columns.
            It is not modified.
        examples: Iterable of ``(text, label)`` pairs to add.

    Returns:
        A ``(combined, new_rows)`` tuple: ``combined`` is *df* followed by
        the newly added rows with a fresh 0..n-1 index, and ``new_rows``
        holds only the rows that were actually added (possibly empty).

    Raises:
        ValueError: If *df* has no ``text`` or ``label`` column. Without
            this check ``pd.concat`` would silently produce a misaligned,
            NaN-filled table.
    """
    missing = {'text', 'label'} - set(df.columns)
    if missing:
        raise ValueError(
            f"dataset is missing required column(s): {sorted(missing)}"
        )

    # Track every pair seen so far so that neither rows already in the
    # dataset nor repeats inside *examples* are appended twice.
    seen = set(zip(df['text'], df['label']))
    fresh = []
    for example in examples:
        pair = tuple(example)
        if pair not in seen:
            seen.add(pair)
            fresh.append(pair)

    new_rows = pd.DataFrame(fresh, columns=['text', 'label'])
    if not fresh:
        # Skip concat with an empty frame: it adds nothing and could
        # change the dtype of the label column.
        return df.reset_index(drop=True), new_rows

    combined = pd.concat([df, new_rows], ignore_index=True)
    return combined, new_rows


def main() -> None:
    """Read DATA_PATH, append the built-in examples, and write it back."""
    # Load the existing data.
    df = pd.read_csv(DATA_PATH)

    df, new_rows = append_new_examples(df, safe_with_swear + unsafe_examples)

    # Save, then tell the user to retrain.
    df.to_csv(DATA_PATH, index=False)
    print(f"✅ 添加 {len(new_rows)} 条数据,总数: {len(df)}")
    print("现在运行 python train_guard0.py 重新训练")


if __name__ == "__main__":
    main()