import pandas as pd

# Benign examples that contain profanity (label 0), so that swearing alone
# is not treated as toxic.
safe_with_swear = [
    ("damn this is so cool", 0),
    ("holy shit that's amazing", 0),
    ("what the hell is that", 0),
    ("fucking awesome work", 0),
    ("shit this is good", 0),
]

# Additional clearly harmful examples (label 1).
unsafe_examples = [
    ("i want to rape you", 1),
    ("you should kill yourself", 1),
    ("i'll burn your house", 1),
]


def append_new_examples(df, examples):
    """Append ``(text, label)`` examples to *df*, skipping exact duplicates.

    Running the script more than once used to append the same rows again on
    every run. Pairs already present in *df* (or repeated within *examples*)
    are now skipped, so the operation is idempotent.

    Args:
        df: Existing dataset. Duplicate detection is only performed when it
            has both a ``text`` and a ``label`` column.
        examples: Iterable of ``(text, label)`` tuples to add.

    Returns:
        A ``(frame, added)`` tuple: the resulting DataFrame and the number of
        rows that were actually appended. When nothing is new, *df* itself is
        returned unchanged.
    """
    if {'text', 'label'}.issubset(df.columns):
        seen = set(zip(df['text'], df['label']))
    else:
        # No comparable columns: nothing can be recognised as a duplicate.
        seen = set()

    fresh = []
    for example in examples:
        if example not in seen:
            seen.add(example)
            fresh.append(example)

    # Avoid concatenating an empty frame, which could alter column dtypes.
    if not fresh:
        return df, 0

    new_rows = pd.DataFrame(fresh, columns=['text', 'label'])
    return pd.concat([df, new_rows], ignore_index=True), len(new_rows)


if __name__ == "__main__":
    # Load the existing data.
    df = pd.read_csv('toxic_data.csv')

    df, added = append_new_examples(df, safe_with_swear + unsafe_examples)

    # Save, then remind the user to retrain.
    df.to_csv('toxic_data.csv', index=False)
    print(f"✅ 添加 {added} 条数据，总数: {len(df)}")
    print("现在运行 python train_guard0.py 重新训练")