"""Append hand-picked examples to the toxicity dataset, then save it back.

Reads ``toxic_data.csv``, adds a small set of labelled sentences and writes
the file back in place. Rows whose (text, label) pair is already present are
skipped, so running the script more than once does not duplicate samples.
"""
import pandas as pd

# CSV file that is read, extended and overwritten in place.
DATA_PATH = 'toxic_data.csv'

# Safe sentences that nevertheless contain swear words (label 0).
safe_with_swear = [
    ("damn this is so cool", 0),
    ("holy shit that's amazing", 0),
    ("what the hell is that", 0),
    ("fucking awesome work", 0),
    ("shit this is good", 0),
]

# Clearly harmful sentences (label 1), added as a supplement.
unsafe_examples = [
    ("i want to rape you", 1),
    ("you should kill yourself", 1),
    ("i'll burn your house", 1),
]


def append_examples(df):
    """Return ``(combined, new_rows)`` for the dataset *df*.

    ``new_rows`` holds the examples from ``safe_with_swear`` and
    ``unsafe_examples`` that are not yet present in *df* (compared on the
    ``text`` and ``label`` columns); ``combined`` is *df* with those rows
    appended and a fresh 0..n-1 index. *df* itself is not modified.
    """
    candidates = safe_with_swear + unsafe_examples

    # Only de-duplicate when both columns exist; otherwise every candidate
    # is new and the concat below creates the missing columns.
    if {'text', 'label'} <= set(df.columns):
        seen = set(zip(df['text'], df['label']))
    else:
        seen = set()

    fresh = [row for row in candidates if row not in seen]
    new_rows = pd.DataFrame(fresh, columns=['text', 'label'])

    if new_rows.empty:
        # Nothing to add: skip concat so an empty frame cannot alter dtypes.
        return df.reset_index(drop=True), new_rows

    combined = pd.concat([df, new_rows], ignore_index=True)
    return combined, new_rows


def main():
    """Load the dataset, append the missing examples and save it back."""
    # Load the existing data.
    df = pd.read_csv(DATA_PATH)

    df, new_rows = append_examples(df)

    # Save; the model has to be retrained afterwards.
    df.to_csv(DATA_PATH, index=False)
    print(f"✅ 添加 {len(new_rows)} 条数据,总数: {len(df)}")
    print("现在运行 python train_guard0.py 重新训练")


if __name__ == "__main__":
    main()