"""Append hand-written examples to the toxicity dataset CSV.

Reads ``toxic_data.csv``, appends a small set of ``(text, label)`` rows and
writes the file back in place.  Re-running the script is safe: rows whose
``(text, label)`` pair is already present in the file are not added again.
"""

import pandas as pd

# The dataset is read from and written back to the same file.
DATA_PATH = 'toxic_data.csv'
COLUMNS = ['text', 'label']

df = pd.read_csv(DATA_PATH)

# Fail loudly rather than let pd.concat silently create misaligned,
# NaN-filled columns and then overwrite the dataset with them.
missing_columns = [name for name in COLUMNS if name not in df.columns]
if missing_columns:
    raise ValueError(
        f"{DATA_PATH} is missing expected column(s): {missing_columns}"
    )

# Label 0: sentences that contain profanity but are labelled as safe.
safe_with_swear = [
    ("damn this is so cool", 0),
    ("holy shit that's amazing", 0),
    ("what the hell is that", 0),
    ("fucking awesome work", 0),
    ("shit this is good", 0),
]

# Label 1: sentences labelled as unsafe.
unsafe_examples = [
    ("i want to rape you", 1),
    ("you should kill yourself", 1),
    ("i'll burn your house", 1),
]

candidates = safe_with_swear + unsafe_examples

# Keep only the pairs that are not already in the file, so that running the
# script repeatedly does not pile up duplicate rows in the training data.
already_present = set(zip(df['text'], df['label']))
is_new = [pair not in already_present for pair in candidates]
new_rows = pd.DataFrame(candidates, columns=COLUMNS)[is_new].reset_index(drop=True)

# Only touch the file when there is something to add.
if not new_rows.empty:
    df = pd.concat([df, new_rows], ignore_index=True)
    df.to_csv(DATA_PATH, index=False)

print(f"✅ 添加 {len(new_rows)} 条数据,总数: {len(df)}")
print("现在运行 python train_guard0.py 重新训练")