# File size: 565 Bytes
# b4f16a5 (listing header; presumably the revision hash of this file)
import pandas as pd
# Clean a "prompt,label" text/CSV dump into a shuffled CSV with numeric labels.

INPUT_PATH = "data/injection_prompts.txt"
OUTPUT_PATH = "data/cleaned_injection_prompts.csv"

# Textual label -> numeric class id written to the cleaned file.
LABEL_TO_ID = {"safe": 0, "unsafe": 1}


def clean_prompts(raw, seed=None):
    """Return a cleaned, shuffled copy of a raw prompt/label frame.

    Args:
        raw: DataFrame with a string ``prompt`` column and a ``label`` column.
        seed: Optional ``random_state`` for the shuffle. ``None`` (the
            default) gives a different order on every call.

    Returns:
        A new DataFrame with a fresh 0..n-1 index in which ``prompt`` has
        leading/trailing double quotes removed and ``label`` is an int64
        column holding the ids from ``LABEL_TO_ID``. ``raw`` is not modified.
    """
    cleaned = raw.copy()

    # Optional: strip quotes
    cleaned["prompt"] = cleaned["prompt"].str.strip('"')

    # Normalise before mapping so " Safe" or "UNSAFE " are still recognised;
    # a plain .map() would turn them into NaN without any notice.
    normalized = cleaned["label"].astype(str).str.strip().str.lower()
    label_ids = normalized.map(LABEL_TO_ID)

    # Anything still unmapped (blank label, stray header row, typo) cannot be
    # used as a class id, so drop it and say so instead of emitting NaN.
    unknown = label_ids.isna()
    if unknown.any():
        print(f"Dropping {int(unknown.sum())} row(s) with unrecognised labels")
    cleaned = cleaned.loc[~unknown].copy()
    cleaned["label"] = label_ids[~unknown].astype("int64")

    # Shuffle the dataset for good measure
    return cleaned.sample(frac=1, random_state=seed).reset_index(drop=True)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, seed=None):
    """Load the raw file, clean it, print a summary, and save it as CSV.

    Args:
        input_path: Headerless ``prompt,label`` file to read.
        output_path: Destination of the cleaned CSV (written without index).
        seed: Forwarded to ``clean_prompts`` to make the shuffle repeatable.

    Returns:
        The cleaned DataFrame that was written to ``output_path``.
    """
    # Load .txt or .csv with prompt,label format
    frame = pd.read_csv(input_path, names=["prompt", "label"])
    frame = clean_prompts(frame, seed=seed)

    # Check stats
    print(" Dataset Loaded")
    print(frame["label"].value_counts())
    # Preview
    print(frame.head())

    # Save to CSV (optional)
    frame.to_csv(output_path, index=False)
    return frame


if __name__ == "__main__":
    # Keep the module-level name `df` available when run as a script.
    df = main()
# End of listing.