File size: 475 Bytes
f7c7e26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import pandas as pd
# Read the raw phishing feed from disk.
raw_feed = pd.read_csv("data/url.csv")

# Boolean mask of rows whose 'verified' column equals the string 'yes';
# only those rows are retained.
is_verified = raw_feed['verified'].eq('yes')
df_phish = raw_feed.loc[is_verified]

# Output frame: just the URL column plus a constant label (1 = phishing/spam).
# assign() returns a new DataFrame, so the filtered frame is left untouched.
df_clean = df_phish.loc[:, ['url']].assign(label=1)

# Quick sanity report: (rows, columns) followed by the first five rows.
cleaned_shape = df_clean.shape
print("✅ Cleaned phishing dataset shape:", cleaned_shape)
print(df_clean.head(5))

# Persist the labelled URLs without the pandas index column.
df_clean.to_csv("clean_phish_urls.csv", index=False)
|