File size: 475 Bytes
f7c7e26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import pandas as pd

# Read the raw phishing URL feed from disk.
df_phish = pd.read_csv("data/url.csv")

# Retain only the rows whose 'verified' column equals 'yes'.
_verified_mask = df_phish['verified'] == 'yes'
df_phish = df_phish.loc[_verified_mask]

# Project down to the 'url' column and tag every row with label 1
# (1 = phishing/spam). `assign` returns a new frame, so the filtered
# source frame is left untouched.
df_clean = df_phish[['url']].assign(label=1)

# Report the resulting size and show a short preview.
print("✅ Cleaned phishing dataset shape:", df_clean.shape)
_preview = df_clean.head()
print(_preview)

# Persist the cleaned frame, omitting the index column.
df_clean.to_csv("clean_phish_urls.csv", index=False)