File size: 1,075 Bytes
f7c7e26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd

# ---------- Input / output locations ----------
# Defaults used when the script is run directly; every path can be overridden
# through the corresponding ``main()`` argument.
URLS_CSV = "data/final merged_urls.csv"
FEATURES_CSV = "data/phishing.csv"
OUTPUT_CSV = "fianl2merged_spam_dataset.csv"

# The feature dataset encodes its class as -1 / 1; the merged dataset uses
# 1 (spam) / 0 (safe), so -1 -> 1 and 1 -> 0.
FEATURE_LABEL_MAP = {-1: 1, 1: 0}


def _load_url_data(path):
    """Read the URL dataset, force an integer ``label`` column and tag its source."""
    df_urls = pd.read_csv(path)
    df_urls['label'] = df_urls['label'].astype(int)
    df_urls['source'] = 'url_data'
    return df_urls


def _load_feature_data(path):
    """Read the feature dataset, convert its ``class`` column to the unified ``label``.

    Raises:
        ValueError: if the class column holds a value (or a missing entry)
            that is not covered by ``FEATURE_LABEL_MAP``.
    """
    df_features = pd.read_csv(path)

    # Rename 'class' to 'label' for consistency with the URL dataset.
    df_features = df_features.rename(columns={'class': 'label'})

    # ``Series.map`` turns every value missing from the mapping into NaN.
    # Fail loudly instead of silently writing NaN labels into the dataset.
    mapped = df_features['label'].map(FEATURE_LABEL_MAP)
    unmapped = mapped.isna()
    if unmapped.any():
        unexpected = df_features.loc[unmapped, 'label'].unique().tolist()
        raise ValueError(
            f"Unexpected class values in {path!r}: {unexpected}; "
            f"expected only {sorted(FEATURE_LABEL_MAP)}"
        )
    df_features['label'] = mapped.astype(int)

    # Record which file each row came from.
    df_features['source'] = 'feature_data'
    return df_features


def main(urls_path=URLS_CSV, features_path=FEATURES_CSV,
         output_path=OUTPUT_CSV, random_state=None):
    """Merge the URL and feature datasets into one shuffled CSV.

    Args:
        urls_path: CSV with a ``label`` column convertible to int.
        features_path: CSV with a ``class`` column encoded as -1 / 1.
        output_path: where the merged CSV is written (without the index).
        random_state: optional seed forwarded to ``DataFrame.sample`` to make
            the shuffle reproducible; ``None`` shuffles differently each run.

    Returns:
        The merged, shuffled DataFrame that was written to ``output_path``.

    Raises:
        ValueError: if the feature dataset contains an unexpected class value.
    """
    df_urls = _load_url_data(urls_path)
    df_features = _load_feature_data(features_path)

    # The two files have different columns; concat keeps the union of the
    # columns and fills the cells a source does not provide with NaN.
    merged_df = pd.concat([df_urls, df_features], ignore_index=True)

    # Shuffle rows so the two sources are interleaved.
    merged_df = merged_df.sample(frac=1, random_state=random_state)
    merged_df = merged_df.reset_index(drop=True)

    merged_df.to_csv(output_path, index=False)

    # Report the path that was actually written.
    print(f"✅ Created '{output_path}' with unified labels.")
    return merged_df


if __name__ == "__main__":
    main()