|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
# Load the URL dataset, force its 'label' column to integers, and tag every
# row with the name of the dataset it came from.
df_urls = pd.read_csv("data/final merged_urls.csv").assign(
    label=lambda frame: frame['label'].astype(int),
    source='url_data',
)
|
|
|
|
|
|
|
|
|
# Load the feature-based dataset and bring its target column in line with the
# URL dataset: the column is renamed from 'class' to 'label' and its values
# are re-encoded (-1 -> 1, 1 -> 0).
# NOTE(review): presumably -1 marks the positive (phishing) class in the raw
# file and 1 the benign one -- confirm against the dataset's documentation.
df_features = pd.read_csv("data/phishing.csv")
df_features = df_features.rename(columns={'class': 'label'})

raw_labels = df_features['label']
df_features['label'] = raw_labels.map({-1: 1, 1: 0})

# Series.map turns every value that is missing from the mapping into NaN.
# Fail loudly on unexpected codes instead of silently writing NaN labels to
# the merged output; values that were already missing are left as they were.
unexpected_labels = raw_labels[raw_labels.notna() & df_features['label'].isna()]
if not unexpected_labels.empty:
    raise ValueError(
        "Unexpected values in 'class' column of data/phishing.csv: "
        f"{unexpected_labels.unique().tolist()}"
    )

# Tag every row with the name of the dataset it came from.
df_features['source'] = 'feature_data'
|
|
|
|
|
|
|
|
|
|
|
|
# Stack both datasets row-wise under a fresh 0..n-1 index. With pandas'
# default outer join, columns present in only one input are filled with NaN
# for the rows that come from the other input.
merged_df = pd.concat([df_urls, df_features], ignore_index=True)

# Shuffle all rows so the two sources are interleaved. The shuffle is not
# seeded, so the row order differs from run to run.
merged_df = merged_df.sample(frac=1).reset_index(drop=True)

# Keep the path in one place so the success message always names the file
# that was actually written.
output_path = "fianl2merged_spam_dataset.csv"
merged_df.to_csv(output_path, index=False)

print(f"✅ Created '{output_path}' with unified labels.")
|
|
|
|