from typing import Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Countries flagged by the ``is_high_risk_country`` feature unless the caller
# passes its own collection.
HIGH_RISK_COUNTRIES = ("Nigeria", "Russia", "China")

# Columns handed to the model; ``merchant_category`` is one-hot encoded later.
MODEL_FEATURES = ["amount_log", "hour", "is_high_risk_country", "merchant_category"]


def prase_transaction_data(file_path, high_risk_countries=HIGH_RISK_COUNTRIES):
    """Parse and clean transaction data from a CSV file.

    Rows containing any missing value are dropped, then three derived
    columns are added:

    * ``hour`` -- hour component of the parsed ``timestamp`` column.
    * ``amount_log`` -- ``log(1 + amount)``.
    * ``is_high_risk_country`` -- 1 when ``country`` is one of
      ``high_risk_countries``, otherwise 0.

    Args:
        file_path: Location of the CSV file; passed straight to
            ``pandas.read_csv``.
        high_risk_countries: Collection of country names to flag. Defaults
            to ``HIGH_RISK_COUNTRIES``.

    Returns:
        The cleaned DataFrame with the derived columns appended.

    Raises:
        KeyError: If ``timestamp``, ``amount`` or ``country`` is missing
            from the file.
    """
    df = pd.read_csv(file_path)
    df = df.dropna()

    df["hour"] = pd.to_datetime(df["timestamp"]).dt.hour

    # NOTE(review): np.log1p returns -inf for -1 and NaN below -1. This
    # presumably expects non-negative amounts -- confirm how refunds or
    # chargebacks are represented upstream.
    df["amount_log"] = np.log1p(df["amount"])

    # Vectorised membership test instead of a per-row Python lambda; the cast
    # keeps the 0/1 integer encoding.
    df["is_high_risk_country"] = (
        df["country"].isin(list(high_risk_countries)).astype("int64")
    )
    return df


# Correctly spelled alias; the original (misspelled) name stays available so
# existing callers keep working.
parse_transaction_data = prase_transaction_data


def preprocess_for_model(
    df: pd.DataFrame,
    scaler: Optional[StandardScaler] = None,
) -> Tuple[np.ndarray, Optional[pd.Series]]:
    """Prepare data for the fraud detection model.

    Selects the columns listed in ``MODEL_FEATURES``, one-hot encodes
    ``merchant_category`` (dropping the first level) and standardises the
    resulting matrix.

    Args:
        df: DataFrame that already contains the ``MODEL_FEATURES`` columns,
            e.g. the output of ``prase_transaction_data``.
        scaler: Optional, already fitted ``StandardScaler``. When given, it
            is only used to ``transform`` the features so that the
            statistics learned on training data are reused. When omitted, a
            new scaler is fitted on ``df`` itself.

    Returns:
        A ``(X_scaled, y)`` tuple. ``X_scaled`` is the standardised feature
        array; ``y`` is the ``fraud_label`` column, or ``None`` when ``df``
        has no such column.

    Raises:
        KeyError: If any of the ``MODEL_FEATURES`` columns is missing.
    """
    X = df[MODEL_FEATURES]
    y = df.get("fraud_label")

    # One-hot encode the merchant category. The produced columns depend on
    # the categories present in ``df``, so a caller reusing a fitted scaler
    # must make sure the same categories are present as at fit time.
    X = pd.get_dummies(X, columns=["merchant_category"], drop_first=True)

    if scaler is None:
        # Default path: fit on this data and scale it.
        X_scaled = StandardScaler().fit_transform(X)
    else:
        # Reuse the caller's fitted statistics; do not re-fit.
        X_scaled = scaler.transform(X)
    return X_scaled, y