# fraud_detection_api_1/features/feature_builder.py
# cindyy287's picture
# Upload 23 files
# c2fb337 verified
import pandas as pd
import numpy as np
from datetime import datetime
# Column list matching the most recent error (must be complete).
# The order of this list is the column order of the DataFrame returned by
# build_features.
FEATURE_COLUMNS = [
    # raw transaction inputs
    "location",
    "amount",
    # coordinates and the distance between them
    "customer_lat",
    "customer_long",
    "merchant_lat",
    "merchant_long",
    "distance_customer_merchant",
    # customer statistics
    "customer_city_population",
    "customer_no_transactions",
    "customer_no_orders",
    "customer_no_payments",
    "payments_per_order_ratio",
    "transactions_per_customer_ratio",
    "age",
    # customer descriptors
    "customer_gender",
    "customer_job",
    "customer_place_name",
    "customer_zip_code",
    # merchant descriptors
    "merchant_id",
    "merchant_name",
    # transaction descriptors
    "transaction_type",
    "transaction_category",
    # time
    "hour_of_day",
    "day_of_week",
    # engineered / aggregate
    "fraud_rate_by_location",
    "mean_amount_by_location",
    "avg_amount_per_transaction",
    "amount_per_city_pop",
    "amount_deviation_from_location_mean",
]
def build_features(input_data: dict) -> pd.DataFrame:
    """
    Build a 1-row DataFrame with all features required by the model.

    Every feature is looked up in ``input_data`` by its column name. A key
    that is absent, or present with the value ``None``, falls back to the
    default listed below; any other supplied value is used as-is (no type
    coercion or validation is performed here).

    Previously only ``location`` and ``amount`` were read from the input and
    every other feature was always overwritten with its default, so supplied
    values were silently discarded. Callers that pass only ``location`` and
    ``amount`` get exactly the same output as before.

    Args:
        input_data: Mapping of feature name to value. Keys that are not
            feature names are ignored.

    Returns:
        A single-row DataFrame whose columns are exactly ``FEATURE_COLUMNS``,
        in that order.

    Raises:
        KeyError: If ``FEATURE_COLUMNS`` names a column that has no default
            defined in this function.
    """
    # Naive local server time; used only when the caller does not supply
    # hour_of_day / day_of_week.
    now = datetime.now()

    defaults = {
        "location": 0,
        "amount": 0,
        # customer
        "customer_lat": 0.0,
        "customer_long": 0.0,
        "customer_city_population": 0,
        "customer_no_transactions": 0,
        "customer_no_orders": 0,
        "customer_no_payments": 0,
        "payments_per_order_ratio": 0.0,
        "transactions_per_customer_ratio": 0.0,
        "age": 0,
        "customer_gender": np.nan,
        "customer_job": np.nan,
        "customer_place_name": np.nan,
        "customer_zip_code": np.nan,
        # merchant
        "merchant_id": np.nan,
        "merchant_name": np.nan,
        "merchant_lat": 0.0,
        "merchant_long": 0.0,
        # transaction
        "transaction_type": np.nan,
        "transaction_category": np.nan,
        # time
        "hour_of_day": now.hour,
        "day_of_week": now.weekday(),
        # engineered / aggregate
        "distance_customer_merchant": 0.0,
        "fraud_rate_by_location": 0.0,
        "mean_amount_by_location": 0.0,
        "avg_amount_per_transaction": 0.0,
        "amount_per_city_pop": 0.0,
        "amount_deviation_from_location_mean": 0.0,
    }

    feature_dict = {}
    for name, default in defaults.items():
        supplied = input_data.get(name)
        # Treat an explicit None (e.g. JSON null) the same as a missing key.
        feature_dict[name] = default if supplied is None else supplied

    df = pd.DataFrame([feature_dict])

    # Make sure the column order matches the one used in training.
    return df[FEATURE_COLUMNS]