Spaces:
Sleeping
Sleeping
File size: 2,534 Bytes
c2fb337 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | import pandas as pd
import numpy as np
from datetime import datetime
# Column list taken from the most recent model error (must be complete).
# build_features() selects its output columns with this list, so the order
# here is the column order of the returned DataFrame.
FEATURE_COLUMNS = [
    # raw request fields
    "location",
    "amount",
    # coordinates and derived distance
    "customer_lat",
    "customer_long",
    "merchant_lat",
    "merchant_long",
    "distance_customer_merchant",
    # customer aggregates
    "customer_city_population",
    "customer_no_transactions",
    "customer_no_orders",
    "customer_no_payments",
    "payments_per_order_ratio",
    "transactions_per_customer_ratio",
    # customer attributes
    "age",
    "customer_gender",
    "customer_job",
    "customer_place_name",
    "customer_zip_code",
    # merchant attributes
    "merchant_id",
    "merchant_name",
    # transaction attributes
    "transaction_type",
    "transaction_category",
    # time of transaction
    "hour_of_day",
    "day_of_week",
    # engineered / aggregate features
    "fraud_rate_by_location",
    "mean_amount_by_location",
    "avg_amount_per_transaction",
    "amount_per_city_pop",
    "amount_deviation_from_location_mean",
]
def build_features(input_data: dict) -> pd.DataFrame:
    """
    Build a 1-row DataFrame holding every feature the model requires.

    Any key of ``input_data`` that names a feature supplies that feature's
    value. Features absent from ``input_data`` fall back to a placeholder
    default: ``0`` / ``0.0`` for numeric features, ``NaN`` for categorical
    ones, and the current local time for ``hour_of_day`` / ``day_of_week``.

    Args:
        input_data: Mapping of feature name to value. May be empty or
            ``None``; keys that are not feature names are ignored.

    Returns:
        A single-row DataFrame whose columns follow ``FEATURE_COLUMNS``.

    Raises:
        KeyError: If ``FEATURE_COLUMNS`` lists a column that has no default
            defined in this function.
    """
    now = datetime.now()

    # Fallback value for each feature, used only when the caller did not
    # supply one. NOTE(review): the zeros for the aggregate / engineered
    # features are placeholders, not computed values -- confirm the model
    # tolerates them.
    defaults = {
        "location": 0,
        "amount": 0,
        # customer
        "customer_lat": 0.0,
        "customer_long": 0.0,
        "customer_city_population": 0,
        "customer_no_transactions": 0,
        "customer_no_orders": 0,
        "customer_no_payments": 0,
        "payments_per_order_ratio": 0.0,
        "transactions_per_customer_ratio": 0.0,
        "age": 0,
        "customer_gender": np.nan,
        "customer_job": np.nan,
        "customer_place_name": np.nan,
        "customer_zip_code": np.nan,
        # merchant
        "merchant_id": np.nan,
        "merchant_name": np.nan,
        "merchant_lat": 0.0,
        "merchant_long": 0.0,
        # transaction
        "transaction_type": np.nan,
        "transaction_category": np.nan,
        # time
        "hour_of_day": now.hour,
        "day_of_week": now.weekday(),
        # engineered / aggregate
        "distance_customer_merchant": 0.0,
        "fraud_rate_by_location": 0.0,
        "mean_amount_by_location": 0.0,
        "avg_amount_per_transaction": 0.0,
        "amount_per_city_pop": 0.0,
        "amount_deviation_from_location_mean": 0.0,
    }

    # Treat a missing payload like an empty one instead of raising.
    supplied = input_data or {}

    # Honour every feature the caller provided, not only location/amount.
    row = {
        name: supplied.get(name, fallback)
        for name, fallback in defaults.items()
    }

    frame = pd.DataFrame([row])
    # Ensure the column order matches the one used in training.
    return frame[FEATURE_COLUMNS]
|