File size: 2,534 Bytes
c2fb337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import pandas as pd
import numpy as np
from datetime import datetime

# Column list taken from the most recent error message; it must stay complete.
# The order matters: build_features() reindexes its output with this list.
FEATURE_COLUMNS = [
    # request fields
    "location", "amount",
    # coordinates and distance
    "customer_lat", "customer_long",
    "merchant_lat", "merchant_long",
    "distance_customer_merchant",
    # customer counters and ratios
    "customer_city_population",
    "customer_no_transactions", "customer_no_orders", "customer_no_payments",
    "payments_per_order_ratio", "transactions_per_customer_ratio",
    # customer profile
    "age", "customer_gender", "customer_job",
    "customer_place_name", "customer_zip_code",
    # merchant
    "merchant_id", "merchant_name",
    # transaction
    "transaction_type", "transaction_category",
    # time
    "hour_of_day", "day_of_week",
    # engineered / aggregate
    "fraud_rate_by_location", "mean_amount_by_location",
    "avg_amount_per_transaction", "amount_per_city_pop",
    "amount_deviation_from_location_mean",
]

def build_features(input_data: dict) -> pd.DataFrame:
    """Build a one-row DataFrame with every column listed in FEATURE_COLUMNS.

    Each feature is looked up in ``input_data``; a feature the caller does
    not supply falls back to a placeholder default: zero for the numeric
    fields, NaN for the gender/job/place/zip, merchant id/name and
    transaction type/category fields, and the current hour and weekday for
    the two time fields.

    Args:
        input_data: Mapping of feature name to value. Keys that are not
            model features are ignored. A key that is present with an
            explicit ``None`` value is passed through unchanged
            (``dict.get`` semantics), not replaced by the default.

    Returns:
        A DataFrame with exactly one row whose columns are ordered as
        FEATURE_COLUMNS.

    Raises:
        KeyError: If FEATURE_COLUMNS names a column that has no default
            entry below.
    """
    # NOTE(review): datetime.now() is naive local time, so the fallback
    # hour/weekday depend on the host timezone -- confirm this matches the
    # convention used when the model was trained.
    now = datetime.now()

    # Fallback value for every model feature, used only when the caller
    # did not provide that key.
    defaults = {
        "location": 0,
        "amount": 0,

        # customer
        "customer_lat": 0.0,
        "customer_long": 0.0,
        "customer_city_population": 0,
        "customer_no_transactions": 0,
        "customer_no_orders": 0,
        "customer_no_payments": 0,
        "payments_per_order_ratio": 0.0,
        "transactions_per_customer_ratio": 0.0,
        "age": 0,
        "customer_gender": np.nan,
        "customer_job": np.nan,
        "customer_place_name": np.nan,
        "customer_zip_code": np.nan,

        # merchant
        "merchant_id": np.nan,
        "merchant_name": np.nan,
        "merchant_lat": 0.0,
        "merchant_long": 0.0,

        # transaction
        "transaction_type": np.nan,
        "transaction_category": np.nan,

        # time
        "hour_of_day": now.hour,
        "day_of_week": now.weekday(),

        # engineered / aggregate
        "distance_customer_merchant": 0.0,
        "fraud_rate_by_location": 0.0,
        "mean_amount_by_location": 0.0,
        "avg_amount_per_transaction": 0.0,
        "amount_per_city_pop": 0.0,
        "amount_deviation_from_location_mean": 0.0,
    }

    # Prefer the caller's value for every feature, not only location and
    # amount; previously all other supplied values were silently dropped.
    feature_dict = {
        name: input_data.get(name, fallback)
        for name, fallback in defaults.items()
    }

    df = pd.DataFrame([feature_dict])

    # Make sure the column order matches the one used in training.
    return df[FEATURE_COLUMNS]