demoaccta committed on
Commit
5ad8f3a
·
verified ·
1 Parent(s): 4b9e4c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -65
app.py CHANGED
@@ -5,34 +5,16 @@ import pandas as pd
5
  import json
6
  from huggingface_hub import hf_hub_download
7
 
 
 
8
  MODEL_REPO = "shahviransh/fraud-detection"
9
  MODEL_FILE = "xgb_model.pkl"
10
 
11
- # download & load
12
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
13
  model = joblib.load(model_path)
14
 
15
- # Print Features
16
- try:
17
- feature_names = None
18
-
19
- # Case 1: XGBClassifier
20
- if hasattr(model, "get_booster"):
21
- booster = model.get_booster()
22
- feature_names = booster.feature_names
23
-
24
- # Case 2: scikit wrapper
25
- if feature_names is None and hasattr(model, "feature_names_in_"):
26
- feature_names = model.feature_names_in_.tolist()
27
-
28
- print("==== MODEL FEATURE NAMES (ORDERED) ====")
29
- print(feature_names)
30
- print("COUNT =", len(feature_names))
31
-
32
- except Exception as e:
33
- print("FAILED TO READ FEATURE NAMES:", str(e))
34
-
35
 
 
36
  FEATURES = [
37
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
38
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
@@ -50,86 +32,100 @@ FEATURES = [
50
  "Product Category_health & beauty","Product Category_home & garden",
51
  "Product Category_toys & games","Device Used_mobile",
52
  "Device Used_tablet","Hour Bin_Evening","Hour Bin_Morning",
53
- "Hour Bin_Night",
54
- "Transaction Size_Medium","Transaction Size_Small","Transaction Size_Very_Small"
55
  ]
56
 
57
 
58
  def build_feature_row(d):
59
 
60
- row = {c: 0 for c in FEATURES}
61
 
62
  amt = float(d["amount"])
63
  qty = max(float(d["quantity"]), 1.0)
64
  age = float(d["customer_age"])
65
- acc_days = float(d["account_age_days"])
66
  hour = int(d["transaction_hour"])
67
- total_txn = float(d["total_customer_transactions"])
68
 
69
- # ---- numeric base ----
70
  row["Transaction Amount"] = amt
71
  row["Quantity"] = qty
72
  row["Customer Age"] = age
73
  row["Account Age Days"] = acc_days
74
  row["Transaction Hour"] = hour
75
  row["Total Customer Transactions"] = total_txn
76
- row["Address Mismatch"] = int(d.get("address_mismatch", False))
77
  row["Day of Week"] = int(d["day_of_week"])
78
  row["Month"] = int(d["month"])
79
- row["Is Weekend"] = int(row["Day of Week"] >= 5)
 
80
 
81
- # ---- engineered ----
82
  row["Account Age Weeks"] = acc_days / 7
83
  row["Amount Log"] = np.log1p(amt)
84
  row["Quantity Log"] = np.log1p(qty)
85
  row["Amount per Quantity"] = amt / qty
86
- row["High Amount Flag"] = int(amt > 500)
87
- row["High Quantity Flag"] = int(qty > 3)
88
- row["Unusual Hour Flag"] = int(hour < 6 or hour > 22)
89
-
90
- # safe defaults for unknown pipeline fields
91
- row["Transaction Amount Ratio"] = 1.0
92
- row["Avg Daily Transaction Velocity"] = total_txn / max(acc_days, 1)
93
- row["Time Since Last Transaction"] = 24
94
- row["Amount Deviation From History"] = 1
95
  row["Product Category Diversity"] = 1.0
96
- row["Amount zscore"] = 0.2
97
- row["Amount Age Interaction"] = ( amt * age ) / 100
98
- row["Amount Velocity Interaction"] = amt * row["Avg Daily Transaction Velocity"] / 10
99
- row["New Account"] = int(acc_days < 30)
100
- row["New Account High Value"] = int(row["New Account"] and amt > 500)
101
- row["Weekend High Value"] = int(row["Is Weekend"] and amt > 500)
102
- row["High Risk Profile"] = int(row["Address Mismatch"] and amt > 1000)
103
- row["Velocity Deviation"] = 0.2
104
  row["Suspicious Pattern"] = 0.0
105
 
106
- # ---- one-hot dimensions ----
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  pm = f"Payment Method_{d['payment_method']}"
108
- if pm in row: row[pm] = 1
 
109
 
110
  pc = f"Product Category_{d['product_category']}"
111
- if pc in row: row[pc] = 1
 
112
 
113
  device = f"Device Used_{d['device_used']}"
114
- if device in row: row[device] = 1
 
115
 
116
- # ---- hour bins ----
117
  if 6 <= hour < 12:
118
- row["Hour Bin_Morning"] = 1
119
  elif 12 <= hour < 18:
120
- row["Hour Bin_Evening"] = 1
121
  else:
122
- row["Hour Bin_Night"] = 1
123
 
124
- # ---- transaction size ----
125
  if amt < 50:
126
- row["Transaction Size_Very_Small"] = 1
127
  elif amt < 200:
128
- row["Transaction Size_Small"] = 1
129
  else:
130
- row["Transaction Size_Medium"] = 1
 
 
 
131
 
132
- return pd.DataFrame([row])[FEATURES]
133
 
134
 
135
  def predict(input_json):
@@ -139,12 +135,18 @@ def predict(input_json):
139
 
140
  df = build_feature_row(d)
141
 
142
- pred = model.predict(df)[0]
143
- prob = model.predict_proba(df)[0][1]
 
 
 
 
 
 
144
 
145
  return {
146
- "prediction": int(pred),
147
- "fraud_probability": float(prob)
148
  }
149
 
150
  except Exception as e:
@@ -160,4 +162,4 @@ iface = gr.Interface(
160
  )
161
 
162
  if __name__ == "__main__":
163
- iface.launch()
 
5
  import json
6
  from huggingface_hub import hf_hub_download
7
 
8
+ DEBUG = False # <- set to True only when debugging
9
+
10
  MODEL_REPO = "shahviransh/fraud-detection"
11
  MODEL_FILE = "xgb_model.pkl"
12
 
 
13
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
14
  model = joblib.load(model_path)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # EXACT 47 FEATURE ORDER (matches model input)
18
  FEATURES = [
19
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
20
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
 
32
  "Product Category_health & beauty","Product Category_home & garden",
33
  "Product Category_toys & games","Device Used_mobile",
34
  "Device Used_tablet","Hour Bin_Evening","Hour Bin_Morning",
35
+ "Hour Bin_Night","Transaction Size_Medium",
36
+ "Transaction Size_Small","Transaction Size_Very_Small"
37
  ]
38
 
39
 
40
  def build_feature_row(d):
41
 
42
+ row = {c: 0.0 for c in FEATURES}
43
 
44
  amt = float(d["amount"])
45
  qty = max(float(d["quantity"]), 1.0)
46
  age = float(d["customer_age"])
47
+ acc_days = max(float(d["account_age_days"]), 1.0)
48
  hour = int(d["transaction_hour"])
49
+ total_txn = max(float(d["total_customer_transactions"]), 1.0)
50
 
51
+ # -------- base values ----------
52
  row["Transaction Amount"] = amt
53
  row["Quantity"] = qty
54
  row["Customer Age"] = age
55
  row["Account Age Days"] = acc_days
56
  row["Transaction Hour"] = hour
57
  row["Total Customer Transactions"] = total_txn
58
+ row["Address Mismatch"] = float(d.get("address_mismatch", False))
59
  row["Day of Week"] = int(d["day_of_week"])
60
  row["Month"] = int(d["month"])
61
+ row["Is Weekend"] = float(row["Day of Week"] >= 5)
62
+ row["New Account"] = float(acc_days < 30)
63
 
64
+ # -------- safer normalized derived fields ----------
65
  row["Account Age Weeks"] = acc_days / 7
66
  row["Amount Log"] = np.log1p(amt)
67
  row["Quantity Log"] = np.log1p(qty)
68
  row["Amount per Quantity"] = amt / qty
69
+ row["High Amount Flag"] = float(amt > 500)
70
+ row["High Quantity Flag"] = float(qty > 3)
71
+ row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
72
+
73
+ # --- velocity & risk defaults (stabilized) ---
74
+ row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
75
+ row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
76
+ row["Time Since Last Transaction"] = 24.0
77
+ row["Amount Deviation From History"] = 0.2
78
  row["Product Category Diversity"] = 1.0
79
+ row["Amount zscore"] = 0.3
80
+ row["Velocity Deviation"] = 0.3
 
 
 
 
 
 
81
  row["Suspicious Pattern"] = 0.0
82
 
83
+ # --- interaction terms (scaled to avoid saturation) ---
84
+ row["Amount Age Interaction"] = (amt * age) / 100.0
85
+ row["Amount Velocity Interaction"] = (
86
+ amt * row["Avg Daily Transaction Velocity"] / 10.0
87
+ )
88
+
89
+ row["New Account High Value"] = float(row["New Account"] and amt > 500)
90
+ row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
91
+ row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
92
+
93
+ # -------- location score default (neutral risk) --------
94
+ row["Customer Location"] = 0.5
95
+
96
+ # -------- one-hot categorical --------
97
  pm = f"Payment Method_{d['payment_method']}"
98
+ if pm in row:
99
+ row[pm] = 1.0
100
 
101
  pc = f"Product Category_{d['product_category']}"
102
+ if pc in row:
103
+ row[pc] = 1.0
104
 
105
  device = f"Device Used_{d['device_used']}"
106
+ if device in row:
107
+ row[device] = 1.0
108
 
109
+ # -------- hour bins --------
110
  if 6 <= hour < 12:
111
+ row["Hour Bin_Morning"] = 1.0
112
  elif 12 <= hour < 18:
113
+ row["Hour Bin_Evening"] = 1.0
114
  else:
115
+ row["Hour Bin_Night"] = 1.0
116
 
117
+ # -------- transaction size (3-bucket only) --------
118
  if amt < 50:
119
+ row["Transaction Size_Very_Small"] = 1.0
120
  elif amt < 200:
121
+ row["Transaction Size_Small"] = 1.0
122
  else:
123
+ row["Transaction Size_Medium"] = 1.0
124
+
125
+ # --- final dataframe in exact order ---
126
+ df = pd.DataFrame([row])[FEATURES].astype(float)
127
 
128
+ return df
129
 
130
 
131
  def predict(input_json):
 
135
 
136
  df = build_feature_row(d)
137
 
138
+ assert df.shape[1] == 47
139
+
140
+ prob = float(model.predict_proba(df)[0][1])
141
+ pred = int(prob >= 0.5)
142
+
143
+ if DEBUG:
144
+ print("ROW:", df.values.tolist())
145
+ print("MARGIN:", model.predict(df, output_margin=True))
146
 
147
  return {
148
+ "prediction": pred,
149
+ "fraud_probability": round(prob, 4)
150
  }
151
 
152
  except Exception as e:
 
162
  )
163
 
164
  if __name__ == "__main__":
165
+ iface.launch()