demoaccta committed on
Commit
58828d8
·
verified ·
1 Parent(s): e457311

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -19
app.py CHANGED
@@ -5,16 +5,17 @@ import pandas as pd
5
  import json
6
  from huggingface_hub import hf_hub_download
7
 
8
- DEBUG = False # <- set to True only when debugging
9
-
10
  MODEL_REPO = "shahviransh/fraud-detection"
11
- MODEL_FILE = "xgb_model.pkl"
 
 
12
 
 
13
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
14
  model = joblib.load(model_path)
15
 
16
 
17
- # EXACT 47 FEATURE ORDER (matches model input)
18
  FEATURES = [
19
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
20
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
@@ -48,7 +49,7 @@ def build_feature_row(d):
48
  hour = int(d["transaction_hour"])
49
  total_txn = max(float(d["total_customer_transactions"]), 1.0)
50
 
51
- # -------- base values ----------
52
  row["Transaction Amount"] = amt
53
  row["Quantity"] = qty
54
  row["Customer Age"] = age
@@ -61,7 +62,7 @@ def build_feature_row(d):
61
  row["Is Weekend"] = float(row["Day of Week"] >= 5)
62
  row["New Account"] = float(acc_days < 30)
63
 
64
- # -------- safer normalized derived fields ----------
65
  row["Account Age Weeks"] = acc_days / 7
66
  row["Amount Log"] = np.log1p(amt)
67
  row["Quantity Log"] = np.log1p(qty)
@@ -70,7 +71,6 @@ def build_feature_row(d):
70
  row["High Quantity Flag"] = float(qty > 3)
71
  row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
72
 
73
- # --- velocity & risk defaults (stabilized) ---
74
  row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
75
  row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
76
  row["Time Since Last Transaction"] = 24.0
@@ -80,7 +80,6 @@ def build_feature_row(d):
80
  row["Velocity Deviation"] = 0.3
81
  row["Suspicious Pattern"] = 0.0
82
 
83
- # --- interaction terms (scaled to avoid saturation) ---
84
  row["Amount Age Interaction"] = (amt * age) / 100.0
85
  row["Amount Velocity Interaction"] = (
86
  amt * row["Avg Daily Transaction Velocity"] / 10.0
@@ -90,10 +89,10 @@ def build_feature_row(d):
90
  row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
91
  row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
92
 
93
- # -------- location score default (neutral risk) --------
94
  row["Customer Location"] = 0.5
95
 
96
- # -------- one-hot categorical --------
97
  pm = f"Payment Method_{d['payment_method']}"
98
  if pm in row:
99
  row[pm] = 1.0
@@ -114,7 +113,7 @@ def build_feature_row(d):
114
  else:
115
  row["Hour Bin_Night"] = 1.0
116
 
117
- # -------- transaction size (3-bucket only) --------
118
  if amt < 50:
119
  row["Transaction Size_Very_Small"] = 1.0
120
  elif amt < 200:
@@ -122,7 +121,6 @@ def build_feature_row(d):
122
  else:
123
  row["Transaction Size_Medium"] = 1.0
124
 
125
- # --- final dataframe in exact order ---
126
  df = pd.DataFrame([row])[FEATURES].astype(float)
127
 
128
  return df
@@ -132,21 +130,20 @@ def predict(input_json):
132
 
133
  try:
134
  d = json.loads(input_json)
135
-
136
  df = build_feature_row(d)
137
 
138
  assert df.shape[1] == 47
139
 
140
- prob = float(model.predict_proba(df)[0][1])
141
- pred = int(prob >= 0.5)
142
 
143
  if DEBUG:
144
- print("ROW:", df.values.tolist())
145
- print("MARGIN:", model.predict(df, output_margin=True))
146
 
147
  return {
148
  "prediction": pred,
149
- "fraud_probability": round(prob, 4)
150
  }
151
 
152
  except Exception as e:
@@ -157,7 +154,7 @@ iface = gr.Interface(
157
  fn=predict,
158
  inputs=gr.Textbox(lines=10, label="JSON Input"),
159
  outputs="json",
160
- title="Fraud Detection API",
161
  description="Submit JSON payload for fraud scoring."
162
  )
163
 
 
5
  import json
6
  from huggingface_hub import hf_hub_download
7
 
 
 
8
  MODEL_REPO = "shahviransh/fraud-detection"
9
+ MODEL_FILE = "rf_model.pkl"
10
+
11
+ DEBUG = False # set True only for debugging
12
 
13
+ # download & load model
14
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
15
  model = joblib.load(model_path)
16
 
17
 
18
+ # EXACT 47 FEATURE ORDER REQUIRED BY MODEL
19
  FEATURES = [
20
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
21
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
 
49
  hour = int(d["transaction_hour"])
50
  total_txn = max(float(d["total_customer_transactions"]), 1.0)
51
 
52
+ # -------- base --------
53
  row["Transaction Amount"] = amt
54
  row["Quantity"] = qty
55
  row["Customer Age"] = age
 
62
  row["Is Weekend"] = float(row["Day of Week"] >= 5)
63
  row["New Account"] = float(acc_days < 30)
64
 
65
+ # -------- engineered (realistic bounded values) --------
66
  row["Account Age Weeks"] = acc_days / 7
67
  row["Amount Log"] = np.log1p(amt)
68
  row["Quantity Log"] = np.log1p(qty)
 
71
  row["High Quantity Flag"] = float(qty > 3)
72
  row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
73
 
 
74
  row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
75
  row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
76
  row["Time Since Last Transaction"] = 24.0
 
80
  row["Velocity Deviation"] = 0.3
81
  row["Suspicious Pattern"] = 0.0
82
 
 
83
  row["Amount Age Interaction"] = (amt * age) / 100.0
84
  row["Amount Velocity Interaction"] = (
85
  amt * row["Avg Daily Transaction Velocity"] / 10.0
 
89
  row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
90
  row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
91
 
92
+ # neutral
93
  row["Customer Location"] = 0.5
94
 
95
+ # -------- one-hot --------
96
  pm = f"Payment Method_{d['payment_method']}"
97
  if pm in row:
98
  row[pm] = 1.0
 
113
  else:
114
  row["Hour Bin_Night"] = 1.0
115
 
116
+ # -------- size buckets --------
117
  if amt < 50:
118
  row["Transaction Size_Very_Small"] = 1.0
119
  elif amt < 200:
 
121
  else:
122
  row["Transaction Size_Medium"] = 1.0
123
 
 
124
  df = pd.DataFrame([row])[FEATURES].astype(float)
125
 
126
  return df
 
130
 
131
  try:
132
  d = json.loads(input_json)
 
133
  df = build_feature_row(d)
134
 
135
  assert df.shape[1] == 47
136
 
137
+ proba = float(model.predict_proba(df)[0][1])
138
+ pred = int(proba >= 0.5)
139
 
140
  if DEBUG:
141
+ print("INPUT:", df.values.tolist())
142
+ print("PROB:", proba)
143
 
144
  return {
145
  "prediction": pred,
146
+ "fraud_probability": round(proba, 4)
147
  }
148
 
149
  except Exception as e:
 
154
  fn=predict,
155
  inputs=gr.Textbox(lines=10, label="JSON Input"),
156
  outputs="json",
157
+ title="Fraud Detection API (Random Forest)",
158
  description="Submit JSON payload for fraud scoring."
159
  )
160