Spaces:

demoaccta
/

fraud_detection_model

Runtime error

App Files Files Community

demoaccta committed on Jan 1

Commit

5ad8f3a

verified ·

1 Parent(s): 4b9e4c4

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -65

app.py CHANGED Viewed

@@ -5,34 +5,16 @@ import pandas as pd
 import json
 from huggingface_hub import hf_hub_download
 MODEL_REPO = "shahviransh/fraud-detection"
 MODEL_FILE = "xgb_model.pkl"
-# download & load
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 model = joblib.load(model_path)
-# Print Features
-try:
-    feature_names = None
-    # Case 1: XGBClassifier
-    if hasattr(model, "get_booster"):
-        booster = model.get_booster()
-        feature_names = booster.feature_names
-    # Case 2: scikit wrapper
-    if feature_names is None and hasattr(model, "feature_names_in_"):
-        feature_names = model.feature_names_in_.tolist()
-    print("==== MODEL FEATURE NAMES (ORDERED) ====")
-    print(feature_names)
-    print("COUNT =", len(feature_names))
-except Exception as e:
-    print("FAILED TO READ FEATURE NAMES:", str(e))
 FEATURES = [
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
@@ -50,86 +32,100 @@ FEATURES = [
  "Product Category_health & beauty","Product Category_home & garden",
  "Product Category_toys & games","Device Used_mobile",
  "Device Used_tablet","Hour Bin_Evening","Hour Bin_Morning",
- "Hour Bin_Night",
- "Transaction Size_Medium","Transaction Size_Small","Transaction Size_Very_Small"
 ]
 def build_feature_row(d):
-    row = {c: 0 for c in FEATURES}
     amt = float(d["amount"])
     qty = max(float(d["quantity"]), 1.0)
     age = float(d["customer_age"])
-    acc_days = float(d["account_age_days"])
     hour = int(d["transaction_hour"])
-    total_txn = float(d["total_customer_transactions"])
-    # ---- numeric base ----
     row["Transaction Amount"] = amt
     row["Quantity"] = qty
     row["Customer Age"] = age
     row["Account Age Days"] = acc_days
     row["Transaction Hour"] = hour
     row["Total Customer Transactions"] = total_txn
-    row["Address Mismatch"] = int(d.get("address_mismatch", False))
     row["Day of Week"] = int(d["day_of_week"])
     row["Month"] = int(d["month"])
-    row["Is Weekend"] = int(row["Day of Week"] >= 5)
-    # ---- engineered ----
     row["Account Age Weeks"] = acc_days / 7
     row["Amount Log"] = np.log1p(amt)
     row["Quantity Log"] = np.log1p(qty)
     row["Amount per Quantity"] = amt / qty
-    row["High Amount Flag"] = int(amt > 500)
-    row["High Quantity Flag"] = int(qty > 3)
-    row["Unusual Hour Flag"] = int(hour < 6 or hour > 22)
-    # safe defaults for unknown pipeline fields
-    row["Transaction Amount Ratio"] = 1.0
-    row["Avg Daily Transaction Velocity"] = total_txn / max(acc_days, 1)
-    row["Time Since Last Transaction"] = 24
-    row["Amount Deviation From History"] = 1
     row["Product Category Diversity"] = 1.0
-    row["Amount zscore"] = 0.2
-    row["Amount Age Interaction"] = ( amt * age ) / 100
-    row["Amount Velocity Interaction"] = amt * row["Avg Daily Transaction Velocity"] / 10
-    row["New Account"] = int(acc_days < 30)
-    row["New Account High Value"] = int(row["New Account"] and amt > 500)
-    row["Weekend High Value"] = int(row["Is Weekend"] and amt > 500)
-    row["High Risk Profile"] = int(row["Address Mismatch"] and amt > 1000)
-    row["Velocity Deviation"] = 0.2
     row["Suspicious Pattern"] = 0.0
-    # ---- one-hot dimensions ----
     pm = f"Payment Method_{d['payment_method']}"
-    if pm in row: row[pm] = 1
     pc = f"Product Category_{d['product_category']}"
-    if pc in row: row[pc] = 1
     device = f"Device Used_{d['device_used']}"
-    if device in row: row[device] = 1
-    # ---- hour bins ----
     if 6 <= hour < 12:
-        row["Hour Bin_Morning"] = 1
     elif 12 <= hour < 18:
-        row["Hour Bin_Evening"] = 1
     else:
-        row["Hour Bin_Night"] = 1
-    # ---- transaction size ----
     if amt < 50:
-        row["Transaction Size_Very_Small"] = 1
     elif amt < 200:
-        row["Transaction Size_Small"] = 1
     else:
-        row["Transaction Size_Medium"] = 1
-    return pd.DataFrame([row])[FEATURES]
 def predict(input_json):
@@ -139,12 +135,18 @@ def predict(input_json):
         df = build_feature_row(d)
-        pred = model.predict(df)[0]
-        prob = model.predict_proba(df)[0][1]
         return {
-            "prediction": int(pred),
-            "fraud_probability": float(prob)
         }
     except Exception as e:
@@ -160,4 +162,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 import json
 from huggingface_hub import hf_hub_download
+DEBUG = False   # <- set to True only when debugging
 MODEL_REPO = "shahviransh/fraud-detection"
 MODEL_FILE = "xgb_model.pkl"
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 model = joblib.load(model_path)
+# EXACT 47 FEATURE ORDER (matches model input)
 FEATURES = [
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
  "Product Category_health & beauty","Product Category_home & garden",
  "Product Category_toys & games","Device Used_mobile",
  "Device Used_tablet","Hour Bin_Evening","Hour Bin_Morning",
+ "Hour Bin_Night","Transaction Size_Medium",
+ "Transaction Size_Small","Transaction Size_Very_Small"
 ]
 def build_feature_row(d):
+    row = {c: 0.0 for c in FEATURES}
     amt = float(d["amount"])
     qty = max(float(d["quantity"]), 1.0)
     age = float(d["customer_age"])
+    acc_days = max(float(d["account_age_days"]), 1.0)
     hour = int(d["transaction_hour"])
+    total_txn = max(float(d["total_customer_transactions"]), 1.0)
+    # -------- base values ----------
     row["Transaction Amount"] = amt
     row["Quantity"] = qty
     row["Customer Age"] = age
     row["Account Age Days"] = acc_days
     row["Transaction Hour"] = hour
     row["Total Customer Transactions"] = total_txn
+    row["Address Mismatch"] = float(d.get("address_mismatch", False))
     row["Day of Week"] = int(d["day_of_week"])
     row["Month"] = int(d["month"])
+    row["Is Weekend"] = float(row["Day of Week"] >= 5)
+    row["New Account"] = float(acc_days < 30)
+    # -------- safer normalized derived fields ----------
     row["Account Age Weeks"] = acc_days / 7
     row["Amount Log"] = np.log1p(amt)
     row["Quantity Log"] = np.log1p(qty)
     row["Amount per Quantity"] = amt / qty
+    row["High Amount Flag"] = float(amt > 500)
+    row["High Quantity Flag"] = float(qty > 3)
+    row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
+    # --- velocity & risk defaults (stabilized) ---
+    row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
+    row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
+    row["Time Since Last Transaction"] = 24.0
+    row["Amount Deviation From History"] = 0.2
     row["Product Category Diversity"] = 1.0
+    row["Amount zscore"] = 0.3
+    row["Velocity Deviation"] = 0.3
     row["Suspicious Pattern"] = 0.0
+    # --- interaction terms (scaled to avoid saturation) ---
+    row["Amount Age Interaction"] = (amt * age) / 100.0
+    row["Amount Velocity Interaction"] = (
+        amt * row["Avg Daily Transaction Velocity"] / 10.0
+    )
+    row["New Account High Value"] = float(row["New Account"] and amt > 500)
+    row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
+    row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
+    # -------- location score default (neutral risk) --------
+    row["Customer Location"] = 0.5
+    # -------- one-hot categorical --------
     pm = f"Payment Method_{d['payment_method']}"
+    if pm in row:
+        row[pm] = 1.0
     pc = f"Product Category_{d['product_category']}"
+    if pc in row:
+        row[pc] = 1.0
     device = f"Device Used_{d['device_used']}"
+    if device in row:
+        row[device] = 1.0
+    # -------- hour bins --------
     if 6 <= hour < 12:
+        row["Hour Bin_Morning"] = 1.0
     elif 12 <= hour < 18:
+        row["Hour Bin_Evening"] = 1.0
     else:
+        row["Hour Bin_Night"] = 1.0
+    # -------- transaction size (3-bucket only) --------
     if amt < 50:
+        row["Transaction Size_Very_Small"] = 1.0
     elif amt < 200:
+        row["Transaction Size_Small"] = 1.0
     else:
+        row["Transaction Size_Medium"] = 1.0
+    # --- final dataframe in exact order ---
+    df = pd.DataFrame([row])[FEATURES].astype(float)
+    return df
 def predict(input_json):
         df = build_feature_row(d)
+        assert df.shape[1] == 47
+        prob = float(model.predict_proba(df)[0][1])
+        pred = int(prob >= 0.5)
+        if DEBUG:
+            print("ROW:", df.values.tolist())
+            print("MARGIN:", model.predict(df, output_margin=True))
         return {
+            "prediction": pred,
+            "fraud_probability": round(prob, 4)
         }
     except Exception as e:
 )
 if __name__ == "__main__":
+    iface.launch()