Spaces:

demoaccta
/

fraud_detection_model

Runtime error

App Files Files Community

demoaccta committed on Jan 1

Commit

58828d8

verified ·

1 Parent(s): e457311

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -19

app.py CHANGED Viewed

@@ -5,16 +5,17 @@ import pandas as pd
 import json
 from huggingface_hub import hf_hub_download
-DEBUG = False   # <- set to True only when debugging
 MODEL_REPO = "shahviransh/fraud-detection"
-MODEL_FILE = "xgb_model.pkl"
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 model = joblib.load(model_path)
-# EXACT 47 FEATURE ORDER (matches model input)
 FEATURES = [
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
@@ -48,7 +49,7 @@ def build_feature_row(d):
     hour = int(d["transaction_hour"])
     total_txn = max(float(d["total_customer_transactions"]), 1.0)
-    # -------- base values ----------
     row["Transaction Amount"] = amt
     row["Quantity"] = qty
     row["Customer Age"] = age
@@ -61,7 +62,7 @@ def build_feature_row(d):
     row["Is Weekend"] = float(row["Day of Week"] >= 5)
     row["New Account"] = float(acc_days < 30)
-    # -------- safer normalized derived fields ----------
     row["Account Age Weeks"] = acc_days / 7
     row["Amount Log"] = np.log1p(amt)
     row["Quantity Log"] = np.log1p(qty)
@@ -70,7 +71,6 @@ def build_feature_row(d):
     row["High Quantity Flag"] = float(qty > 3)
     row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
-    # --- velocity & risk defaults (stabilized) ---
     row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
     row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
     row["Time Since Last Transaction"] = 24.0
@@ -80,7 +80,6 @@ def build_feature_row(d):
     row["Velocity Deviation"] = 0.3
     row["Suspicious Pattern"] = 0.0
-    # --- interaction terms (scaled to avoid saturation) ---
     row["Amount Age Interaction"] = (amt * age) / 100.0
     row["Amount Velocity Interaction"] = (
         amt * row["Avg Daily Transaction Velocity"] / 10.0
@@ -90,10 +89,10 @@ def build_feature_row(d):
     row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
     row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
-    # -------- location score default (neutral risk) --------
     row["Customer Location"] = 0.5
-    # -------- one-hot categorical --------
     pm = f"Payment Method_{d['payment_method']}"
     if pm in row:
         row[pm] = 1.0
@@ -114,7 +113,7 @@ def build_feature_row(d):
     else:
         row["Hour Bin_Night"] = 1.0
-    # -------- transaction size (3-bucket only) --------
     if amt < 50:
         row["Transaction Size_Very_Small"] = 1.0
     elif amt < 200:
@@ -122,7 +121,6 @@ def build_feature_row(d):
     else:
         row["Transaction Size_Medium"] = 1.0
-    # --- final dataframe in exact order ---
     df = pd.DataFrame([row])[FEATURES].astype(float)
     return df
@@ -132,21 +130,20 @@ def predict(input_json):
     try:
         d = json.loads(input_json)
         df = build_feature_row(d)
         assert df.shape[1] == 47
-        prob = float(model.predict_proba(df)[0][1])
-        pred = int(prob >= 0.5)
         if DEBUG:
-            print("ROW:", df.values.tolist())
-            print("MARGIN:", model.predict(df, output_margin=True))
         return {
             "prediction": pred,
-            "fraud_probability": round(prob, 4)
         }
     except Exception as e:
@@ -157,7 +154,7 @@ iface = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(lines=10, label="JSON Input"),
     outputs="json",
-    title="Fraud Detection API",
     description="Submit JSON payload for fraud scoring."
 )

 import json
 from huggingface_hub import hf_hub_download
 MODEL_REPO = "shahviransh/fraud-detection"
+MODEL_FILE = "rf_model.pkl"
+DEBUG = False   # set True only for debugging
+# download & load model
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 model = joblib.load(model_path)
+# EXACT 47 FEATURE ORDER REQUIRED BY MODEL
 FEATURES = [
  "Transaction Amount","Quantity","Customer Age","Account Age Days",
  "Transaction Hour","Total Customer Transactions","Address Mismatch",
     hour = int(d["transaction_hour"])
     total_txn = max(float(d["total_customer_transactions"]), 1.0)
+    # -------- base --------
     row["Transaction Amount"] = amt
     row["Quantity"] = qty
     row["Customer Age"] = age
     row["Is Weekend"] = float(row["Day of Week"] >= 5)
     row["New Account"] = float(acc_days < 30)
+    # -------- engineered (realistic bounded values) --------
     row["Account Age Weeks"] = acc_days / 7
     row["Amount Log"] = np.log1p(amt)
     row["Quantity Log"] = np.log1p(qty)
     row["High Quantity Flag"] = float(qty > 3)
     row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
     row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
     row["Transaction Amount Ratio"] = min(amt / 100.0, 10)
     row["Time Since Last Transaction"] = 24.0
     row["Velocity Deviation"] = 0.3
     row["Suspicious Pattern"] = 0.0
     row["Amount Age Interaction"] = (amt * age) / 100.0
     row["Amount Velocity Interaction"] = (
         amt * row["Avg Daily Transaction Velocity"] / 10.0
     row["Weekend High Value"] = float(row["Is Weekend"] and amt > 500)
     row["High Risk Profile"] = float(row["Address Mismatch"] and amt > 1000)
+    # neutral
     row["Customer Location"] = 0.5
+    # -------- one-hot --------
     pm = f"Payment Method_{d['payment_method']}"
     if pm in row:
         row[pm] = 1.0
     else:
         row["Hour Bin_Night"] = 1.0
+    # -------- size buckets --------
     if amt < 50:
         row["Transaction Size_Very_Small"] = 1.0
     elif amt < 200:
     else:
         row["Transaction Size_Medium"] = 1.0
     df = pd.DataFrame([row])[FEATURES].astype(float)
     return df
     try:
         d = json.loads(input_json)
         df = build_feature_row(d)
         assert df.shape[1] == 47
+        proba = float(model.predict_proba(df)[0][1])
+        pred = int(proba >= 0.5)
         if DEBUG:
+            print("INPUT:", df.values.tolist())
+            print("PROB:", proba)
         return {
             "prediction": pred,
+            "fraud_probability": round(proba, 4)
         }
     except Exception as e:
     fn=predict,
     inputs=gr.Textbox(lines=10, label="JSON Input"),
     outputs="json",
+    title="Fraud Detection API (Random Forest)",
     description="Submit JSON payload for fraud scoring."
 )