# demoaccta's picture
# Update app.py
# 58828d8 verified
import gradio as gr
import joblib
import numpy as np
import pandas as pd
import json
from huggingface_hub import hf_hub_download
# Hugging Face Hub repository and file name of the serialized model artifact.
MODEL_REPO = "shahviransh/fraud-detection"
MODEL_FILE = "rf_model.pkl"
# When True, predict() prints the feature vector and the probability to stdout.
DEBUG = False # set True only for debugging
# download & load model
# Both calls run at import time, so importing this module fetches the artifact
# and deserializes it before anything else in the file executes.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load artifacts from a repository you trust.
# NOTE(review): no `revision` is passed, so the download presumably tracks the
# repository's default branch; consider pinning a commit -- TODO confirm.
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
model = joblib.load(model_path)
# Exact column order (47 features) required by the model. build_feature_row()
# returns its DataFrame with the columns in precisely this sequence.
FEATURES = [
    "Transaction Amount", "Quantity", "Customer Age", "Account Age Days",
    "Transaction Hour", "Total Customer Transactions", "Address Mismatch",
    "Day of Week", "Month", "Is Weekend", "New Account",
    "Transaction Amount Ratio", "Avg Daily Transaction Velocity",
    "Time Since Last Transaction", "Amount Deviation From History",
    "Product Category Diversity", "Amount Log", "Amount per Quantity",
    "Amount zscore", "Account Age Weeks", "Quantity Log", "High Amount Flag",
    "High Quantity Flag", "Unusual Hour Flag", "Amount Age Interaction",
    "Amount Velocity Interaction", "New Account High Value",
    "Weekend High Value", "High Risk Profile", "Velocity Deviation",
    "Suspicious Pattern", "Customer Location",
    "Payment Method_bank transfer", "Payment Method_credit card",
    "Payment Method_debit card", "Product Category_electronics",
    "Product Category_health & beauty", "Product Category_home & garden",
    "Product Category_toys & games", "Device Used_mobile",
    "Device Used_tablet", "Hour Bin_Evening", "Hour Bin_Morning",
    "Hour Bin_Night", "Transaction Size_Medium",
    "Transaction Size_Small", "Transaction Size_Very_Small",
]


def _bounded_int(d, key, low, high):
    """Return ``int(d[key])``, raising ValueError unless low <= value <= high."""
    value = int(d[key])
    if not low <= value <= high:
        raise ValueError(f"{key} must be between {low} and {high}, got {value}")
    return value


def _set_one_hot(row, prefix, raw_value):
    """Set ``row["<prefix>_<value>"]`` to 1.0 if that column exists.

    The value is stripped and lower-cased first because every category suffix
    in FEATURES for the one-hot prefixes used here is lower case; without this
    an input such as "Credit Card" would silently fall into the all-zero
    baseline. Values that match no column leave the row untouched.
    """
    column = f"{prefix}_{str(raw_value).strip().lower()}"
    if column in row:
        row[column] = 1.0


def build_feature_row(d):
    """Build the single-row model input for one transaction payload.

    Args:
        d: Mapping with the keys ``amount``, ``quantity``, ``customer_age``,
            ``account_age_days``, ``transaction_hour``,
            ``total_customer_transactions``, ``day_of_week``, ``month``,
            ``payment_method``, ``product_category`` and ``device_used``.
            ``address_mismatch`` is optional and defaults to False.

    Returns:
        A one-row ``pandas.DataFrame`` of floats whose columns are exactly
        ``FEATURES``, in that order.

    Raises:
        KeyError: A required key is missing from ``d``.
        ValueError: A value cannot be converted to a number, ``amount`` is
            negative or not finite, or ``transaction_hour`` (0-23),
            ``day_of_week`` (0-6) or ``month`` (1-12) is out of range.
    """
    row = {c: 0.0 for c in FEATURES}

    amt = float(d["amount"])
    # log1p is undefined at or below -1 and a non-finite amount would put
    # NaN/inf into the feature vector, so reject such input up front.
    if not np.isfinite(amt) or amt < 0:
        raise ValueError(f"amount must be a finite, non-negative number, got {amt}")
    # Quantity, account age and transaction count are floored at 1 so the
    # divisions below can never divide by zero.
    qty = max(float(d["quantity"]), 1.0)
    age = float(d["customer_age"])
    acc_days = max(float(d["account_age_days"]), 1.0)
    hour = _bounded_int(d, "transaction_hour", 0, 23)
    total_txn = max(float(d["total_customer_transactions"]), 1.0)

    # -------- base --------
    row["Transaction Amount"] = amt
    row["Quantity"] = qty
    row["Customer Age"] = age
    row["Account Age Days"] = acc_days
    row["Transaction Hour"] = hour
    row["Total Customer Transactions"] = total_txn
    row["Address Mismatch"] = float(d.get("address_mismatch", False))
    row["Day of Week"] = _bounded_int(d, "day_of_week", 0, 6)
    row["Month"] = _bounded_int(d, "month", 1, 12)
    # Days 5 and 6 count as the weekend.
    row["Is Weekend"] = float(row["Day of Week"] >= 5)
    row["New Account"] = float(acc_days < 30)

    # -------- engineered (bounded values) --------
    high_value = amt > 500
    row["Account Age Weeks"] = acc_days / 7
    row["Amount Log"] = np.log1p(amt)
    row["Quantity Log"] = np.log1p(qty)
    row["Amount per Quantity"] = amt / qty
    row["High Amount Flag"] = float(high_value)
    row["High Quantity Flag"] = float(qty > 3)
    row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
    row["Avg Daily Transaction Velocity"] = min(total_txn / acc_days, 5)
    row["Transaction Amount Ratio"] = min(amt / 100.0, 10)

    # Fixed placeholder values: none of the payload fields read above carries
    # per-customer transaction history, so these are constants.
    row["Time Since Last Transaction"] = 24.0
    row["Amount Deviation From History"] = 0.2
    row["Product Category Diversity"] = 1.0
    row["Amount zscore"] = 0.3
    row["Velocity Deviation"] = 0.3
    row["Suspicious Pattern"] = 0.0
    row["Customer Location"] = 0.5

    row["Amount Age Interaction"] = (amt * age) / 100.0
    row["Amount Velocity Interaction"] = (
        amt * row["Avg Daily Transaction Velocity"] / 10.0
    )
    row["New Account High Value"] = float(bool(row["New Account"]) and high_value)
    row["Weekend High Value"] = float(bool(row["Is Weekend"]) and high_value)
    row["High Risk Profile"] = float(bool(row["Address Mismatch"]) and amt > 1000)

    # -------- one-hot --------
    _set_one_hot(row, "Payment Method", d["payment_method"])
    _set_one_hot(row, "Product Category", d["product_category"])
    _set_one_hot(row, "Device Used", d["device_used"])

    # -------- hour bins --------
    # NOTE(review): hours 12-17 are labelled "Evening" and every other hour
    # outside 6-11 is "Night". If the training pipeline used an "Afternoon"
    # baseline bin these thresholds are off -- verify against the training code.
    if 6 <= hour < 12:
        row["Hour Bin_Morning"] = 1.0
    elif 12 <= hour < 18:
        row["Hour Bin_Evening"] = 1.0
    else:
        row["Hour Bin_Night"] = 1.0

    # -------- size buckets --------
    # NOTE(review): every amount >= 200 is "Medium"; an all-zero (presumably
    # "Large") bucket is never produced -- verify against the training code.
    if amt < 50:
        row["Transaction Size_Very_Small"] = 1.0
    elif amt < 200:
        row["Transaction Size_Small"] = 1.0
    else:
        row["Transaction Size_Medium"] = 1.0

    # Selecting by FEATURES fixes the column order regardless of dict order.
    return pd.DataFrame([row])[FEATURES].astype(float)
def predict(input_json):
    """Score one transaction supplied as a JSON string.

    Args:
        input_json: JSON text encoding an object with the transaction fields
            consumed by build_feature_row().

    Returns:
        On success, ``{"prediction": 0 or 1, "fraud_probability": float}``
        where the prediction is 1 when the probability is at least 0.5 and the
        probability is rounded to four decimals. On any failure (invalid JSON,
        non-object payload, missing field, model error) ``{"error": message}``
        is returned instead of raising, so the UI always receives a JSON body.
    """
    try:
        d = json.loads(input_json)
        if not isinstance(d, dict):
            # Lists, numbers, strings and null are valid JSON but would fail
            # later with a cryptic indexing error.
            raise ValueError("JSON payload must be an object of transaction fields")

        try:
            df = build_feature_row(d)
        except KeyError as exc:
            # str(KeyError) is just the quoted key; make the message useful.
            raise ValueError(f"missing required field: {exc.args[0]}") from exc

        # Explicit check instead of `assert`, which is stripped under -O.
        if df.shape[1] != len(FEATURES):
            raise ValueError(
                f"expected {len(FEATURES)} features, got {df.shape[1]}"
            )

        # Column 1 of predict_proba is presumed to be the fraud class --
        # TODO confirm against the trained model's classes_.
        proba = float(model.predict_proba(df)[0][1])
        pred = int(proba >= 0.5)
        if DEBUG:
            print("INPUT:", df.values.tolist())
            print("PROB:", proba)
        return {
            "prediction": pred,
            "fraud_probability": round(proba, 4)
        }
    except Exception as e:
        # Top-level API boundary: report the failure to the caller as data.
        return {"error": str(e)}
# Gradio front end: a ten-line textbox collects the JSON payload, predict()
# scores it, and the returned dict is rendered with the "json" output type.
iface = gr.Interface(
    title="Fraud Detection API (Random Forest)",
    description="Submit JSON payload for fraud scoring.",
    fn=predict,
    inputs=gr.Textbox(label="JSON Input", lines=10),
    outputs="json",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()