"""Gradio app that scores a JSON transaction payload with a pre-trained fraud model.

The model is downloaded from the Hugging Face Hub and loaded once at import
time.  ``predict`` parses the JSON payload, ``build_feature_row`` expands it
into the 47-column frame listed in ``FEATURES``, and the positive-class
probability is returned as JSON.
"""

import json

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download

MODEL_REPO = "shahviransh/fraud-detection"
MODEL_FILE = "rf_model.pkl"
DEBUG = False  # set True only for debugging

# Download & load the model (runs once, at import time).
# NOTE(security): joblib.load unpickles the downloaded file, and unpickling can
# execute arbitrary code.  Only point MODEL_REPO at a repository you trust.
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
model = joblib.load(model_path)

# EXACT 47 FEATURE ORDER REQUIRED BY MODEL
FEATURES = [
    "Transaction Amount", "Quantity", "Customer Age", "Account Age Days",
    "Transaction Hour", "Total Customer Transactions", "Address Mismatch",
    "Day of Week", "Month", "Is Weekend", "New Account",
    "Transaction Amount Ratio", "Avg Daily Transaction Velocity",
    "Time Since Last Transaction", "Amount Deviation From History",
    "Product Category Diversity", "Amount Log", "Amount per Quantity",
    "Amount zscore", "Account Age Weeks", "Quantity Log", "High Amount Flag",
    "High Quantity Flag", "Unusual Hour Flag", "Amount Age Interaction",
    "Amount Velocity Interaction", "New Account High Value",
    "Weekend High Value", "High Risk Profile", "Velocity Deviation",
    "Suspicious Pattern", "Customer Location",
    "Payment Method_bank transfer", "Payment Method_credit card",
    "Payment Method_debit card", "Product Category_electronics",
    "Product Category_health & beauty", "Product Category_home & garden",
    "Product Category_toys & games", "Device Used_mobile",
    "Device Used_tablet", "Hour Bin_Evening", "Hour Bin_Morning",
    "Hour Bin_Night", "Transaction Size_Medium",
    "Transaction Size_Small", "Transaction Size_Very_Small",
]

# Column count checked before every prediction.
EXPECTED_FEATURE_COUNT = 47

# Payload keys that build_feature_row reads without a default.
REQUIRED_FIELDS = (
    "amount",
    "quantity",
    "customer_age",
    "account_age_days",
    "transaction_hour",
    "total_customer_transactions",
    "day_of_week",
    "month",
    "payment_method",
    "product_category",
    "device_used",
)

# (one-hot column prefix, payload key) for the categorical inputs.
_CATEGORICAL_FIELDS = (
    ("Payment Method", "payment_method"),
    ("Product Category", "product_category"),
    ("Device Used", "device_used"),
)


def build_feature_row(d):
    """Expand a transaction payload into the one-row frame the model consumes.

    Args:
        d: Mapping with the keys listed in ``REQUIRED_FIELDS`` plus the
            optional ``address_mismatch`` (defaults to ``False``).

    Returns:
        A single-row ``pandas.DataFrame`` of floats whose columns are exactly
        ``FEATURES``, in that order.

    Raises:
        KeyError: If a required key is absent from ``d``.
        ValueError, TypeError: If a value cannot be converted to a number.
    """
    row = dict.fromkeys(FEATURES, 0.0)

    amt = float(d["amount"])
    # Quantity, account age and transaction count are floored at 1 so the
    # divisions below never divide by zero.
    qty = max(float(d["quantity"]), 1.0)
    age = float(d["customer_age"])
    acc_days = max(float(d["account_age_days"]), 1.0)
    hour = int(d["transaction_hour"])
    total_txn = max(float(d["total_customer_transactions"]), 1.0)

    # -------- base --------
    address_mismatch = float(d.get("address_mismatch", False))
    day_of_week = int(d["day_of_week"])
    is_weekend = day_of_week >= 5
    is_new_account = acc_days < 30

    row["Transaction Amount"] = amt
    row["Quantity"] = qty
    row["Customer Age"] = age
    row["Account Age Days"] = acc_days
    row["Transaction Hour"] = hour
    row["Total Customer Transactions"] = total_txn
    row["Address Mismatch"] = address_mismatch
    row["Day of Week"] = day_of_week
    row["Month"] = int(d["month"])
    row["Is Weekend"] = float(is_weekend)
    row["New Account"] = float(is_new_account)

    # -------- engineered (realistic bounded values) --------
    velocity = min(total_txn / acc_days, 5)  # capped at 5
    row["Account Age Weeks"] = acc_days / 7
    row["Amount Log"] = np.log1p(amt)
    row["Quantity Log"] = np.log1p(qty)
    row["Amount per Quantity"] = amt / qty
    row["High Amount Flag"] = float(amt > 500)
    row["High Quantity Flag"] = float(qty > 3)
    row["Unusual Hour Flag"] = float(hour < 6 or hour > 22)
    row["Avg Daily Transaction Velocity"] = velocity
    row["Transaction Amount Ratio"] = min(amt / 100.0, 10)  # capped at 10

    # Fixed placeholder values: these columns are not computed from the payload.
    row["Time Since Last Transaction"] = 24.0
    row["Amount Deviation From History"] = 0.2
    row["Product Category Diversity"] = 1.0
    row["Amount zscore"] = 0.3
    row["Velocity Deviation"] = 0.3
    row["Suspicious Pattern"] = 0.0

    row["Amount Age Interaction"] = (amt * age) / 100.0
    row["Amount Velocity Interaction"] = amt * velocity / 10.0
    row["New Account High Value"] = float(is_new_account and amt > 500)
    row["Weekend High Value"] = float(is_weekend and amt > 500)
    row["High Risk Profile"] = float(bool(address_mismatch) and amt > 1000)

    # neutral
    row["Customer Location"] = 0.5

    # -------- one-hot --------
    # Every one-hot column in FEATURES is lower-case, so the incoming value is
    # normalised before matching; otherwise "Credit Card" or "Mobile" would
    # silently fall through to the all-zero baseline.  Values with no matching
    # column still leave every column of that group at 0.
    for prefix, field in _CATEGORICAL_FIELDS:
        column = f"{prefix}_{str(d[field]).strip().lower()}"
        if column in row:
            row[column] = 1.0

    # -------- hour bins --------
    # NOTE(review): FEATURES has no "Hour Bin_Afternoon" column, which suggests
    # afternoon may have been the dropped baseline at training time.  This code
    # maps 12-17 to Evening and everything outside 6-17 to Night -- confirm
    # against the training pipeline before changing.
    if 6 <= hour < 12:
        row["Hour Bin_Morning"] = 1.0
    elif 12 <= hour < 18:
        row["Hour Bin_Evening"] = 1.0
    else:
        row["Hour Bin_Night"] = 1.0

    # -------- size buckets --------
    # NOTE(review): there is no "Transaction Size_Large" column either, yet every
    # amount >= 200 is labelled Medium here -- confirm the training thresholds.
    if amt < 50:
        row["Transaction Size_Very_Small"] = 1.0
    elif amt < 200:
        row["Transaction Size_Small"] = 1.0
    else:
        row["Transaction Size_Medium"] = 1.0

    # Re-index by FEATURES so column order never depends on dict insertion order.
    return pd.DataFrame([row])[FEATURES].astype(float)


def predict(input_json):
    """Score one transaction supplied as a JSON string.

    Args:
        input_json: JSON text encoding an object with the keys in
            ``REQUIRED_FIELDS`` (and optionally ``address_mismatch``).

    Returns:
        ``{"prediction": 0 or 1, "fraud_probability": float}`` on success,
        where the prediction is 1 when the probability is at least 0.5, or
        ``{"error": message}`` when the payload is invalid or scoring fails.
        This function never raises.
    """
    try:
        d = json.loads(input_json)
    except (TypeError, ValueError) as e:  # JSONDecodeError is a ValueError
        return {"error": f"Invalid JSON: {e}"}

    if not isinstance(d, dict):
        return {"error": "JSON payload must be an object"}

    # Report every missing key at once instead of a bare KeyError repr.
    missing = [field for field in REQUIRED_FIELDS if field not in d]
    if missing:
        return {"error": "Missing required field(s): " + ", ".join(missing)}

    # Broad catch is deliberate: this is the API boundary, and any failure
    # must come back as a JSON error instead of crashing the UI handler.
    try:
        df = build_feature_row(d)
        # Explicit check rather than `assert`, which is stripped under -O.
        if df.shape[1] != EXPECTED_FEATURE_COUNT:
            raise ValueError(
                f"Expected {EXPECTED_FEATURE_COUNT} features, got {df.shape[1]}"
            )

        proba = float(model.predict_proba(df)[0][1])
        pred = int(proba >= 0.5)

        if DEBUG:
            print("INPUT:", df.values.tolist())
            print("PROB:", proba)

        return {
            "prediction": pred,
            "fraud_probability": round(proba, 4),
        }
    except Exception as e:
        return {"error": str(e)}


iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=10, label="JSON Input"),
    outputs="json",
    title="Fraud Detection API (Random Forest)",
    description="Submit JSON payload for fraud scoring.",
)

if __name__ == "__main__":
    iface.launch()