Sahithi27 committed on
Commit
32a3d15
·
verified ·
1 Parent(s): 402d09b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -103
app.py CHANGED
@@ -1,119 +1,45 @@
1
- import pandas as pd
2
- import numpy as np
3
  import joblib
4
 
5
- from xgboost import XGBClassifier
6
- from sklearn.model_selection import train_test_split
7
- from sklearn.metrics import roc_auc_score, classification_report
8
-
9
- import onnxmltools
10
- from onnxmltools.convert.common.data_types import FloatTensorType
11
 
12
 
13
def main():
    """Train the collusion-fraud XGBoost classifier and export its artifacts.

    Steps, in order:

    1. Read ``synthetic_collusion_1M.csv`` and derive ``hour`` and
       ``day_of_week`` from the ``timestamp`` column.
    2. Add per-user, per-driver and per-(user, driver) transaction counts.
    3. Make a stratified 80/20 train/test split.
    4. Fit an ``XGBClassifier`` with a class-ratio ``scale_pos_weight``.
    5. Print ROC-AUC and a classification report at a 0.7 threshold.
    6. Save the model and the feature order with ``joblib``.
    7. Convert the booster to ONNX and write it to disk.

    All input and output paths are relative to the current working directory.
    """
    # ---- 1. Load data -------------------------------------------------
    frame = pd.read_csv("synthetic_collusion_1M.csv")

    # errors="coerce" turns unparseable timestamps into NaT; the derived
    # columns are then NaN for those rows and get zero-filled further down.
    parsed_ts = pd.to_datetime(frame["timestamp"], errors="coerce")
    frame["timestamp"] = parsed_ts
    frame["hour"] = parsed_ts.dt.hour
    frame["day_of_week"] = parsed_ts.dt.dayofweek

    # ---- 2. Feature engineering ---------------------------------------
    # NOTE(review): these counts are computed over the whole dataset, i.e.
    # before the train/test split below, so test rows contribute to them.
    count_specs = {
        "user_txn_count": "user_id",
        "driver_txn_count": "driver_id",
        "user_driver_pair_count": ["user_id", "driver_id"],
    }
    for column, group_keys in count_specs.items():
        frame[column] = (
            frame.groupby(group_keys)["transaction_id"].transform("count")
        )

    feature_columns = [
        "amount",
        "user_txn_count",
        "driver_txn_count",
        "user_driver_pair_count",
        "hour",
        "day_of_week",
    ]
    n_features = len(feature_columns)

    X = frame[feature_columns].fillna(0)
    y = frame["is_collusion_fraud"]

    # ---- 3. Train / test split ----------------------------------------
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )

    # ---- 4. Train XGBoost ---------------------------------------------
    # Weight the positive class by the negative/positive ratio of the
    # training labels (labels are looked up as 0 and 1).
    label_counts = y_train.value_counts()
    hyperparams = {
        "n_estimators": 300,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "scale_pos_weight": label_counts[0] / label_counts[1],
        "base_score": 0.5,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "random_state": 42,
        "n_jobs": -1,
    }
    classifier = XGBClassifier(**hyperparams)
    classifier.fit(X_train, y_train)

    # ---- 5. Evaluation ------------------------------------------------
    fraud_prob = classifier.predict_proba(X_test)[:, 1]
    flagged = (fraud_prob > 0.7).astype(int)

    print("\n=== MODEL EVALUATION ===")
    print("ROC-AUC:", roc_auc_score(y_test, fraud_prob))
    print(classification_report(y_test, flagged))

    # ---- 6. Save model artifacts --------------------------------------
    joblib.dump(classifier, "collusion_xgb_model.joblib")
    joblib.dump(feature_columns, "feature_order.joblib")
    print("✅ Model and feature order saved")

    # ---- 7. Convert to ONNX -------------------------------------------
    booster = classifier.get_booster()

    # Rename features to f0, f1, f2... (required by onnxmltools). This is
    # done after the joblib dump above, so the saved model is unaffected.
    booster.feature_names = [f"f{idx}" for idx in range(n_features)]

    input_signature = [("float_input", FloatTensorType([None, n_features]))]
    onnx_model = onnxmltools.convert_xgboost(
        booster,
        initial_types=input_signature,
        target_opset=12,
    )

    with open("collusion_xgb_model.onnx", "wb") as onnx_file:
        onnx_file.write(onnx_model.SerializeToString())

    print("✅ ONNX model exported successfully")


if __name__ == "__main__":
    main()
119
-
 
1
+ import os
2
+ import onnx
3
  import joblib
4
 
5
ONNX_PATH = "collusion_xgb_model.onnx"
FEATURES_PATH = "feature_order.joblib"


def main():
    """Check that the inference artifacts exist and that the model is valid.

    The check runs in three steps and prints a progress line after each:

    1. Confirm that ``ONNX_PATH`` is an existing regular file.
    2. Load the model with ``onnx.load`` and run ``onnx.checker.check_model``.
    3. If ``FEATURES_PATH`` is present, load it and print the feature order;
       otherwise report that it is missing and carry on.

    Both paths are resolved relative to the current working directory.

    Raises:
        FileNotFoundError: If ``ONNX_PATH`` does not name a regular file.
            A directory with that name is rejected here too, instead of
            failing later inside ``onnx.load``.
    """
    print("🔍 Hugging Face inference environment check")

    # -----------------------------
    # 1. Check ONNX model
    # -----------------------------
    # isfile() rather than exists(): a directory is not a usable model.
    if not os.path.isfile(ONNX_PATH):
        raise FileNotFoundError(
            f"ONNX model not found. Expected {ONNX_PATH}"
        )

    print("ONNX model found")

    # -----------------------------
    # 2. Load & verify ONNX
    # -----------------------------
    model = onnx.load(ONNX_PATH)
    onnx.checker.check_model(model)

    print("ONNX model is valid")

    # -----------------------------
    # 3. Feature order check (optional)
    # -----------------------------
    if os.path.isfile(FEATURES_PATH):
        # SECURITY NOTE: joblib.load unpickles the file, which can execute
        # arbitrary code. Only load a feature_order.joblib you trust.
        features = joblib.load(FEATURES_PATH)
        print("Feature order loaded:", features)
    else:
        print(f"{FEATURES_PATH} not found (ok for inference-only)")

    print("\n🚀 Environment ready for inference")
    print("➡️ Use app.py to serve predictions")


if __name__ == "__main__":
    main()