Spaces:

ArthurGamaJorge
/

Unfraud-api

Sleeping

App Files Community

ArthurGamaJorge committed on Nov 20, 2025

Commit

68c8e02

1 Parent(s): 62c2783

Corrigir diretórios

Browse files

Files changed (3) hide show

stacking_fraude_model_4.pkl → ai/model/stacking_fraude_model_4.pkl +0 -0
thresholds.json → ai/model/thresholds.json +115 -115
api/app.py +33 -12

stacking_fraude_model_4.pkl → ai/model/stacking_fraude_model_4.pkl RENAMED Viewed

File without changes

thresholds.json → ai/model/thresholds.json RENAMED Viewed

@@ -1,116 +1,116 @@
-{
-  "generated_at": "2025-11-20T02:38:35.138608+00:00",
-  "dataset_size": 4483956,
-  "class_distribution_original": {
-    "0": 4439775,
-    "1": 44181
-  },
-  "train_non_frauds_max_used": 400000,
-  "train_class_distribution_before_balance": {
-    "0": 2663864,
-    "1": 26509
-  },
-  "train_balanced_class_distribution": {
-    "0": 400000,
-    "1": 26509
-  },
-  "auc_roc": 0.8857282388447254,
-  "threshold": 0.44144144144144143,
-  "selection": {
-    "mode": "best_f1",
-    "best_f1_threshold": 0.44144144144144143,
-    "best_f1_value": 0.6439178036627154
-  },
-  "metrics_at_threshold": {
-    "accuracy": 0.9947055727526561,
-    "precision": 0.9544241885282347,
-    "recall": 0.48585332729741965,
-    "f1": 0.6439178041097945
-  },
-  "feature_importances_xgb_gain_top20": [
-    {
-      "feature": "qtas_fraudes_cartao",
-      "gain": 0.32888033986091614
-    },
-    {
-      "feature": "fraude_ratio_terminal",
-      "gain": 0.18363086879253387
-    },
-    {
-      "feature": "fraude_ratio_cartao",
-      "gain": 0.10315709561109543
-    },
-    {
-      "feature": "qtas_fraudes_terminal",
-      "gain": 0.09818089008331299
-    },
-    {
-      "feature": "entropia_geografica_cartao",
-      "gain": 0.0293036587536335
-    },
-    {
-      "feature": "longitude",
-      "gain": 0.023642411455512047
-    },
-    {
-      "feature": "latitude",
-      "gain": 0.02150840125977993
-    },
-    {
-      "feature": "qtas_transacoes_terminal_dia",
-      "gain": 0.02116469480097294
-    },
-    {
-      "feature": "desvio_padrao_valor_cartao",
-      "gain": 0.02007487416267395
-    },
-    {
-      "feature": "valor_medio_cartao",
-      "gain": 0.019987693056464195
-    },
-    {
-      "feature": "desvio_padrao_valor_terminal",
-      "gain": 0.019812680780887604
-    },
-    {
-      "feature": "tx_amount",
-      "gain": 0.019712846726179123
-    },
-    {
-      "feature": "velocidade_ultima_transacao",
-      "gain": 0.01963718980550766
-    },
-    {
-      "feature": "dias_desde_primeira_transacao_do_cartao",
-      "gain": 0.01920768804848194
-    },
-    {
-      "feature": "valor_medio_terminal",
-      "gain": 0.01920476369559765
-    },
-    {
-      "feature": "dias_desde_inicio_terminal",
-      "gain": 0.018486132845282555
-    },
-    {
-      "feature": "tx_hour",
-      "gain": 0.01793355494737625
-    },
-    {
-      "feature": "qtas_transacoes_cartao_dia",
-      "gain": 0.01647430658340454
-    }
-  ],
-  "base_model_meta_weights": {
-    "rf": 0.9984035731210663,
-    "xgb": 0.001111504578919198,
-    "et": 0.000484922300014473
-  },
-  "base_model_auc_test": {
-    "rf": 0.881155250373531,
-    "xgb": 0.8867136661938362,
-    "et": 0.8830867877996174
-  },
-  "model_path": "./models\\stacking_fraude_model_4.pkl",
-  "calibrated_saved": true
 }

+{
+  "generated_at": "2025-11-20T02:38:35.138608+00:00",
+  "dataset_size": 4483956,
+  "class_distribution_original": {
+    "0": 4439775,
+    "1": 44181
+  },
+  "train_non_frauds_max_used": 400000,
+  "train_class_distribution_before_balance": {
+    "0": 2663864,
+    "1": 26509
+  },
+  "train_balanced_class_distribution": {
+    "0": 400000,
+    "1": 26509
+  },
+  "auc_roc": 0.8857282388447254,
+  "threshold": 0.44144144144144143,
+  "selection": {
+    "mode": "best_f1",
+    "best_f1_threshold": 0.44144144144144143,
+    "best_f1_value": 0.6439178036627154
+  },
+  "metrics_at_threshold": {
+    "accuracy": 0.9947055727526561,
+    "precision": 0.9544241885282347,
+    "recall": 0.48585332729741965,
+    "f1": 0.6439178041097945
+  },
+  "feature_importances_xgb_gain_top20": [
+    {
+      "feature": "qtas_fraudes_cartao",
+      "gain": 0.32888033986091614
+    },
+    {
+      "feature": "fraude_ratio_terminal",
+      "gain": 0.18363086879253387
+    },
+    {
+      "feature": "fraude_ratio_cartao",
+      "gain": 0.10315709561109543
+    },
+    {
+      "feature": "qtas_fraudes_terminal",
+      "gain": 0.09818089008331299
+    },
+    {
+      "feature": "entropia_geografica_cartao",
+      "gain": 0.0293036587536335
+    },
+    {
+      "feature": "longitude",
+      "gain": 0.023642411455512047
+    },
+    {
+      "feature": "latitude",
+      "gain": 0.02150840125977993
+    },
+    {
+      "feature": "qtas_transacoes_terminal_dia",
+      "gain": 0.02116469480097294
+    },
+    {
+      "feature": "desvio_padrao_valor_cartao",
+      "gain": 0.02007487416267395
+    },
+    {
+      "feature": "valor_medio_cartao",
+      "gain": 0.019987693056464195
+    },
+    {
+      "feature": "desvio_padrao_valor_terminal",
+      "gain": 0.019812680780887604
+    },
+    {
+      "feature": "tx_amount",
+      "gain": 0.019712846726179123
+    },
+    {
+      "feature": "velocidade_ultima_transacao",
+      "gain": 0.01963718980550766
+    },
+    {
+      "feature": "dias_desde_primeira_transacao_do_cartao",
+      "gain": 0.01920768804848194
+    },
+    {
+      "feature": "valor_medio_terminal",
+      "gain": 0.01920476369559765
+    },
+    {
+      "feature": "dias_desde_inicio_terminal",
+      "gain": 0.018486132845282555
+    },
+    {
+      "feature": "tx_hour",
+      "gain": 0.01793355494737625
+    },
+    {
+      "feature": "qtas_transacoes_cartao_dia",
+      "gain": 0.01647430658340454
+    }
+  ],
+  "base_model_meta_weights": {
+    "rf": 0.9984035731210663,
+    "xgb": 0.001111504578919198,
+    "et": 0.000484922300014473
+  },
+  "base_model_auc_test": {
+    "rf": 0.881155250373531,
+    "xgb": 0.8867136661938362,
+    "et": 0.8830867877996174
+  },
+  "model_path": "./models\\stacking_fraude_model_4.pkl",
+  "calibrated_saved": true
 }

api/app.py CHANGED Viewed

@@ -13,7 +13,8 @@ from pydantic import BaseModel, Field
 APP_DIR = os.path.dirname(os.path.abspath(__file__))
 ROOT_DIR = os.path.dirname(APP_DIR)
-MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_4.pkl")
 FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
 PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
@@ -38,6 +39,7 @@ _CARD_MEDIANS: Dict[str, Dict[str, float]] = {}
 _TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
 _CARD_IDS: Optional[set] = None
 _TERM_IDS: Optional[set] = None
 CARD_FEATURES = {
@@ -73,23 +75,31 @@ def _normalize_id(val: Optional[Any]) -> Optional[str]:
 def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
-	y_pred = ensemble.predict(X)
 	y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
 	items: List[Dict[str, Any]] = []
-	for i in range(len(X)):
-		pred_class = int(y_pred[i])
-		is_fraud = bool(pred_class == 1)
-		prob_fraud = None
-		if y_prob is not None:
 			pp = y_prob[i]
 			if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
 				prob_fraud = float(pp[1])
 			else:
 				prob_fraud = float(pp)
-		items.append({
-			"is_fraud": bool(is_fraud),
-			"fraud_probability": prob_fraud,
-		})
 	return {"items": items}
@@ -104,11 +114,22 @@ app.add_middleware(
 def _load_model_and_features():
-	global _MODEL, _FEATURES
 	if _MODEL is None:
 		if not os.path.exists(MODEL_PATH):
 			raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
 		_MODEL = joblib.load(MODEL_PATH)
 	if _FEATURES is None:
 		feat_from_model = getattr(_MODEL, "feature_names_in_", None)
 		if feat_from_model is not None:

 APP_DIR = os.path.dirname(os.path.abspath(__file__))
 ROOT_DIR = os.path.dirname(APP_DIR)
+MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_calibrated.pkl")
+THRESHOLDS_PATH = os.path.join(ROOT_DIR, "ai", "models", "thresholds.json")
 FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
 PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
 _TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
 _CARD_IDS: Optional[set] = None
 _TERM_IDS: Optional[set] = None
+_ACTIVE_THRESHOLD: float = 0.5
 CARD_FEATURES = {
 def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
+	global _ACTIVE_THRESHOLD
 	y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
 	items: List[Dict[str, Any]] = []
+	if y_prob is not None:
+		for i in range(len(X)):
 			pp = y_prob[i]
 			if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
 				prob_fraud = float(pp[1])
 			else:
 				prob_fraud = float(pp)
+			is_fraud = prob_fraud >= _ACTIVE_THRESHOLD
+			items.append({
+				"is_fraud": bool(is_fraud),
+				"fraud_probability": prob_fraud,
+				"threshold_used": _ACTIVE_THRESHOLD
+			})
+	else:
+		y_pred = ensemble.predict(X)
+		for i in range(len(X)):
+			pred_class = int(y_pred[i])
+			items.append({
+				"is_fraud": bool(pred_class == 1),
+				"fraud_probability": None,
+				"threshold_used": None
+			})
 	return {"items": items}
 def _load_model_and_features():
+	global _MODEL, _FEATURES, _ACTIVE_THRESHOLD
 	if _MODEL is None:
 		if not os.path.exists(MODEL_PATH):
 			raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
 		_MODEL = joblib.load(MODEL_PATH)
+		if os.path.exists(THRESHOLDS_PATH):
+			try:
+				import json
+				with open(THRESHOLDS_PATH, 'r', encoding='utf-8') as f:
+					data = json.load(f)
+					val = data.get('threshold')
+					if isinstance(val, (int, float)):
+						_ACTIVE_THRESHOLD = float(val)
+						print(f"[Threshold] Carregado threshold único: {_ACTIVE_THRESHOLD:.4f}")
+			except Exception as e:
+				print(f"[Threshold] Falha ao carregar threshold: {e}; usando 0.5")
 	if _FEATURES is None:
 		feat_from_model = getattr(_MODEL, "feature_names_in_", None)
 		if feat_from_model is not None: