ArthurGamaJorge committed on
Commit
68c8e02
·
1 Parent(s): 62c2783

Corrigir diretórios

Browse files
stacking_fraude_model_4.pkl → ai/model/stacking_fraude_model_4.pkl RENAMED
File without changes
thresholds.json → ai/model/thresholds.json RENAMED
@@ -1,116 +1,116 @@
1
- {
2
- "generated_at": "2025-11-20T02:38:35.138608+00:00",
3
- "dataset_size": 4483956,
4
- "class_distribution_original": {
5
- "0": 4439775,
6
- "1": 44181
7
- },
8
- "train_non_frauds_max_used": 400000,
9
- "train_class_distribution_before_balance": {
10
- "0": 2663864,
11
- "1": 26509
12
- },
13
- "train_balanced_class_distribution": {
14
- "0": 400000,
15
- "1": 26509
16
- },
17
- "auc_roc": 0.8857282388447254,
18
- "threshold": 0.44144144144144143,
19
- "selection": {
20
- "mode": "best_f1",
21
- "best_f1_threshold": 0.44144144144144143,
22
- "best_f1_value": 0.6439178036627154
23
- },
24
- "metrics_at_threshold": {
25
- "accuracy": 0.9947055727526561,
26
- "precision": 0.9544241885282347,
27
- "recall": 0.48585332729741965,
28
- "f1": 0.6439178041097945
29
- },
30
- "feature_importances_xgb_gain_top20": [
31
- {
32
- "feature": "qtas_fraudes_cartao",
33
- "gain": 0.32888033986091614
34
- },
35
- {
36
- "feature": "fraude_ratio_terminal",
37
- "gain": 0.18363086879253387
38
- },
39
- {
40
- "feature": "fraude_ratio_cartao",
41
- "gain": 0.10315709561109543
42
- },
43
- {
44
- "feature": "qtas_fraudes_terminal",
45
- "gain": 0.09818089008331299
46
- },
47
- {
48
- "feature": "entropia_geografica_cartao",
49
- "gain": 0.0293036587536335
50
- },
51
- {
52
- "feature": "longitude",
53
- "gain": 0.023642411455512047
54
- },
55
- {
56
- "feature": "latitude",
57
- "gain": 0.02150840125977993
58
- },
59
- {
60
- "feature": "qtas_transacoes_terminal_dia",
61
- "gain": 0.02116469480097294
62
- },
63
- {
64
- "feature": "desvio_padrao_valor_cartao",
65
- "gain": 0.02007487416267395
66
- },
67
- {
68
- "feature": "valor_medio_cartao",
69
- "gain": 0.019987693056464195
70
- },
71
- {
72
- "feature": "desvio_padrao_valor_terminal",
73
- "gain": 0.019812680780887604
74
- },
75
- {
76
- "feature": "tx_amount",
77
- "gain": 0.019712846726179123
78
- },
79
- {
80
- "feature": "velocidade_ultima_transacao",
81
- "gain": 0.01963718980550766
82
- },
83
- {
84
- "feature": "dias_desde_primeira_transacao_do_cartao",
85
- "gain": 0.01920768804848194
86
- },
87
- {
88
- "feature": "valor_medio_terminal",
89
- "gain": 0.01920476369559765
90
- },
91
- {
92
- "feature": "dias_desde_inicio_terminal",
93
- "gain": 0.018486132845282555
94
- },
95
- {
96
- "feature": "tx_hour",
97
- "gain": 0.01793355494737625
98
- },
99
- {
100
- "feature": "qtas_transacoes_cartao_dia",
101
- "gain": 0.01647430658340454
102
- }
103
- ],
104
- "base_model_meta_weights": {
105
- "rf": 0.9984035731210663,
106
- "xgb": 0.001111504578919198,
107
- "et": 0.000484922300014473
108
- },
109
- "base_model_auc_test": {
110
- "rf": 0.881155250373531,
111
- "xgb": 0.8867136661938362,
112
- "et": 0.8830867877996174
113
- },
114
- "model_path": "./models\\stacking_fraude_model_4.pkl",
115
- "calibrated_saved": true
116
  }
 
1
+ {
2
+ "generated_at": "2025-11-20T02:38:35.138608+00:00",
3
+ "dataset_size": 4483956,
4
+ "class_distribution_original": {
5
+ "0": 4439775,
6
+ "1": 44181
7
+ },
8
+ "train_non_frauds_max_used": 400000,
9
+ "train_class_distribution_before_balance": {
10
+ "0": 2663864,
11
+ "1": 26509
12
+ },
13
+ "train_balanced_class_distribution": {
14
+ "0": 400000,
15
+ "1": 26509
16
+ },
17
+ "auc_roc": 0.8857282388447254,
18
+ "threshold": 0.44144144144144143,
19
+ "selection": {
20
+ "mode": "best_f1",
21
+ "best_f1_threshold": 0.44144144144144143,
22
+ "best_f1_value": 0.6439178036627154
23
+ },
24
+ "metrics_at_threshold": {
25
+ "accuracy": 0.9947055727526561,
26
+ "precision": 0.9544241885282347,
27
+ "recall": 0.48585332729741965,
28
+ "f1": 0.6439178041097945
29
+ },
30
+ "feature_importances_xgb_gain_top20": [
31
+ {
32
+ "feature": "qtas_fraudes_cartao",
33
+ "gain": 0.32888033986091614
34
+ },
35
+ {
36
+ "feature": "fraude_ratio_terminal",
37
+ "gain": 0.18363086879253387
38
+ },
39
+ {
40
+ "feature": "fraude_ratio_cartao",
41
+ "gain": 0.10315709561109543
42
+ },
43
+ {
44
+ "feature": "qtas_fraudes_terminal",
45
+ "gain": 0.09818089008331299
46
+ },
47
+ {
48
+ "feature": "entropia_geografica_cartao",
49
+ "gain": 0.0293036587536335
50
+ },
51
+ {
52
+ "feature": "longitude",
53
+ "gain": 0.023642411455512047
54
+ },
55
+ {
56
+ "feature": "latitude",
57
+ "gain": 0.02150840125977993
58
+ },
59
+ {
60
+ "feature": "qtas_transacoes_terminal_dia",
61
+ "gain": 0.02116469480097294
62
+ },
63
+ {
64
+ "feature": "desvio_padrao_valor_cartao",
65
+ "gain": 0.02007487416267395
66
+ },
67
+ {
68
+ "feature": "valor_medio_cartao",
69
+ "gain": 0.019987693056464195
70
+ },
71
+ {
72
+ "feature": "desvio_padrao_valor_terminal",
73
+ "gain": 0.019812680780887604
74
+ },
75
+ {
76
+ "feature": "tx_amount",
77
+ "gain": 0.019712846726179123
78
+ },
79
+ {
80
+ "feature": "velocidade_ultima_transacao",
81
+ "gain": 0.01963718980550766
82
+ },
83
+ {
84
+ "feature": "dias_desde_primeira_transacao_do_cartao",
85
+ "gain": 0.01920768804848194
86
+ },
87
+ {
88
+ "feature": "valor_medio_terminal",
89
+ "gain": 0.01920476369559765
90
+ },
91
+ {
92
+ "feature": "dias_desde_inicio_terminal",
93
+ "gain": 0.018486132845282555
94
+ },
95
+ {
96
+ "feature": "tx_hour",
97
+ "gain": 0.01793355494737625
98
+ },
99
+ {
100
+ "feature": "qtas_transacoes_cartao_dia",
101
+ "gain": 0.01647430658340454
102
+ }
103
+ ],
104
+ "base_model_meta_weights": {
105
+ "rf": 0.9984035731210663,
106
+ "xgb": 0.001111504578919198,
107
+ "et": 0.000484922300014473
108
+ },
109
+ "base_model_auc_test": {
110
+ "rf": 0.881155250373531,
111
+ "xgb": 0.8867136661938362,
112
+ "et": 0.8830867877996174
113
+ },
114
+ "model_path": "./models\\stacking_fraude_model_4.pkl",
115
+ "calibrated_saved": true
116
  }
api/app.py CHANGED
@@ -13,7 +13,8 @@ from pydantic import BaseModel, Field
13
 
14
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
15
  ROOT_DIR = os.path.dirname(APP_DIR)
16
- MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_4.pkl")
 
17
  FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
18
  PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
19
 
@@ -38,6 +39,7 @@ _CARD_MEDIANS: Dict[str, Dict[str, float]] = {}
38
  _TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
39
  _CARD_IDS: Optional[set] = None
40
  _TERM_IDS: Optional[set] = None
 
41
 
42
 
43
  CARD_FEATURES = {
@@ -73,23 +75,31 @@ def _normalize_id(val: Optional[Any]) -> Optional[str]:
73
 
74
 
75
  def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
76
- y_pred = ensemble.predict(X)
77
  y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
78
  items: List[Dict[str, Any]] = []
79
- for i in range(len(X)):
80
- pred_class = int(y_pred[i])
81
- is_fraud = bool(pred_class == 1)
82
- prob_fraud = None
83
- if y_prob is not None:
84
  pp = y_prob[i]
85
  if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
86
  prob_fraud = float(pp[1])
87
  else:
88
  prob_fraud = float(pp)
89
- items.append({
90
- "is_fraud": bool(is_fraud),
91
- "fraud_probability": prob_fraud,
92
- })
 
 
 
 
 
 
 
 
 
 
 
93
  return {"items": items}
94
 
95
 
@@ -104,11 +114,22 @@ app.add_middleware(
104
 
105
 
106
  def _load_model_and_features():
107
- global _MODEL, _FEATURES
108
  if _MODEL is None:
109
  if not os.path.exists(MODEL_PATH):
110
  raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
111
  _MODEL = joblib.load(MODEL_PATH)
 
 
 
 
 
 
 
 
 
 
 
112
  if _FEATURES is None:
113
  feat_from_model = getattr(_MODEL, "feature_names_in_", None)
114
  if feat_from_model is not None:
 
13
 
14
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
15
  ROOT_DIR = os.path.dirname(APP_DIR)
16
+ MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_calibrated.pkl")
17
+ THRESHOLDS_PATH = os.path.join(ROOT_DIR, "ai", "models", "thresholds.json")
18
  FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
19
  PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
20
 
 
39
  _TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
40
  _CARD_IDS: Optional[set] = None
41
  _TERM_IDS: Optional[set] = None
42
+ _ACTIVE_THRESHOLD: float = 0.5
43
 
44
 
45
  CARD_FEATURES = {
 
75
 
76
 
77
  def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
78
+ global _ACTIVE_THRESHOLD
79
  y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
80
  items: List[Dict[str, Any]] = []
81
+ if y_prob is not None:
82
+ for i in range(len(X)):
 
 
 
83
  pp = y_prob[i]
84
  if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
85
  prob_fraud = float(pp[1])
86
  else:
87
  prob_fraud = float(pp)
88
+ is_fraud = prob_fraud >= _ACTIVE_THRESHOLD
89
+ items.append({
90
+ "is_fraud": bool(is_fraud),
91
+ "fraud_probability": prob_fraud,
92
+ "threshold_used": _ACTIVE_THRESHOLD
93
+ })
94
+ else:
95
+ y_pred = ensemble.predict(X)
96
+ for i in range(len(X)):
97
+ pred_class = int(y_pred[i])
98
+ items.append({
99
+ "is_fraud": bool(pred_class == 1),
100
+ "fraud_probability": None,
101
+ "threshold_used": None
102
+ })
103
  return {"items": items}
104
 
105
 
 
114
 
115
 
116
  def _load_model_and_features():
117
+ global _MODEL, _FEATURES, _ACTIVE_THRESHOLD
118
  if _MODEL is None:
119
  if not os.path.exists(MODEL_PATH):
120
  raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
121
  _MODEL = joblib.load(MODEL_PATH)
122
+ if os.path.exists(THRESHOLDS_PATH):
123
+ try:
124
+ import json
125
+ with open(THRESHOLDS_PATH, 'r', encoding='utf-8') as f:
126
+ data = json.load(f)
127
+ val = data.get('threshold')
128
+ if isinstance(val, (int, float)):
129
+ _ACTIVE_THRESHOLD = float(val)
130
+ print(f"[Threshold] Carregado threshold único: {_ACTIVE_THRESHOLD:.4f}")
131
+ except Exception as e:
132
+ print(f"[Threshold] Falha ao carregar threshold: {e}; usando 0.5")
133
  if _FEATURES is None:
134
  feat_from_model = getattr(_MODEL, "feature_names_in_", None)
135
  if feat_from_model is not None: