Spaces:
Sleeping
Sleeping
Commit ·
68c8e02
1
Parent(s): 62c2783
Corrigir diretórios
Browse files
stacking_fraude_model_4.pkl → ai/model/stacking_fraude_model_4.pkl
RENAMED
|
File without changes
|
thresholds.json → ai/model/thresholds.json
RENAMED
|
@@ -1,116 +1,116 @@
|
|
| 1 |
-
{
|
| 2 |
-
"generated_at": "2025-11-20T02:38:35.138608+00:00",
|
| 3 |
-
"dataset_size": 4483956,
|
| 4 |
-
"class_distribution_original": {
|
| 5 |
-
"0": 4439775,
|
| 6 |
-
"1": 44181
|
| 7 |
-
},
|
| 8 |
-
"train_non_frauds_max_used": 400000,
|
| 9 |
-
"train_class_distribution_before_balance": {
|
| 10 |
-
"0": 2663864,
|
| 11 |
-
"1": 26509
|
| 12 |
-
},
|
| 13 |
-
"train_balanced_class_distribution": {
|
| 14 |
-
"0": 400000,
|
| 15 |
-
"1": 26509
|
| 16 |
-
},
|
| 17 |
-
"auc_roc": 0.8857282388447254,
|
| 18 |
-
"threshold": 0.44144144144144143,
|
| 19 |
-
"selection": {
|
| 20 |
-
"mode": "best_f1",
|
| 21 |
-
"best_f1_threshold": 0.44144144144144143,
|
| 22 |
-
"best_f1_value": 0.6439178036627154
|
| 23 |
-
},
|
| 24 |
-
"metrics_at_threshold": {
|
| 25 |
-
"accuracy": 0.9947055727526561,
|
| 26 |
-
"precision": 0.9544241885282347,
|
| 27 |
-
"recall": 0.48585332729741965,
|
| 28 |
-
"f1": 0.6439178041097945
|
| 29 |
-
},
|
| 30 |
-
"feature_importances_xgb_gain_top20": [
|
| 31 |
-
{
|
| 32 |
-
"feature": "qtas_fraudes_cartao",
|
| 33 |
-
"gain": 0.32888033986091614
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"feature": "fraude_ratio_terminal",
|
| 37 |
-
"gain": 0.18363086879253387
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"feature": "fraude_ratio_cartao",
|
| 41 |
-
"gain": 0.10315709561109543
|
| 42 |
-
},
|
| 43 |
-
{
|
| 44 |
-
"feature": "qtas_fraudes_terminal",
|
| 45 |
-
"gain": 0.09818089008331299
|
| 46 |
-
},
|
| 47 |
-
{
|
| 48 |
-
"feature": "entropia_geografica_cartao",
|
| 49 |
-
"gain": 0.0293036587536335
|
| 50 |
-
},
|
| 51 |
-
{
|
| 52 |
-
"feature": "longitude",
|
| 53 |
-
"gain": 0.023642411455512047
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"feature": "latitude",
|
| 57 |
-
"gain": 0.02150840125977993
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"feature": "qtas_transacoes_terminal_dia",
|
| 61 |
-
"gain": 0.02116469480097294
|
| 62 |
-
},
|
| 63 |
-
{
|
| 64 |
-
"feature": "desvio_padrao_valor_cartao",
|
| 65 |
-
"gain": 0.02007487416267395
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"feature": "valor_medio_cartao",
|
| 69 |
-
"gain": 0.019987693056464195
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"feature": "desvio_padrao_valor_terminal",
|
| 73 |
-
"gain": 0.019812680780887604
|
| 74 |
-
},
|
| 75 |
-
{
|
| 76 |
-
"feature": "tx_amount",
|
| 77 |
-
"gain": 0.019712846726179123
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
"feature": "velocidade_ultima_transacao",
|
| 81 |
-
"gain": 0.01963718980550766
|
| 82 |
-
},
|
| 83 |
-
{
|
| 84 |
-
"feature": "dias_desde_primeira_transacao_do_cartao",
|
| 85 |
-
"gain": 0.01920768804848194
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
"feature": "valor_medio_terminal",
|
| 89 |
-
"gain": 0.01920476369559765
|
| 90 |
-
},
|
| 91 |
-
{
|
| 92 |
-
"feature": "dias_desde_inicio_terminal",
|
| 93 |
-
"gain": 0.018486132845282555
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"feature": "tx_hour",
|
| 97 |
-
"gain": 0.01793355494737625
|
| 98 |
-
},
|
| 99 |
-
{
|
| 100 |
-
"feature": "qtas_transacoes_cartao_dia",
|
| 101 |
-
"gain": 0.01647430658340454
|
| 102 |
-
}
|
| 103 |
-
],
|
| 104 |
-
"base_model_meta_weights": {
|
| 105 |
-
"rf": 0.9984035731210663,
|
| 106 |
-
"xgb": 0.001111504578919198,
|
| 107 |
-
"et": 0.000484922300014473
|
| 108 |
-
},
|
| 109 |
-
"base_model_auc_test": {
|
| 110 |
-
"rf": 0.881155250373531,
|
| 111 |
-
"xgb": 0.8867136661938362,
|
| 112 |
-
"et": 0.8830867877996174
|
| 113 |
-
},
|
| 114 |
-
"model_path": "./models\\stacking_fraude_model_4.pkl",
|
| 115 |
-
"calibrated_saved": true
|
| 116 |
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"generated_at": "2025-11-20T02:38:35.138608+00:00",
|
| 3 |
+
"dataset_size": 4483956,
|
| 4 |
+
"class_distribution_original": {
|
| 5 |
+
"0": 4439775,
|
| 6 |
+
"1": 44181
|
| 7 |
+
},
|
| 8 |
+
"train_non_frauds_max_used": 400000,
|
| 9 |
+
"train_class_distribution_before_balance": {
|
| 10 |
+
"0": 2663864,
|
| 11 |
+
"1": 26509
|
| 12 |
+
},
|
| 13 |
+
"train_balanced_class_distribution": {
|
| 14 |
+
"0": 400000,
|
| 15 |
+
"1": 26509
|
| 16 |
+
},
|
| 17 |
+
"auc_roc": 0.8857282388447254,
|
| 18 |
+
"threshold": 0.44144144144144143,
|
| 19 |
+
"selection": {
|
| 20 |
+
"mode": "best_f1",
|
| 21 |
+
"best_f1_threshold": 0.44144144144144143,
|
| 22 |
+
"best_f1_value": 0.6439178036627154
|
| 23 |
+
},
|
| 24 |
+
"metrics_at_threshold": {
|
| 25 |
+
"accuracy": 0.9947055727526561,
|
| 26 |
+
"precision": 0.9544241885282347,
|
| 27 |
+
"recall": 0.48585332729741965,
|
| 28 |
+
"f1": 0.6439178041097945
|
| 29 |
+
},
|
| 30 |
+
"feature_importances_xgb_gain_top20": [
|
| 31 |
+
{
|
| 32 |
+
"feature": "qtas_fraudes_cartao",
|
| 33 |
+
"gain": 0.32888033986091614
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"feature": "fraude_ratio_terminal",
|
| 37 |
+
"gain": 0.18363086879253387
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"feature": "fraude_ratio_cartao",
|
| 41 |
+
"gain": 0.10315709561109543
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"feature": "qtas_fraudes_terminal",
|
| 45 |
+
"gain": 0.09818089008331299
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"feature": "entropia_geografica_cartao",
|
| 49 |
+
"gain": 0.0293036587536335
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"feature": "longitude",
|
| 53 |
+
"gain": 0.023642411455512047
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"feature": "latitude",
|
| 57 |
+
"gain": 0.02150840125977993
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"feature": "qtas_transacoes_terminal_dia",
|
| 61 |
+
"gain": 0.02116469480097294
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"feature": "desvio_padrao_valor_cartao",
|
| 65 |
+
"gain": 0.02007487416267395
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"feature": "valor_medio_cartao",
|
| 69 |
+
"gain": 0.019987693056464195
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"feature": "desvio_padrao_valor_terminal",
|
| 73 |
+
"gain": 0.019812680780887604
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"feature": "tx_amount",
|
| 77 |
+
"gain": 0.019712846726179123
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"feature": "velocidade_ultima_transacao",
|
| 81 |
+
"gain": 0.01963718980550766
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"feature": "dias_desde_primeira_transacao_do_cartao",
|
| 85 |
+
"gain": 0.01920768804848194
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"feature": "valor_medio_terminal",
|
| 89 |
+
"gain": 0.01920476369559765
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"feature": "dias_desde_inicio_terminal",
|
| 93 |
+
"gain": 0.018486132845282555
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"feature": "tx_hour",
|
| 97 |
+
"gain": 0.01793355494737625
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"feature": "qtas_transacoes_cartao_dia",
|
| 101 |
+
"gain": 0.01647430658340454
|
| 102 |
+
}
|
| 103 |
+
],
|
| 104 |
+
"base_model_meta_weights": {
|
| 105 |
+
"rf": 0.9984035731210663,
|
| 106 |
+
"xgb": 0.001111504578919198,
|
| 107 |
+
"et": 0.000484922300014473
|
| 108 |
+
},
|
| 109 |
+
"base_model_auc_test": {
|
| 110 |
+
"rf": 0.881155250373531,
|
| 111 |
+
"xgb": 0.8867136661938362,
|
| 112 |
+
"et": 0.8830867877996174
|
| 113 |
+
},
|
| 114 |
+
"model_path": "./models\\stacking_fraude_model_4.pkl",
|
| 115 |
+
"calibrated_saved": true
|
| 116 |
}
|
api/app.py
CHANGED
|
@@ -13,7 +13,8 @@ from pydantic import BaseModel, Field
|
|
| 13 |
|
| 14 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 15 |
ROOT_DIR = os.path.dirname(APP_DIR)
|
| 16 |
-
MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "
|
|
|
|
| 17 |
FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
|
| 18 |
PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
|
| 19 |
|
|
@@ -38,6 +39,7 @@ _CARD_MEDIANS: Dict[str, Dict[str, float]] = {}
|
|
| 38 |
_TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
|
| 39 |
_CARD_IDS: Optional[set] = None
|
| 40 |
_TERM_IDS: Optional[set] = None
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
CARD_FEATURES = {
|
|
@@ -73,23 +75,31 @@ def _normalize_id(val: Optional[Any]) -> Optional[str]:
|
|
| 73 |
|
| 74 |
|
| 75 |
def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
|
| 76 |
-
|
| 77 |
y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
|
| 78 |
items: List[Dict[str, Any]] = []
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
is_fraud = bool(pred_class == 1)
|
| 82 |
-
prob_fraud = None
|
| 83 |
-
if y_prob is not None:
|
| 84 |
pp = y_prob[i]
|
| 85 |
if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
|
| 86 |
prob_fraud = float(pp[1])
|
| 87 |
else:
|
| 88 |
prob_fraud = float(pp)
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
return {"items": items}
|
| 94 |
|
| 95 |
|
|
@@ -104,11 +114,22 @@ app.add_middleware(
|
|
| 104 |
|
| 105 |
|
| 106 |
def _load_model_and_features():
|
| 107 |
-
global _MODEL, _FEATURES
|
| 108 |
if _MODEL is None:
|
| 109 |
if not os.path.exists(MODEL_PATH):
|
| 110 |
raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
|
| 111 |
_MODEL = joblib.load(MODEL_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
if _FEATURES is None:
|
| 113 |
feat_from_model = getattr(_MODEL, "feature_names_in_", None)
|
| 114 |
if feat_from_model is not None:
|
|
|
|
| 13 |
|
| 14 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 15 |
ROOT_DIR = os.path.dirname(APP_DIR)
|
| 16 |
+
MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_calibrated.pkl")
|
| 17 |
+
THRESHOLDS_PATH = os.path.join(ROOT_DIR, "ai", "models", "thresholds.json")
|
| 18 |
FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
|
| 19 |
PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
|
| 20 |
|
|
|
|
| 39 |
_TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
|
| 40 |
_CARD_IDS: Optional[set] = None
|
| 41 |
_TERM_IDS: Optional[set] = None
|
| 42 |
+
_ACTIVE_THRESHOLD: float = 0.5
|
| 43 |
|
| 44 |
|
| 45 |
CARD_FEATURES = {
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
def _predict(
    ensemble, X: pd.DataFrame, threshold: Optional[float] = None
) -> Dict[str, Any]:
    """Classify every row of ``X`` as fraud / not fraud using ``ensemble``.

    When the model exposes ``predict_proba``, the fraud probability of each
    row is compared against a decision threshold (``prob >= threshold`` means
    fraud). Otherwise the hard labels returned by ``ensemble.predict`` are
    used and neither a probability nor a threshold is reported.

    Args:
        ensemble: Model object offering ``predict_proba(X)`` and/or
            ``predict(X)``.
        X: Feature rows to score.
        threshold: Decision threshold to apply. ``None`` (the default) uses
            the module-level ``_ACTIVE_THRESHOLD``, which keeps existing
            two-argument callers working unchanged.

    Returns:
        ``{"items": [...]}`` with one dict per scored row containing the keys
        ``is_fraud``, ``fraud_probability`` and ``threshold_used``.
    """
    y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None

    if y_prob is None:
        # No probabilities available: fall back to the model's hard labels.
        fallback: List[Dict[str, Any]] = [
            {
                "is_fraud": bool(int(label) == 1),
                "fraud_probability": None,
                "threshold_used": None,
            }
            for label in ensemble.predict(X)
        ]
        return {"items": fallback}

    # Reading a module global needs no ``global`` statement; it is resolved
    # only on this branch so an explicit threshold never touches module state.
    cutoff = _ACTIVE_THRESHOLD if threshold is None else float(threshold)

    items: List[Dict[str, Any]] = []
    for row in y_prob:
        # Flatten the row so scalars, lists and single-column arrays are all
        # handled the same way; ``float()`` on a 1-element ndarray is
        # deprecated in recent NumPy and fails outright for a 1-element list.
        values = np.asarray(row, dtype=float).ravel()
        # Index 1 is taken as the fraud probability when the row has two or
        # more entries; a single value is used as-is.
        prob_fraud = float(values[1] if values.size >= 2 else values[0])
        items.append(
            {
                "is_fraud": bool(prob_fraud >= cutoff),
                "fraud_probability": prob_fraud,
                "threshold_used": cutoff,
            }
        )
    return {"items": items}
|
| 104 |
|
| 105 |
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def _load_model_and_features():
|
| 117 |
+
global _MODEL, _FEATURES, _ACTIVE_THRESHOLD
|
| 118 |
if _MODEL is None:
|
| 119 |
if not os.path.exists(MODEL_PATH):
|
| 120 |
raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
|
| 121 |
_MODEL = joblib.load(MODEL_PATH)
|
| 122 |
+
if os.path.exists(THRESHOLDS_PATH):
|
| 123 |
+
try:
|
| 124 |
+
import json
|
| 125 |
+
with open(THRESHOLDS_PATH, 'r', encoding='utf-8') as f:
|
| 126 |
+
data = json.load(f)
|
| 127 |
+
val = data.get('threshold')
|
| 128 |
+
if isinstance(val, (int, float)):
|
| 129 |
+
_ACTIVE_THRESHOLD = float(val)
|
| 130 |
+
print(f"[Threshold] Carregado threshold único: {_ACTIVE_THRESHOLD:.4f}")
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"[Threshold] Falha ao carregar threshold: {e}; usando 0.5")
|
| 133 |
if _FEATURES is None:
|
| 134 |
feat_from_model = getattr(_MODEL, "feature_names_in_", None)
|
| 135 |
if feat_from_model is not None:
|