Commit 0d8b1e3
Parent(s): 1c6b56b

Update API

Files changed:
- api/app.py +44 -43
- data/final_dataset.parquet +3 -0
api/app.py
CHANGED
@@ -1,4 +1,4 @@
-# python -m uvicorn
+# python -m uvicorn main:app --reload
 
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
@@ -18,16 +18,9 @@ FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
 PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
 
 DROP_COLS = {
-    "tx_year",
-    "tx_month",
-    "periodo",
-    "terminal_soft_descriptor",
     "card_hash",
-    "
-    "is_transactional_fraud",
+    "terminal_id",
     "is_fraud",
-    "cluster",
-    "fraude_tipo_extendido",
 }
 
 
@@ -43,28 +36,20 @@ _MODEL = None
 _FEATURES: Optional[List[str]] = None
 _CARD_MEDIANS: Dict[str, Dict[str, float]] = {}
 _TERM_MEDIANS: Dict[str, Dict[str, float]] = {}
+_CARD_IDS: Optional[set] = None
+_TERM_IDS: Optional[set] = None
 
 
-FRAUD_TYPE_MAP = {
-    0: ("c0", "não é fraude"),
-    1: ("c1", "fraude em cartão"),
-    2: ("c2", "desacordo comercial"),
-    3: ("c3", "fraude no terminal"),
-    4: ("c4", "conluio"),
-}
-
 CARD_FEATURES = {
     "dias_desde_primeira_transacao_do_cartao",
     "qtas_transacoes_cartao_dia",
     "qtas_fraudes_cartao",
     "valor_medio_cartao",
-    "valor_medio_cartao_3_transacoes",
     "desvio_padrao_valor_cartao",
     "entropia_geografica_cartao",
-    "frequencia_transacoes_24h",
-    "media_tempo_entre_transacoes",
     "fraude_ratio_cartao",
     "tempo_medio_denuncia_cartao",
+    "velocidade_ultima_transacao",
 }
 
 TERMINAL_FEATURES = {
@@ -72,7 +57,7 @@ TERMINAL_FEATURES = {
     "qtas_transacoes_terminal_dia",
     "qtas_fraudes_terminal",
     "valor_medio_terminal",
-    "
+    "desvio_padrao_valor_terminal",
     "fraude_ratio_terminal",
     "tempo_medio_denuncia_terminal",
 }
@@ -84,23 +69,17 @@ def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
     items: List[Dict[str, Any]] = []
     for i in range(len(X)):
         pred_class = int(y_pred[i])
-        is_fraud = bool(pred_class
-
+        is_fraud = bool(pred_class == 1)
+        prob_fraud = None
         if y_prob is not None:
             pp = y_prob[i]
-
-
-
-
-        row = X.iloc[i]
-        debug = {c: (float(row[c]) if pd.notna(row[c]) else None) for c in X.columns}
+            if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
+                prob_fraud = float(pp[1])
+            else:
+                prob_fraud = float(pp)
         items.append({
-            "predicted_class": pred_class,
             "is_fraud": bool(is_fraud),
-            "
-            "fraud_type_name": fraud_label if is_fraud else None,
-            "class_probabilities": probs,
-            "_debug_processed_features": debug,
+            "fraud_probability": prob_fraud,
         })
     return {"items": items}
 
@@ -198,20 +177,26 @@ def _ensure_dataframe(records: List[Dict[str, Any]], feature_order: List[str]) -
     return df
 
 
+def _ensure_id_sets():
+    global _CARD_IDS, _TERM_IDS
+    if _CARD_IDS is not None and _TERM_IDS is not None:
+        return
+    df_ids = _load_dataset(["card_hash", "terminal_id"])
+    if "card_hash" in df_ids.columns:
+        _CARD_IDS = set(df_ids["card_hash"].astype(str).dropna().unique())
+    else:
+        _CARD_IDS = set()
+    if "terminal_id" in df_ids.columns:
+        _TERM_IDS = set(df_ids["terminal_id"].astype(str).dropna().unique())
+    else:
+        _TERM_IDS = set()
+
+
 @app.get("/health")
 def health():
     return {"status": "ok"}
 
 
-@app.get("/")
-def root():
-    return {
-        "status": "ok",
-        "health": "/health",
-        "docs": "/docs",
-    }
-
-
 @app.post("/predict")
 def predict_one(body: TransactionBody, request: Request):
     try:
@@ -242,3 +227,19 @@ def predict_batch(body: BatchBody, request: Request):
     except Exception as e:
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/ids/exists")
+def ids_exists(terminal_id: Optional[str] = None, card_hash: Optional[str] = None):
+    try:
+        _ensure_id_sets()
+        term_ok = False
+        card_ok = False
+        if terminal_id is not None and _TERM_IDS is not None:
+            term_ok = str(terminal_id) in _TERM_IDS
+        if card_hash is not None and _CARD_IDS is not None:
+            card_ok = str(card_hash) in _CARD_IDS
+        return {"terminal_id_exists": term_ok, "card_hash_exists": card_ok}
+    except Exception as e:
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))
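For reference, a minimal sketch of querying the new /ids/exists endpoint once the app is running; /predict responses now carry only "is_fraud" and "fraud_probability" per item. The base URL, port, and ID values below are placeholders chosen for illustration, not values from the repository.

# Sketch: call the new GET /ids/exists endpoint added in this commit.
import requests

BASE_URL = "http://localhost:8000"  # assumption: uvicorn running locally on the default port

resp = requests.get(
    f"{BASE_URL}/ids/exists",
    params={"terminal_id": "T12345", "card_hash": "abc123"},  # hypothetical IDs
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"terminal_id_exists": false, "card_hash_exists": false}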
data/final_dataset.parquet
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:880f13b2180b1ebc1845cc090f6be5fc8c4a5f08c478791a71f2af9b6315ad90
+size 246272480
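The new parquet file is committed as a Git LFS pointer, so it must be pulled with git-lfs before the API can read it. A rough sketch of the column-selective read that _ensure_id_sets relies on is below; _load_dataset itself is not part of this diff, so this is an assumption about one straightforward pandas implementation, not the repository's actual code.

# Sketch: load only the two ID columns, roughly what
# _load_dataset(["card_hash", "terminal_id"]) is expected to do.
import pandas as pd

# placeholder path; in the API it is built from ROOT_DIR as PARQUET_DATASET
df_ids = pd.read_parquet("data/final_dataset.parquet", columns=["card_hash", "terminal_id"])

card_ids = set(df_ids["card_hash"].astype(str).dropna().unique())
term_ids = set(df_ids["terminal_id"].astype(str).dropna().unique())
print(len(card_ids), len(term_ids))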