Spaces:
Sleeping
Sleeping
| # python -m uvicorn main:app --reload | |
| from __future__ import annotations | |
| from typing import Any, Dict, List, Optional | |
| import os | |
| import traceback | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from fastapi import FastAPI, HTTPException, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
# Directory containing this file, and the project root one level up.
APP_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(APP_DIR)
# Serialized stacking ensemble and its tuned decision threshold.
MODEL_PATH = os.path.join(ROOT_DIR, "ai", "models", "stacking_fraude_model_4.pkl")
THRESHOLDS_PATH = os.path.join(ROOT_DIR, "ai", "models", "thresholds.json")
# Training dataset, available in either format (parquet is checked first by the loaders).
FEATHER_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.feather")
PARQUET_DATASET = os.path.join(ROOT_DIR, "data", "final_dataset.parquet")
# Columns excluded from the model feature set: raw identifiers and the label.
DROP_COLS = {
    "card_hash",
    "terminal_id",
    "is_fraud",
}
class TransactionBody(BaseModel):
    """Request body for single-transaction scoring: a free-form feature map."""
    features: Dict[str, Any] = Field(default_factory=dict)
class BatchBody(BaseModel):
    """Request body for batch scoring: a list of feature maps."""
    items: List[Dict[str, Any]]
# Lazily-initialized module state, populated on first request.
_MODEL = None  # joblib-loaded ensemble model
_FEATURES: Optional[List[str]] = None  # ordered model feature names
_CARD_MEDIANS: Dict[str, Dict[str, float]] = {}  # per-card feature medians
_TERM_MEDIANS: Dict[str, Dict[str, float]] = {}  # per-terminal feature medians
_CARD_IDS: Optional[set] = None  # known card_hash values from the dataset
_TERM_IDS: Optional[set] = None  # known terminal_id values from the dataset
_ACTIVE_THRESHOLD: float = 0.5  # fraud decision threshold; overridden by thresholds.json if present
# Card-level historical features eligible for median backfill in _enrich_with_id_medians.
CARD_FEATURES = {
    "dias_desde_primeira_transacao_do_cartao",
    "qtas_transacoes_cartao_dia",
    "qtas_fraudes_cartao",
    "valor_medio_cartao",
    "desvio_padrao_valor_cartao",
    "entropia_geografica_cartao",
    "fraude_ratio_cartao",
    "tempo_medio_denuncia_cartao",
    "velocidade_ultima_transacao",
}
# Terminal-level historical features eligible for median backfill in _enrich_with_id_medians.
TERMINAL_FEATURES = {
    "dias_desde_inicio_terminal",
    "qtas_transacoes_terminal_dia",
    "qtas_fraudes_terminal",
    "valor_medio_terminal",
    "desvio_padrao_valor_terminal",
    "fraude_ratio_terminal",
    "tempo_medio_denuncia_terminal",
}
| def _normalize_id(val: Optional[Any]) -> Optional[str]: | |
| if val is None: | |
| return None | |
| s = str(val).strip() | |
| if s.endswith('.0'): | |
| s = s[:-2] | |
| return s | |
def _predict(ensemble, X: pd.DataFrame) -> Dict[str, Any]:
    """Score a feature matrix and return per-row fraud decisions.

    Uses predict_proba when the estimator provides it, thresholding the
    positive-class probability with the module-level _ACTIVE_THRESHOLD.
    Otherwise falls back to hard predict(), in which case the probability
    and threshold fields are None.

    Returns:
        {"items": [{"is_fraud", "fraud_probability", "threshold_used"}, ...]}
    """
    # NOTE: the original declared `global _ACTIVE_THRESHOLD` here, but the
    # value is only read — the global statement was unnecessary and misleading.
    y_prob = ensemble.predict_proba(X) if hasattr(ensemble, "predict_proba") else None
    items: List[Dict[str, Any]] = []
    if y_prob is not None:
        for i in range(len(X)):
            pp = y_prob[i]
            # predict_proba rows are usually [P(neg), P(pos)]; take the positive class
            if isinstance(pp, (list, np.ndarray)) and len(pp) >= 2:
                prob_fraud = float(pp[1])
            else:
                prob_fraud = float(pp)
            items.append({
                "is_fraud": bool(prob_fraud >= _ACTIVE_THRESHOLD),
                "fraud_probability": prob_fraud,
                "threshold_used": _ACTIVE_THRESHOLD,
            })
    else:
        y_pred = ensemble.predict(X)
        for i in range(len(X)):
            items.append({
                "is_fraud": bool(int(y_pred[i]) == 1),
                "fraud_probability": None,
                "threshold_used": None,
            })
    return {"items": items}
# FastAPI application instance.
app = FastAPI(title="Unfraud API", version="1.0.0")
# CORS for the local Vite dev frontend (ports 5173).
# NOTE(review): "*" combined with allow_credentials=True conflicts with the
# CORS spec — browsers refuse credentialed requests against a wildcard
# origin, and Starlette will not echo "*" back for them. The explicit
# localhost origins are what actually take effect; confirm and consider
# dropping "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173", "*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _load_model_and_features():
    """Lazily load the model, decision threshold and feature name list.

    Populates the module-level _MODEL, _ACTIVE_THRESHOLD and _FEATURES on
    first call; later calls are no-ops. Feature names come from the model's
    ``feature_names_in_`` when available, otherwise from the dataset columns
    (minus DROP_COLS).

    Raises:
        FileNotFoundError: if the model file is missing, or if no dataset
            exists to infer feature names from.
    """
    global _MODEL, _FEATURES, _ACTIVE_THRESHOLD
    if _MODEL is None:
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"Modelo não encontrado: {MODEL_PATH}")
        _MODEL = joblib.load(MODEL_PATH)
        if os.path.exists(THRESHOLDS_PATH):
            try:
                import json
                with open(THRESHOLDS_PATH, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                val = data.get('threshold')
                # Exclude bool explicitly: isinstance(True, int) is True in
                # Python, so a JSON `true` would otherwise silently become
                # a threshold of 1.0.
                if isinstance(val, (int, float)) and not isinstance(val, bool):
                    _ACTIVE_THRESHOLD = float(val)
                    print(f"[Threshold] Carregado threshold único: {_ACTIVE_THRESHOLD:.4f}")
            except Exception as e:
                # Best-effort: keep the 0.5 default when the file is malformed.
                print(f"[Threshold] Falha ao carregar threshold: {e}; usando 0.5")
    if _FEATURES is None:
        feat_from_model = getattr(_MODEL, "feature_names_in_", None)
        if feat_from_model is not None:
            _FEATURES = [c for c in list(feat_from_model) if c not in DROP_COLS]
        elif os.path.exists(PARQUET_DATASET):
            _FEATURES = [c for c in pd.read_parquet(PARQUET_DATASET).columns if c not in DROP_COLS]
        elif os.path.exists(FEATHER_DATASET):
            _FEATURES = [c for c in pd.read_feather(FEATHER_DATASET).columns if c not in DROP_COLS]
        else:
            raise FileNotFoundError("Dataset não encontrado para inferir features")
def _load_dataset(columns: List[str]) -> pd.DataFrame:
    """Load the final dataset, preferring parquet over feather.

    Args:
        columns: columns to keep; names absent from the file are silently
            dropped. An empty/falsy list keeps every column.

    Raises:
        FileNotFoundError: when neither dataset file exists.
    """
    # Single loop replaces the previously duplicated parquet/feather branches.
    for path, reader in ((PARQUET_DATASET, pd.read_parquet), (FEATHER_DATASET, pd.read_feather)):
        if os.path.exists(path):
            df = reader(path)
            use = [c for c in columns if c in df.columns] if columns else df.columns
            return df[use]
    raise FileNotFoundError("Nenhum arquivo de dataset encontrado (.parquet ou .feather)")
def _median_map(df: pd.DataFrame, key_col: str, feats: List[str]) -> Dict[str, Dict[str, float]]:
    """Per-id medians of numeric features, keyed by normalized id (NaNs dropped)."""
    grouped = df.groupby(key_col)[feats].median(numeric_only=True)
    return {
        _normalize_id(key): {name: float(val) for name, val in row.dropna().to_dict().items()}
        for key, row in grouped.iterrows()
    }


def _compute_group_medians():
    """Lazily build per-card and per-terminal feature median caches.

    Fills the module-level _CARD_MEDIANS / _TERM_MEDIANS from the training
    dataset; no-op once either cache is already populated.

    Raises:
        RuntimeError: when _load_model_and_features() has not run yet.
    """
    global _CARD_MEDIANS, _TERM_MEDIANS
    if _CARD_MEDIANS or _TERM_MEDIANS:
        return
    if _FEATURES is None:
        raise RuntimeError("Features não carregadas")
    df = _load_dataset(list(set(_FEATURES + ["card_hash", "terminal_id"])))
    num_feats = [c for c in _FEATURES if c in df.columns and pd.api.types.is_numeric_dtype(df[c])]
    # The two cache builds previously duplicated the same dict construction.
    if "card_hash" in df.columns and num_feats:
        _CARD_MEDIANS = _median_map(df, "card_hash", num_feats)
    if "terminal_id" in df.columns and num_feats:
        _TERM_MEDIANS = _median_map(df, "terminal_id", num_feats)
def _is_missing(value: Any) -> bool:
    """True when a record value should be backfilled from a group median."""
    if value in (None, "", "NaN"):
        return True
    # Real float NaN compares unequal to itself; the original tuple
    # membership check (None, "", "NaN") never matched it, so NaN
    # features were silently left in place.
    return isinstance(value, float) and value != value


def _apply_group_medians(rec: Dict[str, Any], medians: Dict[str, float], allowed: set) -> Dict[str, float]:
    """Fill missing allowed features in rec from medians; return what was applied."""
    applied: Dict[str, float] = {}
    for name, med in medians.items():
        if name in allowed and (name not in rec or _is_missing(rec[name])):
            rec[name] = med
            applied[name] = float(med)
    return applied


def _enrich_with_id_medians(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Backfill card/terminal historical features from cached group medians.

    For each record, features in CARD_FEATURES / TERMINAL_FEATURES that are
    absent or missing (None, "", "NaN" or float NaN) are replaced by the
    median observed for that record's card_hash / terminal_id. Records are
    shallow-copied; the input list is not mutated.
    """
    if not items:
        return items
    enriched: List[Dict[str, Any]] = []
    for idx, rec in enumerate(items):
        r = dict(rec)
        ck = _normalize_id(rec.get("card_hash"))
        tk = _normalize_id(rec.get("terminal_id"))
        cm = _CARD_MEDIANS.get(ck) if ck is not None else None
        tm = _TERM_MEDIANS.get(tk) if tk is not None else None
        card_all = {k: float(v) for k, v in (cm or {}).items() if k in CARD_FEATURES}
        term_all = {k: float(v) for k, v in (tm or {}).items() if k in TERMINAL_FEATURES}
        card_applied = _apply_group_medians(r, cm or {}, CARD_FEATURES)
        term_applied = _apply_group_medians(r, tm or {}, TERMINAL_FEATURES)
        try:
            # Best-effort debug trace of what was found/applied per record.
            print(
                f"[Enrich] idx={idx} card_hash={ck} card_found={bool(cm)}\n"
                f" card_all={card_all}\n card_applied={card_applied}\n"
                f" terminal_id={tk} term_found={bool(tm)}\n"
                f" term_all={term_all}\n term_applied={term_applied}",
                flush=True,
            )
        except Exception:
            pass
        enriched.append(r)
    return enriched
| def _ensure_dataframe(records: List[Dict[str, Any]], feature_order: List[str]) -> pd.DataFrame: | |
| df = pd.DataFrame(records) | |
| for col in df.columns: | |
| df[col] = pd.to_numeric(df[col], errors="coerce") | |
| df = df.reindex(columns=feature_order) | |
| df = df.fillna(0) | |
| return df | |
def _ensure_id_sets():
    """Populate in-memory sets of known card_hash and terminal_id from the final dataset.

    We normalize values to avoid mismatches caused by float representations
    (e.g. '4983922282101.0'). No-op once both sets are populated.
    """
    global _CARD_IDS, _TERM_IDS
    if _CARD_IDS is not None and _TERM_IDS is not None:
        return
    df_ids = _load_dataset(["card_hash", "terminal_id"])

    def _as_id_set(series) -> set:
        # Normalized, non-empty ids only (None/"" are dropped).
        candidates = (_normalize_id(v) for v in series.dropna().tolist())
        return {nv for nv in candidates if nv}

    _CARD_IDS = _as_id_set(df_ids["card_hash"]) if "card_hash" in df_ids.columns else set()
    _TERM_IDS = _as_id_set(df_ids["terminal_id"]) if "terminal_id" in df_ids.columns else set()
def health():
    """Liveness probe: report that the service is up."""
    return dict(status="ok")
def predict_one(body: TransactionBody, request: Request):
    """Score a single transaction: enrich its features and run the model.

    Returns the first (only) item of the prediction payload:
    {"is_fraud", "fraud_probability", "threshold_used"}.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        _load_model_and_features()
        _compute_group_medians()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if _FEATURES is None:
            raise RuntimeError("Features não carregadas")
        enriched = _enrich_with_id_medians([body.features])
        X = _ensure_dataframe(enriched, _FEATURES)
        output = _predict(_MODEL, X)
        return output["items"][0]
    except HTTPException:
        # Don't re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
def predict_batch(body: BatchBody, request: Request):
    """Score a batch of transactions; an empty batch short-circuits to no items.

    Returns {"items": [...]} with one decision dict per input record.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        if len(body.items) == 0:
            return {"items": []}
        _load_model_and_features()
        _compute_group_medians()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if _FEATURES is None:
            raise RuntimeError("Features não carregadas")
        enriched = _enrich_with_id_medians(body.items)
        X = _ensure_dataframe(enriched, _FEATURES)
        output = _predict(_MODEL, X)
        return output
    except HTTPException:
        # Don't re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
def ids_exists(terminal_id: Optional[str] = None, card_hash: Optional[str] = None):
    """Check whether a terminal_id and/or card_hash appear in the dataset.

    Returns {"terminal_id_exists": bool, "card_hash_exists": bool}; an
    omitted identifier reports False.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        _ensure_id_sets()
        # _normalize_id already maps None -> None; the original wrapped each
        # call in a redundant `if x is not None` conditional expression.
        nt = _normalize_id(terminal_id)
        nc = _normalize_id(card_hash)
        term_ok = nt is not None and _TERM_IDS is not None and nt in _TERM_IDS
        card_ok = nc is not None and _CARD_IDS is not None and nc in _CARD_IDS
        return {"terminal_id_exists": term_ok, "card_hash_exists": card_ok}
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))