Spaces:
Runtime error
Runtime error
File size: 5,303 Bytes
c2fb337 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | import joblib
import json
import os
import numpy as np
import pandas as pd
# Project root: two directory levels above this file's absolute path
# (os.path.split(...)[0] is the directory part of a path).
BASE_DIR = os.path.split(os.path.split(os.path.abspath(__file__))[0])[0]

# Directory the serialized model artifacts are loaded from.
MODEL_DIR = os.path.join(BASE_DIR, "models")
def _load_first_existing(*names):
    """Load the first of ``names`` that exists inside ``MODEL_DIR``.

    The filenames are checked in the order given; the first one whose path
    exists is deserialized with ``joblib.load`` and returned.

    Raises:
        FileNotFoundError: if none of the filenames exists in ``MODEL_DIR``.
    """
    candidate_paths = (os.path.join(MODEL_DIR, filename) for filename in names)
    # Lazy search: stops at the first existing path, preserving priority order.
    found = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )
    if found is None:
        raise FileNotFoundError(f"None of {names} found in {MODEL_DIR}")
    return joblib.load(found)
# Candidate artifact filenames, listed in priority order: the enhanced
# variant first, then the plain one, then its capitalized spelling.
_MODEL_FILENAMES = (
    "ensemble_model_enhanced.joblib",
    "ensemble_model.joblib",
    "Ensemble_model.joblib",
)
_PREPROCESSOR_FILENAMES = (
    "preprocessor_enhanced.joblib",
    "preprocessor.joblib",
    "Preprocessor.joblib",
)

# Both artifacts are loaded once, at import time; a missing artifact raises
# FileNotFoundError from _load_first_existing.
model = _load_first_existing(*_MODEL_FILENAMES)
preprocessor = _load_first_existing(*_PREPROCESSOR_FILENAMES)
# Optional Anscombe configuration. Two spellings of the filename are tried
# explicitly (this is not a true case-insensitive search); the first one that
# exists in MODEL_DIR wins. When neither exists the config is an empty dict.
anscombe_path = None
for candidate in ("anscombe.json", "Anscombe.json"):
    p = os.path.join(MODEL_DIR, candidate)
    if os.path.exists(p):
        anscombe_path = p
        break

if anscombe_path is not None:
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which is what open() falls back to when none is given.
    with open(anscombe_path, encoding="utf-8") as f:
        anscombe_config = json.load(f)
else:
    anscombe_config = {}
def _categorical_columns(fitted, column_names):
    """Return the set of column names that ``fitted`` encodes as categorical.

    A column counts as categorical when it is routed to a transformer that is
    a ``OneHotEncoder`` or exposes ``categories_``. Objects without a
    ``transformers_`` attribute yield an empty set instead of raising.
    """
    categorical = set()
    for _name, transformer, selected in getattr(fitted, "transformers_", ()):
        is_encoder = (
            type(transformer).__name__ == "OneHotEncoder"
            or hasattr(transformer, "categories_")
        )
        if not is_encoder:
            continue
        # A single column given as a bare string must not be iterated
        # character by character.
        if isinstance(selected, str):
            selected = [selected]
        try:
            for column in selected:
                # Positional selectors are translated to the matching name so
                # they can be compared with the expected column names.
                if isinstance(column, (int, np.integer)) and not isinstance(column, bool):
                    column = column_names[column]
                categorical.add(column)
        except (TypeError, IndexError):
            # Selector is not iterable/hashable (slice, callable, ...) or is
            # out of range: best effort, skip this transformer.
            continue
    return categorical


def _coerce_feature_row(features, column_names, categorical):
    """Build one row of values aligned with ``column_names``.

    Extra values are dropped. Categorical values become ``str``; every other
    value becomes ``float`` (``NaN`` when it cannot be converted). Missing
    trailing values are padded with ``""`` (categorical) or ``NaN`` (numeric).
    """
    nan = float("nan")
    provided = features[: len(column_names)]
    row = []
    for column, value in zip(column_names, provided):
        if column in categorical:
            row.append(str(value))
            continue
        try:
            row.append(float(value))
        except (TypeError, ValueError, OverflowError):
            row.append(nan)
    for column in column_names[len(provided):]:
        row.append("" if column in categorical else nan)
    return row


def predict_fraud(data: dict):
    """Run the module-level ``preprocessor`` and ``model`` on one sample.

    Args:
        data: one of
            * a dict with a ``"features"`` key holding a list of values in
              the preprocessor's column order,
            * a dict mapping feature name -> value,
            * a plain sequence of feature values (list, tuple, array).

    Returns:
        dict with ``"fraud"`` and ``"fraud_prediction"`` (both the predicted
        label as ``int``) and ``"probability"``, which is the largest value
        returned by ``predict_proba`` for the sample, i.e. the score of the
        most likely class rather than of one fixed class.

    Raises:
        ValueError: if an array input has fewer features than the
            preprocessor expects, or if ``preprocessor.transform`` fails.
        TypeError: if ``data`` is neither a mapping nor iterable.
    """
    if isinstance(data, dict) and "features" in data:
        features = data["features"]
        feature_names = getattr(preprocessor, "feature_names_in_", None)
        if feature_names is not None:
            # The preprocessor was fitted on named columns: hand it a
            # DataFrame carrying exactly those names.
            cols = list(feature_names)
            cat_cols = _categorical_columns(preprocessor, cols)
            X = pd.DataFrame(
                [_coerce_feature_row(features, cols, cat_cols)],
                columns=cols,
            )
        else:
            X = np.array([features])
    elif isinstance(data, dict):
        # name -> value mapping: a DataFrame keeps the column names.
        X = pd.DataFrame([data])
    elif callable(getattr(data, "values", None)):
        # Mapping-like object that is not a dict.
        X = np.array([list(data.values())])
    else:
        # Plain sequence of feature values.
        try:
            X = np.array([list(data)])
        except TypeError as exc:
            raise TypeError(
                f"Unsupported input type for predict_fraud: {type(data).__name__}"
            ) from exc

    # Positional (array) input is validated against the expected feature
    # count: too few is an error, extras are dropped. DataFrames are passed
    # through unchanged and validated by the preprocessor itself.
    expected = getattr(preprocessor, "n_features_in_", None)
    if expected is not None and isinstance(X, np.ndarray):
        if X.shape[1] < expected:
            raise ValueError(f"X has {X.shape[1]} features, but preprocessor is expecting {expected} features as input.")
        if X.shape[1] > expected:
            X = X[:, :expected]

    try:
        X_processed = preprocessor.transform(X)
    except Exception as exc:
        # Re-raise with a description of the input to help debugging.
        cols = getattr(X, 'columns', None)
        head = X.head().to_dict() if isinstance(X, pd.DataFrame) else None
        raise ValueError(f"Transform failed: {exc}; X_type={type(X)}; columns={cols}; head={head}") from exc

    prediction = model.predict(X_processed)[0]
    probability = model.predict_proba(X_processed)[0].max()
    return {
        "fraud": int(prediction),
        "fraud_prediction": int(prediction),
        "probability": float(probability),
    }
|