# cindyy287's picture
# Upload 23 files
# c2fb337 verified
import joblib
import json
import os
import numpy as np
import pandas as pd
# Filesystem anchors. BASE_DIR is the directory two levels above this file
# (the parent of the folder that contains it); MODEL_DIR is its "models"
# subdirectory, where the serialized artifacts are looked up.
_MODULE_PATH = os.path.abspath(__file__)
BASE_DIR = os.path.dirname(os.path.dirname(_MODULE_PATH))
MODEL_DIR = os.path.join(BASE_DIR, "models")
def _load_first_existing(*names):
    """Load the first of ``names`` that is present inside ``MODEL_DIR``.

    The candidates are probed in the order given and probing stops at the
    first path that exists; that file is deserialized with ``joblib.load``
    and the resulting object is returned.

    Raises:
        FileNotFoundError: if none of the candidate files exist.
    """
    candidate_paths = (os.path.join(MODEL_DIR, filename) for filename in names)
    # Lazy scan: ``next`` stops at the first existing path, so later
    # candidates are never touched once a match is found.
    found = next((path for path in candidate_paths if os.path.exists(path)), None)
    if found is None:
        raise FileNotFoundError(f"None of {names} found in {MODEL_DIR}")
    return joblib.load(found)
# Load the model and the preprocessor at import time. Candidate filenames are
# tried in the order listed, so an "enhanced" artifact takes precedence over
# the plain one when both exist; the capitalised spelling is the last resort.
_MODEL_CANDIDATES = (
    "ensemble_model_enhanced.joblib",
    "ensemble_model.joblib",
    "Ensemble_model.joblib",
)
_PREPROCESSOR_CANDIDATES = (
    "preprocessor_enhanced.joblib",
    "preprocessor.joblib",
    "Preprocessor.joblib",
)

model = _load_first_existing(*_MODEL_CANDIDATES)
preprocessor = _load_first_existing(*_PREPROCESSOR_CANDIDATES)
# Optional Anscombe config. Only the two spellings listed below are probed
# (this is not a general case-insensitive match); when neither file exists
# the config is left as an empty dict.
anscombe_path = None
for candidate in ("anscombe.json", "Anscombe.json"):
    p = os.path.join(MODEL_DIR, candidate)
    if os.path.exists(p):
        anscombe_path = p
        break

if anscombe_path is not None:
    # Pin the encoding: without it ``open`` falls back to the locale's
    # default, which makes parsing of non-ASCII JSON platform-dependent.
    with open(anscombe_path, encoding="utf-8") as f:
        anscombe_config = json.load(f)
else:
    anscombe_config = {}
def _categorical_columns(fitted_preprocessor):
    """Return the column names the preprocessor routes to a categorical encoder.

    A transformer counts as categorical when it is a ``OneHotEncoder`` or
    exposes a ``categories_`` attribute. Preprocessors without a
    ``transformers_`` attribute yield an empty set instead of raising.
    """
    cat_cols = set()
    for _name, trans, cols_in_transformer in getattr(fitted_preprocessor, "transformers_", ()):
        # NOTE(review): only the transformer object itself is inspected; an
        # encoder nested inside another estimator is not detected here.
        if type(trans).__name__ != "OneHotEncoder" and not hasattr(trans, "categories_"):
            continue
        if isinstance(cols_in_transformer, str):
            # A single column given as a bare string must not be split
            # into its characters.
            cat_cols.add(cols_in_transformer)
            continue
        try:
            cat_cols.update(cols_in_transformer)
        except TypeError:
            # Non-iterable selectors (e.g. a slice) cannot be mapped to
            # column names here; skip them.
            continue
    return cat_cols


def _coerce_features(features, cols, cat_cols):
    """Turn a positional feature sequence into one row aligned with ``cols``.

    Extra values beyond ``len(cols)`` are dropped. Categorical columns get
    ``str(value)``; every other column gets ``float(value)``, or NaN when the
    value cannot be converted. Missing trailing columns are padded with ``""``
    (categorical) or NaN (numeric).
    """
    # ``list(...)`` lets tuples, arrays and other iterables through, not
    # only sliceable sequences.
    row = list(features)[: len(cols)]
    coerced = []
    for col_name, value in zip(cols, row):
        if col_name in cat_cols:
            coerced.append(str(value))
            continue
        try:
            coerced.append(float(value))
        except (TypeError, ValueError, OverflowError):
            coerced.append(float("nan"))
    coerced.extend(
        "" if col_name in cat_cols else float("nan")
        for col_name in cols[len(row):]
    )
    return coerced


def _build_input(data):
    """Convert the caller's payload into the object handed to the preprocessor."""
    if isinstance(data, dict):
        if "features" not in data:
            # A name -> value mapping: use a DataFrame so that column names
            # line up with what the preprocessor was fitted on.
            return pd.DataFrame([data])
        features = data["features"]
    elif callable(getattr(data, "values", None)):
        # Mapping-like object that is not a dict: use its values positionally.
        return np.array([list(data.values())])
    else:
        # A bare sequence is treated like ``{"features": data}``.
        features = data

    feature_names = getattr(preprocessor, "feature_names_in_", None)
    if feature_names is None:
        return np.array([features])

    cols = list(feature_names)
    row = _coerce_features(features, cols, _categorical_columns(preprocessor))
    return pd.DataFrame([row], columns=cols)


def predict_fraud(data: dict):
    """Run the module-level ``preprocessor`` and ``model`` on one sample.

    Args:
        data: one of
            * a dict with a ``"features"`` key holding a positional sequence
              of values,
            * a dict mapping feature names to values,
            * a bare sequence of feature values (handled like the
              ``"features"`` form).

    Returns:
        A dict with ``"fraud"`` and ``"fraud_prediction"`` (both the predicted
        label as ``int``) and ``"probability"``, which is the largest value
        returned by ``predict_proba`` for the sample, i.e. the probability of
        whichever class scored highest, not specifically of the fraud class.

    Raises:
        ValueError: if a positional array has fewer features than the
            preprocessor expects, or if ``preprocessor.transform`` fails
            (the original exception is chained as the cause).
    """
    X = _build_input(data)

    # Only positional arrays are length-checked; a DataFrame is matched by
    # column name inside the preprocessor itself.
    expected = getattr(preprocessor, "n_features_in_", None)
    if expected is not None and isinstance(X, np.ndarray):
        if X.shape[1] < expected:
            raise ValueError(f"X has {X.shape[1]} features, but preprocessor is expecting {expected} features as input.")
        if X.shape[1] > expected:
            # Surplus positional features are dropped from the end.
            X = X[:, :expected]

    try:
        X_processed = preprocessor.transform(X)
    except Exception as exc:
        # Re-raise with the input's type and contents to ease debugging.
        cols = getattr(X, "columns", None)
        head = X.head().to_dict() if isinstance(X, pd.DataFrame) else None
        raise ValueError(f"Transform failed: {exc}; X_type={type(X)}; columns={cols}; head={head}") from exc

    prediction = model.predict(X_processed)[0]
    probability = model.predict_proba(X_processed)[0].max()
    return {
        "fraud": int(prediction),
        "fraud_prediction": int(prediction),
        "probability": float(probability),
    }