fraud_detection_api_1

Sleeping

App Files Files Community

fraud_detection_api_1 / app /model.py

cindyy287

Upload 23 files

c2fb337 verified 5 months ago

raw

history blame contribute delete

5.3 kB

	import joblib
	import json
	import os
	import numpy as np
	import pandas as pd

	# Project root: one level above the directory that contains this module.
	BASE_DIR = os.path.abspath(
	    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
	)
	# Directory the serialized model artefacts are read from.
	MODEL_DIR = os.path.join(BASE_DIR, "models")
	def _load_first_existing(*names):
	    """Load the first of ``names`` that is present in ``MODEL_DIR``.

	    The names are checked in the order given; the first path that exists is
	    passed to ``joblib.load`` and the loaded object is returned.

	    Raises:
	        FileNotFoundError: if none of the names exists in ``MODEL_DIR``.
	    """
	    candidate_paths = (os.path.join(MODEL_DIR, filename) for filename in names)
	    # Lazy search: stops probing the filesystem at the first hit.
	    found = next((p for p in candidate_paths if os.path.exists(p)), None)
	    if found is None:
	        raise FileNotFoundError(f"None of {names} found in {MODEL_DIR}")
	    return joblib.load(found)


	# Candidate artefact file names, most preferred first: the enhanced variant,
	# then the plain name in lowercase and capitalised spellings.
	_MODEL_CANDIDATES = (
	    "ensemble_model_enhanced.joblib",
	    "ensemble_model.joblib",
	    "Ensemble_model.joblib",
	)
	_PREPROCESSOR_CANDIDATES = (
	    "preprocessor_enhanced.joblib",
	    "preprocessor.joblib",
	    "Preprocessor.joblib",
	)

	# Loaded once at import time; a FileNotFoundError is raised if no candidate
	# for either artefact exists.
	model = _load_first_existing(*_MODEL_CANDIDATES)
	preprocessor = _load_first_existing(*_PREPROCESSOR_CANDIDATES)

	# Optional Anscombe config. Only the two spellings listed below are probed
	# (exact file-name checks, not a general case-insensitive search); the first
	# one that exists wins. When neither exists the config is an empty dict.
	anscombe_path = None
	for candidate in ("anscombe.json", "Anscombe.json"):
	    p = os.path.join(MODEL_DIR, candidate)
	    if os.path.exists(p):
	        anscombe_path = p
	        break

	if anscombe_path:
	    # JSON text is UTF-8; pass the encoding explicitly so decoding does not
	    # depend on the platform's locale default.
	    with open(anscombe_path, encoding="utf-8") as f:
	        anscombe_config = json.load(f)
	else:
	    anscombe_config = {}


	def _categorical_columns(feature_names):
	    """Return the input columns the preprocessor treats as categorical.

	    A fitted transformer counts as categorical when its class is named
	    ``OneHotEncoder`` or it exposes a ``categories_`` attribute.

	    Args:
	        feature_names: list of the preprocessor's input column names, used to
	            translate positional (integer) column selectors into names.

	    Returns:
	        A set of column names, or ``None`` when the preprocessor has no
	        ``transformers_`` attribute and the column kinds are therefore unknown.
	    """
	    fitted = getattr(preprocessor, "transformers_", None)
	    if fitted is None:
	        return None

	    categorical = set()
	    for _name, transformer, columns in fitted:
	        is_encoder = (
	            type(transformer).__name__ == "OneHotEncoder"
	            or hasattr(transformer, "categories_")
	        )
	        if not is_encoder:
	            continue
	        if isinstance(columns, str):
	            # A single column may be selected with a bare string; iterating it
	            # directly would yield its characters instead of the column name.
	            columns = [columns]
	        try:
	            members = list(columns)
	        except TypeError:
	            # Non-iterable selector (e.g. a slice): nothing to enumerate.
	            continue
	        for column in members:
	            is_index = isinstance(column, (int, np.integer)) and not isinstance(column, bool)
	            if is_index and -len(feature_names) <= column < len(feature_names):
	                # Integer selectors are positional; map them to the column name.
	                column = feature_names[column]
	            categorical.add(column)
	    # NOTE(review): encoders nested inside another estimator (e.g. a pipeline
	    # step) are not detected here - confirm whether the deployed preprocessor
	    # needs that.
	    return categorical


	def _frame_from_feature_list(features, feature_names):
	    """Build a one-row DataFrame from a positional list of feature values.

	    Extra values beyond the expected columns are dropped. Values in
	    categorical columns are converted with ``str``; values in other columns
	    with ``float`` (NaN when the conversion fails). Missing trailing values are
	    padded with ``""`` for categorical columns and NaN otherwise. When the
	    column kinds are unknown the supplied values are passed through unchanged.
	    """
	    names = list(feature_names)
	    values = list(features[: len(names)])
	    categorical = _categorical_columns(names)
	    nan = float("nan")

	    row = []
	    for column, value in zip(names, values):
	        if categorical is None:
	            row.append(value)
	        elif column in categorical:
	            row.append(str(value))
	        else:
	            try:
	                row.append(float(value))
	            except (TypeError, ValueError, OverflowError):
	                row.append(nan)

	    # Pad columns the caller did not supply.
	    for column in names[len(values):]:
	        is_categorical = categorical is not None and column in categorical
	        row.append("" if is_categorical else nan)

	    return pd.DataFrame([row], columns=names)


	def predict_fraud(data: dict):
	    """Run the preprocessor and model on one record and return the verdict.

	    Args:
	        data: either a mapping of feature name to value, or a mapping with a
	            single ``"features"`` key holding a positional list of values.

	    Returns:
	        A dict with ``"fraud"`` and ``"fraud_prediction"`` (both the predicted
	        label as ``int``) and ``"probability"`` (the largest value in the
	        model's ``predict_proba`` row, as ``float``).

	    Raises:
	        ValueError: if a positional array has fewer features than the
	            preprocessor expects, or if ``preprocessor.transform`` fails.
	    """
	    if isinstance(data, dict) and "features" in data:
	        features = data["features"]
	        # With named input columns, hand the preprocessor a DataFrame carrying
	        # those names; otherwise fall back to a plain 2-D array.
	        feature_names = getattr(preprocessor, "feature_names_in_", None)
	        if feature_names is not None:
	            X = _frame_from_feature_list(features, feature_names)
	        else:
	            X = np.array([features])
	    elif isinstance(data, dict):
	        # Mapping of name -> value: the DataFrame columns come from the keys.
	        X = pd.DataFrame([data])
	    else:
	        X = np.array([list(data.values())])

	    # Positional arrays must match the preprocessor's width: too few features
	    # is an error, extra trailing features are dropped. DataFrames are left
	    # for the preprocessor to validate by column name.
	    expected = getattr(preprocessor, "n_features_in_", None)
	    if expected is not None and isinstance(X, np.ndarray):
	        if X.shape[1] < expected:
	            raise ValueError(f"X has {X.shape[1]} features, but preprocessor is expecting {expected} features as input.")
	        if X.shape[1] > expected:
	            X = X[:, :expected]

	    try:
	        X_processed = preprocessor.transform(X)
	    except Exception as exc:
	        # Re-raise with the input's type, columns and first rows to aid
	        # debugging; arrays have neither columns nor head(), so both stay None.
	        cols = getattr(X, 'columns', None)
	        try:
	            head = X.head().to_dict()
	        except Exception:
	            head = None
	        raise ValueError(f"Transform failed: {exc}; X_type={type(X)}; columns={cols}; head={head}") from exc

	    prediction = model.predict(X_processed)[0]
	    # NOTE(review): this is the highest class probability (the confidence of
	    # the predicted class), not necessarily the probability of the fraud
	    # class - confirm this is what API consumers expect.
	    probability = model.predict_proba(X_processed)[0].max()

	    return {
	        "fraud": int(prediction),
	        "fraud_prediction": int(prediction),
	        "probability": float(probability),
	    }