Spaces:

Egeekle
/

MLOps-risk-model

Sleeping

github-actions[bot]

deploy: sync from GitHub main

1e5b98a 3 months ago

3.24 kB

	# src/model_utils.py
	from typing import List, Optional, Tuple, Union

	import numpy as np
	import pandas as pd
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder, StandardScaler
	from sklearn.linear_model import LogisticRegression


	# Columns that build_model_pipeline sends through the "num" branch of the
	# ColumnTransformer (StandardScaler). Order matters: split_features_target
	# emits these columns first, followed by CATEGORICAL_FEATURES.
	NUMERIC_FEATURES: List[str] = [
	    "age",
	    "dependents",
	    "monthly_income",
	    "employment_months",
	    "requested_amount",
	    "loan_term_months",
	    "interest_rate",
	    "installment",
	    "debt_to_income",
	    "num_open_loans",
	    "num_credit_cards",
	]

	# Columns that build_model_pipeline sends through the "cat" branch of the
	# ColumnTransformer (OneHotEncoder with handle_unknown="ignore").
	CATEGORICAL_FEATURES: List[str] = [
	    "gender",
	    "marital_status",
	    "employment_type",
	    "has_mortgage",
	    "channel",
	    "region",
	]


	def split_features_target(df: pd.DataFrame, target_column: str) -> Tuple[pd.DataFrame, np.ndarray]:
	    """Separate the model input columns from the target column.

	    Args:
	        df: Frame containing every column listed in NUMERIC_FEATURES and
	            CATEGORICAL_FEATURES, plus ``target_column``.
	        target_column: Name of the column to return as the target.

	    Returns:
	        A ``(X, y)`` pair. ``X`` is a copy of the feature columns (numeric
	        features first, categorical features after), so modifying it does
	        not touch ``df``. ``y`` is ``df[target_column].values``.
	    """
	    feature_columns = [*NUMERIC_FEATURES, *CATEGORICAL_FEATURES]
	    features = df[feature_columns].copy()
	    target = df[target_column].values
	    return features, target


	def build_model_pipeline(
	    random_state: int = 42,
	    C: float = 1.0,
	    penalty: Optional[str] = "l2",
	    solver: str = "lbfgs",
	    max_iter: int = 1000,
	    class_weight: Optional[Union[str, dict]] = None,
	    l1_ratio: float = 0.5,
	) -> Pipeline:
	    """
	    Build an unfitted preprocessing + logistic-regression pipeline.

	    NUMERIC_FEATURES are passed through a StandardScaler and
	    CATEGORICAL_FEATURES through a OneHotEncoder(handle_unknown="ignore");
	    the combined output feeds a LogisticRegression.

	    If the requested solver cannot be used with the requested penalty, the
	    solver is silently replaced by a compatible one (see ``solver`` below)
	    instead of letting scikit-learn raise at fit time.

	    Args:
	        random_state: Random state for reproducibility
	        C: Inverse of regularization strength (smaller = stronger regularization)
	        penalty: Regularization penalty ('l1', 'l2', 'elasticnet', None)
	        solver: Algorithm to use ('lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga').
	            Replaced when incompatible with ``penalty``:
	            'l1' falls back to 'liblinear', 'elasticnet' to 'saga',
	            'l2' and None to 'lbfgs'.
	        max_iter: Maximum number of iterations
	        class_weight: Class weight strategy ('balanced', None, or dict)
	        l1_ratio: Elastic-net mixing parameter; forwarded to the classifier
	            only when ``penalty == 'elasticnet'`` and ignored otherwise

	    Returns:
	        Scikit-learn Pipeline with steps "preprocessor" and "clf"
	    """
	    numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
	    categorical_transformer = Pipeline(
	        steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
	    )

	    preprocessor = ColumnTransformer(
	        transformers=[
	            ("num", numeric_transformer, NUMERIC_FEATURES),
	            ("cat", categorical_transformer, CATEGORICAL_FEATURES),
	        ]
	    )

	    # Solvers each penalty may keep; the first entry is the fallback used
	    # when the requested solver is not in the tuple.
	    compatible_solvers = {
	        "l1": ("liblinear", "saga"),
	        "elasticnet": ("saga",),
	        "l2": ("lbfgs", "liblinear", "newton-cg", "sag", "saga"),
	        # scikit-learn rejects liblinear with penalty=None, so it is not
	        # listed here and gets swapped for lbfgs.
	        None: ("lbfgs", "newton-cg", "sag", "saga"),
	    }
	    allowed = compatible_solvers.get(penalty)
	    # An unrecognized penalty is passed through untouched so that
	    # LogisticRegression reports the problem itself.
	    if allowed is not None and solver not in allowed:
	        solver = allowed[0]

	    lr_params = {
	        "C": C,
	        "penalty": penalty,
	        "solver": solver,
	        "max_iter": max_iter,
	        "random_state": random_state,
	        "class_weight": class_weight,
	    }

	    # l1_ratio is only meaningful for elasticnet; omit it otherwise so the
	    # estimator keeps its own default.
	    if penalty == "elasticnet":
	        lr_params["l1_ratio"] = l1_ratio

	    clf = LogisticRegression(**lr_params)

	    return Pipeline(
	        steps=[
	            ("preprocessor", preprocessor),
	            ("clf", clf),
	        ]
	    )