# SmartChurnPredictor / modules.py
# mohamedmostafa259's picture
# added needed classes for running the application
# bc5f977
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from verstack import NaNImputer
############################################################
class DataCleaner(BaseEstimator, TransformerMixin):
    """Stateless cleaning step: drop columns and blank out invalid entries.

    Parameters
    ----------
    cols_to_drop : list-like of str
        Column labels removed from the frame; labels that are absent are
        ignored.
    nonnegative_cols : iterable of str
        Columns in which negative values are replaced with NaN. Names that
        are not present in the frame are skipped.
    """

    def __init__(self, cols_to_drop, nonnegative_cols):
        self.cols_to_drop = cols_to_drop
        self.nonnegative_cols = nonnegative_cols

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself."""
        return self

    def transform(self, X):
        """Return a cleaned copy of the DataFrame ``X``.

        ``X`` itself is left untouched; all edits happen on a copy.
        """
        login_col = 'avg_frequency_login_days'

        cleaned = X.copy()
        cleaned.drop(columns=self.cols_to_drop, errors='ignore', inplace=True)
        # The literal markers '?' and 'Error' are treated as missing values.
        cleaned.replace(['?', 'Error'], np.nan, inplace=True)

        # Cast after the markers are gone.
        # NOTE(review): presumably the column arrives as strings because of
        # those markers -- confirm against the raw data.
        if login_col in cleaned.columns:
            cleaned[login_col] = cleaned[login_col].astype(float)

        present = [name for name in self.nonnegative_cols
                   if name in cleaned.columns]
        for name in present:
            is_negative = cleaned[name] < 0
            cleaned.loc[is_negative, name] = np.nan

        return cleaned
############################################################
class NaNImputerWrapper(BaseEstimator, TransformerMixin):
    """Adapter exposing verstack's ``NaNImputer`` through fit/transform.

    Parameters
    ----------
    train_sample_size : int, default=30_000
        Forwarded as the first positional argument of ``NaNImputer``.
    verbose : bool, default=True
        Forwarded as the second positional argument of ``NaNImputer``.
    """

    def __init__(self, train_sample_size=30_000, verbose=True):
        self.train_sample_size = train_sample_size
        self.verbose = verbose
        # The imputer is created once, here, from the constructor arguments;
        # nothing in this class rebuilds it if the attributes above are
        # changed afterwards.
        self.imputer = NaNImputer(train_sample_size, verbose)

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself."""
        return self

    def transform(self, X):
        """Return whatever ``NaNImputer.impute`` produces for ``X``."""
        imputed = self.imputer.impute(X)
        return imputed
############################################################
class FeatureEng(BaseEstimator, TransformerMixin):
    """Select, encode and standardize the three features used downstream.

    The transformer keeps ``membership_category``, ``feedback`` and
    ``points_in_wallet`` and turns them into numeric columns:

    * ``membership_category`` -> position in ``membership_order``
      (``-1`` for missing or unlisted values, as produced by
      ``pd.Categorical(...).codes``);
    * ``feedback`` -> ``1`` / ``-1`` / ``0`` via :meth:`get_sentiment`;
    * ``points_in_wallet`` -> z-score.

    Attributes set by ``fit``
    -------------------------
    points_mean_, points_std_ : float
        Mean and sample standard deviation of ``points_in_wallet`` in the
        data passed to ``fit``.
    feature_names_out_ : pd.Index
        Names of the output columns.
    """

    # Columns kept by ``transform``, in output order.
    _FEATURES = ('membership_category', 'feedback', 'points_in_wallet')

    def __init__(self):
        self.membership_order = ['No Membership', 'Basic Membership', 'Silver Membership',
                                 'Gold Membership', 'Platinum Membership', 'Premium Membership']
        self.positive_feedback = ['Products always in Stock', 'Quality Customer Care',
                                  'Reasonable Price', 'User Friendly Website']
        self.negative_feedback = ['Poor Website', 'Poor Customer Service',
                                  'Poor Product Quality', 'Too many ads']

    def get_sentiment(self, feedback):
        """Map a feedback string to 1 (positive), -1 (negative) or 0 (other)."""
        if feedback in self.positive_feedback:
            return 1
        if feedback in self.negative_feedback:
            return -1
        return 0

    def fit(self, X, y=None):
        """Learn the standardization statistics from the DataFrame ``X``.

        Storing the statistics here means ``transform`` scales new data with
        the same mean/std as the training data instead of with the
        statistics of whatever batch it happens to receive.
        """
        points = X['points_in_wallet']
        self.points_mean_ = points.mean()
        self.points_std_ = points.std()
        self.feature_names_out_ = pd.Index(list(self._FEATURES))
        return self

    def transform(self, X):
        """Return a new DataFrame holding the three engineered columns."""
        # feature selection -- explicit copy so the assignments below never
        # write into (or warn about) a slice of the caller's frame
        out = X[list(self._FEATURES)].copy()

        # encoding
        out['membership_category'] = pd.Categorical(out['membership_category'],
                                                    categories=self.membership_order,
                                                    ordered=True).codes
        out['feedback'] = out['feedback'].apply(self.get_sentiment)

        # standardization
        points = out['points_in_wallet']
        mean = getattr(self, 'points_mean_', None)
        std = getattr(self, 'points_std_', None)
        if mean is None or std is None:
            # Instance without learned statistics (never fitted, or restored
            # from an object saved before ``fit`` stored them): keep the
            # previous behaviour and standardize with this batch's own
            # statistics. Note that a one-row batch then yields NaN because
            # its sample standard deviation is undefined.
            mean, std = points.mean(), points.std()
        out['points_in_wallet'] = (points - mean) / std
        return out

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its transformed version."""
        return self.fit(X, y).transform(X)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names; ``input_features`` is ignored."""
        # The output columns are fixed, so they can be reported even when
        # ``feature_names_out_`` was never stored on this instance.
        return getattr(self, 'feature_names_out_', pd.Index(list(self._FEATURES)))
############################################################
class AdjustedProbClassifier(BaseEstimator, ClassifierMixin):
    """Classifier wrapper that rescales class probabilities before voting.

    Parameters
    ----------
    model : estimator
        Wrapped classifier; must provide ``fit`` and ``predict_proba``.
    thresholds : array-like
        Per-class divisors applied to each probability row before the
        arg-max is taken.
    """

    def __init__(self, model, thresholds):
        self.model = model
        self.thresholds = thresholds

    def fit(self, X, y):
        """Fit the wrapped model on ``(X, y)`` and return this wrapper."""
        self.model.fit(X, y)
        return self

    def predict_proba(self, X):
        """Return the wrapped model's class probabilities, unmodified."""
        return self.model.predict_proba(X)

    def predict(self, X):
        """Return, per sample, the 1-based index of the best adjusted class.

        Each probability row is divided element-wise by ``thresholds`` and
        the position of the largest quotient, plus one, is the prediction.
        NOTE(review): the ``+ 1`` presumably assumes the model's classes are
        labelled 1..K in column order -- confirm against the training labels.
        """
        divisors = np.array(self.thresholds)
        winners = [np.argmax(row / divisors) + 1
                   for row in self.predict_proba(X)]
        return np.array(winners)

    def score(self, X, y):
        """Return the fraction of predictions equal to ``y``."""
        hits = self.predict(X) == y
        return np.mean(hits)
############################################################