Spaces:
Build error
Build error
from functools import reduce
from random import Random
from random import randint

import pandas as pd
from causalml.dataset import make_uplift_classification
class UpliftSimulation:
    """Generate and post-process a synthetic multi-treatment discount dataset.

    Typical call order: ``simulate_dataset`` -> ``apply_discounts_and_clean``
    -> ``postprocess_tables`` -> ``add_monetary_effect``.  Each step reads and
    rewrites ``self.df``.

    NOTE: the business-friendly column names and the mixed-feature column
    names below are positional and match the column layout produced by the
    *default* constructor arguments.  With a different feature configuration
    ``postprocess_tables`` raises instead of silently mislabelling columns.
    """

    # Discount rate applied to each treatment group.
    _DISCOUNT_BY_GROUP = {
        'control': 0, 'discount_05': 0.05, 'discount_10': 0.10, 'discount_15': 0.15,
    }

    # Mixed uplift features appended (in this order) to the uplift table.
    _MIXED_UPLIFT_COLUMNS = (
        'x31_increase_mix', 'x22_increase_mix', 'x20_increase_mix',
        'x33_increase_mix', 'x32_increase_mix', 'x27_increase_mix',
        'x21_increase_mix', 'x26_increase_mix',
    )

    # Positional replacement names for the three feature tables.
    _USER_PROFILE_NAMES = (
        'UserID', 'AgeIndex', 'IncomeIndex', 'PurchaseFrequencyIndex',
        'AccountLifetimeIndex', 'AverageTransactionValueIndex',
        'PreferredPaymentMethodIndex', 'RegionIndex',
    )
    _UPLIFT_NAMES = (
        'UserID', 'EmailDiscountCTRIndex', 'WebDiscountCTRIndex', 'SocialMediaEngagementIndex',
        'DirectMailDiscountResponseIndex', 'InAppDiscountEngagementIndex', 'FlashSaleParticipationIndex',
        'SeasonalPromoInterestIndex', 'LoyaltyProgramEngagementIndex', 'ReferralBonusUsageIndex',
        'DiscountCodeRedemptionIndex', 'VIPSaleAccessIndex', 'EarlyAccessOptInIndex',
        'ProductReviewAfterDiscountIndex', 'UpsellConversionIndex', 'CrossSellInterestIndex',
        'BundlePurchaseIndex', 'SubscriptionUpgradeIndex', 'CustomerFeedbackIndex',
    )
    _IRRELEVANT_NAMES = (
        'UserID', 'BrowserTypeIndex', 'DeviceCategoryIndex', 'OperatingSystemIndex',
        'SessionStartTimeIndex', 'LanguagePreferenceIndex', 'NewsletterSubscriptionIndex',
        'AccountVerificationStatusIndex', 'AdBlockerPresenceIndex',
    )

    def __init__(self, n=50000, y_name='conversion',
                 treatment_group_keys=None,
                 n_classification_features=15, n_classification_informative=7,
                 n_classification_repeated=0,
                 n_uplift_increase_dict=None,
                 n_uplift_decrease_dict=None,
                 delta_uplift_increase_dict=None,
                 delta_uplift_decrease_dict=None,
                 n_uplift_increase_mix_informative_dict=None,
                 n_uplift_decrease_mix_informative_dict=None,
                 positive_class_proportion=0.05, random_seed=8097):
        """Store the simulation settings; no data is generated here.

        Every list/dict argument defaults to ``None`` and is replaced by a
        fresh copy of the stock configuration, so instances never share (and
        cannot accidentally mutate) each other's settings.  All arguments are
        forwarded unchanged to ``make_uplift_classification`` by
        ``simulate_dataset``; ``random_seed`` additionally seeds the price
        draw in ``add_monetary_effect``.
        """
        if treatment_group_keys is None:
            treatment_group_keys = ['control', 'discount_05', 'discount_10', 'discount_15']
        if n_uplift_increase_dict is None:
            n_uplift_increase_dict = {'discount_05': 4, 'discount_10': 3, 'discount_15': 3}
        if n_uplift_decrease_dict is None:
            n_uplift_decrease_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}
        if delta_uplift_increase_dict is None:
            delta_uplift_increase_dict = {'discount_05': 0.0020, 'discount_10': 0.0045, 'discount_15': 0.008}
        if delta_uplift_decrease_dict is None:
            delta_uplift_decrease_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}
        if n_uplift_increase_mix_informative_dict is None:
            n_uplift_increase_mix_informative_dict = {'discount_05': 3, 'discount_10': 2, 'discount_15': 3}
        if n_uplift_decrease_mix_informative_dict is None:
            n_uplift_decrease_mix_informative_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}

        self.n = n
        self.y_name = y_name
        self.treatment_group_keys = treatment_group_keys
        self.n_classification_features = n_classification_features
        self.n_classification_informative = n_classification_informative
        self.n_classification_repeated = n_classification_repeated
        self.n_uplift_increase_dict = n_uplift_increase_dict
        self.n_uplift_decrease_dict = n_uplift_decrease_dict
        self.delta_uplift_increase_dict = delta_uplift_increase_dict
        self.delta_uplift_decrease_dict = delta_uplift_decrease_dict
        self.n_uplift_increase_mix_informative_dict = n_uplift_increase_mix_informative_dict
        self.n_uplift_decrease_mix_informative_dict = n_uplift_decrease_mix_informative_dict
        self.positive_class_proportion = positive_class_proportion
        self.random_seed = random_seed
        # Populated by simulate_dataset().
        self.df = None
        self.X_names = None
        # Populated by postprocess_tables(): the four per-topic tables.
        self.dataframes = None

    def simulate_dataset(self):
        """Generate the raw dataset and store it in ``self.df`` / ``self.X_names``."""
        self.df, self.X_names = make_uplift_classification(
            treatment_name=self.treatment_group_keys,
            y_name=self.y_name,
            n_samples=self.n,
            n_classification_features=self.n_classification_features,
            n_classification_informative=self.n_classification_informative,
            n_classification_repeated=self.n_classification_repeated,
            n_uplift_increase_dict=self.n_uplift_increase_dict,
            n_uplift_decrease_dict=self.n_uplift_decrease_dict,
            delta_uplift_increase_dict=self.delta_uplift_increase_dict,
            delta_uplift_decrease_dict=self.delta_uplift_decrease_dict,
            n_uplift_increase_mix_informative_dict=self.n_uplift_increase_mix_informative_dict,
            n_uplift_decrease_mix_informative_dict=self.n_uplift_decrease_mix_informative_dict,
            positive_class_proportion=self.positive_class_proportion,
            random_seed=self.random_seed,
        )

    def apply_discounts_and_clean(self):
        """Add a numeric ``discount`` column and drop ``treatment_effect``.

        Raises:
            ValueError: if ``treatment_group_key`` holds a group that has no
                known discount rate (it would otherwise break the price
                arithmetic in ``add_monetary_effect``).
        """
        groups = self.df['treatment_group_key']
        # map() yields a float column directly; replace() relied on implicit
        # downcasting of an object column.
        rates = groups.map(self._DISCOUNT_BY_GROUP)
        unknown = groups[rates.isna() & groups.notna()].unique()
        if len(unknown):
            raise ValueError(
                f"No discount rate defined for treatment group(s): {sorted(map(str, unknown))}"
            )
        self.df['discount'] = rates
        # errors='ignore' keeps the step idempotent if it is run twice.
        self.df = self.df.drop(columns=['treatment_effect'], errors='ignore')

    @staticmethod
    def _rename_positionally(table, names, label):
        """Rename *table*'s columns by position, failing loudly on a size mismatch."""
        if len(table.columns) != len(names):
            raise ValueError(
                f"{label} table has {len(table.columns)} columns but {len(names)} "
                "names are defined; the friendly names only fit the default "
                "feature configuration"
            )
        table.columns = list(names)
        return table

    def postprocess_tables(self):
        """Split ``self.df`` into four topic tables, rename, and merge back.

        Adds a synthetic ``UserID`` (row position), groups the feature columns
        by the substring in their name (``informative`` / ``uplift`` /
        ``irrelevant``, plus the fixed list of mixed features), gives each
        feature a business-style name, stores the four tables in
        ``self.dataframes`` and replaces ``self.df`` with their merge on
        ``UserID``.

        Raises:
            ValueError: if a table's column count does not match its name list.
            KeyError: if an expected column is missing from ``self.df``.
        """
        # Add a synthetic UserID for each entry
        self.df['UserID'] = range(len(self.df))

        columns = list(self.df.columns)
        informative_cols = [col for col in columns if 'informative' in col]
        uplift_cols = [col for col in columns if 'uplift' in col]
        irrelevant_cols = [col for col in columns if 'irrelevant' in col]
        # Use the configured outcome name rather than a hard-coded 'conversion'.
        transaction_cols = ['treatment_group_key', self.y_name, 'discount']

        user_profiles = self.df[['UserID'] + informative_cols].copy()
        # Uplift table = pure uplift features followed by the mixed features.
        uplift_data = pd.concat(
            [self.df[['UserID'] + uplift_cols], self.df[list(self._MIXED_UPLIFT_COLUMNS)]],
            axis=1,
        )
        irrelevant_data = self.df[['UserID'] + irrelevant_cols].copy()
        # Transaction columns keep their original names.
        transaction_data = self.df[['UserID'] + transaction_cols].copy()

        self._rename_positionally(user_profiles, self._USER_PROFILE_NAMES, 'user profile')
        self._rename_positionally(uplift_data, self._UPLIFT_NAMES, 'uplift')
        self._rename_positionally(irrelevant_data, self._IRRELEVANT_NAMES, 'irrelevant')

        self.dataframes = [user_profiles, uplift_data, irrelevant_data, transaction_data]
        self.df = reduce(lambda left, right: pd.merge(left, right, on='UserID'), self.dataframes)

    def add_monetary_effect(self):
        """Add ``base_price``, ``discounted_price`` and ``benefit`` columns.

        ``base_price`` is 0 where the outcome column equals 0, otherwise an
        integer drawn uniformly from 1..100.  The draw is seeded with
        ``self.random_seed`` so repeated runs give the same prices.
        """
        rng = Random(self.random_seed)
        converted = self.df[self.y_name] != 0
        # One draw per converted row only, so the stream depends just on the
        # seed and the conversion pattern.
        self.df['base_price'] = [rng.randint(1, 100) if flag else 0 for flag in converted]
        self.df['discounted_price'] = self.df['base_price'] * (1 - self.df['discount'])
        # NOTE(review): 0.8 * base_price looks like a cost share of the list
        # price - confirm the intended meaning with the author.
        self.df['benefit'] = self.df['discounted_price'] - 0.8 * self.df['base_price']