Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| import random | |
# Electronics catalogue used by the simulation.
# Each row is (product name, unit cost, list price); the rows are expanded
# into dictionaries with the keys "name", "cost" and "price".
electronics_products = [
    {"name": name, "cost": cost, "price": price}
    for name, cost, price in (
        ("4K Smart TV", 500, 699),
        ("Wireless Headphones", 100, 139),
        ("Gaming Console", 300, 419),
        ("Digital Camera", 400, 559),
        ("Bluetooth Speaker", 50, 69),
        ("Smartwatch", 150, 209),
        ("Laptop", 600, 839),
        ("Tablet", 200, 279),
        ("Drone", 250, 349),
        ("Home Theater System", 350, 489),
        ("E-reader", 80, 109),
        ("Portable Power Bank", 30, 41),
        ("Wireless Earbuds", 80, 109),
        ("Action Camera", 150, 209),
        ("Smart Home Hub", 70, 97),
        ("Gaming Mouse", 40, 55),
        ("External Hard Drive", 60, 83),
        ("Graphic Tablet", 180, 249),
        ("Noise-Canceling Headphones", 200, 279),
        ("Portable Projector", 300, 419),
    )
]
# RCT arms: each variant label is written next to the discount rate it
# applies, then split into two parallel lists that share the same index.
variants, discount_rates = map(list, zip(
    ('Control', 0),
    ('5% discount', 0.05),
    ('10% discount', 0.10),
    ('15% discount', 0.15),
))
def calculate_purchase_probability(customer, discount, base_prob=0.1):
    """
    Calculate the probability of a customer making a purchase.

    Starts from ``base_prob``, adds or subtracts small fixed adjustments
    derived from the customer's attributes, scales the result by a
    discount-dependent uplift, adds Gaussian noise and clamps to [0, 1].

    The result is not deterministic: one draw is taken from NumPy's global
    random state on every call.

    Args:
        customer: A mapping (dict or pandas Series) providing the keys
            'age', 'loyalty_level', 'total_orders',
            'newsletter_subscription', 'main_browsing_device',
            'average_order_value', 'gender' and 'preferred_payment_method'.
        discount (float): The discount rate applied (e.g., 0.05 for 5%).
            It is rounded to two decimals before the uplift lookup, so
            values that differ from 0.05 / 0.10 / 0.15 only by floating
            point error still receive the uplift. Any other rate leaves
            the probability unscaled.
        base_prob (float): The base probability of purchase (default: 0.1)

    Returns:
        float: The calculated probability of purchase, within [0, 1].
    """
    # Uplift multiplier per discount level. 15% is deliberately not higher
    # than 10% in the original figures; the values are kept as they were.
    uplift_by_discount = {0.05: 3.5, 0.10: 4.5, 0.15: 4.3}

    prob = base_prob
    subscribed = customer['newsletter_subscription']
    device = customer['main_browsing_device']
    gender = customer['gender']

    # Age factor: positive below 60, zero at 60.
    # NOTE(review): this goes negative for ages above 60, which also lowers
    # discount_sensitivity below - confirm that is intended.
    age_factor = (60 - customer['age']) / 60
    prob += 0.02 * age_factor

    # Loyalty factor: larger for LOWER loyalty levels.
    loyalty_factor = (6 - customer['loyalty_level']) / 5
    prob += 0.02 * loyalty_factor

    # Past behavior factor: grows with order count, capped at 20 orders.
    order_factor = min(customer['total_orders'] / 20, 1)
    prob += 0.03 * order_factor

    # Newsletter subscribers get a fixed bump.
    if subscribed:
        prob += 0.03

    # Browsing device factor: mobile and app users get a fixed bump.
    if device == 'Mobile':
        prob += 0.02
    elif device == 'App':
        prob += 0.03

    # Average order value factor: higher AOV lowers the probability,
    # capped at an AOV of 1000.
    aov_factor = min(customer['average_order_value'] / 1000, 1)
    prob -= 0.02 * aov_factor

    # Gender factor.
    if gender == 'Female':
        prob += 0.01
    elif gender == 'Male':
        prob -= 0.01

    # Preferred payment method factor.
    if customer['preferred_payment_method'] == 'Credit Card':
        prob += 0.02

    # NOTE(review): loyalty_factor is subtracted here, so LESS loyal
    # customers end up less discount-sensitive. That looks opposite to the
    # stated intent ("more loyal customers less sensitive") - confirm
    # before changing, since it alters the simulated data.
    discount_sensitivity = (
        1 + age_factor - loyalty_factor + (0.5 if subscribed else 0)
    )

    # Look the uplift up by the rounded rate instead of comparing floats
    # with ==, which silently skipped the uplift for e.g. 0.1 + 0.05.
    discount_level = round(discount, 2)
    uplift = uplift_by_discount.get(discount_level)
    if uplift is not None:
        prob *= (1 + discount_level * uplift * discount_sensitivity)

    # Add random noise (mean 0, std dev 0.02), then clamp to a valid
    # probability.
    noise = np.random.normal(0, 0.02)
    prob = max(0, min(1, prob + noise))
    return prob
def simulate_purchase(customer, variant_index, product):
    """
    Simulate one purchase opportunity for a customer.

    Looks up the discount for the given variant, asks
    ``calculate_purchase_probability`` for the purchase probability and
    compares it with a uniform random draw. When the purchase happens, the
    paid price and the cost are each perturbed with Gaussian noise and
    floored at zero.

    Args:
        customer (dict): Customer attributes; must include 'customer_id'
        variant_index (int): Index into ``discount_rates`` / ``variants``
        product (dict): Product information with 'name', 'price' and 'cost'

    Returns:
        dict or None: Purchase details ('customer_id', 'variant', 'product',
        'price', 'discounted_price', 'cost', 'profit') when a purchase is
        made, None otherwise
    """
    rate = discount_rates[variant_index]
    purchase_prob = calculate_purchase_probability(customer, rate)

    bought = np.random.random() < purchase_prob
    if not bought:
        return None

    list_price = product['price']
    unit_cost = product['cost']

    # Price paid: discounted list price plus noise (std dev = 5% of list).
    price_jitter = np.random.normal(0, list_price * 0.05)
    paid_price = max(0, list_price * (1 - rate) + price_jitter)

    # Cost incurred: nominal cost plus noise (std dev = 3% of cost).
    cost_jitter = np.random.normal(0, unit_cost * 0.03)
    incurred_cost = max(0, unit_cost + cost_jitter)

    record = {
        'customer_id': customer['customer_id'],
        'variant': variants[variant_index],
        'product': product['name'],
        'price': list_price,
        'discounted_price': paid_price,
        'cost': incurred_cost,
        'profit': paid_price - incurred_cost,
    }
    return record
def run_rct_simulation(df, experiment_duration=30):
    """
    Run a Randomized Control Trial (RCT) simulation.

    Every customer row is assigned uniformly at random to one of the
    entries in ``variants``. Each customer then gets a Poisson-distributed
    number of purchase opportunities (mean ``experiment_duration / 10``);
    for each one a product is picked at random from
    ``electronics_products`` and passed to ``simulate_purchase``.

    Side effect: reseeds both NumPy's and the standard library's global
    random generators with 42, so repeated calls on the same input return
    the same result.

    Args:
        df (pandas.DataFrame): The customer data, one row per customer.
            Must contain 'customer_id' plus the columns read by
            ``calculate_purchase_probability``.
        experiment_duration (int): The duration of the experiment in days
            (default: 30). Only used to scale the number of opportunities.

    Returns:
        tuple: ``(transactions_df, variant_assignments_df)``.
        ``transactions_df`` has one row per simulated purchase plus a
        constant ``purchase`` column equal to 1;
        ``variant_assignments_df`` has one row per customer. Both keep
        their expected columns even when they contain no rows.
    """
    # Column layouts used only when there is nothing to build a frame from,
    # so downstream code can still select/merge on these columns.
    transaction_columns = [
        'customer_id', 'variant', 'product', 'price',
        'discounted_price', 'cost', 'profit',
    ]
    assignment_columns = ['customer_id', 'variant']

    # Set random seed for reproducibility
    np.random.seed(42)
    random.seed(42)

    results = []
    variant_assignments = []
    for _, customer in df.iterrows():
        # The earlier version drew a uniform number to pick between two
        # identical branches. The draw is kept (and discarded) so the
        # seeded random stream, and therefore the output, is unchanged.
        np.random.random()
        variant_index = np.random.randint(0, len(variants))

        # Record variant assignment for every customer, buyer or not
        variant_assignments.append({
            'customer_id': customer['customer_id'],
            'variant': variants[variant_index],
        })

        # Simulate multiple purchase opportunities with varying frequency
        num_opportunities = np.random.poisson(experiment_duration / 10)
        for _opportunity in range(num_opportunities):
            product = random.choice(electronics_products)
            purchase = simulate_purchase(customer, variant_index, product)
            if purchase:
                results.append(purchase)

    # Create DataFrame from results
    if results:
        transactions_df = pd.DataFrame(results)
    else:
        transactions_df = pd.DataFrame(columns=transaction_columns)
    transactions_df['purchase'] = 1

    # Create DataFrame from variant assignments
    if variant_assignments:
        variant_assignments_df = pd.DataFrame(variant_assignments)
    else:
        variant_assignments_df = pd.DataFrame(columns=assignment_columns)

    return transactions_df, variant_assignments_df