Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
def calculate_metrics(df):
    """Calculate key metrics from the RCT results.

    Every row of ``df`` is counted as one purchase. The metrics are derived
    as follows:

    * ``Total Converted Customers``: number of distinct ``customer_id`` values.
    * ``Total Purchases``: number of rows.
    * ``Total Revenue``: sum of ``discounted_price``.
    * ``Total Profit``: sum of ``profit``.
    * ``Conversion Rate``: total purchases divided by distinct customers.
    * ``Average Order Value``: total revenue divided by total purchases.

    Both ratios fall back to 0 when their denominator is 0, so an empty
    frame yields a row of zeros instead of raising ``ZeroDivisionError``.

    Args:
        df (pandas.DataFrame): The DataFrame containing RCT results. It must
            provide the columns ``customer_id``, ``discounted_price`` and
            ``profit``.

    Returns:
        pandas.Series: A series containing the calculated metrics, indexed
        by metric name.
    """
    total_customers = len(df['customer_id'].unique())
    total_purchases = len(df)
    total_revenue = df['discounted_price'].sum()
    total_profit = df['profit'].sum()

    # Guard both ratios the same way: an empty frame has no customers and
    # no purchases, and a plain int division by zero would raise.
    conversion_rate = total_purchases / total_customers if total_customers > 0 else 0
    average_order_value = total_revenue / total_purchases if total_purchases > 0 else 0

    return pd.Series({
        'Total Converted Customers': total_customers,
        'Total Purchases': total_purchases,
        'Total Revenue': total_revenue,
        'Total Profit': total_profit,
        'Conversion Rate': conversion_rate,
        'Average Order Value': average_order_value,
    })
def analyze_rct_results(transactions_df, variant_assignments_df):
    """Analyze the results of the Randomized Control Trial (RCT).

    This function calculates overall metrics, metrics per variant (including
    increments relative to the 'Control' variant), and creates a scatter
    plot comparing incremental profit with incremental purchases for the
    non-control variants.

    Args:
        transactions_df (pandas.DataFrame): DataFrame containing transaction
            data. It must have a ``variant`` column plus whatever columns
            ``calculate_metrics`` reads.
        variant_assignments_df (pandas.DataFrame): DataFrame containing
            variant assignments. It is accepted for interface compatibility
            but is not read by this function.

    Returns:
        tuple: ``(overall_df, variant_metrics, fig)`` where ``overall_df`` is
        a one-row DataFrame of overall metrics, ``variant_metrics`` is a
        DataFrame with one row per variant, and ``fig`` is the matplotlib
        Figure holding the scatter plot.

    Raises:
        IndexError: If ``transactions_df`` contains no 'Control' variant,
            because the control row is selected positionally.
    """
    overall_metrics = calculate_metrics(transactions_df)
    variant_metrics = transactions_df.groupby('variant').apply(calculate_metrics).reset_index()

    # Calculate incremental metrics relative to the control group.
    control_metrics = variant_metrics[variant_metrics['variant'] == 'Control'].iloc[0]
    variant_metrics['Incremental Purchases'] = variant_metrics['Total Purchases'] - control_metrics['Total Purchases']
    variant_metrics['Incremental Profit'] = variant_metrics['Total Profit'] - control_metrics['Total Profit']
    # NOTE: this ratio is not finite (NaN or +/-inf) wherever
    # 'Incremental Purchases' is zero, which always includes the Control row.
    variant_metrics['Profit per Incremental Purchase'] = variant_metrics['Incremental Profit'] / variant_metrics['Incremental Purchases']

    # Prepare overall metrics table
    overall_df = pd.DataFrame([overall_metrics])

    # Prepare variant metrics table. Known variants come first in their
    # canonical order; any other variant present in the data is appended so
    # that pd.Categorical does not silently turn its label into NaN.
    variant_order = ['Control', '5% discount', '10% discount', '15% discount']
    extra_variants = [
        name for name in variant_metrics['variant'].unique()
        if name not in variant_order
    ]
    variant_metrics['variant'] = pd.Categorical(
        variant_metrics['variant'],
        categories=variant_order + extra_variants,
        ordered=True,
    )
    variant_metrics = variant_metrics.sort_values('variant')

    # Create plots
    fig, ax1 = plt.subplots(1, 1, figsize=(10, 6))

    # Incremental Total Profit vs Incremental Total Purchases
    non_control = variant_metrics[variant_metrics['variant'] != 'Control']
    ax1.scatter(non_control['Incremental Purchases'], non_control['Incremental Profit'])
    for _, row in non_control.iterrows():
        ax1.annotate(row['variant'], (row['Incremental Purchases'], row['Incremental Profit']))
    ax1.set_xlabel('Incremental Total Purchases')
    ax1.set_ylabel('Incremental Total Profit')
    ax1.set_title('Incremental Total Profit vs Incremental Total Purchases')
    # Dashed red lines mark the control baseline (zero increment) on both axes.
    ax1.axhline(y=0, color='r', linestyle='--')
    ax1.axvline(x=0, color='r', linestyle='--')
    ax1.grid(True, linestyle=':', alpha=0.7)

    # Lay out the figure created above explicitly instead of relying on
    # pyplot's notion of the "current" figure.
    fig.tight_layout()
    return overall_df, variant_metrics, fig