import pandas as pd import numpy as np from datetime import datetime import random import tempfile import gradio as gr def generate_fake_saas_data(start_date="2023-01-01", end_date="2025-01-31"): start = datetime.strptime(start_date, "%Y-%m-%d") end = datetime.strptime(end_date, "%Y-%m-%d") dates = pd.date_range(start=start, end=end) data = [] # Initial values mrr = 5000.0 customers = 100 day_num = 0 total_days = (end - start).days + 1 for date in dates: mrr_start = mrr total_customers_start = customers # Daily new customers (Poisson, increasing lambda over time) lambda_new = 0.26 + (1.19 - 0.26) * (day_num / (total_days - 1)) new_customers = np.random.poisson(lambda_new) # Daily churn (Binomial, increasing rate over time) monthly_churn_rate = 0.01 + (0.031 - 0.01) * (day_num / (total_days - 1)) daily_churn_prob = 1 - (1 - monthly_churn_rate) ** (1 / 30.5) churned_customers = np.random.binomial(customers, daily_churn_prob) # ARPU arpu = mrr_start / total_customers_start if total_customers_start > 0 else 0 # New revenue new_revenue = new_customers * arpu * random.uniform(0.8, 1.2) # Churned revenue churned_revenue = churned_customers * arpu * random.uniform(0.8, 1.2) # Expansion daily_expansion_prob = 0.005 num_expansion = np.random.binomial(customers, daily_expansion_prob) expansion_revenue = num_expansion * arpu * random.uniform(0.05, 0.15) # Update MRR mrr = mrr + new_revenue + expansion_revenue - churned_revenue if mrr < 0: mrr = 0 # Update customers customers = customers + new_customers - churned_customers if customers < 0: customers = 0 # Total revenue (approximate daily recognized revenue) total_revenue = (mrr_start + mrr) / 2 / 30.5 # COGS (~18% of daily revenue with variation) cogs = total_revenue * random.uniform(0.15, 0.20) # OpEx (increasing over time) opex_daily_base = 31 + 22 * (day_num / (total_days - 1)) opex = opex_daily_base * random.uniform(0.9, 1.1) # Sales and Marketing Spend (increasing over time) sales_daily_base = 72 + 238 * (day_num / (total_days - 1)) sales_and_marketing_spend = sales_daily_base * random.uniform(0.9, 1.1) # Owner Compensation (fixed monthly, daily portion) owner_compensation = 4000 / 30.5 * random.uniform(0.95, 1.05) # Interest (small random) interest = random.uniform(1, 2) # Taxes (increasing over time) taxes_daily_base = 9.6 + 18.2 * (day_num / (total_days - 1)) taxes = taxes_daily_base * random.uniform(0.9, 1.1) # Depreciation/Amortization (random around average) depreciation_amortization = random.uniform(2.5, 4) # Append row data.append( { "Date": date.strftime("%Y-%m-%d"), "MRR_Start": round(mrr_start, 2), "MRR_End": round(mrr, 2), "Total_Revenue": round(total_revenue, 2), "New_Customers": new_customers, "Churned_Customers": churned_customers, "Total_Customers_Start": total_customers_start, "Total_Customers_End": customers, "Expansion_Revenue": round(expansion_revenue, 2), "Churned_Revenue": round(churned_revenue, 2), "COGS": round(cogs, 2), "OpEx": round(opex, 2), "Sales_And_Marketing_Spend": round(sales_and_marketing_spend, 2), "Owner_Compensation": round(owner_compensation, 2), "Interest": round(interest, 2), "Taxes": round(taxes, 2), "Depreciation_Amortization": round(depreciation_amortization, 2), } ) day_num += 1 df = pd.DataFrame(data) return df def concatenate_data_per_month(df): # Group by month and aggregate df["Date"] = pd.to_datetime(df["Date"]) df["Month"] = df["Date"].dt.to_period("M") # For 'MRR_Start', we want the MRR_Start of the first day of the month # For 'MRR_End', we want the MRR_End of the last day of the month # For sums, we sum them up # For 'Total_Customers_Start', we want the Total_Customers_Start of the first day of the month # For 'Total_Customers_End', we want the Total_Customers_End of the last day of the month df_per_month = ( df.groupby("Month") .agg( Date=("Date", "first"), # Keep the first date of the month for grouping MRR_Start=("MRR_Start", "first"), MRR_End=("MRR_End", "last"), Total_Revenue=("Total_Revenue", "sum"), New_Customers=("New_Customers", "sum"), Churned_Customers=("Churned_Customers", "sum"), Total_Customers_Start=("Total_Customers_Start", "first"), Total_Customers_End=("Total_Customers_End", "last"), Expansion_Revenue=("Expansion_Revenue", "sum"), Churned_Revenue=("Churned_Revenue", "sum"), COGS=("COGS", "sum"), OpEx=("OpEx", "sum"), Sales_And_Marketing_Spend=("Sales_And_Marketing_Spend", "sum"), Owner_Compensation=("Owner_Compensation", "sum"), Interest=("Interest", "sum"), Taxes=("Taxes", "sum"), Depreciation_Amortization=("Depreciation_Amortization", "sum"), ) .reset_index() ) # Adjust the 'Date' column to be the end of the month for consistency with typical reporting df_per_month["Date"] = df_per_month[ "Month" ].dt.to_timestamp() + pd.offsets.MonthEnd(0) df_per_month = df_per_month.drop("Month", axis=1) return df_per_month def generate_csv(start_date, end_date): random.seed(42) np.random.seed(42) df = generate_fake_saas_data(start_date, end_date) df_per_month = concatenate_data_per_month(df) df_per_month["Date"] = df_per_month["Date"].dt.strftime("%Y-%m-%d") with ( tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_month, tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp, ): df.to_csv(tmp.name, index=False) df_per_month.to_csv(tmp_month.name, index=False) return tmp.name, tmp_month.name, df, df_per_month with gr.Blocks() as demo: gr.Markdown("# Fake SaaS Data Generator") start_date = gr.Textbox(label="Start Date (YYYY-MM-DD)", value="2023-01-01") end_date = gr.Textbox(label="End Date (YYYY-MM-DD)", value="2025-01-31") output_df_per_day = gr.Dataframe( label="Generated Data Preview (per day)", interactive=False, wrap=True ) output_df_per_month = gr.Dataframe( label="Generated Data Preview (per month)", interactive=False, wrap=True ) output_file_per_day = gr.File(label="Download Generated CSV (per day)") output_file_per_month = gr.File(label="Download Generated CSV (per month)") generate_btn = gr.Button("Generate Data") generate_btn.click( fn=generate_csv, inputs=[start_date, end_date], outputs=[ output_file_per_day, output_file_per_month, output_df_per_day, output_df_per_month, ], ) if __name__ == "__main__": demo.launch()