Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| import random | |
| import tempfile | |
| import gradio as gr | |
| def generate_fake_saas_data(start_date="2023-01-01", end_date="2025-01-31"): | |
| start = datetime.strptime(start_date, "%Y-%m-%d") | |
| end = datetime.strptime(end_date, "%Y-%m-%d") | |
| dates = pd.date_range(start=start, end=end) | |
| data = [] | |
| # Initial values | |
| mrr = 5000.0 | |
| customers = 100 | |
| day_num = 0 | |
| total_days = (end - start).days + 1 | |
| for date in dates: | |
| mrr_start = mrr | |
| total_customers_start = customers | |
| # Daily new customers (Poisson, increasing lambda over time) | |
| lambda_new = 0.26 + (1.19 - 0.26) * (day_num / (total_days - 1)) | |
| new_customers = np.random.poisson(lambda_new) | |
| # Daily churn (Binomial, increasing rate over time) | |
| monthly_churn_rate = 0.01 + (0.031 - 0.01) * (day_num / (total_days - 1)) | |
| daily_churn_prob = 1 - (1 - monthly_churn_rate) ** (1 / 30.5) | |
| churned_customers = np.random.binomial(customers, daily_churn_prob) | |
| # ARPU | |
| arpu = mrr_start / total_customers_start if total_customers_start > 0 else 0 | |
| # New revenue | |
| new_revenue = new_customers * arpu * random.uniform(0.8, 1.2) | |
| # Churned revenue | |
| churned_revenue = churned_customers * arpu * random.uniform(0.8, 1.2) | |
| # Expansion | |
| daily_expansion_prob = 0.005 | |
| num_expansion = np.random.binomial(customers, daily_expansion_prob) | |
| expansion_revenue = num_expansion * arpu * random.uniform(0.05, 0.15) | |
| # Update MRR | |
| mrr = mrr + new_revenue + expansion_revenue - churned_revenue | |
| if mrr < 0: | |
| mrr = 0 | |
| # Update customers | |
| customers = customers + new_customers - churned_customers | |
| if customers < 0: | |
| customers = 0 | |
| # Total revenue (approximate daily recognized revenue) | |
| total_revenue = (mrr_start + mrr) / 2 / 30.5 | |
| # COGS (~18% of daily revenue with variation) | |
| cogs = total_revenue * random.uniform(0.15, 0.20) | |
| # OpEx (increasing over time) | |
| opex_daily_base = 31 + 22 * (day_num / (total_days - 1)) | |
| opex = opex_daily_base * random.uniform(0.9, 1.1) | |
| # Sales and Marketing Spend (increasing over time) | |
| sales_daily_base = 72 + 238 * (day_num / (total_days - 1)) | |
| sales_and_marketing_spend = sales_daily_base * random.uniform(0.9, 1.1) | |
| # Owner Compensation (fixed monthly, daily portion) | |
| owner_compensation = 4000 / 30.5 * random.uniform(0.95, 1.05) | |
| # Interest (small random) | |
| interest = random.uniform(1, 2) | |
| # Taxes (increasing over time) | |
| taxes_daily_base = 9.6 + 18.2 * (day_num / (total_days - 1)) | |
| taxes = taxes_daily_base * random.uniform(0.9, 1.1) | |
| # Depreciation/Amortization (random around average) | |
| depreciation_amortization = random.uniform(2.5, 4) | |
| # Append row | |
| data.append( | |
| { | |
| "Date": date.strftime("%Y-%m-%d"), | |
| "MRR_Start": round(mrr_start, 2), | |
| "MRR_End": round(mrr, 2), | |
| "Total_Revenue": round(total_revenue, 2), | |
| "New_Customers": new_customers, | |
| "Churned_Customers": churned_customers, | |
| "Total_Customers_Start": total_customers_start, | |
| "Total_Customers_End": customers, | |
| "Expansion_Revenue": round(expansion_revenue, 2), | |
| "Churned_Revenue": round(churned_revenue, 2), | |
| "COGS": round(cogs, 2), | |
| "OpEx": round(opex, 2), | |
| "Sales_And_Marketing_Spend": round(sales_and_marketing_spend, 2), | |
| "Owner_Compensation": round(owner_compensation, 2), | |
| "Interest": round(interest, 2), | |
| "Taxes": round(taxes, 2), | |
| "Depreciation_Amortization": round(depreciation_amortization, 2), | |
| } | |
| ) | |
| day_num += 1 | |
| df = pd.DataFrame(data) | |
| return df | |
| def concatenate_data_per_month(df): | |
| # Group by month and aggregate | |
| df["Date"] = pd.to_datetime(df["Date"]) | |
| df["Month"] = df["Date"].dt.to_period("M") | |
| # For 'MRR_Start', we want the MRR_Start of the first day of the month | |
| # For 'MRR_End', we want the MRR_End of the last day of the month | |
| # For sums, we sum them up | |
| # For 'Total_Customers_Start', we want the Total_Customers_Start of the first day of the month | |
| # For 'Total_Customers_End', we want the Total_Customers_End of the last day of the month | |
| df_per_month = ( | |
| df.groupby("Month") | |
| .agg( | |
| Date=("Date", "first"), # Keep the first date of the month for grouping | |
| MRR_Start=("MRR_Start", "first"), | |
| MRR_End=("MRR_End", "last"), | |
| Total_Revenue=("Total_Revenue", "sum"), | |
| New_Customers=("New_Customers", "sum"), | |
| Churned_Customers=("Churned_Customers", "sum"), | |
| Total_Customers_Start=("Total_Customers_Start", "first"), | |
| Total_Customers_End=("Total_Customers_End", "last"), | |
| Expansion_Revenue=("Expansion_Revenue", "sum"), | |
| Churned_Revenue=("Churned_Revenue", "sum"), | |
| COGS=("COGS", "sum"), | |
| OpEx=("OpEx", "sum"), | |
| Sales_And_Marketing_Spend=("Sales_And_Marketing_Spend", "sum"), | |
| Owner_Compensation=("Owner_Compensation", "sum"), | |
| Interest=("Interest", "sum"), | |
| Taxes=("Taxes", "sum"), | |
| Depreciation_Amortization=("Depreciation_Amortization", "sum"), | |
| ) | |
| .reset_index() | |
| ) | |
| # Adjust the 'Date' column to be the end of the month for consistency with typical reporting | |
| df_per_month["Date"] = df_per_month[ | |
| "Month" | |
| ].dt.to_timestamp() + pd.offsets.MonthEnd(0) | |
| df_per_month = df_per_month.drop("Month", axis=1) | |
| return df_per_month | |
| def generate_csv(start_date, end_date): | |
| random.seed(42) | |
| np.random.seed(42) | |
| df = generate_fake_saas_data(start_date, end_date) | |
| df_per_month = concatenate_data_per_month(df) | |
| df_per_month["Date"] = df_per_month["Date"].dt.strftime("%Y-%m-%d") | |
| with ( | |
| tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_month, | |
| tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp, | |
| ): | |
| df.to_csv(tmp.name, index=False) | |
| df_per_month.to_csv(tmp_month.name, index=False) | |
| return tmp.name, tmp_month.name, df, df_per_month | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# Fake SaaS Data Generator") | |
| start_date = gr.Textbox(label="Start Date (YYYY-MM-DD)", value="2023-01-01") | |
| end_date = gr.Textbox(label="End Date (YYYY-MM-DD)", value="2025-01-31") | |
| output_df_per_day = gr.Dataframe( | |
| label="Generated Data Preview (per day)", interactive=False, wrap=True | |
| ) | |
| output_df_per_month = gr.Dataframe( | |
| label="Generated Data Preview (per month)", interactive=False, wrap=True | |
| ) | |
| output_file_per_day = gr.File(label="Download Generated CSV (per day)") | |
| output_file_per_month = gr.File(label="Download Generated CSV (per month)") | |
| generate_btn = gr.Button("Generate Data") | |
| generate_btn.click( | |
| fn=generate_csv, | |
| inputs=[start_date, end_date], | |
| outputs=[ | |
| output_file_per_day, | |
| output_file_per_month, | |
| output_df_per_day, | |
| output_df_per_month, | |
| ], | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |