Yehor's picture
Update app.py
32ffed9 verified
import pandas as pd
import numpy as np
from datetime import datetime
import random
import tempfile
import gradio as gr
def generate_fake_saas_data(start_date="2023-01-01", end_date="2025-01-31"):
start = datetime.strptime(start_date, "%Y-%m-%d")
end = datetime.strptime(end_date, "%Y-%m-%d")
dates = pd.date_range(start=start, end=end)
data = []
# Initial values
mrr = 5000.0
customers = 100
day_num = 0
total_days = (end - start).days + 1
for date in dates:
mrr_start = mrr
total_customers_start = customers
# Daily new customers (Poisson, increasing lambda over time)
lambda_new = 0.26 + (1.19 - 0.26) * (day_num / (total_days - 1))
new_customers = np.random.poisson(lambda_new)
# Daily churn (Binomial, increasing rate over time)
monthly_churn_rate = 0.01 + (0.031 - 0.01) * (day_num / (total_days - 1))
daily_churn_prob = 1 - (1 - monthly_churn_rate) ** (1 / 30.5)
churned_customers = np.random.binomial(customers, daily_churn_prob)
# ARPU
arpu = mrr_start / total_customers_start if total_customers_start > 0 else 0
# New revenue
new_revenue = new_customers * arpu * random.uniform(0.8, 1.2)
# Churned revenue
churned_revenue = churned_customers * arpu * random.uniform(0.8, 1.2)
# Expansion
daily_expansion_prob = 0.005
num_expansion = np.random.binomial(customers, daily_expansion_prob)
expansion_revenue = num_expansion * arpu * random.uniform(0.05, 0.15)
# Update MRR
mrr = mrr + new_revenue + expansion_revenue - churned_revenue
if mrr < 0:
mrr = 0
# Update customers
customers = customers + new_customers - churned_customers
if customers < 0:
customers = 0
# Total revenue (approximate daily recognized revenue)
total_revenue = (mrr_start + mrr) / 2 / 30.5
# COGS (~18% of daily revenue with variation)
cogs = total_revenue * random.uniform(0.15, 0.20)
# OpEx (increasing over time)
opex_daily_base = 31 + 22 * (day_num / (total_days - 1))
opex = opex_daily_base * random.uniform(0.9, 1.1)
# Sales and Marketing Spend (increasing over time)
sales_daily_base = 72 + 238 * (day_num / (total_days - 1))
sales_and_marketing_spend = sales_daily_base * random.uniform(0.9, 1.1)
# Owner Compensation (fixed monthly, daily portion)
owner_compensation = 4000 / 30.5 * random.uniform(0.95, 1.05)
# Interest (small random)
interest = random.uniform(1, 2)
# Taxes (increasing over time)
taxes_daily_base = 9.6 + 18.2 * (day_num / (total_days - 1))
taxes = taxes_daily_base * random.uniform(0.9, 1.1)
# Depreciation/Amortization (random around average)
depreciation_amortization = random.uniform(2.5, 4)
# Append row
data.append(
{
"Date": date.strftime("%Y-%m-%d"),
"MRR_Start": round(mrr_start, 2),
"MRR_End": round(mrr, 2),
"Total_Revenue": round(total_revenue, 2),
"New_Customers": new_customers,
"Churned_Customers": churned_customers,
"Total_Customers_Start": total_customers_start,
"Total_Customers_End": customers,
"Expansion_Revenue": round(expansion_revenue, 2),
"Churned_Revenue": round(churned_revenue, 2),
"COGS": round(cogs, 2),
"OpEx": round(opex, 2),
"Sales_And_Marketing_Spend": round(sales_and_marketing_spend, 2),
"Owner_Compensation": round(owner_compensation, 2),
"Interest": round(interest, 2),
"Taxes": round(taxes, 2),
"Depreciation_Amortization": round(depreciation_amortization, 2),
}
)
day_num += 1
df = pd.DataFrame(data)
return df
def concatenate_data_per_month(df):
# Group by month and aggregate
df["Date"] = pd.to_datetime(df["Date"])
df["Month"] = df["Date"].dt.to_period("M")
# For 'MRR_Start', we want the MRR_Start of the first day of the month
# For 'MRR_End', we want the MRR_End of the last day of the month
# For sums, we sum them up
# For 'Total_Customers_Start', we want the Total_Customers_Start of the first day of the month
# For 'Total_Customers_End', we want the Total_Customers_End of the last day of the month
df_per_month = (
df.groupby("Month")
.agg(
Date=("Date", "first"), # Keep the first date of the month for grouping
MRR_Start=("MRR_Start", "first"),
MRR_End=("MRR_End", "last"),
Total_Revenue=("Total_Revenue", "sum"),
New_Customers=("New_Customers", "sum"),
Churned_Customers=("Churned_Customers", "sum"),
Total_Customers_Start=("Total_Customers_Start", "first"),
Total_Customers_End=("Total_Customers_End", "last"),
Expansion_Revenue=("Expansion_Revenue", "sum"),
Churned_Revenue=("Churned_Revenue", "sum"),
COGS=("COGS", "sum"),
OpEx=("OpEx", "sum"),
Sales_And_Marketing_Spend=("Sales_And_Marketing_Spend", "sum"),
Owner_Compensation=("Owner_Compensation", "sum"),
Interest=("Interest", "sum"),
Taxes=("Taxes", "sum"),
Depreciation_Amortization=("Depreciation_Amortization", "sum"),
)
.reset_index()
)
# Adjust the 'Date' column to be the end of the month for consistency with typical reporting
df_per_month["Date"] = df_per_month[
"Month"
].dt.to_timestamp() + pd.offsets.MonthEnd(0)
df_per_month = df_per_month.drop("Month", axis=1)
return df_per_month
def generate_csv(start_date, end_date):
random.seed(42)
np.random.seed(42)
df = generate_fake_saas_data(start_date, end_date)
df_per_month = concatenate_data_per_month(df)
df_per_month["Date"] = df_per_month["Date"].dt.strftime("%Y-%m-%d")
with (
tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_month,
tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp,
):
df.to_csv(tmp.name, index=False)
df_per_month.to_csv(tmp_month.name, index=False)
return tmp.name, tmp_month.name, df, df_per_month
with gr.Blocks() as demo:
gr.Markdown("# Fake SaaS Data Generator")
start_date = gr.Textbox(label="Start Date (YYYY-MM-DD)", value="2023-01-01")
end_date = gr.Textbox(label="End Date (YYYY-MM-DD)", value="2025-01-31")
output_df_per_day = gr.Dataframe(
label="Generated Data Preview (per day)", interactive=False, wrap=True
)
output_df_per_month = gr.Dataframe(
label="Generated Data Preview (per month)", interactive=False, wrap=True
)
output_file_per_day = gr.File(label="Download Generated CSV (per day)")
output_file_per_month = gr.File(label="Download Generated CSV (per month)")
generate_btn = gr.Button("Generate Data")
generate_btn.click(
fn=generate_csv,
inputs=[start_date, end_date],
outputs=[
output_file_per_day,
output_file_per_month,
output_df_per_day,
output_df_per_month,
],
)
if __name__ == "__main__":
demo.launch()