Spaces:

saas-things
/

fake-data-generator

Sleeping

App Files Files Community

fake-data-generator / app.py

Yehor

Update app.py

32ffed9 verified 4 months ago

raw

history blame contribute delete

7.46 kB

	import pandas as pd
	import numpy as np
	from datetime import datetime
	import random
	import tempfile
	import gradio as gr


	def generate_fake_saas_data(
	    start_date: str = "2023-01-01", end_date: str = "2025-01-31"
	) -> pd.DataFrame:
	    """Simulate one row of SaaS business metrics per calendar day.

	    The simulation starts at 5000.0 MRR and 100 customers. Each day it draws
	    new customers (Poisson), churned customers and expansions (binomial) and
	    derives revenue and cost figures from them. Acquisition rate, churn rate
	    and the spend baselines are linearly interpolated from their initial to
	    their final value across the requested date range.

	    Randomness comes from the global ``random`` and ``numpy.random``
	    generators; seed both beforehand for reproducible output.

	    Args:
	        start_date: First simulated day, formatted ``YYYY-MM-DD``.
	        end_date: Last simulated day (inclusive), formatted ``YYYY-MM-DD``.

	    Returns:
	        A DataFrame with one row per day; ``Date`` is a ``YYYY-MM-DD`` string
	        and monetary columns are rounded to 2 decimals. An end date earlier
	        than the start date yields a frame with no rows.

	    Raises:
	        ValueError: If either date string does not match ``%Y-%m-%d``.
	    """
	    start = datetime.strptime(start_date, "%Y-%m-%d")
	    end = datetime.strptime(end_date, "%Y-%m-%d")
	    dates = pd.date_range(start=start, end=end)
	    total_days = (end - start).days + 1
	    # A one-day range has nothing to interpolate over; clamping the divisor
	    # keeps progress at 0 instead of raising ZeroDivisionError.
	    span = max(total_days - 1, 1)

	    # Initial values
	    mrr = 5000.0
	    customers = 100
	    data = []
	    for day_num, date in enumerate(dates):
	        # Fraction of the simulated period elapsed: 0.0 on the first day,
	        # 1.0 on the last.
	        progress = day_num / span
	        mrr_start = mrr
	        total_customers_start = customers

	        # NOTE: the order of the random draws below is significant; changing
	        # it would change the output produced for a given seed.

	        # Daily new customers (Poisson, increasing lambda over time)
	        lambda_new = 0.26 + (1.19 - 0.26) * progress
	        new_customers = np.random.poisson(lambda_new)
	        # Daily churn (Binomial, increasing rate over time); the monthly rate
	        # is converted to a per-day probability over a 30.5-day month.
	        monthly_churn_rate = 0.01 + (0.031 - 0.01) * progress
	        daily_churn_prob = 1 - (1 - monthly_churn_rate) ** (1 / 30.5)
	        churned_customers = np.random.binomial(customers, daily_churn_prob)
	        # ARPU at the start of the day (0 when there are no customers)
	        arpu = mrr_start / total_customers_start if total_customers_start > 0 else 0
	        # New and churned revenue: customer count times ARPU with +/-20% noise
	        new_revenue = new_customers * arpu * random.uniform(0.8, 1.2)
	        churned_revenue = churned_customers * arpu * random.uniform(0.8, 1.2)
	        # Expansion: a few customers upgrade by 5-15% of ARPU
	        daily_expansion_prob = 0.005
	        num_expansion = np.random.binomial(customers, daily_expansion_prob)
	        expansion_revenue = num_expansion * arpu * random.uniform(0.05, 0.15)

	        # Update MRR and customer count, never letting either go negative
	        mrr = max(mrr + new_revenue + expansion_revenue - churned_revenue, 0)
	        customers = max(customers + new_customers - churned_customers, 0)

	        # Total revenue (approximate daily recognized revenue)
	        total_revenue = (mrr_start + mrr) / 2 / 30.5
	        # COGS (15-20% of daily revenue)
	        cogs = total_revenue * random.uniform(0.15, 0.20)
	        # OpEx (increasing over time)
	        opex = (31 + 22 * progress) * random.uniform(0.9, 1.1)
	        # Sales and Marketing Spend (increasing over time)
	        sales_and_marketing_spend = (72 + 238 * progress) * random.uniform(0.9, 1.1)
	        # Owner Compensation (fixed monthly, daily portion)
	        owner_compensation = 4000 / 30.5 * random.uniform(0.95, 1.05)
	        # Interest (small random)
	        interest = random.uniform(1, 2)
	        # Taxes (increasing over time)
	        taxes = (9.6 + 18.2 * progress) * random.uniform(0.9, 1.1)
	        # Depreciation/Amortization (random around average)
	        depreciation_amortization = random.uniform(2.5, 4)

	        data.append(
	            {
	                "Date": date.strftime("%Y-%m-%d"),
	                "MRR_Start": round(mrr_start, 2),
	                "MRR_End": round(mrr, 2),
	                "Total_Revenue": round(total_revenue, 2),
	                "New_Customers": new_customers,
	                "Churned_Customers": churned_customers,
	                "Total_Customers_Start": total_customers_start,
	                "Total_Customers_End": customers,
	                "Expansion_Revenue": round(expansion_revenue, 2),
	                "Churned_Revenue": round(churned_revenue, 2),
	                "COGS": round(cogs, 2),
	                "OpEx": round(opex, 2),
	                "Sales_And_Marketing_Spend": round(sales_and_marketing_spend, 2),
	                "Owner_Compensation": round(owner_compensation, 2),
	                "Interest": round(interest, 2),
	                "Taxes": round(taxes, 2),
	                "Depreciation_Amortization": round(depreciation_amortization, 2),
	            }
	        )
	    return pd.DataFrame(data)


	def concatenate_data_per_month(df: pd.DataFrame) -> pd.DataFrame:
	    """Aggregate the daily metrics frame into one row per calendar month.

	    Opening values (``MRR_Start``, ``Total_Customers_Start``) are taken from
	    the first row of each month, closing values (``MRR_End``,
	    ``Total_Customers_End``) from the last row, and every other metric is
	    summed over the month.

	    The input frame is left untouched; all work happens on a copy.

	    Args:
	        df: Daily data with a ``Date`` column parseable by
	            ``pandas.to_datetime`` plus the metric columns aggregated below.

	    Returns:
	        A new DataFrame with one row per month, whose ``Date`` column holds
	        the month-end timestamp, followed by the aggregated metric columns.
	    """
	    # Work on a copy so the caller's frame keeps its original Date values and
	    # does not gain a helper "Month" column.
	    daily = df.copy()
	    daily["Date"] = pd.to_datetime(daily["Date"])
	    daily["Month"] = daily["Date"].dt.to_period("M")

	    df_per_month = (
	        daily.groupby("Month")
	        .agg(
	            # Placeholder that fixes the column position; overwritten below
	            Date=("Date", "first"),
	            MRR_Start=("MRR_Start", "first"),
	            MRR_End=("MRR_End", "last"),
	            Total_Revenue=("Total_Revenue", "sum"),
	            New_Customers=("New_Customers", "sum"),
	            Churned_Customers=("Churned_Customers", "sum"),
	            Total_Customers_Start=("Total_Customers_Start", "first"),
	            Total_Customers_End=("Total_Customers_End", "last"),
	            Expansion_Revenue=("Expansion_Revenue", "sum"),
	            Churned_Revenue=("Churned_Revenue", "sum"),
	            COGS=("COGS", "sum"),
	            OpEx=("OpEx", "sum"),
	            Sales_And_Marketing_Spend=("Sales_And_Marketing_Spend", "sum"),
	            Owner_Compensation=("Owner_Compensation", "sum"),
	            Interest=("Interest", "sum"),
	            Taxes=("Taxes", "sum"),
	            Depreciation_Amortization=("Depreciation_Amortization", "sum"),
	        )
	        .reset_index()
	    )

	    # Report each month under its last calendar day, as is typical for
	    # period-end reporting.
	    df_per_month["Date"] = df_per_month["Month"].dt.to_timestamp() + pd.offsets.MonthEnd(0)
	    return df_per_month.drop("Month", axis=1)


	def generate_csv(start_date, end_date):
	    """Generate the fake dataset and export it as daily and monthly CSV files.

	    Both random generators are re-seeded with 42 on every call, so the same
	    date range always produces the same data.

	    Args:
	        start_date: First simulated day, formatted ``YYYY-MM-DD``.
	        end_date: Last simulated day, formatted ``YYYY-MM-DD``.

	    Returns:
	        A 4-tuple ``(daily_csv_path, monthly_csv_path, daily_df, monthly_df)``.
	        The CSV files are created with ``delete=False`` and are not removed
	        by this function.
	    """
	    random.seed(42)
	    np.random.seed(42)

	    df = generate_fake_saas_data(start_date, end_date)
	    df_per_month = concatenate_data_per_month(df)
	    df_per_month["Date"] = df_per_month["Date"].dt.strftime("%Y-%m-%d")

	    # Reserve the two paths and close the handles BEFORE writing: reopening a
	    # NamedTemporaryFile by name while it is still open fails on Windows.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_month:
	        month_path = tmp_month.name
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
	        day_path = tmp.name

	    df.to_csv(day_path, index=False)
	    df_per_month.to_csv(month_path, index=False)

	    return day_path, month_path, df, df_per_month


	# Gradio UI: two date inputs, a button, and four outputs (CSV downloads plus
	# table previews for the daily and the monthly data).
	with gr.Blocks() as demo:
	    gr.Markdown("# Fake SaaS Data Generator")

	    # Date range inputs
	    start_date = gr.Textbox(value="2023-01-01", label="Start Date (YYYY-MM-DD)")
	    end_date = gr.Textbox(value="2025-01-31", label="End Date (YYYY-MM-DD)")

	    # Read-only table previews
	    output_df_per_day = gr.Dataframe(
	        wrap=True,
	        interactive=False,
	        label="Generated Data Preview (per day)",
	    )
	    output_df_per_month = gr.Dataframe(
	        wrap=True,
	        interactive=False,
	        label="Generated Data Preview (per month)",
	    )

	    # Download links for the generated CSV files
	    output_file_per_day = gr.File(label="Download Generated CSV (per day)")
	    output_file_per_month = gr.File(label="Download Generated CSV (per month)")

	    generate_btn = gr.Button("Generate Data")
	    # Outputs are listed in the same order as generate_csv's return tuple:
	    # daily file, monthly file, daily frame, monthly frame.
	    generate_btn.click(
	        fn=generate_csv,
	        inputs=[start_date, end_date],
	        outputs=[
	            output_file_per_day,
	            output_file_per_month,
	            output_df_per_day,
	            output_df_per_month,
	        ],
	    )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()