amitlals
Initial commit: SAP Finance Dashboard with RPT-1-OSS Model
fc8c40e
"""
Synthetic SAP Finance Data Generator
Generates synthetic datasets for:
- General Ledger accounts with transactions
- Financial Statements (P&L and Balance Sheet)
- Sales Order data
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
def generate_gl_accounts(num_transactions=1000, output_path="data/synthetic_gl_accounts.csv"):
"""Generate synthetic General Ledger accounts with transactions."""
# Account codes and descriptions
account_codes = [
"100000", "110000", "120000", "130000", "140000", # Assets
"200000", "210000", "220000", "230000", # Liabilities
"300000", "310000", "320000", # Equity
"400000", "410000", "420000", "430000", # Revenue
"500000", "510000", "520000", "530000", "540000", # Expenses
]
account_descriptions = [
"Cash and Cash Equivalents", "Accounts Receivable", "Inventory",
"Prepaid Expenses", "Property, Plant & Equipment",
"Accounts Payable", "Accrued Liabilities", "Short-term Debt", "Long-term Debt",
"Common Stock", "Retained Earnings", "Other Equity",
"Sales Revenue", "Service Revenue", "Interest Income", "Other Income",
"Cost of Goods Sold", "Salaries and Wages", "Rent Expense",
"Utilities Expense", "Marketing Expense"
]
np.random.seed(42)
transactions = []
base_date = datetime(2024, 1, 1)
for i in range(num_transactions):
account_idx = np.random.randint(0, len(account_codes))
transaction_date = base_date + timedelta(days=np.random.randint(0, 365))
# Determine if debit or credit based on account type
account_num = int(account_codes[account_idx][0])
if account_num in [1, 5]: # Assets or Expenses
debit = np.random.uniform(100, 50000)
credit = 0
else: # Liabilities, Equity, Revenue
debit = 0
credit = np.random.uniform(100, 50000)
transactions.append({
"Transaction_ID": f"TXN{str(i+1).zfill(6)}",
"Date": transaction_date.strftime("%Y-%m-%d"),
"Account_Code": account_codes[account_idx],
"Account_Description": account_descriptions[account_idx],
"Debit": round(debit, 2),
"Credit": round(credit, 2),
"Balance": round(debit - credit, 2),
"Document_Number": f"DOC{str(np.random.randint(1000, 9999))}",
"Posting_Period": transaction_date.strftime("%Y-%m")
})
df = pd.DataFrame(transactions)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print(f"Generated {num_transactions} GL transactions: {output_path}")
return df
def generate_financial_statements(num_periods=12, output_path="data/synthetic_financial_statements.csv"):
"""Generate synthetic Financial Statements (P&L and Balance Sheet)."""
np.random.seed(42)
base_date = datetime(2024, 1, 1)
statements = []
for period in range(num_periods):
period_date = base_date + timedelta(days=period * 30)
period_str = period_date.strftime("%Y-%m")
# Profit & Loss Statement
revenue = np.random.uniform(500000, 1000000)
cogs = revenue * np.random.uniform(0.4, 0.6)
gross_profit = revenue - cogs
operating_expenses = np.random.uniform(200000, 400000)
ebitda = gross_profit - operating_expenses
depreciation = np.random.uniform(20000, 50000)
ebit = ebitda - depreciation
interest_expense = np.random.uniform(10000, 30000)
ebt = ebit - interest_expense
tax = ebt * 0.25
net_income = ebt - tax
# Balance Sheet
cash = np.random.uniform(100000, 500000)
accounts_receivable = np.random.uniform(200000, 400000)
inventory = np.random.uniform(150000, 300000)
current_assets = cash + accounts_receivable + inventory
ppe = np.random.uniform(2000000, 5000000)
total_assets = current_assets + ppe
accounts_payable = np.random.uniform(100000, 200000)
short_term_debt = np.random.uniform(50000, 150000)
current_liabilities = accounts_payable + short_term_debt
long_term_debt = np.random.uniform(1000000, 2000000)
total_liabilities = current_liabilities + long_term_debt
equity = total_assets - total_liabilities
statements.append({
"Period": period_str,
"Statement_Type": "P&L",
"Revenue": round(revenue, 2),
"Cost_of_Goods_Sold": round(cogs, 2),
"Gross_Profit": round(gross_profit, 2),
"Operating_Expenses": round(operating_expenses, 2),
"EBITDA": round(ebitda, 2),
"Depreciation": round(depreciation, 2),
"EBIT": round(ebit, 2),
"Interest_Expense": round(interest_expense, 2),
"EBT": round(ebt, 2),
"Tax": round(tax, 2),
"Net_Income": round(net_income, 2),
"Cash": round(cash, 2),
"Accounts_Receivable": round(accounts_receivable, 2),
"Inventory": round(inventory, 2),
"Current_Assets": round(current_assets, 2),
"PPE": round(ppe, 2),
"Total_Assets": round(total_assets, 2),
"Accounts_Payable": round(accounts_payable, 2),
"Short_Term_Debt": round(short_term_debt, 2),
"Current_Liabilities": round(current_liabilities, 2),
"Long_Term_Debt": round(long_term_debt, 2),
"Total_Liabilities": round(total_liabilities, 2),
"Equity": round(equity, 2)
})
df = pd.DataFrame(statements)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print(f"Generated {num_periods} financial statement periods: {output_path}")
return df
def generate_sales_orders(num_orders=500, output_path="data/synthetic_sales_orders.csv"):
"""Generate synthetic Sales Order data."""
np.random.seed(42)
base_date = datetime(2024, 1, 1)
products = [
"Product A", "Product B", "Product C", "Product D", "Product E",
"Product F", "Product G", "Product H", "Product I", "Product J"
]
customers = [f"CUST{str(i).zfill(5)}" for i in range(1, 101)]
regions = ["North", "South", "East", "West", "Central"]
orders = []
for i in range(num_orders):
order_date = base_date + timedelta(days=np.random.randint(0, 365))
delivery_date = order_date + timedelta(days=np.random.randint(7, 30))
customer = np.random.choice(customers)
product = np.random.choice(products)
quantity = np.random.randint(1, 100)
unit_price = np.random.uniform(10, 500)
total_amount = quantity * unit_price
region = np.random.choice(regions)
order_status = np.random.choice(
["Open", "In Process", "Delivered", "Cancelled"],
p=[0.2, 0.3, 0.4, 0.1]
)
orders.append({
"Order_Number": f"SO{str(i+1).zfill(6)}",
"Order_Date": order_date.strftime("%Y-%m-%d"),
"Delivery_Date": delivery_date.strftime("%Y-%m-%d"),
"Customer_ID": customer,
"Customer_Name": f"Customer {customer}",
"Product_Code": f"PRD{str(np.random.randint(1, 100)).zfill(3)}",
"Product_Name": product,
"Quantity": quantity,
"Unit_Price": round(unit_price, 2),
"Total_Amount": round(total_amount, 2),
"Currency": "USD",
"Region": region,
"Status": order_status,
"Sales_Rep": f"REP{str(np.random.randint(1, 20)).zfill(2)}"
})
df = pd.DataFrame(orders)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print(f"Generated {num_orders} sales orders: {output_path}")
return df
def generate_all_datasets():
"""Generate all synthetic datasets."""
print("Generating synthetic SAP finance datasets...")
generate_gl_accounts()
generate_financial_statements()
generate_sales_orders()
print("All datasets generated successfully!")
if __name__ == "__main__":
generate_all_datasets()