Spaces:

yekkala
/

FinSight-Agent

Build error

App Files Files Community

FinSight-Agent / app.py

yekkala

Upload 6 files

3816b9b verified 5 months ago

raw

history blame contribute delete

9.51 kB


	# app.py — Corporate Forecast Agent (Gradio, HF Spaces)
	# Notes:
	# - Upload a CSV with columns: date (YYYY-MM), revenue, cogs, opex
	# - Click buttons to run Baseline / Best / Base / Worst scenarios
	# - Shows plots and lets you download the forecast CSV
	# - Focus on transparency; validations explain why results pass/fail

	import io
	import json
	import hashlib
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import gradio as gr

	from datetime import datetime

	# Optional: statsmodels for SARIMAX baseline
	try:
	from statsmodels.tsa.statespace.sarimax import SARIMAX
	except Exception:
	SARIMAX = None

	HORIZON_MONTHS = 18
	np.random.seed(42)

	# ---------------------- helpers ----------------------
	def audit_row(drivers, status):
	ts = datetime.utcnow().isoformat()
	h = hashlib.sha256(json.dumps(drivers, sort_keys=True).encode()).hexdigest()[:8]
	return {"timestamp": ts, "driver_hash": h, "status": status}

	def load_csv(file_obj):
	df = pd.read_csv(file_obj)
	df.columns = [c.strip().lower() for c in df.columns]
	required = {"date","revenue","cogs","opex"}
	if not required.issubset(set(df.columns)):
	raise ValueError(f"CSV must contain columns: {sorted(list(required))}")
	df["date"] = pd.to_datetime(df["date"]).dt.to_period("M").dt.to_timestamp()
	df = df.sort_values("date").reset_index(drop=True)
	return df

	# ---------------------- baseline forecast ----------------------
	def fit_series(y):
	# Fit SARIMAX if available; otherwise fallback to simple growth extrapolation
	y = pd.Series(y)
	if SARIMAX is not None and len(y) >= 12:
	try:
	model = SARIMAX(y, order=(1,1,1), seasonal_order=(0,0,0,0), trend='t', enforce_stationarity=False, enforce_invertibility=False)
	res = model.fit(disp=False)
	fc = res.get_forecast(steps=HORIZON_MONTHS).predicted_mean
	return np.maximum(fc.values, 0.0)
	except Exception:
	pass
	# Fallback: compound growth based on last 6 months CAGR
	growth = (y.iloc[-1] / max(y.iloc[-6], 1e-6)) ** (1/6) - 1 if len(y) > 6 else 0.0
	last = y.iloc[-1]
	return np.array([max(last * ((1+growth) ** (i+1)), 0.0) for i in range(HORIZON_MONTHS)])

	def baseline_forecast(df):
	rev_fc = fit_series(df["revenue"].values)
	cogs_fc = fit_series(df["cogs"].values)
	opex_fc = fit_series(df["opex"].values)
	start = (df["date"].iloc[-1] + pd.offsets.MonthBegin(1)).to_period("M").to_timestamp()
	idx = pd.date_range(start, periods=HORIZON_MONTHS, freq="MS")
	out = pd.DataFrame({"date": idx, "revenue": rev_fc, "cogs": cogs_fc, "opex": opex_fc})
	return out

	# ---------------------- scenario overlay ----------------------
	def apply_scenario(forecast, drivers):
	fc = forecast.copy()
	# Revenue growth %
	fc["revenue"] = fc["revenue"] * (1 + drivers.get("rev_growth_pct", 0.0))
	# Gross margin bps adjustment -> change COGS accordingly
	gm_base = 1 - (fc["cogs"] / np.maximum(fc["revenue"], 1e-6))
	gm_adj = gm_base + drivers.get("gm_bps", 0.0)/10000.0
	gm_adj = np.clip(gm_adj, 0.0, 0.9)
	fc["cogs"] = fc["revenue"] * (1 - gm_adj)
	# Opex inflation
	fc["opex"] = fc["opex"] * (1 + drivers.get("opex_infl_pct", 0.0))
	# FX headwind (% applied to revenue)
	fx = drivers.get("fx_pct", 0.0)
	fc["revenue"] = fc["revenue"] * (1 + fx)
	# Working capital via DSO/DPO/DIO
	dso = drivers.get("dso", 60)
	dpo = drivers.get("dpo", 45)
	dio = drivers.get("dio", 60)
	fc["ar"] = fc["revenue"] * (dso/30)
	fc["ap"] = fc["cogs"] * (dpo/30)
	fc["inv"] = fc["cogs"] * (dio/30)
	# P&L and cash
	fc["gp"] = fc["revenue"] - fc["cogs"]
	fc["ebitda"] = fc["gp"] - fc["opex"]
	dep = drivers.get("dep", 0.05) * fc["revenue"]
	fc["ebit"] = fc["ebitda"] - dep
	interest = drivers.get("interest", 0.01) * fc["revenue"]
	taxes = np.maximum(drivers.get("tax_rate", 0.25) * np.maximum(fc["ebit"] - interest, 0.0), 0.0)
	fc["ni"] = fc["ebit"] - interest - taxes
	# Non-cash = dep
	fc["noncash"] = dep
	# ΔWC month-over-month
	fc[["ar","ap","inv"]] = fc[["ar","ap","inv"]].fillna(0.0)
	fc["wc"] = fc["ar"] + fc["inv"] - fc["ap"]
	fc["delta_wc"] = fc["wc"].diff().fillna(fc["wc"]) # first month delta vs 0
	fc["cash"] = fc["ni"] + fc["noncash"] - fc["delta_wc"]
	return fc

	# ---------------------- validators ----------------------
	def check_bounds(drivers):
	bounds = {
	"rev_growth_pct": (-0.20, 0.30),
	"gm_bps": (-500, 500),
	"opex_infl_pct": (-0.10, 0.20),
	"fx_pct": (-0.10, 0.10),
	"dso": (30, 90),
	"dpo": (15, 90),
	"dio": (20, 120),
	}
	msgs = []
	ok = True
	for k,(lo,hi) in bounds.items():
	v = drivers.get(k)
	if v is None: continue
	if not (lo <= v <= hi):
	ok = False
	msgs.append(f"{k} out of bounds: {v} not in [{lo},{hi}]")
	return ok, msgs

	def check_conservation(df, tol=1e-6):
	lhs = df["cash"].sum()
	rhs = (df["ni"].sum() + df["noncash"].sum() - df["delta_wc"].sum())
	return abs(lhs - rhs) <= tol

	def check_monotonicity(base_df, worst_df, best_df):
	# Check totals of EBITDA and Cash across horizon
	def total(col, d): return float(d[col].sum())
	eb = {
	"worst": total("ebitda", worst_df),
	"base": total("ebitda", base_df),
	"best": total("ebitda", best_df),
	}
	ca = {
	"worst": total("cash", worst_df),
	"base": total("cash", base_df),
	"best": total("cash", best_df),
	}
	mono_eb = eb["worst"] <= eb["base"] <= eb["best"]
	mono_ca = ca["worst"] <= ca["base"] <= ca["best"]
	return mono_eb and mono_ca, eb, ca

	# ---------------------- plotting ----------------------
	def plot_series(df, title):
	fig, ax = plt.subplots(2, 2, figsize=(10,6))
	ax = ax.ravel()
	ax[0].plot(df["date"], df["revenue"], label="Revenue")
	ax[0].plot(df["date"], df["cogs"], label="COGS")
	ax[0].plot(df["date"], df["opex"], label="Opex")
	ax[0].legend(); ax[0].set_title("P&L")

	ax[1].plot(df["date"], df["ebitda"], color="green", label="EBITDA")
	ax[1].legend(); ax[1].set_title("EBITDA")

	ax[2].plot(df["date"], df["cash"], color="purple", label="Cash Flow")
	ax[2].legend(); ax[2].set_title("Cash Flow")

	ax[3].plot(df["date"], df["wc"], color="orange", label="Working Capital")
	ax[3].legend(); ax[3].set_title("Working Capital")

	fig.suptitle(title)
	buf = io.BytesIO()
	plt.tight_layout()
	fig.savefig(buf, format="png"); plt.close(fig)
	return buf

	# ---------------------- Gradio UI ----------------------
	def run_pipeline(file, scenario):
	df_hist = load_csv(file)
	fc = baseline_forecast(df_hist)
	# Define drivers per scenario
	drivers_base = {"rev_growth_pct":0.0, "gm_bps":0, "opex_infl_pct":0.0, "fx_pct":0.0, "dso":60, "dpo":45, "dio":60, "dep":0.05, "interest":0.01, "tax_rate":0.25}
	drivers_best = {**drivers_base, "rev_growth_pct":0.10, "gm_bps":200, "opex_infl_pct":-0.05, "fx_pct":0.02}
	drivers_worst = {**drivers_base, "rev_growth_pct":-0.10, "gm_bps":-200, "opex_infl_pct":0.10, "fx_pct":-0.03}

	if scenario == "Baseline":
	out = apply_scenario(fc, drivers_base)
	status = "Baseline";
	buf = plot_series(out, "Baseline")
	csv_bytes = out.to_csv(index=False).encode()
	return buf, csv_bytes, json.dumps(audit_row(drivers_base, status))

	# Base/Best/Worst comparison + validations
	base_df = apply_scenario(fc, drivers_base)
	best_df = apply_scenario(fc, drivers_best)
	worst_df = apply_scenario(fc, drivers_worst)

	# Bounds check for all
	ok_b_base, _ = check_bounds(drivers_base)
	ok_b_best, _ = check_bounds(drivers_best)
	ok_b_worst, _ = check_bounds(drivers_worst)

	ok_cons_base = check_conservation(base_df)
	ok_cons_best = check_conservation(best_df)
	ok_cons_worst = check_conservation(worst_df)

	ok_mono, eb, ca = check_monotonicity(base_df, worst_df, best_df)

	status = {
	"bounds": ok_b_base and ok_b_best and ok_b_worst,
	"conservation": ok_cons_base and ok_cons_best and ok_cons_worst,
	"monotonicity": ok_mono,
	}

	# choose which to show
	out = base_df if scenario == "Base" else best_df if scenario == "Best" else worst_df
	buf = plot_series(out, f"Scenario: {scenario}")
	meta = {
	"scenario": scenario,
	"monotonicity_totals": {"EBITDA": eb, "Cash": ca},
	"status": status,
	}
	csv_bytes = out.to_csv(index=False).encode()
	return buf, csv_bytes, json.dumps(meta)

	with gr.Blocks(title="Corporate Forecast Agent") as demo:
	gr.Markdown("## Corporate Forecast Agent — Best / Base / Worst")
	gr.Markdown("Upload CSV with columns: date,revenue,cogs,opex")

	file = gr.File(file_types=[".csv"], label="Upload CSV")
	scenario = gr.Radio(["Baseline","Best","Base","Worst"], value="Baseline", label="Scenario")
	btn = gr.Button("Run")

	img = gr.Image(type="numpy", label="Plots")
	dl = gr.File(label="Download forecast.csv")
	meta = gr.JSON(label="Validation & Audit")

	def _run(file_obj, scen):
	if file_obj is None:
	raise gr.Error("Please upload a CSV first.")
	plot_buf, csv_bytes, meta_json = run_pipeline(file_obj.name, scen)
	return plot_buf.getvalue(), ("forecast.csv", csv_bytes), meta_json

	btn.click(_run, inputs=[file, scenario], outputs=[img, dl, meta])

	if __name__ == "__main__":
	demo.launch()