# app.py — Corporate Forecast Agent (Gradio, HF Spaces)
# Notes:
# - Upload a CSV with columns: date (YYYY-MM), revenue, cogs, opex
# - Pick a scenario (Baseline / Best / Base / Worst) and click Run
# - Shows plots and lets you download the forecast CSV
# - Focus on transparency; validations explain why results pass/fail

import hashlib
import io
import json
import logging
import math
import os
import tempfile
from datetime import datetime, timezone

import matplotlib

# Gradio runs event handlers in worker threads and Spaces has no display, so
# force the non-interactive backend before pyplot is imported.
matplotlib.use("Agg")

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Optional: statsmodels for SARIMAX baseline
try:
    from statsmodels.tsa.statespace.sarimax import SARIMAX
except Exception:
    SARIMAX = None

logger = logging.getLogger(__name__)

HORIZON_MONTHS = 18
SCENARIOS = ("Baseline", "Best", "Base", "Worst")
np.random.seed(42)


# ---------------------- helpers ----------------------
def audit_row(drivers, status):
    """Build an audit record for one run.

    Args:
        drivers: JSON-serializable dict of scenario drivers.
        status: Any JSON-serializable status value; stored unchanged.

    Returns:
        Dict with ``timestamp`` (naive ISO-8601 string in UTC),
        ``driver_hash`` (first 8 hex chars of the SHA-256 of the
        key-sorted JSON dump of ``drivers``) and ``status``.
    """
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the emitted string keeps the same format as before.
    ts = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    payload = json.dumps(drivers, sort_keys=True).encode()
    digest = hashlib.sha256(payload).hexdigest()[:8]
    return {"timestamp": ts, "driver_hash": digest, "status": status}


def load_csv(file_obj):
    """Read the historical CSV and normalise it.

    Column names are stripped and lower-cased, ``date`` is snapped to the
    first day of its month, and rows are sorted by date.

    Args:
        file_obj: Anything ``pandas.read_csv`` accepts (path or file-like).

    Returns:
        DataFrame sorted by ``date`` with a fresh 0..n-1 index.

    Raises:
        ValueError: If a required column is missing or there are no rows.
    """
    df = pd.read_csv(file_obj)
    df.columns = [c.strip().lower() for c in df.columns]
    required = {"date", "revenue", "cogs", "opex"}
    if not required.issubset(df.columns):
        raise ValueError(f"CSV must contain columns: {sorted(required)}")
    if df.empty:
        # Fail here with a clear message instead of an IndexError later.
        raise ValueError("CSV contains no data rows")
    df["date"] = pd.to_datetime(df["date"]).dt.to_period("M").dt.to_timestamp()
    return df.sort_values("date").reset_index(drop=True)


# ---------------------- baseline forecast ----------------------
def _growth_fallback(y, periods=6):
    """Extrapolate ``y`` with the compound monthly growth of the last ``periods`` months."""
    last = float(y.iloc[-1])
    growth = 0.0
    # ``periods`` months of growth need ``periods + 1`` observations.
    if len(y) > periods:
        start = float(y.iloc[-(periods + 1)])
        # A compound rate is only defined for positive endpoints; otherwise
        # hold the last value flat rather than produce NaN or runaway growth.
        if start > 0 and last > 0:
            growth = (last / max(start, 1e-6)) ** (1 / periods) - 1
    steps = np.arange(1, HORIZON_MONTHS + 1)
    return np.maximum(last * (1 + growth) ** steps, 0.0)


def fit_series(y):
    """Forecast one series ``HORIZON_MONTHS`` steps ahead.

    Uses SARIMAX(1,1,1) with a linear trend when statsmodels is importable
    and at least 12 observations exist; otherwise, or if the fit fails or
    yields non-finite values, falls back to compound-growth extrapolation.

    Args:
        y: 1-D sequence of numeric observations, oldest first.

    Returns:
        ``numpy.ndarray`` of length ``HORIZON_MONTHS``, floored at 0.

    Raises:
        ValueError: If ``y`` is empty.
    """
    y = pd.Series(y)
    if y.empty:
        raise ValueError("Cannot forecast an empty series")

    if SARIMAX is not None and len(y) >= 12:
        try:
            model = SARIMAX(
                y,
                order=(1, 1, 1),
                seasonal_order=(0, 0, 0, 0),
                trend="t",
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            res = model.fit(disp=False)
            predicted = res.get_forecast(steps=HORIZON_MONTHS).predicted_mean
            fc = np.asarray(predicted, dtype=float)
            if np.all(np.isfinite(fc)):
                return np.maximum(fc, 0.0)
            logger.warning("SARIMAX produced non-finite values; using growth fallback")
        except Exception:
            # Best-effort model: any fitting failure degrades to the fallback,
            # but is logged so it does not go unnoticed.
            logger.warning("SARIMAX fit failed; using growth fallback", exc_info=True)

    return _growth_fallback(y)


def baseline_forecast(df):
    """Forecast revenue, COGS and opex independently.

    Args:
        df: Historical frame as returned by ``load_csv``.

    Returns:
        DataFrame with ``date`` (month starts following the last historical
        month), ``revenue``, ``cogs`` and ``opex`` for ``HORIZON_MONTHS`` rows.
    """
    rev_fc = fit_series(df["revenue"].values)
    cogs_fc = fit_series(df["cogs"].values)
    opex_fc = fit_series(df["opex"].values)
    start = (df["date"].iloc[-1] + pd.offsets.MonthBegin(1)).to_period("M").to_timestamp()
    idx = pd.date_range(start, periods=HORIZON_MONTHS, freq="MS")
    return pd.DataFrame({"date": idx, "revenue": rev_fc, "cogs": cogs_fc, "opex": opex_fc})


# ---------------------- scenario overlay ----------------------
def apply_scenario(forecast, drivers):
    """Overlay scenario drivers on a baseline forecast and derive P&L / cash.

    The input frame is not modified. Missing drivers use the defaults
    visible in the ``drivers.get`` calls below.

    Args:
        forecast: Frame with ``revenue``, ``cogs`` and ``opex`` columns.
        drivers: Dict of driver name -> value.

    Returns:
        Copy of ``forecast`` with adjusted revenue/cogs/opex plus ``ar``,
        ``ap``, ``inv``, ``gp``, ``ebitda``, ``ebit``, ``ni``, ``noncash``,
        ``wc``, ``delta_wc`` and ``cash`` columns.
    """
    fc = forecast.copy()

    # Revenue growth %
    fc["revenue"] = fc["revenue"] * (1 + drivers.get("rev_growth_pct", 0.0))

    # Gross margin bps adjustment -> change COGS accordingly.
    # NOTE: the margin is clipped to [0, 0.9], so COGS is rewritten even when
    # gm_bps is 0 if the baseline margin lies outside that range.
    gm_base = 1 - (fc["cogs"] / np.maximum(fc["revenue"], 1e-6))
    gm_adj = gm_base + drivers.get("gm_bps", 0.0) / 10000.0
    gm_adj = np.clip(gm_adj, 0.0, 0.9)
    fc["cogs"] = fc["revenue"] * (1 - gm_adj)

    # Opex inflation
    fc["opex"] = fc["opex"] * (1 + drivers.get("opex_infl_pct", 0.0))

    # FX headwind (% applied to revenue). Applied after COGS was derived, so
    # FX moves revenue without moving COGS.
    fx = drivers.get("fx_pct", 0.0)
    fc["revenue"] = fc["revenue"] * (1 + fx)

    # Working capital via DSO/DPO/DIO (days scaled against a 30-day month)
    dso = drivers.get("dso", 60)
    dpo = drivers.get("dpo", 45)
    dio = drivers.get("dio", 60)
    fc["ar"] = fc["revenue"] * (dso / 30)
    fc["ap"] = fc["cogs"] * (dpo / 30)
    fc["inv"] = fc["cogs"] * (dio / 30)

    # P&L and cash
    fc["gp"] = fc["revenue"] - fc["cogs"]
    fc["ebitda"] = fc["gp"] - fc["opex"]
    dep = drivers.get("dep", 0.05) * fc["revenue"]
    fc["ebit"] = fc["ebitda"] - dep
    interest = drivers.get("interest", 0.01) * fc["revenue"]
    # Tax is only charged on positive pre-tax income.
    pre_tax = np.maximum(fc["ebit"] - interest, 0.0)
    taxes = np.maximum(drivers.get("tax_rate", 0.25) * pre_tax, 0.0)
    fc["ni"] = fc["ebit"] - interest - taxes

    # Non-cash = dep
    fc["noncash"] = dep

    # ΔWC month-over-month
    fc[["ar", "ap", "inv"]] = fc[["ar", "ap", "inv"]].fillna(0.0)
    fc["wc"] = fc["ar"] + fc["inv"] - fc["ap"]
    fc["delta_wc"] = fc["wc"].diff().fillna(fc["wc"])  # first month delta vs 0
    fc["cash"] = fc["ni"] + fc["noncash"] - fc["delta_wc"]
    return fc


# ---------------------- validators ----------------------
def check_bounds(drivers):
    """Check each known driver against its allowed closed interval.

    Drivers absent from ``drivers`` (or set to ``None``) are skipped.

    Returns:
        ``(ok, msgs)`` where ``ok`` is False if any driver is out of range
        and ``msgs`` lists one message per violation.
    """
    bounds = {
        "rev_growth_pct": (-0.20, 0.30),
        "gm_bps": (-500, 500),
        "opex_infl_pct": (-0.10, 0.20),
        "fx_pct": (-0.10, 0.10),
        "dso": (30, 90),
        "dpo": (15, 90),
        "dio": (20, 120),
    }
    msgs = []
    for k, (lo, hi) in bounds.items():
        v = drivers.get(k)
        if v is None:
            continue
        if not (lo <= v <= hi):
            msgs.append(f"{k} out of bounds: {v} not in [{lo},{hi}]")
    return not msgs, msgs


def check_conservation(df, tol=1e-6):
    """Verify that total cash equals total NI + non-cash - ΔWC.

    Args:
        df: Scenario frame with ``cash``, ``ni``, ``noncash``, ``delta_wc``.
        tol: Absolute tolerance; a relative tolerance of 1e-9 is also
            accepted so large totals do not fail on float rounding.

    Returns:
        Plain Python ``bool`` (so the result is JSON-serializable).
    """
    lhs = float(df["cash"].sum())
    rhs = float(df["ni"].sum() + df["noncash"].sum() - df["delta_wc"].sum())
    return math.isclose(lhs, rhs, rel_tol=1e-9, abs_tol=tol)


def check_monotonicity(base_df, worst_df, best_df):
    """Check worst <= base <= best for horizon totals of EBITDA and cash.

    Returns:
        ``(ok, ebitda_totals, cash_totals)`` where each totals dict has
        ``worst``, ``base`` and ``best`` float entries.
    """
    frames = {"worst": worst_df, "base": base_df, "best": best_df}
    eb = {name: float(d["ebitda"].sum()) for name, d in frames.items()}
    ca = {name: float(d["cash"].sum()) for name, d in frames.items()}
    mono_eb = eb["worst"] <= eb["base"] <= eb["best"]
    mono_ca = ca["worst"] <= ca["base"] <= ca["best"]
    return mono_eb and mono_ca, eb, ca


# ---------------------- plotting ----------------------
def plot_series(df, title):
    """Render a 2x2 panel (P&L, EBITDA, cash flow, working capital) as PNG.

    Args:
        df: Scenario frame as returned by ``apply_scenario``.
        title: Figure super-title.

    Returns:
        ``io.BytesIO`` holding the PNG, positioned at offset 0.
    """
    fig, axes = plt.subplots(2, 2, figsize=(10, 6))
    try:
        ax = axes.ravel()
        ax[0].plot(df["date"], df["revenue"], label="Revenue")
        ax[0].plot(df["date"], df["cogs"], label="COGS")
        ax[0].plot(df["date"], df["opex"], label="Opex")
        ax[0].legend()
        ax[0].set_title("P&L")
        ax[1].plot(df["date"], df["ebitda"], color="green", label="EBITDA")
        ax[1].legend()
        ax[1].set_title("EBITDA")
        ax[2].plot(df["date"], df["cash"], color="purple", label="Cash Flow")
        ax[2].legend()
        ax[2].set_title("Cash Flow")
        ax[3].plot(df["date"], df["wc"], color="orange", label="Working Capital")
        ax[3].legend()
        ax[3].set_title("Working Capital")
        fig.suptitle(title)
        # Lay out this figure explicitly; plt.tight_layout() targets whatever
        # the "current" figure is, which is not guaranteed across threads.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even if plotting raised.
        plt.close(fig)
    buf.seek(0)
    return buf


# ---------------------- Gradio UI ----------------------
def run_pipeline(file, scenario):
    """Load history, forecast, apply the scenario and validate.

    Args:
        file: Path or file-like object of the uploaded CSV.
        scenario: One of ``SCENARIOS``.

    Returns:
        ``(png_buffer, csv_bytes, meta_json)`` — the plot as ``io.BytesIO``,
        the forecast CSV encoded as bytes, and a JSON string with audit /
        validation metadata.

    Raises:
        ValueError: If ``scenario`` is unknown or the CSV is invalid.
    """
    if scenario not in SCENARIOS:
        raise ValueError(f"Unknown scenario: {scenario!r}; expected one of {list(SCENARIOS)}")

    df_hist = load_csv(file)
    fc = baseline_forecast(df_hist)

    # Define drivers per scenario
    drivers_base = {
        "rev_growth_pct": 0.0, "gm_bps": 0, "opex_infl_pct": 0.0, "fx_pct": 0.0,
        "dso": 60, "dpo": 45, "dio": 60,
        "dep": 0.05, "interest": 0.01, "tax_rate": 0.25,
    }
    drivers_best = {**drivers_base, "rev_growth_pct": 0.10, "gm_bps": 200,
                    "opex_infl_pct": -0.05, "fx_pct": 0.02}
    drivers_worst = {**drivers_base, "rev_growth_pct": -0.10, "gm_bps": -200,
                     "opex_infl_pct": 0.10, "fx_pct": -0.03}

    if scenario == "Baseline":
        out = apply_scenario(fc, drivers_base)
        status = "Baseline"
        buf = plot_series(out, "Baseline")
        csv_bytes = out.to_csv(index=False).encode()
        return buf, csv_bytes, json.dumps(audit_row(drivers_base, status))

    # Base/Best/Worst comparison + validations
    drivers_by_name = {"Base": drivers_base, "Best": drivers_best, "Worst": drivers_worst}
    frames = {name: apply_scenario(fc, drv) for name, drv in drivers_by_name.items()}

    # Bounds check for all; keep the messages so failures are explainable.
    bounds_ok = True
    bounds_msgs = {}
    for name, drv in drivers_by_name.items():
        ok, msgs = check_bounds(drv)
        bounds_ok = bounds_ok and ok
        if msgs:
            bounds_msgs[name] = msgs

    conservation_ok = all(check_conservation(d) for d in frames.values())
    ok_mono, eb, ca = check_monotonicity(frames["Base"], frames["Worst"], frames["Best"])

    status = {
        "bounds": bounds_ok,
        "conservation": conservation_ok,
        "monotonicity": ok_mono,
    }

    # choose which to show
    out = frames[scenario]
    buf = plot_series(out, f"Scenario: {scenario}")
    meta = {
        "scenario": scenario,
        "monotonicity_totals": {"EBITDA": eb, "Cash": ca},
        "status": status,
        "bounds_messages": bounds_msgs,
        "audit": audit_row(drivers_by_name[scenario], scenario),
    }
    csv_bytes = out.to_csv(index=False).encode()
    return buf, csv_bytes, json.dumps(meta)


with gr.Blocks(title="Corporate Forecast Agent") as demo:
    gr.Markdown("## Corporate Forecast Agent — Best / Base / Worst")
    gr.Markdown("Upload CSV with columns: date,revenue,cogs,opex")
    file = gr.File(file_types=[".csv"], label="Upload CSV")
    scenario = gr.Radio(list(SCENARIOS), value="Baseline", label="Scenario")
    btn = gr.Button("Run")
    # The handler returns a file path; ``type`` only governs input format.
    img = gr.Image(type="filepath", label="Plots")
    dl = gr.File(label="Download forecast.csv")
    meta = gr.JSON(label="Validation & Audit")

    def _run(file_obj, scen):
        """Click handler: run the pipeline and hand Gradio file paths."""
        if file_obj is None:
            raise gr.Error("Please upload a CSV first.")
        # Depending on the Gradio version the upload arrives either as a
        # path string or as a temp-file wrapper exposing ``.name``.
        path = file_obj if isinstance(file_obj, (str, os.PathLike)) else file_obj.name
        try:
            plot_buf, csv_bytes, meta_json = run_pipeline(path, scen)
        except ValueError as err:
            # Show input problems in the UI instead of a generic error.
            raise gr.Error(str(err)) from err

        # gr.Image / gr.File outputs take file paths, not raw bytes, so write
        # the artifacts to a per-run temp directory (not cleaned up here).
        out_dir = tempfile.mkdtemp(prefix="forecast_")
        png_path = os.path.join(out_dir, "forecast.png")
        csv_path = os.path.join(out_dir, "forecast.csv")
        with open(png_path, "wb") as fh:
            fh.write(plot_buf.getvalue())
        with open(csv_path, "wb") as fh:
            fh.write(csv_bytes)
        return png_path, csv_path, meta_json

    btn.click(_run, inputs=[file, scenario], outputs=[img, dl, meta])

if __name__ == "__main__":
    demo.launch()