Spaces:
Sleeping
Sleeping
First commit: add retail finance forecasting app
Browse files
app.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from dateutil.relativedelta import relativedelta
|
| 6 |
+
|
| 7 |
+
from sklearn.preprocessing import OneHotEncoder
|
| 8 |
+
from sklearn.compose import ColumnTransformer
|
| 9 |
+
from sklearn.pipeline import Pipeline
|
| 10 |
+
from sklearn.linear_model import LinearRegression
|
| 11 |
+
from sklearn.metrics import mean_absolute_error
|
| 12 |
+
|
| 13 |
+
import gradio as gr
|
| 14 |
+
|
| 15 |
+
# --------------------------
|
| 16 |
+
# Minimal feature engineering
|
| 17 |
+
# --------------------------
|
| 18 |
+
def prep_monthly_features(df):
    """Collapse raw transactions into one modelling row per calendar month.

    Expects the columns ``date``, ``amount``, ``category`` and ``income``.
    Negative amounts count as spend, positive amounts as inflow.

    Returns a DataFrame sorted by month holding the monthly aggregates
    (``spend``, ``inflow``, ``txns``, ``income``), one ``cat_<name>``
    transaction-count column per category, calendar features (``m_num``,
    ``y_num``), lagged spend/inflow and the ``target_next_spend`` label.

    Rows containing any missing value are dropped at the end.  That removes
    the first two months (no lag history yet) and the final month (there is
    no following month to supply its target).
    """

    def _negatives_total(amounts):
        # Sum of outgoing (negative) amounts; stays negative.
        return amounts[amounts < 0].sum()

    def _positives_total(amounts):
        # Sum of incoming (positive) amounts.
        return amounts[amounts > 0].sum()

    frame = df.copy()  # work on a copy so the caller's frame is untouched
    frame["date"] = pd.to_datetime(frame["date"])
    # Truncate each timestamp to the month it falls in.
    frame["month"] = frame["date"].values.astype("datetime64[M]")

    # Monthly aggregates.
    per_month = frame.groupby("month").agg(
        spend=("amount", _negatives_total),
        inflow=("amount", _positives_total),
        txns=("amount", "count"),
        income=("income", "max"),  # assumes the monthly income is repeated on every row
    )
    per_month = per_month.reset_index()

    # Transactions per category per month (a diversity proxy).
    tagged = frame.assign(cnt=1)
    cat_counts = tagged.pivot_table(
        index="month",
        columns="category",
        values="cnt",
        aggfunc="sum",
        fill_value=0,
    )
    cat_counts.columns = [f"cat_{label}" for label in cat_counts.columns]

    features = per_month.merge(
        cat_counts, left_on="month", right_index=True, how="left"
    )
    features = features.fillna(0)
    features = features.sort_values("month")

    monthly_spend = features["spend"]
    # Target: the spend of the following month.
    features["target_next_spend"] = monthly_spend.shift(-1)
    # Basic calendar features.
    features["m_num"] = features["month"].dt.month
    features["y_num"] = features["month"].dt.year
    # Lag features.
    features["spend_lag1"] = monthly_spend.shift(1)
    features["spend_lag2"] = monthly_spend.shift(2)
    features["inflow_lag1"] = features["inflow"].shift(1)

    return features.dropna().reset_index(drop=True)
|
| 52 |
+
|
| 53 |
+
def train_model(monthly_df):
    """Fit a linear model for next-month spend and estimate its error.

    Parameters:
        monthly_df: DataFrame with a ``target_next_spend`` column; every
            other column except ``month`` is used as a feature.

    Returns:
        ``(model, feature_cols, mae)`` where ``model`` is a
        ``LinearRegression`` fitted on all rows, ``feature_cols`` is the list
        of feature column names in training order, and ``mae`` is the mean
        absolute error on the last three rows, or ``np.nan`` when there are
        fewer than four rows.
    """
    holdout = 3  # number of trailing months reserved for the backtest

    y = monthly_df["target_next_spend"].values
    feature_cols = [
        c for c in monthly_df.columns if c not in ("month", "target_next_spend")
    ]
    X = monthly_df[feature_cols].copy()

    if len(monthly_df) >= holdout + 1:
        # Score a model that has NOT seen the held-out months.  Evaluating
        # the final model on rows it was fitted on would report an in-sample
        # error, which is misleadingly small with this few rows.
        backtest_model = LinearRegression()
        backtest_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = backtest_model.predict(X.iloc[-holdout:])
        mae = mean_absolute_error(y[-holdout:], preds)
    else:
        mae = np.nan

    # The returned model uses every available month.
    model = LinearRegression()
    model.fit(X, y)
    return model, feature_cols, mae
|
| 69 |
+
|
| 70 |
+
def predict_next(monthly_df, model, feature_cols):
    """Predict spend for the month after the last row of ``monthly_df``.

    Parameters:
        monthly_df: monthly feature table with ``month`` (datetime) and
            ``spend`` columns plus every column named in ``feature_cols``.
        model: fitted estimator exposing ``predict``.
        feature_cols: feature column names, in the order used for training.

    Returns:
        ``(next_month, pred, risk)``: the ``"YYYY-MM"`` label of the month
        following the last row, the predicted spend as a float, and
        ``"High"`` or ``"Low"``.
    """
    # Double brackets keep a one-row DataFrame, the 2-D shape predict() expects.
    latest = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(latest)[0])

    # Spend is stored as negative numbers, so the 10th percentile is the
    # threshold for the heaviest-spending 10% of months.
    heavy_spend_cutoff = np.percentile(monthly_df["spend"], 10)
    risk = "High" if pred <= heavy_spend_cutoff else "Low"

    # Calendar-aware month arithmetic: a pandas Timestamp cannot be shifted
    # with np.timedelta64(1, "M") and has no .astype(), so use DateOffset.
    last_month = pd.Timestamp(monthly_df["month"].iloc[-1])
    next_month = (last_month + pd.DateOffset(months=1)).strftime("%Y-%m")
    return next_month, pred, risk
|
| 81 |
+
|
| 82 |
+
# --------------------------
|
| 83 |
+
# Gradio interface functions
|
| 84 |
+
# --------------------------
|
| 85 |
+
def _demo_transactions():
    """Build a reproducible synthetic dataset of 365 daily transaction days."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    cats = ["groceries", "rent", "utilities", "fun", "transport"]
    spend_sizes = [15, 25, 40, 60, 120, 300]
    spend_probs = [.25, .25, .2, .15, .1, .05]
    income = 3500.0
    np.random.seed(7)  # fixed seed so the demo is identical on every run

    rows = []
    for d in days:
        # Income lands on the first of each month.
        if d.day == 1:
            rows.append({"date": d, "amount": income, "category": "income", "income": income})
        # A random number of spends per day, each with a random size and category.
        for _ in range(np.random.poisson(1.8)):
            amt = -np.random.choice(spend_sizes, p=spend_probs)
            rows.append({"date": d, "amount": amt, "category": np.random.choice(cats), "income": income})
    return pd.DataFrame(rows)


def load_or_demo(file, budget):
    """Gradio callback: load transactions, fit the model and report a forecast.

    Parameters:
        file: uploaded CSV, either a path string or an object whose ``name``
            attribute is the path; ``None`` selects the synthetic demo data.
        budget: monthly budget as a positive number; ``None``, ``""`` or 0
            means no budget is set.

    Returns:
        ``(summary, monthly_view)``: a two-column metric/value DataFrame and
        the monthly aggregates table with ``month`` formatted as ``YYYY-MM``.

    Raises:
        ValueError: if the CSV lacks a required column, or fewer than six
            monthly rows remain after feature preparation.
    """
    if file is None:
        df = _demo_transactions()
    else:
        # Accept both a plain path string and a temp-file-like object with
        # `.name`; which one Gradio passes depends on its version and the
        # File component's `type` setting.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)
        # Basic sanity check on the uploaded schema.
        needed = {"date", "amount", "category", "income"}
        missing = needed - set(df.columns)
        if missing:
            raise ValueError(f"CSV is missing columns: {sorted(missing)}")

    # Fit model and predict.
    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data for a useful forecast (demo provides this).")
    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget comparison: spend is negative, so compare against -|budget|.
    budget = float(budget) if budget is not None and budget != "" else 0.0
    if not budget:
        vs_budget = "No budget set"
    elif spend_pred < -abs(budget):
        vs_budget = "Over budget"
    else:
        vs_budget = "Within budget"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, vs_budget],
    })
    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
    return summary, monthly_view
|
| 126 |
+
|
| 127 |
+
# Gradio UI: one row of inputs, a run button, and two result tables.
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    gr.Markdown(
        value=(
            "## Retail Finance Spend Forecaster\n"
            "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
            "Model forecasts **next-month spend** and flags **overspend risk**."
        )
    )

    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(label="Monthly budget (positive number)", value=2500)

    btn = gr.Button(value="Run Forecast")
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")

    # Clicking the button runs the forecast and fills both tables.
    btn.click(
        fn=load_or_demo,
        inputs=[file, budget],
        outputs=[summary, monthly_table],
    )

demo.launch()
|