MrThinker098 committed on
Commit
273b603
·
verified ·
1 Parent(s): c133362

First commit: add retail finance forecasting app

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import numpy as np
3
+ import pandas as pd
4
+ from datetime import datetime
5
+ from dateutil.relativedelta import relativedelta
6
+
7
+ from sklearn.preprocessing import OneHotEncoder
8
+ from sklearn.compose import ColumnTransformer
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.linear_model import LinearRegression
11
+ from sklearn.metrics import mean_absolute_error
12
+
13
+ import gradio as gr
14
+
15
+ # --------------------------
16
+ # Minimal feature engineering
17
+ # --------------------------
18
def prep_monthly_features(df):
    """Turn a raw transaction log into one modelling row per calendar month.

    Expects the columns ``date``, ``amount``, ``category`` and ``income``.
    Negative amounts are summed as ``spend``; positive amounts as ``inflow``.

    Returns a frame ordered by month holding the monthly aggregates, one
    ``cat_<category>`` transaction-count column per category, calendar
    features (``m_num``, ``y_num``), lagged spend/inflow columns, and
    ``target_next_spend`` (the spend of the following month).

    Rows that contain NaN after shifting are dropped: the first two months
    (no two-month lag available) and the most recent month (its target is
    not known yet).
    """
    tx = df.copy()
    tx["date"] = pd.to_datetime(tx["date"])
    # Truncate every timestamp to the first day of its month.
    tx["month"] = tx["date"].values.astype("datetime64[M]")

    def _outgoing(amounts):
        # Sum of the negative amounts only (spend is kept negative).
        return amounts[amounts < 0].sum()

    def _incoming(amounts):
        # Sum of the positive amounts only.
        return amounts[amounts > 0].sum()

    monthly = tx.groupby("month").agg(
        spend=("amount", _outgoing),
        inflow=("amount", _incoming),
        txns=("amount", "count"),
        income=("income", "max"),  # income is assumed to repeat on every row
    )
    monthly = monthly.reset_index()

    # Transaction counts per category and month (a diversity proxy).
    per_category = tx.assign(cnt=1).pivot_table(
        index="month",
        columns="category",
        values="cnt",
        aggfunc="sum",
        fill_value=0,
    )
    per_category.columns = [f"cat_{name}" for name in per_category.columns]

    merged = monthly.merge(per_category, left_on="month", right_index=True, how="left")
    merged = merged.fillna(0).sort_values("month")

    # Target first, then calendar features, then lags (column order matters
    # only for readability; downstream code selects features by name).
    merged["target_next_spend"] = merged["spend"].shift(-1)
    month_col = merged["month"]
    merged["m_num"] = month_col.dt.month
    merged["y_num"] = month_col.dt.year
    for column, source, lag in (
        ("spend_lag1", "spend", 1),
        ("spend_lag2", "spend", 2),
        ("inflow_lag1", "inflow", 1),
    ):
        merged[column] = merged[source].shift(lag)

    return merged.dropna().reset_index(drop=True)
52
+
53
def train_model(monthly_df):
    """Fit a linear regression for next-month spend and estimate its error.

    Args:
        monthly_df: Frame with a ``target_next_spend`` column; every column
            other than ``month`` and ``target_next_spend`` is used as a
            feature.

    Returns:
        A ``(model, feature_cols, mae)`` tuple. ``model`` is fitted on all
        rows. ``mae`` is the mean absolute error of a separate model that
        was fitted without the last three rows and scored on them, or NaN
        when fewer than four rows are available.
    """
    holdout = 3
    feature_cols = [
        c for c in monthly_df.columns if c not in ("month", "target_next_spend")
    ]
    X = monthly_df[feature_cols]
    y = monthly_df["target_next_spend"].to_numpy()

    mae = np.nan
    if len(monthly_df) > holdout:
        # Score on rows the backtest model has never seen; evaluating the
        # final model on its own training rows would report an optimistic,
        # in-sample error.
        backtest = LinearRegression().fit(X.iloc[:-holdout], y[:-holdout])
        mae = mean_absolute_error(y[-holdout:], backtest.predict(X.iloc[-holdout:]))

    # The model that is returned uses every available month.
    model = LinearRegression().fit(X, y)
    return model, feature_cols, mae
69
+
70
def predict_next(monthly_df, model, feature_cols):
    """Predict spend for the month following the last row of ``monthly_df``.

    Args:
        monthly_df: Monthly frame with ``month`` and ``spend`` columns plus
            every column named in ``feature_cols``.
        model: Fitted estimator exposing ``predict``.
        feature_cols: Feature column names, in the order used for fitting.

    Returns:
        ``(next_month, pred, risk)`` where ``next_month`` is a ``"YYYY-MM"``
        label, ``pred`` is the predicted spend as a float (spend is
        negative), and ``risk`` is ``"High"`` or ``"Low"``.
    """
    latest_features = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(latest_features)[0])

    # Spend is stored as negative numbers, so the 10th percentile marks the
    # heaviest-spending 10% of past months.
    heavy_spend_cutoff = np.percentile(monthly_df["spend"], 10)
    risk = "High" if pred <= heavy_spend_cutoff else "Low"

    # Calendar-aware month step. A Timestamp has no ``astype`` and a numpy
    # "M" timedelta is not a fixed duration, so DateOffset is used instead.
    last_month = pd.Timestamp(monthly_df["month"].iloc[-1])
    next_month = (last_month + pd.DateOffset(months=1)).strftime("%Y-%m")
    return next_month, pred, risk
81
+
82
+ # --------------------------
83
+ # Gradio interface functions
84
+ # --------------------------
85
def _demo_transactions():
    """Build a deterministic synthetic transaction log (365 days from 2024-01-01)."""
    categories = ["groceries", "rent", "utilities", "fun", "transport"]
    spend_sizes = [15, 25, 40, 60, 120, 300]
    spend_probs = [.25, .25, .2, .15, .1, .05]
    income = 3500.0

    np.random.seed(7)  # fixed seed so the demo is reproducible
    rows = []
    for day in pd.date_range("2024-01-01", periods=365, freq="D"):
        # Income lands on the first of each month.
        if day.day == 1:
            rows.append({"date": day, "amount": income, "category": "income", "income": income})
        # A random number of purchases per day, each with a random size.
        for _ in range(np.random.poisson(1.8)):
            amount = -np.random.choice(spend_sizes, p=spend_probs)
            category = np.random.choice(categories)
            rows.append({"date": day, "amount": amount, "category": category, "income": income})
    return pd.DataFrame(rows)


def load_or_demo(file, budget):
    """Gradio callback: load transactions, fit the model, and report a forecast.

    Args:
        file: Uploaded CSV, either as a filepath string or as an object with
            a ``name`` attribute holding the path; ``None`` selects the
            built-in demo data.
        budget: Monthly budget as a positive number; ``None``, ``""`` or 0
            means no budget is set.

    Returns:
        ``(summary, monthly_view)``: a metric/value summary frame and the
        monthly aggregates with ``month`` formatted as ``"YYYY-MM"``.

    Raises:
        ValueError: If the CSV lacks a required column or fewer than six
            usable months remain after feature preparation.
    """
    if file is None:
        df = _demo_transactions()
    else:
        # Gradio passes either a plain path string or a temp-file wrapper,
        # depending on version/configuration; accept both.
        csv_path = file if isinstance(file, str) else file.name
        df = pd.read_csv(csv_path)
        needed = {"date", "amount", "category", "income"}
        missing = needed - set(df.columns)
        if missing:
            raise ValueError(f"CSV is missing columns: {sorted(missing)}")

    # NOTE(review): prep_monthly_features drops the most recent month because
    # its target is unknown, so the "next month" forecast below is for the
    # last month already present in the data. Consider scoring the latest
    # month's features instead.
    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data for a useful forecast (demo provides this).")
    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget comparison: spend is negative, so "over" means more negative
    # than the negated budget.
    budget = float(budget) if budget is not None and budget != "" else 0.0
    if not budget:
        vs_budget = "No budget set"
    elif spend_pred < -abs(budget):
        vs_budget = "Over budget"
    else:
        vs_budget = "Within budget"

    mae_display = None if np.isnan(mae) else round(mae, 2)
    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check"],
        "value": [round(spend_pred, 2), mae_display, risk, vs_budget],
    })
    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
    return summary, monthly_view
126
+
127
# UI layout: a CSV upload and a budget input side by side, a run button, and
# two result tables filled by load_or_demo.
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    gr.Markdown("## Retail Finance Spend Forecaster\nUpload your transactions CSV (columns: `date, amount, category, income`) or use demo data. Model forecasts **next-month spend** and flags **overspend risk**.")
    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")
    btn = gr.Button("Run Forecast")
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
    btn.click(load_or_demo, inputs=[file, budget], outputs=[summary, monthly_table])

# Only start the server when executed as a script, so importing this module
# (e.g. for tests or Gradio's reload mode) does not launch it.
if __name__ == "__main__":
    demo.launch()