MrThinker098 committed on
Commit
710adcc
·
verified ·
1 Parent(s): ed237df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -60
app.py CHANGED
@@ -1,48 +1,53 @@
1
- import io
2
  import numpy as np
3
  import pandas as pd
4
  from datetime import datetime
5
- from dateutil.relativedelta import relativedelta
6
-
7
- from sklearn.preprocessing import OneHotEncoder
8
- from sklearn.compose import ColumnTransformer
9
- from sklearn.pipeline import Pipeline
10
- from sklearn.linear_model import LinearRegression
11
- from sklearn.metrics import mean_absolute_error
12
-
13
  import gradio as gr
14
 
15
  # --------------------------
16
- # Minimal feature engineering
17
  # --------------------------
18
- from pandas.tseries.offsets import MonthBegin
19
-
20
- def prep_monthly_features(df):
 
 
 
 
 
 
 
21
  df = df.copy()
 
 
22
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
23
  df = df.dropna(subset=["date"])
24
 
25
- # use month PERIOD safe month start timestamps
26
- df["month"] = df["date"].dt.to_period("M").dt.to_timestamp("M") - pd.offsets.MonthEnd(1) + MonthBegin(1)
27
 
 
28
  month_agg = (
29
- df.groupby("month")
30
  .agg(
31
  spend=("amount", lambda x: x[x < 0].sum()),
32
  inflow=("amount", lambda x: x[x > 0].sum()),
33
  txns=("amount", "count"),
34
- income=("income", "max")
35
  )
36
- .reset_index()
37
  )
38
 
39
- cats = (df.assign(cnt=1)
40
- .pivot_table(index="month", columns="category", values="cnt",
41
- aggfunc="sum", fill_value=0))
42
- cats.columns = [f"cat_{c}" for c in cats.columns]
 
 
 
 
43
  out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
44
 
45
- out = out.sort_values("month")
 
46
  out["target_next_spend"] = out["spend"].shift(-1)
47
 
48
  out["m_num"] = out["month"].dt.month
@@ -54,84 +59,112 @@ def prep_monthly_features(df):
54
  out = out.dropna().reset_index(drop=True)
55
  return out
56
 
57
- def train_model(monthly_df):
58
- # Simple linear model
 
 
59
  y = monthly_df["target_next_spend"].values
60
- feature_cols = [c for c in monthly_df.columns if c not in ["month","target_next_spend"]]
61
  X = monthly_df[feature_cols].copy()
 
62
  model = LinearRegression()
63
  model.fit(X, y)
64
- # quick backtest MAE on last 3 months
65
  if len(monthly_df) >= 4:
66
  X_hold = X.tail(3)
67
  y_hold = y[-3:]
68
  preds = model.predict(X_hold)
69
- mae = mean_absolute_error(y_hold, preds)
70
  else:
71
  mae = np.nan
72
- return model, feature_cols, mae
73
 
74
- from pandas.tseries.offsets import MonthEnd
75
 
76
- def predict_next(monthly_df, model, feature_cols):
 
77
  last = monthly_df.iloc[[-1]][feature_cols]
78
  pred = float(model.predict(last)[0])
79
 
80
- # overspend risk threshold (more negative = higher spend)
81
- p10 = np.percentile(monthly_df["spend"], 10)
82
  risk = "High" if pred <= p10 else "Low"
83
 
84
- last_month = pd.to_datetime(monthly_df["month"].iloc[-1])
85
- next_month = (last_month + MonthBegin(1)).strftime("%Y-%m")
 
86
  return next_month, pred, risk
87
 
88
  # --------------------------
89
- # Gradio interface functions
90
  # --------------------------
91
  def load_or_demo(file, budget):
 
92
  if file is None:
93
- # Build a tiny synthetic demo dataset (12 months)
94
  rng = pd.date_range("2024-01-01", periods=365, freq="D")
95
- cats = ["groceries","rent","utilities","fun","transport"]
96
  rows = []
97
  income = 3500.0
98
- np.random.seed(7)
99
  for d in rng:
100
- # income on the first of each month
101
  if d.day == 1:
102
  rows.append({"date": d, "amount": income, "category": "income", "income": income})
103
- # random spends
104
- for _ in range(np.random.poisson(1.8)):
105
- amt = -np.random.choice([15,25,40,60,120,300], p=[.25,.25,.2,.15,.1,.05])
106
- rows.append({"date": d, "amount": amt, "category": np.random.choice(cats), "income": income})
107
  df = pd.DataFrame(rows)
108
  else:
109
  df = pd.read_csv(file.name)
110
- # basic sanity
111
- needed = {"date","amount","category","income"}
112
- missing = needed - set(df.columns)
113
- if missing:
114
- raise ValueError(f"CSV is missing columns: {sorted(missing)}")
115
- # Fit model and predict
116
  m = prep_monthly_features(df)
117
  if len(m) < 6:
118
- raise ValueError("Need at least ~6 months of data for a useful forecast (demo provides this).")
 
119
  model, feats, mae = train_model(m)
120
  next_m, spend_pred, risk = predict_next(m, model, feats)
121
- # Budget comparison
122
- budget = float(budget) if budget is not None and budget != "" else 0.0
123
- vs_budget = ("Over budget" if spend_pred < -abs(budget) else "Within budget") if budget else "No budget set"
124
- # Returnables
 
 
 
 
 
 
 
 
125
  summary = pd.DataFrame({
126
- "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check"],
127
- "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, vs_budget]
128
  })
129
- monthly_view = m[["month","spend","inflow","txns","income"]].copy()
 
130
  monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
 
131
  return summary, monthly_view
132
 
 
 
 
 
 
 
 
 
 
 
 
133
  with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
134
- gr.Markdown("## Retail Finance Spend Forecaster\nUpload your transactions CSV (columns: `date, amount, category, income`) or use demo data. Model forecasts **next-month spend** and flags **overspend risk**.")
 
 
 
 
135
  with gr.Row():
136
  file = gr.File(label="Upload CSV (optional)")
137
  budget = gr.Number(value=2500, label="Monthly budget (positive number)")
@@ -140,4 +173,5 @@ with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
140
  monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
141
  btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
142
 
143
- demo.launch()
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
  from datetime import datetime
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
 
6
  # --------------------------
7
+ # Helpers
8
  # --------------------------
9
def month_start(ts):
    """Map a date-like value to the first day of its month.

    The input is parsed with ``pd.to_datetime``, bucketed into a monthly
    period, and converted back to the timestamp at the start of that
    period (i.e. YYYY-MM-01).
    """
    stamp = pd.to_datetime(ts)
    monthly_period = stamp.to_period("M")
    # how="start" is the default; spelled out so the intent is explicit.
    return monthly_period.to_timestamp(how="start")
13
+
14
+ def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
15
+ """
16
+ Expect columns: date, amount, category, income
17
+ amount: +inflows, -spend
18
+ """
19
  df = df.copy()
20
+
21
+ # Parse dates safely
22
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
23
  df = df.dropna(subset=["date"])
24
 
25
+ # Month bucket (always the 1st of month)
26
+ df["month"] = df["date"].apply(month_start)
27
 
28
+ # Monthly aggregates
29
  month_agg = (
30
+ df.groupby("month", as_index=False)
31
  .agg(
32
  spend=("amount", lambda x: x[x < 0].sum()),
33
  inflow=("amount", lambda x: x[x > 0].sum()),
34
  txns=("amount", "count"),
35
+ income=("income", "max"),
36
  )
 
37
  )
38
 
39
+ # Category counts per month (diversity proxy)
40
+ cats = (
41
+ df.assign(cnt=1)
42
+ .pivot_table(index="month", columns="category", values="cnt",
43
+ aggfunc="sum", fill_value=0)
44
+ .rename(columns=lambda c: f"cat_{c}")
45
+ )
46
+
47
  out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
48
 
49
+ # Sort and create target/lag features
50
+ out = out.sort_values("month").reset_index(drop=True)
51
  out["target_next_spend"] = out["spend"].shift(-1)
52
 
53
  out["m_num"] = out["month"].dt.month
 
59
  out = out.dropna().reset_index(drop=True)
60
  return out
61
 
62
def train_model(monthly_df: pd.DataFrame):
    """Fit a linear regression that predicts next-month spend.

    Every column except ``month`` and ``target_next_spend`` is used as a
    feature; ``target_next_spend`` is the regression target.

    Returns:
        ``(model, feature_cols, mae)`` where ``model`` is fitted on all rows,
        ``feature_cols`` is the ordered list of feature column names, and
        ``mae`` is the mean absolute error on the last 3 rows, or ``nan``
        when fewer than 4 rows are available.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    holdout = 3

    y = monthly_df["target_next_spend"].values
    feature_cols = [c for c in monthly_df.columns if c not in ("month", "target_next_spend")]
    X = monthly_df[feature_cols].copy()

    if len(monthly_df) >= holdout + 1:
        # Score the held-out months with a model that has NOT seen them.
        # Evaluating the fully fitted model on its own training rows would
        # report an in-sample error that understates the real one.
        backtest_model = LinearRegression()
        backtest_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = backtest_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))
    else:
        mae = np.nan

    # The model handed back to the caller is trained on every available row.
    model = LinearRegression()
    model.fit(X, y)

    return model, feature_cols, mae
82
 
83
def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """Forecast spend for the month after the last row of ``monthly_df``.

    Returns:
        ``(next_month, pred, risk)``: the ``YYYY-MM`` label of the month
        following the last row, the model's prediction as a float, and
        ``"High"`` when the prediction is at or below the 10th percentile of
        the ``spend`` column, otherwise ``"Low"``.
    """
    # The most recent row supplies the features for the forecast.
    latest_row = monthly_df.iloc[[-1]]
    forecast = float(model.predict(latest_row[feature_cols])[0])

    # Spend is stored as negative amounts, so the 10th percentile marks the
    # heaviest-spending months; a forecast at or below it is flagged.
    heavy_spend_cutoff = float(np.percentile(monthly_df["spend"], 10))
    if forecast <= heavy_spend_cutoff:
        risk = "High"
    else:
        risk = "Low"

    # Label for the month that follows the last row.
    following = monthly_df["month"].iloc[-1] + pd.offsets.MonthBegin(1)
    next_month = month_start(following).strftime("%Y-%m")
    return next_month, forecast, risk
96
 
97
  # --------------------------
98
+ # App logic
99
  # --------------------------
100
def _demo_transactions() -> pd.DataFrame:
    """Build the seeded synthetic transaction log used when no CSV is uploaded."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    cats = ["groceries", "rent", "utilities", "fun", "transport"]
    income = 3500.0
    gen = np.random.default_rng(7)  # fixed seed keeps the demo reproducible
    rows = []
    for d in days:
        # Income lands on the first of each month.
        if d.day == 1:
            rows.append({"date": d, "amount": income, "category": "income", "income": income})
        # Poisson number of daily spend txns
        for _ in range(gen.poisson(2)):
            amt = -float(gen.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05]))
            rows.append({"date": d, "amount": amt, "category": gen.choice(cats), "income": income})
    return pd.DataFrame(rows)


def _parse_budget(budget) -> float:
    """Coerce the UI budget to a float; blank, unparseable or NaN means no budget (0.0)."""
    try:
        if budget in (None, ""):
            return 0.0
        value = float(budget)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN is truthy and compares False to everything, which would silently
    # report "Within budget"; treat it as "no budget" instead.
    return 0.0 if np.isnan(value) else value


def load_or_demo(file, budget):
    """Load transactions (uploaded CSV or demo data), fit the model and summarise.

    Args:
        file: ``None`` to use the synthetic demo data; otherwise an uploaded
            file object exposing ``.name``, or a plain path string.
        budget: Monthly budget as entered in the UI; its sign is ignored.
            ``None``, ``""``, ``0``, NaN or an unparseable value means no
            budget is set.

    Returns:
        ``(summary, monthly_view)``: a two-column metric/value DataFrame and
        the monthly aggregates with ``month`` formatted as ``YYYY-MM``.

    Raises:
        ValueError: if required columns are missing or fewer than 6 monthly
            rows remain after feature preparation.
    """
    if file is None:
        df = _demo_transactions()
    else:
        # Accept both an upload object carrying ``.name`` and a bare path.
        df = pd.read_csv(getattr(file, "name", file))

    needed = {"date", "amount", "category", "income"}
    miss = needed - set(df.columns)
    if miss:
        raise ValueError(f"CSV is missing columns: {sorted(miss)}")

    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data (the demo provides this).")

    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget evaluation: spend is negative, so "over budget" means the
    # forecast is more negative than minus the budget.
    budget_val = _parse_budget(budget)
    if budget_val:
        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
    else:
        budget_check = "No budget set"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m],
    })

    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")

    return summary, monthly_view
150
 
151
def safe_run(file, budget):
    """Run ``load_or_demo`` and surface any failure as a ``gr.Error``.

    On an exception the full traceback is printed to stdout and a
    ``gr.Error`` carrying the original message is raised in its place.
    """
    import traceback

    try:
        result = load_or_demo(file, budget)
    except Exception as exc:
        # Keep the full traceback in the logs; the raised error only carries the message.
        print("TRACEBACK:\n", traceback.format_exc())
        raise gr.Error(str(exc))
    return result
158
+
159
+ # --------------------------
160
+ # Gradio UI
161
+ # --------------------------
162
  with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
163
+ gr.Markdown(
164
+ "## Retail Finance Spend Forecaster\n"
165
+ "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
166
+ "The model forecasts **next-month spend** and flags **overspend risk**."
167
+ )
168
  with gr.Row():
169
  file = gr.File(label="Upload CSV (optional)")
170
  budget = gr.Number(value=2500, label="Monthly budget (positive number)")
 
173
  monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
174
  btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
175
 
176
+ if __name__ == "__main__":
177
+ demo.launch()