MrThinker098 committed on
Commit
ed237df
·
verified ·
1 Parent(s): ec5f130

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -15,38 +15,42 @@ import gradio as gr
15
  # --------------------------
16
  # Minimal feature engineering
17
  # --------------------------
 
 
18
  def prep_monthly_features(df):
19
- # Expect columns: date, amount, category, income
20
  df = df.copy()
21
- df["date"] = pd.to_datetime(df["date"])
22
- # monthly aggregates
23
- df["month"] = df["date"].values.astype("datetime64[M]")
 
 
 
24
  month_agg = (
25
  df.groupby("month")
26
  .agg(
27
- spend=("amount", lambda x: x[x < 0].sum()), # negatives as spend
28
  inflow=("amount", lambda x: x[x > 0].sum()),
29
  txns=("amount", "count"),
30
- income=("income", "max") # assume monthly income repeated
31
  )
32
  .reset_index()
33
  )
34
- # categories per month (diversity proxy)
35
  cats = (df.assign(cnt=1)
36
- .pivot_table(index="month", columns="category", values="cnt", aggfunc="sum", fill_value=0))
 
37
  cats.columns = [f"cat_{c}" for c in cats.columns]
38
  out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
39
 
40
- # Targets: next-month spend
41
  out = out.sort_values("month")
42
- out["target_next_spend"] = out["spend"].shift(-1) # what we want to predict
43
- # Basic time features
44
  out["m_num"] = out["month"].dt.month
45
  out["y_num"] = out["month"].dt.year
46
- # lag features
47
  out["spend_lag1"] = out["spend"].shift(1)
48
  out["spend_lag2"] = out["spend"].shift(2)
49
  out["inflow_lag1"] = out["inflow"].shift(1)
 
50
  out = out.dropna().reset_index(drop=True)
51
  return out
52
 
@@ -67,16 +71,18 @@ def train_model(monthly_df):
67
  mae = np.nan
68
  return model, feature_cols, mae
69
 
 
 
70
  def predict_next(monthly_df, model, feature_cols):
71
- # Use last observed month’s features to predict next-month spend
72
  last = monthly_df.iloc[[-1]][feature_cols]
73
  pred = float(model.predict(last)[0])
74
- # A simple overspend risk flag: predict spend more negative than 90% of past spends
75
- p90 = np.percentile(monthly_df["spend"], 10) # more negative = higher spend
76
- risk = "High" if pred <= p90 else "Low"
77
- # Return predicted NEXT month label
78
- next_month = (monthly_df["month"].iloc[-1] + np.timedelta64(1, "M")).astype("datetime64[M]").astype("datetime64[D]")
79
- next_month = pd.to_datetime(next_month).strftime("%Y-%m")
 
80
  return next_month, pred, risk
81
 
82
  # --------------------------
@@ -132,6 +138,6 @@ with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
132
  btn = gr.Button("Run Forecast")
133
  summary = gr.Dataframe(label="Summary")
134
  monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
135
- btn.click(load_or_demo, inputs=[file, budget], outputs=[summary, monthly_table])
136
 
137
  demo.launch()
 
15
  # --------------------------
16
  # Minimal feature engineering
17
  # --------------------------
18
+ from pandas.tseries.offsets import MonthBegin
19
+
def prep_monthly_features(df):
    """Aggregate raw transactions into one feature row per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction-level data with the columns ``date``, ``amount``,
        ``category`` and ``income``. Negative amounts are summed as spend,
        positive amounts as inflow. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per month, sorted by ``month`` (month-start timestamps), with
        ``spend``, ``inflow``, ``txns``, ``income``, one ``cat_<category>``
        transaction-count column per category, ``target_next_spend``,
        ``m_num``, ``y_num``, ``spend_lag1``, ``spend_lag2`` and
        ``inflow_lag1``. Months whose lags or target are undefined are
        dropped, i.e. the first two months and the final month of the input.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    required = ("date", "amount", "category", "income")
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Same exception type a bare df["..."] lookup would raise, but with a
        # message that names every missing column at once.
        raise KeyError(
            f"prep_monthly_features: missing required column(s): {missing}"
        )

    df = df.copy()

    # Unparseable dates become NaT and those rows are discarded.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # Bucket every transaction to the first day of its month.
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    month_agg = (
        df.groupby("month")
        .agg(
            spend=("amount", lambda x: x[x < 0].sum()),
            inflow=("amount", lambda x: x[x > 0].sum()),
            txns=("amount", "count"),
            income=("income", "max"),
        )
        .reset_index()
    )

    # Number of transactions per category per month.
    cats = df.assign(cnt=1).pivot_table(
        index="month",
        columns="category",
        values="cnt",
        aggfunc="sum",
        fill_value=0,
    )
    cats.columns = [f"cat_{c}" for c in cats.columns]

    out = month_agg.merge(
        cats, left_on="month", right_index=True, how="left"
    ).fillna(0)
    out = out.sort_values("month")

    # Supervised target: the spend of the following month.
    out["target_next_spend"] = out["spend"].shift(-1)

    # Calendar features.
    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year

    # Lagged history.
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    # The shifts above are the only source of NaN left after fillna(0), so
    # this removes exactly the months lacking a full lag window or a target.
    return out.dropna().reset_index(drop=True)
56
 
 
71
  mae = np.nan
72
  return model, feature_cols, mae
73
 
74
+ from pandas.tseries.offsets import MonthEnd
75
+
def predict_next(monthly_df, model, feature_cols, *, risk_percentile=10):
    """Predict the spend of the month following the last row of ``monthly_df``.

    Parameters
    ----------
    monthly_df : pandas.DataFrame
        Monthly feature table with at least the columns ``month``, ``spend``
        and every name in ``feature_cols``. The last row is used as the
        model input.
    model : object
        Fitted estimator exposing ``predict(X)``; the first element of its
        result is taken as the prediction.
    feature_cols : list of str
        Columns of ``monthly_df`` passed to ``model.predict``.
    risk_percentile : float, optional
        Percentile of the historical ``spend`` column used as the overspend
        threshold (default 10, the original fixed value).

    Returns
    -------
    tuple
        ``(next_month, pred, risk)``: the ``"YYYY-MM"`` label of the month
        after the last row, the predicted spend as a float, and ``"High"``
        or ``"Low"``.

    Raises
    ------
    IndexError
        If ``monthly_df`` has no rows.
    """
    if len(monthly_df) == 0:
        # Same exception type that iloc[[-1]] raises on an empty frame, but
        # with an actionable message.
        raise IndexError(
            "predict_next: monthly_df is empty; not enough monthly history "
            "to build a forecast"
        )

    # NOTE(review): prep_monthly_features drops the final calendar month
    # (its target is NaN). If monthly_df comes straight from it, the "next
    # month" labelled here is the latest month already observed. Confirm
    # with the caller whether that is intended.
    latest = monthly_df.iloc[[-1]]
    pred = float(model.predict(latest[feature_cols])[0])

    # Spend is stored as negative numbers, so "more negative" means "spent
    # more": a prediction at or below the low percentile is flagged as risky.
    threshold = np.percentile(monthly_df["spend"], risk_percentile)
    risk = "High" if pred <= threshold else "Low"

    # Period arithmetic moves to the next calendar month regardless of which
    # day of the month the timestamp falls on.
    last_month = pd.to_datetime(monthly_df["month"].iloc[-1])
    next_month = (last_month.to_period("M") + 1).strftime("%Y-%m")
    return next_month, pred, risk
87
 
88
  # --------------------------
 
138
  btn = gr.Button("Run Forecast")
139
  summary = gr.Dataframe(label="Summary")
140
  monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
141
+ btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
142
 
143
  demo.launch()