Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,38 +15,42 @@ import gradio as gr
|
|
| 15 |
# --------------------------
|
| 16 |
# Minimal feature engineering
|
| 17 |
# --------------------------
|
|
|
|
|
|
|
| 18 |
def prep_monthly_features(df):
|
| 19 |
-
# Expect columns: date, amount, category, income
|
| 20 |
df = df.copy()
|
| 21 |
-
df["date"] = pd.to_datetime(df["date"])
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
month_agg = (
|
| 25 |
df.groupby("month")
|
| 26 |
.agg(
|
| 27 |
-
spend=("amount", lambda x: x[x < 0].sum()),
|
| 28 |
inflow=("amount", lambda x: x[x > 0].sum()),
|
| 29 |
txns=("amount", "count"),
|
| 30 |
-
income=("income", "max")
|
| 31 |
)
|
| 32 |
.reset_index()
|
| 33 |
)
|
| 34 |
-
|
| 35 |
cats = (df.assign(cnt=1)
|
| 36 |
-
.pivot_table(index="month", columns="category", values="cnt",
|
|
|
|
| 37 |
cats.columns = [f"cat_{c}" for c in cats.columns]
|
| 38 |
out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
|
| 39 |
|
| 40 |
-
# Targets: next-month spend
|
| 41 |
out = out.sort_values("month")
|
| 42 |
-
out["target_next_spend"] = out["spend"].shift(-1)
|
| 43 |
-
|
| 44 |
out["m_num"] = out["month"].dt.month
|
| 45 |
out["y_num"] = out["month"].dt.year
|
| 46 |
-
# lag features
|
| 47 |
out["spend_lag1"] = out["spend"].shift(1)
|
| 48 |
out["spend_lag2"] = out["spend"].shift(2)
|
| 49 |
out["inflow_lag1"] = out["inflow"].shift(1)
|
|
|
|
| 50 |
out = out.dropna().reset_index(drop=True)
|
| 51 |
return out
|
| 52 |
|
|
@@ -67,16 +71,18 @@ def train_model(monthly_df):
|
|
| 67 |
mae = np.nan
|
| 68 |
return model, feature_cols, mae
|
| 69 |
|
|
|
|
|
|
|
| 70 |
def predict_next(monthly_df, model, feature_cols):
|
| 71 |
-
# Use last observed month’s features to predict next-month spend
|
| 72 |
last = monthly_df.iloc[[-1]][feature_cols]
|
| 73 |
pred = float(model.predict(last)[0])
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
| 80 |
return next_month, pred, risk
|
| 81 |
|
| 82 |
# --------------------------
|
|
@@ -132,6 +138,6 @@ with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
|
|
| 132 |
btn = gr.Button("Run Forecast")
|
| 133 |
summary = gr.Dataframe(label="Summary")
|
| 134 |
monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
|
| 135 |
-
btn.click(
|
| 136 |
|
| 137 |
demo.launch()
|
|
|
|
| 15 |
# --------------------------
|
| 16 |
# Minimal feature engineering
|
| 17 |
# --------------------------
|
| 18 |
+
from pandas.tseries.offsets import MonthBegin
|
| 19 |
+
|
| 20 |
def prep_monthly_features(df):
    """Aggregate raw transactions into one feature row per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction-level data with the columns ``date``, ``amount``,
        ``category`` and ``income``.  The caller's frame is not modified;
        all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        One row per month, sorted by ``month``, with a fresh integer index
        and these columns:

        * ``month``             - month-start timestamp of the bucket
        * ``spend``             - sum of the negative amounts (so <= 0)
        * ``inflow``            - sum of the positive amounts
        * ``txns``              - number of non-null amounts
        * ``income``            - maximum ``income`` value seen in the month
        * ``cat_<category>``    - transaction count per category
        * ``target_next_spend`` - ``spend`` of the following row
        * ``m_num`` / ``y_num`` - calendar month / year of ``month``
        * ``spend_lag1``, ``spend_lag2``, ``inflow_lag1`` - values from
          the previous one / two rows

        Rows containing any NaN are dropped, which removes the first two
        months (no lag-2 history) and the last month (no next-month target).

    Raises
    ------
    KeyError
        If any of the required input columns is missing.
    """
    required = ("date", "amount", "category", "income")
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Same exception type pandas would raise later, but with a message
        # that names the actual problem up front.
        raise KeyError(f"prep_monthly_features: missing column(s): {missing}")

    df = df.copy()

    # Unparseable dates become NaT and those rows are discarded.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # Bucket every transaction by the first day of its month.  Converting a
    # monthly period back to a timestamp already yields the period start, so
    # no extra MonthEnd/MonthBegin offset arithmetic is needed.
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    month_agg = (
        df.groupby("month")
        .agg(
            spend=("amount", lambda x: x[x < 0].sum()),
            inflow=("amount", lambda x: x[x > 0].sum()),
            txns=("amount", "count"),
            income=("income", "max"),
        )
        .reset_index()
    )

    # Per-month transaction counts for each category, one column per category.
    cats = df.assign(cnt=1).pivot_table(
        index="month",
        columns="category",
        values="cnt",
        aggfunc="sum",
        fill_value=0,
    )
    cats.columns = [f"cat_{c}" for c in cats.columns]

    # fillna(0) runs BEFORE the shifts below, so the NaNs those shifts
    # introduce survive until the final dropna().
    out = month_agg.merge(
        cats, left_on="month", right_index=True, how="left"
    ).fillna(0)

    out = out.sort_values("month")

    # Target: spend of the next row.
    out["target_next_spend"] = out["spend"].shift(-1)

    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year

    # NOTE: shifts are row-based, not calendar-based.  If a month has no
    # transactions it has no row, so "lag1" is the previous month that is
    # present in the data, not necessarily the previous calendar month.
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    out = out.dropna().reset_index(drop=True)
    return out
|
| 56 |
|
|
|
|
| 71 |
mae = np.nan
|
| 72 |
return model, feature_cols, mae
|
| 73 |
|
| 74 |
+
from pandas.tseries.offsets import MonthEnd
|
| 75 |
+
|
| 76 |
def predict_next(monthly_df, model, feature_cols):
    """Predict spend for the month after the last row of ``monthly_df``.

    Parameters
    ----------
    monthly_df : pandas.DataFrame
        Monthly feature table containing at least ``month``, ``spend`` and
        every column named in ``feature_cols``.
    model : object
        Fitted estimator exposing ``predict(X)``; the first element of its
        result is used.
    feature_cols : list of str
        Columns fed to ``model.predict``.

    Returns
    -------
    tuple
        ``(next_month, pred, risk)`` where ``next_month`` is a ``"YYYY-MM"``
        string for the month following the last row, ``pred`` is the
        predicted spend as a float, and ``risk`` is ``"High"`` or ``"Low"``.

    Raises
    ------
    IndexError
        If ``monthly_df`` has no rows.
    """
    if len(monthly_df) == 0:
        # Keep the exception type the bare ``iloc[[-1]]`` would raise, but
        # say what actually went wrong.
        raise IndexError("predict_next: monthly_df is empty, nothing to predict from")

    # NOTE(review): if monthly_df comes straight from prep_monthly_features,
    # its final observed month has already been dropped (no target), so the
    # "next" month here may be one that is already in the raw data - confirm
    # against the caller.
    last = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(last)[0])

    # Overspend risk threshold.  Spend is stored as a negative number, so
    # "more negative" means "spent more": the 10th percentile marks the
    # heaviest-spending tail of the observed months.
    p10 = np.percentile(monthly_df["spend"], 10)
    risk = "High" if pred <= p10 else "Low"

    last_month = pd.to_datetime(monthly_df["month"].iloc[-1])
    # pd.offsets is used directly so this function does not depend on a
    # ``MonthBegin`` import located elsewhere in the module.
    next_month = (last_month + pd.offsets.MonthBegin(1)).strftime("%Y-%m")
    return next_month, pred, risk
|
| 87 |
|
| 88 |
# --------------------------
|
|
|
|
| 138 |
btn = gr.Button("Run Forecast")
|
| 139 |
summary = gr.Dataframe(label="Summary")
|
| 140 |
monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
|
| 141 |
+
btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
|
| 142 |
|
| 143 |
demo.launch()
|