Spaces:

ThinkerFactory
/

README

Sleeping

App Files Files Community

MrThinker098 committed on Sep 5, 2025

Commit

ed237df

verified ·

1 Parent(s): ec5f130

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -20

app.py CHANGED Viewed

@@ -15,38 +15,42 @@ import gradio as gr
 # --------------------------
 # Minimal feature engineering
 # --------------------------
 def prep_monthly_features(df):
-    # Expect columns: date, amount, category, income
     df = df.copy()
-    df["date"] = pd.to_datetime(df["date"])
-    # monthly aggregates
-    df["month"] = df["date"].values.astype("datetime64[M]")
     month_agg = (
         df.groupby("month")
           .agg(
-              spend=("amount", lambda x: x[x < 0].sum()),   # negatives as spend
               inflow=("amount", lambda x: x[x > 0].sum()),
               txns=("amount", "count"),
-              income=("income", "max")  # assume monthly income repeated
           )
           .reset_index()
     )
-    # categories per month (diversity proxy)
     cats = (df.assign(cnt=1)
-              .pivot_table(index="month", columns="category", values="cnt", aggfunc="sum", fill_value=0))
     cats.columns = [f"cat_{c}" for c in cats.columns]
     out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
-    # Targets: next-month spend
     out = out.sort_values("month")
-    out["target_next_spend"] = out["spend"].shift(-1)  # what we want to predict
-    # Basic time features
     out["m_num"] = out["month"].dt.month
     out["y_num"] = out["month"].dt.year
-    # lag features
     out["spend_lag1"] = out["spend"].shift(1)
     out["spend_lag2"] = out["spend"].shift(2)
     out["inflow_lag1"] = out["inflow"].shift(1)
     out = out.dropna().reset_index(drop=True)
     return out
@@ -67,16 +71,18 @@ def train_model(monthly_df):
         mae = np.nan
     return model, feature_cols, mae
 def predict_next(monthly_df, model, feature_cols):
-    # Use last observed month’s features to predict next-month spend
     last = monthly_df.iloc[[-1]][feature_cols]
     pred = float(model.predict(last)[0])
-    # A simple overspend risk flag: predict spend more negative than 90% of past spends
-    p90 = np.percentile(monthly_df["spend"], 10)  # more negative = higher spend
-    risk = "High" if pred <= p90 else "Low"
-    # Return predicted NEXT month label
-    next_month = (monthly_df["month"].iloc[-1] + np.timedelta64(1, "M")).astype("datetime64[M]").astype("datetime64[D]")
-    next_month = pd.to_datetime(next_month).strftime("%Y-%m")
     return next_month, pred, risk
 # --------------------------
@@ -132,6 +138,6 @@ with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
     btn = gr.Button("Run Forecast")
     summary = gr.Dataframe(label="Summary")
     monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
-    btn.click(load_or_demo, inputs=[file, budget], outputs=[summary, monthly_table])
 demo.launch()

 # --------------------------
 # Minimal feature engineering
 # --------------------------
# MonthBegin is kept in scope because predict_next further down the file uses it.
from pandas.tseries.offsets import MonthBegin


def prep_monthly_features(df):
    """Aggregate raw transactions into one model-ready row per calendar month.

    Args:
        df: Transaction-level DataFrame with the columns ``date``,
            ``amount``, ``category`` and ``income``. Negative amounts are
            summed as spend, positive amounts as inflow. Rows whose ``date``
            cannot be parsed are discarded.

    Returns:
        A DataFrame sorted by ``month`` (month-start timestamps) containing
        ``spend``, ``inflow``, ``txns``, ``income``, one ``cat_<category>``
        transaction-count column per category, the calendar features
        ``m_num`` / ``y_num``, the lag features ``spend_lag1``,
        ``spend_lag2``, ``inflow_lag1`` and the label ``target_next_spend``.
        Months that lack any lag or the label (the first two and the last
        observed month) are dropped.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    required = ("date", "amount", "category", "income")
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Fail early with the full list instead of on the first lookup.
        raise KeyError(f"missing required column(s): {missing}")

    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # A monthly Period converts straight to its month-start timestamp, so no
    # MonthEnd/MonthBegin offset round trip is needed.
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    # Split the signed amounts once, vectorised: clipping at zero leaves only
    # the negative (spend) or positive (inflow) part, so a plain sum matches
    # summing the filtered values per group.
    signed = df.assign(
        _spend=df["amount"].clip(upper=0),
        _inflow=df["amount"].clip(lower=0),
    )
    month_agg = (
        signed.groupby("month")
        .agg(
            spend=("_spend", "sum"),
            inflow=("_inflow", "sum"),
            txns=("amount", "count"),
            income=("income", "max"),
        )
        .reset_index()
    )

    # Transactions per category and month.
    cats = df.assign(cnt=1).pivot_table(
        index="month",
        columns="category",
        values="cnt",
        aggfunc="sum",
        fill_value=0,
    )
    cats.columns = [f"cat_{c}" for c in cats.columns]

    out = month_agg.merge(
        cats, left_on="month", right_index=True, how="left"
    ).fillna(0)
    out = out.sort_values("month")

    # Label: the following month's spend.
    out["target_next_spend"] = out["spend"].shift(-1)

    # Calendar features.
    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year

    # Lag features.
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    # Rows without a complete set of lags or without a label are unusable.
    return out.dropna().reset_index(drop=True)
         mae = np.nan
     return model, feature_cols, mae
from pandas.tseries.offsets import MonthEnd


def predict_next(monthly_df, model, feature_cols, *, risk_percentile=10):
    """Predict spend for the month after the last row of ``monthly_df``.

    Args:
        monthly_df: Monthly feature table with at least the columns
            ``month``, ``spend`` and every name in ``feature_cols``.
        model: Fitted estimator exposing ``predict``; it is called with the
            last row of ``monthly_df`` restricted to ``feature_cols``.
        feature_cols: Names of the feature columns passed to the model.
        risk_percentile: Percentile of historical ``spend`` used as the
            overspend threshold. Spend is negative, so a low percentile
            marks the heaviest-spending months. Defaults to 10.

    Returns:
        A tuple ``(next_month, pred, risk)``: the label of the predicted
        month formatted as ``"YYYY-MM"``, the prediction as a float, and
        ``"High"`` when the prediction is at or below the threshold,
        otherwise ``"Low"``.

    Raises:
        IndexError: If ``monthly_df`` has no rows.
    """
    if len(monthly_df) == 0:
        raise IndexError("monthly_df is empty; no month to predict from")

    # Double brackets keep the row two-dimensional, as predict() expects.
    last = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(last)[0])

    # Overspend threshold: more negative means higher spend.
    threshold = np.percentile(monthly_df["spend"], risk_percentile)
    risk = "High" if pred <= threshold else "Low"

    # MonthBegin(1) rolls forward to the first day of the following month.
    last_month = pd.to_datetime(monthly_df["month"].iloc[-1])
    next_month = (last_month + pd.offsets.MonthBegin(1)).strftime("%Y-%m")
    return next_month, pred, risk
 # --------------------------
     btn = gr.Button("Run Forecast")
     summary = gr.Dataframe(label="Summary")
     monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
+    btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
 demo.launch()