Spaces:

ThinkerFactory
/

README

Sleeping

App Files Files Community

MrThinker098 committed on Sep 5, 2025

Commit

710adcc

verified ·

1 Parent(s): ed237df

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -60

app.py CHANGED Viewed

@@ -1,48 +1,53 @@
-import io
 import numpy as np
 import pandas as pd
 from datetime import datetime
-from dateutil.relativedelta import relativedelta
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.compose import ColumnTransformer
-from sklearn.pipeline import Pipeline
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_absolute_error
 import gradio as gr
 # --------------------------
-# Minimal feature engineering
 # --------------------------
-from pandas.tseries.offsets import MonthBegin
-def prep_monthly_features(df):
     df = df.copy()
     df["date"] = pd.to_datetime(df["date"], errors="coerce")
     df = df.dropna(subset=["date"])
-    # use month PERIOD → safe month start timestamps
-    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp("M") - pd.offsets.MonthEnd(1) + MonthBegin(1)
     month_agg = (
-        df.groupby("month")
           .agg(
               spend=("amount", lambda x: x[x < 0].sum()),
               inflow=("amount", lambda x: x[x > 0].sum()),
               txns=("amount", "count"),
-              income=("income", "max")
           )
-          .reset_index()
     )
-    cats = (df.assign(cnt=1)
-              .pivot_table(index="month", columns="category", values="cnt",
-                           aggfunc="sum", fill_value=0))
-    cats.columns = [f"cat_{c}" for c in cats.columns]
     out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
-    out = out.sort_values("month")
     out["target_next_spend"] = out["spend"].shift(-1)
     out["m_num"] = out["month"].dt.month
@@ -54,84 +59,112 @@ def prep_monthly_features(df):
     out = out.dropna().reset_index(drop=True)
     return out
-def train_model(monthly_df):
-    # Simple linear model
     y = monthly_df["target_next_spend"].values
-    feature_cols = [c for c in monthly_df.columns if c not in ["month","target_next_spend"]]
     X = monthly_df[feature_cols].copy()
     model = LinearRegression()
     model.fit(X, y)
-    # quick backtest MAE on last 3 months
     if len(monthly_df) >= 4:
         X_hold = X.tail(3)
         y_hold = y[-3:]
         preds = model.predict(X_hold)
-        mae = mean_absolute_error(y_hold, preds)
     else:
         mae = np.nan
-    return model, feature_cols, mae
-from pandas.tseries.offsets import MonthEnd
-def predict_next(monthly_df, model, feature_cols):
     last = monthly_df.iloc[[-1]][feature_cols]
     pred = float(model.predict(last)[0])
-    # overspend risk threshold (more negative = higher spend)
-    p10 = np.percentile(monthly_df["spend"], 10)
     risk = "High" if pred <= p10 else "Low"
-    last_month = pd.to_datetime(monthly_df["month"].iloc[-1])
-    next_month = (last_month + MonthBegin(1)).strftime("%Y-%m")
     return next_month, pred, risk
 # --------------------------
-# Gradio interface functions
 # --------------------------
 def load_or_demo(file, budget):
     if file is None:
-        # Build a tiny synthetic demo dataset (12 months)
         rng = pd.date_range("2024-01-01", periods=365, freq="D")
-        cats = ["groceries","rent","utilities","fun","transport"]
         rows = []
         income = 3500.0
-        np.random.seed(7)
         for d in rng:
-            # income on the first of each month
             if d.day == 1:
                 rows.append({"date": d, "amount": income, "category": "income", "income": income})
-            # random spends
-            for _ in range(np.random.poisson(1.8)):
-                amt = -np.random.choice([15,25,40,60,120,300], p=[.25,.25,.2,.15,.1,.05])
-                rows.append({"date": d, "amount": amt, "category": np.random.choice(cats), "income": income})
         df = pd.DataFrame(rows)
     else:
         df = pd.read_csv(file.name)
-        # basic sanity
-        needed = {"date","amount","category","income"}
-        missing = needed - set(df.columns)
-        if missing:
-            raise ValueError(f"CSV is missing columns: {sorted(missing)}")
-    # Fit model and predict
     m = prep_monthly_features(df)
     if len(m) < 6:
-        raise ValueError("Need at least ~6 months of data for a useful forecast (demo provides this).")
     model, feats, mae = train_model(m)
     next_m, spend_pred, risk = predict_next(m, model, feats)
-    # Budget comparison
-    budget = float(budget) if budget is not None and budget != "" else 0.0
-    vs_budget = ("Over budget" if spend_pred < -abs(budget) else "Within budget") if budget else "No budget set"
-    # Returnables
     summary = pd.DataFrame({
-        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check"],
-        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, vs_budget]
     })
-    monthly_view = m[["month","spend","inflow","txns","income"]].copy()
     monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
     return summary, monthly_view
 with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
-    gr.Markdown("## Retail Finance Spend Forecaster\nUpload your transactions CSV (columns: `date, amount, category, income`) or use demo data. Model forecasts **next-month spend** and flags **overspend risk**.")
     with gr.Row():
         file = gr.File(label="Upload CSV (optional)")
         budget = gr.Number(value=2500, label="Monthly budget (positive number)")
@@ -140,4 +173,5 @@ with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
     monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
     btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
-demo.launch()

 import numpy as np
 import pandas as pd
 from datetime import datetime
 import gradio as gr
 # --------------------------
+# Helpers
 # --------------------------
def month_start(ts):
    """Return the first day of the month containing ``ts``.

    Args:
        ts: Anything ``pd.to_datetime`` accepts: a scalar (string, datetime,
            ``Timestamp``), a list / ``DatetimeIndex``, or a ``Series``.

    Returns:
        A ``Timestamp`` at midnight on the 1st of the month for scalar input,
        ``pd.NaT`` for a missing scalar (``None`` / ``NaT``), or a
        ``Series`` / ``DatetimeIndex`` of month starts for array-like input.
    """
    stamp = pd.to_datetime(ts)
    if isinstance(stamp, pd.Series):
        # Series.to_period would convert the *index*; the values need `.dt`.
        return stamp.dt.to_period("M").dt.to_timestamp()
    if isinstance(stamp, pd.DatetimeIndex):
        return stamp.to_period("M").to_timestamp()
    if pd.isna(stamp):
        # pd.to_datetime(None) yields None, which has no to_period().
        return pd.NaT
    # Period -> Timestamp defaults to the period start, i.e. YYYY-MM-01.
    return stamp.to_period("M").to_timestamp()
+def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Expect columns: date, amount, category, income
+    amount: +inflows, -spend
+    """
     df = df.copy()
+    # Parse dates safely
     df["date"] = pd.to_datetime(df["date"], errors="coerce")
     df = df.dropna(subset=["date"])
+    # Month bucket (always the 1st of month)
+    df["month"] = df["date"].apply(month_start)
+    # Monthly aggregates
     month_agg = (
+        df.groupby("month", as_index=False)
           .agg(
               spend=("amount", lambda x: x[x < 0].sum()),
               inflow=("amount", lambda x: x[x > 0].sum()),
               txns=("amount", "count"),
+              income=("income", "max"),
           )
     )
+    # Category counts per month (diversity proxy)
+    cats = (
+        df.assign(cnt=1)
+          .pivot_table(index="month", columns="category", values="cnt",
+                       aggfunc="sum", fill_value=0)
+          .rename(columns=lambda c: f"cat_{c}")
+    )
     out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
+    # Sort and create target/lag features
+    out = out.sort_values("month").reset_index(drop=True)
     out["target_next_spend"] = out["spend"].shift(-1)
     out["m_num"] = out["month"].dt.month
     out = out.dropna().reset_index(drop=True)
     return out
def train_model(monthly_df: pd.DataFrame):
    """Fit a linear model for next-month spend and estimate its backtest error.

    Args:
        monthly_df: Monthly feature table containing a ``target_next_spend``
            column and a ``month`` column; every other column is used as a
            feature. Rows are assumed to be in chronological order (oldest
            first) so that the last rows are the most recent months.

    Returns:
        A tuple ``(model, feature_cols, mae)`` where ``model`` is a
        ``LinearRegression`` fitted on all rows, ``feature_cols`` is the list
        of feature column names in training order, and ``mae`` is the mean
        absolute error on the last 3 rows (``nan`` when there are fewer than
        4 rows).
    """
    # scikit-learn is only needed by this function, so it is imported locally.
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    holdout = 3
    y = monthly_df["target_next_spend"].values
    feature_cols = [c for c in monthly_df.columns if c not in ("month", "target_next_spend")]
    X = monthly_df[feature_cols].copy()

    if len(monthly_df) >= holdout + 1:
        # Score a model that has NOT seen the held-out months; scoring the
        # final model on rows it was fitted on would report in-sample error.
        backtest_model = LinearRegression()
        backtest_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = backtest_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))
    else:
        mae = np.nan

    # The model handed back to callers is trained on every available month.
    model = LinearRegression()
    model.fit(X, y)
    return model, feature_cols, mae
def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """Forecast spend from the most recent row and classify overspend risk.

    Args:
        monthly_df: Monthly feature table with ``spend`` and ``month`` columns
            plus every column named in ``feature_cols``.
        model: Fitted estimator exposing ``predict``.
        feature_cols: Feature column names, in the order the model expects.

    Returns:
        ``(next_month, pred, risk)``: the ``YYYY-MM`` label of the month after
        the last row, the predicted spend as a float, and ``"High"`` or
        ``"Low"``.
    """
    latest_features = monthly_df.iloc[[-1]].loc[:, feature_cols]
    pred = float(model.predict(latest_features)[0])

    # Spend is negative, so the 10th percentile is the heaviest-spending end
    # of the history; a forecast at or below it is flagged as high risk.
    heavy_spend_cutoff = float(np.percentile(monthly_df["spend"], 10))
    if pred <= heavy_spend_cutoff:
        risk = "High"
    else:
        risk = "Low"

    # Step one month past the last row and render it as a YYYY-MM label.
    following = monthly_df["month"].iloc[-1] + pd.offsets.MonthBegin(1)
    next_month = month_start(following).strftime("%Y-%m")
    return next_month, pred, risk
 # --------------------------
+# App logic
 # --------------------------
 def load_or_demo(file, budget):
+    # Demo data if no file
     if file is None:
         rng = pd.date_range("2024-01-01", periods=365, freq="D")
+        cats = ["groceries", "rent", "utilities", "fun", "transport"]
         rows = []
         income = 3500.0
+        rng_seed = np.random.default_rng(7)
         for d in rng:
             if d.day == 1:
                 rows.append({"date": d, "amount": income, "category": "income", "income": income})
+            # Poisson number of daily spend txns
+            for _ in range(rng_seed.poisson(2)):
+                amt = -float(rng_seed.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05]))
+                rows.append({"date": d, "amount": amt, "category": rng_seed.choice(cats), "income": income})
         df = pd.DataFrame(rows)
     else:
         df = pd.read_csv(file.name)
+        needed = {"date", "amount", "category", "income"}
+        miss = needed - set(df.columns)
+        if miss:
+            raise ValueError(f"CSV is missing columns: {sorted(miss)}")
     m = prep_monthly_features(df)
     if len(m) < 6:
+        raise ValueError("Need at least ~6 months of data (the demo provides this).")
     model, feats, mae = train_model(m)
     next_m, spend_pred, risk = predict_next(m, model, feats)
+    # Budget evaluation
+    try:
+        budget_val = float(budget) if budget not in (None, "") else 0.0
+    except Exception:
+        budget_val = 0.0
+    if budget_val:
+        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
+    else:
+        budget_check = "No budget set"
     summary = pd.DataFrame({
+        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
+        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m]
     })
+    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
     monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
     return summary, monthly_view
def safe_run(file, budget):
    """Call ``load_or_demo`` and surface any failure as a Gradio error.

    Args:
        file: Uploaded file or ``None``; passed through to ``load_or_demo``.
        budget: Monthly budget value; passed through to ``load_or_demo``.

    Returns:
        Whatever ``load_or_demo`` returns.

    Raises:
        gr.Error: Wrapping any exception raised by ``load_or_demo``; the
            original exception is kept as ``__cause__``.
    """
    try:
        return load_or_demo(file, budget)
    except Exception as exc:
        import traceback

        # Print the full traceback; the UI only receives the short message.
        print("TRACEBACK:\n", traceback.format_exc())
        # Some exceptions stringify to "", which would show a blank error.
        message = str(exc) or type(exc).__name__
        raise gr.Error(message) from exc
+# --------------------------
+# Gradio UI
+# --------------------------
 with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
+    gr.Markdown(
+        "## Retail Finance Spend Forecaster\n"
+        "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
+        "The model forecasts **next-month spend** and flags **overspend risk**."
+    )
     with gr.Row():
         file = gr.File(label="Upload CSV (optional)")
         budget = gr.Number(value=2500, label="Monthly budget (positive number)")
     monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
     btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])
+if __name__ == "__main__":
+    demo.launch()