Spaces:

ThinkerFactory
/

README

Sleeping

App Files Files Community

MrThinker098 committed on Sep 5, 2025

Commit

273b603

verified ·

1 Parent(s): c133362

First commit: add retail finance forecasting app

Browse files

Files changed (1) hide show

app.py +137 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

import io
import os
from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
+# --------------------------
+# Minimal feature engineering
+# --------------------------
def prep_monthly_features(df):
    """Aggregate raw transactions into one feature row per calendar month.

    Args:
        df: DataFrame with columns ``date`` (anything ``pd.to_datetime``
            accepts), ``amount`` (negative values are spend, positive values
            are inflow), ``category`` and ``income``. The frame is not
            modified.

    Returns:
        DataFrame sorted by ``month`` (first-of-month timestamps) with:
        ``spend`` / ``inflow`` (sums of negative / positive amounts),
        ``txns`` (count of non-null amounts), ``income`` (monthly max), one
        ``cat_<category>`` transaction-count column per category,
        ``target_next_spend`` (the following month's ``spend``), ``m_num``,
        ``y_num``, ``spend_lag1``, ``spend_lag2`` and ``inflow_lag1``.
        Rows with any missing value are dropped, which removes the first two
        months (no two-month lag) and the most recent month (its target is
        not known yet).

    Raises:
        ValueError: if a required column is missing.
    """
    required = {"date", "amount", "category", "income"}
    missing = required - set(df.columns)
    if missing:
        # Fail early with a readable message instead of a KeyError raised
        # from deep inside the groupby machinery.
        raise ValueError(f"Transactions are missing columns: {sorted(missing)}")

    dates = pd.to_datetime(df["date"])
    # Truncate every date to the first day of its month.
    month = dates.dt.to_period("M").dt.to_timestamp()
    amount = df["amount"]

    # clip() zeroes out the opposite sign, so a plain sum equals the sum of
    # the negative-only / positive-only amounts without per-group lambdas.
    per_txn = pd.DataFrame(
        {
            "month": month,
            "spend": amount.clip(upper=0),
            "inflow": amount.clip(lower=0),
            "amount": amount,
            "income": df["income"],
        }
    )
    month_agg = (
        per_txn.groupby("month")
        .agg(
            spend=("spend", "sum"),
            inflow=("inflow", "sum"),
            txns=("amount", "count"),
            income=("income", "max"),  # assumes monthly income is repeated per row
        )
        .reset_index()
    )

    # Transactions per category per month (diversity proxy).
    cats = pd.crosstab(month, df["category"])
    cats.columns = [f"cat_{c}" for c in cats.columns]

    out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)
    out = out.sort_values("month")

    # Target: the spend of the month that follows each row.
    out["target_next_spend"] = out["spend"].shift(-1)

    # Basic calendar features.
    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year

    # Lag features.
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    return out.dropna().reset_index(drop=True)
def train_model(monthly_df):
    """Fit a linear model that predicts next-month spend.

    Args:
        monthly_df: Monthly feature frame containing ``month``,
            ``target_next_spend`` and numeric feature columns. Every column
            except ``month`` and ``target_next_spend`` is used as a feature.

    Returns:
        Tuple ``(model, feature_cols, mae)``:
        ``model`` is a ``LinearRegression`` fitted on all rows,
        ``feature_cols`` is the ordered list of feature column names, and
        ``mae`` is the mean absolute error on the last 3 rows of a separate
        model fitted only on the rows before them (``np.nan`` when there are
        fewer than 4 rows).
    """
    holdout = 3
    y = monthly_df["target_next_spend"].to_numpy()
    feature_cols = [
        c for c in monthly_df.columns if c not in ("month", "target_next_spend")
    ]
    X = monthly_df[feature_cols]

    if len(monthly_df) > holdout:
        # Score on rows the scoring model has never seen; evaluating the
        # final model on its own training rows would report training error
        # rather than a backtest.
        backtest = LinearRegression().fit(X.iloc[:-holdout], y[:-holdout])
        mae = mean_absolute_error(y[-holdout:], backtest.predict(X.iloc[-holdout:]))
    else:
        mae = np.nan

    # The model handed back to the caller uses every available row.
    model = LinearRegression().fit(X, y)
    return model, feature_cols, mae
def predict_next(monthly_df, model, feature_cols):
    """Predict spend for the month after the last row of ``monthly_df``.

    Args:
        monthly_df: Monthly feature frame with at least ``month`` (datetime),
            ``spend`` and every column named in ``feature_cols``.
        model: Fitted estimator exposing ``predict``.
        feature_cols: Feature column names, in the order used for fitting.

    Returns:
        Tuple ``(next_month, pred, risk)``: ``next_month`` is a ``"YYYY-MM"``
        label for the month following the last row, ``pred`` is the predicted
        spend as a float (spend is negative), and ``risk`` is ``"High"`` when
        the prediction is at or below the 10th percentile of the ``spend``
        column, otherwise ``"Low"``.
    """
    last_row = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(last_row)[0])

    # Spend is stored as negative numbers, so the 10th percentile marks the
    # heaviest-spending ~10% of months.
    heavy_spend_cutoff = np.percentile(monthly_df["spend"], 10)
    risk = "High" if pred <= heavy_spend_cutoff else "Low"

    # Calendar-month arithmetic through Period: a pandas Timestamp cannot be
    # shifted by a NumPy "M" timedelta and has no .astype().
    last_month = pd.Timestamp(monthly_df["month"].iloc[-1]).to_period("M")
    next_month = (last_month + 1).strftime("%Y-%m")
    return next_month, pred, risk
+# --------------------------
+# Gradio interface functions
+# --------------------------
def _demo_transactions():
    """Build a synthetic 2024 transaction log (365 days) for the demo path."""
    # Private generator: yields the same stream as the legacy global
    # np.random.seed(7) without reseeding the process-wide RNG on every call.
    rs = np.random.RandomState(7)
    categories = ["groceries", "rent", "utilities", "fun", "transport"]
    income = 3500.0
    rows = []
    for day in pd.date_range("2024-01-01", periods=365, freq="D"):
        # Income lands on the first of each month.
        if day.day == 1:
            rows.append({"date": day, "amount": income, "category": "income", "income": income})
        # A random number of spends per day; amounts are negative.
        for _ in range(rs.poisson(1.8)):
            amt = -rs.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05])
            rows.append({"date": day, "amount": amt, "category": rs.choice(categories), "income": income})
    return pd.DataFrame(rows)


def load_or_demo(file, budget):
    """Run the forecast for an uploaded CSV, or for demo data when none is given.

    Args:
        file: ``None`` to use the synthetic demo data; otherwise a filesystem
            path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute is the path of the uploaded CSV.
        budget: Monthly budget as a positive number; ``None``, ``""`` or ``0``
            means no budget is set.

    Returns:
        Tuple ``(summary, monthly_view)`` of DataFrames: ``summary`` has
        ``metric`` / ``value`` columns (prediction, MAE, risk flag, budget
        check) and ``monthly_view`` lists the monthly aggregates with
        ``month`` formatted as ``"YYYY-MM"``.

    Raises:
        ValueError: if the CSV lacks a required column or fewer than 6 usable
            months remain after feature preparation.
    """
    if file is None:
        df = _demo_transactions()
    else:
        # The upload may arrive as a plain path or as a file wrapper with .name.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)
        needed = {"date", "amount", "category", "income"}
        missing = needed - set(df.columns)
        if missing:
            raise ValueError(f"CSV is missing columns: {sorted(missing)}")

    # Fit model and predict.
    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data for a useful forecast (demo provides this).")
    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget comparison: spend is negative, so over budget means the
    # prediction lies below -budget.
    budget = float(budget) if budget is not None and budget != "" else 0.0
    if not budget:
        vs_budget = "No budget set"
    elif spend_pred < -abs(budget):
        vs_budget = "Over budget"
    else:
        vs_budget = "Within budget"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, vs_budget],
    })
    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
    return summary, monthly_view
# UI wiring: a CSV upload and a budget input feed load_or_demo, whose two
# returned DataFrames fill the two tables below the button.
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    gr.Markdown("## Retail Finance Spend Forecaster\nUpload your transactions CSV (columns: `date, amount, category, income`) or use demo data. Model forecasts **next-month spend** and flags **overspend risk**.")
    with gr.Row():
        # Only CSV input is parsed, so limit the picker to .csv files.
        file = gr.File(label="Upload CSV (optional)", file_types=[".csv"])
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")
    btn = gr.Button("Run Forecast")
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
    btn.click(load_or_demo, inputs=[file, budget], outputs=[summary, monthly_table])

# Start the server only when executed as a script, so importing this module
# does not launch a blocking web server.
if __name__ == "__main__":
    demo.launch()