Spaces:

ThinkerFactory
/

README

Sleeping

File size: 5,831 Bytes

273b603
 
 
 
72058ba
710adcc
 
 
72058ba
710adcc
 
 
 
 
 
273b603
ed237df
 
 
72058ba
710adcc
ed237df
72058ba
273b603
710adcc
273b603
ed237df
273b603
 
710adcc
273b603
 
ed237df
72058ba
710adcc
 
 
 
 
 
 
273b603
 
72058ba
710adcc
ed237df
 
273b603
 
 
 
 
ed237df
273b603
 
 
710adcc
 
 
 
273b603
710adcc
273b603
710adcc
273b603
 
710adcc
273b603
 
 
 
710adcc
273b603
 
 
710adcc
ed237df
710adcc
273b603
 
ed237df
72058ba
710adcc
ed237df
 
72058ba
710adcc
 
273b603
 
72058ba
273b603
72058ba
273b603
 
710adcc
273b603
 
710adcc
273b603
 
 
710adcc
 
 
273b603
 
 
710adcc
 
 
 
 
273b603
 
710adcc
 
273b603
 
710adcc
72058ba
710adcc
 
 
 
 
 
 
 
 
 
273b603
710adcc
 
273b603
710adcc
 
273b603
710adcc
273b603
 
710adcc
 
 
 
 
 
 
 
72058ba
273b603
710adcc
 
 
 
 
273b603
 
 
 
 
 
72058ba
273b603
710adcc

import numpy as np
import pandas as pd
import gradio as gr

# -------- helpers --------
def month_start(ts):
    """Map a date-like value to midnight on the first day of its month.

    ``ts`` is anything ``pd.to_datetime`` accepts (string, datetime,
    Timestamp, ...). The value is converted to a monthly period and then
    back to a timestamp anchored at the start of that period.
    """
    monthly_period = pd.to_datetime(ts).to_period("M")
    # how="start" is the pandas default; spelled out so the intent is visible.
    return monthly_period.to_timestamp(how="start")  # YYYY-MM-01

def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate raw transactions into one feature row per calendar month.

    Expect columns: date, amount, category, income
    amount: +inflows, -spend

    Rows whose ``date`` or ``amount`` cannot be parsed are discarded.

    Returns a frame sorted by ``month`` with the columns
    ``month, spend, inflow, txns, income, cat_<category>...,
    target_next_spend, m_num, y_num, spend_lag1, spend_lag2, inflow_lag1``.

    * The earliest two observed months are dropped because their lag
      features are undefined.
    * The most recent month IS kept even though its ``target_next_spend``
      is NaN: it is the row a forecast for the following month must be
      made from. ``train_model`` skips it when fitting.
    * Months with no transactions produce no row, so lags and targets
      refer to the previous/next *observed* month.
    """
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    # Coerce numerics so a stray string in the CSV becomes NaN instead of
    # crashing the comparisons / aggregations below.
    df["amount"] = pd.to_numeric(df["amount"], errors="coerce")
    df["income"] = pd.to_numeric(df["income"], errors="coerce")
    df = df.dropna(subset=["date", "amount"])

    # month bucket (vectorised: first day of each transaction's month)
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    # monthly aggregates
    month_agg = (
        df.groupby("month", as_index=False)
          .agg(
              spend=("amount", lambda x: x[x < 0].sum()),
              inflow=("amount", lambda x: x[x > 0].sum()),
              txns=("amount", "count"),
              income=("income", "max"),
          )
    )

    # category counts per month (diversity proxy)
    cats = (
        df.assign(cnt=1)
          .pivot_table(index="month", columns="category", values="cnt",
                       aggfunc="sum", fill_value=0)
          .rename(columns=lambda c: f"cat_{c}")
    )

    out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)

    # target & lags
    out = out.sort_values("month").reset_index(drop=True)
    out["target_next_spend"] = out["spend"].shift(-1)

    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    # Only require the lag features. Dropping on the target as well would
    # throw away the latest month, and the "next month" forecast would then
    # be made for a month whose spend is already known.
    lag_cols = ["spend_lag1", "spend_lag2", "inflow_lag1"]
    out = out.dropna(subset=lag_cols).reset_index(drop=True)
    return out

def train_model(monthly_df: pd.DataFrame):
    """
    Fit a linear regression predicting ``target_next_spend``.

    Every column except ``month`` and ``target_next_spend`` is used as a
    feature. Rows without a target (the most recent month) are excluded
    from fitting.

    Returns ``(model, feature_cols, mae)``:

    * ``model`` is fitted on all labelled rows.
    * ``feature_cols`` is the ordered list of feature column names.
    * ``mae`` is the mean absolute error on the last 3 labelled months,
      measured with a separate model fitted only on the months before
      them (a true hold-out). It is NaN with fewer than 4 labelled rows.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    feature_cols = [c for c in monthly_df.columns if c not in ("month", "target_next_spend")]
    labelled = monthly_df.dropna(subset=["target_next_spend"])
    X = labelled[feature_cols]
    y = labelled["target_next_spend"].to_numpy()

    holdout = 3
    mae = np.nan
    if len(labelled) >= holdout + 1:
        # Score on months the evaluation model has not seen; scoring the
        # final model on its own training rows would understate the error.
        eval_model = LinearRegression()
        eval_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = eval_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))

    # The model handed back to the caller uses every labelled month.
    model = LinearRegression()
    model.fit(X, y)

    return model, feature_cols, mae

def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """
    Forecast spend for the month after the last row of ``monthly_df``.

    Returns ``(next_month, pred, risk)``:

    * ``next_month`` is the forecast month formatted as ``"YYYY-MM"``.
    * ``pred`` is the model output for the last row's features.
    * ``risk`` is ``"High"`` when ``pred`` is at or below the 10th
      percentile of observed monthly ``spend``, otherwise ``"Low"``.
    """
    last = monthly_df.iloc[[-1]][feature_cols]
    pred = float(model.predict(last)[0])

    # overspend risk (more negative spend = higher spend)
    p10 = float(np.percentile(monthly_df["spend"], 10))
    risk = "High" if pred <= p10 else "Low"

    # next month label: MonthBegin(1) always lands on the first day of the
    # following month, so no further normalisation is needed.
    last_month = monthly_df["month"].iloc[-1]
    next_month = (last_month + pd.offsets.MonthBegin(1)).strftime("%Y-%m")
    return next_month, pred, risk

# -------- app logic --------
def _demo_transactions() -> pd.DataFrame:
    """Build one year (2024) of reproducible synthetic daily transactions."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    cats = ["groceries", "rent", "utilities", "fun", "transport"]
    income = 3500.0
    gen = np.random.default_rng(7)  # fixed seed: the demo is identical on every run
    rows = []
    for d in days:
        if d.day == 1:
            rows.append({"date": d, "amount": income, "category": "income", "income": income})
        for _ in range(gen.poisson(2)):
            amt = -float(gen.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05]))
            rows.append({"date": d, "amount": amt, "category": gen.choice(cats), "income": income})
    return pd.DataFrame(rows)

def _parse_budget(budget) -> float:
    """Return ``budget`` as a float; 0.0 means "no budget set"."""
    if budget in (None, ""):
        return 0.0
    try:
        value = float(budget)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN is truthy and compares False to everything, which would report
    # "Within budget" for a value that is not a budget at all.
    return 0.0 if np.isnan(value) else value

def load_or_demo(file, budget):
    """
    Run the full forecast pipeline on an uploaded CSV or on demo data.

    Parameters
    ----------
    file
        ``None`` to use the built-in demo data; otherwise either a path
        (``str`` / ``os.PathLike``) or an object whose ``.name`` attribute
        is the path of the uploaded CSV. The CSV must contain the columns
        ``date, amount, category, income``.
    budget
        Monthly budget as a positive number. ``None``, ``""``, ``0``, NaN
        or an unparseable value mean "no budget".

    Returns
    -------
    (summary, monthly_view)
        ``summary`` is a two-column (``metric``, ``value``) frame;
        ``monthly_view`` holds the monthly aggregates with ``month``
        formatted as ``YYYY-MM``.

    Raises
    ------
    ValueError
        If required CSV columns are missing or fewer than 6 monthly
        feature rows are available.
    """
    import os

    if file is None:
        df = _demo_transactions()
    else:
        # Depending on the Gradio version/configuration the upload arrives
        # either as a plain path or as a temp-file wrapper exposing `.name`.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)
        needed = {"date", "amount", "category", "income"}
        missing = needed - set(df.columns)
        if missing:
            raise ValueError(f"CSV is missing columns: {sorted(missing)}")

    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data (the demo provides this).")

    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # budget evaluation (spend is negative, so "over" means below -budget)
    budget_val = _parse_budget(budget)
    if budget_val:
        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
    else:
        budget_check = "No budget set"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m]
    })

    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")

    return summary, monthly_view

def safe_run(file, budget):
    """Click handler: run ``load_or_demo`` and report any failure via ``gr.Error``.

    The full traceback is printed to stdout, while the raised ``gr.Error``
    carries only the exception message.
    """
    import traceback

    try:
        result = load_or_demo(file, budget)
    except Exception as exc:
        print("TRACEBACK:\n", traceback.format_exc())
        raise gr.Error(str(exc))
    return result

# -------- UI --------
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    # Header / usage instructions.
    gr.Markdown(
        "## Retail Finance Spend Forecaster\n"
        "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
        "The model forecasts **next-month spend** and flags **overspend risk**."
    )

    # Inputs side by side: optional CSV upload and the monthly budget.
    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")

    btn = gr.Button("Run Forecast")

    # Outputs: headline metrics, then the monthly table behind them.
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")

    # safe_run returns (summary, monthly_view), matching the outputs order.
    btn.click(
        fn=safe_run,
        inputs=[file, budget],
        outputs=[summary, monthly_table],
    )

# Start the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()