File size: 5,831 Bytes
273b603
 
 
 
72058ba
710adcc
 
 
72058ba
710adcc
 
 
 
 
 
273b603
ed237df
 
 
72058ba
710adcc
ed237df
72058ba
273b603
710adcc
273b603
ed237df
273b603
 
710adcc
273b603
 
ed237df
72058ba
710adcc
 
 
 
 
 
 
273b603
 
72058ba
710adcc
ed237df
 
273b603
 
 
 
 
ed237df
273b603
 
 
710adcc
 
 
 
273b603
710adcc
273b603
710adcc
273b603
 
710adcc
273b603
 
 
 
710adcc
273b603
 
 
710adcc
ed237df
710adcc
273b603
 
ed237df
72058ba
710adcc
ed237df
 
72058ba
710adcc
 
273b603
 
72058ba
273b603
72058ba
273b603
 
710adcc
273b603
 
710adcc
273b603
 
 
710adcc
 
 
273b603
 
 
710adcc
 
 
 
 
273b603
 
710adcc
 
273b603
 
710adcc
72058ba
710adcc
 
 
 
 
 
 
 
 
 
273b603
710adcc
 
273b603
710adcc
 
273b603
710adcc
273b603
 
710adcc
 
 
 
 
 
 
 
72058ba
273b603
710adcc
 
 
 
 
273b603
 
 
 
 
 
72058ba
273b603
710adcc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import numpy as np
import pandas as pd
import gradio as gr

# -------- helpers --------
def month_start(ts):
    """Return the Timestamp for the first day of the month containing *ts*.

    *ts* is passed through ``pd.to_datetime`` first, so anything that
    function accepts (string, datetime, Timestamp, ...) may be supplied.
    """
    stamp = pd.to_datetime(ts)
    monthly_period = stamp.to_period("M")
    # Converting the monthly period back to a timestamp yields YYYY-MM-01.
    return monthly_period.to_timestamp()

def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate raw transactions into one feature row per calendar month.

    Expect columns: date, amount, category, income
    amount: +inflows, -spend

    Rows whose ``date`` cannot be parsed are discarded. The input frame is
    not modified.

    Returns a frame sorted by month with the columns
    ``month, spend, inflow, txns, income``, one ``cat_<category>`` transaction
    count per category, ``target_next_spend`` (the following month's spend),
    ``m_num``, ``y_num``, ``spend_lag1``, ``spend_lag2`` and ``inflow_lag1``.
    Rows with a missing lag or target are dropped, so the first two months
    and the most recent month of the input never appear in the result.
    """
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # month bucket (YYYY-MM-01), computed column-wise instead of row by row
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    # Split the signed amount so plain sums give spend (<= 0) and inflow (>= 0).
    signed = df.assign(
        _spend=df["amount"].clip(upper=0),
        _inflow=df["amount"].clip(lower=0),
    )

    # monthly aggregates
    month_agg = (
        signed.groupby("month", as_index=False)
              .agg(
                  spend=("_spend", "sum"),
                  inflow=("_inflow", "sum"),
                  txns=("amount", "count"),
                  income=("income", "max"),
              )
    )

    # category counts per month (diversity proxy)
    cats = (
        df.assign(cnt=1)
          .pivot_table(index="month", columns="category", values="cnt",
                       aggfunc="sum", fill_value=0)
          .rename(columns=lambda c: f"cat_{c}")
    )

    # fillna happens BEFORE the shifted columns exist, so the dropna below
    # only removes rows that lack a lag or a target.
    out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)

    # target & lags
    out = out.sort_values("month").reset_index(drop=True)
    out["target_next_spend"] = out["spend"].shift(-1)

    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    out = out.dropna().reset_index(drop=True)
    return out

def train_model(monthly_df: pd.DataFrame):
    """Fit a linear regression that predicts next-month spend.

    Every column except ``month`` and ``target_next_spend`` is used as a
    feature; ``target_next_spend`` is the label.

    Returns ``(model, feature_cols, mae)``:
      * ``model`` - LinearRegression fitted on ALL rows of ``monthly_df``.
      * ``feature_cols`` - the feature column names, in the order used to fit.
      * ``mae`` - mean absolute error on the last 3 rows, measured with a
        separate model fitted only on the rows before them, or ``nan`` when
        there are fewer than 4 rows.

    The hold-out split takes the trailing rows, so it assumes ``monthly_df``
    is in chronological order - TODO confirm for callers other than the
    monthly feature builder in this file.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    holdout = 3  # number of trailing rows reserved for the error estimate

    y = monthly_df["target_next_spend"].values
    feature_cols = [c for c in monthly_df.columns if c not in ("month", "target_next_spend")]
    X = monthly_df[feature_cols].copy()

    if len(monthly_df) >= holdout + 1:
        # Score with a model that has NOT seen the held-out rows; scoring the
        # final model on rows it was trained on would understate the error.
        eval_model = LinearRegression()
        eval_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = eval_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))
    else:
        mae = np.nan

    # The model handed back for forecasting uses every available row.
    model = LinearRegression()
    model.fit(X, y)

    return model, feature_cols, mae

def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """Forecast spend for the month after the last row of ``monthly_df``.

    Returns ``(next_month, pred, risk)`` where ``next_month`` is a ``YYYY-MM``
    label, ``pred`` is the model output for the last row as a float, and
    ``risk`` is ``"High"`` or ``"Low"``.
    """
    latest_row = monthly_df.iloc[[-1]]
    forecast = float(model.predict(latest_row[feature_cols])[0])

    # Spend is stored as negative numbers, so the 10th percentile is the
    # heavy-spending tail: a forecast at or below it is flagged.
    threshold = float(np.percentile(monthly_df["spend"], 10))
    if forecast <= threshold:
        risk = "High"
    else:
        risk = "Low"

    # Label for the month that follows the last row.
    following = monthly_df["month"].iloc[-1] + pd.offsets.MonthBegin(1)
    label = month_start(following).strftime("%Y-%m")
    return label, forecast, risk

# -------- app logic --------
def _demo_transactions() -> pd.DataFrame:
    """Build one year of synthetic daily transactions (fixed seed, reproducible)."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    categories = ["groceries", "rent", "utilities", "fun", "transport"]
    amounts = [15, 25, 40, 60, 120, 300]
    weights = [.25, .25, .2, .15, .1, .05]
    income = 3500.0
    gen = np.random.default_rng(7)

    rows = []
    for day in days:
        # Salary lands on the first of every month.
        if day.day == 1:
            rows.append({"date": day, "amount": income, "category": "income", "income": income})
        # A Poisson-distributed number of purchases per day; spend is negative.
        for _ in range(gen.poisson(2)):
            amt = -float(gen.choice(amounts, p=weights))
            rows.append({"date": day, "amount": amt, "category": gen.choice(categories), "income": income})
    return pd.DataFrame(rows)


def _parse_budget(budget) -> float:
    """Convert the UI budget value to a float; blank, invalid or NaN gives 0.0."""
    if budget in (None, ""):
        return 0.0
    try:
        value = float(budget)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN is truthy and compares False to everything, which would otherwise
    # be reported as "Within budget".
    return 0.0 if np.isnan(value) else value


def load_or_demo(file, budget):
    """Run the whole forecast pipeline on an uploaded CSV or on demo data.

    Parameters:
      file   - ``None`` to use the built-in demo data; otherwise the uploaded
               CSV, given either as a path (``str`` / ``os.PathLike``) or as
               an object whose ``.name`` attribute is the path.
      budget - monthly budget as a positive number; ``None``, ``""``, ``0``,
               NaN or an unparsable value means "no budget set".

    Returns ``(summary, monthly_view)``: a two-column metric/value DataFrame
    and the per-month aggregates with ``month`` formatted as ``YYYY-MM``.

    Raises ``ValueError`` when the CSV lacks a required column or when fewer
    than 6 monthly feature rows remain after preparation.

    NOTE(review): the monthly feature builder drops the most recent month
    (its next-month target is unknown), so the forecast is for the month
    after the last *retained* row - confirm this horizon is intended.
    """
    import os

    # demo data if no file
    if file is None:
        df = _demo_transactions()
    else:
        # Depending on Gradio version / File(type=...), the upload arrives as
        # a plain path or as a tempfile-like wrapper exposing `.name`.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)
        needed = {"date", "amount", "category", "income"}
        miss = needed - set(df.columns)
        if miss:
            raise ValueError(f"CSV is missing columns: {sorted(miss)}")

    m = prep_monthly_features(df)
    if len(m) < 6:
        raise ValueError("Need at least ~6 months of data (the demo provides this).")

    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # budget evaluation: spend is negative, so compare against -|budget|
    budget_val = _parse_budget(budget)
    if budget_val:
        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
    else:
        budget_check = "No budget set"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m]
    })

    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")

    return summary, monthly_view

def safe_run(file, budget):
    """Click handler: run the forecast and surface any failure as a gr.Error.

    Returns whatever ``load_or_demo(file, budget)`` returns. On any exception
    the traceback is printed and a ``gr.Error`` carrying the exception's
    message is raised instead.
    """
    import traceback

    try:
        result = load_or_demo(file, budget)
    except Exception as exc:
        # Full traceback goes to stdout; the raised error carries only the message.
        details = traceback.format_exc()
        print("TRACEBACK:\n", details)
        raise gr.Error(str(exc))
    return result

# -------- UI --------
# Build the page. `demo` is the Blocks app that the entry-point guard below launches.
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    # Introductory text, including the CSV columns the loader requires.
    gr.Markdown(
        "## Retail Finance Spend Forecaster\n"
        "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
        "The model forecasts **next-month spend** and flags **overspend risk**."
    )
    # Inputs, grouped in one Row: optional CSV upload and the monthly budget.
    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")
    btn = gr.Button("Run Forecast")
    # Outputs: the metric/value summary and the per-month aggregates.
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")
    # Clicking the button calls safe_run(file, budget); its two return values
    # populate `summary` and `monthly_table`, in that order.
    btn.click(safe_run, inputs=[file, budget], outputs=[summary, monthly_table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()