# Source: Hugging Face Space file view (Space status: Sleeping; file size: 5,831 bytes).
import numpy as np
import pandas as pd
import gradio as gr
# -------- helpers --------
def month_start(ts):
    """Snap a date-like value to the first day of its month.

    The input is parsed with ``pd.to_datetime``, converted to a monthly
    period, and converted back to a timestamp.
    """
    stamp = pd.to_datetime(ts)
    monthly_period = stamp.to_period("M")
    # Converting the period back yields its start, i.e. YYYY-MM-01.
    return monthly_period.to_timestamp()
def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate raw transactions into one modelling row per calendar month.

    Args:
        df: Transactions with columns ``date``, ``amount``, ``category`` and
            ``income``. ``amount`` is positive for inflows and negative for
            spend. The input frame is not modified.

    Returns:
        A frame sorted by ``month`` with the monthly aggregates (``spend``,
        ``inflow``, ``txns``, ``income``), one ``cat_<category>`` transaction
        count column per category, calendar features (``m_num``, ``y_num``),
        lag features (``spend_lag1``, ``spend_lag2``, ``inflow_lag1``) and
        the regression target ``target_next_spend``.

        Rows whose lags or target cannot be computed are removed, so the
        first two months and the final month of the input never appear in
        the result.
    """
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    # Rows whose date could not be parsed are discarded.
    df = df.dropna(subset=["date"])

    # Month bucket (YYYY-MM-01), computed in a single vectorised pass rather
    # than re-parsing every timestamp through a per-row Python call.
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    # Monthly aggregates.
    month_agg = (
        df.groupby("month", as_index=False)
        .agg(
            spend=("amount", lambda x: x[x < 0].sum()),
            inflow=("amount", lambda x: x[x > 0].sum()),
            txns=("amount", "count"),
            income=("income", "max"),
        )
    )

    # Per-month transaction counts for each category (diversity proxy).
    cats = (
        df.assign(cnt=1)
        .pivot_table(index="month", columns="category", values="cnt",
                     aggfunc="sum", fill_value=0)
        .rename(columns=lambda c: f"cat_{c}")
    )
    out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)

    # Target and lags; these rely on chronological row order.
    out = out.sort_values("month").reset_index(drop=True)
    out["target_next_spend"] = out["spend"].shift(-1)
    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    # The shifts leave NaN in the first two rows (lags) and the last row
    # (target); those rows are dropped here.
    out = out.dropna().reset_index(drop=True)
    return out
def train_model(monthly_df: pd.DataFrame):
    """Fit a linear regression that predicts ``target_next_spend``.

    Every column except ``month`` and ``target_next_spend`` is used as a
    feature.

    Args:
        monthly_df: Monthly feature frame containing ``target_next_spend``
            and the feature columns.

    Returns:
        A tuple ``(model, feature_cols, mae)``:

        * ``model`` - ``LinearRegression`` fitted on all rows.
        * ``feature_cols`` - the feature column names, in the order used
          for fitting.
        * ``mae`` - mean absolute error on the last 3 rows, measured with a
          separate model fitted only on the rows before them; ``nan`` when
          fewer than 4 rows are available.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    holdout = 3
    y = monthly_df["target_next_spend"].values
    feature_cols = [c for c in monthly_df.columns if c not in ("month", "target_next_spend")]
    X = monthly_df[feature_cols].copy()

    if len(monthly_df) >= holdout + 1:
        # Score on rows the evaluation model has never seen; scoring the
        # final model on its own training rows would report an in-sample
        # error instead of a hold-out error.
        eval_model = LinearRegression()
        eval_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = eval_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))
    else:
        mae = np.nan

    # The model handed back to the caller is fitted on every available row.
    model = LinearRegression()
    model.fit(X, y)
    return model, feature_cols, mae
def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """Forecast spend from the last row of ``monthly_df`` and rate its risk.

    Returns a tuple ``(next_month, pred, risk)``: the ``YYYY-MM`` label of
    the month after the last row, the model's prediction as a float, and
    ``"High"`` or ``"Low"``.
    """
    latest_row = monthly_df.iloc[[-1]]
    forecast = float(model.predict(latest_row[feature_cols])[0])

    # Spend is stored as negative numbers, so the 10th percentile is the
    # heavy-spending end of the history.
    spend_history = monthly_df["spend"]
    threshold = float(np.percentile(spend_history, 10))
    if forecast <= threshold:
        risk = "High"
    else:
        risk = "Low"

    # Label for the month following the last row.
    following = monthly_df["month"].iloc[-1] + pd.offsets.MonthBegin(1)
    label = month_start(following).strftime("%Y-%m")
    return label, forecast, risk
# -------- app logic --------
def _demo_transactions() -> pd.DataFrame:
    """Build one deterministic year (2024) of synthetic daily transactions."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    cats = ["groceries", "rent", "utilities", "fun", "transport"]
    income = 3500.0
    gen = np.random.default_rng(7)  # fixed seed keeps the demo reproducible
    rows = []
    for d in days:
        if d.day == 1:
            # Salary lands on the first of every month.
            rows.append({"date": d, "amount": income, "category": "income", "income": income})
        for _ in range(gen.poisson(2)):
            amt = -float(gen.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05]))
            rows.append({"date": d, "amount": amt, "category": gen.choice(cats), "income": income})
    return pd.DataFrame(rows)


def _read_transactions(file) -> pd.DataFrame:
    """Load the uploaded CSV and check that the required columns exist."""
    # The upload may arrive as an object exposing ``.name`` or as a plain
    # path string; accept both.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)
    needed = {"date", "amount", "category", "income"}
    miss = needed - set(df.columns)
    if miss:
        raise ValueError(f"CSV is missing columns: {sorted(miss)}")
    return df


def _parse_budget(budget) -> float:
    """Coerce the budget input to a float; 0.0 means no budget was set."""
    try:
        if budget in (None, ""):
            return 0.0
        value = float(budget)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN is truthy and compares False to everything, so it would be
    # reported as "Within budget"; treat it as unset instead.
    return 0.0 if np.isnan(value) else value


def load_or_demo(file, budget):
    """Run the forecasting pipeline on an uploaded CSV or on demo data.

    Args:
        file: Uploaded CSV (an object with a ``.name`` path attribute or a
            path string), or ``None`` to use the built-in demo data.
        budget: Monthly budget as a positive number; ``None``, ``""`` or an
            unparseable value means no budget is set.

    Returns:
        A tuple ``(summary, monthly_view)`` of DataFrames: a metric/value
        table with the forecast, and the monthly aggregates used by the
        model with ``month`` formatted as ``YYYY-MM``.

    Raises:
        ValueError: If the CSV lacks a required column or fewer than 6
            monthly feature rows are available.
    """
    df = _demo_transactions() if file is None else _read_transactions(file)

    m = prep_monthly_features(df)
    if len(m) < 6:
        # Feature preparation drops the first two months (lags) and the last
        # month (target), so 6 usable rows need about 9 months of input.
        raise ValueError(
            "Need at least 6 usable months after lag features are built "
            "(about 9 months of transactions; the demo provides this)."
        )

    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget evaluation: spend is negative, so compare against -budget.
    budget_val = _parse_budget(budget)
    if budget_val:
        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
    else:
        budget_check = "No budget set"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m],
    })
    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
    return summary, monthly_view
def safe_run(file, budget):
    """Run ``load_or_demo`` and surface any failure as a ``gr.Error``.

    The full traceback is printed to stdout, and the exception is re-raised
    as ``gr.Error`` carrying its message.

    Raises:
        gr.Error: For any exception raised by ``load_or_demo``; the original
            exception is attached as the cause.
    """
    try:
        return load_or_demo(file, budget)
    except Exception as e:
        import traceback

        print("TRACEBACK:\n", traceback.format_exc())
        # Some exceptions stringify to "", which would show an empty error;
        # fall back to the exception type name in that case.
        raise gr.Error(str(e) or type(e).__name__) from e
# -------- UI --------
# Page layout: intro text, an input row (CSV upload + budget), a run button
# and two result tables.
with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    gr.Markdown(
        value=(
            "## Retail Finance Spend Forecaster\n"
            "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
            "The model forecasts **next-month spend** and flags **overspend risk**."
        )
    )

    # Inputs sit side by side.
    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")

    btn = gr.Button(value="Run Forecast")

    # Outputs.
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")

    # A click runs the forecast and fills both tables.
    btn.click(
        fn=safe_run,
        inputs=[file, budget],
        outputs=[summary, monthly_table],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()
|