# README / app.py
# MrThinker098's picture
# Update app.py
# 72058ba verified
import numpy as np
import pandas as pd
import gradio as gr
# -------- helpers --------
def month_start(ts):
    """Snap a date-like value to the first day of its month.

    The input is parsed with ``pd.to_datetime`` and round-tripped through a
    monthly period, so the result is a Timestamp of the form YYYY-MM-01.
    """
    parsed = pd.to_datetime(ts)
    monthly_period = parsed.to_period("M")
    return monthly_period.to_timestamp()
def prep_monthly_features(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate raw transactions into one feature row per calendar month.

    Expects columns ``date``, ``amount``, ``category`` and ``income``.
    ``amount`` is signed: positive values are inflows, negative values are
    spend, so the monthly ``spend`` total is zero or negative.

    Returns a frame sorted by ``month`` containing:
      * ``spend`` / ``inflow`` / ``txns`` / ``income`` monthly aggregates,
      * one ``cat_<category>`` transaction-count column per category,
      * calendar features ``m_num`` / ``y_num``,
      * lag features ``spend_lag1`` / ``spend_lag2`` / ``inflow_lag1``,
      * ``target_next_spend``, the following month's ``spend``.

    Rows that lack a lag or a target are dropped, i.e. the first two months
    and the latest month never appear in the result. The input frame is not
    modified.
    """
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # Values read from a CSV may arrive as text. Unparseable entries become
    # NaN and are skipped by the sums/counts below, mirroring the date handling.
    df["amount"] = pd.to_numeric(df["amount"], errors="coerce")
    df["income"] = pd.to_numeric(df["income"], errors="coerce")

    # Month bucket (YYYY-MM-01), computed column-wise rather than row by row.
    df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()

    # Monthly aggregates.
    month_agg = df.groupby("month", as_index=False).agg(
        spend=("amount", lambda x: x[x < 0].sum()),
        inflow=("amount", lambda x: x[x > 0].sum()),
        txns=("amount", "count"),
        income=("income", "max"),
    )

    # Category counts per month (diversity proxy).
    cats = (
        df.assign(cnt=1)
        .pivot_table(
            index="month",
            columns="category",
            values="cnt",
            aggfunc="sum",
            fill_value=0,
        )
        .rename(columns=lambda c: f"cat_{c}")
    )
    out = month_agg.merge(cats, left_on="month", right_index=True, how="left").fillna(0)

    # Target and lags rely on chronological row order.
    out = out.sort_values("month").reset_index(drop=True)
    out["target_next_spend"] = out["spend"].shift(-1)
    out["m_num"] = out["month"].dt.month
    out["y_num"] = out["month"].dt.year
    out["spend_lag1"] = out["spend"].shift(1)
    out["spend_lag2"] = out["spend"].shift(2)
    out["inflow_lag1"] = out["inflow"].shift(1)

    # Only the shifted columns can still hold NaN at this point.
    out = out.dropna().reset_index(drop=True)
    return out
def train_model(monthly_df: pd.DataFrame):
    """Fit a linear regression that predicts ``target_next_spend``.

    Every column except ``month`` and ``target_next_spend`` is used as a
    feature.

    Returns:
        ``(model, feature_cols, mae)`` where ``model`` is fitted on all rows,
        ``feature_cols`` is the ordered list of feature column names, and
        ``mae`` is the mean absolute error on the last three rows, or NaN
        when there are fewer than four rows.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error

    holdout = 3
    feature_cols = [
        c for c in monthly_df.columns if c not in ("month", "target_next_spend")
    ]
    X = monthly_df[feature_cols].copy()
    y = monthly_df["target_next_spend"].values

    mae = np.nan
    if len(monthly_df) >= holdout + 1:
        # Score the held-out months with a model that has not seen them;
        # scoring the final model on its own training rows would report
        # in-sample error rather than forecast error.
        eval_model = LinearRegression()
        eval_model.fit(X.iloc[:-holdout], y[:-holdout])
        preds = eval_model.predict(X.iloc[-holdout:])
        mae = float(mean_absolute_error(y[-holdout:], preds))

    # The returned model uses every available month.
    model = LinearRegression()
    model.fit(X, y)
    return model, feature_cols, mae
def predict_next(monthly_df: pd.DataFrame, model, feature_cols):
    """Predict spend for the month after the last row of ``monthly_df``.

    The last row's ``feature_cols`` values are passed to ``model.predict``.

    Returns:
        ``(next_month, pred, risk)``: the following month formatted as
        ``YYYY-MM``, the predicted spend as a float, and ``"High"`` when the
        prediction is at or below the 10th percentile of the ``spend``
        column, otherwise ``"Low"``.
    """
    latest_features = monthly_df.iloc[[-1]][feature_cols]
    forecast = float(model.predict(latest_features)[0])

    # Spend is stored as negative numbers, so a lower value means more money
    # spent; the 10th percentile is therefore the heavy-spending tail.
    threshold = float(np.percentile(monthly_df["spend"], 10))
    if forecast <= threshold:
        risk = "High"
    else:
        risk = "Low"

    # Label for the month following the last row.
    following = month_start(monthly_df["month"].iloc[-1] + pd.offsets.MonthBegin(1))
    return following.strftime("%Y-%m"), forecast, risk
# -------- app logic --------
def _demo_transactions() -> pd.DataFrame:
    """Build the built-in demo data: 365 days of seeded synthetic transactions."""
    days = pd.date_range("2024-01-01", periods=365, freq="D")
    categories = ["groceries", "rent", "utilities", "fun", "transport"]
    income = 3500.0
    gen = np.random.default_rng(7)  # fixed seed keeps the demo reproducible
    rows = []
    for day in days:
        if day.day == 1:
            # One income row on the first day of each month.
            rows.append({"date": day, "amount": income, "category": "income", "income": income})
        # A Poisson(2) number of purchases per day, priced from a short list.
        for _ in range(gen.poisson(2)):
            amt = -float(gen.choice([15, 25, 40, 60, 120, 300], p=[.25, .25, .2, .15, .1, .05]))
            rows.append({"date": day, "amount": amt, "category": gen.choice(categories), "income": income})
    return pd.DataFrame(rows)


def _read_transactions(file) -> pd.DataFrame:
    """Load an uploaded CSV and verify it has the required columns."""
    # The upload may arrive as a plain path (string / path-like) or as a
    # file wrapper exposing the path via ``.name``; accept both.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name
    df = pd.read_csv(path)
    miss = {"date", "amount", "category", "income"} - set(df.columns)
    if miss:
        raise ValueError(f"CSV is missing columns: {sorted(miss)}")
    return df


def _parse_budget(budget) -> float:
    """Return the budget as a float; 0.0 means no usable budget was given."""
    if budget is None or budget == "":
        return 0.0
    try:
        value = float(budget)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN is truthy and compares False with everything, so treat it as unset.
    return 0.0 if np.isnan(value) else value


def load_or_demo(file, budget):
    """Run the whole forecast pipeline on an uploaded CSV or on demo data.

    Args:
        file: uploaded CSV (path or object with a ``.name`` path), or
            ``None`` to use the generated demo transactions.
        budget: monthly budget as a positive number; blank, missing or
            unparseable values mean no budget is checked.

    Returns:
        ``(summary, monthly_view)``: a two-column metric/value frame and the
        monthly aggregates with ``month`` formatted as ``YYYY-MM``.

    Raises:
        ValueError: if the CSV lacks a required column or fewer than six
            monthly feature rows are available.
    """
    df = _demo_transactions() if file is None else _read_transactions(file)

    m = prep_monthly_features(df)
    if len(m) < 6:
        # The feature step drops the first two months (lags) and the latest
        # month (no target), so six usable rows need about nine months.
        raise ValueError(
            "Need at least 6 usable months of data; the first two months and the "
            "latest month are used up by lag/target features, so provide about "
            "9 months (the demo provides this)."
        )
    model, feats, mae = train_model(m)
    next_m, spend_pred, risk = predict_next(m, model, feats)

    # Budget evaluation: spend is negative, so compare against -|budget|.
    budget_val = _parse_budget(budget)
    if budget_val:
        budget_check = "Over budget" if spend_pred < -abs(budget_val) else "Within budget"
    else:
        budget_check = "No budget set"

    summary = pd.DataFrame({
        "metric": ["Predicted next-month spend", "MAE (last 3 months)", "Overspend risk", "Budget check", "Next month"],
        "value": [round(spend_pred, 2), (None if np.isnan(mae) else round(mae, 2)), risk, budget_check, next_m],
    })
    monthly_view = m[["month", "spend", "inflow", "txns", "income"]].copy()
    monthly_view["month"] = monthly_view["month"].dt.strftime("%Y-%m")
    return summary, monthly_view
def safe_run(file, budget):
    """Run ``load_or_demo`` and convert any failure into a ``gr.Error``.

    On failure the full traceback is printed to stdout, then the exception
    message is re-raised as ``gr.Error``.
    """
    import traceback

    try:
        result = load_or_demo(file, budget)
    except Exception as exc:
        print("TRACEBACK:\n", traceback.format_exc())
        raise gr.Error(str(exc))
    return result
# -------- UI --------
# Intro text rendered at the top of the page.
INTRO_MARKDOWN = (
    "## Retail Finance Spend Forecaster\n"
    "Upload your transactions CSV (columns: `date, amount, category, income`) or use demo data. "
    "The model forecasts **next-month spend** and flags **overspend risk**."
)

with gr.Blocks(title="Retail Finance: Spend Forecast") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # NOTE(review): the upload and budget inputs are assumed to be the only
    # members of this row — confirm against the intended layout.
    with gr.Row():
        file = gr.File(label="Upload CSV (optional)")
        budget = gr.Number(value=2500, label="Monthly budget (positive number)")

    btn = gr.Button(value="Run Forecast")
    summary = gr.Dataframe(label="Summary")
    monthly_table = gr.Dataframe(label="Monthly aggregates used by the model")

    # Clicking the button runs the forecast and fills both tables.
    btn.click(fn=safe_run, inputs=[file, budget], outputs=[summary, monthly_table])

if __name__ == "__main__":
    demo.launch()