Spaces:

Ekow24
/

AI_Spending_Analyzer

Sleeping

App Files Files Community

Ekow24 committed on Oct 9, 2025

Commit

4be23b6

verified ·

1 Parent(s): 8152eda

Update utils.py

Browse files

Files changed (1) hide show

utils.py +164 -48

utils.py CHANGED Viewed

@@ -2,7 +2,6 @@ from __future__ import annotations
 import math
 import os
-from dataclasses import dataclass
 from datetime import datetime, timedelta
 from typing import Dict, Iterable, List, Optional, Tuple
@@ -10,53 +9,29 @@ import numpy as np
 import pandas as pd
 import plotly.express as px
 CATEGORIES = [
-    "Food",
-    "Travel",
-    "Shopping",
-    "Utilities",
-    "Entertainment",
-    "Health",
-    "Subscriptions",
-    "Transport",
 ]
 MERCHANTS = [
-    "SuperMart",
-    "QuickEats",
-    "Urban Cafe",
-    "MegaStore",
-    "Cinema City",
-    "Fit&Fine Gym",
-    "City Utilities",
-    "StreamFlix",
-    "RideNow",
-    "Book Haven",
-    "ElectroWorld",
-    "TravelCo",
-    "PharmaPlus",
-    "HomeNeeds",
 ]
 PAYMENT_METHODS = ["Debit Card", "Credit Card", "Digital Wallet"]
 LOCATIONS = [
-    "London",
-    "Manchester",
-    "Birmingham",
-    "Leeds",
-    "Glasgow",
-    "Liverpool",
-    "Bristol",
-    "Edinburgh",
-    "Cardiff",
-    "Belfast",
 ]
 def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
-    # Mixture distribution for realistic spend
     choices = rng.choice(["small", "medium", "large"], size=n, p=[0.65, 0.28, 0.07])
     amounts = np.empty(n)
     for i, c in enumerate(choices):
@@ -66,7 +41,6 @@ def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
             amounts[i] = max(5, rng.normal(60, 25))
         else:
             amounts[i] = max(20, rng.normal(180, 60))
-    # Random spikes
     spike_idx = rng.choice(np.arange(n), size=max(1, n // 50), replace=False)
     amounts[spike_idx] *= rng.uniform(2.5, 4.0, size=len(spike_idx))
     return np.round(amounts, 2)
@@ -80,8 +54,8 @@ def generate_synthetic_transactions(n_rows: int = 900, seed: Optional[int] = Non
     weights = np.array([1.2 if d.weekday() >= 5 else 1.0 for d in dates]) * \
               np.array([1.3 if d.day > 25 else 1.0 for d in dates])
-    weights = np.clip(weights, 0, None)
-    weights /= weights.sum()
     date_choices = rng.choice(len(dates), size=n_rows, replace=True, p=weights)
     chosen_dates = dates[date_choices]
@@ -99,9 +73,13 @@ def generate_synthetic_transactions(n_rows: int = 900, seed: Optional[int] = Non
         "Payment Method": payment_methods,
         "Location": locations,
     })
-    return df.sort_values("Date").reset_index(drop=True)
 def filter_transactions(
     df: pd.DataFrame,
     date_range: Tuple[datetime, datetime],
@@ -111,7 +89,7 @@ def filter_transactions(
     start, end = date_range
     mask = (df["Date"] >= pd.to_datetime(start)) & (df["Date"] <= pd.to_datetime(end))
     if categories:
-        mask &= df["Category"].isin(categories)
     if merchant_query:
         mask &= df["Merchant"].str.contains(merchant_query, case=False, na=False)
     return df.loc[mask].copy()
@@ -152,7 +130,8 @@ def compute_aggregations(df: pd.DataFrame) -> Dict:
     df_daily = df_daily.sort_values("Date")
     df_daily["Rolling28"] = df_daily["Amount"].rolling(window=28, min_periods=7).mean()
-    mu, sigma = df_daily["Amount"].mean(), df_daily["Amount"].std(ddof=0) or 0.0
     threshold = mu + 2.5 * sigma
     df_spikes = df_daily.assign(IsSpike=df_daily["Amount"] > threshold)
@@ -170,12 +149,15 @@ def compute_aggregations(df: pd.DataFrame) -> Dict:
     }
 def build_time_series_chart(
     df: pd.DataFrame,
     template: str = "plotly",
     spike_overlay: Optional[pd.DataFrame] = None,
-    fixed_line_width: int = 2,
-    hover_line_width: int = 4,
 ) -> "px.Figure":
     if df.empty:
         fig = px.line()
@@ -188,7 +170,7 @@ def build_time_series_chart(
     fig.update_traces(line=dict(width=fixed_line_width), hovertemplate="%{x|%b %d, %Y}: £%{y:.2f}")
     fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
-    if spike_overlay is not None and not spike_overlay.empty:
         spike_points = spike_overlay[spike_overlay.get("IsSpike", False)]
         if not spike_points.empty:
             fig.add_scatter(
@@ -206,6 +188,8 @@ def build_category_bar_chart(
     spend_per_category: pd.Series,
     template: str = "plotly",
     color_sequence: Optional[list] = None,
 ):
     if spend_per_category.empty:
         fig = px.bar()
@@ -213,14 +197,14 @@ def build_category_bar_chart(
         return fig
     fig = px.bar(
-        spend_per_category.reset_index().rename(columns={"index": "Category", "Amount": 0}),
         x="Category",
         y="Amount",
         title="Spend by Category",
         color="Category",
         color_discrete_sequence=color_sequence,
     )
-    fig.update_traces(hovertemplate="%{x}: £%{y:.2f}")
     fig.update_layout(showlegend=False, margin=dict(l=10, r=10, t=40, b=10), template=template)
     return fig
@@ -229,6 +213,7 @@ def build_payment_method_pie_chart(
     spend_per_payment: pd.Series,
     template: str = "plotly",
     color_sequence: Optional[list] = None,
 ):
     if spend_per_payment.empty:
         fig = px.pie()
@@ -236,7 +221,7 @@ def build_payment_method_pie_chart(
         return fig
     fig = px.pie(
-        spend_per_payment.reset_index().rename(columns={"index": "Payment Method", "Amount": 0}),
         values="Amount",
         names="Payment Method",
         title="Payment Methods Distribution",
@@ -248,9 +233,140 @@ def build_payment_method_pie_chart(
     return fig
 def _format_number(n: float) -> str:
     if n >= 1_000_000:
         return f"£{n/1_000_000:.1f}M"
     if n >= 1_000:
         return f"£{n/1_000:.1f}k"
-    return f"£{n:,.0f}"

 import math
 import os
 from datetime import datetime, timedelta
 from typing import Dict, Iterable, List, Optional, Tuple
 import pandas as pd
 import plotly.express as px
 CATEGORIES = [
+    "Food", "Travel", "Shopping", "Utilities", "Entertainment",
+    "Health", "Subscriptions", "Transport",
 ]
 MERCHANTS = [
+    "SuperMart", "QuickEats", "Urban Cafe", "MegaStore", "Cinema City",
+    "Fit&Fine Gym", "City Utilities", "StreamFlix", "RideNow",
+    "Book Haven", "ElectroWorld", "TravelCo", "PharmaPlus", "HomeNeeds",
 ]
 PAYMENT_METHODS = ["Debit Card", "Credit Card", "Digital Wallet"]
 LOCATIONS = [
+    "London", "Manchester", "Birmingham", "Leeds", "Glasgow",
+    "Liverpool", "Bristol", "Edinburgh", "Cardiff", "Belfast",
 ]
+# -----------------------------
+# Synthetic Data Generation
+# -----------------------------
 def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
     choices = rng.choice(["small", "medium", "large"], size=n, p=[0.65, 0.28, 0.07])
     amounts = np.empty(n)
     for i, c in enumerate(choices):
             amounts[i] = max(5, rng.normal(60, 25))
         else:
             amounts[i] = max(20, rng.normal(180, 60))
     spike_idx = rng.choice(np.arange(n), size=max(1, n // 50), replace=False)
     amounts[spike_idx] *= rng.uniform(2.5, 4.0, size=len(spike_idx))
     return np.round(amounts, 2)
     weights = np.array([1.2 if d.weekday() >= 5 else 1.0 for d in dates]) * \
               np.array([1.3 if d.day > 25 else 1.0 for d in dates])
+    weights = np.clip(weights, a_min=0, a_max=None)
+    weights = weights / weights.sum()
     date_choices = rng.choice(len(dates), size=n_rows, replace=True, p=weights)
     chosen_dates = dates[date_choices]
         "Payment Method": payment_methods,
         "Location": locations,
     })
+    df = df.sort_values("Date").reset_index(drop=True)
+    return df
+# -----------------------------
+# Filtering and Aggregation
+# -----------------------------
 def filter_transactions(
     df: pd.DataFrame,
     date_range: Tuple[datetime, datetime],
     start, end = date_range
     mask = (df["Date"] >= pd.to_datetime(start)) & (df["Date"] <= pd.to_datetime(end))
     if categories:
+        mask &= df["Category"].isin(list(categories))
     if merchant_query:
         mask &= df["Merchant"].str.contains(merchant_query, case=False, na=False)
     return df.loc[mask].copy()
     df_daily = df_daily.sort_values("Date")
     df_daily["Rolling28"] = df_daily["Amount"].rolling(window=28, min_periods=7).mean()
+    mu = df_daily["Amount"].mean()
+    sigma = df_daily["Amount"].std(ddof=0) or 0.0
     threshold = mu + 2.5 * sigma
     df_spikes = df_daily.assign(IsSpike=df_daily["Amount"] > threshold)
     }
+# -----------------------------
+# Chart Builders (fixed)
+# -----------------------------
 def build_time_series_chart(
     df: pd.DataFrame,
     template: str = "plotly",
     spike_overlay: Optional[pd.DataFrame] = None,
+    fixed_line_width: float = 2.0,
+    **kwargs,
 ) -> "px.Figure":
     if df.empty:
         fig = px.line()
     fig.update_traces(line=dict(width=fixed_line_width), hovertemplate="%{x|%b %d, %Y}: £%{y:.2f}")
     fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
+    if isinstance(spike_overlay, pd.DataFrame) and not spike_overlay.empty:
         spike_points = spike_overlay[spike_overlay.get("IsSpike", False)]
         if not spike_points.empty:
             fig.add_scatter(
     spend_per_category: pd.Series,
     template: str = "plotly",
     color_sequence: Optional[list] = None,
+    fixed_bar_width: float = 0.8,
+    **kwargs,
 ):
     if spend_per_category.empty:
         fig = px.bar()
         return fig
     fig = px.bar(
+        spend_per_category.reset_index().rename(columns={"index": "Category", 0: "Amount"}),
         x="Category",
         y="Amount",
         title="Spend by Category",
         color="Category",
         color_discrete_sequence=color_sequence,
     )
+    fig.update_traces(width=fixed_bar_width, hovertemplate="%{x}: £%{y:.2f}")
     fig.update_layout(showlegend=False, margin=dict(l=10, r=10, t=40, b=10), template=template)
     return fig
     spend_per_payment: pd.Series,
     template: str = "plotly",
     color_sequence: Optional[list] = None,
+    **kwargs,
 ):
     if spend_per_payment.empty:
         fig = px.pie()
         return fig
     fig = px.pie(
+        spend_per_payment.reset_index().rename(columns={"index": "Payment Method", 0: "Amount"}),
         values="Amount",
         names="Payment Method",
         title="Payment Methods Distribution",
     return fig
+# -----------------------------
+# Formatting Helpers
+# -----------------------------
 def _format_number(n: float) -> str:
     if n >= 1_000_000:
         return f"£{n/1_000_000:.1f}M"
     if n >= 1_000:
         return f"£{n/1_000:.1f}k"
+    return f"£{n:,.0f}"
def _month_over_month_change(monthly: Optional[pd.DataFrame]) -> float:
    """Return the fractional change in spend between the last two months.

    Args:
        monthly: Frame with a sortable "Month" column and a numeric "Amount"
            column, one row per month.  May be None.

    Returns:
        ``(latest - previous) / previous`` for the two most recent rows when
        ordered by "Month" (e.g. 0.5 for a 50% increase).  Returns 0.0 when
        fewer than two rows are available, when the previous amount is zero,
        or when either amount is missing (NaN).
    """
    # len() < 2 already covers the empty frame.
    if monthly is None or len(monthly) < 2:
        return 0.0
    amounts = monthly.sort_values("Month")["Amount"]
    previous, latest = amounts.iloc[-2], amounts.iloc[-1]
    # A NaN here would otherwise propagate into the summary text as "+nan%".
    if pd.isna(previous) or pd.isna(latest) or previous == 0:
        return 0.0
    return float((latest - previous) / previous)
+# -----------------------------
+# AI Summary Helpers
+# -----------------------------
def _heuristic_summary(ctx: Dict, mode: str = "Concise") -> str:
    """Build a rule-based, plain-English summary of spending analytics.

    Args:
        ctx: Summary context.  Keys read (all optional): "total_spend",
            "avg_monthly", "largest_category", "largest_category_share"
            (fraction), "max_transaction" (mapping with "amount" and
            "merchant"), "mom_change" (fraction) and "spike_days".
        mode: "Detailed" (matched ignoring case and surrounding whitespace)
            appends advice sentences; any other value yields the short
            summary only.

    Returns:
        The summary sentences joined by single spaces.
    """
    total = _format_number(ctx.get("total_spend", 0.0))
    avg = _format_number(ctx.get("avg_monthly", 0.0))
    lcat = ctx.get("largest_category") or "N/A"
    share = ctx.get("largest_category_share", 0.0) * 100
    # Tolerate an explicit None as well as a missing key.
    max_txn = ctx.get("max_transaction") or {}
    max_amt = max_txn.get("amount", 0.0)
    max_merchant = max_txn.get("merchant", "")
    mom = ctx.get("mom_change", 0.0) * 100
    spikes = ctx.get("spike_days", 0)

    sentences = [
        f"Total spend in the selected period is {total}, averaging {avg} per month.",
    ]
    if lcat != "N/A":
        sentences.append(f"Top category is {lcat} at {share:.0f}% of spend.")
    sentences.append(f"Month-over-month, spending changed by {mom:+.0f}%.")
    if max_amt:
        # Leave out the "at <merchant>" clause when no merchant is known
        # instead of emitting a dangling "at .".
        where = f" at {max_merchant}" if max_merchant else ""
        sentences.append(f"Largest single transaction was £{max_amt:,.0f}{where}.")
    if spikes:
        sentences.append(f"Detected {spikes} unusually high daily spend day(s).")

    if str(mode).strip().casefold() != "detailed":
        return " ".join(sentences)

    if mom > 10:
        sentences.append(
            "Your spending has increased significantly this month, which may "
            "indicate lifestyle changes or seasonal variations."
        )
    elif mom < -10:
        sentences.append(
            "You've successfully reduced your spending this month, showing "
            "good financial discipline."
        )
    else:
        sentences.append("Your spending patterns remain relatively stable month-over-month.")

    # Category-specific advice; categories without an entry get no tip.
    category_tips = {
        "Food": (
            "Food represents your largest expense category. Consider meal "
            "planning and bulk shopping to optimize costs."
        ),
        "Shopping": (
            "Shopping is your primary spending category. Review purchases for "
            "necessities vs. wants to identify savings opportunities."
        ),
        "Entertainment": (
            "Entertainment spending dominates your budget. Look for free or "
            "low-cost alternatives to maintain your lifestyle within budget."
        ),
    }
    tip = category_tips.get(lcat)
    if tip:
        sentences.append(tip)

    if spikes > 5:
        sentences.append(
            "Multiple spending spikes detected suggest irregular expense "
            "patterns. Consider smoothing these through better budgeting."
        )
    elif spikes > 0:
        sentences.append(
            "Some spending spikes were identified, which is normal but worth "
            "monitoring for budget planning."
        )
    sentences.append(
        "Consider setting category budgets and monitoring spikes to smooth "
        "cash flow and improve financial predictability."
    )
    return " ".join(sentences)
def _hf_prompt(context: Dict, mode: str) -> str:
    """Build the text prompt asking a language model for a spending summary.

    Args:
        context: Analytics context; its ``str()`` form is embedded verbatim
            in the prompt.
        mode: "Concise" (matched ignoring case and surrounding whitespace)
            requests an 80-120 word summary; any other value requests the
            140-220 word detailed summary.

    Returns:
        The prompt string, ending with "Summary:".
    """
    # Normalise so that "concise" or " Concise " do not silently select the
    # detailed style.
    wants_concise = str(mode or "").strip().casefold() == "concise"
    style = "concise (80-120 words)" if wants_concise else "detailed (140-220 words)"
    return (
        "You are a helpful financial assistant. Produce a "
        f"{style}"
        " natural-language summary of the provided spending analytics in plain English.\n\n"
        f"Context: {context}\n\nSummary:"
    )
def summarize_with_ai(
    agg: Dict,
    api_key: Optional[str] = None,
    mode: str = "Concise",
    engine: str = "Heuristic",
    ollama_model: Optional[str] = None,
) -> str:
    """Produce a natural-language summary of aggregated spending data.

    Args:
        agg: Aggregation results.  Entries read: "spend_per_category" and
            "category_share" (Series), "total_spend", "avg_monthly_spend",
            "max_transaction" (mapping or Series with "Amount" and
            "Merchant"), "monthly" (frame with "Month" and "Amount") and
            "spikes" (frame with an "IsSpike" column).  Missing or None
            entries fall back to empty / zero values.
        api_key: Accepted for interface compatibility; not used here.
        mode: Passed through to the heuristic summary ("Concise" or
            "Detailed").
        engine: Accepted for interface compatibility.  Only the heuristic
            summariser is implemented, so every value resolves to it.
        ollama_model: Accepted for interface compatibility; not used here.

    Returns:
        The summary text.
    """

    def _value(key, fallback):
        # Treat an explicit None the same as a missing key.
        found = agg.get(key)
        return fallback if found is None else found

    spend_per_category = _value("spend_per_category", pd.Series(dtype=float))
    category_share = _value("category_share", pd.Series(dtype=float))
    max_txn = _value("max_transaction", {})
    spikes = _value("spikes", pd.DataFrame())

    context = {
        "total_spend": float(_value("total_spend", 0.0)),
        "avg_monthly": float(_value("avg_monthly_spend", 0.0)),
        "largest_category": (
            None if spend_per_category.empty else spend_per_category.idxmax()
        ),
        "largest_category_share": (
            0.0 if category_share.empty else float(category_share.max())
        ),
        "max_transaction": {
            "amount": float(max_txn.get("Amount", 0.0)),
            "merchant": str(max_txn.get("Merchant", "")),
        },
        "mom_change": _month_over_month_change(agg.get("monthly")),
        "spike_days": int(spikes.get("IsSpike", pd.Series(dtype=bool)).sum()),
    }

    # HuggingFace or other engines can be added here; until then every
    # engine value is served by the heuristic summariser.
    return _heuristic_summary(context, mode=mode)
+# -----------------------------
+# Chat AI (local only)
+# -----------------------------
def chat_with_ai(
    agg: Dict,
    question: str,
    engine: str = "Heuristic",
    api_key: Optional[str] = None,
    ollama_model: Optional[str] = None,
) -> str:
    """Answer a free-form question about the aggregated spending data.

    No model backend is wired in: the heuristic engine returns a fixed
    "not supported" message and every other engine returns a placeholder.

    Args:
        agg: Aggregation results.  Entries read for the prompt context:
            "total_spend", "monthly" (frame with "Month" and "Amount"),
            "spikes" (frame), "spend_per_category" and "spend_per_payment"
            (Series).  Missing or None entries become empty values.
        question: The user's question, embedded in the prompt.
        engine: Engine name.  None/empty and surrounding whitespace are
            normalised the same way as in ``summarize_with_ai``.
        api_key: Accepted for interface compatibility; not used here.
        ollama_model: Accepted for interface compatibility; not used here.

    Returns:
        The response text.
    """
    engine = (engine or "Heuristic").strip()
    if engine == "Heuristic":
        # Exit before building any context: the heuristic path never uses
        # it, and malformed aggregates should not break this fixed reply.
        return "Heuristic engine does not support free-form Q&A yet. Please use summary mode."

    monthly = agg.get("monthly")
    spikes = agg.get("spikes")
    categories = agg.get("spend_per_category")
    payments = agg.get("spend_per_payment")
    context = {
        "totals": float(agg.get("total_spend", 0.0)),
        "monthly": [] if monthly is None else [
            {"month": str(row["Month"]), "amount": float(row["Amount"])}
            for _, row in monthly.iterrows()
        ],
        "spikes": [] if spikes is None else spikes.to_dict(orient="records"),
        "categories": {} if categories is None else categories.to_dict(),
        "payments": {} if payments is None else payments.to_dict(),
    }
    # TODO: send this prompt to the selected engine once one is integrated;
    # for now it is assembled but not dispatched.
    prompt = f"Context: {context}\nUser Question: {question}\nAnswer:"
    return "AI response placeholder."