Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -56,7 +56,6 @@ LOCATIONS = [
|
|
| 56 |
|
| 57 |
|
| 58 |
def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
|
| 59 |
-
# Mixture distribution for more realistic spend: many small, some medium, few large
|
| 60 |
choices = rng.choice(["small", "medium", "large"], size=n, p=[0.65, 0.28, 0.07])
|
| 61 |
amounts = np.empty(n)
|
| 62 |
for i, c in enumerate(choices):
|
|
@@ -66,7 +65,6 @@ def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
|
|
| 66 |
amounts[i] = max(5, rng.normal(60, 25))
|
| 67 |
else:
|
| 68 |
amounts[i] = max(20, rng.normal(180, 60))
|
| 69 |
-
# Random spikes
|
| 70 |
spike_idx = rng.choice(np.arange(n), size=max(1, n // 50), replace=False)
|
| 71 |
amounts[spike_idx] *= rng.uniform(2.5, 4.0, size=len(spike_idx))
|
| 72 |
return np.round(amounts, 2)
|
|
@@ -78,12 +76,8 @@ def generate_synthetic_transactions(n_rows: int = 900, seed: Optional[int] = Non
|
|
| 78 |
start = end - pd.Timedelta(days=365)
|
| 79 |
dates = pd.date_range(start, end, freq="D")
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
1.2 if d.weekday() >= 5 else 1.0 for d in dates
|
| 84 |
-
]) * np.array([
|
| 85 |
-
1.3 if d.day > 25 else 1.0 for d in dates
|
| 86 |
-
])
|
| 87 |
weights = np.clip(weights, a_min=0, a_max=None)
|
| 88 |
weights = weights / weights.sum()
|
| 89 |
date_choices = rng.choice(len(dates), size=n_rows, replace=True, p=weights)
|
|
@@ -95,17 +89,14 @@ def generate_synthetic_transactions(n_rows: int = 900, seed: Optional[int] = Non
|
|
| 95 |
locations = rng.choice(LOCATIONS, size=n_rows)
|
| 96 |
amts = _random_amounts(n_rows, rng)
|
| 97 |
|
| 98 |
-
df = pd.DataFrame(
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
}
|
| 107 |
-
)
|
| 108 |
-
# Sort by date for better UX
|
| 109 |
df = df.sort_values("Date").reset_index(drop=True)
|
| 110 |
return df
|
| 111 |
|
|
@@ -150,23 +141,15 @@ def compute_aggregations(df: pd.DataFrame) -> Dict:
|
|
| 150 |
max_txn = df.loc[df["Amount"].idxmax()].to_dict()
|
| 151 |
min_txn = df.loc[df["Amount"].idxmin()].to_dict()
|
| 152 |
|
| 153 |
-
monthly = (
|
| 154 |
-
df.assign(Month=_month_key(df["Date"]))
|
| 155 |
-
.groupby("Month")["Amount"].sum()
|
| 156 |
-
.reset_index()
|
| 157 |
-
)
|
| 158 |
avg_monthly_spend = float(monthly["Amount"].mean()) if not monthly.empty else 0.0
|
| 159 |
-
|
| 160 |
-
# Category share
|
| 161 |
category_share = (spend_per_category / max(total_spend, 1e-9)).round(4)
|
| 162 |
|
| 163 |
-
# Rolling 28-day spend for simple trend smoothing
|
| 164 |
df_daily = df.groupby(pd.to_datetime(df["Date"]).dt.date)["Amount"].sum().reset_index()
|
| 165 |
-
df_daily["Date"] = pd.to_datetime(df_daily["Date"])
|
| 166 |
df_daily = df_daily.sort_values("Date")
|
| 167 |
df_daily["Rolling28"] = df_daily["Amount"].rolling(window=28, min_periods=7).mean()
|
| 168 |
|
| 169 |
-
# Naive anomaly: mark spikes above mean + 2.5*std on daily amounts
|
| 170 |
mu = df_daily["Amount"].mean()
|
| 171 |
sigma = df_daily["Amount"].std(ddof=0) or 0.0
|
| 172 |
threshold = mu + 2.5 * sigma
|
|
@@ -190,6 +173,7 @@ def build_time_series_chart(
|
|
| 190 |
df: pd.DataFrame,
|
| 191 |
template: str = "plotly",
|
| 192 |
spike_overlay: Optional[pd.DataFrame] = None,
|
|
|
|
| 193 |
) -> "px.Figure":
|
| 194 |
if df.empty:
|
| 195 |
fig = px.line()
|
|
@@ -197,7 +181,7 @@ def build_time_series_chart(
|
|
| 197 |
return fig
|
| 198 |
|
| 199 |
daily = df.groupby(pd.to_datetime(df["Date"]).dt.date)["Amount"].sum().reset_index()
|
| 200 |
-
daily["Date"] = pd.to_datetime(daily["Date"])
|
| 201 |
|
| 202 |
fig = px.line(
|
| 203 |
daily,
|
|
@@ -207,12 +191,14 @@ def build_time_series_chart(
|
|
| 207 |
markers=True,
|
| 208 |
)
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 214 |
|
| 215 |
-
# Optional spike overlay
|
| 216 |
if isinstance(spike_overlay, pd.DataFrame) and not spike_overlay.empty:
|
| 217 |
spike_points = spike_overlay[spike_overlay.get("IsSpike", False)]
|
| 218 |
if not spike_points.empty:
|
|
@@ -228,6 +214,7 @@ def build_time_series_chart(
|
|
| 228 |
return fig
|
| 229 |
|
| 230 |
|
|
|
|
| 231 |
def build_category_bar_chart(
|
| 232 |
spend_per_category: pd.Series,
|
| 233 |
template: str = "plotly",
|
|
@@ -271,251 +258,11 @@ def build_payment_method_pie_chart(
|
|
| 271 |
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 272 |
return fig
|
| 273 |
|
| 274 |
-
def build_category_bar_chart(
|
| 275 |
-
spend_per_category: pd.Series,
|
| 276 |
-
template: str = "plotly",
|
| 277 |
-
color_sequence: Optional[list] = None,
|
| 278 |
-
):
|
| 279 |
-
if spend_per_category.empty:
|
| 280 |
-
fig = px.bar()
|
| 281 |
-
fig.update_layout(template=template)
|
| 282 |
-
return fig
|
| 283 |
-
fig = px.bar(
|
| 284 |
-
spend_per_category.reset_index().rename(columns={"index": "Category", 0: "Amount"}),
|
| 285 |
-
x="Category",
|
| 286 |
-
y="Amount",
|
| 287 |
-
title="Spend by Category",
|
| 288 |
-
color="Category",
|
| 289 |
-
color_discrete_sequence=color_sequence,
|
| 290 |
-
)
|
| 291 |
-
fig.update_traces(hovertemplate="%{x}: £%{y:.2f}")
|
| 292 |
-
fig.update_layout(showlegend=False, margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 293 |
-
return fig
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
def build_payment_method_pie_chart(
|
| 298 |
-
spend_per_payment: pd.Series,
|
| 299 |
-
template: str = "plotly",
|
| 300 |
-
color_sequence: Optional[list] = None,
|
| 301 |
-
):
|
| 302 |
-
if spend_per_payment.empty:
|
| 303 |
-
fig = px.pie()
|
| 304 |
-
fig.update_layout(template=template)
|
| 305 |
-
return fig
|
| 306 |
-
fig = px.pie(
|
| 307 |
-
spend_per_payment.reset_index().rename(columns={"index": "Payment Method", 0: "Amount"}),
|
| 308 |
-
values="Amount",
|
| 309 |
-
names="Payment Method",
|
| 310 |
-
title="Payment Methods Distribution",
|
| 311 |
-
hole=0.45,
|
| 312 |
-
color_discrete_sequence=color_sequence,
|
| 313 |
-
)
|
| 314 |
-
fig.update_traces(hovertemplate="%{label}: £%{value:.2f} (%{percent})")
|
| 315 |
-
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 316 |
-
return fig
|
| 317 |
-
|
| 318 |
|
|
|
|
| 319 |
def _format_number(n: float) -> str:
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
def summarize_with_ai(
|
| 328 |
-
agg: Dict,
|
| 329 |
-
api_key: Optional[str] = None,
|
| 330 |
-
mode: str = "Concise",
|
| 331 |
-
engine: str = "Heuristic",
|
| 332 |
-
ollama_model: Optional[str] = None,
|
| 333 |
-
) -> str:
|
| 334 |
-
# Prepare a compact context
|
| 335 |
-
largest_cat = (
|
| 336 |
-
agg["spend_per_category"].idxmax() if not agg["spend_per_category"].empty else None
|
| 337 |
-
)
|
| 338 |
-
largest_cat_share = (
|
| 339 |
-
float(agg["category_share"].max()) if not agg["category_share"].empty else 0.0
|
| 340 |
-
)
|
| 341 |
-
|
| 342 |
-
context = {
|
| 343 |
-
"total_spend": float(agg["total_spend"]),
|
| 344 |
-
"avg_monthly": float(agg["avg_monthly_spend"]),
|
| 345 |
-
"largest_category": largest_cat,
|
| 346 |
-
"largest_category_share": largest_cat_share,
|
| 347 |
-
"max_transaction": {
|
| 348 |
-
"amount": float(agg["max_transaction"].get("Amount", 0.0)),
|
| 349 |
-
"merchant": str(agg["max_transaction"].get("Merchant", "")),
|
| 350 |
-
},
|
| 351 |
-
"mom_change": _month_over_month_change(agg.get("monthly")),
|
| 352 |
-
"spike_days": int(agg.get("spikes", pd.DataFrame()).get("IsSpike", pd.Series(dtype=bool)).sum()) if isinstance(agg.get("spikes"), pd.DataFrame) else 0,
|
| 353 |
-
}
|
| 354 |
-
|
| 355 |
-
# Engine selection
|
| 356 |
-
engine = (engine or "Heuristic").strip()
|
| 357 |
-
if engine == "Heuristic":
|
| 358 |
-
return _heuristic_summary(context, mode=mode)
|
| 359 |
-
|
| 360 |
-
# Local Hugging Face transformer model (small) - suitable for Spaces without paid APIs
|
| 361 |
-
if engine == "HuggingFace":
|
| 362 |
-
# Try to load a small, commonly-available model for generation. `distilgpt2`
|
| 363 |
-
# is a reasonable CPU-friendly option available on HF Hub and produces
|
| 364 |
-
# better text than the ultra-tiny toy models.
|
| 365 |
-
model_name = os.getenv("HF_LOCAL_MODEL", "distilgpt2")
|
| 366 |
-
try:
|
| 367 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 368 |
-
import torch
|
| 369 |
-
# load tokenizer & model (cached by huggingface inside the Space)
|
| 370 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 371 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 372 |
-
prompt = _hf_prompt(context, mode)
|
| 373 |
-
inputs = tokenizer(prompt, return_tensors="pt")
|
| 374 |
-
with torch.no_grad():
|
| 375 |
-
out = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
|
| 376 |
-
text = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 377 |
-
# post-process: return the generated tail after the prompt if present
|
| 378 |
-
if text.startswith(prompt):
|
| 379 |
-
return text[len(prompt):].strip() or _heuristic_summary(context, mode=mode)
|
| 380 |
-
return text.strip() or _heuristic_summary(context, mode=mode)
|
| 381 |
-
except Exception:
|
| 382 |
-
# If local HF fails, fallback to heuristic (keeps app running on Spaces)
|
| 383 |
-
return _heuristic_summary(context, mode=mode)
|
| 384 |
-
|
| 385 |
-
# At this point, only local Hugging Face generation and heuristic fallback are supported
|
| 386 |
-
# to keep the app free and self-contained for Hugging Face Spaces.
|
| 387 |
-
return _heuristic_summary(context, mode=mode)
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
def _month_over_month_change(monthly: Optional[pd.DataFrame]) -> float:
|
| 391 |
-
if monthly is None or monthly.empty or len(monthly) < 2:
|
| 392 |
-
return 0.0
|
| 393 |
-
monthly_sorted = monthly.sort_values("Month")
|
| 394 |
-
last, prev = monthly_sorted["Amount"].iloc[-1], monthly_sorted["Amount"].iloc[-2]
|
| 395 |
-
if prev == 0:
|
| 396 |
-
return 0.0
|
| 397 |
-
return float((last - prev) / prev)
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
def _heuristic_summary(ctx: Dict, mode: str = "Concise") -> str:
|
| 401 |
-
total = _format_number(ctx.get("total_spend", 0.0))
|
| 402 |
-
avg = _format_number(ctx.get("avg_monthly", 0.0))
|
| 403 |
-
lcat = ctx.get("largest_category") or "N/A"
|
| 404 |
-
share = ctx.get("largest_category_share", 0.0) * 100
|
| 405 |
-
max_amt = ctx.get("max_transaction", {}).get("amount", 0.0)
|
| 406 |
-
max_merchant = ctx.get("max_transaction", {}).get("merchant", "")
|
| 407 |
-
mom = ctx.get("mom_change", 0.0) * 100
|
| 408 |
-
spikes = ctx.get("spike_days", 0)
|
| 409 |
-
|
| 410 |
-
parts = [
|
| 411 |
-
f"Total spend in the selected period is {total}, averaging {avg} per month.",
|
| 412 |
-
f"Top category is {lcat} at {share:.0f}% of spend." if lcat != "N/A" else "",
|
| 413 |
-
f"Month-over-month, spending changed by {mom:+.0f}%.",
|
| 414 |
-
f"Largest single transaction was £{max_amt:,.0f} at {max_merchant}." if max_amt else "",
|
| 415 |
-
f"Detected {spikes} unusually high daily spend day(s)." if spikes else "",
|
| 416 |
-
]
|
| 417 |
-
text = " ".join([p for p in parts if p])
|
| 418 |
-
|
| 419 |
-
if mode == "Detailed":
|
| 420 |
-
# Add more comprehensive analysis for detailed mode
|
| 421 |
-
detailed_insights = []
|
| 422 |
-
|
| 423 |
-
# Spending pattern analysis
|
| 424 |
-
if mom > 10:
|
| 425 |
-
detailed_insights.append("Your spending has increased significantly this month, which may indicate lifestyle changes or seasonal variations.")
|
| 426 |
-
elif mom < -10:
|
| 427 |
-
detailed_insights.append("You've successfully reduced your spending this month, showing good financial discipline.")
|
| 428 |
-
else:
|
| 429 |
-
detailed_insights.append("Your spending patterns remain relatively stable month-over-month.")
|
| 430 |
-
|
| 431 |
-
# Category-specific recommendations
|
| 432 |
-
if lcat == "Food":
|
| 433 |
-
detailed_insights.append("Food represents your largest expense category. Consider meal planning and bulk shopping to optimize costs.")
|
| 434 |
-
elif lcat == "Shopping":
|
| 435 |
-
detailed_insights.append("Shopping is your primary spending category. Review purchases for necessities vs. wants to identify savings opportunities.")
|
| 436 |
-
elif lcat == "Entertainment":
|
| 437 |
-
detailed_insights.append("Entertainment spending dominates your budget. Look for free or low-cost alternatives to maintain your lifestyle within budget.")
|
| 438 |
-
|
| 439 |
-
# Spike analysis
|
| 440 |
-
if spikes > 5:
|
| 441 |
-
detailed_insights.append("Multiple spending spikes detected suggest irregular expense patterns. Consider smoothing these through better budgeting.")
|
| 442 |
-
elif spikes > 0:
|
| 443 |
-
detailed_insights.append("Some spending spikes were identified, which is normal but worth monitoring for budget planning.")
|
| 444 |
-
|
| 445 |
-
# General financial advice
|
| 446 |
-
detailed_insights.append("Consider setting category budgets and monitoring spikes to smooth cash flow and improve financial predictability.")
|
| 447 |
-
|
| 448 |
-
text += " " + " ".join(detailed_insights)
|
| 449 |
-
|
| 450 |
-
return text
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
# Ollama/OpenAI helpers removed to keep the app local-only and free.
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
def _hf_prompt(context: Dict, mode: str) -> str:
|
| 457 |
-
style = "concise (80-120 words)" if mode == "Concise" else "detailed (140-220 words)"
|
| 458 |
-
return (
|
| 459 |
-
"You are a helpful financial assistant. Produce a "
|
| 460 |
-
+ style
|
| 461 |
-
+ " natural-language summary of the provided spending analytics in plain English.\n\n"
|
| 462 |
-
+ f"Context: {context}\n\nSummary:"
|
| 463 |
-
)
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
def chat_with_ai(
|
| 467 |
-
agg: Dict,
|
| 468 |
-
question: str,
|
| 469 |
-
engine: str = "Heuristic",
|
| 470 |
-
api_key: Optional[str] = None,
|
| 471 |
-
ollama_model: Optional[str] = None,
|
| 472 |
-
) -> str:
|
| 473 |
-
# Provide compact context; reuse from summarize
|
| 474 |
-
context = {
|
| 475 |
-
"totals": float(agg.get("total_spend", 0.0)),
|
| 476 |
-
"monthly": [
|
| 477 |
-
{ "month": str(r["Month"]), "amount": float(r["Amount"]) }
|
| 478 |
-
for _, r in agg.get("monthly", pd.DataFrame()).iterrows()
|
| 479 |
-
],
|
| 480 |
-
"by_category": agg.get("spend_per_category", pd.Series(dtype=float)).to_dict(),
|
| 481 |
-
"by_payment": agg.get("spend_per_payment", pd.Series(dtype=float)).to_dict(),
|
| 482 |
-
"max_txn": agg.get("max_transaction", {}),
|
| 483 |
-
}
|
| 484 |
-
|
| 485 |
-
engine = (engine or "Heuristic").strip()
|
| 486 |
-
if engine == "Heuristic" or not question.strip():
|
| 487 |
-
return "Here's what I can tell from your data: total spend is " \
|
| 488 |
-
+ _format_number(context["totals"]) \
|
| 489 |
-
+ ". Ask about trends, categories, or months for more detail."
|
| 490 |
-
|
| 491 |
-
# Support local Hugging Face model for Q&A if requested; otherwise, return heuristic answer.
|
| 492 |
-
engine = (engine or "Heuristic").strip()
|
| 493 |
-
if engine == "Heuristic" or not question.strip():
|
| 494 |
-
return "Here's what I can tell from your data: total spend is " \
|
| 495 |
-
+ _format_number(context["totals"]) \
|
| 496 |
-
+ ". Ask about trends, categories, or months for more detail."
|
| 497 |
-
|
| 498 |
-
if engine == "HuggingFace":
|
| 499 |
-
model_name = os.getenv("HF_LOCAL_MODEL", "distilgpt2")
|
| 500 |
-
try:
|
| 501 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 502 |
-
import torch
|
| 503 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 504 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 505 |
-
prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
|
| 506 |
-
inputs = tokenizer(prompt, return_tensors="pt")
|
| 507 |
-
with torch.no_grad():
|
| 508 |
-
out = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
|
| 509 |
-
text = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 510 |
-
if text.startswith(prompt):
|
| 511 |
-
return text[len(prompt):].strip()
|
| 512 |
-
return text.strip()
|
| 513 |
-
except Exception:
|
| 514 |
-
return "Local model unavailable. Falling back to heuristic answer: " + (
|
| 515 |
-
"Here's what I can tell from your data: total spend is " + _format_number(context["totals"]) + "."
|
| 516 |
-
)
|
| 517 |
-
|
| 518 |
-
# Default fallback
|
| 519 |
-
return "I can't answer that right now. Try the Heuristic engine."
|
| 520 |
-
|
| 521 |
-
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def _random_amounts(n: int, rng: np.random.Generator) -> np.ndarray:
|
|
|
|
| 59 |
choices = rng.choice(["small", "medium", "large"], size=n, p=[0.65, 0.28, 0.07])
|
| 60 |
amounts = np.empty(n)
|
| 61 |
for i, c in enumerate(choices):
|
|
|
|
| 65 |
amounts[i] = max(5, rng.normal(60, 25))
|
| 66 |
else:
|
| 67 |
amounts[i] = max(20, rng.normal(180, 60))
|
|
|
|
| 68 |
spike_idx = rng.choice(np.arange(n), size=max(1, n // 50), replace=False)
|
| 69 |
amounts[spike_idx] *= rng.uniform(2.5, 4.0, size=len(spike_idx))
|
| 70 |
return np.round(amounts, 2)
|
|
|
|
| 76 |
start = end - pd.Timedelta(days=365)
|
| 77 |
dates = pd.date_range(start, end, freq="D")
|
| 78 |
|
| 79 |
+
weights = np.array([1.2 if d.weekday() >= 5 else 1.0 for d in dates]) * \
|
| 80 |
+
np.array([1.3 if d.day > 25 else 1.0 for d in dates])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
weights = np.clip(weights, a_min=0, a_max=None)
|
| 82 |
weights = weights / weights.sum()
|
| 83 |
date_choices = rng.choice(len(dates), size=n_rows, replace=True, p=weights)
|
|
|
|
| 89 |
locations = rng.choice(LOCATIONS, size=n_rows)
|
| 90 |
amts = _random_amounts(n_rows, rng)
|
| 91 |
|
| 92 |
+
df = pd.DataFrame({
|
| 93 |
+
"Date": pd.to_datetime(chosen_dates),
|
| 94 |
+
"Merchant": merchants,
|
| 95 |
+
"Category": categories,
|
| 96 |
+
"Amount": amts,
|
| 97 |
+
"Payment Method": payment_methods,
|
| 98 |
+
"Location": locations,
|
| 99 |
+
})
|
|
|
|
|
|
|
|
|
|
| 100 |
df = df.sort_values("Date").reset_index(drop=True)
|
| 101 |
return df
|
| 102 |
|
|
|
|
| 141 |
max_txn = df.loc[df["Amount"].idxmax()].to_dict()
|
| 142 |
min_txn = df.loc[df["Amount"].idxmin()].to_dict()
|
| 143 |
|
| 144 |
+
monthly = df.assign(Month=_month_key(df["Date"])).groupby("Month")["Amount"].sum().reset_index()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
avg_monthly_spend = float(monthly["Amount"].mean()) if not monthly.empty else 0.0
|
|
|
|
|
|
|
| 146 |
category_share = (spend_per_category / max(total_spend, 1e-9)).round(4)
|
| 147 |
|
|
|
|
| 148 |
df_daily = df.groupby(pd.to_datetime(df["Date"]).dt.date)["Amount"].sum().reset_index()
|
| 149 |
+
df_daily["Date"] = pd.to_datetime(df_daily["Date"])
|
| 150 |
df_daily = df_daily.sort_values("Date")
|
| 151 |
df_daily["Rolling28"] = df_daily["Amount"].rolling(window=28, min_periods=7).mean()
|
| 152 |
|
|
|
|
| 153 |
mu = df_daily["Amount"].mean()
|
| 154 |
sigma = df_daily["Amount"].std(ddof=0) or 0.0
|
| 155 |
threshold = mu + 2.5 * sigma
|
|
|
|
| 173 |
df: pd.DataFrame,
|
| 174 |
template: str = "plotly",
|
| 175 |
spike_overlay: Optional[pd.DataFrame] = None,
|
| 176 |
+
fixed_line_width: int = 2,
|
| 177 |
) -> "px.Figure":
|
| 178 |
if df.empty:
|
| 179 |
fig = px.line()
|
|
|
|
| 181 |
return fig
|
| 182 |
|
| 183 |
daily = df.groupby(pd.to_datetime(df["Date"]).dt.date)["Amount"].sum().reset_index()
|
| 184 |
+
daily["Date"] = pd.to_datetime(daily["Date"])
|
| 185 |
|
| 186 |
fig = px.line(
|
| 187 |
daily,
|
|
|
|
| 191 |
markers=True,
|
| 192 |
)
|
| 193 |
|
| 194 |
+
fig.update_traces(
|
| 195 |
+
line=dict(width=fixed_line_width),
|
| 196 |
+
marker=dict(size=6),
|
| 197 |
+
hovertemplate="%{x|%b %d, %Y}: £%{y:.2f}",
|
| 198 |
+
)
|
| 199 |
|
| 200 |
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 201 |
|
|
|
|
| 202 |
if isinstance(spike_overlay, pd.DataFrame) and not spike_overlay.empty:
|
| 203 |
spike_points = spike_overlay[spike_overlay.get("IsSpike", False)]
|
| 204 |
if not spike_points.empty:
|
|
|
|
| 214 |
return fig
|
| 215 |
|
| 216 |
|
| 217 |
+
# --- Category / Payment Charts ---
|
| 218 |
def build_category_bar_chart(
|
| 219 |
spend_per_category: pd.Series,
|
| 220 |
template: str = "plotly",
|
|
|
|
| 258 |
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template=template)
|
| 259 |
return fig
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
+
# --- Helpers for AI summaries ---
|
| 263 |
def _format_number(n: float) -> str:
|
| 264 |
+
if n >= 1_000_000:
|
| 265 |
+
return f"£{n/1_000_000:.1f}M"
|
| 266 |
+
if n >= 1_000:
|
| 267 |
+
return f"£{n/1_000:.1f}k"
|
| 268 |
+
return f"£{n:,.0f}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|