# app.py
import os
import time
import json
from datetime import datetime
from typing import Optional
import pandas as pd
import requests
import streamlit as st
# Support running as a module or script
try:
from .utils import (
generate_synthetic_transactions,
filter_transactions,
compute_aggregations,
build_time_series_chart,
build_category_bar_chart,
build_payment_method_pie_chart,
summarize_with_ai,
)
except Exception: # ImportError or relative import context issues
from utils import (
generate_synthetic_transactions,
filter_transactions,
compute_aggregations,
build_time_series_chart,
build_category_bar_chart,
build_payment_method_pie_chart,
summarize_with_ai,
)
# Configure the Streamlit page; must run before any other st.* call.
st.set_page_config(
    page_title="AI Spending Analyser",
    page_icon="💳",
    layout="wide",
)
def init_session_state():
    """Seed Streamlit session state on first run.

    Populates ``st.session_state.data`` with a fixed-seed synthetic dataset and
    ``st.session_state.filters`` with defaults spanning the full date range.
    """
    state = st.session_state
    if "data" not in state:
        state.data = generate_synthetic_transactions(n_rows=900, seed=42)
    if "filters" not in state:
        dates = state.data["Date"]
        state.filters = {
            "date_range": (dates.min(), dates.max()),
            "categories": [],
            "merchant_query": "",
        }
def render_header():
    """
    Render a header with a blue ^ symbol and app title.

    NOTE(review): the markdown/HTML payload below is empty in this copy — the
    header markup appears to have been stripped in transit, so the header
    currently renders blank. Restore the original HTML to see the title.
    """
    # unsafe_allow_html=True allows raw HTML once the markup is restored.
    st.markdown(
        """
        """,
        unsafe_allow_html=True,
    )
def render_assistant_banner():
    """No-op: the top assistant banner was removed by request."""
    return None
def render_chat_fab():
    """No-op: the floating chat widget was removed by request."""
    return None
def render_sidebar(df: pd.DataFrame):
    """Render sidebar controls and persist the selections to session state.

    Side effects:
        - Writes ``st.session_state.filters`` (date range, categories, merchant
          query, summary mode, engine, spike toggle, large-tx threshold).
        - Replaces ``st.session_state.data`` with a fresh (unseeded) synthetic
          dataset when "Regenerate" is clicked.

    Args:
        df: Current transactions frame; reads "Date" and "Category" columns.
    """
    st.sidebar.header("Filters")
    min_d = df["Date"].min()
    max_d = df["Date"].max()

    # Separate From and To date inputs
    st.sidebar.subheader("Date Range")
    col1, col2 = st.sidebar.columns(2)
    with col1:
        from_date = st.date_input(
            "From",
            value=min_d.date(),
            min_value=min_d.date(),
            max_value=max_d.date(),
            key="from_date",
        )
    with col2:
        to_date = st.date_input(
            "To",
            value=max_d.date(),
            min_value=min_d.date(),
            max_value=max_d.date(),
            key="to_date",
        )

    # Validate the chosen range. The widgets already clamp to [min_d, max_d],
    # so the bounds check is defensive. Fix: the original had a second elif
    # with the identical message that could never fire (when from_date <=
    # to_date and both are within bounds, from_date > max_d is impossible);
    # the dead branch has been removed.
    date_error = None
    if from_date > to_date:
        date_error = "From date cannot be after To date"
    elif from_date < min_d.date() or to_date > max_d.date():
        date_error = f"Date range can only be between {min_d.date().strftime('%Y-%m-%d')} and {max_d.date().strftime('%Y-%m-%d')}"

    if date_error:
        st.sidebar.error(date_error)
        # Fall back to the full data range so downstream filtering stays valid.
        from_date = min_d.date()
        to_date = max_d.date()

    all_categories = sorted(df["Category"].unique().tolist())
    categories = st.sidebar.multiselect("Category", options=all_categories, default=[])
    merchant_query = st.sidebar.text_input("Merchant search", value="", placeholder="Type a merchant name…")

    st.sidebar.divider()
    st.sidebar.header("AI")
    # Default engine is HuggingFace (not heuristic).
    summary_mode = st.sidebar.radio("Summary", options=["Concise", "Detailed"], index=0, horizontal=True)
    engine = st.sidebar.selectbox("Engine", options=["HuggingFace", "OpenAI", "Heuristic"], index=0)

    st.sidebar.divider()
    st.sidebar.header("Anomalies & Highlights")
    show_spikes = st.sidebar.toggle("Show spike markers", value=True)
    large_tx_threshold = st.sidebar.slider("Large transaction threshold (£)", 50, 1000, 250, step=25)

    col1, col2 = st.sidebar.columns(2)
    with col1:
        regen = st.button("Regenerate")
    with col2:
        st.sidebar.write("")
    if regen:
        # No seed here on purpose: each regeneration yields a different dataset.
        st.session_state.data = generate_synthetic_transactions(n_rows=900)

    # Persist selections; dates expand to full-day datetime bounds so filtering
    # is inclusive of the whole "To" day.
    st.session_state.filters = {
        "date_range": (
            datetime.combine(from_date, datetime.min.time()),
            datetime.combine(to_date, datetime.max.time()),
        ),
        "categories": categories,
        "merchant_query": merchant_query.strip(),
        "summary_mode": summary_mode,
        "engine": engine,
        "ollama_model": None,  # Ollama support removed; key kept for compatibility
        "show_spikes": show_spikes,
        "large_tx_threshold": large_tx_threshold,
    }
def render_metrics(agg: dict):
    """Render the four headline KPI cards: total, average monthly, max and min transaction.

    Args:
        agg: Aggregations dict; reads "total_spend", "avg_monthly_spend",
            "max_transaction"["Amount"] and "min_transaction"["Amount"].

    NOTE(review): the original card f-strings were split mid-literal (the HTML
    card wrappers appear to have been stripped in transit), which is a syntax
    error. Reconstructed as plain markdown carrying the same labels and values;
    restore the original card HTML around each label/value pair if available.
    """
    cards = [
        ("Total Value", agg["total_spend"]),
        ("Avg Monthly", agg["avg_monthly_spend"]),
        ("Max Transaction", agg["max_transaction"]["Amount"]),
        ("Min Transaction", agg["min_transaction"]["Amount"]),
    ]
    for col, (label, value) in zip(st.columns(4), cards):
        with col:
            st.markdown(f"{label}\n\n£{value:,.0f}", unsafe_allow_html=True)
def render_isa_widget(current_spend: float, allowance: float):
    """Render the ISA-style allowance progress widget.

    Computes used/remaining/percent figures for the widget; the markdown body
    is currently empty, so the computed values are not yet displayed.
    """
    used = current_spend if current_spend < allowance else allowance
    remaining = allowance - used if allowance > used else 0
    percent = 0 if allowance <= 0 else int((used / allowance) * 100)
    st.markdown("", unsafe_allow_html=True)
def render_charts(filtered_df: pd.DataFrame, agg: dict, template: str, show_spikes: bool):
    """Render the three analytics tabs: spend trend, category totals, payment mix.

    Args:
        filtered_df: Transactions after sidebar filtering.
        agg: Aggregations dict; reads "spikes", "spend_per_category",
            "spend_per_payment".
        template: Plotly template name applied to every chart.
        show_spikes: When True, overlay spike markers on the trend chart.
    """
    trend_tab, category_tab, payment_tab = st.tabs(["Trend", "By Category", "Payment Methods"])

    with trend_tab:
        overlay = agg["spikes"] if show_spikes else None
        chart = build_time_series_chart(filtered_df, template=template, spike_overlay=overlay)
        st.plotly_chart(chart, use_container_width=True)

    with category_tab:
        st.caption("Tip: Select categories in the sidebar to compare their total spend.")
        palette = ["#00AEEF", "#697089", "#005F7F", "#00CC99", "#7A7F87"]
        chart = build_category_bar_chart(agg["spend_per_category"], template=template, color_sequence=palette)
        st.plotly_chart(chart, use_container_width=True)

    with payment_tab:
        palette = ["#00AEEF", "#00CC99", "#697089"]
        chart = build_payment_method_pie_chart(agg["spend_per_payment"], template=template, color_sequence=palette)
        st.plotly_chart(chart, use_container_width=True)
# Simple deterministic heuristic fallback (keeps behavior predictable)
def heuristic_summary(agg: dict, mode: str) -> str:
    """Build a short, deterministic summary string from pre-computed aggregations.

    Args:
        agg: Aggregations dict; reads "total_spend", "avg_monthly_spend",
            "spend_per_category" (category name -> amount) and "spikes".
            All keys are optional.
        mode: "Detailed" appends a per-category breakdown (first five entries);
            any other value yields the concise form.

    Returns:
        A single space-joined string of summary sentences.
    """
    total = agg.get("total_spend", 0)
    avg_month = agg.get("avg_monthly_spend", 0)
    per_category = agg.get("spend_per_category") or {}
    spikes = agg.get("spikes", [])

    lines = [
        f"Total spend in the selected period: £{total:,.2f}.",
        f"Average monthly spend: £{avg_month:,.2f}.",
    ]
    if per_category:
        top_cat = max(per_category.items(), key=lambda kv: kv[1])[0]
        lines.append(f"Top category by spend: {top_cat}.")
    lines.append(f"Detected {len(spikes)} spending spikes.")
    if mode == "Detailed":
        # Consistency fix: use a literal "£" like every other line instead of
        # chr(163) (which is the same character, just obfuscated).
        items = list(per_category.items())[:5]
        lines.append("Spend per category: " + ", ".join(f"{k}: £{v:,.0f}" for k, v in items))
    return " ".join(lines)
def _get_hf_token() -> Optional[str]:
    """Return a Hugging Face token using a configurable secret name.

    Behavior:
    - Look up env var HF_TOKEN_NAME to get the secret key name (default 'HF_TOKEN').
    - Prefer Streamlit secrets (st.secrets[name]) when running on Spaces.
    - Fall back to environment variable with that name, then to HUGGINGFACE_API_KEY or HF_TOKEN.

    Returns:
        The first token found, or None when no candidate name resolves.
    """
    # First, allow an explicit env var to override the secret name
    name = os.getenv("HF_TOKEN_NAME", None)
    # Candidate key names, checked highest-priority first.
    preferred_names = []
    if name:
        preferred_names.append(name)
    # include the user-specified token name 'streamlit' as a high-priority fallback
    preferred_names.append("streamlit")
    # finally include the common default
    preferred_names.append("HF_TOKEN")
    # 1) Streamlit secrets (e.g. on Hugging Face Spaces / Streamlit Cloud).
    # NOTE(review): st.secrets is typically a Secrets mapping object rather
    # than a plain dict, so this isinstance guard may always be False and
    # silently skip secrets lookup — confirm against the deployed Streamlit
    # version before relying on secrets-based configuration.
    try:
        for n in preferred_names:
            if isinstance(st.secrets, dict) and n in st.secrets:
                return st.secrets[n]
    except Exception:
        # Accessing st.secrets can raise when no secrets file is configured;
        # ignore and fall through to environment variables.
        pass
    # 2) Environment variables under the same candidate names.
    for n in preferred_names:
        val = os.getenv(n)
        if val:
            return val
    # 3) Last-resort fallbacks under the conventional HF variable names.
    return os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
def _call_hf_inference(prompt: str, model: str = "tiiuae/falcon-7b-instruct", token: Optional[str] = None, max_tokens: int = 256) -> str:
    """Call the Hugging Face Inference API and return the generated text.

    Args:
        prompt: Text sent as the model input.
        model: Model repo id appended to the inference endpoint URL.
        token: Bearer token; required.
        max_tokens: Upper bound on generated tokens.

    Raises:
        RuntimeError: When no token is supplied, on non-200 responses, or when
            the response body carries an "error" field.
    """
    if not token:
        raise RuntimeError("No Hugging Face token provided.")

    response = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers={"Authorization": f"Bearer {token}"},
        json={"inputs": prompt, "parameters": {"max_new_tokens": max_tokens, "temperature": 0.2}},
        timeout=60,
    )
    if response.status_code != 200:
        # Include the JSON body when available, else the raw text, in the error.
        try:
            detail = response.json()
        except Exception:
            detail = response.text
        raise RuntimeError(f"Hugging Face inference error {response.status_code}: {detail}")

    data = response.json()
    # The API returns either a dict or a list depending on the model/task;
    # probe the common shapes for "generated_text" before falling back to str().
    if isinstance(data, dict):
        if "error" in data:
            raise RuntimeError(f"Hugging Face error: {data['error']}")
        if "generated_text" in data:
            return data["generated_text"]
        for value in data.values():
            if isinstance(value, dict) and "generated_text" in value:
                return value["generated_text"]
        return str(data)
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        return str(first)
    return str(data)
# External inference via Hugging Face API and OpenAI have been intentionally
# removed to keep the app free to run on Hugging Face Spaces without paid APIs.
def render_ai_summary(agg: dict, mode: str, engine: str, ollama_model: str | None):
    """Render the "AI Summary" section for the current aggregations.

    Engine resolution:
        - "HuggingFace": local summarizer first, then remote HF inference when
          a token is configured, then the deterministic heuristic.
        - "OpenAI": not wired up (no paid APIs); shows a notice plus heuristic.
        - "Heuristic": deterministic summary only.

    Args:
        agg: Aggregations from ``compute_aggregations``.
        mode: "Concise" or "Detailed".
        engine: One of "HuggingFace", "OpenAI", "Heuristic".
        ollama_model: Unused; kept for signature compatibility (Ollama removed).

    NOTE(review): the original markdown f-strings were split mid-literal (HTML
    wrappers likely stripped in transit), producing syntax errors; reconstructed
    as plain markdown with the same text content.
    """
    st.subheader("AI Summary")
    placeholder = st.empty()
    placeholder.markdown("Generating summary…", unsafe_allow_html=True)

    def _build_prompt() -> str:
        # Fix: the original built this prompt eagerly at the top and then
        # rebuilt the identical string in the remote-inference branch; build it
        # once, lazily, only when remote inference is actually attempted.
        stats = {
            "total_spend": agg.get("total_spend"),
            "avg_monthly_spend": agg.get("avg_monthly_spend"),
            "top_categories": agg.get("spend_per_category"),
            "spikes": agg.get("spikes"),
        }
        return (
            f"Provide a {mode.lower()} natural-language summary of these "
            f"spending analytics: {json.dumps(stats, default=str)}"
        )

    # Preferred: Hugging Face
    if engine == "HuggingFace":
        try:
            text = summarize_with_ai(agg, api_key=None, mode=mode, engine="HuggingFace")
            if not text:
                raise RuntimeError("No response from local Hugging Face summarizer.")
            placeholder.markdown(text, unsafe_allow_html=True)
            return
        except Exception as e:
            # Local summarizer failed; try remote HF inference if a token exists.
            hf_token = _get_hf_token()
            if hf_token:
                try:
                    full_text = _call_hf_inference(_build_prompt(), model="gpt2", token=hf_token, max_tokens=256)
                    placeholder.markdown(full_text, unsafe_allow_html=True)
                except Exception:
                    # Remote inference failed too — deterministic fallback.
                    placeholder.markdown(heuristic_summary(agg, mode), unsafe_allow_html=True)
                return
            # Fix: the original wrote this error and then immediately overwrote
            # it with the heuristic text in the same placeholder, so the error
            # was never visible; show both together instead.
            placeholder.markdown(
                f"Local summarizer error: {e}. No Hugging Face token configured; "
                f"showing deterministic summary instead.\n\n{heuristic_summary(agg, mode)}",
                unsafe_allow_html=True,
            )
            return

    # OpenAI is intentionally not wired up (no paid APIs on Spaces).
    if engine == "OpenAI":
        # Fix: the notice referenced the removed Ollama engine and was then
        # overwritten by the fallback text; show notice and summary together.
        placeholder.markdown(
            "OpenAI summaries are coming soon. Please select HuggingFace "
            f"(default) or Heuristic instead.\n\n{heuristic_summary(agg, mode)}",
            unsafe_allow_html=True,
        )
        return

    # If Heuristic selected explicitly
    if engine == "Heuristic":
        placeholder.markdown(heuristic_summary(agg, mode), unsafe_allow_html=True)
        return

    # Fallback for any unknown engine value — keep the UX graceful.
    placeholder.markdown("Coming soon — selected engine not available.", unsafe_allow_html=True)
def main():
    """App entry point: state, header, sidebar, KPIs, charts, AI summary, downloads."""
    init_session_state()

    # Inject custom CSS with hover animations.
    # NOTE(review): the CSS payload is empty in this copy — the stylesheet
    # appears to have been stripped in transit; restore it for the styling.
    st.markdown("""
""", unsafe_allow_html=True)

    render_header()
    render_assistant_banner()
    # Floating chat button
    render_chat_fab()

    # Sidebar filters and regenerate
    render_sidebar(st.session_state.data)

    # Apply filters
    filters = st.session_state.filters
    filtered = filter_transactions(
        st.session_state.data,
        date_range=filters["date_range"],
        categories=filters["categories"],
        merchant_query=filters["merchant_query"],
    )
    if filtered.empty:
        st.info("No data for selected filters. Adjust filters to see insights.")
        return

    agg = compute_aggregations(filtered)

    # Top KPIs
    st.markdown("", unsafe_allow_html=True)
    render_metrics(agg)
    # Fix: the original spacer markdown literal was split across two lines,
    # which is a syntax error; emit an empty spacer call instead.
    st.markdown("", unsafe_allow_html=True)

    # ISA-style allowance widget (configurable)
    with st.expander("Allowance widget"):
        allowance = st.number_input("Annual allowance (£)", min_value=0, value=20000, step=500)
        render_isa_widget(current_spend=float(agg["total_spend"]), allowance=float(allowance))

    # Charts (use dark theme consistently as requested)
    template = "plotly_dark"
    render_charts(filtered, agg, template, show_spikes=filters["show_spikes"])

    # AI Summary only
    render_ai_summary(agg, mode=filters["summary_mode"], engine=filters["engine"], ollama_model=filters["ollama_model"])

    # Large transactions table
    threshold = filters["large_tx_threshold"]
    large_df = filtered[filtered["Amount"] >= threshold].sort_values("Amount", ascending=False)
    with st.expander(f"Show large transactions (≥ £{threshold}) [{len(large_df)}]"):
        st.dataframe(large_df, use_container_width=True, hide_index=True)

    # Downloads
    st.divider()
    col1, col2 = st.columns([2, 1])
    with col1:
        st.caption("Download filtered data")
        csv = filtered.to_csv(index=False).encode("utf-8")
        st.download_button("Download CSV", csv, file_name="transactions_filtered.csv", mime="text/csv")
    with col2:
        st.caption("Dataset size")
        st.write(f"{len(filtered):,} rows")
if __name__ == "__main__":
main()