# app.py
import os
import time
import json
from datetime import datetime
from typing import Optional
import pandas as pd
import requests
import streamlit as st
# Support running as a module or script
try:
from .utils import (
generate_synthetic_transactions,
filter_transactions,
compute_aggregations,
build_time_series_chart,
build_category_bar_chart,
build_payment_method_pie_chart,
summarize_with_ai,
)
except Exception: # ImportError or relative import context issues
from utils import (
generate_synthetic_transactions,
filter_transactions,
compute_aggregations,
build_time_series_chart,
build_category_bar_chart,
build_payment_method_pie_chart,
summarize_with_ai,
)
# Configure the Streamlit page; must run before any other st.* call.
st.set_page_config(
    page_title="AI Spending Analyser",
    page_icon="💳",
    layout="wide",
)
def init_session_state():
    """Seed Streamlit session state on first run.

    Populates ``st.session_state.data`` with a fixed-seed synthetic dataset and
    ``st.session_state.filters`` with defaults spanning the full date range.
    """
    state = st.session_state
    if "data" not in state:
        state.data = generate_synthetic_transactions(n_rows=900, seed=42)
    if "filters" not in state:
        dates = state.data["Date"]
        state.filters = {
            "date_range": (dates.min(), dates.max()),
            "categories": [],
            "merchant_query": "",
        }
def render_header():
    """
    Render a header with a blue ^ symbol and app title.

    NOTE(review): the markdown/HTML payload below is empty in this copy — the
    header markup appears to have been stripped in transit, so the header
    currently renders blank. Restore the original HTML to see the title.
    """
    # unsafe_allow_html=True allows raw HTML once the markup is restored.
    st.markdown(
        """
        """,
        unsafe_allow_html=True,
    )
def render_assistant_banner():
    """No-op: the top assistant banner was removed by request."""
    return None
def render_chat_fab():
    """No-op: the floating chat widget was removed by request."""
    return None
def render_sidebar(df: pd.DataFrame):
    """Render sidebar controls and persist the selections to session state.

    Side effects:
        - Writes ``st.session_state.filters`` (date range, categories, merchant
          query, summary mode, engine, spike toggle, large-tx threshold).
        - Replaces ``st.session_state.data`` with a fresh (unseeded) synthetic
          dataset when "Regenerate" is clicked.

    Args:
        df: Current transactions frame; reads "Date" and "Category" columns.
    """
    st.sidebar.header("Filters")
    min_d = df["Date"].min()
    max_d = df["Date"].max()

    # Separate From and To date inputs
    st.sidebar.subheader("Date Range")
    col1, col2 = st.sidebar.columns(2)
    with col1:
        from_date = st.date_input(
            "From",
            value=min_d.date(),
            min_value=min_d.date(),
            max_value=max_d.date(),
            key="from_date",
        )
    with col2:
        to_date = st.date_input(
            "To",
            value=max_d.date(),
            min_value=min_d.date(),
            max_value=max_d.date(),
            key="to_date",
        )

    # Validate the chosen range. The widgets already clamp to [min_d, max_d],
    # so the bounds check is defensive. Fix: the original had a second elif
    # with the identical message that could never fire (when from_date <=
    # to_date and both are within bounds, from_date > max_d is impossible);
    # the dead branch has been removed.
    date_error = None
    if from_date > to_date:
        date_error = "From date cannot be after To date"
    elif from_date < min_d.date() or to_date > max_d.date():
        date_error = f"Date range can only be between {min_d.date().strftime('%Y-%m-%d')} and {max_d.date().strftime('%Y-%m-%d')}"

    if date_error:
        st.sidebar.error(date_error)
        # Fall back to the full data range so downstream filtering stays valid.
        from_date = min_d.date()
        to_date = max_d.date()

    all_categories = sorted(df["Category"].unique().tolist())
    categories = st.sidebar.multiselect("Category", options=all_categories, default=[])
    merchant_query = st.sidebar.text_input("Merchant search", value="", placeholder="Type a merchant name…")

    st.sidebar.divider()
    st.sidebar.header("AI")
    # Default engine is HuggingFace (not heuristic).
    summary_mode = st.sidebar.radio("Summary", options=["Concise", "Detailed"], index=0, horizontal=True)
    engine = st.sidebar.selectbox("Engine", options=["HuggingFace", "OpenAI", "Heuristic"], index=0)

    st.sidebar.divider()
    st.sidebar.header("Anomalies & Highlights")
    show_spikes = st.sidebar.toggle("Show spike markers", value=True)
    large_tx_threshold = st.sidebar.slider("Large transaction threshold (£)", 50, 1000, 250, step=25)

    col1, col2 = st.sidebar.columns(2)
    with col1:
        regen = st.button("Regenerate")
    with col2:
        st.sidebar.write("")
    if regen:
        # No seed here on purpose: each regeneration yields a different dataset.
        st.session_state.data = generate_synthetic_transactions(n_rows=900)

    # Persist selections; dates expand to full-day datetime bounds so filtering
    # is inclusive of the whole "To" day.
    st.session_state.filters = {
        "date_range": (
            datetime.combine(from_date, datetime.min.time()),
            datetime.combine(to_date, datetime.max.time()),
        ),
        "categories": categories,
        "merchant_query": merchant_query.strip(),
        "summary_mode": summary_mode,
        "engine": engine,
        "ollama_model": None,  # Ollama support removed; key kept for compatibility
        "show_spikes": show_spikes,
        "large_tx_threshold": large_tx_threshold,
    }
def render_metrics(agg: dict):
    """Render the four headline KPI cards: total, average monthly, max and min transaction.

    Args:
        agg: Aggregations dict; reads "total_spend", "avg_monthly_spend",
            "max_transaction"["Amount"] and "min_transaction"["Amount"].

    NOTE(review): the original card f-strings were split mid-literal (the HTML
    card wrappers appear to have been stripped in transit), which is a syntax
    error. Reconstructed as plain markdown carrying the same labels and values;
    restore the original card HTML around each label/value pair if available.
    """
    cards = [
        ("Total Value", agg["total_spend"]),
        ("Avg Monthly", agg["avg_monthly_spend"]),
        ("Max Transaction", agg["max_transaction"]["Amount"]),
        ("Min Transaction", agg["min_transaction"]["Amount"]),
    ]
    for col, (label, value) in zip(st.columns(4), cards):
        with col:
            st.markdown(f"{label}\n\n£{value:,.0f}", unsafe_allow_html=True)
def render_isa_widget(current_spend: float, allowance: float):
    """Render the ISA-style allowance progress widget.

    Computes used/remaining/percent figures for the widget; the markdown body
    is currently empty, so the computed values are not yet displayed.
    """
    used = current_spend if current_spend < allowance else allowance
    remaining = allowance - used if allowance > used else 0
    percent = 0 if allowance <= 0 else int((used / allowance) * 100)
    st.markdown("", unsafe_allow_html=True)
def render_charts(filtered_df: pd.DataFrame, agg: dict, template: str, show_spikes: bool):
    """Render the three analytics tabs: spend trend, category totals, payment mix.

    Args:
        filtered_df: Transactions after sidebar filtering.
        agg: Aggregations dict; reads "spikes", "spend_per_category",
            "spend_per_payment".
        template: Plotly template name applied to every chart.
        show_spikes: When True, overlay spike markers on the trend chart.
    """
    trend_tab, category_tab, payment_tab = st.tabs(["Trend", "By Category", "Payment Methods"])

    with trend_tab:
        overlay = agg["spikes"] if show_spikes else None
        chart = build_time_series_chart(filtered_df, template=template, spike_overlay=overlay)
        st.plotly_chart(chart, use_container_width=True)

    with category_tab:
        st.caption("Tip: Select categories in the sidebar to compare their total spend.")
        palette = ["#00AEEF", "#697089", "#005F7F", "#00CC99", "#7A7F87"]
        chart = build_category_bar_chart(agg["spend_per_category"], template=template, color_sequence=palette)
        st.plotly_chart(chart, use_container_width=True)

    with payment_tab:
        palette = ["#00AEEF", "#00CC99", "#697089"]
        chart = build_payment_method_pie_chart(agg["spend_per_payment"], template=template, color_sequence=palette)
        st.plotly_chart(chart, use_container_width=True)
# Simple deterministic heuristic fallback (keeps behavior predictable)
def heuristic_summary(agg: dict, mode: str) -> str:
    """Build a short, deterministic summary string from pre-computed aggregations.

    Args:
        agg: Aggregations dict; reads "total_spend", "avg_monthly_spend",
            "spend_per_category" (category name -> amount) and "spikes".
            All keys are optional.
        mode: "Detailed" appends a per-category breakdown (first five entries);
            any other value yields the concise form.

    Returns:
        A single space-joined string of summary sentences.
    """
    total = agg.get("total_spend", 0)
    avg_month = agg.get("avg_monthly_spend", 0)
    per_category = agg.get("spend_per_category") or {}
    spikes = agg.get("spikes", [])

    lines = [
        f"Total spend in the selected period: £{total:,.2f}.",
        f"Average monthly spend: £{avg_month:,.2f}.",
    ]
    if per_category:
        top_cat = max(per_category.items(), key=lambda kv: kv[1])[0]
        lines.append(f"Top category by spend: {top_cat}.")
    lines.append(f"Detected {len(spikes)} spending spikes.")
    if mode == "Detailed":
        # Consistency fix: use a literal "£" like every other line instead of
        # chr(163) (which is the same character, just obfuscated).
        items = list(per_category.items())[:5]
        lines.append("Spend per category: " + ", ".join(f"{k}: £{v:,.0f}" for k, v in items))
    return " ".join(lines)
def _get_hf_token() -> Optional[str]:
    """Return a Hugging Face token using a configurable secret name.

    Behavior:
    - Look up env var HF_TOKEN_NAME to get the secret key name (default 'HF_TOKEN').
    - Prefer Streamlit secrets (st.secrets[name]) when running on Spaces.
    - Fall back to environment variable with that name, then to HUGGINGFACE_API_KEY or HF_TOKEN.

    Returns:
        The first token found, or None when no candidate name resolves.
    """
    # First, allow an explicit env var to override the secret name
    name = os.getenv("HF_TOKEN_NAME", None)
    # Candidate key names, checked highest-priority first.
    preferred_names = []
    if name:
        preferred_names.append(name)
    # include the user-specified token name 'streamlit' as a high-priority fallback
    preferred_names.append("streamlit")
    # finally include the common default
    preferred_names.append("HF_TOKEN")
    # 1) Streamlit secrets (e.g. on Hugging Face Spaces / Streamlit Cloud).
    # NOTE(review): st.secrets is typically a Secrets mapping object rather
    # than a plain dict, so this isinstance guard may always be False and
    # silently skip secrets lookup — confirm against the deployed Streamlit
    # version before relying on secrets-based configuration.
    try:
        for n in preferred_names:
            if isinstance(st.secrets, dict) and n in st.secrets:
                return st.secrets[n]
    except Exception:
        # Accessing st.secrets can raise when no secrets file is configured;
        # ignore and fall through to environment variables.
        pass
    # 2) Environment variables under the same candidate names.
    for n in preferred_names:
        val = os.getenv(n)
        if val:
            return val
    # 3) Last-resort fallbacks under the conventional HF variable names.
    return os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
def _call_hf_inference(prompt: str, model: str = "tiiuae/falcon-7b-instruct", token: Optional[str] = None, max_tokens: int = 256) -> str:
    """Call the Hugging Face Inference API and return the generated text.

    Args:
        prompt: Text sent as the model input.
        model: Model repo id appended to the inference endpoint URL.
        token: Bearer token; required.
        max_tokens: Upper bound on generated tokens.

    Raises:
        RuntimeError: When no token is supplied, on non-200 responses, or when
            the response body carries an "error" field.
    """
    if not token:
        raise RuntimeError("No Hugging Face token provided.")

    response = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers={"Authorization": f"Bearer {token}"},
        json={"inputs": prompt, "parameters": {"max_new_tokens": max_tokens, "temperature": 0.2}},
        timeout=60,
    )
    if response.status_code != 200:
        # Include the JSON body when available, else the raw text, in the error.
        try:
            detail = response.json()
        except Exception:
            detail = response.text
        raise RuntimeError(f"Hugging Face inference error {response.status_code}: {detail}")

    data = response.json()
    # The API returns either a dict or a list depending on the model/task;
    # probe the common shapes for "generated_text" before falling back to str().
    if isinstance(data, dict):
        if "error" in data:
            raise RuntimeError(f"Hugging Face error: {data['error']}")
        if "generated_text" in data:
            return data["generated_text"]
        for value in data.values():
            if isinstance(value, dict) and "generated_text" in value:
                return value["generated_text"]
        return str(data)
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        return str(first)
    return str(data)
# External inference via Hugging Face API and OpenAI have been intentionally
# removed to keep the app free to run on Hugging Face Spaces without paid APIs.
def render_ai_summary(agg: dict, mode: str, engine: str, ollama_model: str | None):
    """Render the "AI Summary" section for the current aggregations.

    Engine resolution:
        - "HuggingFace": local summarizer first, then remote HF inference when
          a token is configured, then the deterministic heuristic.
        - "OpenAI": not wired up (no paid APIs); shows a notice plus heuristic.
        - "Heuristic": deterministic summary only.

    Args:
        agg: Aggregations from ``compute_aggregations``.
        mode: "Concise" or "Detailed".
        engine: One of "HuggingFace", "OpenAI", "Heuristic".
        ollama_model: Unused; kept for signature compatibility (Ollama removed).

    NOTE(review): the original markdown f-strings were split mid-literal (HTML
    wrappers likely stripped in transit), producing syntax errors; reconstructed
    as plain markdown with the same text content.
    """
    st.subheader("AI Summary")
    placeholder = st.empty()
    placeholder.markdown("Generating summary…", unsafe_allow_html=True)

    def _build_prompt() -> str:
        # Fix: the original built this prompt eagerly at the top and then
        # rebuilt the identical string in the remote-inference branch; build it
        # once, lazily, only when remote inference is actually attempted.
        stats = {
            "total_spend": agg.get("total_spend"),
            "avg_monthly_spend": agg.get("avg_monthly_spend"),
            "top_categories": agg.get("spend_per_category"),
            "spikes": agg.get("spikes"),
        }
        return (
            f"Provide a {mode.lower()} natural-language summary of these "
            f"spending analytics: {json.dumps(stats, default=str)}"
        )

    # Preferred: Hugging Face
    if engine == "HuggingFace":
        try:
            text = summarize_with_ai(agg, api_key=None, mode=mode, engine="HuggingFace")
            if not text:
                raise RuntimeError("No response from local Hugging Face summarizer.")
            placeholder.markdown(text, unsafe_allow_html=True)
            return
        except Exception as e:
            # Local summarizer failed; try remote HF inference if a token exists.
            hf_token = _get_hf_token()
            if hf_token:
                try:
                    full_text = _call_hf_inference(_build_prompt(), model="gpt2", token=hf_token, max_tokens=256)
                    placeholder.markdown(full_text, unsafe_allow_html=True)
                except Exception:
                    # Remote inference failed too — deterministic fallback.
                    placeholder.markdown(heuristic_summary(agg, mode), unsafe_allow_html=True)
                return
            # Fix: the original wrote this error and then immediately overwrote
            # it with the heuristic text in the same placeholder, so the error
            # was never visible; show both together instead.
            placeholder.markdown(
                f"Local summarizer error: {e}. No Hugging Face token configured; "
                f"showing deterministic summary instead.\n\n{heuristic_summary(agg, mode)}",
                unsafe_allow_html=True,
            )
            return

    # OpenAI is intentionally not wired up (no paid APIs on Spaces).
    if engine == "OpenAI":
        # Fix: the notice referenced the removed Ollama engine and was then
        # overwritten by the fallback text; show notice and summary together.
        placeholder.markdown(
            "OpenAI summaries are coming soon. Please select HuggingFace "
            f"(default) or Heuristic instead.\n\n{heuristic_summary(agg, mode)}",
            unsafe_allow_html=True,
        )
        return

    # If Heuristic selected explicitly
    if engine == "Heuristic":
        placeholder.markdown(heuristic_summary(agg, mode), unsafe_allow_html=True)
        return

    # Fallback for any unknown engine value — keep the UX graceful.
    placeholder.markdown("Coming soon — selected engine not available.", unsafe_allow_html=True)
def main():
    """App entry point: state, header, sidebar, KPIs, charts, AI summary, downloads."""
    init_session_state()

    # Inject custom CSS with hover animations.
    # NOTE(review): the CSS payload is empty in this copy — the stylesheet
    # appears to have been stripped in transit; restore it for the styling.
    st.markdown("""
""", unsafe_allow_html=True)

    render_header()
    render_assistant_banner()
    # Floating chat button
    render_chat_fab()

    # Sidebar filters and regenerate
    render_sidebar(st.session_state.data)

    # Apply filters
    filters = st.session_state.filters
    filtered = filter_transactions(
        st.session_state.data,
        date_range=filters["date_range"],
        categories=filters["categories"],
        merchant_query=filters["merchant_query"],
    )
    if filtered.empty:
        st.info("No data for selected filters. Adjust filters to see insights.")
        return

    agg = compute_aggregations(filtered)

    # Top KPIs
    st.markdown("", unsafe_allow_html=True)
    render_metrics(agg)
    # Fix: the original spacer markdown literal was split across two lines,
    # which is a syntax error; emit an empty spacer call instead.
    st.markdown("", unsafe_allow_html=True)

    # ISA-style allowance widget (configurable)
    with st.expander("Allowance widget"):
        allowance = st.number_input("Annual allowance (£)", min_value=0, value=20000, step=500)
        render_isa_widget(current_spend=float(agg["total_spend"]), allowance=float(allowance))

    # Charts (use dark theme consistently as requested)
    template = "plotly_dark"
    render_charts(filtered, agg, template, show_spikes=filters["show_spikes"])

    # AI Summary only
    render_ai_summary(agg, mode=filters["summary_mode"], engine=filters["engine"], ollama_model=filters["ollama_model"])

    # Large transactions table
    threshold = filters["large_tx_threshold"]
    large_df = filtered[filtered["Amount"] >= threshold].sort_values("Amount", ascending=False)
    with st.expander(f"Show large transactions (≥ £{threshold}) [{len(large_df)}]"):
        st.dataframe(large_df, use_container_width=True, hide_index=True)

    # Downloads
    st.divider()
    col1, col2 = st.columns([2, 1])
    with col1:
        st.caption("Download filtered data")
        csv = filtered.to_csv(index=False).encode("utf-8")
        st.download_button("Download CSV", csv, file_name="transactions_filtered.csv", mime="text/csv")
    with col2:
        st.caption("Dataset size")
        st.write(f"{len(filtered):,} rows")
if __name__ == "__main__":
main()