PurchaseAgent / src /streamlit_app.py
PD03's picture
Update src/streamlit_app.py
fad5f5f verified
raw
history blame
16.9 kB
import os
import re
import math
from datetime import datetime, date
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
# =========================
# Theming and Page Config
# =========================
st.set_page_config(
page_title="Procurement Insight Agent",
page_icon="🧭",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for a premium look
st.markdown("""
<style>
/* Global font and spacing */
html, body, [class*="css"] {
font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
}
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, #0f172a 0%, #111827 100%);
border-right: 1px solid #1f2937;
}
section[data-testid="stSidebar"] * {
color: #e5e7eb !important;
}
.block-container {
padding-top: 1.5rem;
}
.card {
background: #0b1220;
border: 1px solid #1f2937;
border-radius: 14px;
padding: 16px;
}
.kpi {
background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%);
border: 1px solid #1f2937;
border-radius: 14px;
padding: 18px;
}
.kpi h3 {
margin: 0;
color: #93c5fd;
font-weight: 600;
font-size: 0.95rem;
}
.kpi .value {
margin-top: 6px;
font-size: 1.6rem;
font-weight: 700;
color: #e5e7eb;
}
.caption-note {
color: #94a3b8;
font-size: 0.85rem;
}
.prompt-box {
background: #0b1220;
border: 1px solid #1f2937;
border-radius: 12px;
padding: 10px 12px;
}
.footer-note {
color: #94a3b8;
font-size: 0.8rem;
text-align: center;
margin-top: 24px;
}
.stTabs [data-baseweb="tab-list"] {
gap: 12px;
}
.stTabs [data-baseweb="tab"] {
background-color: #0b1220;
border-radius: 10px 10px 0 0;
padding: 10px 16px;
color: #e5e7eb;
border: 1px solid #1f2937;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%) !important;
color: #93c5fd !important;
}
</style>
""", unsafe_allow_html=True)
# =========================
# Synthetic Data Generator
# =========================
@st.cache_data(show_spinner=False)
def generate_synthetic_procurement(seed=42, start_year=2023, end_year=2025, rows=40_000):
"""
Generates synthetic procurement line items reflecting common S/4HANA Embedded Analytics procurement fields.
"""
rng = np.random.default_rng(seed)
months = pd.date_range(f"{start_year}-01-01", f"{end_year}-12-31", freq="MS")
purchasing_orgs = ["1000", "2000", "3000", "4000"]
company_codes = ["C100", "C200", "C300"]
suppliers = [f"SUPP-{i:03d}" for i in range(1, 61)]
material_groups = ["RAW", "PACK", "SERV", "CAPEX", "MRO"]
currencies = ["USD", "EUR", "GBP", "INR"]
# Seasonality and org effects
def base_amount(month):
# Seasonality: higher in Q2/Q4
q = (month.month-1)//3 + 1
base = 1.0
if q in (2, 4):
base *= 1.2
if month.month in (11, 12):
base *= 1.1
return base
data = {
"CalendarMonth": rng.choice(months, size=rows),
"PurchasingOrganization": rng.choice(purchasing_orgs, size=rows, p=[0.35, 0.25, 0.25, 0.15]),
"CompanyCode": rng.choice(company_codes, size=rows, p=[0.45, 0.35, 0.20]),
"Supplier": rng.choice(suppliers, size=rows),
"MaterialGroup": rng.choice(material_groups, size=rows, p=[0.3, 0.2, 0.25, 0.15, 0.10]),
"Currency": rng.choice(currencies, size=rows, p=[0.5, 0.25, 0.1, 0.15]),
}
df = pd.DataFrame(data)
df["CalendarYear"] = df["CalendarMonth"].dt.year
df["Month"] = df["CalendarMonth"].dt.to_period("M").astype(str)
# Amount generation with org/supplier effects
org_factor = df["PurchasingOrganization"].map({"1000":1.2, "2000":0.9, "3000":1.0, "4000":0.8})
supp_strength = df["Supplier"].str[-3:].astype(int)
supp_factor = 0.8 + (supp_strength / 1000.0) # small lift for higher IDs
seasonal = df["CalendarMonth"].apply(base_amount).astype(float)
base_val = rng.lognormal(mean=7.5, sigma=0.6, size=rows) # realistic skew
df["NetAmount"] = (base_val * org_factor * supp_factor * seasonal).round(2)
# Random off-contract flag
df["OffContract"] = rng.choice([True, False], size=rows, p=[0.18, 0.82])
# For “service POs”
df["IsService"] = df["MaterialGroup"].eq("SERV")
return df
# Load synthetic data
df_raw = generate_synthetic_procurement()
# ==================================
# Helper: Natural Language to Query
# ==================================
def parse_prompt(prompt: str):
"""
Very lightweight rules to detect:
- metric (po_value, off_contract, service_spend)
- time grain (month, quarter, year)
- time window (YTD, QTD, last quarter, range)
- grouping (Supplier, PurchasingOrganization, CompanyCode, MaterialGroup)
- top_n
"""
text = (prompt or "").lower()
# Metric
if "off-contract" in text or "off contract" in text or "leakage" in text:
metric = "off_contract"
elif "service" in text:
metric = "service_spend"
else:
metric = "po_value"
# Grain
if "by month" in text or "monthly" in text:
grain = "month"
elif "by quarter" in text or "quarterly" in text or "qtr" in text:
grain = "quarter"
else:
grain = "month" if "trend" in text else "year"
# Grouping
group_map = {
"supplier": "Supplier",
"purchasing org": "PurchasingOrganization",
"purchasing organization": "PurchasingOrganization",
"company": "CompanyCode",
"companycode": "CompanyCode",
"material group": "MaterialGroup",
"material": "MaterialGroup",
}
group_by = None
for k, v in group_map.items():
if k in text:
group_by = v
break
# Top-N
top_n = None
m = re.search(r"top\s+(\d+)", text)
if m:
top_n = int(m.group(1))
# Time window
today = date.today()
year = today.year
if "last year" in text or "previous year" in text:
start = date(year-1, 1, 1)
end = date(year-1, 12, 31)
elif "this year" in text or "ytd" in text:
start = date(year, 1, 1)
end = today
elif "last quarter" in text or "previous quarter" in text:
this_q = (today.month - 1)//3 + 1
last_q_end = date(year, (this_q-1)*3, 1) - relativedelta(days=1) if this_q > 1 else date(year-1, 12, 31)
last_q = (last_q_end.month - 1)//3 + 1
last_q_start = date(last_q_end.year, (last_q-1)*3 + 1, 1)
start, end = last_q_start, last_q_end
elif "q" in text and re.search(r"q[1-4]\s*\d{4}", text):
qm = re.search(r"q([1-4])\s*(\d{4})", text)
q, y = int(qm.group(1)), int(qm.group(2))
start = date(y, (q-1)*3 + 1, 1)
end = (start + relativedelta(months=3)) - relativedelta(days=1)
elif re.search(r"\b20\d{2}\b", text):
y = int(re.search(r"\b(20\d{2})\b", text).group(1))
start, end = date(y, 1, 1), date(y, 12, 31)
else:
# Default to this year to keep it demo-friendly
start, end = date(year, 1, 1), today
return {
"metric": metric,
"grain": grain,
"group_by": group_by,
"top_n": top_n,
"start": start,
"end": end
}
# =========================
# Query/Compute Functions
# =========================
def filter_timeframe(df, start: date, end: date):
return df[(df["CalendarMonth"].dt.date >= start) & (df["CalendarMonth"].dt.date <= end)]
def compute_metric(df, metric: str):
if metric == "off_contract":
return df[df["OffContract"]]
if metric == "service_spend":
return df[df["IsService"]]
return df
def group_and_aggregate(df, grain: str, group_by: str | None):
work = df.copy()
# Derive time buckets
work["Year"] = work["CalendarMonth"].dt.year
work["Quarter"] = work["CalendarMonth"].dt.to_period("Q").astype(str)
work["Month"] = work["CalendarMonth"].dt.to_period("M").astype(str)
time_col = {"year":"Year", "quarter":"Quarter", "month":"Month"}[grain]
group_cols = [time_col] + ([group_by] if group_by else [])
agg = work.groupby(group_cols, dropna=False)["NetAmount"].sum().reset_index()
agg = agg.rename(columns={"NetAmount":"TotalAmount"})
agg = agg.sort_values("TotalAmount", ascending=False)
return agg, time_col
def topn_if_needed(df, top_n: int | None, group_by: str | None, time_col: str = None):
if top_n and group_by:
# For time series with group, take top entities over total, then filter
total_by_entity = df.groupby(group_by)["TotalAmount"].sum().sort_values(ascending=False)
keep = list(total_by_entity.head(top_n).index)
return df[df[group_by].isin(keep)]
return df
def kpi_summary(df_filtered, df_metric):
total_spend = df_metric["NetAmount"].sum()
total_pos = len(df_metric)
suppliers = df_metric["Supplier"].nunique()
off_ratio = df_metric["OffContract"].mean()
return {
"total_spend": total_spend,
"lines": total_pos,
"suppliers": suppliers,
"off_ratio": off_ratio
}
def insight_sentence(parsed, kpis, group_by):
metric_name = {
"po_value": "PO value",
"off_contract": "Off-contract spend",
"service_spend": "Service PO spend"
}[parsed["metric"]]
date_str = f'{parsed["start"].isoformat()} to {parsed["end"].isoformat()}'
base = f"{metric_name} from {date_str}"
details = f"{kpis['suppliers']} suppliers across {kpis['lines']} line items; off-contract ratio {kpis['off_ratio']:.1%}."
if group_by:
return f"{base}. Grouped by {group_by}. {details}"
return f"{base}. {details}"
# =========================
# UI: Sidebar Controls
# =========================
with st.sidebar:
st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=140)
st.markdown("## 🧭 Procurement Insight Agent")
st.markdown("Elegant Streamlit demo with synthetic procurement data.")
st.markdown("---")
seed = st.slider("Random seed", 1, 9999, 42)
rows = st.select_slider("Dataset size", options=[10_000, 20_000, 40_000, 80_000, 120_000], value=40_000)
st.caption("Higher rows = richer charts, slower compute.")
st.markdown("---")
default_prompt = "Top 5 suppliers by PO value this year by month"
user_prompt = st.text_area("Ask a question", value=default_prompt, height=96, label_visibility="visible", placeholder="e.g., Off-contract spend by purchasing org last quarter")
st.markdown("---")
st.caption("Tip: Try prompts like:")
st.code("PO value by month in 2025\nTop 5 suppliers this year by month\nOff-contract spend by purchasing org last quarter\nService spend by company in Q2 2024", language="text")
# Regenerate data if settings changed
if (seed != 42) or (rows != 40_000):
df_raw = generate_synthetic_procurement(seed=seed, rows=rows)
# =========================
# Header
# =========================
st.markdown("# 🧠 Procurement Insight Agent")
st.markdown(
"Turn natural-language questions into procurement insights using a polished Streamlit UI and synthetic S/4HANA-style analytics data."
)
# =========================
# Agent Parse + Compute
# =========================
parsed = parse_prompt(user_prompt)
df_time = filter_timeframe(df_raw, parsed["start"], parsed["end"])
df_metric = compute_metric(df_time, parsed["metric"])
kpis = kpi_summary(df_time, df_metric)
summary_text = insight_sentence(parsed, kpis, parsed["group_by"])
# =========================
# KPI Row
# =========================
c1, c2, c3, c4 = st.columns(4)
with c1:
st.markdown('<div class="kpi"><h3>Total Spend</h3><div class="value">${:,.0f}</div></div>'.format(kpis["total_spend"]), unsafe_allow_html=True)
with c2:
st.markdown('<div class="kpi"><h3>Line Items</h3><div class="value">{:,}</div></div>'.format(kpis["lines"]), unsafe_allow_html=True)
with c3:
st.markdown('<div class="kpi"><h3>Suppliers</h3><div class="value">{:,}</div></div>'.format(kpis["suppliers"]), unsafe_allow_html=True)
with c4:
st.markdown('<div class="kpi"><h3>Off-Contract Ratio</h3><div class="value">{:.1%}</div></div>'.format(kpis["off_ratio"]), unsafe_allow_html=True)
st.markdown(f"#### {summary_text}")
# =========================
# Main Tabs
# =========================
tab_trend, tab_breakdown, tab_table, tab_agent = st.tabs(["Trend & Composition", "Breakdowns & Drilldowns", "Data Table", "Agent Plan"])
# --- Trend & Composition ---
with tab_trend:
agg, time_col = group_and_aggregate(df_metric, parsed["grain"], parsed["group_by"])
agg_top = topn_if_needed(agg, parsed["top_n"], parsed["group_by"], time_col)
if parsed["group_by"]:
fig = px.line(
agg_top.sort_values(time_col),
x=time_col, y="TotalAmount", color=parsed["group_by"],
markers=True, line_shape="spline",
title="Trend"
)
else:
fig = px.bar(
agg_top.sort_values(time_col),
x=time_col, y="TotalAmount", title="Trend"
)
fig.update_layout(
margin=dict(l=10, r=10, t=50, b=10),
height=420,
template="plotly_dark",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
st.plotly_chart(fig, use_container_width=True)
# Composition pie for current period
comp_group = parsed["group_by"] or "Supplier"
comp = df_metric.groupby(comp_group)["NetAmount"].sum().reset_index().sort_values("NetAmount", ascending=False).head(10)
fig2 = px.pie(comp, values="NetAmount", names=comp_group, title=f"Composition by {comp_group} (Top 10)")
fig2.update_layout(template="plotly_dark", height=420)
st.plotly_chart(fig2, use_container_width=True)
# --- Breakdowns & Drilldowns ---
with tab_breakdown:
st.markdown("##### Drilldown Controls")
col_a, col_b, col_c = st.columns(3)
with col_a:
dim1 = st.selectbox("Primary dimension", ["Supplier", "PurchasingOrganization", "CompanyCode", "MaterialGroup"], index=0)
with col_b:
dim2 = st.selectbox("Secondary dimension (optional)", ["None", "PurchasingOrganization", "CompanyCode", "MaterialGroup", "Supplier"], index=1)
dim2 = None if dim2 == "None" else dim2
with col_c:
n = st.slider("Top N", 5, 25, 10, step=5)
# Calculate totals
if dim2:
gb = df_metric.groupby([dim1, dim2])["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
top_entities = gb.groupby(dim1)["TotalAmount"].sum().sort_values(ascending=False).head(n).index
gb = gb[gb[dim1].isin(top_entities)]
fig3 = px.bar(gb, x="TotalAmount", y=dim1, color=dim2, orientation="h", title=f"Top {n} by {dim1} with {dim2}")
else:
gb = df_metric.groupby(dim1)["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
gb = gb.sort_values("TotalAmount", ascending=False).head(n)
fig3 = px.bar(gb, x="TotalAmount", y=dim1, orientation="h", title=f"Top {n} by {dim1}")
fig3.update_layout(template="plotly_dark", height=520, margin=dict(l=10, r=10, t=50, b=10))
st.plotly_chart(fig3, use_container_width=True)
st.markdown("##### Cohort Trend")
cohort_value = st.selectbox(f"Pick a {dim1} cohort for trend", options=sorted(df_metric[dim1].unique().tolist())[:250])
cohort = df_metric[df_metric[dim1] == cohort_value]
cohort_trend = cohort.groupby(cohort["CalendarMonth"].dt.to_period("M").astype(str))["NetAmount"].sum().reset_index()
cohort_trend.columns = ["Month", "TotalAmount"]
fig4 = px.area(cohort_trend, x="Month", y="TotalAmount", title=f"Trend for {dim1}: {cohort_value}")
fig4.update_layout(template="plotly_dark", height=380)
st.plotly_chart(fig4, use_container_width=True)
# --- Data Table ---
with tab_table:
st.markdown("##### Result Table")
# Prepare final display table aligned with parsed settings
final = agg_top.copy()
# Beautify column order
cols = [c for c in ["Year", "Quarter", "Month", parsed.get("group_by"), "TotalAmount"] if c in final.columns]
final = final[cols]
st.dataframe(final, use_container_width=True, hide_index=True)
st.download_button("Download CSV", data=final.to_csv(index=False).encode("utf-8"), file_name="procurement_results.csv", mime="text/csv")
# --- Agent Plan ---
with tab_agent:
st.markdown("##### Agent Interpretation")
st.json({
"parsed_prompt": parsed,
"notes": "Replace synthetic data functions with an OData client to C_* CDS queries in S/4HANA. The rest of the pipeline remains unchanged."
})
st.markdown('<div class="footer-note">UI inspired by enterprise analytics dashboards. Built with Streamlit, Plotly, and synthetic data that mimics S/4HANA Embedded Analytics structures.</div>', unsafe_allow_html=True)