# Procurement Insight Agent — Streamlit demo app (Hugging Face Spaces).
import math
import os
import re
from datetime import date, datetime

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from dateutil.relativedelta import relativedelta
# =========================
# Theming and Page Config
# =========================
# Must be the first Streamlit call in the script: sets the browser-tab
# title/icon and opens a wide layout with the sidebar expanded.
st.set_page_config(
    page_title="Procurement Insight Agent",
    page_icon="🧭",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for a premium look.
# Injected on every rerun; unsafe_allow_html is required to pass a raw
# <style> tag through st.markdown. Selectors target Streamlit's internal
# test-ids/classes, so they may need updating on Streamlit upgrades.
st.markdown("""
<style>
/* Global font and spacing */
html, body, [class*="css"] {
font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
}
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, #0f172a 0%, #111827 100%);
border-right: 1px solid #1f2937;
}
section[data-testid="stSidebar"] * {
color: #e5e7eb !important;
}
.block-container {
padding-top: 1.5rem;
}
.card {
background: #0b1220;
border: 1px solid #1f2937;
border-radius: 14px;
padding: 16px;
}
.kpi {
background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%);
border: 1px solid #1f2937;
border-radius: 14px;
padding: 18px;
}
.kpi h3 {
margin: 0;
color: #93c5fd;
font-weight: 600;
font-size: 0.95rem;
}
.kpi .value {
margin-top: 6px;
font-size: 1.6rem;
font-weight: 700;
color: #e5e7eb;
}
.caption-note {
color: #94a3b8;
font-size: 0.85rem;
}
.prompt-box {
background: #0b1220;
border: 1px solid #1f2937;
border-radius: 12px;
padding: 10px 12px;
}
.footer-note {
color: #94a3b8;
font-size: 0.8rem;
text-align: center;
margin-top: 24px;
}
.stTabs [data-baseweb="tab-list"] {
gap: 12px;
}
.stTabs [data-baseweb="tab"] {
background-color: #0b1220;
border-radius: 10px 10px 0 0;
padding: 10px 16px;
color: #e5e7eb;
border: 1px solid #1f2937;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%) !important;
color: #93c5fd !important;
}
</style>
""", unsafe_allow_html=True)
| # ========================= | |
| # Synthetic Data Generator | |
| # ========================= | |
| def generate_synthetic_procurement(seed=42, start_year=2023, end_year=2025, rows=40_000): | |
| """ | |
| Generates synthetic procurement line items reflecting common S/4HANA Embedded Analytics procurement fields. | |
| """ | |
| rng = np.random.default_rng(seed) | |
| months = pd.date_range(f"{start_year}-01-01", f"{end_year}-12-31", freq="MS") | |
| purchasing_orgs = ["1000", "2000", "3000", "4000"] | |
| company_codes = ["C100", "C200", "C300"] | |
| suppliers = [f"SUPP-{i:03d}" for i in range(1, 61)] | |
| material_groups = ["RAW", "PACK", "SERV", "CAPEX", "MRO"] | |
| currencies = ["USD", "EUR", "GBP", "INR"] | |
| # Seasonality and org effects | |
| def base_amount(month): | |
| # Seasonality: higher in Q2/Q4 | |
| q = (month.month-1)//3 + 1 | |
| base = 1.0 | |
| if q in (2, 4): | |
| base *= 1.2 | |
| if month.month in (11, 12): | |
| base *= 1.1 | |
| return base | |
| data = { | |
| "CalendarMonth": rng.choice(months, size=rows), | |
| "PurchasingOrganization": rng.choice(purchasing_orgs, size=rows, p=[0.35, 0.25, 0.25, 0.15]), | |
| "CompanyCode": rng.choice(company_codes, size=rows, p=[0.45, 0.35, 0.20]), | |
| "Supplier": rng.choice(suppliers, size=rows), | |
| "MaterialGroup": rng.choice(material_groups, size=rows, p=[0.3, 0.2, 0.25, 0.15, 0.10]), | |
| "Currency": rng.choice(currencies, size=rows, p=[0.5, 0.25, 0.1, 0.15]), | |
| } | |
| df = pd.DataFrame(data) | |
| df["CalendarYear"] = df["CalendarMonth"].dt.year | |
| df["Month"] = df["CalendarMonth"].dt.to_period("M").astype(str) | |
| # Amount generation with org/supplier effects | |
| org_factor = df["PurchasingOrganization"].map({"1000":1.2, "2000":0.9, "3000":1.0, "4000":0.8}) | |
| supp_strength = df["Supplier"].str[-3:].astype(int) | |
| supp_factor = 0.8 + (supp_strength / 1000.0) # small lift for higher IDs | |
| seasonal = df["CalendarMonth"].apply(base_amount).astype(float) | |
| base_val = rng.lognormal(mean=7.5, sigma=0.6, size=rows) # realistic skew | |
| df["NetAmount"] = (base_val * org_factor * supp_factor * seasonal).round(2) | |
| # Random off-contract flag | |
| df["OffContract"] = rng.choice([True, False], size=rows, p=[0.18, 0.82]) | |
| # For “service POs” | |
| df["IsService"] = df["MaterialGroup"].eq("SERV") | |
| return df | |
# Load synthetic data with the default seed/size; the sidebar controls
# further down may regenerate it with different settings.
df_raw = generate_synthetic_procurement()
| # ================================== | |
| # Helper: Natural Language to Query | |
| # ================================== | |
# Last calendar day of each quarter's final month. Quarter ends are never in
# February, so no leap-year handling is needed.
_QUARTER_END_DAY = {1: 31, 2: 30, 3: 30, 4: 31}


def _quarter_bounds(year: int, quarter: int):
    """Return (first_day, last_day) of the given calendar quarter."""
    start = date(year, (quarter - 1) * 3 + 1, 1)
    end = date(year, quarter * 3, _QUARTER_END_DAY[quarter])
    return start, end


def parse_prompt(prompt: str):
    """
    Parse a free-text question into a structured query spec.

    Lightweight keyword rules detect:
      - metric: "po_value", "off_contract", or "service_spend"
      - grain: "month", "quarter", or "year"
      - group_by: a known dimension column, or None
      - top_n: int taken from a "top N" phrase, or None
      - start/end: datetime.date window (YTD, last quarter, "Qn YYYY",
        a bare year, ...), defaulting to year-to-date

    Returns
    -------
    dict with keys: metric, grain, group_by, top_n, start, end.

    Bug fix: "last quarter" previously computed the window end as
    date(year, (this_q-1)*3, 1) - 1 day, which lands one month early
    (e.g. Feb 28 instead of Mar 31 when asked during Q2).
    """
    text = (prompt or "").lower()

    # Metric
    if "off-contract" in text or "off contract" in text or "leakage" in text:
        metric = "off_contract"
    elif "service" in text:
        metric = "service_spend"
    else:
        metric = "po_value"

    # Grain
    if "by month" in text or "monthly" in text:
        grain = "month"
    elif "by quarter" in text or "quarterly" in text or "qtr" in text:
        grain = "quarter"
    else:
        grain = "month" if "trend" in text else "year"

    # Grouping: first matching phrase wins (insertion order matters).
    group_map = {
        "supplier": "Supplier",
        "purchasing org": "PurchasingOrganization",
        "purchasing organization": "PurchasingOrganization",
        "company": "CompanyCode",
        "companycode": "CompanyCode",
        "material group": "MaterialGroup",
        "material": "MaterialGroup",
    }
    group_by = None
    for phrase, column in group_map.items():
        if phrase in text:
            group_by = column
            break

    # Top-N
    top_n = None
    m = re.search(r"top\s+(\d+)", text)
    if m:
        top_n = int(m.group(1))

    # Time window (branch order mirrors phrase specificity).
    today = date.today()
    year = today.year
    q_match = re.search(r"q([1-4])\s*(\d{4})", text)
    if "last year" in text or "previous year" in text:
        start, end = date(year - 1, 1, 1), date(year - 1, 12, 31)
    elif "this year" in text or "ytd" in text:
        start, end = date(year, 1, 1), today
    elif "last quarter" in text or "previous quarter" in text:
        this_q = (today.month - 1) // 3 + 1
        if this_q > 1:
            start, end = _quarter_bounds(year, this_q - 1)
        else:
            # In Q1 the previous quarter is Q4 of last year.
            start, end = _quarter_bounds(year - 1, 4)
    elif q_match:
        start, end = _quarter_bounds(int(q_match.group(2)), int(q_match.group(1)))
    elif re.search(r"\b20\d{2}\b", text):
        y = int(re.search(r"\b(20\d{2})\b", text).group(1))
        start, end = date(y, 1, 1), date(y, 12, 31)
    else:
        # Default to this year to keep it demo-friendly
        start, end = date(year, 1, 1), today

    return {
        "metric": metric,
        "grain": grain,
        "group_by": group_by,
        "top_n": top_n,
        "start": start,
        "end": end,
    }
| # ========================= | |
| # Query/Compute Functions | |
| # ========================= | |
def filter_timeframe(df, start: date, end: date):
    """Return the rows whose CalendarMonth lies in [start, end], inclusive."""
    in_window = df["CalendarMonth"].dt.date.between(start, end)
    return df[in_window]
def compute_metric(df, metric: str):
    """
    Narrow the frame to the rows relevant for the requested metric.

    "off_contract" keeps off-contract lines, "service_spend" keeps service
    POs; any other metric (e.g. "po_value") passes the frame through as-is.
    """
    flag_column = {"off_contract": "OffContract", "service_spend": "IsService"}.get(metric)
    if flag_column is None:
        return df
    return df[df[flag_column]]
| def group_and_aggregate(df, grain: str, group_by: str | None): | |
| work = df.copy() | |
| # Derive time buckets | |
| work["Year"] = work["CalendarMonth"].dt.year | |
| work["Quarter"] = work["CalendarMonth"].dt.to_period("Q").astype(str) | |
| work["Month"] = work["CalendarMonth"].dt.to_period("M").astype(str) | |
| time_col = {"year":"Year", "quarter":"Quarter", "month":"Month"}[grain] | |
| group_cols = [time_col] + ([group_by] if group_by else []) | |
| agg = work.groupby(group_cols, dropna=False)["NetAmount"].sum().reset_index() | |
| agg = agg.rename(columns={"NetAmount":"TotalAmount"}) | |
| agg = agg.sort_values("TotalAmount", ascending=False) | |
| return agg, time_col | |
| def topn_if_needed(df, top_n: int | None, group_by: str | None, time_col: str = None): | |
| if top_n and group_by: | |
| # For time series with group, take top entities over total, then filter | |
| total_by_entity = df.groupby(group_by)["TotalAmount"].sum().sort_values(ascending=False) | |
| keep = list(total_by_entity.head(top_n).index) | |
| return df[df[group_by].isin(keep)] | |
| return df | |
def kpi_summary(df_filtered, df_metric):
    """
    Headline KPIs for the metric-scoped frame.

    `df_filtered` (the time-filtered superset) is accepted for call-site
    symmetry; all KPIs are computed from `df_metric` only.

    Returns a dict with total_spend, lines, suppliers, off_ratio.
    """
    return {
        "total_spend": df_metric["NetAmount"].sum(),
        "lines": len(df_metric),
        "suppliers": df_metric["Supplier"].nunique(),
        "off_ratio": df_metric["OffContract"].mean(),
    }
def insight_sentence(parsed, kpis, group_by):
    """Compose the one-line natural-language summary shown under the KPI row."""
    labels = {
        "po_value": "PO value",
        "off_contract": "Off-contract spend",
        "service_spend": "Service PO spend",
    }
    window = f'{parsed["start"].isoformat()} to {parsed["end"].isoformat()}'
    base = f'{labels[parsed["metric"]]} from {window}'
    details = (
        f"{kpis['suppliers']} suppliers across {kpis['lines']} line items; "
        f"off-contract ratio {kpis['off_ratio']:.1%}."
    )
    grouping = f" Grouped by {group_by}." if group_by else ""
    return f"{base}.{grouping} {details}"
# =========================
# UI: Sidebar Controls
# =========================
with st.sidebar:
    st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=140)
    st.markdown("## 🧭 Procurement Insight Agent")
    st.markdown("Elegant Streamlit demo with synthetic procurement data.")
    st.markdown("---")
    # Data-generation knobs; consumed by the regeneration check below.
    seed = st.slider("Random seed", 1, 9999, 42)
    rows = st.select_slider("Dataset size", options=[10_000, 20_000, 40_000, 80_000, 120_000], value=40_000)
    st.caption("Higher rows = richer charts, slower compute.")
    st.markdown("---")
    # Free-text question driving the whole page; parsed by parse_prompt().
    default_prompt = "Top 5 suppliers by PO value this year by month"
    user_prompt = st.text_area("Ask a question", value=default_prompt, height=96, label_visibility="visible", placeholder="e.g., Off-contract spend by purchasing org last quarter")
    st.markdown("---")
    st.caption("Tip: Try prompts like:")
    st.code("PO value by month in 2025\nTop 5 suppliers this year by month\nOff-contract spend by purchasing org last quarter\nService spend by company in Q2 2024", language="text")

# Regenerate data if settings changed
# NOTE(review): with non-default settings this regenerates on every rerun
# (no st.cache_data), so large datasets recompute per interaction.
if (seed != 42) or (rows != 40_000):
    df_raw = generate_synthetic_procurement(seed=seed, rows=rows)
# =========================
# Header
# =========================
st.markdown("# 🧠 Procurement Insight Agent")
st.markdown(
    "Turn natural-language questions into procurement insights using a polished Streamlit UI and synthetic S/4HANA-style analytics data."
)

# =========================
# Agent Parse + Compute
# =========================
# Pipeline: prompt -> structured spec -> time filter -> metric filter -> KPIs
# -> one-line summary. Every widget/chart below reads from these frames.
parsed = parse_prompt(user_prompt)
df_time = filter_timeframe(df_raw, parsed["start"], parsed["end"])
df_metric = compute_metric(df_time, parsed["metric"])
kpis = kpi_summary(df_time, df_metric)
summary_text = insight_sentence(parsed, kpis, parsed["group_by"])
# =========================
# KPI Row
# =========================
# Four KPI cards rendered as raw HTML so the .kpi CSS class (injected above)
# can style them; hence unsafe_allow_html=True on each.
c1, c2, c3, c4 = st.columns(4)
with c1:
    st.markdown('<div class="kpi"><h3>Total Spend</h3><div class="value">${:,.0f}</div></div>'.format(kpis["total_spend"]), unsafe_allow_html=True)
with c2:
    st.markdown('<div class="kpi"><h3>Line Items</h3><div class="value">{:,}</div></div>'.format(kpis["lines"]), unsafe_allow_html=True)
with c3:
    st.markdown('<div class="kpi"><h3>Suppliers</h3><div class="value">{:,}</div></div>'.format(kpis["suppliers"]), unsafe_allow_html=True)
with c4:
    st.markdown('<div class="kpi"><h3>Off-Contract Ratio</h3><div class="value">{:.1%}</div></div>'.format(kpis["off_ratio"]), unsafe_allow_html=True)
st.markdown(f"#### {summary_text}")
# =========================
# Main Tabs
# =========================
tab_trend, tab_breakdown, tab_table, tab_agent = st.tabs(["Trend & Composition", "Breakdowns & Drilldowns", "Data Table", "Agent Plan"])

# --- Trend & Composition ---
with tab_trend:
    # Aggregate to the requested grain/grouping, then trim to the top-N
    # entities. agg_top is also reused by the Data Table tab further down.
    agg, time_col = group_and_aggregate(df_metric, parsed["grain"], parsed["group_by"])
    agg_top = topn_if_needed(agg, parsed["top_n"], parsed["group_by"], time_col)
    if parsed["group_by"]:
        # One line per grouped entity over time.
        fig = px.line(
            agg_top.sort_values(time_col),
            x=time_col, y="TotalAmount", color=parsed["group_by"],
            markers=True, line_shape="spline",
            title="Trend"
        )
    else:
        # No grouping: a single bar per time bucket.
        fig = px.bar(
            agg_top.sort_values(time_col),
            x=time_col, y="TotalAmount", title="Trend"
        )
    fig.update_layout(
        margin=dict(l=10, r=10, t=50, b=10),
        height=420,
        template="plotly_dark",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    st.plotly_chart(fig, use_container_width=True)

    # Composition pie for current period
    comp_group = parsed["group_by"] or "Supplier"
    comp = df_metric.groupby(comp_group)["NetAmount"].sum().reset_index().sort_values("NetAmount", ascending=False).head(10)
    fig2 = px.pie(comp, values="NetAmount", names=comp_group, title=f"Composition by {comp_group} (Top 10)")
    fig2.update_layout(template="plotly_dark", height=420)
    st.plotly_chart(fig2, use_container_width=True)
# --- Breakdowns & Drilldowns ---
with tab_breakdown:
    st.markdown("##### Drilldown Controls")
    col_a, col_b, col_c = st.columns(3)
    with col_a:
        dim1 = st.selectbox("Primary dimension", ["Supplier", "PurchasingOrganization", "CompanyCode", "MaterialGroup"], index=0)
    with col_b:
        dim2 = st.selectbox("Secondary dimension (optional)", ["None", "PurchasingOrganization", "CompanyCode", "MaterialGroup", "Supplier"], index=1)
        # The literal "None" option is a sentinel for "no secondary split".
        dim2 = None if dim2 == "None" else dim2
    with col_c:
        n = st.slider("Top N", 5, 25, 10, step=5)
    # Calculate totals
    if dim2:
        # Stacked horizontal bars: rank primary entities by grand total,
        # keep the top N, split each bar by the secondary dimension.
        gb = df_metric.groupby([dim1, dim2])["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
        top_entities = gb.groupby(dim1)["TotalAmount"].sum().sort_values(ascending=False).head(n).index
        gb = gb[gb[dim1].isin(top_entities)]
        fig3 = px.bar(gb, x="TotalAmount", y=dim1, color=dim2, orientation="h", title=f"Top {n} by {dim1} with {dim2}")
    else:
        gb = df_metric.groupby(dim1)["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
        gb = gb.sort_values("TotalAmount", ascending=False).head(n)
        fig3 = px.bar(gb, x="TotalAmount", y=dim1, orientation="h", title=f"Top {n} by {dim1}")
    fig3.update_layout(template="plotly_dark", height=520, margin=dict(l=10, r=10, t=50, b=10))
    st.plotly_chart(fig3, use_container_width=True)

    st.markdown("##### Cohort Trend")
    # Picker capped at 250 options to keep the widget responsive.
    cohort_value = st.selectbox(f"Pick a {dim1} cohort for trend", options=sorted(df_metric[dim1].unique().tolist())[:250])
    cohort = df_metric[df_metric[dim1] == cohort_value]
    # Monthly spend series for the selected cohort only.
    cohort_trend = cohort.groupby(cohort["CalendarMonth"].dt.to_period("M").astype(str))["NetAmount"].sum().reset_index()
    cohort_trend.columns = ["Month", "TotalAmount"]
    fig4 = px.area(cohort_trend, x="Month", y="TotalAmount", title=f"Trend for {dim1}: {cohort_value}")
    fig4.update_layout(template="plotly_dark", height=380)
    st.plotly_chart(fig4, use_container_width=True)
# --- Data Table ---
with tab_table:
    st.markdown("##### Result Table")
    # Prepare final display table aligned with parsed settings.
    # NOTE: agg_top is assigned while the first tab's body runs; Streamlit
    # executes all tab bodies top-to-bottom on each rerun, so it is defined.
    final = agg_top.copy()
    # Beautify column order: time bucket(s), optional grouping, then amount.
    cols = [c for c in ["Year", "Quarter", "Month", parsed.get("group_by"), "TotalAmount"] if c in final.columns]
    final = final[cols]
    st.dataframe(final, use_container_width=True, hide_index=True)
    st.download_button("Download CSV", data=final.to_csv(index=False).encode("utf-8"), file_name="procurement_results.csv", mime="text/csv")
# --- Agent Plan ---
with tab_agent:
    st.markdown("##### Agent Interpretation")
    # Show the raw parsed spec for transparency/debugging.
    # NOTE(review): parsed contains datetime.date values — this relies on
    # st.json serializing them; confirm behavior on Streamlit upgrades.
    st.json({
        "parsed_prompt": parsed,
        "notes": "Replace synthetic data functions with an OData client to C_* CDS queries in S/4HANA. The rest of the pipeline remains unchanged."
    })

# Footer styled via the .footer-note CSS class injected at the top of the file.
st.markdown('<div class="footer-note">UI inspired by enterprise analytics dashboards. Built with Streamlit, Plotly, and synthetic data that mimics S/4HANA Embedded Analytics structures.</div>', unsafe_allow_html=True)