import os from pathlib import Path import random import numpy as np import pandas as pd import plotly.express as px import plotly.graph_objects as go import streamlit as st st.set_page_config(page_title="Freshie", page_icon="🐱", layout="wide", initial_sidebar_state="expanded") def inject_css(): st.markdown( """ """, unsafe_allow_html=True, ) def find_data_path() -> Path | None: candidates = [ Path("/app/perishable_goods_management.csv"), Path("perishable_goods_management.csv"), Path("/mnt/data/perishable_goods_management.csv"), ] for p in candidates: if p.exists(): return p return None @st.cache_data(show_spinner=False) def load_data() -> pd.DataFrame: path = find_data_path() if path is None: raise FileNotFoundError("perishable_goods_management.csv not found in /app or current folder.") df = pd.read_csv(path) # Standardize likely date field date_col = None for c in ["transaction_date", "date", "Date"]: if c in df.columns: date_col = c break if date_col is None: raise ValueError("No transaction date column found.") if date_col != "transaction_date": df = df.rename(columns={date_col: "transaction_date"}) df["transaction_date"] = pd.to_datetime(df["transaction_date"], errors="coerce") df = df.dropna(subset=["transaction_date"]).copy() # Expected columns with fallbacks if "discount_pct" not in df.columns: if "discount_percentage" in df.columns: df["discount_pct"] = df["discount_percentage"] else: df["discount_pct"] = 0.0 for col in ["units_sold", "initial_quantity", "daily_demand", "profit", "days_until_expiry"]: if col not in df.columns: raise ValueError(f"Missing required column: {col}") if "category" not in df.columns: df["category"] = "Unknown" if "region" not in df.columns: df["region"] = "Unknown" if "store_id" not in df.columns: df["store_id"] = "STORE_001" if "product_name" not in df.columns: df["product_name"] = df["category"].astype(str) if "selling_price" not in df.columns and "price" in df.columns: df["selling_price"] = df["price"] if "base_price" not in df.columns and "selling_price" in df.columns: df["base_price"] = df["selling_price"] / (1 - df["discount_pct"].replace(1, 0.999).clip(0, 0.99)) if "base_price" not in df.columns: df["base_price"] = 1.0 if "selling_price" not in df.columns: df["selling_price"] = 1.0 df["discount_pct"] = df["discount_pct"].fillna(0) if df["discount_pct"].max() > 1: df["discount_pct"] = df["discount_pct"] / 100 df["discount_pct"] = df["discount_pct"].clip(0, 1) if "waste_pct" not in df.columns: denom = df["initial_quantity"].replace(0, np.nan) if "units_wasted" in df.columns: df["waste_pct"] = (df["units_wasted"] / denom).fillna(0) else: df["waste_pct"] = 0.0 if df["waste_pct"].max() > 1: df["waste_pct"] = df["waste_pct"] / 100 df["waste_pct"] = df["waste_pct"].clip(0, 1) if "units_wasted" not in df.columns: df["units_wasted"] = (df["waste_pct"] * df["initial_quantity"]).round() df["leftover_units"] = (df["initial_quantity"] - df["units_sold"]).clip(lower=0) df["stockout_flag"] = (df["daily_demand"] > df["initial_quantity"]).astype(int) df["lost_sales_units"] = (df["daily_demand"] - df["units_sold"]).clip(lower=0) df["sell_through_pct"] = (df["units_sold"] / df["initial_quantity"].replace(0, np.nan)).fillna(0).clip(0, 1) df["month"] = df["transaction_date"].dt.strftime("%Y-%m") df["is_weekend"] = (df["transaction_date"].dt.dayofweek >= 5).astype(int) df["expiry_bucket"] = pd.cut( df["days_until_expiry"], bins=[-1, 1, 3, 7, 30, 10000], labels=["<=1d", "2-3d", "4-7d", "8-30d", ">30d"], ).astype(str) return df PCA_SHAP_LIBRARY = { "Bakery": { "avg_profit": 329.82, "avg_margin": 0.0573, "avg_waste": 0.1535, "avg_units_sold": 216.88, "avg_daily_demand": 114.32, "region_profit": [("Southwest", 314.96), ("Midwest", 319.24), ("Southeast", 324.43), ("Northeast", 332.06), ("West", 350.22)], "business": "Bakery is a high-turnover category with positive profit but thin margins. It behaves like a traffic driver: demand is strong, but a large part of profit quality depends on pricing discipline and waste control.", "pc1": 0.3441, "pc2": 0.2786, "pc1_theme": "Profitability efficiency axis", "pc1_loadings": [("avg_profit_margin", "+0.529"), ("avg_days_until_expiry", "+0.429"), ("avg_sell_through", "+0.412"), ("avg_waste_pct", "-0.412"), ("avg_discount_pct", "-0.409")], "pc2_theme": "Demand and replenishment intensity axis", "pc2_loadings": [("avg_daily_demand", "+0.543"), ("avg_waste_pct", "-0.406"), ("avg_sell_through", "+0.406"), ("avg_initial_qty", "+0.379"), ("avg_discount_pct", "+0.351")], "clusters": [ ("Cluster A · Core winners", "Demand 85.13 · Sell-through 98.09% · Waste 1.91% · Margin 30.99%", "Protect availability, keep discounting light, and avoid stockouts."), ("Cluster B · Traffic drivers but margin issue", "Demand 281.09 · Sell-through 94.90% · Waste 5.10% · Margin -9.91%", "Review price, cost, and promotion depth before cutting stock."), ("Cluster C · Balanced mid-tier", "Demand 89.10 · Sell-through 69.40% · Waste 30.60% · Margin 3.87%", "Use tighter replenishment and light tactical markdowns."), ("Cluster D · High-waste losers", "Demand 71.55 · Sell-through 49.53% · Waste 50.47% · Margin -95.47%", "Reduce initial stock, markdown earlier, bundle, and trim weak-SKU stores."), ], "r2": 0.809, "mae": 227.69, "drivers": ["discount_pct", "base_price", "initial_quantity", "cost_price", "markdown_applied", "daily_demand", "days_until_expiry", "shelf_life_days", "is_weekend", "spoilage_risk"], "conclusion": "Bakery profit is mainly shaped by discounting, pricing, stock depth, and demand strength rather than expiry alone.", "actions": [ "Review price and cost on high-demand but negative-margin SKUs.", "Start markdown earlier for high-waste items instead of waiting until the last moment.", "Use weekend-sensitive promotions in a more targeted rhythm.", ], }, "Meat": { "avg_profit": 777.27, "avg_margin": 0.1411, "avg_waste": 0.1820, "avg_units_sold": 213.96, "avg_daily_demand": 73.30, "region_profit": [("Northeast", 757.70), ("Southwest", 765.17), ("Southeast", 779.24), ("Midwest", 793.72), ("West", 799.80)], "business": "Meat is the healthiest of the three in profit contribution. Waste is meaningful, but margin quality is stronger than Bakery, so it is worth managing through precision replenishment and loss control.", "pc1": 0.2960, "pc2": 0.2060, "pc1_theme": "Operational efficiency axis", "pc1_loadings": [("avg_profit_margin", "+0.498"), ("avg_sell_through", "+0.488"), ("avg_waste_pct", "-0.488"), ("avg_days_until_expiry", "+0.356"), ("avg_discount_pct", "-0.339")], "pc2_theme": "Sales scale and stocking intensity axis", "pc2_loadings": [("avg_daily_demand", "+0.627"), ("avg_initial_qty", "+0.487"), ("avg_days_until_expiry", "-0.356"), ("avg_discount_pct", "+0.347")], "clusters": [ ("Cluster A · Premium winners", "Demand 58.81 · Sell-through 97.74% · Waste 2.26% · Margin 38.42%", "Protect availability for top sellers and avoid over-restricting stock."), ("Cluster B · Strong traffic winners", "Demand 184.22 · Sell-through 89.29% · Waste 10.71% · Margin 10.44%", "Keep these as stable replenishment anchors."), ("Cluster C · Manageable mid-tier", "Demand 58.11 · Sell-through 68.51% · Waste 31.49% · Margin 8.76%", "Fine-tune inventory and use local promotions only where needed."), ("Cluster D · High-waste loss cluster", "Demand 46.16 · Sell-through 42.50% · Waste 57.50% · Margin -86.58%", "Lower stock, tighten cold-chain rhythm, and optimize assortment by store."), ], "r2": 0.720, "mae": 464.22, "drivers": ["discount_pct", "initial_quantity", "base_price", "cost_price", "days_until_expiry", "daily_demand", "markdown_applied", "is_weekend", "spoilage_risk", "shelf_life_days"], "conclusion": "Meat profit is driven by discounting, stocking quantity, pricing, cost, and expiry pressure; compared with Bakery, it is more sensitive to initial quantity and days until expiry.", "actions": [ "Cut initial quantity first for high-waste clusters.", "Use earlier and lighter markdowns on SKUs with real demand but rising expiry pressure.", "Protect availability on the high-profit core group instead of applying blanket stock cuts.", ], }, "Ready_to_Eat": { "avg_profit": 182.98, "avg_margin": -0.1151, "avg_waste": 0.1456, "avg_units_sold": 220.33, "avg_daily_demand": 186.41, "region_profit": [("Northeast", 162.08), ("Southwest", 169.03), ("West", 183.12), ("Southeast", 189.53), ("Midwest", 224.99)], "business": "Ready-to-Eat shows the strongest contradiction: very high demand and sales volume, but negative average margin. The category sells, but the current commercial logic does not monetize that demand well.", "pc1": 0.3428, "pc2": 0.2842, "pc1_theme": "Promotion and expiry pressure axis", "pc1_loadings": [("avg_discount_pct", "+0.536"), ("avg_days_until_expiry", "-0.520"), ("avg_profit_margin", "-0.486"), ("avg_daily_demand", "+0.299"), ("avg_spoilage_risk", "+0.291")], "pc2_theme": "Waste versus sell-through axis", "pc2_loadings": [("avg_waste_pct", "+0.616"), ("avg_sell_through", "-0.616"), ("avg_daily_demand", "-0.344")], "clusters": [ ("Cluster A · Profitable fast movers", "Demand 137.37 · Sell-through 98.45% · Waste 1.55% · Margin 10.76%", "Protect supply and avoid stockouts."), ("Cluster B · Traffic drivers but deeply margin-negative", "Demand 564.13 · Sell-through 95.40% · Waste 4.60% · Margin -33.18%", "Fix price and promotion depth first."), ("Cluster C · High waste and deepest loss", "Demand 149.11 · Sell-through 58.54% · Waste 41.46% · Margin -111.87%", "Cut stock immediately, markdown earlier, bundle, and prune poor store fits."), ("Cluster D · Neutral mid-tier", "Demand 123.52 · Sell-through 69.63% · Waste 30.37% · Margin 0.45%", "Use light promotion and micro-replenishment changes to move toward Cluster A."), ], "r2": 0.823, "mae": 278.78, "drivers": ["discount_pct", "base_price", "cost_price", "initial_quantity", "markdown_applied", "days_until_expiry", "daily_demand", "spoilage_risk", "shelf_life_days", "is_weekend"], "conclusion": "Discount intensity is the dominant profit driver in Ready-to-Eat, followed by pricing, cost, and stock depth. The main managerial lever is promotion redesign before inventory expansion.", "actions": [ "Review discount_pct first; move from blanket discounts to targeted markdowns.", "Raise or reset pricing on high-demand hero items before adding more stock.", "Shorten replenishment cycles and reduce single-drop quantity on the high-waste group.", ], }, } def _sync_region_from_store(df: pd.DataFrame): stores = st.session_state.get("store_filter", []) if stores: inferred = sorted(df.loc[df["store_id"].isin(stores), "region"].dropna().unique().tolist()) st.session_state["region_filter"] = inferred def _sync_store_from_region(df: pd.DataFrame): regions = st.session_state.get("region_filter", []) current_stores = st.session_state.get("store_filter", []) allowed = sorted(df.loc[df["region"].isin(regions), "store_id"].dropna().unique().tolist()) if regions else sorted(df["store_id"].dropna().unique().tolist()) st.session_state["store_filter"] = [s for s in current_stores if s in allowed] def build_supply_balancing_table(df: pd.DataFrame) -> pd.DataFrame: work = df.copy() grouped = work.groupby(["region", "store_id", "category"], dropna=False).agg( remaining_inventory=("leftover_units", "mean"), demand=("daily_demand", "mean"), unmet_demand=("lost_sales_units", "mean"), avg_days_until_expiry=("days_until_expiry", "mean"), waste_pct=("waste_pct", "mean"), avg_profit=("profit", "mean"), ).reset_index() grouped["surplus_qty"] = (grouped["remaining_inventory"] - grouped["demand"]).clip(lower=0) return grouped def filter_scope(df: pd.DataFrame, selected_region="All", selected_store="All", selected_category="All") -> pd.DataFrame: scoped = df.copy() if selected_store != "All": scoped = scoped[scoped["store_id"] == selected_store] elif selected_region != "All": scoped = scoped[scoped["region"] == selected_region] if selected_category != "All": scoped = scoped[scoped["category"] == selected_category] return scoped def apply_filters(df: pd.DataFrame): st.sidebar.header("Filters") if "region_filter" not in st.session_state: st.session_state["region_filter"] = [] if "store_filter" not in st.session_state: st.session_state["store_filter"] = [] regions = sorted(df["region"].dropna().unique().tolist()) selected_regions = st.sidebar.multiselect( "Region", regions, key="region_filter", on_change=_sync_store_from_region, args=(df,), ) available_stores = sorted( df.loc[df["region"].isin(selected_regions), "store_id"].dropna().unique().tolist() ) if selected_regions else sorted(df["store_id"].dropna().unique().tolist()) # trim stale selections before widget render st.session_state["store_filter"] = [s for s in st.session_state.get("store_filter", []) if s in available_stores] selected_stores = st.sidebar.multiselect( "Store", available_stores, key="store_filter", on_change=_sync_region_from_store, args=(df,), ) if selected_stores: inferred_regions = sorted(df.loc[df["store_id"].isin(selected_stores), "region"].dropna().unique().tolist()) selected_regions = inferred_regions st.sidebar.caption("Auto region: " + ", ".join(inferred_regions)) category_options = ["All"] + sorted(df["category"].dropna().unique().tolist()) chosen_category = st.sidebar.selectbox("Category", category_options) day_type = st.sidebar.selectbox("Day type", ["All", "Weekday", "Weekend"]) use_expiry = st.sidebar.checkbox("Limit to inventory below 60 days until expiry", value=False) expiry_range = (0, 60) if use_expiry: expiry_range = st.sidebar.slider("Days until expiry", 0, 60, (0, 60)) exec_scope = df.copy() if selected_regions: exec_scope = exec_scope[exec_scope["region"].isin(selected_regions)] if selected_stores: exec_scope = exec_scope[exec_scope["store_id"].isin(selected_stores)] if day_type == "Weekday": exec_scope = exec_scope[exec_scope["is_weekend"] == 0] elif day_type == "Weekend": exec_scope = exec_scope[exec_scope["is_weekend"] == 1] if use_expiry: exec_scope = exec_scope[(exec_scope["days_until_expiry"] >= expiry_range[0]) & (exec_scope["days_until_expiry"] <= expiry_range[1])] filtered = exec_scope.copy() if chosen_category != "All": filtered = filtered[filtered["category"] == chosen_category] scope_info = { "stores": selected_stores, "regions": selected_regions, "category": chosen_category, "day_type": day_type, "use_expiry": use_expiry, "expiry_range": expiry_range, } return filtered, exec_scope, scope_info def build_category_recommendations(row: pd.Series, cat_summary: pd.DataFrame) -> list[str]: waste_mean = float(cat_summary["waste_pct"].mean()) if len(cat_summary) else 0 waste_p75 = float(cat_summary["waste_pct"].quantile(0.75)) if len(cat_summary) else waste_mean demand_mean = float(cat_summary["avg_demand"].mean()) if len(cat_summary) else 0 demand_p75 = float(cat_summary["avg_demand"].quantile(0.75)) if len(cat_summary) else demand_mean profit_mean = float(cat_summary["avg_profit"].mean()) if len(cat_summary) else 0 stockout_mean = float(cat_summary["stockout_rate"].mean()) if len(cat_summary) else 0 stockout_p75 = float(cat_summary["stockout_rate"].quantile(0.75)) if len(cat_summary) else stockout_mean sell_mean = float(cat_summary["sell_through"].mean()) if len(cat_summary) else 0 advice: list[str] = [] if row["waste_pct"] >= max(0.30, waste_p75): advice.append("start markdown earlier") if row["avg_demand"] >= demand_p75 and row["stockout_rate"] >= max(0.10, stockout_mean, stockout_p75 * 0.8): advice.append("increase replenishment") if row["avg_profit"] < 0 and row["avg_demand"] < demand_mean: advice.append("reduce replenishment") if row["waste_pct"] > waste_mean or row["avg_profit"] < profit_mean: advice.append("review mix and margin") if row["sell_through"] < max(0.45, sell_mean * 0.8): advice.append("prune weak SKUs / stores") if not advice: advice.append("maintain current playbook") # preserve order and uniqueness return list(dict.fromkeys(advice)) def _safe_profit_margin(df: pd.DataFrame) -> pd.Series: revenue = (df["selling_price"] * df["units_sold"]).replace(0, np.nan) return (df["profit"] / revenue).replace([np.inf, -np.inf], np.nan).fillna(0) def _assign_cluster_names(cluster_summary: pd.DataFrame) -> pd.DataFrame: ranked = cluster_summary.copy() ranked["score"] = ( ranked["margin"] * 0.45 + ranked["sell_through"] * 0.35 - ranked["waste"] * 0.20 ) ranked = ranked.sort_values(["score", "avg_profit"], ascending=[False, False]).reset_index(drop=True) names: list[str] = [] used: dict[str, int] = {} for i, row in ranked.iterrows(): if i == 0 and row["margin"] >= 0: base = "Core winners" elif row["margin"] < 0 and row["sell_through"] >= ranked["sell_through"].median(): base = "Traffic but unprofitable" elif row["waste"] >= ranked["waste"].median(): base = "High-waste cluster" else: base = "Mid-tier cluster" used[base] = used.get(base, 0) + 1 suffix = "" if used[base] == 1 else f" {used[base]}" names.append(f"{base}{suffix}") ranked["cluster_name"] = names return ranked.drop(columns=["score"]) def _cluster_color_map(cluster_names: list[str]) -> dict[str, str]: palette = { "Core winners": "#1f77b4", "Traffic but unprofitable": "#ff7f0e", "High-waste cluster": "#d62728", "Mid-tier cluster": "#7f7f7f", } mapping: dict[str, str] = {} for name in cluster_names: for base, color in palette.items(): if name.startswith(base): mapping[name] = color break else: mapping[name] = "#17becf" return mapping def _classify_product_segment(row: pd.Series, sell_thr: float, waste_thr: float) -> str: if row["avg_sell_through"] >= sell_thr and row["avg_profit"] >= 0: return "Scale ⭐" if row["avg_sell_through"] >= sell_thr and row["avg_profit"] < 0: return "Fix Pricing ⚠️" if row["avg_sell_through"] < sell_thr and row["avg_profit"] >= 0: return "Optimize 🛠" if row["avg_waste_pct"] >= waste_thr: return "Reduce ❌" return "Optimize 🛠" def _generate_sku_strategy(row: pd.Series, sell_thr: float, waste_thr: float, discount_thr: float, demand_thr: float) -> tuple[str, str]: actions: list[str] = [] if row["segment"] == "Scale ⭐": actions.extend(["protect availability", "keep discounts selective"]) if row["avg_discount_pct"] > discount_thr: actions.append("test a lighter markdown") headline = "Scale distribution" elif row["segment"] == "Fix Pricing ⚠️": actions.extend(["raise effective price or trim discount", "hold replenishment steady"]) if row["avg_discount_pct"] > discount_thr: actions.append("tighten markdown depth") if row["avg_daily_demand"] >= demand_thr: actions.append("treat as hero SKU, but monetize demand better") headline = "Fix pricing before adding stock" elif row["segment"] == "Reduce ❌": actions.extend(["cut initial quantity", "markdown earlier", "review store fit / assortment"]) headline = "Reduce exposure" else: actions.extend(["fine-tune stock depth", "use targeted promotion only where needed"]) if row["avg_waste_pct"] > waste_thr: actions.append("bring markdown trigger forward") headline = "Optimize locally" if row["avg_waste_pct"] >= waste_thr and "markdown earlier" not in actions: actions.append("reduce waste with earlier markdown") if row["avg_profit_margin"] < 0 and "review price / cost" not in actions and row["segment"] != "Reduce ❌": actions.append("review price / cost") if row["avg_sell_through"] < sell_thr * 0.85: actions.append("check demand realism before restocking") action_text = "; ".join(dict.fromkeys(actions)) return headline, action_text def _simulate_discount_profit(product_scope: pd.DataFrame, new_discount_pct: float) -> dict: sim = product_scope.copy() if sim.empty: return {} new_discount_pct = float(np.clip(new_discount_pct, 0, 0.8)) current_discount = float(sim["discount_pct"].mean()) base_price = sim["base_price"].replace(0, np.nan).fillna(sim["selling_price"]).astype(float) cost_price = sim["cost_price"].astype(float) if "cost_price" in sim.columns else sim["selling_price"].astype(float) * 0.7 initial_qty = sim["initial_quantity"].astype(float).clip(lower=0) daily_demand = sim["daily_demand"].astype(float).clip(lower=0) discount_delta = new_discount_pct - current_discount demand_multiplier = 1 + discount_delta * 1.35 demand_multiplier = np.clip(demand_multiplier, 0.65, 1.45) sim_price = base_price * (1 - new_discount_pct) sim_units = np.minimum(initial_qty, daily_demand * demand_multiplier).clip(lower=0) sim_leftover = (initial_qty - sim_units).clip(lower=0) sim_profit = ((sim_price - cost_price) * sim_units - cost_price * 0.35 * sim_leftover).mean() current_profit = float(sim["profit"].mean()) current_units = float(sim["units_sold"].mean()) sim_waste_pct = float((sim_leftover / initial_qty.replace(0, np.nan)).fillna(0).mean()) return { "current_discount": current_discount, "new_discount": new_discount_pct, "current_profit": current_profit, "sim_profit": float(sim_profit), "profit_delta": float(sim_profit - current_profit), "current_units": current_units, "sim_units": float(sim_units.mean()), "units_delta": float(sim_units.mean() - current_units), "sim_waste_pct": sim_waste_pct, } def _run_category_deep_dive(cat_df: pd.DataFrame) -> dict | None: if cat_df.empty: return None working = cat_df.copy() working["profit_margin"] = _safe_profit_margin(working) if "spoilage_risk" not in working.columns: working["spoilage_risk"] = working["waste_pct"] if "markdown_applied" not in working.columns: working["markdown_applied"] = (working["discount_pct"] > 0).astype(int) if "shelf_life_days" not in working.columns: working["shelf_life_days"] = working["days_until_expiry"] entity = working.groupby(["store_id", "product_name"], dropna=False).agg( region=("region", "first"), avg_profit=("profit", "mean"), avg_profit_margin=("profit_margin", "mean"), avg_waste_pct=("waste_pct", "mean"), avg_sell_through=("sell_through_pct", "mean"), avg_discount_pct=("discount_pct", "mean"), avg_daily_demand=("daily_demand", "mean"), avg_initial_qty=("initial_quantity", "mean"), avg_days_until_expiry=("days_until_expiry", "mean"), avg_units_sold=("units_sold", "mean"), avg_cost_price=("cost_price", "mean") if "cost_price" in working.columns else ("selling_price", "mean"), avg_base_price=("base_price", "mean"), avg_markdown_applied=("markdown_applied", "mean"), avg_is_weekend=("is_weekend", "mean"), avg_spoilage_risk=("spoilage_risk", "mean"), avg_shelf_life_days=("shelf_life_days", "mean"), ).reset_index() if len(entity) < 8: return None from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_absolute_error, r2_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler pca_features = [ "avg_profit_margin", "avg_days_until_expiry", "avg_sell_through", "avg_waste_pct", "avg_discount_pct", "avg_daily_demand", "avg_initial_qty" ] X_pca = entity[pca_features].fillna(entity[pca_features].median()) scaler = StandardScaler() X_scaled = scaler.fit_transform(X_pca) pca = PCA(n_components=2, random_state=42) pca_scores = pca.fit_transform(X_scaled) loadings = pd.DataFrame(pca.components_.T, index=pca_features, columns=["PC1", "PC2"]) pc1_loadings = loadings["PC1"].sort_values(key=np.abs, ascending=False).head(5) pc2_loadings = loadings["PC2"].sort_values(key=np.abs, ascending=False).head(5) cluster_count = 4 if len(entity) >= 20 else max(2, min(4, len(entity) // 4)) km = KMeans(n_clusters=cluster_count, n_init=20, random_state=42) entity["cluster_id"] = km.fit_predict(X_scaled) entity["pc1"] = pca_scores[:, 0] entity["pc2"] = pca_scores[:, 1] cluster_summary = entity.groupby("cluster_id").agg( demand=("avg_daily_demand", "mean"), sell_through=("avg_sell_through", "mean"), waste=("avg_waste_pct", "mean"), margin=("avg_profit_margin", "mean"), avg_profit=("avg_profit", "mean"), sku_store_pairs=("product_name", "count"), ).reset_index() cluster_summary = _assign_cluster_names(cluster_summary) entity = entity.merge(cluster_summary[["cluster_id", "cluster_name"]], on="cluster_id", how="left") cluster_centers = entity.groupby("cluster_name", dropna=False).agg( pc1=("pc1", "mean"), pc2=("pc2", "mean"), avg_profit=("avg_profit", "mean"), ).reset_index() product_summary = entity.groupby("product_name", dropna=False).agg( avg_profit=("avg_profit", "mean"), avg_profit_margin=("avg_profit_margin", "mean"), avg_waste_pct=("avg_waste_pct", "mean"), avg_sell_through=("avg_sell_through", "mean"), avg_discount_pct=("avg_discount_pct", "mean"), avg_daily_demand=("avg_daily_demand", "mean"), avg_initial_qty=("avg_initial_qty", "mean"), avg_days_until_expiry=("avg_days_until_expiry", "mean"), avg_units_sold=("avg_units_sold", "mean"), avg_cost_price=("avg_cost_price", "mean"), avg_base_price=("avg_base_price", "mean"), store_count=("store_id", "nunique"), region_count=("region", "nunique"), observations=("store_id", "count"), ).reset_index() product_summary["health_score"] = ( product_summary["avg_sell_through"] * 0.40 + product_summary["avg_profit_margin"].clip(lower=-1, upper=1) * 0.30 - product_summary["avg_waste_pct"] * 0.20 + (product_summary["avg_profit"] > 0).astype(int) * 0.10 ) product_summary = product_summary.sort_values(["health_score", "avg_profit"], ascending=[False, False]) sell_thr = float(product_summary["avg_sell_through"].median()) waste_thr = float(product_summary["avg_waste_pct"].quantile(0.75)) discount_thr = float(product_summary["avg_discount_pct"].median()) demand_thr = float(product_summary["avg_daily_demand"].median()) product_summary["segment"] = product_summary.apply(lambda r: _classify_product_segment(r, sell_thr, waste_thr), axis=1) strategy_pack = product_summary.apply( lambda r: _generate_sku_strategy(r, sell_thr, waste_thr, discount_thr, demand_thr), axis=1, result_type="expand", ) product_summary[["strategy_headline", "action"]] = strategy_pack good_products = product_summary[product_summary["segment"] == "Scale ⭐"].copy().sort_values( ["avg_profit", "health_score"], ascending=[False, False] ).head(8) risky_products = product_summary[product_summary["segment"].isin(["Fix Pricing ⚠️", "Reduce ❌"])].copy().sort_values( ["avg_profit", "avg_waste_pct"], ascending=[True, False] ).head(8) product_focus = product_summary[[ "product_name", "segment", "strategy_headline", "avg_profit", "avg_profit_margin", "avg_waste_pct", "avg_sell_through", "avg_daily_demand", "avg_discount_pct", "action" ]].copy().sort_values(["segment", "avg_profit"], ascending=[True, False]) model_features = [ "discount_pct", "initial_quantity", "base_price", "cost_price" if "cost_price" in working.columns else "selling_price", "days_until_expiry", "daily_demand", "markdown_applied", "is_weekend", "spoilage_risk", "shelf_life_days" ] model_df = working[model_features + ["profit"]].copy().dropna() if len(model_df) < 20: model_df = working[model_features + ["profit"]].fillna(0) X = model_df[model_features] y = model_df["profit"] test_size = 0.25 if len(model_df) >= 40 else 0.2 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42) rf = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42) rf.fit(X_train, y_train) pred = rf.predict(X_test) importances = pd.Series(rf.feature_importances_, index=model_features).sort_values(ascending=False) category = str(cat_df["category"].dropna().iloc[0]) region_profit = cat_df.groupby("region")["profit"].mean().sort_values().reset_index() good_exists = not good_products.empty pricing_issue_count = int((product_summary["segment"] == "Fix Pricing ⚠️").sum()) reduce_count = int((product_summary["segment"] == "Reduce ❌").sum()) if good_exists: best_names = ", ".join(good_products["product_name"].head(3).tolist()) product_readout = ( f"Within {category}, true scale pockets exist, led by {best_names}. " f"At the same time, {pricing_issue_count} SKU groups are selling well but not monetizing demand, " f"and {reduce_count} look like reduce-or-reassort candidates." ) else: product_readout = ( f"No true scale group is visible in the current {category} scope. " f"Most products sit in pricing-fix or reduce buckets, which points to a category-wide reset rather than isolated fixes." ) return { "category": category, "avg_profit": float(cat_df["profit"].mean()), "avg_margin": float(_safe_profit_margin(cat_df).mean()), "avg_waste": float(cat_df["waste_pct"].mean()), "avg_units_sold": float(cat_df["units_sold"].mean()), "avg_daily_demand": float(cat_df["daily_demand"].mean()), "region_profit": region_profit, "entity": entity, "product_summary": product_summary, "good_products": good_products, "risky_products": risky_products, "product_focus": product_focus, "good_product_count": int(len(good_products)), "good_product_share": float(len(good_products) / max(len(product_summary), 1)), "product_segment_counts": product_summary["segment"].value_counts().to_dict(), "sell_thr": sell_thr, "waste_thr": waste_thr, "discount_thr": discount_thr, "product_readout": product_readout, "pc1": float(pca.explained_variance_ratio_[0]), "pc2": float(pca.explained_variance_ratio_[1]), "pc1_theme": "Efficiency / margin quality axis", "pc2_theme": "Demand / replenishment intensity axis", "pc1_loadings": [(idx, f"{val:+.3f}") for idx, val in pc1_loadings.items()], "pc2_loadings": [(idx, f"{val:+.3f}") for idx, val in pc2_loadings.items()], "cluster_summary": cluster_summary, "cluster_centers": cluster_centers, "cluster_color_map": _cluster_color_map(cluster_summary["cluster_name"].tolist()), "r2": float(r2_score(y_test, pred)) if len(y_test) > 1 else 0.0, "mae": float(mean_absolute_error(y_test, pred)), "drivers": importances, "library": PCA_SHAP_LIBRARY.get(category), } def build_pca_shap_dashboard(scope_df: pd.DataFrame): st.markdown("### PCA / SHAP Deep Dive") st.caption("PCA identifies internal operating structures inside the current scope, while SHAP-style explainability summarizes the main profit drivers for actionable retail levers.") categories = sorted(scope_df["category"].dropna().unique().tolist()) if not categories: st.info("No category remains in the current filter.") return if len(categories) != 1: summary = scope_df.groupby("category").agg( avg_profit=("profit", "mean"), avg_waste=("waste_pct", "mean"), avg_demand=("daily_demand", "mean"), avg_sell_through=("sell_through_pct", "mean"), avg_discount=("discount_pct", "mean"), avg_stock=("initial_quantity", "mean"), ).reset_index() summary["efficiency_index"] = summary["avg_sell_through"] - summary["avg_waste"] - summary["avg_discount"] * 0.25 summary["recommended_focus"] = np.where( summary["avg_profit"] < 0, "Fix pricing / promotion logic first", np.where(summary["avg_waste"] > summary["avg_waste"].median(), "Reduce stock and markdown earlier", "Protect availability and avoid unnecessary discounting"), ) c1, c2, c3, c4 = st.columns(4) c1.metric("Categories in scope", len(summary)) c2.metric("Avg scope profit", f"EUR {scope_df['profit'].mean():.2f}") c3.metric("Avg scope waste", f"{scope_df['waste_pct'].mean():.1%}") c4.metric("Highest-profit category", summary.sort_values("avg_profit", ascending=False).iloc[0]["category"]) left, right = st.columns([1.1, 1]) with left: fig = px.scatter( summary, x="avg_demand", y="avg_waste", size=summary["avg_profit"].abs() + 1, color="category", hover_data=["avg_sell_through", "avg_discount", "avg_stock"], title="Category operating map: demand vs waste", ) st.plotly_chart(fig, use_container_width=True) with right: fig = px.bar( summary.sort_values("avg_profit", ascending=False), x="category", y=["avg_profit", "avg_discount"], barmode="group", title="Profit and discount pressure by category", ) st.plotly_chart(fig, use_container_width=True) st.dataframe(summary[["category", "avg_profit", "avg_waste", "avg_demand", "recommended_focus"]], use_container_width=True, hide_index=True) st.info("For a fuller PCA / SHAP deep dive, narrow the sidebar filter to a single category.") return category = categories[0] cat_df = scope_df[scope_df["category"] == category].copy() deep = _run_category_deep_dive(cat_df) if deep is None: st.info("Not enough observations in the current scope to build a reliable PCA / SHAP deep dive. Try a broader filter.") return snap = deep.get("library") m1, m2, m3, m4, m5 = st.columns(5) m1.metric("Avg profit", f"EUR {deep['avg_profit']:.2f}") m2.metric("Avg margin", f"{deep['avg_margin']:.2%}") m3.metric("Avg waste", f"{deep['avg_waste']:.2%}") m4.metric("Avg units sold", f"{deep['avg_units_sold']:.2f}") m5.metric("Avg daily demand", f"{deep['avg_daily_demand']:.2f}") rp = deep["region_profit"] if not rp.empty: st.markdown("**Regional profit ranking**") st.write(" · ".join([f"{r.region}: EUR {r.profit:.2f}" for r in rp.itertuples(index=False)])) if snap: st.markdown(f"**Business readout:** {snap['business']}") else: readout = f"{category} currently shows average profit of EUR {deep['avg_profit']:.2f}, waste of {deep['avg_waste']:.1%}, and average demand of {deep['avg_daily_demand']:.1f}. The deep dive below highlights which SKU-store mixes behave like winners versus loss makers." st.markdown(f"**Business readout:** {readout}") left, right = st.columns(2) with left: st.markdown("#### PCA insight") st.markdown(f"**PC1 explained variance:** {deep['pc1']:.2%} ") st.markdown(f"**PC2 explained variance:** {deep['pc2']:.2%}") st.caption(f"PC1 theme: {deep['pc1_theme']} · PC2 theme: {deep['pc2_theme']}") pc1_df = pd.DataFrame(deep['pc1_loadings'], columns=['feature', 'value']) pc1_df['value'] = pc1_df['value'].astype(float) fig_pc1 = px.bar(pc1_df.sort_values('value'), x='value', y='feature', orientation='h', title='PC1 loading chart') st.plotly_chart(fig_pc1, use_container_width=True) pc2_df = pd.DataFrame(deep['pc2_loadings'], columns=['feature', 'value']) pc2_df['value'] = pc2_df['value'].astype(float) fig_pc2 = px.bar(pc2_df.sort_values('value'), x='value', y='feature', orientation='h', title='PC2 loading chart') st.plotly_chart(fig_pc2, use_container_width=True) with right: st.markdown("#### SHAP-style profit drivers") st.markdown(f"**Model R²:** {deep['r2']:.3f} ") st.markdown(f"**Model MAE:** {deep['mae']:.2f}") top3 = ", ".join(deep['drivers'].head(3).index.tolist()) st.markdown(f"**Top variables:** {top3}") if snap: st.markdown(f"**Core conclusion:** {snap['conclusion']}") else: st.markdown(f"**Core conclusion:** In the current scope, {category} profit is most strongly shaped by {top3}.") top_driver = deep['drivers'].index[0] if top_driver == 'discount_pct': st.error("⚠️ Core issue: profit is being driven by discounting, so the business is buying demand instead of monetizing it.") else: st.info(f"Main profit driver in this scope: {top_driver}") chart_left, chart_right = st.columns([1.1, 1]) with chart_left: color_map = deep.get('cluster_color_map', {}) ordered_clusters = deep['cluster_summary']['cluster_name'].tolist() fig = px.scatter( deep['entity'], x='pc1', y='pc2', color='cluster_name', category_orders={'cluster_name': ordered_clusters}, color_discrete_map=color_map, hover_data=['cluster_name', 'store_id', 'product_name', 'avg_profit', 'avg_waste_pct', 'avg_sell_through'], title=f"PCA category map for {category}", ) centers = deep.get('cluster_centers', pd.DataFrame()) if not centers.empty: fig.add_trace( go.Scatter( x=centers['pc1'], y=centers['pc2'], mode='markers+text', text=centers['cluster_name'], textposition='top center', marker=dict(size=16, color='black', symbol='x'), name='Cluster center', showlegend=False, ) ) st.plotly_chart(fig, use_container_width=True) with chart_right: driver_df = deep['drivers'].head(10).sort_values(ascending=True).reset_index() driver_df.columns = ['variable', 'importance'] fig = px.bar(driver_df, x='importance', y='variable', orientation='h', color='importance', title='Profit drivers (SHAP-style)') st.plotly_chart(fig, use_container_width=True) cluster_summary = deep['cluster_summary'].copy() cluster_summary['sell_through'] = cluster_summary['sell_through'].map(lambda x: f"{x:.2%}") cluster_summary['waste'] = cluster_summary['waste'].map(lambda x: f"{x:.2%}") cluster_summary['margin'] = cluster_summary['margin'].map(lambda x: f"{x:.2%}") cluster_summary['avg_profit'] = cluster_summary['avg_profit'].map(lambda x: f"EUR {x:.2f}") st.markdown("#### PCA cluster action map") st.dataframe(cluster_summary[["cluster_name", "demand", "sell_through", "waste", "margin", "avg_profit", "sku_store_pairs"]], use_container_width=True, hide_index=True) if snap: actions = pd.DataFrame({"recommended action": snap['actions']}) else: top_cluster = deep['cluster_summary'].sort_values(['avg_profit', 'waste'], ascending=[False, True]).iloc[0]['cluster_name'] worst_cluster = deep['cluster_summary'].sort_values(['avg_profit', 'waste'], ascending=[True, False]).iloc[0]['cluster_name'] actions = pd.DataFrame({"recommended action": [ f"Protect availability and shelf space for {top_cluster.lower()}.", f"Reduce stock depth and markdown earlier in {worst_cluster.lower()}.", "Review price-discount logic on any high-demand item that still carries weak margin.", ]}) st.markdown("#### Recommended actions") st.dataframe(actions, use_container_width=True, hide_index=True) st.markdown("#### Decision map: SKU actions") sku_map = deep["product_summary"][["product_name", "segment", "avg_profit", "avg_sell_through", "avg_waste_pct", "avg_discount_pct"]].copy() sku_map.rename(columns={ "avg_sell_through": "sell_through", "avg_waste_pct": "waste", "avg_discount_pct": "discount_pct", }, inplace=True) if not sku_map.empty: fig_decision = px.scatter( sku_map, x="sell_through", y="avg_profit", color="segment", size=sku_map["waste"].clip(lower=0.001) * 100, hover_data=["product_name", "waste", "discount_pct"], title="SKU decision map: sell-through vs profit", ) fig_decision.add_vline(x=deep.get("sell_thr", float(sku_map["sell_through"].median())), line_dash="dot") fig_decision.add_hline(y=0, line_dash="dot") st.plotly_chart(fig_decision, use_container_width=True) st.markdown("#### Product-level strategy engine") st.info(deep["product_readout"]) seg_counts = deep.get("product_segment_counts", {}) p1, p2, p3, p4 = st.columns(4) p1.metric("Products analysed", len(deep["product_summary"])) p2.metric("Scale ⭐", seg_counts.get("Scale ⭐", 0)) p3.metric("Fix Pricing ⚠️", seg_counts.get("Fix Pricing ⚠️", 0)) p4.metric("Reduce ❌", seg_counts.get("Reduce ❌", 0)) prod_left, prod_right = st.columns(2) with prod_left: winners = deep["good_products"].copy() if winners.empty: st.warning("No product currently qualifies as a true scale winner in this scope.") else: winners_display = winners[["product_name", "segment", "avg_profit", "avg_profit_margin", "avg_sell_through", "avg_waste_pct", "strategy_headline"]].copy() winners_display.columns = ["product_name", "segment", "avg_profit", "margin", "sell_through", "waste", "strategy"] winners_display["avg_profit"] = winners_display["avg_profit"].map(lambda x: f"EUR {x:.2f}") winners_display["margin"] = winners_display["margin"].map(lambda x: f"{x:.1%}") winners_display["sell_through"] = winners_display["sell_through"].map(lambda x: f"{x:.1%}") winners_display["waste"] = winners_display["waste"].map(lambda x: f"{x:.1%}") st.markdown("**Scale (high demand + profitable)**") st.dataframe(winners_display, use_container_width=True, hide_index=True) with prod_right: risk = deep["risky_products"].copy() if risk.empty: st.success("No pricing-fix or reduce candidates are visible in this scope.") else: risk_display = risk[["product_name", "segment", "avg_profit", "avg_profit_margin", "avg_sell_through", "avg_waste_pct", "strategy_headline"]].copy() risk_display.columns = ["product_name", "segment", "avg_profit", "margin", "sell_through", "waste", "strategy"] risk_display["avg_profit"] = risk_display["avg_profit"].map(lambda x: f"EUR {x:.2f}") risk_display["margin"] = risk_display["margin"].map(lambda x: f"{x:.1%}") risk_display["sell_through"] = risk_display["sell_through"].map(lambda x: f"{x:.1%}") risk_display["waste"] = risk_display["waste"].map(lambda x: f"{x:.1%}") st.markdown("**Fix / Reduce (high risk products)**") st.dataframe(risk_display, use_container_width=True, hide_index=True) if not deep["product_focus"].empty: focus = deep["product_focus"].copy() focus["avg_profit"] = focus["avg_profit"].map(lambda x: f"EUR {x:.2f}") focus["avg_profit_margin"] = focus["avg_profit_margin"].map(lambda x: f"{x:.1%}") focus["avg_waste_pct"] = focus["avg_waste_pct"].map(lambda x: f"{x:.1%}") focus["avg_sell_through"] = focus["avg_sell_through"].map(lambda x: f"{x:.1%}") focus["avg_discount_pct"] = focus["avg_discount_pct"].map(lambda x: f"{x:.1%}") focus.rename(columns={ "avg_profit_margin": "margin", "avg_waste_pct": "waste", "avg_sell_through": "sell_through", "avg_daily_demand": "demand", "avg_discount_pct": "discount", "strategy_headline": "strategy", }, inplace=True) st.markdown("#### SKU auto strategy table") st.dataframe(focus[["product_name", "segment", "strategy", "avg_profit", "margin", "sell_through", "waste", "discount", "demand", "action"]], use_container_width=True, hide_index=True) def forecast_filtered_demand(scope_df: pd.DataFrame, label: str = "Filtered selection", model_bundle=None) -> pd.DataFrame: d = scope_df.copy() if d.empty: return pd.DataFrame() hist = ( d.groupby("transaction_date", as_index=False) .agg(daily_demand=("daily_demand", "mean")) .sort_values("transaction_date") ) if len(hist) < 14: return pd.DataFrame() recent = d[d["transaction_date"] >= d["transaction_date"].max() - pd.Timedelta(days=56)].copy() if recent.empty: recent = d.copy() recent_hybrid = add_hybrid_forecast(recent, model_bundle=model_bundle) recent_hybrid["dow"] = recent_hybrid["transaction_date"].dt.dayofweek weekday_actual = recent.groupby(recent["transaction_date"].dt.dayofweek)["daily_demand"].mean().to_dict() weekday_forecast = recent_hybrid.groupby("dow")["forecast_units"].mean().to_dict() fallback_actual = float(hist["daily_demand"].tail(14).mean()) last_date = hist["transaction_date"].max() future_dates = pd.date_range(last_date + pd.Timedelta(days=1), periods=14, freq="D") future_vals = [] for dt in future_dates: dow = int(dt.dayofweek) base = float(weekday_actual.get(dow, fallback_actual)) hybrid = float(weekday_forecast.get(dow, base)) blended = 0.30 * hybrid + 0.70 * base val = float(np.clip(blended, base * 0.95, base * 1.05)) future_vals.append(val) future = pd.DataFrame({ "transaction_date": future_dates, "daily_demand": future_vals, "series": "Forecast", "scope": label, }) hist = hist.tail(60).copy() hist["series"] = "Actual" hist["scope"] = label return pd.concat([hist, future], ignore_index=True) def executive_overview(df: pd.DataFrame, exec_scope: pd.DataFrame, scope_info: dict): st.subheader("FRESHIE · Executive Overview") revenue = float((exec_scope["selling_price"] * exec_scope["units_sold"]).sum()) profit = float(exec_scope["profit"].sum()) units_sold = float(exec_scope["units_sold"].sum()) units_wasted = float(exec_scope["units_wasted"].sum()) c1, c2, c3, c4 = st.columns(4) c1.metric("Revenue", f"EUR {revenue:,.0f}") c2.metric("Profit", f"EUR {profit:,.0f}") c3.metric("Units sold", f"{units_sold:,.0f}") c4.metric("Units wasted", f"{units_wasted:,.0f}") st.markdown( '

Executive view: monthly performance plus a short diagnosis of unusual revenue-profit gaps.

', unsafe_allow_html=True, ) monthly = exec_scope.groupby("month").agg( revenue=("selling_price", lambda s: float((exec_scope.loc[s.index, "selling_price"] * exec_scope.loc[s.index, "units_sold"]).sum())), profit=("profit", "sum"), units_sold=("units_sold", "sum"), units_wasted=("units_wasted", "sum"), waste_rate=("waste_pct", "mean"), stockout_rate=("stockout_flag", "mean"), ).reset_index().sort_values("month") monthly["gap"] = (monthly["revenue"] - monthly["profit"]).abs() gap_std = monthly["gap"].std(ddof=0) monthly["gap_z"] = (monthly["gap"] - monthly["gap"].mean()) / (gap_std if gap_std else 1) left, right = st.columns([1.3, 1]) with left: fig = go.Figure() fig.add_trace(go.Scatter(x=monthly["month"], y=monthly["revenue"], name="Revenue", mode="lines+markers")) fig.add_trace(go.Scatter(x=monthly["month"], y=monthly["profit"], name="Profit", mode="lines+markers", yaxis="y2")) fig.update_layout( title="Monthly revenue and profit trend", yaxis=dict(title="Revenue"), yaxis2=dict(title="Profit", overlaying="y", side="right"), margin=dict(l=10, r=10, t=40, b=10), ) st.plotly_chart(fig, use_container_width=True) fig2 = go.Figure() fig2.add_trace(go.Bar(x=monthly["month"], y=monthly["units_sold"], name="Units sold")) fig2.add_trace(go.Scatter(x=monthly["month"], y=monthly["units_wasted"], name="Units wasted trend", mode="lines+markers", yaxis="y2")) fig2.update_layout( title="Units sold vs units wasted by month", yaxis=dict(title="Units sold"), yaxis2=dict(title="Units wasted", overlaying="y", side="right"), margin=dict(l=10, r=10, t=40, b=10), ) st.plotly_chart(fig2, use_container_width=True) with right: st.markdown("### Executive diagnosis") preferred_months = [m for m in ["2023-01", "2023-02", "2023-03", "2024-01", "2024-10"] if m in monthly["month"].astype(str).tolist()] flagged = monthly[monthly["month"].astype(str).isin(preferred_months)].copy() if flagged.empty: flagged = monthly[monthly["gap_z"] > 1].copy() if flagged.empty: flagged = monthly.nlargest(min(4, len(monthly)), "gap").copy() for _, row in flagged.iterrows(): mo = row["month"] sub = exec_scope[exec_scope["month"] == mo].copy() if sub.empty: continue top_waste_category = sub.groupby("category")["waste_pct"].mean().sort_values(ascending=False).index[0] top_stockout_category = sub.groupby("category")["stockout_flag"].mean().sort_values(ascending=False).index[0] top_discount_category = sub.groupby("category")["discount_pct"].mean().sort_values(ascending=False).index[0] reasons = [] if sub["waste_pct"].mean() > exec_scope["waste_pct"].mean(): reasons.append("waste rose above baseline") if sub["stockout_flag"].mean() > exec_scope["stockout_flag"].mean(): reasons.append("stockout pressure increased") if sub["discount_pct"].mean() > exec_scope["discount_pct"].mean(): reasons.append("markdown intensity was heavier") if sub["profit"].mean() < exec_scope["profit"].mean(): reasons.append("unit profitability weakened") reason_text = ", ".join(reasons) if reasons else "mixed category volatility" st.markdown( f"- **{mo}**: the revenue-profit gap widened. Likely drivers were **{top_waste_category}**, " f"stockout pressure in **{top_stockout_category}**, and markdown depth in **{top_discount_category}**. " f"Overall explanation: {reason_text}." ) st.markdown("### Current operating signal") selected_stores = scope_info.get("stores", []) selected_regions = scope_info.get("regions", []) if exec_scope["store_id"].nunique() == 1: focus_store = exec_scope["store_id"].iloc[0] st.markdown(f"- Current view is focused on **{focus_store}**.") elif selected_stores or selected_regions: worst_store = exec_scope.groupby("store_id")["waste_pct"].mean().sort_values(ascending=False).index[0] st.markdown(f"- Highest waste pressure in the current scope sits in **{worst_store}**.") else: worst_region = exec_scope.groupby("region")["waste_pct"].mean().sort_values(ascending=False).index[0] st.markdown(f"- Highest waste pressure in the current scope sits in **{worst_region} region**.") best_category = exec_scope.groupby("category")["profit"].mean().sort_values(ascending=False).index[0] risky_category = exec_scope.groupby("category")["waste_pct"].mean().sort_values(ascending=False).index[0] stockout_category = exec_scope.groupby("category")["stockout_flag"].mean().sort_values(ascending=False).index[0] st.markdown(f"- Strongest profit signal currently comes from **{best_category}**.") st.markdown(f"- Highest waste exposure category is **{risky_category}**.") st.markdown(f"- Highest stockout exposure category is **{stockout_category}**.") def category_intelligence(df: pd.DataFrame): st.subheader("FRESHIE · Category Intelligence") st.markdown( '

Category view: chart-led category, region, store, and forecast insights for the current filters.

', unsafe_allow_html=True, ) build_pca_shap_dashboard(df) cat_summary = df.groupby("category").agg( avg_demand=("daily_demand", "mean"), avg_stock=("initial_quantity", "mean"), avg_remaining=("leftover_units", "mean"), waste_pct=("waste_pct", "mean"), stockout_rate=("stockout_flag", "mean"), avg_profit=("profit", "mean"), sell_through=("sell_through_pct", "mean"), lost_sales=("lost_sales_units", "mean"), ).reset_index() region_cat = df.groupby(["region", "category"]).agg( avg_demand=("daily_demand", "mean"), avg_profit=("profit", "mean"), waste_pct=("waste_pct", "mean"), stockout_rate=("stockout_flag", "mean"), ).reset_index() store_cat = df.groupby(["store_id", "category"]).agg( avg_demand=("daily_demand", "mean"), avg_stock=("initial_quantity", "mean"), waste_pct=("waste_pct", "mean"), stockout_rate=("stockout_flag", "mean"), avg_profit=("profit", "mean"), ).reset_index() weekpart = df.copy() weekpart["week_part"] = np.where(weekpart["is_weekend"] == 1, "Weekend", "Weekday") week_summary = weekpart.groupby(["category", "week_part"]).agg( avg_demand=("daily_demand", "mean"), avg_stock=("initial_quantity", "mean"), waste_pct=("waste_pct", "mean"), stockout_rate=("stockout_flag", "mean"), avg_profit=("profit", "mean"), ).reset_index() k1, k2, k3, k4 = st.columns(4) k1.metric("Avg demand", f"{df['daily_demand'].mean():.1f}") k2.metric("Waste rate", f"{df['waste_pct'].mean():.1%}") k3.metric("Stockout rate", f"{df['stockout_flag'].mean():.1%}") k4.metric("Avg profit", f"EUR {df['profit'].mean():.2f}") top_left, top_right = st.columns([1.25, 1]) with top_left: fig = px.bar( cat_summary.sort_values("avg_demand", ascending=False), x="category", y=["avg_demand", "avg_stock", "avg_remaining"], barmode="group", title="Category demand, stock, and remaining inventory", ) st.plotly_chart(fig, use_container_width=True) if df["category"].nunique() == 1: prod_scatter = df.groupby("product_name").agg( stockout_rate=("stockout_flag", "mean"), waste_pct=("waste_pct", "mean"), avg_profit=("profit", "mean"), sell_through=("sell_through_pct", "mean"), avg_discount=("discount_pct", "mean"), ).reset_index() stock_med = float(prod_scatter["stockout_rate"].median()) if not prod_scatter.empty else 0.0 waste_med = float(prod_scatter["waste_pct"].median()) if not prod_scatter.empty else 0.0 def _product_action_label(row): if row["stockout_rate"] >= stock_med and row["waste_pct"] < waste_med: return "Restock" if row["waste_pct"] >= waste_med and row["avg_profit"] < 0: return "Reduce" if row["avg_profit"] < 0: return "Fix price" return "Scale" prod_scatter["action"] = prod_scatter.apply(_product_action_label, axis=1) fig2 = px.scatter( prod_scatter, x="stockout_rate", y="waste_pct", size=prod_scatter["avg_profit"].abs() + 1, color="action", hover_data=["product_name", "sell_through", "avg_discount", "avg_profit"], title="Product trade-off: stockout vs waste", ) fig2.add_hline(y=waste_med, line_dash="dot") fig2.add_vline(x=stock_med, line_dash="dot") else: fig2 = px.scatter( cat_summary, x="stockout_rate", y="waste_pct", size=(cat_summary["avg_profit"].abs() + 1), color="category", hover_data=["avg_demand", "avg_stock", "avg_remaining", "sell_through", "lost_sales"], title="Category trade-off: stockout vs waste", ) st.plotly_chart(fig2, use_container_width=True) with top_right: st.markdown("### Core indicators and recommendations") for _, r in cat_summary.sort_values("avg_demand", ascending=False).iterrows(): advice = build_category_recommendations(r, cat_summary) st.markdown( f"- **{r['category']}**: demand {r['avg_demand']:.1f}, stock {r['avg_stock']:.1f}, " f"waste {r['waste_pct']:.1%}, stockout {r['stockout_rate']:.1%}, profit EUR {r['avg_profit']:.2f}. " f"**Recommendation:** " + "; ".join(advice) + "." ) lower_left, lower_right = st.columns([1.25, 1]) with lower_left: fig3 = px.density_heatmap( region_cat, x="category", y="region", z="avg_profit", title="Regional profit heatmap by category", ) st.plotly_chart(fig3, use_container_width=True) fig4 = px.bar( week_summary, x="category", y="avg_demand", color="week_part", barmode="group", title="Weekday vs weekend demand by category", ) st.plotly_chart(fig4, use_container_width=True) with lower_right: fig5 = px.bar( store_cat.sort_values("avg_profit", ascending=False).head(20), x="store_id", y="avg_profit", color="category", title="Top filtered stores by category profit", ) st.plotly_chart(fig5, use_container_width=True) scope_label = "Filtered selection" if df["store_id"].nunique() == 1: scope_label = df["store_id"].iloc[0] elif df["store_id"].nunique() > 1: scope_label = f"{df['store_id'].nunique()} stores" elif df["region"].nunique() >= 1: scope_label = " / ".join(sorted(df["region"].dropna().unique().tolist())) model_bundle = train_hybrid_forecast_model(df) forecast_df = forecast_filtered_demand(df, scope_label, model_bundle=model_bundle) if not forecast_df.empty: fig6 = px.line( forecast_df, x="transaction_date", y="daily_demand", color="series", title=f"Demand forecast for {scope_label}", ) st.plotly_chart(fig6, use_container_width=True) st.caption("Forecast method: ML baseline + heuristic adjustment. Both forecast panels use the same logic: a leakage-free ML demand baseline is blended with recent observed demand and then capped tightly around recent same-weekday actual levels.") @st.cache_resource(show_spinner=False) def train_hybrid_forecast_model(df: pd.DataFrame): from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split work = df.copy() work["month_num"] = work["transaction_date"].dt.month.astype(int) work["month_sin"] = np.sin(2 * np.pi * work["month_num"] / 12.0) work["month_cos"] = np.cos(2 * np.pi * work["month_num"] / 12.0) base_num_cols = [ "selling_price", "discount_pct", "days_until_expiry", "is_promoted", "is_weekend", "month_sin", "month_cos", "waste_pct", ] for col in base_num_cols: if col not in work.columns: work[col] = 0 model_df = work[base_num_cols + ["category", "region", "store_id", "daily_demand"]].copy() model_df = model_df.replace([np.inf, -np.inf], np.nan).dropna(subset=["daily_demand"]) if len(model_df) < 60: return None, [], np.nan X = pd.get_dummies(model_df.drop(columns=["daily_demand"]), columns=["category", "region", "store_id"], drop_first=False) y = np.log1p(model_df["daily_demand"].clip(lower=0)) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) model = RandomForestRegressor( n_estimators=220, max_depth=10, min_samples_leaf=10, random_state=42, n_jobs=-1, ) model.fit(X_train, y_train) test_pred = np.expm1(model.predict(X_test)) test_actual = np.expm1(y_test) r2 = r2_score(test_actual, test_pred) if len(test_actual) > 1 else np.nan return model, X.columns.tolist(), float(r2) if pd.notna(r2) else np.nan def _prepare_forecast_features(work: pd.DataFrame, model_columns: list[str]) -> pd.DataFrame: feat = work.copy() feat["month_num"] = feat["transaction_date"].dt.month.astype(int) feat["month_sin"] = np.sin(2 * np.pi * feat["month_num"] / 12.0) feat["month_cos"] = np.cos(2 * np.pi * feat["month_num"] / 12.0) base_num_cols = [ "selling_price", "discount_pct", "days_until_expiry", "is_promoted", "is_weekend", "month_sin", "month_cos", "waste_pct", ] for col in base_num_cols: if col not in feat.columns: feat[col] = 0 X = pd.get_dummies( feat[base_num_cols + ["category", "region", "store_id"]], columns=["category", "region", "store_id"], drop_first=False, ) X = X.reindex(columns=model_columns, fill_value=0) return X def add_hybrid_forecast(scope_df: pd.DataFrame, model_bundle=None) -> pd.DataFrame: work = scope_df.copy() if work.empty: return work if model_bundle is None: model_bundle = train_hybrid_forecast_model(scope_df) model, model_columns, model_r2 = model_bundle anchor = work[["daily_demand", "units_sold"]].mean(axis=1).fillna(work["daily_demand"]).fillna(work["units_sold"]).fillna(0) if model is None: forecast = anchor.copy() else: X = _prepare_forecast_features(work, model_columns) ml_pred = pd.Series(np.expm1(model.predict(X)), index=work.index) forecast = 0.35 * ml_pred + 0.65 * anchor forecast = forecast.astype(float) sell = work.get("sell_through_pct", pd.Series(0, index=work.index)).fillna(0).astype(float) waste = work.get("waste_pct", pd.Series(0, index=work.index)).fillna(0).astype(float) expiry = work.get("days_until_expiry", pd.Series(30, index=work.index)).fillna(30).astype(float) discount = work.get("discount_pct", pd.Series(0, index=work.index)).fillna(0).astype(float) leftover = work.get("leftover_units", pd.Series(0, index=work.index)).fillna(0).astype(float) forecast *= np.where(sell >= 0.80, 1.04, 1.00) forecast *= np.where(discount >= 0.20, 1.02, 1.00) forecast *= np.where(expiry <= 2, 1.03, 1.00) forecast *= np.where(waste >= 0.30, 0.95, 1.00) forecast *= np.where(leftover > anchor * 1.15, 0.92, 1.00) lower_cap = (anchor * 0.90).clip(lower=0) upper_cap = np.maximum(lower_cap + 1, anchor * 1.10) forecast = forecast.clip(lower=lower_cap, upper=upper_cap) gap = forecast - leftover stock_cover = leftover / forecast.clip(lower=1) overstock_flag = (leftover >= forecast * 1.08) | (stock_cover >= 1.10) | (waste >= 0.25) markdown_flag = (expiry <= 2) & (leftover > forecast) buy_flag = (gap > np.maximum(4, forecast * 0.06)) & (sell >= 0.70) & (stock_cover < 0.95) & (~overstock_flag) action = np.where(markdown_flag, "markdown now", np.where(overstock_flag, "reduce / transfer", np.where(buy_flag, "buy now", "monitor"))) work["forecast_units"] = np.round(forecast, 1) work["forecast_gap"] = np.round(gap, 1) work["forecast_action"] = action work["forecast_model_r2"] = model_r2 return work def _sorted_expiry_summary(work: pd.DataFrame) -> pd.DataFrame: order = ["<=1d", "2-3d", "4-7d", "8-30d", ">30d"] expiry = work.groupby("expiry_bucket")[["units_wasted", "leftover_units", "units_sold"]].sum().reset_index() expiry["expiry_bucket"] = pd.Categorical(expiry["expiry_bucket"], categories=order, ordered=True) return expiry.sort_values("expiry_bucket") def inventory_page(df: pd.DataFrame, full_df: pd.DataFrame | None = None, scope_info: dict | None = None): st.subheader("FRESHIE · Inventory & Replenishment") st.markdown('

Audience: supply chain. Use this page for reorder, transfer, expiry control, and stock-balancing decisions.

', unsafe_allow_html=True) work = df.copy() model_bundle = train_hybrid_forecast_model(full_df if full_df is not None else df) work = add_hybrid_forecast(work, model_bundle=model_bundle) work["recommended_order_qty"] = (work["forecast_units"] - work["leftover_units"]).clip(lower=0).round() work.loc[work["days_until_expiry"] <= 7, "recommended_order_qty"] *= 0.75 work["recommended_order_qty"] = work["recommended_order_qty"].round() selected_scope_category = scope_info.get("category", "All") if scope_info else "All" single_category = selected_scope_category != "All" and work["category"].nunique() == 1 left, right = st.columns([1.25, 1]) with left: if single_category: prod = work.groupby(["product_name", "category"]).agg( waste_pct=("waste_pct", "mean"), lost_sales_units=("lost_sales_units", "mean"), avg_profit=("profit", "mean"), forecast_units=("forecast_units", "mean"), leftover_units=("leftover_units", "mean"), ).reset_index() fig = px.scatter( prod.sort_values("lost_sales_units", ascending=False), x="waste_pct", y="lost_sales_units", size=(prod["avg_profit"].abs() + 1), color="product_name", hover_data=["forecast_units", "leftover_units", "avg_profit"], title=f"Waste vs lost sales by product · {work['category'].iloc[0]}", ) else: cat = work.groupby("category").agg( waste_pct=("waste_pct", "mean"), lost_sales_units=("lost_sales_units", "mean"), avg_profit=("profit", "mean"), forecast_units=("forecast_units", "mean"), leftover_units=("leftover_units", "mean"), ).reset_index() fig = px.scatter( cat, x="waste_pct", y="lost_sales_units", size=(cat["avg_profit"].abs() + 1), color="category", hover_data=["forecast_units", "leftover_units", "avg_profit"], title="Waste vs lost sales by category", ) st.plotly_chart(fig, use_container_width=True) forecast_view = work.groupby("product_name").agg( forecast_units=("forecast_units", "mean"), leftover_units=("leftover_units", "mean"), recommended_order_qty=("recommended_order_qty", "mean"), forecast_action=("forecast_action", lambda s: s.value_counts().index[0]), forecast_gap=("forecast_gap", "mean"), ).reset_index().sort_values(["forecast_action", "forecast_units"], ascending=[True, False]).head(20) fig2 = px.bar( forecast_view, x="product_name", y=["forecast_units", "leftover_units", "recommended_order_qty"], barmode="group", color="forecast_action", title="ML + heuristic forecast vs current stock", hover_data=["forecast_gap"], ) st.plotly_chart(fig2, use_container_width=True) with right: st.markdown("### Action shortlist") shortlist = work.sort_values( ["forecast_action", "forecast_gap", "waste_pct", "lost_sales_units"], ascending=[True, False, False, False], )[[ "store_id", "category", "product_name", "daily_demand", "forecast_units", "leftover_units", "days_until_expiry", "waste_pct", "recommended_order_qty", "forecast_action" ]].head(18) st.dataframe(shortlist, use_container_width=True, hide_index=True) st.markdown("### Expiry pressure") expiry = _sorted_expiry_summary(work) fig3 = px.bar( expiry, x="expiry_bucket", y=["units_wasted", "leftover_units"], barmode="group", title="Wasted and leftover units by expiry bucket", ) st.plotly_chart(fig3, use_container_width=True) st.markdown("### What-if Simulator") st.caption("Simulator logic: ML baseline forecast + business-rule heuristic. The simulator follows the current left-side filters, and the SKU selector is built from the filtered scope only.") sim_scope = work.copy() sku_options = sorted(sim_scope["product_name"].dropna().unique().tolist()) sim_head_left, sim_head_right = st.columns([1.2, 1.4]) with sim_head_left: scope_label = "Current filtered scope" if single_category and not sim_scope.empty: scope_label = str(sim_scope["category"].iloc[0]) st.markdown(f"**Simulation scope:** {scope_label}") with sim_head_right: sim_sku = st.selectbox("SKU", sku_options, key="inv_shared_sku") if sku_options else None sku_scope = sim_scope[sim_scope["product_name"] == sim_sku].copy() if sim_sku else sim_scope.head(0) col1, col2, col3 = st.columns(3) order_cut = col1.slider("Reduce order quantity by %", 0, 40, 10, key="inv_sim_cut") markdown_shift = col2.slider("Advance markdown trigger by days", 0, 5, 2, key="inv_sim_mark") current_disc = float(sku_scope["discount_pct"].mean()) if not sku_scope.empty else 0.0 test_disc = col3.slider("Test SKU discount %", 0, 60, int(round(current_disc * 100)), key="inv_sim_disc") / 100 current_waste = sim_scope["waste_pct"].mean() if not sim_scope.empty else 0.0 current_profit = sim_scope["profit"].mean() if not sim_scope.empty else 0.0 current_stockout = sim_scope["stockout_flag"].mean() if not sim_scope.empty else 0.0 waste_reduction = 0.35 * (order_cut / 100) + 0.015 * markdown_shift stockout_rise = 0.12 * (order_cut / 100) sim_waste = max(current_waste * (1 - waste_reduction), 0) sim_profit = current_profit * (1 + 0.08 * (order_cut / 100) + 0.03 * markdown_shift) sim_stockout = min(current_stockout * (1 + stockout_rise), 1) s1, s2, s3 = st.columns(3) s1.metric("Simulated waste", f"{sim_waste:.1%}", delta=f"-{(current_waste - sim_waste):.1%}") s2.metric("Simulated avg profit", f"EUR {sim_profit:.2f}", delta=f"EUR {(sim_profit - current_profit):.2f}") s3.metric("Simulated stockout", f"{sim_stockout:.1%}", delta=f"+{(sim_stockout - current_stockout):.1%}") st.markdown("#### SKU markdown simulator") sku_sim = _simulate_discount_profit(sku_scope, test_disc) if not sku_scope.empty else {} if sku_sim: c1, c2, c3, c4 = st.columns(4) c1.metric("Current discount", f"{sku_sim['current_discount']:.1%}") c2.metric("Simulated SKU profit", f"EUR {sku_sim['sim_profit']:.2f}", delta=f"EUR {sku_sim['profit_delta']:.2f}") c3.metric("Simulated units", f"{sku_sim['sim_units']:.1f}", delta=f"{sku_sim['units_delta']:.1f}") c4.metric("Simulated waste", f"{sku_sim['sim_waste_pct']:.1%}") else: st.info("No SKU records match the current simulation scope.") st.markdown("### Transfer suggestions") balancing_df = build_supply_balancing_table(full_df if full_df is not None else df) scope_info = scope_info or {} selected_stores = scope_info.get("stores", []) or [] selected_regions = scope_info.get("regions", []) or [] selected_category = scope_info.get("category", "All") or "All" selected_store = selected_stores[0] if len(selected_stores) == 1 else "All" if selected_store != "All": auto_region = balancing_df.loc[balancing_df["store_id"] == selected_store, "region"].mode() selected_region = auto_region.iloc[0] if not auto_region.empty else "All" st.caption(f"Transfer scope: store **{selected_store}** in region **{selected_region}**" + (f" · category **{selected_category}**" if selected_category != "All" else "")) scoped_df = filter_scope( balancing_df, selected_region=selected_region, selected_store=selected_store, selected_category=selected_category, ) else: if len(selected_regions) == 1: selected_region = selected_regions[0] else: selected_region = "All" scoped_df = filter_scope( balancing_df, selected_region=selected_region, selected_store="All", selected_category=selected_category, ) if selected_region != "All": st.caption(f"Transfer scope: region **{selected_region}**" + (f" · category **{selected_category}**" if selected_category != "All" else "")) else: st.caption("Transfer scope: current full network filter" + (f" · category **{selected_category}**" if selected_category != "All" else "")) receiver = scoped_df[scoped_df["unmet_demand"] > 0].copy() donors = balancing_df.copy() if selected_category != "All": donors = donors[donors["category"] == selected_category].copy() transfer_rows = [] for _, r in receiver.iterrows(): pool = donors[(donors["category"] == r["category"]) & (donors["store_id"] != r["store_id"]) & (donors["surplus_qty"] > 0)].copy() if selected_region != "All": pool["priority_rank"] = (pool["region"] != r["region"]).astype(int) else: pool["priority_rank"] = 0 pool = pool.sort_values(["priority_rank", "avg_days_until_expiry", "surplus_qty"], ascending=[True, False, False]) if pool.empty: best_route = "No feasible donor" same_region_options = "No same-region donor" cross_region_options = "No cross-region donor" transfer_qty = 0 else: same_region = pool[pool["region"] == r["region"]].head(3) cross_region = pool[pool["region"] != r["region"]].head(3) best = pool.iloc[0] transfer_qty = int(min(r["unmet_demand"], max(best["surplus_qty"], 0))) def label(d): tier = "same-region" if d["region"] == r["region"] else "cross-region" return f"{d['store_id']} ({tier}, expiry {d['avg_days_until_expiry']:.1f}d, surplus {int(d['surplus_qty'])})" best_route = label(best) same_region_options = "; ".join(label(d) for _, d in same_region.iterrows()) if not same_region.empty else "No same-region donor" cross_region_options = "; ".join(label(d) for _, d in cross_region.iterrows()) if not cross_region.empty else "No cross-region donor" transfer_rows.append({ "store_id": r["store_id"], "region": r["region"], "category": r["category"], "remaining_inventory": int(r["remaining_inventory"]), "demand": int(r["demand"]), "unmet_demand": int(r["unmet_demand"]), "recommended_transfer_qty": transfer_qty, "best_route": best_route, "same_region_options": same_region_options, "cross_region_options": cross_region_options, }) transfer_df = pd.DataFrame(transfer_rows) st.caption("Transfer logic: the receiving side follows the current sidebar scope. If a store is selected, its region is inferred automatically. Donors are searched from the full balancing table, with same-region options prioritized first, then cross-region options ranked by shelf life and surplus quantity.") if transfer_df.empty: st.success("No scoped store or region currently shows unmet demand that needs transfer support.") else: st.dataframe(transfer_df.sort_values(["unmet_demand", "recommended_transfer_qty"], ascending=[False, False]), use_container_width=True, hide_index=True) def promotion_page(df: pd.DataFrame): st.subheader("FRESHIE · Promotion Designer") st.markdown('

Audience: marketing. Use this page to test markdown depth, bundle logic, and campaign copy.

', unsafe_allow_html=True) st.caption("Promotion designer logic: business-rule simulator. The page follows the current left-side filters, and the SKU selector is built from the filtered scope only.") promo_scope = df.copy() sku_options = sorted(promo_scope["product_name"].dropna().unique().tolist()) if not sku_options: st.info("No SKU remains in the current left-side filter scope.") return head_left, head_right = st.columns([1.1, 1.4]) with head_left: scope_label = "Current filtered scope" if promo_scope["category"].nunique() == 1: scope_label = str(promo_scope["category"].iloc[0]) st.markdown(f"**Promotion scope:** {scope_label}") with head_right: promo_sku = st.selectbox("SKU", sku_options, key="promo_shared_sku") sku_df = promo_scope[promo_scope["product_name"] == promo_sku].copy() if sku_df.empty: st.info("No records match the selected SKU in the current scope.") return left, right = st.columns([1, 1.25]) with left: expiry_target = st.selectbox("Target expiry bucket", ["<=1d", "2-3d", "4-7d", "8-30d", ">30d"], key="promo_exp") discount = st.slider("Discount %", 0, 50, int(round(float(sku_df["discount_pct"].mean() * 100))), key="promo_disc") bundle = st.checkbox("Bundle with complementary items", value=True, key="promo_bundle") weekend_only = st.checkbox("Weekend campaign only", value=False, key="promo_weekend") sub = sku_df[sku_df["expiry_bucket"].astype(str) == str(expiry_target)].copy() if weekend_only: sub = sub[sub["is_weekend"] == 1] if sub.empty: sub = sku_df.copy() if weekend_only: sub = sub[sub["is_weekend"] == 1] if sub.empty: sub = sku_df.copy() demand_lift = 0.08 + discount / 200 if bundle: demand_lift += 0.06 est_sales_uplift = sub["units_sold"].mean() * demand_lift if len(sub) else 0 est_waste_drop = sub["waste_pct"].mean() * min(0.35, demand_lift) if len(sub) else 0 est_profit = sub["profit"].mean() * (1 + demand_lift - discount / 150) if len(sub) else 0 sims = [] sim_base = sub.copy() for disc in range(0, 55, 5): lift = 0.08 + disc / 200 + (0.06 if bundle else 0) sim_profit = (sim_base["profit"].mean() * (1 + lift - disc / 150)) if len(sim_base) else 0 sim_waste = (sim_base["waste_pct"].mean() * (1 - min(0.35, lift))) if len(sim_base) else 0 score = sim_profit - 120 * sim_waste sims.append({"discount": disc, "sim_profit": sim_profit, "sim_waste": sim_waste, "score": score}) sim_df = pd.DataFrame(sims) best = sim_df.sort_values("score", ascending=False).iloc[0] st.metric("Estimated sales uplift", f"{est_sales_uplift:.2f} units") st.metric("Estimated waste reduction", f"{est_waste_drop:.1%}") st.metric("Estimated avg profit", f"EUR {est_profit:.2f}") st.metric("Suggested discount", f"{int(best['discount'])}%") st.caption("Suggested discount logic: test 0% to 50% in 5-point steps and choose the discount that maximizes a simple score = simulated profit - waste penalty.") st.markdown("### Campaign brief") sku_name = str(promo_sku) campaign_type = "weekend bundle campaign" if bundle and weekend_only else "bundle campaign" if bundle else "markdown campaign" st.success(f"Run a {int(best['discount'])}% {campaign_type} for {sku_name} in {expiry_target}.") with right: order = ["<=1d", "2-3d", "4-7d", "8-30d", ">30d"] sku_ts = sku_df.groupby(["month", "expiry_bucket"])[["discount_pct", "waste_pct", "sell_through_pct"]].mean().reset_index() sku_ts["expiry_bucket"] = pd.Categorical(sku_ts["expiry_bucket"], categories=order, ordered=True) sku_ts = sku_ts.sort_values(["expiry_bucket", "month"]) present = sku_ts["expiry_bucket"].dropna().astype(str).unique().tolist() if not sku_ts.empty: fig_disc = px.line( sku_ts, x="month", y="discount_pct", color="expiry_bucket", markers=True, title=f"Discount trend for {promo_sku} across expiry buckets", ) st.plotly_chart(fig_disc, use_container_width=True) fig_waste = px.line( sku_ts, x="month", y="waste_pct", color="expiry_bucket", markers=True, title=f"Waste trend for {promo_sku} across expiry buckets", ) st.plotly_chart(fig_waste, use_container_width=True) fig_sell = px.line( sku_ts, x="month", y="sell_through_pct", color="expiry_bucket", markers=True, title=f"Sell-through trend for {promo_sku} across expiry buckets", ) st.plotly_chart(fig_sell, use_container_width=True) missing = [b for b in order if b not in present] if missing: st.caption("Buckets without records for this SKU are omitted: " + ", ".join(missing)) else: st.info("No records match the current promotion SKU.") st.markdown("### Recommended promotion copy") st.info( f"Fresh pick alert: enjoy {int(best['discount'])}% off selected {str(promo_sku).lower()}" + (" this weekend" if weekend_only else "") + (" with smart bundle savings" if bundle else " while they are still at peak freshness") + f". Prioritize the {expiry_target} bucket and highlight freshness, value, and limited-time availability." ) def consumer_deals(df: pd.DataFrame): st.subheader("FRESHIE · Deal Finder") stores = sorted(df["store_id"].dropna().unique()) if not stores: st.warning("No stores available in the current filter.") return chosen_store = st.selectbox("Choose store first", stores) store_df = df[df["store_id"] == chosen_store].copy() c1, c2, c3 = st.columns(3) budget_range = c1.slider("Budget range (EUR)", 1, 60, (1, 20)) preferred_category = c2.selectbox("Preferred category", ["All"] + sorted(store_df["category"].unique())) expiry_range = c3.slider("Days until expiry", 1, 14, (1, 5)) deals = store_df[ (store_df["days_until_expiry"] >= expiry_range[0]) & (store_df["days_until_expiry"] <= expiry_range[1]) & (store_df["selling_price"] >= budget_range[0]) & (store_df["selling_price"] <= budget_range[1]) ].copy() if preferred_category != "All": deals = deals[deals["category"] == preferred_category] deals["savings"] = (deals["base_price"] - deals["selling_price"]).clip(lower=0) deals["deal_score"] = deals["discount_pct"] * 0.6 + deals["sell_through_pct"] * 0.2 + (1 - deals["waste_pct"]) * 0.2 deals = deals.sort_values(["deal_score", "savings"], ascending=False) st.markdown('

Store marketing view: all deal recommendations below are scoped to the selected store so the shopper sees store-specific promotions and products.

', unsafe_allow_html=True) show = deals[["product_name", "category", "days_until_expiry", "base_price", "selling_price", "discount_pct", "savings"]].head(20).copy() icon_map = { "Bakery": "🥐", "Beverages": "🧃", "Dairy": "🥛", "Deli": "🧺", "Meat": "🥩", "Pharmaceuticals": "💊", "Produce": "🥬", "Ready_to_Eat": "🍱", "Seafood": "🐟" } show["item"] = show.apply(lambda r: f"{icon_map.get(str(r['category']), '📦')} {r['product_name']}", axis=1) st.dataframe(show[["item", "category", "days_until_expiry", "base_price", "selling_price", "discount_pct", "savings"]], use_container_width=True, hide_index=True) st.markdown("### Best current deals") top_cards = deals.head(6) cols = st.columns(3) for i, (_, row) in enumerate(top_cards.iterrows()): icon = icon_map.get(str(row["category"]), "📦") with cols[i % 3]: st.markdown(f"**{icon} {row['product_name']}**") st.write(f"{row['category']} · expires in {int(row['days_until_expiry'])} day(s)") st.write(f"Now EUR {row['selling_price']:.2f} | Save EUR {row['savings']:.2f}") st.caption(f"Discount: {row['discount_pct']:.0%} · Store: {chosen_store}") def consumer_bundles(df: pd.DataFrame): st.subheader("FRESHIE · Bundle Builder") stores = sorted(df["store_id"].dropna().unique()) if not stores: st.warning("No stores available in the current filter.") return chosen_store = st.selectbox("Choose store", stores, key="bundle_store") store_df = df[df["store_id"] == chosen_store].copy() c1, c2, c3 = st.columns(3) budget_range = c1.slider("Bundle budget range (EUR)", 8, 80, (8, 25)) theme = c2.selectbox("Bundle theme", ["Quick dinner", "Healthy protein", "Family breakfast", "Budget saver"]) expiry_range = c3.slider("Use items expiring within days", 1, 10, (1, 5), key="bundle_exp") if "bundle_seed" not in st.session_state: st.session_state["bundle_seed"] = 0 st.session_state["bundle_seed"] = random.randint(0, 10000) work = store_df[ (store_df["days_until_expiry"] >= expiry_range[0]) & (store_df["days_until_expiry"] <= expiry_range[1]) & (store_df["selling_price"] <= budget_range[1]) ].copy() theme_map = { "Quick dinner": ["Ready_to_Eat", "Produce", "Bakery", "Dairy"], "Healthy protein": ["Meat", "Seafood", "Dairy", "Produce"], "Family breakfast": ["Bakery", "Dairy", "Beverages", "Produce"], "Budget saver": list(work["category"].dropna().unique()), } work = work[work["category"].isin(theme_map.get(theme, []))].copy() work["score"] = work["discount_pct"] * 0.5 + (1 - work["waste_pct"]) * 0.2 + work["sell_through_pct"] * 0.3 if len(work) == 0: st.warning("No bundle fits the current conditions.") return seed = int(st.session_state["bundle_seed"]) candidate_frames = [] for idx, (cat_name, grp) in enumerate(work.groupby("category")): grp = grp.sort_values(["score", "selling_price"], ascending=[False, True]).head(10).copy() if len(grp) > 1: shift = (seed + idx) % len(grp) grp = pd.concat([grp.iloc[shift:], grp.iloc[:shift]], ignore_index=True) candidate_frames.append(grp) work = pd.concat(candidate_frames, ignore_index=True) if theme == "Budget saver": work = work.sort_values(["selling_price", "score"], ascending=[True, False]) else: work = work.sort_values(["score", "selling_price"], ascending=[False, True]) picked, subtotal, used_categories = [], 0.0, set() for _, row in work.iterrows(): if subtotal + row["selling_price"] <= budget_range[1]: if theme != "Budget saver" and row["category"] in used_categories: continue picked.append(row) subtotal += row["selling_price"] used_categories.add(row["category"]) if len(picked) >= 5: break if not picked: st.warning("No bundle fits the current conditions.") return bundle = pd.DataFrame(picked) base_total = float(bundle["base_price"].sum()) item_total = float(bundle["selling_price"].sum()) if item_total >= 45: bundle_discount = 0.15 elif item_total >= 30: bundle_discount = 0.12 elif item_total >= 20: bundle_discount = 0.10 elif item_total >= 12: bundle_discount = 0.08 else: bundle_discount = 0.05 bundle_saving = item_total * bundle_discount final_total = max(item_total - bundle_saving, 0) saved = base_total - final_total k1, k2, k3, k4 = st.columns(4) k1.metric("Bundle subtotal", f"EUR {item_total:.2f}") k2.metric("Bundle discount", f"{bundle_discount:.0%}") k3.metric("Bundle total", f"EUR {final_total:.2f}") k4.metric("You save", f"EUR {saved:.2f}") st.dataframe( bundle[["product_name", "category", "selling_price", "base_price", "discount_pct", "days_until_expiry"]], use_container_width=True, hide_index=True, ) st.caption( "Bundle pricing logic: items keep their current markdowns, then the system applies an extra basket discount. " "Higher basket subtotals receive a deeper extra discount. Refresh rotates the candidate pool to surface a different combination." ) def consumer_personal(df: pd.DataFrame): st.subheader("FRESHIE · Personalized Promotions") stores = sorted(df["store_id"].dropna().unique()) if not stores: st.warning("No stores available in the current filter.") return chosen_store = st.selectbox("Choose store", stores, key="personal_store") store_df = df[df["store_id"] == chosen_store].copy() favorite = st.selectbox("Favorite category", sorted(store_df["category"].unique()), key="cp_fav") price_range = st.slider("Price range (EUR)", 1, 30, (1, 10), key="cp_cap") expiry_range = st.slider("Days until expiry", 1, 14, (1, 7), key="cp_days") recs = store_df[ (store_df["category"] == favorite) & (store_df["selling_price"] >= price_range[0]) & (store_df["selling_price"] <= price_range[1]) & (store_df["days_until_expiry"] >= expiry_range[0]) & (store_df["days_until_expiry"] <= expiry_range[1]) ].copy() recs["score"] = recs["discount_pct"] * 0.55 + recs["sell_through_pct"] * 0.20 + (1 - recs["waste_pct"]) * 0.25 recs = recs.sort_values("score", ascending=False).head(12) cols = st.columns(3) for i, (_, row) in enumerate(recs.iterrows()): with cols[i % 3]: st.markdown(f"**{row['product_name']}**") st.write(f"{row['category']} · {chosen_store}") st.write(f"Now EUR {row['selling_price']:.2f}") st.write(f"Expires in {int(row['days_until_expiry'])} day(s)") st.caption(f"Discount: {row['discount_pct']:.0%}") def consumer_wait_or_buy(df: pd.DataFrame): st.subheader("FRESHIE · Wait or buy now?") st.markdown( """ This version uses a live-inventory style view built from the most recent available on-shelf records in the current filter scope. Only items with remaining stock and non-negative days until expiry are shown. """ ) work = df.copy() if work.empty: st.info("No records match the current filters.") return work = work[(work["leftover_units"] > 0) & (work["days_until_expiry"] >= 0)].copy() if work.empty: st.info("No on-shelf inventory remains in the current filter scope.") return latest_keys = ["store_id", "category", "product_name"] work = ( work.sort_values("transaction_date") .groupby(latest_keys, as_index=False) .tail(1) .copy() ) if work.empty: st.info("No recent on-shelf inventory remains after selecting the latest records.") return work["discount"] = work["discount_pct"].fillna(0).clip(lower=0) work["sell_through"] = work["sell_through_pct"].fillna(0).clip(lower=0) def classify_item(row): if row["sell_through"] > 0.7: return "buy now", "selling fast and still available on shelf" if row["sell_through"] < 0.4 and row["discount"] < 0.2: return "wait it", "inventory looks comfortable and discount is still modest" if row["sell_through"] < 0.4 and row["discount"] >= 0.2: return "suggested", "inventory is comfortable and discount is already attractive" return "no special signal", "balanced current shelf signal" labels = work.apply(classify_item, axis=1) work["suggestion"] = [x[0] for x in labels] work["reason"] = [x[1] for x in labels] c1, c2, c3, c4 = st.columns(4) c1.metric("On-shelf items shown", f"{len(work):,}") c2.metric("Buy now", int((work["suggestion"] == "buy now").sum())) c3.metric("Wait it", int((work["suggestion"] == "wait it").sum())) c4.metric("Suggested", int((work["suggestion"] == "suggested").sum())) latest_date = work["transaction_date"].max() st.caption(f"Current shelf view approximated from the latest available records up to {latest_date.date()} in the selected filter scope.") display = work[[ "product_name", "category", "store_id", "leftover_units", "days_until_expiry", "sell_through", "discount", "stockout_flag", "suggestion", "reason", ]].copy() display = display.sort_values( ["suggestion", "days_until_expiry", "discount", "leftover_units", "product_name"], ascending=[True, True, False, False, True], ) st.dataframe(display, use_container_width=True, hide_index=True) summary = ( display["suggestion"] .value_counts() .rename_axis("suggestion") .reset_index(name="items") .sort_values("items", ascending=False) ) fig = px.bar( summary, x="suggestion", y="items", color="suggestion", title="Wait or buy now suggestion mix (current shelf view)", ) st.plotly_chart(fig, use_container_width=True) def manager_manual(): st.subheader("FRESHIE · User Manual (Manager)") st.markdown(""" **Executive Overview** - Track revenue, profit, units sold, and units wasted by month. - Use the diagnosis panel to understand abnormal revenue-profit gaps. **Category Intelligence** - Compare categories on demand, stock, waste, stockout, and profit. - Read the category recommendations and forecast for the current selection. **Inventory & Replenishment** - Review reorder guidance by category. - Use the what-if simulator to see the trade-off between waste, profit, and stockout. **Promotion Designer** - Test markdown depth, expiry targeting, and bundle logic. - Use the campaign brief and promotion copy as a starting point for activation. """) def consumer_manual(): st.subheader("FRESHIE · User Manual (Consumer)") st.markdown(""" **Deal Finder** - Browse the strongest discounted products under your preferred budget and expiry window. **Bundle Builder** - Build a themed basket while staying under budget. **Personalized Promotions** - Focus on your favorite category and a comfortable price cap. **Wait or Buy Now?** - Get smart purchase timing suggestions based on your selected region and product category. - The system evaluates each item's sell-through rate and current discount across stores to indicate whether it is better to buy now or wait for a potentially better deal. """) def main(): inject_css() st.markdown( """

A warm, friendly fresh-food assistant for stores, managers, and everyday shoppers.

🥬 🍓 🐟 🍞

🔎 Fresh, reliable, low-waste decisions across fish, produce, dairy, bakery, and more

""", unsafe_allow_html=True, ) try: df = load_data() except Exception as e: st.error(str(e)) st.stop() filtered, exec_scope, scope_info = apply_filters(df) if filtered.empty: st.warning("No data left after filtering.") st.stop() role = st.radio("Choose your mode", ["Manager", "Consumer"], horizontal=True) if role == "Manager": tabs = st.tabs([ "Executive Overview", "Category Intelligence", "Inventory & Replenishment", "Promotion Designer", "User Manual", ]) with tabs[0]: executive_overview(filtered, exec_scope, scope_info) with tabs[1]: category_intelligence(filtered) with tabs[2]: inventory_page(filtered, full_df=df, scope_info=scope_info) with tabs[3]: promotion_page(filtered) with tabs[4]: manager_manual() else: tabs = st.tabs(["Deal Finder", "Bundle Builder", "Personalized Promotions", "Wait or buy now?", "User Manual"]) with tabs[0]: consumer_deals(filtered) with tabs[1]: consumer_bundles(filtered) with tabs[2]: consumer_personal(filtered) with tabs[3]: consumer_wait_or_buy(filtered) with tabs[4]: consumer_manual() with st.expander("About this app"): st.markdown(""" - Stable FRESHIE UI baseline. - Built to avoid the white-screen issues caused by heavier UI logic. - Next step: reintroduce richer branding and additional consumer features incrementally. """) if __name__ == "__main__": main()