XRachel committed on
Commit
79ef5dc
·
verified ·
1 Parent(s): 686a6ed

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +21 -0
  3. README.md +25 -5
  4. app.py +812 -0
  5. perishable_goods_management.csv +3 -0
  6. requirements.txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ perishable_goods_management.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=7860

# Docker Spaces run the container as UID 1000. Create that user up front so
# the process has a real, writable home directory at runtime.
RUN useradd --create-home --uid 1000 user

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the sources so this layer stays cached
# when only application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --upgrade pip && pip install -r /app/requirements.txt

COPY --chown=user:user . /app

# Drop privileges for runtime; HOME must point at a directory the user owns.
USER user
ENV HOME=/home/user

EXPOSE 7860

CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,10 +1,30 @@
1
  ---
2
- title: Pg033111
3
- emoji: 😻
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: FreshWise Studio
3
+ emoji: 🥐
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: docker
7
+ app_port: 7860
8
+ short_description: Bakery-focused perishable retail optimization app
9
  pinned: false
10
  ---
11
 
12
+ # FreshWise Studio
13
+
14
+ This version adds a bakery-focused category intelligence layer.
15
+
16
+ ## Added in this package
17
+ - Bakery as the featured category
18
+ - Regional comparison of operations, inventory, waste, profitability and demand
19
+ - 14-day regional demand forecast for the featured category
20
+ - Stockout / lost-sales / waste trade-off analysis
21
+ - Category-level promotion simulator
22
+ - Existing linked Region ↔ Store filters retained
23
+
24
+
25
+ ## New in this version
26
+ - Executive Summary tab
27
+ - Slide-ready insights tab content
28
+ - Decision Tree visualization for high waste classification
29
+ - High/Low classification for waste, profit, and promotion effectiveness
30
+ - Decision-oriented manager structure: Overview, Executive Summary, Category Intelligence, Inventory, Promotion, Diagnose
app.py ADDED
@@ -0,0 +1,812 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from functools import lru_cache
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ import streamlit as st
9
+ from sklearn.cluster import KMeans
10
+ from sklearn.ensemble import RandomForestClassifier
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sklearn.tree import DecisionTreeClassifier, plot_tree
14
+ import matplotlib.pyplot as plt
15
+
16
# Page-level Streamlit settings for the dashboard.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="FreshWise - Perishable Retail Optimization",
    page_icon="🥗",
)

# Locations probed, in order, for the dataset. The DATA_PATH environment
# variable comes first; it is an empty string when unset.
DATA_CANDIDATES = [
    os.environ.get("DATA_PATH", ""),
    "perishable_goods_management.csv",
    "/app/perishable_goods_management.csv",
    "/data/perishable_goods_management.csv",
    "/mnt/data/perishable_goods_management.csv",
]

# Hex colour assigned to each product category.
CATEGORY_COLORS = dict(
    Produce="#2E8B57",
    Dairy="#1E90FF",
    Meat="#B22222",
    Seafood="#20B2AA",
    Bakery="#D2691E",
    Ready_to_Eat="#8A2BE2",
)

# Category pre-selected on the Category Intelligence page.
FOCUS_CATEGORY = "Bakery"
41
+
42
+
43
def find_data_path() -> str:
    """Return the first entry of DATA_CANDIDATES that exists on disk.

    Empty entries (such as an unset DATA_PATH) are ignored.

    Raises:
        FileNotFoundError: If none of the candidate paths exists.
    """
    existing = (
        candidate
        for candidate in DATA_CANDIDATES
        if candidate and os.path.exists(candidate)
    )
    found = next(existing, None)
    if found is None:
        raise FileNotFoundError(
            "perishable_goods_management.csv not found. Put it next to app.py or set DATA_PATH."
        )
    return found
50
+
51
+
52
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load the dataset and append the derived columns used across the app.

    Returns:
        The CSV contents with parsed date columns plus ratio, margin,
        stockout, scoring, expiry-bucket and high/low flag columns.
    """
    frame = pd.read_csv(find_data_path())

    # Unparseable dates become NaT instead of raising.
    for date_col in ("transaction_date", "expiration_date"):
        frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")

    stocked = frame["initial_quantity"]
    sold = frame["units_sold"]
    demand = frame["daily_demand"]

    # Ratios are guarded against zero denominators.
    frame["sell_through_pct"] = np.where(stocked > 0, sold / stocked, 0)
    frame["stock_demand_ratio"] = np.where(demand > 0, stocked / demand, np.nan)
    frame["gross_margin"] = frame["selling_price"] - frame["cost_price"]
    frame["leftover_units"] = (stocked - sold).clip(lower=0)
    frame["stockout_flag"] = (demand > stocked).astype(int)
    frame["lost_sales_units"] = (demand - sold).clip(lower=0)

    # Weighted blend: low waste, positive margin, closeness to expiry, discount.
    waste_term = (1 - frame["waste_pct"].clip(0, 1)) * 0.35
    margin_term = frame["profit_margin_pct"].clip(lower=0) / 100 * 0.25
    urgency_term = (1 - frame["days_until_expiry"].clip(upper=14) / 14) * 0.15
    discount_term = frame["discount_pct"].clip(0, 0.5) * 0.25
    frame["value_score"] = waste_term + margin_term + urgency_term + discount_term

    frame["expiry_bucket"] = pd.cut(
        frame["days_until_expiry"],
        bins=[-1, 1, 3, 7, 30, 10_000],
        labels=["<=1d", "2-3d", "4-7d", "8-30d", ">30d"],
    )

    waste = frame["waste_pct"]
    frame["high_waste_flag"] = (waste >= waste.quantile(0.75)).astype(int)
    frame["waste_high"] = (waste > waste.median()).astype(int)
    frame["profit_high"] = (frame["profit"] > frame["profit"].median()).astype(int)

    promoted = frame["is_promoted"] == 1
    above_median_sell_through = frame["sell_through_pct"] > frame["sell_through_pct"].median()
    frame["promo_effective"] = (promoted & above_median_sell_through).astype(int)
    return frame
86
+
87
+
88
@st.cache_data(show_spinner=False)
def fit_segments(df: pd.DataFrame) -> pd.DataFrame:
    """Cluster rows into at most four segments with KMeans.

    Rows with missing or infinite feature values are dropped, at most 20,000
    rows are sampled, features are standardised, and the cluster id is
    stored in a new ``cluster`` column.

    Args:
        df: Data containing the six feature columns listed below.

    Returns:
        The sampled feature frame with an added ``cluster`` column. It is
        empty when no usable rows remain.
    """
    feature_cols = [
        "daily_demand",
        "initial_quantity",
        "waste_pct",
        "shelf_life_days",
        "stock_demand_ratio",
        "sell_through_pct",
    ]
    work = df[feature_cols].replace([np.inf, -np.inf], np.nan).dropna().copy()

    sample_size = min(len(work), 20000)
    work = work.sample(sample_size, random_state=42)

    # KMeans cannot fit more clusters than samples, and the scaler rejects
    # empty input; heavily filtered data must not crash the page.
    n_clusters = min(4, len(work))
    if n_clusters == 0:
        work["cluster"] = pd.Series(dtype=int)
        return work

    scaled = StandardScaler().fit_transform(work)
    km = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    # NOTE(review): KMeans label ids carry no fixed meaning, so pairing them
    # with the static names in cluster_name_map() should be confirmed.
    work["cluster"] = km.fit_predict(scaled)
    return work
106
+
107
+
108
@st.cache_resource(show_spinner=False)
def fit_risk_model(df: pd.DataFrame):
    """Fit a random forest that predicts ``high_waste_flag``.

    Args:
        df: Data containing the feature columns below and ``high_waste_flag``.

    Returns:
        A ``(model, importances)`` tuple, where ``importances`` is a Series
        of feature importances sorted in descending order.
    """
    features = [
        "daily_demand",
        "initial_quantity",
        "shelf_life_days",
        "days_until_expiry",
        "temp_deviation",
        "temp_abuse_events",
        "handling_score",
        "packaging_score",
        "spoilage_risk",
        "discount_pct",
        "markdown_applied",
        "is_weekend",
        "supplier_score",
    ]
    X = df[features]
    y = df["high_waste_flag"]

    # A stratified split needs at least two rows per class and at least one
    # row per class on each side. Filtered subsets can violate both, so fall
    # back to a plain split instead of raising.
    class_counts = y.value_counts()
    n_test = int(np.ceil(0.2 * len(y)))
    n_train = len(y) - n_test
    can_stratify = (
        len(class_counts) > 0
        and class_counts.min() >= 2
        and n_test >= len(class_counts)
        and n_train >= len(class_counts)
    )

    # The held-out part is not used here, so it is discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y if can_stratify else None
    )
    model = RandomForestClassifier(
        n_estimators=120, random_state=42, n_jobs=-1, max_depth=10
    )
    model.fit(X_train, y_train)
    importances = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
    return model, importances
136
+
137
+
138
@lru_cache(maxsize=1)
def cluster_name_map():
    """Return the mapping from cluster id (0-3) to a display name.

    The result is cached, so every call returns the same dict object.
    """
    segment_names = (
        "Stable performers",
        "Overstocked slow movers",
        "Short-life high risk",
        "High demand fast movers",
    )
    return dict(enumerate(segment_names))
146
+
147
+
148
def apply_filters(df: pd.DataFrame):
    """Render the sidebar filters and return the filtered rows.

    Region and Store are linked: the chosen regions limit the store options,
    and chosen stores pull the region selection to exactly the regions those
    stores belong to.

    Args:
        df: Data with ``region``, ``store_id``, ``category``,
            ``days_until_expiry`` and ``is_weekend`` columns.

    Returns:
        The subset of ``df`` matching every active filter.
    """
    st.sidebar.header("Filters")

    if "filter_regions" not in st.session_state:
        st.session_state["filter_regions"] = []
    if "filter_stores" not in st.session_state:
        st.session_state["filter_stores"] = []

    all_regions = sorted(df["region"].dropna().unique())
    all_stores = sorted(df["store_id"].dropna().unique())

    # Reconcile the linked selections BEFORE either widget is created.
    # Streamlit raises StreamlitAPIException if a widget's session-state key
    # is assigned after that widget has been instantiated in the same run.
    chosen_regions = list(st.session_state["filter_regions"])
    chosen_stores = list(st.session_state["filter_stores"])

    # Keep only stores that still belong to the selected region(s).
    if chosen_regions:
        stores_in_regions = set(
            df.loc[df["region"].isin(chosen_regions), "store_id"].dropna().unique()
        )
        chosen_stores = [s for s in chosen_stores if s in stores_in_regions]

    # If stores are selected, make the region selection follow them exactly.
    if chosen_stores:
        chosen_regions = sorted(
            df.loc[df["store_id"].isin(chosen_stores), "region"].dropna().unique()
        )

    st.session_state["filter_regions"] = chosen_regions
    st.session_state["filter_stores"] = chosen_stores

    # Region selection drives the store options.
    regions = st.sidebar.multiselect(
        "Region",
        all_regions,
        key="filter_regions",
    )

    if regions:
        available_stores = sorted(
            df.loc[df["region"].isin(regions), "store_id"].dropna().unique()
        )
    else:
        available_stores = all_stores

    stores = st.sidebar.multiselect(
        "Store",
        available_stores,
        key="filter_stores",
    )

    categories = st.sidebar.multiselect("Category", sorted(df["category"].dropna().unique()), default=[])

    # Clamp the slider so the default range stays valid when the data holds
    # fewer than 30 days of expiry horizon, or none at all.
    max_days = df["days_until_expiry"].max()
    max_days = max(int(max_days), 1) if pd.notna(max_days) else 1
    expiry_range = st.sidebar.slider("Days until expiry", 0, max_days, (0, min(30, max_days)))
    weekend_choice = st.sidebar.selectbox("Day type", ["All", "Weekday", "Weekend"])

    filtered = df.copy()
    if regions:
        filtered = filtered[filtered["region"].isin(regions)]
    if stores:
        filtered = filtered[filtered["store_id"].isin(stores)]
    if categories:
        filtered = filtered[filtered["category"].isin(categories)]
    filtered = filtered[
        (filtered["days_until_expiry"] >= expiry_range[0])
        & (filtered["days_until_expiry"] <= expiry_range[1])
    ]
    if weekend_choice == "Weekday":
        filtered = filtered[filtered["is_weekend"] == 0]
    elif weekend_choice == "Weekend":
        filtered = filtered[filtered["is_weekend"] == 1]
    return filtered
219
+
220
+
221
def metric_row(df: pd.DataFrame):
    """Render five headline KPI tiles, each showing the mean of one column."""
    tile_specs = (
        ("Waste %", "waste_pct", "{:.1%}"),
        ("Profit", "profit", "€{:.2f}"),
        ("Sell-through", "sell_through_pct", "{:.1%}"),
        ("Units wasted", "units_wasted", "{:.1f}"),
        ("Markdown rate", "markdown_applied", "{:.1%}"),
    )
    for slot, (label, column, fmt) in zip(st.columns(5), tile_specs):
        slot.metric(label, fmt.format(df[column].mean()))
228
+
229
+
230
def manager_dashboard(df: pd.DataFrame):
    """Render the manager overview.

    Shows the KPI tiles, the monthly waste/profit trend, the highest-waste
    categories, the riskiest stores and performance by expiry bucket.

    Args:
        df: Filtered data, including the derived columns from ``load_data``.
    """
    st.subheader("Manager Mode")
    metric_row(df)

    trend_col, category_col = st.columns([1.2, 1])
    with trend_col:
        # Dates that failed to parse are NaT. Stringifying their period would
        # create a bogus "NaT" month on the x-axis, so drop them first.
        dated = df.dropna(subset=["transaction_date"])
        month_key = dated["transaction_date"].dt.to_period("M").astype(str)
        trend = dated.groupby(month_key)[["waste_pct", "profit"]].mean().reset_index()
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=trend["transaction_date"], y=trend["waste_pct"], name="Waste %", mode="lines+markers"))
        fig.add_trace(go.Scatter(x=trend["transaction_date"], y=trend["profit"], name="Profit", mode="lines+markers", yaxis="y2"))
        fig.update_layout(
            title="Monthly Waste and Profit Trend",
            yaxis=dict(title="Waste %"),
            yaxis2=dict(title="Profit", overlaying="y", side="right"),
            legend=dict(orientation="h"),
            margin=dict(l=10, r=10, t=40, b=10),
        )
        st.plotly_chart(fig, use_container_width=True)
    with category_col:
        top_risk = (
            df.groupby("category")[["waste_pct", "profit", "stock_demand_ratio"]]
            .mean()
            .sort_values("waste_pct", ascending=False)
            .head(8)
            .reset_index()
        )
        fig = px.bar(top_risk, x="waste_pct", y="category", orientation="h", title="High Waste Categories")
        st.plotly_chart(fig, use_container_width=True)

    stores_col, expiry_col = st.columns(2)
    with stores_col:
        store_risk = (
            df.groupby("store_id")[["waste_pct", "profit", "temp_deviation"]]
            .mean()
            .sort_values(["waste_pct", "temp_deviation"], ascending=[False, False])
            .head(15)
            .reset_index()
        )
        st.dataframe(store_risk, use_container_width=True, hide_index=True)
    with expiry_col:
        # expiry_bucket is categorical; observed=False keeps every bucket on
        # the axis and pins the behaviour across pandas versions.
        expiry = (
            df.groupby("expiry_bucket", observed=False)[["waste_pct", "profit", "discount_pct"]]
            .mean()
            .reset_index()
        )
        fig = px.line(expiry, x="expiry_bucket", y=["waste_pct", "profit", "discount_pct"], markers=True, title="Expiry Stage Performance")
        st.plotly_chart(fig, use_container_width=True)
273
+
274
+
275
+
276
def forecast_region_demand(cat_df: pd.DataFrame, region: str) -> pd.DataFrame:
    """Build a 14-day day-of-week demand forecast for one region.

    Daily demand is averaged per date. Each future day takes the mean demand
    of the same weekday over the most recent 56 dated rows, falling back to
    the mean of the last 14 when that weekday is absent.

    Args:
        cat_df: Rows for one category with ``region``, ``transaction_date``
            and ``daily_demand`` columns.
        region: Region to forecast.

    Returns:
        Up to 60 rows of history (``series == "Actual"``) followed by 14
        forecast rows (``series == "Forecast"``). An empty frame is returned
        when the region has no rows or fewer than 14 distinct dates.
    """
    region_rows = cat_df[cat_df["region"] == region].copy()
    if region_rows.empty:
        return pd.DataFrame()

    daily = (
        region_rows.groupby("transaction_date")["daily_demand"]
        .mean()
        .reset_index()
        .sort_values("transaction_date")
    )
    if len(daily) < 14:
        return pd.DataFrame()

    window = daily.tail(56).copy()
    weekday_of = window["transaction_date"].dt.dayofweek
    by_weekday = window.groupby(weekday_of)["daily_demand"].mean().to_dict()
    fallback = daily["daily_demand"].tail(14).mean()

    first_future_day = daily["transaction_date"].max() + pd.Timedelta(days=1)
    horizon = pd.date_range(first_future_day, periods=14, freq="D")
    forecast = pd.DataFrame({
        "transaction_date": horizon,
        "daily_demand": [by_weekday.get(day.dayofweek, fallback) for day in horizon],
        "series": "Forecast",
    })

    history = daily.tail(60).copy()
    history["series"] = "Actual"
    return pd.concat([history, forecast], ignore_index=True)
295
+
296
+
297
def manager_category_intelligence(df: pd.DataFrame):
    """Render the category deep-dive page.

    Shows a regional comparison, the stockout/waste trade-off, a 14-day
    demand forecast, rule-based regional recommendations and a promotion
    simulator for one selected category.

    Args:
        df: Filtered data, including the derived columns from ``load_data``.
    """
    st.subheader("Category Intelligence")
    categories = sorted(df["category"].dropna().unique())
    default_idx = categories.index(FOCUS_CATEGORY) if FOCUS_CATEGORY in categories else 0
    focus = st.selectbox("Focus category", categories, index=default_idx)
    cat_df = df[df["category"] == focus].copy()

    if cat_df.empty:
        st.warning("No data for the selected category.")
        return

    st.markdown(
        f"Selected category: **{focus}**. This page compares regional operations, inventory, profitability, demand, stockout and waste trade-offs for a distinctive perishable category."
    )

    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Avg demand", f"{cat_df['daily_demand'].mean():.1f}")
    c2.metric("Avg stock", f"{cat_df['initial_quantity'].mean():.1f}")
    c3.metric("Stockout rate", f"{cat_df['stockout_flag'].mean():.1%}")
    c4.metric("Waste rate", f"{cat_df['waste_pct'].mean():.1%}")

    region_summary = (
        cat_df.groupby("region")
        .agg(
            avg_demand=("daily_demand", "mean"),
            avg_stock=("initial_quantity", "mean"),
            avg_profit=("profit", "mean"),
            avg_margin=("profit_margin_pct", "mean"),
            waste_pct=("waste_pct", "mean"),
            units_wasted=("units_wasted", "mean"),
            markdown_rate=("markdown_applied", "mean"),
            promo_rate=("is_promoted", "mean"),
            temp_dev=("temp_deviation", "mean"),
            shelf_life=("shelf_life_days", "mean"),
            days_until_expiry=("days_until_expiry", "mean"),
            stockout_rate=("stockout_flag", "mean"),
            lost_sales=("lost_sales_units", "mean"),
        )
        .reset_index()
    )

    a, b = st.columns([1.2, 1])
    with a:
        melt = region_summary.melt(
            id_vars="region",
            value_vars=["avg_demand", "avg_stock", "avg_profit"],
            var_name="metric",
            value_name="value",
        )
        fig = px.bar(
            melt, x="region", y="value", color="metric", barmode="group",
            title=f"{focus}: regional operations, inventory and profit comparison",
        )
        st.plotly_chart(fig, use_container_width=True)
    with b:
        fig = px.scatter(
            region_summary, x="stockout_rate", y="waste_pct", size="avg_profit", color="region",
            hover_data=["avg_demand", "avg_stock", "markdown_rate", "promo_rate", "lost_sales"],
            title=f"{focus}: stockout vs waste trade-off by region",
        )
        st.plotly_chart(fig, use_container_width=True)

    c1, c2 = st.columns([1, 1.2])
    with c1:
        st.dataframe(region_summary.sort_values("avg_profit", ascending=False), use_container_width=True, hide_index=True)
    with c2:
        region_choice = st.selectbox("Forecast region", sorted(cat_df["region"].dropna().unique()))
        forecast_df = forecast_region_demand(cat_df, region_choice)
        if not forecast_df.empty:
            fig = px.line(
                forecast_df, x="transaction_date", y="daily_demand", color="series",
                title=f"{focus}: 60-day actual + 14-day demand forecast for {region_choice}",
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            # Say why the chart is missing instead of leaving a blank column.
            st.info("Not enough history to forecast demand for this region.")

    st.markdown("### Regional recommendations")
    mean_stockout = region_summary["stockout_rate"].mean()
    mean_waste = region_summary["waste_pct"].mean()
    mean_margin = region_summary["avg_margin"].mean()
    mean_temp = region_summary["temp_dev"].mean()
    # Each region is compared with the cross-region average of the metric.
    for _, r in region_summary.iterrows():
        advice = []
        if r["stockout_rate"] > mean_stockout:
            advice.append("raise replenishment and morning safety stock")
        if r["waste_pct"] > mean_waste:
            advice.append("start markdown earlier")
        if r["avg_margin"] < mean_margin:
            advice.append("use bundles instead of deeper discounts")
        if r["temp_dev"] > mean_temp:
            advice.append("tighten storage handling")
        if not advice:
            advice.append("maintain and scale current playbook")
        st.markdown(f"- **{r['region']}**: " + "; ".join(advice) + ".")

    st.markdown("### Marketing design simulator")
    m1, m2, m3, m4 = st.columns(4)
    promo_region = m1.selectbox("Target region", sorted(cat_df["region"].dropna().unique()), key="cat_region")
    promo_type = m2.selectbox("Promo type", ["Early markdown", "Breakfast bundle", "Happy-hour discount", "Loyalty coupon"])
    discount = m3.slider("Discount %", 0, 40, 15, key="cat_discount")
    duration = m4.slider("Duration (days)", 1, 10, 4, key="cat_duration")

    base = cat_df[cat_df["region"] == promo_region].copy()
    base_sales = base["units_sold"].mean()
    base_waste = base["waste_pct"].mean()
    base_profit = base["profit"].mean()
    promo_factor = {"Early markdown": 0.12, "Breakfast bundle": 0.16, "Happy-hour discount": 0.10, "Loyalty coupon": 0.08}[promo_type]
    sales_lift = promo_factor + discount / 180 + min(duration / 60, 0.10)
    waste_drop = min(0.42, promo_factor + discount / 200)
    margin_drag = discount / 160
    if promo_type == "Breakfast bundle":
        margin_drag *= 0.75

    est_sales = base_sales * (1 + sales_lift)
    est_waste = max(base_waste * (1 - waste_drop), 0)
    est_profit = base_profit * (1 + sales_lift - margin_drag)

    # st.metric reads the direction of a string delta from a leading "-".
    # "€-1.23" would therefore render as an increase, so put the sign first.
    profit_change = est_profit - base_profit
    if profit_change < 0:
        profit_delta = f"-€{abs(profit_change):.2f}"
    else:
        profit_delta = f"€{profit_change:.2f}"

    x1, x2, x3 = st.columns(3)
    x1.metric("Estimated avg units sold", f"{est_sales:.2f}", delta=f"+{(est_sales-base_sales):.2f}")
    # Lower waste is the good outcome, so invert the delta colouring.
    x2.metric("Estimated waste", f"{est_waste:.1%}", delta=f"-{(base_waste-est_waste):.1%}", delta_color="inverse")
    x3.metric("Estimated avg profit", f"€{est_profit:.2f}", delta=profit_delta)
417
+
418
+
419
def generate_summary(df: pd.DataFrame) -> str:
    """Build the markdown executive summary for the given rows.

    Args:
        df: Data with ``waste_pct``, ``profit``, ``daily_demand``,
            ``initial_quantity`` and ``region`` columns.

    Returns:
        Markdown text with average waste, profit and stockout rate, the
        best and worst regions, and fixed recommended actions. A short
        notice is returned when ``df`` has no rows.
    """
    # idxmax raises on an empty group result, so an empty filter selection
    # gets a notice instead of crashing the page.
    if df.empty:
        return """
### Executive Summary

No data matches the current filters.
"""

    waste = df["waste_pct"].mean()
    profit = df["profit"].mean()
    stockout = (df["daily_demand"] > df["initial_quantity"]).mean()
    worst_region = df.groupby("region")["waste_pct"].mean().idxmax()
    best_region = df.groupby("region")["profit"].mean().idxmax()
    return f"""
### Executive Summary

- Average waste rate is **{waste:.1%}**, indicating {'high inefficiency' if waste > 0.2 else 'acceptable performance'}.
- Average profit is **EUR {profit:.2f}**, with strongest performance in **{best_region}**.
- Stockout rate is **{stockout:.1%}**, suggesting {'understocking risk' if stockout > 0.2 else 'balanced supply'}.

Key issue:
- Highest waste occurs in **{worst_region}**.

Recommended actions:
- Advance markdown timing for short-life products.
- Rebalance inventory using demand signals.
- Use bundles instead of deeper discounts where possible.
"""
440
+
441
+
442
def generate_slide_insights(df: pd.DataFrame):
    """Return one-line insights for every threshold the data exceeds.

    Checks mean waste (> 0.2), stockout share (> 0.2), mean discount
    (> 0.25) and mean temperature deviation (> 2), in that order. A single
    "stable" message is returned when none of them triggers.
    """
    stockout_share = (df["daily_demand"] > df["initial_quantity"]).mean()
    checks = (
        (
            df["waste_pct"].mean() > 0.2,
            "High waste is driven by short shelf-life items and delayed markdown timing.",
        ),
        (
            stockout_share > 0.2,
            "Frequent stockouts indicate under-forecasting of demand in key regions.",
        ),
        (
            df["discount_pct"].mean() > 0.25,
            "Over-reliance on discounting is reducing margin quality.",
        ),
        (
            df["temp_deviation"].mean() > 2,
            "Temperature deviation is materially contributing to spoilage risk.",
        ),
    )
    triggered = [message for is_hit, message in checks if is_hit]
    if triggered:
        return triggered
    return ["Current performance is stable, with room to optimize promotion quality and inventory precision."]
455
+
456
+
457
def train_decision_tree(df: pd.DataFrame):
    """Fit a depth-4 decision tree on ``high_waste_flag``.

    Returns:
        A ``(model, features)`` tuple: the fitted classifier and the list
        of feature column names it was trained on.
    """
    features = [
        "daily_demand",
        "initial_quantity",
        "days_until_expiry",
        "temp_deviation",
        "discount_pct",
    ]
    tree = DecisionTreeClassifier(max_depth=4, random_state=42)
    tree.fit(df[features], df["high_waste_flag"])
    return tree, features
464
+
465
+
466
def manager_summary(df: pd.DataFrame):
    """Render the executive summary followed by the slide-ready insights."""
    st.subheader("Executive Summary")
    summary_md = generate_summary(df)
    st.markdown(summary_md)

    st.markdown("### Slide-ready insights")
    insight_lines = generate_slide_insights(df)
    for line in insight_lines:
        st.success(line)
472
+
473
+
474
def manager_diagnose(df: pd.DataFrame):
    """Render the diagnosis page.

    Shows high/low classification shares, the fitted decision tree, its
    feature importances, and classification rates by region and category.

    Args:
        df: Filtered data, including the derived columns from ``load_data``.
    """
    st.subheader("Diagnose")

    # The decision tree cannot be fitted on zero rows, so stop early when
    # the filters leave nothing to analyse.
    if df.empty:
        st.warning("No data for the current filters.")
        return

    c1, c2, c3 = st.columns(3)
    c1.metric("High waste share", f"{df['waste_high'].mean():.1%}")
    c2.metric("High profit share", f"{df['profit_high'].mean():.1%}")
    c3.metric("Effective promo share", f"{df['promo_effective'].mean():.1%}")

    model, features = train_decision_tree(df)
    fig, ax = plt.subplots(figsize=(12, 6))
    plot_tree(model, feature_names=features, class_names=["Low Waste", "High Waste"], filled=True, ax=ax)
    st.pyplot(fig)
    # Close the figure explicitly so it is released after rendering.
    plt.close(fig)

    importance_df = pd.DataFrame({"feature": features, "importance": model.feature_importances_}).sort_values("importance", ascending=False)
    fig2 = px.bar(importance_df, x="importance", y="feature", orientation="h", title="Decision Tree Split Importance")
    st.plotly_chart(fig2, use_container_width=True)

    st.markdown("### Classification views")
    c4, c5 = st.columns(2)
    with c4:
        waste_by_region = df.groupby("region")[["waste_high", "profit_high"]].mean().reset_index()
        fig3 = px.bar(waste_by_region.melt(id_vars="region", var_name="label", value_name="rate"), x="region", y="rate", color="label", barmode="group", title="High Waste vs High Profit by Region")
        st.plotly_chart(fig3, use_container_width=True)
    with c5:
        promo_by_cat = df.groupby("category")["promo_effective"].mean().sort_values(ascending=False).reset_index()
        fig4 = px.bar(promo_by_cat, x="promo_effective", y="category", orientation="h", title="Promotion Effectiveness by Category")
        st.plotly_chart(fig4, use_container_width=True)
501
+
502
+
503
def manager_inventory(df: pd.DataFrame):
    """Render order recommendations and the what-if waste/profit simulator.

    Args:
        df: Filtered data, including the derived columns from ``load_data``.
    """
    st.subheader("Inventory & Replenishment")

    overstock = df.copy()
    # Baseline: 1.2x demand scaled by variability, minus leftover stock.
    overstock["recommended_order_qty"] = (
        1.2 * overstock["daily_demand"] * (1 + overstock["demand_variability"])
        - overstock["leftover_units"]
    )
    # Order less for short shelf life and for the riskiest spoilage quartile.
    overstock.loc[overstock["shelf_life_days"] <= 7, "recommended_order_qty"] *= 0.7
    overstock.loc[overstock["spoilage_risk"] >= overstock["spoilage_risk"].quantile(0.75), "recommended_order_qty"] *= 0.8
    overstock["recommended_order_qty"] = overstock["recommended_order_qty"].clip(lower=0).round()

    c1, c2 = st.columns([1.3, 1])
    with c1:
        category_summary = overstock.groupby("category")[["initial_quantity", "recommended_order_qty", "waste_pct", "profit"]].mean().reset_index()
        # A zero average stock would yield +/-inf; leave such rows undefined.
        avg_stock = category_summary["initial_quantity"].replace(0, np.nan)
        category_summary["order_reduction_pct"] = 1 - category_summary["recommended_order_qty"] / avg_stock
        fig = px.bar(
            category_summary.sort_values("order_reduction_pct", ascending=False),
            x="order_reduction_pct",
            y="category",
            orientation="h",
            title="Recommended Order Reduction by Category",
        )
        st.plotly_chart(fig, use_container_width=True)
    with c2:
        st.markdown("**Action shortlist**")
        shortlist = overstock.sort_values(["waste_pct", "stock_demand_ratio"], ascending=[False, False])[[
            "store_id", "product_name", "category", "initial_quantity", "daily_demand",
            "days_until_expiry", "waste_pct", "recommended_order_qty"
        ]].head(20)
        st.dataframe(shortlist, use_container_width=True, hide_index=True)

    st.markdown("### What-if Simulator")
    col1, col2, col3 = st.columns(3)
    # dropna: a missing category would make sorted() compare float with str.
    selected_category = col1.selectbox("Category for simulation", sorted(df["category"].dropna().unique()))
    order_cut = col2.slider("Reduce order quantity by %", 0, 40, 10)
    markdown_shift = col3.slider("Advance markdown trigger by days", 0, 5, 2)

    sim = df[df["category"] == selected_category].copy()
    current_waste = sim["waste_pct"].mean()
    current_profit = sim["profit"].mean()

    waste_reduction = 0.35 * (order_cut / 100) + 0.015 * markdown_shift
    sim_waste = max(current_waste * (1 - waste_reduction), 0)
    sim_profit = current_profit * (1 + 0.08 * (order_cut / 100) + 0.03 * markdown_shift)

    # st.metric reads the direction of a string delta from a leading "-",
    # so put the sign before the currency symbol.
    profit_change = sim_profit - current_profit
    if profit_change < 0:
        profit_delta = f"-€{abs(profit_change):.2f}"
    else:
        profit_delta = f"€{profit_change:.2f}"

    s1, s2, s3 = st.columns(3)
    s1.metric("Current waste", f"{current_waste:.1%}")
    # Lower waste is the good outcome, so invert the delta colouring.
    s2.metric("Simulated waste", f"{sim_waste:.1%}", delta=f"-{(current_waste-sim_waste):.1%}", delta_color="inverse")
    s3.metric("Simulated avg profit", f"€{sim_profit:.2f}", delta=profit_delta)
553
+
554
+
555
def manager_promotions(df: pd.DataFrame):
    """Render the promotion designer.

    The left column holds the campaign inputs and estimated impact; the
    right column shows current discount vs waste by expiry bucket and a
    suggested promotion text.

    Args:
        df: Filtered data, including the derived columns from ``load_data``.
    """
    st.subheader("Promotion Designer")
    left, right = st.columns([1, 1.2])
    with left:
        # dropna: a missing category would make sorted() compare float with str.
        promo_category = st.selectbox("Promotion category", sorted(df["category"].dropna().unique()), key="promo_cat")
        expiry_target = st.selectbox("Target expiry bucket", ["<=1d", "2-3d", "4-7d", "8-30d", ">30d"])
        discount = st.slider("Discount %", 0, 50, 18)
        bundle = st.checkbox("Bundle with complementary items", value=True)
        weekend_only = st.checkbox("Weekend campaign only", value=False)

        sub = df[(df["category"] == promo_category) & (df["expiry_bucket"].astype(str) == expiry_target)].copy()
        if weekend_only:
            sub = sub[sub["is_weekend"] == 1]

        demand_lift = 0.08 + discount / 200
        if bundle:
            demand_lift += 0.06

        # With no matching rows the means are undefined; report zero instead.
        has_rows = len(sub) > 0
        est_sales_uplift = sub["units_sold"].mean() * demand_lift if has_rows else 0
        est_waste_drop = sub["waste_pct"].mean() * min(0.35, demand_lift) if has_rows else 0
        est_profit = sub["profit"].mean() * (1 + demand_lift - discount / 150) if has_rows else 0

        st.metric("Estimated sales uplift", f"{est_sales_uplift:.2f} units")
        st.metric("Estimated waste reduction", f"{est_waste_drop:.1%}")
        st.metric("Estimated avg profit", f"€{est_profit:.2f}")

    with right:
        # expiry_bucket is categorical; observed=False keeps every bucket on
        # the axis and pins the behaviour across pandas versions.
        promo_base = (
            df.groupby(["expiry_bucket"], observed=False)[["discount_pct", "waste_pct", "profit"]]
            .mean()
            .reset_index()
        )
        fig = px.bar(promo_base, x="expiry_bucket", y=["discount_pct", "waste_pct"], barmode="group", title="Current Discount vs Waste by Expiry")
        st.plotly_chart(fig, use_container_width=True)

        st.markdown("**Recommended promotion copy**")
        st.info(
            f"Run a {discount}% {promo_category} campaign for {expiry_target} items"
            + (" on weekends" if weekend_only else "")
            + (" with bundle offers" if bundle else " as single-item markdown")
            + ". Position the offer at high-traffic display zones and highlight value + freshness."
        )
593
+
594
+
595
def manager_risk(df: pd.DataFrame):
    """Render waste-risk drivers, a temperature heatmap and store alerts."""
    st.subheader("Risk & Store Operations")
    _, importances = fit_risk_model(df)

    drivers_col, heat_col = st.columns([1.1, 1])
    with drivers_col:
        top_drivers = importances.head(10).sort_values()
        fig = px.bar(top_drivers, orientation="h", title="Top Drivers of High Waste Risk")
        st.plotly_chart(fig, use_container_width=True)
    with heat_col:
        heat = df.groupby(["region", "category"])["temp_deviation"].mean().reset_index()
        fig = px.density_heatmap(heat, x="category", y="region", z="temp_deviation", title="Temperature Deviation Heatmap")
        st.plotly_chart(fig, use_container_width=True)

    # Per-store means, blended into one score; waste_pct is scaled by 10.
    store_means = df.groupby("store_id")[["temp_deviation", "temp_abuse_events", "waste_pct", "profit"]].mean()
    store_means["alert_score"] = (
        0.35 * store_means["temp_deviation"]
        + 0.25 * store_means["temp_abuse_events"]
        + 0.4 * store_means["waste_pct"] * 10
    )
    alerts = store_means.sort_values("alert_score", ascending=False).head(15).reset_index()

    st.markdown("### Automated store alerts")
    st.dataframe(alerts, use_container_width=True, hide_index=True)
617
+
618
+
619
def consumer_deals(df: pd.DataFrame):
    """Render the consumer deal finder.

    Lists the best-scoring items within the chosen expiry window, plots
    them, and highlights up to ten items priced within the budget.

    Args:
        df: Data including ``value_score`` and the pricing columns.
    """
    st.subheader("Consumer Mode")
    c1, c2, c3 = st.columns(3)
    max_budget = c1.slider("Budget (€)", 5, 60, 20)
    # dropna: a missing category would make sorted() compare float with str.
    preferred_category = c2.selectbox("Preferred category", ["All"] + sorted(df["category"].dropna().unique()))
    max_expiry = c3.slider("Maximum days until expiry", 1, 14, 5)

    deals = df[df["days_until_expiry"] <= max_expiry].copy()
    if preferred_category != "All":
        deals = deals[deals["category"] == preferred_category]
    # Rank by a blend of discount depth, value score and positive margin.
    deals = deals.assign(
        savings=lambda x: x["base_price"] - x["selling_price"],
        deal_score=lambda x: x["discount_pct"] * 0.5 + x["value_score"] * 0.35 + (x["profit_margin_pct"].clip(lower=0) / 100) * 0.15,
    ).sort_values(["deal_score", "savings"], ascending=False)

    display = deals[[
        "product_name", "category", "store_id", "days_until_expiry",
        "base_price", "selling_price", "discount_pct", "savings"
    ]].head(25)
    st.dataframe(display, use_container_width=True, hide_index=True)

    fig = px.scatter(
        deals.head(500), x="selling_price", y="discount_pct", color="category",
        hover_data=["product_name", "store_id", "days_until_expiry"],
        title="Discounted Items Map"
    )
    st.plotly_chart(fig, use_container_width=True)

    affordable = deals[deals["selling_price"] <= max_budget].head(10)
    if not affordable.empty:
        st.markdown("### Best picks for your budget")
        for _, row in affordable.iterrows():
            st.success(
                f"Now €{row['selling_price']:.2f} (save €{row['base_price'] - row['selling_price']:.2f}) · expires in {int(row['days_until_expiry'])} day(s)"
            )
            st.markdown(
                f"""
🛒 **{row['product_name']}**
📦 Category: {row['category']}
🏪 Store: {row['store_id']}
💸 Discount: {row['discount_pct']*100:.0f}%
⏳ Expiry: {row['days_until_expiry']} days
"""
            )
663
+
664
+
665
def build_bundle(df: pd.DataFrame, budget: float, people: int, theme: str):
    """Greedily assemble a bundle of soon-to-expire items within a budget.

    Only rows with ``days_until_expiry <= 7`` are considered. Candidates are
    ranked by ``value_score + discount_pct`` (highest first, cheaper item
    first on ties) and picked in that order while they still fit in the
    remaining budget. For every theme except ``"Budget saver"`` at most one
    item per category is taken.

    Args:
        df: Product rows. Reads ``days_until_expiry``, ``value_score``,
            ``discount_pct``, ``category``, ``selling_price`` and
            ``base_price``. The frame is not modified.
        budget: Maximum total selling price of the bundle.
        people: Party size; the bundle targets ``people + 1`` items, clamped
            to the range 3..6.
        theme: Bundle theme. Known themes restrict the categories; any other
            value allows every category (still one item per category).

    Returns:
        A tuple ``(bundle, total, saved)``: the chosen rows (with an extra
        ``score`` column), the sum of their selling prices, and the sum of
        ``base_price - selling_price``. When nothing fits, an empty
        ``DataFrame`` and two ``0.0`` values are returned.
    """
    # Boolean indexing yields a new frame and assign() another, so the
    # caller's data stays untouched without an up-front full copy.
    work = df[df["days_until_expiry"] <= 7]
    work = work.assign(score=work["value_score"] + work["discount_pct"])

    every_category = list(work["category"].unique())
    theme_map = {
        "Quick dinner": ["Ready_to_Eat", "Produce", "Bakery", "Dairy"],
        "Healthy protein": ["Meat", "Seafood", "Dairy", "Produce"],
        "Family breakfast": ["Bakery", "Dairy", "Beverages", "Produce"],
        "Budget saver": every_category,
    }
    cats = theme_map.get(theme, every_category)
    work = work[work["category"].isin(cats)].sort_values(
        ["score", "selling_price"], ascending=[False, True]
    )

    target_items = min(max(people + 1, 3), 6)
    one_per_category = theme != "Budget saver"
    # Categories that can still contribute an item. Once this is empty a
    # themed bundle cannot grow, even if target_items has not been reached
    # (the target can be 5-6 while a theme offers only four categories).
    open_categories = set(work["category"].unique())
    cheapest = work["selling_price"].min()

    picked = []  # positional indices into the sorted frame
    used_categories = set()
    remaining = budget

    # Iterate over plain arrays instead of iterrows(): same order, but no
    # per-row Series construction.
    prices = work["selling_price"].to_numpy()
    categories = work["category"].to_numpy()
    for position, (price, category) in enumerate(zip(prices, categories)):
        if not price <= remaining:
            continue
        if one_per_category and category in used_categories:
            continue
        picked.append(position)
        remaining -= price
        used_categories.add(category)
        open_categories.discard(category)
        if len(picked) >= target_items:
            break
        if one_per_category and not open_categories:
            break  # every available category already supplied its item
        if remaining < cheapest:
            break  # nothing left in the frame can be afforded

    if not picked:
        return pd.DataFrame(), 0.0, 0.0

    bundle = work.iloc[picked]
    total = float(bundle["selling_price"].sum())
    saved = float((bundle["base_price"] - bundle["selling_price"]).sum())
    return bundle, total, saved
700
+
701
+
702
def consumer_bundles(df: pd.DataFrame):
    """Render the consumer "Bundle Builder" view.

    Collects a budget, a party size and a theme, asks ``build_bundle`` for a
    matching bundle, and shows its headline numbers and contents. A warning
    is shown instead when no bundle could be built.

    Args:
        df: Product rows, passed through to ``build_bundle``.
    """
    st.subheader("Bundle Builder")

    budget_col, people_col, theme_col = st.columns(3)
    budget = budget_col.slider("Bundle budget (€)", 8, 80, 25)
    people = people_col.slider("People", 1, 6, 2)
    theme_options = ["Quick dinner", "Healthy protein", "Family breakfast", "Budget saver"]
    theme = theme_col.selectbox("Bundle theme", theme_options)

    bundle, total, saved = build_bundle(df, budget, people, theme)
    if bundle.empty:
        st.warning("No bundle found for the current filters.")
        return

    # One metric per column, left to right.
    headline = (
        ("Bundle total", f"€{total:.2f}"),
        ("You save", f"€{saved:.2f}"),
        ("Items", f"{len(bundle)}"),
    )
    for slot, (label, value) in zip(st.columns(3), headline):
        slot.metric(label, value)

    shown_columns = [
        "product_name", "category", "store_id", "selling_price",
        "base_price", "discount_pct", "days_until_expiry",
    ]
    st.dataframe(bundle[shown_columns], use_container_width=True, hide_index=True)

    st.info(
        "Suggested marketing use: turn these bundles into one-click promotions for end customers or pre-designed campaign packs for store managers."
    )
726
+
727
+
728
def consumer_personal(df: pd.DataFrame):
    """Render the consumer "Personalized Promotions" view.

    Filters the rows to one favorite category and a maximum item price
    (optionally hiding items with one day or less until expiry), ranks them
    by ``0.55 * discount_pct + 0.45 * value_score`` and shows the top twelve
    as cards laid out over three columns. When nothing matches, an
    informational message is shown instead of an empty page.

    Args:
        df: Product rows. Reads ``category``, ``selling_price``,
            ``days_until_expiry``, ``discount_pct``, ``value_score``,
            ``product_name``, ``store_id`` and ``base_price``.
    """
    st.subheader("Personalized Promotions")
    favorite = st.selectbox("Favorite category", sorted(df["category"].unique()))
    price_cap = st.slider("Max item price (€)", 1, 30, 10)
    not_too_close = st.checkbox("Hide items expiring within 1 day", value=False)

    # Each boolean filter returns a new frame, so no explicit copy is needed
    # before assign() adds the ranking column.
    recs = df[df["category"] == favorite]
    recs = recs[recs["selling_price"] <= price_cap]
    if not_too_close:
        recs = recs[recs["days_until_expiry"] > 1]

    if recs.empty:
        # Tell the user why the page is blank rather than rendering nothing.
        st.info("No promotions match the current filters.")
        return

    recs = (
        recs.assign(score=lambda x: x["discount_pct"] * 0.55 + x["value_score"] * 0.45)
        .sort_values("score", ascending=False)
        .head(12)
    )

    cols = st.columns(3)
    for i, (_, row) in enumerate(recs.iterrows()):
        # Cards are dealt round-robin across the three columns.
        with cols[i % 3]:
            st.markdown(f"### {row['product_name']}")
            st.write(f"{row['category']} · {row['store_id']}")
            st.write(f"Now **€{row['selling_price']:.2f}** | Save **€{(row['base_price'] - row['selling_price']):.2f}**")
            st.write(f"Expires in {int(row['days_until_expiry'])} day(s)")
            # The key keeps the otherwise identical buttons distinct.
            st.button("Add to shortlist", key=f"short_{i}")
748
+
749
+
750
def main():
    """Run the FreshWise app: load data, apply filters, render the chosen mode.

    Loading errors are shown to the user and stop the run, as does a filter
    selection that leaves no rows. The selected mode ("Manager" or
    "Consumer") decides which set of tabs is rendered; every tab receives
    the filtered frame. An "About this app" expander closes the page.
    """
    st.title("🥗 FreshWise")
    st.caption("Perishable retail optimization for managers and consumers")

    try:
        df = load_data()
    except Exception as exc:
        # Top-level boundary: surface the message in the UI and halt.
        st.error(str(exc))
        st.stop()

    filtered = apply_filters(df)
    if filtered.empty:
        st.warning("No data left after filtering.")
        st.stop()

    role = st.radio("Choose your mode", ["Manager", "Consumer"], horizontal=True)

    # Each entry pairs a tab label with the function that renders it.
    if role == "Manager":
        pages = [
            ("Overview", manager_dashboard),
            ("Executive Summary", manager_summary),
            ("Category Intelligence", manager_category_intelligence),
            ("Inventory & Replenishment", manager_inventory),
            ("Promotion Designer", manager_promotions),
            ("Diagnose", manager_diagnose),
        ]
    else:
        pages = [
            ("Deal Finder", consumer_deals),
            ("Bundle Builder", consumer_bundles),
            ("Personalized Promotions", consumer_personal),
        ]

    tabs = st.tabs([label for label, _ in pages])
    for tab, (_, render) in zip(tabs, pages):
        with tab:
            render(filtered)

    with st.expander("About this app"):
        st.markdown(
            """
            - **Manager mode** turns data into inventory, markdown, and operational decisions.
            - **Consumer mode** surfaces discounted products, smart bundles, and personalized promotions.
            - Built for deployment on Hugging Face Docker Spaces with Streamlit.
            """
        )


# Script entry point.
if __name__ == "__main__":
    main()
perishable_goods_management.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de94302b867c9debedfd45c431306623fdfc038f5ed8ca17736339b4460a6674
3
+ size 21095333
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit==1.44.1
2
+ pandas==2.2.3
3
+ numpy==2.2.4
4
+ plotly==6.0.1
5
+ scikit-learn==1.6.1
6
+ pyarrow==19.0.1
7
+ matplotlib==3.10.1