PD03 committed on
Commit
7c10250
·
verified ·
1 Parent(s): a85e11a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -155
app.py CHANGED
@@ -174,7 +174,7 @@ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
174
  @st.cache_data(show_spinner=False)
175
  def build_features(_df):
176
  df = _df.copy()
177
- # FIXED: Remove duplicate features - keep only one of unit_cost/cost_per_unit and net_price/price_per_unit
178
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
179
  feats_cat = ["product", "region", "channel"]
180
  df = df.sort_values("date").copy()
@@ -185,7 +185,6 @@ def build_features(_df):
185
  df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
186
  df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
187
 
188
- # Add these to numeric features
189
  feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
190
  target = "gm_pct"
191
  return df, feats_num, feats_cat, target
@@ -198,7 +197,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
198
  ("num", "passthrough", feats_num),
199
  ]
200
  )
201
- model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
202
  pipe = Pipeline([("pre", pre), ("rf", model)])
203
  X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
204
  pipe.fit(X_train, y_train)
@@ -208,7 +207,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
208
  return pipe, {"r2": r2, "mae": mae}, X_test
209
 
210
  @st.cache_data(show_spinner=False)
211
- def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500):
212
  try:
213
  np.random.seed(42)
214
  X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
@@ -227,15 +226,12 @@ def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500)
227
  explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
228
  shap_values = explainer.shap_values(X_t)
229
 
230
- # ADDED: Compute SHAP interaction values for deeper insights
231
- shap_interaction_values = explainer.shap_interaction_values(X_t)
232
-
233
  shap_df = pd.DataFrame(shap_values, columns=feature_names)
234
 
235
- return shap_df, shap_interaction_values, X_sample.reset_index(drop=True), feature_names
236
  except Exception as e:
237
  st.error(f"Error computing SHAP: {str(e)}")
238
- return None, None, None, None
239
 
240
  def estimate_segment_elasticity(df, product, region, channel):
241
  seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
@@ -289,77 +285,6 @@ def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
289
  except:
290
  return None
291
 
292
- def analyze_shap_interactions(shap_interaction_values, X_sample, feature_names, feats_cat):
293
- """Analyze SHAP interaction effects to find problematic combinations"""
294
- try:
295
- # Get categorical feature indices in the transformed feature space
296
- cat_feature_indices = {}
297
- for cat in feats_cat:
298
- cat_feature_indices[cat] = [i for i, fname in enumerate(feature_names) if fname.startswith(f"cat__{cat}_")]
299
-
300
- # Find strongest interactions for each sample
301
- interaction_insights = []
302
-
303
- for sample_idx in range(min(100, len(X_sample))): # Analyze first 100 samples
304
- interaction_matrix = shap_interaction_values[sample_idx]
305
-
306
- # Get the sample's categorical values
307
- sample_product = X_sample.iloc[sample_idx]["product"]
308
- sample_region = X_sample.iloc[sample_idx]["region"]
309
- sample_channel = X_sample.iloc[sample_idx]["channel"]
310
-
311
- # Find product feature index
312
- prod_idx = [i for i, fname in enumerate(feature_names) if f"product_{sample_product}" in fname]
313
- reg_idx = [i for i, fname in enumerate(feature_names) if f"region_{sample_region}" in fname]
314
- chan_idx = [i for i, fname in enumerate(feature_names) if f"channel_{sample_channel}" in fname]
315
-
316
- if prod_idx and reg_idx:
317
- prod_reg_interaction = interaction_matrix[prod_idx[0], reg_idx[0]]
318
- if abs(prod_reg_interaction) > 0.001:
319
- interaction_insights.append({
320
- "Product": sample_product,
321
- "Region": sample_region,
322
- "Channel": sample_channel,
323
- "Interaction_Type": "Product × Region",
324
- "Interaction_Effect": prod_reg_interaction
325
- })
326
-
327
- if prod_idx and chan_idx:
328
- prod_chan_interaction = interaction_matrix[prod_idx[0], chan_idx[0]]
329
- if abs(prod_chan_interaction) > 0.001:
330
- interaction_insights.append({
331
- "Product": sample_product,
332
- "Region": sample_region,
333
- "Channel": sample_channel,
334
- "Interaction_Type": "Product × Channel",
335
- "Interaction_Effect": prod_chan_interaction
336
- })
337
-
338
- if reg_idx and chan_idx:
339
- reg_chan_interaction = interaction_matrix[reg_idx[0], chan_idx[0]]
340
- if abs(reg_chan_interaction) > 0.001:
341
- interaction_insights.append({
342
- "Product": sample_product,
343
- "Region": sample_region,
344
- "Channel": sample_channel,
345
- "Interaction_Type": "Region × Channel",
346
- "Interaction_Effect": reg_chan_interaction
347
- })
348
-
349
- if interaction_insights:
350
- interactions_df = pd.DataFrame(interaction_insights)
351
- # Aggregate by combination
352
- agg_interactions = interactions_df.groupby(["Product", "Region", "Channel", "Interaction_Type"]).agg({
353
- "Interaction_Effect": "mean"
354
- }).reset_index()
355
- agg_interactions = agg_interactions.sort_values("Interaction_Effect")
356
- return agg_interactions
357
- else:
358
- return pd.DataFrame()
359
- except Exception as e:
360
- st.warning(f"Could not compute interaction effects: {str(e)}")
361
- return pd.DataFrame()
362
-
363
  # -----------------------------
364
  # Main App
365
  # -----------------------------
@@ -485,8 +410,8 @@ with st.spinner("🤖 Training AI model..."):
485
  st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
486
 
487
  # Compute SHAP once for all tabs
488
- with st.spinner("🔬 Analyzing profitability drivers and interactions..."):
489
- shap_df, shap_interaction_values, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
490
 
491
  # Tabs for different sections
492
  tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
@@ -495,8 +420,8 @@ with tab1:
495
  st.markdown("### Understanding What Drives Your Profitability")
496
  st.markdown("""
497
  <div class="insight-box">
498
- <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin,
499
- including complex interactions between product, region, and channel combinations.
500
  </div>
501
  """, unsafe_allow_html=True)
502
 
@@ -591,7 +516,7 @@ with tab1:
591
  </div>
592
  """, unsafe_allow_html=True)
593
 
594
- st.markdown("#### Single-Feature Segment Impact")
595
 
596
  try:
597
  cat_cols = ["product", "region", "channel"]
@@ -602,81 +527,44 @@ with tab1:
602
  key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
603
  grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
604
 
605
- top_negative = grp.nsmallest(5, "net_impact")
606
- top_positive = grp.nlargest(5, "net_impact")
607
-
608
- st.markdown("**⚠️ Segments Reducing Margin:**")
609
- for _, row in top_negative.head(3).iterrows():
610
- st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
611
-
612
- st.markdown("**✅ Segments Boosting Margin:**")
613
- for _, row in top_positive.head(3).iterrows():
614
- st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
615
- except Exception as e:
616
- st.warning(f"Unable to compute segment analysis: {str(e)}")
617
-
618
- # NEW: SHAP Interaction Analysis
619
- st.markdown("---")
620
- st.markdown("### 🔗 Interaction Effects Analysis")
621
- st.markdown("""
622
- <div class="warning-box">
623
- <b>⚡ Advanced Insight:</b> These combinations show how features interact to create compound effects on profitability.
624
- For example, "Premium Widget in EMEA via E-Commerce" may have a different margin profile than individual factors suggest.
625
- </div>
626
- """, unsafe_allow_html=True)
627
-
628
- if shap_interaction_values is not None:
629
- interactions_df = analyze_shap_interactions(shap_interaction_values, X_test_sample, feature_names, feats_cat)
630
-
631
- if not interactions_df.empty:
632
- col_int1, col_int2 = st.columns(2)
633
 
634
- with col_int1:
635
- st.markdown("#### ⚠️ Problematic Combinations (Reducing Margin)")
636
- worst_interactions = interactions_df.nsmallest(10, "Interaction_Effect")
637
-
638
- for idx, row in worst_interactions.head(5).iterrows():
639
- st.markdown(f"""
640
- <div class="recommendation-card" style="border-left: 4px solid #dc3545;">
641
- <b>{row['Interaction_Type']}</b><br>
642
- {row['Product']} • {row['Region']} • {row['Channel']}<br>
643
- <span style="color: #dc3545; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
644
- </div>
645
- """, unsafe_allow_html=True)
646
-
647
- with col_int2:
648
- st.markdown("#### ✅ High-Performing Combinations (Boosting Margin)")
649
- best_interactions = interactions_df.nlargest(10, "Interaction_Effect")
650
-
651
- for idx, row in best_interactions.head(5).iterrows():
652
- st.markdown(f"""
653
- <div class="recommendation-card" style="border-left: 4px solid #28a745;">
654
- <b>{row['Interaction_Type']}</b><br>
655
- {row['Product']} • {row['Region']} • {row['Channel']}<br>
656
- <span style="color: #28a745; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
657
- </div>
658
- """, unsafe_allow_html=True)
659
 
660
- # Visualization of interaction effects
661
- st.markdown("#### Interaction Effects Heatmap")
 
 
 
 
 
 
662
 
663
- # Create summary by combination
664
- interaction_summary = interactions_df.groupby(["Product", "Region", "Channel"]).agg({
665
- "Interaction_Effect": "sum"
666
- }).reset_index()
667
 
668
- fig_int = px.treemap(
669
- interaction_summary,
670
- path=['Product', 'Region', 'Channel'],
671
- values=interaction_summary['Interaction_Effect'].abs(),
672
- color='Interaction_Effect',
673
  color_continuous_scale='RdYlGn',
674
- title="Interaction Effects by Product-Region-Channel Combinations"
675
  )
676
- fig_int.update_layout(height=500)
677
- st.plotly_chart(fig_int, use_container_width=True)
678
- else:
679
- st.info("No significant interaction effects detected in the current sample.")
 
680
  else:
681
  st.error("Unable to compute driver analysis. Please check your data.")
682
 
@@ -810,7 +698,7 @@ with tab3:
810
 
811
  st.markdown(f"""
812
  <div class="insight-box">
813
- <b>📊 Current State:</b><br>
814
  • Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
815
  • Net Price: <b>${current['net_price']:.2f}</b><br>
816
  • Unit Cost: <b>${current['unit_cost']:.2f}</b><br>
 
174
  @st.cache_data(show_spinner=False)
175
  def build_features(_df):
176
  df = _df.copy()
177
+ # FIXED: Remove duplicate features
178
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
179
  feats_cat = ["product", "region", "channel"]
180
  df = df.sort_values("date").copy()
 
185
  df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
186
  df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
187
 
 
188
  feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
189
  target = "gm_pct"
190
  return df, feats_num, feats_cat, target
 
197
  ("num", "passthrough", feats_num),
198
  ]
199
  )
200
+ model = RandomForestRegressor(n_estimators=80, max_depth=8, random_state=42, n_jobs=-1)
201
  pipe = Pipeline([("pre", pre), ("rf", model)])
202
  X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
203
  pipe.fit(X_train, y_train)
 
207
  return pipe, {"r2": r2, "mae": mae}, X_test
208
 
209
  @st.cache_data(show_spinner=False)
210
+ def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=400):
211
  try:
212
  np.random.seed(42)
213
  X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
 
226
  explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
227
  shap_values = explainer.shap_values(X_t)
228
 
 
 
 
229
  shap_df = pd.DataFrame(shap_values, columns=feature_names)
230
 
231
+ return shap_df, X_sample.reset_index(drop=True), feature_names
232
  except Exception as e:
233
  st.error(f"Error computing SHAP: {str(e)}")
234
+ return None, None, None
235
 
236
  def estimate_segment_elasticity(df, product, region, channel):
237
  seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
 
285
  except:
286
  return None
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  # -----------------------------
289
  # Main App
290
  # -----------------------------
 
410
  st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
411
 
412
  # Compute SHAP once for all tabs
413
+ with st.spinner("🔬 Analyzing profitability drivers..."):
414
+ shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=400)
415
 
416
  # Tabs for different sections
417
  tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
 
420
  st.markdown("### Understanding What Drives Your Profitability")
421
  st.markdown("""
422
  <div class="insight-box">
423
+ <b>🎓 Business Insight:</b> This analysis reveals which business factors and segment combinations have the strongest impact on gross margin.
424
+ Understanding these drivers helps prioritize strategic initiatives and operational improvements.
425
  </div>
426
  """, unsafe_allow_html=True)
427
 
 
516
  </div>
517
  """, unsafe_allow_html=True)
518
 
519
+ st.markdown("#### Segment Performance Analysis")
520
 
521
  try:
522
  cat_cols = ["product", "region", "channel"]
 
527
  key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
528
  grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
529
 
530
+ top_negative = grp.nsmallest(8, "net_impact")
531
+ top_positive = grp.nlargest(8, "net_impact")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
+ st.markdown("**⚠️ Product-Region-Channel Combinations Reducing Margin:**")
534
+ for _, row in top_negative.head(5).iterrows():
535
+ st.markdown(f"""
536
+ <div class="recommendation-card" style="border-left: 4px solid #dc3545; padding: 0.8rem; margin: 0.5rem 0;">
537
+ <b>{row['product']}</b> {row['region']} {row['channel']}<br>
538
+ <small style="color: #dc3545;">Cumulative Impact: {row['net_impact']:.4f}</small>
539
+ </div>
540
+ """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
 
542
+ st.markdown("**✅ Product-Region-Channel Combinations Boosting Margin:**")
543
+ for _, row in top_positive.head(5).iterrows():
544
+ st.markdown(f"""
545
+ <div class="recommendation-card" style="border-left: 4px solid #28a745; padding: 0.8rem; margin: 0.5rem 0;">
546
+ <b>{row['product']}</b> • {row['region']} • {row['channel']}<br>
547
+ <small style="color: #28a745;">Cumulative Impact: {row['net_impact']:.4f}</small>
548
+ </div>
549
+ """, unsafe_allow_html=True)
550
 
551
+ # Visualization
552
+ st.markdown("---")
553
+ st.markdown("#### Segment Impact Visualization")
 
554
 
555
+ fig_segments = px.treemap(
556
+ grp,
557
+ path=['product', 'region', 'channel'],
558
+ values=grp['net_impact'].abs(),
559
+ color='net_impact',
560
  color_continuous_scale='RdYlGn',
561
+ title="Product-Region-Channel Combinations Impact on Margin"
562
  )
563
+ fig_segments.update_layout(height=500)
564
+ st.plotly_chart(fig_segments, use_container_width=True)
565
+
566
+ except Exception as e:
567
+ st.warning(f"Unable to compute detailed segment analysis: {str(e)}")
568
  else:
569
  st.error("Unable to compute driver analysis. Please check your data.")
570
 
 
698
 
699
  st.markdown(f"""
700
  <div class="insight-box">
701
+ <b>📊 Current State for {selected_product} • {selected_region} • {selected_channel}:</b><br>
702
  • Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
703
  • Net Price: <b>${current['net_price']:.2f}</b><br>
704
  • Unit Cost: <b>${current['unit_cost']:.2f}</b><br>