Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -174,7 +174,7 @@ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
|
|
| 174 |
@st.cache_data(show_spinner=False)
|
| 175 |
def build_features(_df):
|
| 176 |
df = _df.copy()
|
| 177 |
-
# FIXED: Remove duplicate features
|
| 178 |
feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
|
| 179 |
feats_cat = ["product", "region", "channel"]
|
| 180 |
df = df.sort_values("date").copy()
|
|
@@ -185,7 +185,6 @@ def build_features(_df):
|
|
| 185 |
df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
|
| 186 |
df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
|
| 187 |
|
| 188 |
-
# Add these to numeric features
|
| 189 |
feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
|
| 190 |
target = "gm_pct"
|
| 191 |
return df, feats_num, feats_cat, target
|
|
@@ -198,7 +197,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
|
|
| 198 |
("num", "passthrough", feats_num),
|
| 199 |
]
|
| 200 |
)
|
| 201 |
-
model = RandomForestRegressor(n_estimators=
|
| 202 |
pipe = Pipeline([("pre", pre), ("rf", model)])
|
| 203 |
X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
|
| 204 |
pipe.fit(X_train, y_train)
|
|
@@ -208,7 +207,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
|
|
| 208 |
return pipe, {"r2": r2, "mae": mae}, X_test
|
| 209 |
|
| 210 |
@st.cache_data(show_spinner=False)
|
| 211 |
-
def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=
|
| 212 |
try:
|
| 213 |
np.random.seed(42)
|
| 214 |
X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
|
|
@@ -227,15 +226,12 @@ def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500)
|
|
| 227 |
explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
|
| 228 |
shap_values = explainer.shap_values(X_t)
|
| 229 |
|
| 230 |
-
# ADDED: Compute SHAP interaction values for deeper insights
|
| 231 |
-
shap_interaction_values = explainer.shap_interaction_values(X_t)
|
| 232 |
-
|
| 233 |
shap_df = pd.DataFrame(shap_values, columns=feature_names)
|
| 234 |
|
| 235 |
-
return shap_df,
|
| 236 |
except Exception as e:
|
| 237 |
st.error(f"Error computing SHAP: {str(e)}")
|
| 238 |
-
return None, None, None
|
| 239 |
|
| 240 |
def estimate_segment_elasticity(df, product, region, channel):
|
| 241 |
seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
|
|
@@ -289,77 +285,6 @@ def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
|
|
| 289 |
except:
|
| 290 |
return None
|
| 291 |
|
| 292 |
-
def analyze_shap_interactions(shap_interaction_values, X_sample, feature_names, feats_cat):
|
| 293 |
-
"""Analyze SHAP interaction effects to find problematic combinations"""
|
| 294 |
-
try:
|
| 295 |
-
# Get categorical feature indices in the transformed feature space
|
| 296 |
-
cat_feature_indices = {}
|
| 297 |
-
for cat in feats_cat:
|
| 298 |
-
cat_feature_indices[cat] = [i for i, fname in enumerate(feature_names) if fname.startswith(f"cat__{cat}_")]
|
| 299 |
-
|
| 300 |
-
# Find strongest interactions for each sample
|
| 301 |
-
interaction_insights = []
|
| 302 |
-
|
| 303 |
-
for sample_idx in range(min(100, len(X_sample))): # Analyze first 100 samples
|
| 304 |
-
interaction_matrix = shap_interaction_values[sample_idx]
|
| 305 |
-
|
| 306 |
-
# Get the sample's categorical values
|
| 307 |
-
sample_product = X_sample.iloc[sample_idx]["product"]
|
| 308 |
-
sample_region = X_sample.iloc[sample_idx]["region"]
|
| 309 |
-
sample_channel = X_sample.iloc[sample_idx]["channel"]
|
| 310 |
-
|
| 311 |
-
# Find product feature index
|
| 312 |
-
prod_idx = [i for i, fname in enumerate(feature_names) if f"product_{sample_product}" in fname]
|
| 313 |
-
reg_idx = [i for i, fname in enumerate(feature_names) if f"region_{sample_region}" in fname]
|
| 314 |
-
chan_idx = [i for i, fname in enumerate(feature_names) if f"channel_{sample_channel}" in fname]
|
| 315 |
-
|
| 316 |
-
if prod_idx and reg_idx:
|
| 317 |
-
prod_reg_interaction = interaction_matrix[prod_idx[0], reg_idx[0]]
|
| 318 |
-
if abs(prod_reg_interaction) > 0.001:
|
| 319 |
-
interaction_insights.append({
|
| 320 |
-
"Product": sample_product,
|
| 321 |
-
"Region": sample_region,
|
| 322 |
-
"Channel": sample_channel,
|
| 323 |
-
"Interaction_Type": "Product × Region",
|
| 324 |
-
"Interaction_Effect": prod_reg_interaction
|
| 325 |
-
})
|
| 326 |
-
|
| 327 |
-
if prod_idx and chan_idx:
|
| 328 |
-
prod_chan_interaction = interaction_matrix[prod_idx[0], chan_idx[0]]
|
| 329 |
-
if abs(prod_chan_interaction) > 0.001:
|
| 330 |
-
interaction_insights.append({
|
| 331 |
-
"Product": sample_product,
|
| 332 |
-
"Region": sample_region,
|
| 333 |
-
"Channel": sample_channel,
|
| 334 |
-
"Interaction_Type": "Product × Channel",
|
| 335 |
-
"Interaction_Effect": prod_chan_interaction
|
| 336 |
-
})
|
| 337 |
-
|
| 338 |
-
if reg_idx and chan_idx:
|
| 339 |
-
reg_chan_interaction = interaction_matrix[reg_idx[0], chan_idx[0]]
|
| 340 |
-
if abs(reg_chan_interaction) > 0.001:
|
| 341 |
-
interaction_insights.append({
|
| 342 |
-
"Product": sample_product,
|
| 343 |
-
"Region": sample_region,
|
| 344 |
-
"Channel": sample_channel,
|
| 345 |
-
"Interaction_Type": "Region × Channel",
|
| 346 |
-
"Interaction_Effect": reg_chan_interaction
|
| 347 |
-
})
|
| 348 |
-
|
| 349 |
-
if interaction_insights:
|
| 350 |
-
interactions_df = pd.DataFrame(interaction_insights)
|
| 351 |
-
# Aggregate by combination
|
| 352 |
-
agg_interactions = interactions_df.groupby(["Product", "Region", "Channel", "Interaction_Type"]).agg({
|
| 353 |
-
"Interaction_Effect": "mean"
|
| 354 |
-
}).reset_index()
|
| 355 |
-
agg_interactions = agg_interactions.sort_values("Interaction_Effect")
|
| 356 |
-
return agg_interactions
|
| 357 |
-
else:
|
| 358 |
-
return pd.DataFrame()
|
| 359 |
-
except Exception as e:
|
| 360 |
-
st.warning(f"Could not compute interaction effects: {str(e)}")
|
| 361 |
-
return pd.DataFrame()
|
| 362 |
-
|
| 363 |
# -----------------------------
|
| 364 |
# Main App
|
| 365 |
# -----------------------------
|
|
@@ -485,8 +410,8 @@ with st.spinner("🤖 Training AI model..."):
|
|
| 485 |
st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
|
| 486 |
|
| 487 |
# Compute SHAP once for all tabs
|
| 488 |
-
with st.spinner("🔬 Analyzing profitability drivers
|
| 489 |
-
shap_df,
|
| 490 |
|
| 491 |
# Tabs for different sections
|
| 492 |
tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
|
|
@@ -495,8 +420,8 @@ with tab1:
|
|
| 495 |
st.markdown("### Understanding What Drives Your Profitability")
|
| 496 |
st.markdown("""
|
| 497 |
<div class="insight-box">
|
| 498 |
-
<b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin
|
| 499 |
-
|
| 500 |
</div>
|
| 501 |
""", unsafe_allow_html=True)
|
| 502 |
|
|
@@ -591,7 +516,7 @@ with tab1:
|
|
| 591 |
</div>
|
| 592 |
""", unsafe_allow_html=True)
|
| 593 |
|
| 594 |
-
st.markdown("####
|
| 595 |
|
| 596 |
try:
|
| 597 |
cat_cols = ["product", "region", "channel"]
|
|
@@ -602,81 +527,44 @@ with tab1:
|
|
| 602 |
key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
|
| 603 |
grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
|
| 604 |
|
| 605 |
-
top_negative = grp.nsmallest(
|
| 606 |
-
top_positive = grp.nlargest(
|
| 607 |
-
|
| 608 |
-
st.markdown("**⚠️ Segments Reducing Margin:**")
|
| 609 |
-
for _, row in top_negative.head(3).iterrows():
|
| 610 |
-
st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
|
| 611 |
-
|
| 612 |
-
st.markdown("**✅ Segments Boosting Margin:**")
|
| 613 |
-
for _, row in top_positive.head(3).iterrows():
|
| 614 |
-
st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
|
| 615 |
-
except Exception as e:
|
| 616 |
-
st.warning(f"Unable to compute segment analysis: {str(e)}")
|
| 617 |
-
|
| 618 |
-
# NEW: SHAP Interaction Analysis
|
| 619 |
-
st.markdown("---")
|
| 620 |
-
st.markdown("### 🔗 Interaction Effects Analysis")
|
| 621 |
-
st.markdown("""
|
| 622 |
-
<div class="warning-box">
|
| 623 |
-
<b>⚡ Advanced Insight:</b> These combinations show how features interact to create compound effects on profitability.
|
| 624 |
-
For example, "Premium Widget in EMEA via E-Commerce" may have a different margin profile than individual factors suggest.
|
| 625 |
-
</div>
|
| 626 |
-
""", unsafe_allow_html=True)
|
| 627 |
-
|
| 628 |
-
if shap_interaction_values is not None:
|
| 629 |
-
interactions_df = analyze_shap_interactions(shap_interaction_values, X_test_sample, feature_names, feats_cat)
|
| 630 |
-
|
| 631 |
-
if not interactions_df.empty:
|
| 632 |
-
col_int1, col_int2 = st.columns(2)
|
| 633 |
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
{row['Product']} • {row['Region']} • {row['Channel']}<br>
|
| 643 |
-
<span style="color: #dc3545; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
|
| 644 |
-
</div>
|
| 645 |
-
""", unsafe_allow_html=True)
|
| 646 |
-
|
| 647 |
-
with col_int2:
|
| 648 |
-
st.markdown("#### ✅ High-Performing Combinations (Boosting Margin)")
|
| 649 |
-
best_interactions = interactions_df.nlargest(10, "Interaction_Effect")
|
| 650 |
-
|
| 651 |
-
for idx, row in best_interactions.head(5).iterrows():
|
| 652 |
-
st.markdown(f"""
|
| 653 |
-
<div class="recommendation-card" style="border-left: 4px solid #28a745;">
|
| 654 |
-
<b>{row['Interaction_Type']}</b><br>
|
| 655 |
-
{row['Product']} • {row['Region']} • {row['Channel']}<br>
|
| 656 |
-
<span style="color: #28a745; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
|
| 657 |
-
</div>
|
| 658 |
-
""", unsafe_allow_html=True)
|
| 659 |
|
| 660 |
-
|
| 661 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 662 |
|
| 663 |
-
#
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
}).reset_index()
|
| 667 |
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
path=['
|
| 671 |
-
values=
|
| 672 |
-
color='
|
| 673 |
color_continuous_scale='RdYlGn',
|
| 674 |
-
title="
|
| 675 |
)
|
| 676 |
-
|
| 677 |
-
st.plotly_chart(
|
| 678 |
-
|
| 679 |
-
|
|
|
|
| 680 |
else:
|
| 681 |
st.error("Unable to compute driver analysis. Please check your data.")
|
| 682 |
|
|
@@ -810,7 +698,7 @@ with tab3:
|
|
| 810 |
|
| 811 |
st.markdown(f"""
|
| 812 |
<div class="insight-box">
|
| 813 |
-
<b>📊 Current State:</b><br>
|
| 814 |
• Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
|
| 815 |
• Net Price: <b>${current['net_price']:.2f}</b><br>
|
| 816 |
• Unit Cost: <b>${current['unit_cost']:.2f}</b><br>
|
|
|
|
| 174 |
@st.cache_data(show_spinner=False)
|
| 175 |
def build_features(_df):
|
| 176 |
df = _df.copy()
|
| 177 |
+
# FIXED: Remove duplicate features
|
| 178 |
feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
|
| 179 |
feats_cat = ["product", "region", "channel"]
|
| 180 |
df = df.sort_values("date").copy()
|
|
|
|
| 185 |
df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
|
| 186 |
df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
|
| 187 |
|
|
|
|
| 188 |
feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
|
| 189 |
target = "gm_pct"
|
| 190 |
return df, feats_num, feats_cat, target
|
|
|
|
| 197 |
("num", "passthrough", feats_num),
|
| 198 |
]
|
| 199 |
)
|
| 200 |
+
model = RandomForestRegressor(n_estimators=80, max_depth=8, random_state=42, n_jobs=-1)
|
| 201 |
pipe = Pipeline([("pre", pre), ("rf", model)])
|
| 202 |
X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
|
| 203 |
pipe.fit(X_train, y_train)
|
|
|
|
| 207 |
return pipe, {"r2": r2, "mae": mae}, X_test
|
| 208 |
|
| 209 |
@st.cache_data(show_spinner=False)
|
| 210 |
+
def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=400):
|
| 211 |
try:
|
| 212 |
np.random.seed(42)
|
| 213 |
X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
|
|
|
|
| 226 |
explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
|
| 227 |
shap_values = explainer.shap_values(X_t)
|
| 228 |
|
|
|
|
|
|
|
|
|
|
| 229 |
shap_df = pd.DataFrame(shap_values, columns=feature_names)
|
| 230 |
|
| 231 |
+
return shap_df, X_sample.reset_index(drop=True), feature_names
|
| 232 |
except Exception as e:
|
| 233 |
st.error(f"Error computing SHAP: {str(e)}")
|
| 234 |
+
return None, None, None
|
| 235 |
|
| 236 |
def estimate_segment_elasticity(df, product, region, channel):
|
| 237 |
seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
|
|
|
|
| 285 |
except:
|
| 286 |
return None
|
| 287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
# -----------------------------
|
| 289 |
# Main App
|
| 290 |
# -----------------------------
|
|
|
|
| 410 |
st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
|
| 411 |
|
| 412 |
# Compute SHAP once for all tabs
|
| 413 |
+
with st.spinner("🔬 Analyzing profitability drivers..."):
|
| 414 |
+
shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=400)
|
| 415 |
|
| 416 |
# Tabs for different sections
|
| 417 |
tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
|
|
|
|
| 420 |
st.markdown("### Understanding What Drives Your Profitability")
|
| 421 |
st.markdown("""
|
| 422 |
<div class="insight-box">
|
| 423 |
+
<b>🎓 Business Insight:</b> This analysis reveals which business factors and segment combinations have the strongest impact on gross margin.
|
| 424 |
+
Understanding these drivers helps prioritize strategic initiatives and operational improvements.
|
| 425 |
</div>
|
| 426 |
""", unsafe_allow_html=True)
|
| 427 |
|
|
|
|
| 516 |
</div>
|
| 517 |
""", unsafe_allow_html=True)
|
| 518 |
|
| 519 |
+
st.markdown("#### Segment Performance Analysis")
|
| 520 |
|
| 521 |
try:
|
| 522 |
cat_cols = ["product", "region", "channel"]
|
|
|
|
| 527 |
key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
|
| 528 |
grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
|
| 529 |
|
| 530 |
+
top_negative = grp.nsmallest(8, "net_impact")
|
| 531 |
+
top_positive = grp.nlargest(8, "net_impact")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
|
| 533 |
+
st.markdown("**⚠️ Product-Region-Channel Combinations Reducing Margin:**")
|
| 534 |
+
for _, row in top_negative.head(5).iterrows():
|
| 535 |
+
st.markdown(f"""
|
| 536 |
+
<div class="recommendation-card" style="border-left: 4px solid #dc3545; padding: 0.8rem; margin: 0.5rem 0;">
|
| 537 |
+
<b>{row['product']}</b> • {row['region']} • {row['channel']}<br>
|
| 538 |
+
<small style="color: #dc3545;">Cumulative Impact: {row['net_impact']:.4f}</small>
|
| 539 |
+
</div>
|
| 540 |
+
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
+
st.markdown("**✅ Product-Region-Channel Combinations Boosting Margin:**")
|
| 543 |
+
for _, row in top_positive.head(5).iterrows():
|
| 544 |
+
st.markdown(f"""
|
| 545 |
+
<div class="recommendation-card" style="border-left: 4px solid #28a745; padding: 0.8rem; margin: 0.5rem 0;">
|
| 546 |
+
<b>{row['product']}</b> • {row['region']} • {row['channel']}<br>
|
| 547 |
+
<small style="color: #28a745;">Cumulative Impact: {row['net_impact']:.4f}</small>
|
| 548 |
+
</div>
|
| 549 |
+
""", unsafe_allow_html=True)
|
| 550 |
|
| 551 |
+
# Visualization
|
| 552 |
+
st.markdown("---")
|
| 553 |
+
st.markdown("#### Segment Impact Visualization")
|
|
|
|
| 554 |
|
| 555 |
+
fig_segments = px.treemap(
|
| 556 |
+
grp,
|
| 557 |
+
path=['product', 'region', 'channel'],
|
| 558 |
+
values=grp['net_impact'].abs(),
|
| 559 |
+
color='net_impact',
|
| 560 |
color_continuous_scale='RdYlGn',
|
| 561 |
+
title="Product-Region-Channel Combinations Impact on Margin"
|
| 562 |
)
|
| 563 |
+
fig_segments.update_layout(height=500)
|
| 564 |
+
st.plotly_chart(fig_segments, use_container_width=True)
|
| 565 |
+
|
| 566 |
+
except Exception as e:
|
| 567 |
+
st.warning(f"Unable to compute detailed segment analysis: {str(e)}")
|
| 568 |
else:
|
| 569 |
st.error("Unable to compute driver analysis. Please check your data.")
|
| 570 |
|
|
|
|
| 698 |
|
| 699 |
st.markdown(f"""
|
| 700 |
<div class="insight-box">
|
| 701 |
+
<b>📊 Current State for {selected_product} • {selected_region} • {selected_channel}:</b><br>
|
| 702 |
• Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
|
| 703 |
• Net Price: <b>${current['net_price']:.2f}</b><br>
|
| 704 |
• Unit Cost: <b>${current['unit_cost']:.2f}</b><br>
|