Spaces:

PD03
/

FinanceDemo

Sleeping

App Files Files Community

PD03 committed on Oct 5, 2025

Commit

7c10250

verified ·

1 Parent(s): a85e11a

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -155

app.py CHANGED Viewed

@@ -174,7 +174,7 @@ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
 @st.cache_data(show_spinner=False)
 def build_features(_df):
     df = _df.copy()
-    # FIXED: Remove duplicate features - keep only one of unit_cost/cost_per_unit and net_price/price_per_unit
     feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
     feats_cat = ["product", "region", "channel"]
     df = df.sort_values("date").copy()
@@ -185,7 +185,6 @@ def build_features(_df):
     df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
     df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    # Add these to numeric features
     feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
     target = "gm_pct"
     return df, feats_num, feats_cat, target
@@ -198,7 +197,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
             ("num", "passthrough", feats_num),
         ]
     )
-    model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
     pipe = Pipeline([("pre", pre), ("rf", model)])
     X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
     pipe.fit(X_train, y_train)
@@ -208,7 +207,7 @@ def train_model(feats_num, feats_cat, target, _X, _y):
     return pipe, {"r2": r2, "mae": mae}, X_test
 @st.cache_data(show_spinner=False)
-def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500):
     try:
         np.random.seed(42)
         X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
@@ -227,15 +226,12 @@ def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500)
         explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
         shap_values = explainer.shap_values(X_t)
-        # ADDED: Compute SHAP interaction values for deeper insights
-        shap_interaction_values = explainer.shap_interaction_values(X_t)
         shap_df = pd.DataFrame(shap_values, columns=feature_names)
-        return shap_df, shap_interaction_values, X_sample.reset_index(drop=True), feature_names
     except Exception as e:
         st.error(f"Error computing SHAP: {str(e)}")
-        return None, None, None, None
 def estimate_segment_elasticity(df, product, region, channel):
     seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
@@ -289,77 +285,6 @@ def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
     except:
         return None
-def analyze_shap_interactions(shap_interaction_values, X_sample, feature_names, feats_cat):
-    """Analyze SHAP interaction effects to find problematic combinations"""
-    try:
-        # Get categorical feature indices in the transformed feature space
-        cat_feature_indices = {}
-        for cat in feats_cat:
-            cat_feature_indices[cat] = [i for i, fname in enumerate(feature_names) if fname.startswith(f"cat__{cat}_")]
-        # Find strongest interactions for each sample
-        interaction_insights = []
-        for sample_idx in range(min(100, len(X_sample))):  # Analyze first 100 samples
-            interaction_matrix = shap_interaction_values[sample_idx]
-            # Get the sample's categorical values
-            sample_product = X_sample.iloc[sample_idx]["product"]
-            sample_region = X_sample.iloc[sample_idx]["region"]
-            sample_channel = X_sample.iloc[sample_idx]["channel"]
-            # Find product feature index
-            prod_idx = [i for i, fname in enumerate(feature_names) if f"product_{sample_product}" in fname]
-            reg_idx = [i for i, fname in enumerate(feature_names) if f"region_{sample_region}" in fname]
-            chan_idx = [i for i, fname in enumerate(feature_names) if f"channel_{sample_channel}" in fname]
-            if prod_idx and reg_idx:
-                prod_reg_interaction = interaction_matrix[prod_idx[0], reg_idx[0]]
-                if abs(prod_reg_interaction) > 0.001:
-                    interaction_insights.append({
-                        "Product": sample_product,
-                        "Region": sample_region,
-                        "Channel": sample_channel,
-                        "Interaction_Type": "Product × Region",
-                        "Interaction_Effect": prod_reg_interaction
-                    })
-            if prod_idx and chan_idx:
-                prod_chan_interaction = interaction_matrix[prod_idx[0], chan_idx[0]]
-                if abs(prod_chan_interaction) > 0.001:
-                    interaction_insights.append({
-                        "Product": sample_product,
-                        "Region": sample_region,
-                        "Channel": sample_channel,
-                        "Interaction_Type": "Product × Channel",
-                        "Interaction_Effect": prod_chan_interaction
-                    })
-            if reg_idx and chan_idx:
-                reg_chan_interaction = interaction_matrix[reg_idx[0], chan_idx[0]]
-                if abs(reg_chan_interaction) > 0.001:
-                    interaction_insights.append({
-                        "Product": sample_product,
-                        "Region": sample_region,
-                        "Channel": sample_channel,
-                        "Interaction_Type": "Region × Channel",
-                        "Interaction_Effect": reg_chan_interaction
-                    })
-        if interaction_insights:
-            interactions_df = pd.DataFrame(interaction_insights)
-            # Aggregate by combination
-            agg_interactions = interactions_df.groupby(["Product", "Region", "Channel", "Interaction_Type"]).agg({
-                "Interaction_Effect": "mean"
-            }).reset_index()
-            agg_interactions = agg_interactions.sort_values("Interaction_Effect")
-            return agg_interactions
-        else:
-            return pd.DataFrame()
-    except Exception as e:
-        st.warning(f"Could not compute interaction effects: {str(e)}")
-        return pd.DataFrame()
 # -----------------------------
 # Main App
 # -----------------------------
@@ -485,8 +410,8 @@ with st.spinner("🤖 Training AI model..."):
     st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
 # Compute SHAP once for all tabs
-with st.spinner("🔬 Analyzing profitability drivers and interactions..."):
-    shap_df, shap_interaction_values, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
 # Tabs for different sections
 tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
@@ -495,8 +420,8 @@ with tab1:
     st.markdown("### Understanding What Drives Your Profitability")
     st.markdown("""
     <div class="insight-box">
-    <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin,
-    including complex interactions between product, region, and channel combinations.
     </div>
     """, unsafe_allow_html=True)
@@ -591,7 +516,7 @@ with tab1:
             </div>
             """, unsafe_allow_html=True)
-            st.markdown("#### Single-Feature Segment Impact")
             try:
                 cat_cols = ["product", "region", "channel"]
@@ -602,81 +527,44 @@ with tab1:
                 key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
                 grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
-                top_negative = grp.nsmallest(5, "net_impact")
-                top_positive = grp.nlargest(5, "net_impact")
-                st.markdown("**⚠️ Segments Reducing Margin:**")
-                for _, row in top_negative.head(3).iterrows():
-                    st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
-                st.markdown("**✅ Segments Boosting Margin:**")
-                for _, row in top_positive.head(3).iterrows():
-                    st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
-            except Exception as e:
-                st.warning(f"Unable to compute segment analysis: {str(e)}")
-        # NEW: SHAP Interaction Analysis
-        st.markdown("---")
-        st.markdown("### 🔗 Interaction Effects Analysis")
-        st.markdown("""
-        <div class="warning-box">
-        <b>⚡ Advanced Insight:</b> These combinations show how features interact to create compound effects on profitability.
-        For example, "Premium Widget in EMEA via E-Commerce" may have a different margin profile than individual factors suggest.
-        </div>
-        """, unsafe_allow_html=True)
-        if shap_interaction_values is not None:
-            interactions_df = analyze_shap_interactions(shap_interaction_values, X_test_sample, feature_names, feats_cat)
-            if not interactions_df.empty:
-                col_int1, col_int2 = st.columns(2)
-                with col_int1:
-                    st.markdown("#### ⚠️ Problematic Combinations (Reducing Margin)")
-                    worst_interactions = interactions_df.nsmallest(10, "Interaction_Effect")
-                    for idx, row in worst_interactions.head(5).iterrows():
-                        st.markdown(f"""
-                        <div class="recommendation-card" style="border-left: 4px solid #dc3545;">
-                        <b>{row['Interaction_Type']}</b><br>
-                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
-                        <span style="color: #dc3545; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
-                        </div>
-                        """, unsafe_allow_html=True)
-                with col_int2:
-                    st.markdown("#### ✅ High-Performing Combinations (Boosting Margin)")
-                    best_interactions = interactions_df.nlargest(10, "Interaction_Effect")
-                    for idx, row in best_interactions.head(5).iterrows():
-                        st.markdown(f"""
-                        <div class="recommendation-card" style="border-left: 4px solid #28a745;">
-                        <b>{row['Interaction_Type']}</b><br>
-                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
-                        <span style="color: #28a745; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
-                        </div>
-                        """, unsafe_allow_html=True)
-                # Visualization of interaction effects
-                st.markdown("#### Interaction Effects Heatmap")
-                # Create summary by combination
-                interaction_summary = interactions_df.groupby(["Product", "Region", "Channel"]).agg({
-                    "Interaction_Effect": "sum"
-                }).reset_index()
-                fig_int = px.treemap(
-                    interaction_summary,
-                    path=['Product', 'Region', 'Channel'],
-                    values=interaction_summary['Interaction_Effect'].abs(),
-                    color='Interaction_Effect',
                     color_continuous_scale='RdYlGn',
-                    title="Interaction Effects by Product-Region-Channel Combinations"
                 )
-                fig_int.update_layout(height=500)
-                st.plotly_chart(fig_int, use_container_width=True)
-            else:
-                st.info("No significant interaction effects detected in the current sample.")
     else:
         st.error("Unable to compute driver analysis. Please check your data.")
@@ -810,7 +698,7 @@ with tab3:
         st.markdown(f"""
         <div class="insight-box">
-        <b>📊 Current State:</b><br>
         • Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
         • Net Price: <b>${current['net_price']:.2f}</b><br>
         • Unit Cost: <b>${current['unit_cost']:.2f}</b><br>

 @st.cache_data(show_spinner=False)
 def build_features(_df):
     df = _df.copy()
+    # FIXED: Remove duplicate features
     feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
     feats_cat = ["product", "region", "channel"]
     df = df.sort_values("date").copy()
     df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
     df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
     feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
     target = "gm_pct"
     return df, feats_num, feats_cat, target
             ("num", "passthrough", feats_num),
         ]
     )
+    model = RandomForestRegressor(n_estimators=80, max_depth=8, random_state=42, n_jobs=-1)
     pipe = Pipeline([("pre", pre), ("rf", model)])
     X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
     pipe.fit(X_train, y_train)
     return pipe, {"r2": r2, "mae": mae}, X_test
 @st.cache_data(show_spinner=False)
+def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=400):
     try:
         np.random.seed(42)
         X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
         explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
         shap_values = explainer.shap_values(X_t)
         shap_df = pd.DataFrame(shap_values, columns=feature_names)
+        return shap_df, X_sample.reset_index(drop=True), feature_names
     except Exception as e:
         st.error(f"Error computing SHAP: {str(e)}")
+        return None, None, None
 def estimate_segment_elasticity(df, product, region, channel):
     seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
     except:
         return None
 # -----------------------------
 # Main App
 # -----------------------------
     st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
 # Compute SHAP once for all tabs
+with st.spinner("🔬 Analyzing profitability drivers..."):
+    shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=400)
 # Tabs for different sections
 tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
     st.markdown("### Understanding What Drives Your Profitability")
     st.markdown("""
     <div class="insight-box">
+    <b>🎓 Business Insight:</b> This analysis reveals which business factors and segment combinations have the strongest impact on gross margin.
+    Understanding these drivers helps prioritize strategic initiatives and operational improvements.
     </div>
     """, unsafe_allow_html=True)
             </div>
             """, unsafe_allow_html=True)
+            st.markdown("#### Segment Performance Analysis")
             try:
                 cat_cols = ["product", "region", "channel"]
                 key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
                 grp["net_impact"] = grp[key_shap_cols].sum(axis=1)
+                top_negative = grp.nsmallest(8, "net_impact")
+                top_positive = grp.nlargest(8, "net_impact")
+                st.markdown("**⚠️ Product-Region-Channel Combinations Reducing Margin:**")
+                for _, row in top_negative.head(5).iterrows():
+                    st.markdown(f"""
+                    <div class="recommendation-card" style="border-left: 4px solid #dc3545; padding: 0.8rem; margin: 0.5rem 0;">
+                    <b>{row['product']}</b> • {row['region']} • {row['channel']}<br>
+                    <small style="color: #dc3545;">Cumulative Impact: {row['net_impact']:.4f}</small>
+                    </div>
+                    """, unsafe_allow_html=True)
+                st.markdown("**✅ Product-Region-Channel Combinations Boosting Margin:**")
+                for _, row in top_positive.head(5).iterrows():
+                    st.markdown(f"""
+                    <div class="recommendation-card" style="border-left: 4px solid #28a745; padding: 0.8rem; margin: 0.5rem 0;">
+                    <b>{row['product']}</b> • {row['region']} • {row['channel']}<br>
+                    <small style="color: #28a745;">Cumulative Impact: {row['net_impact']:.4f}</small>
+                    </div>
+                    """, unsafe_allow_html=True)
+                # Visualization
+                st.markdown("---")
+                st.markdown("#### Segment Impact Visualization")
+                fig_segments = px.treemap(
+                    grp,
+                    path=['product', 'region', 'channel'],
+                    values=grp['net_impact'].abs(),
+                    color='net_impact',
                     color_continuous_scale='RdYlGn',
+                    title="Product-Region-Channel Combinations Impact on Margin"
                 )
+                fig_segments.update_layout(height=500)
+                st.plotly_chart(fig_segments, use_container_width=True)
+            except Exception as e:
+                st.warning(f"Unable to compute detailed segment analysis: {str(e)}")
     else:
         st.error("Unable to compute driver analysis. Please check your data.")
         st.markdown(f"""
         <div class="insight-box">
+        <b>📊 Current State for {selected_product} • {selected_region} • {selected_channel}:</b><br>
         • Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
         • Net Price: <b>${current['net_price']:.2f}</b><br>
         • Unit Cost: <b>${current['unit_cost']:.2f}</b><br>