Spaces:

Ashed00
/

BERT_Token_Weights

Sleeping

App Files Files Community

Ashed00 committed on Feb 1, 2025

Commit

59d626e

verified ·

1 Parent(s): aff6f72

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -13

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import shap
 import torch
 import numpy as np
 import matplotlib.pyplot as plt
 # Load model and tokenizer with caching
 @st.cache_resource
@@ -18,8 +19,8 @@ tokenizer, model = load_model()
 def predict(texts):
     processed_texts = []
     for text in texts:
-        processed_texts.append(text if not isinstance(text, list)
-                              else tokenizer.convert_tokens_to_string(text))
     inputs = tokenizer(
         processed_texts,
@@ -44,9 +45,9 @@ explainer = shap.Explainer(predict, masker, output_names=output_names)
 st.title("🎯 BERT Sentiment Analysis with SHAP")
 st.markdown("""
 **How it works:**
-1. Enter text in the box below
-2. See predicted sentiment (1-5 stars)
-3. View confidence scores and word-level explanations
 """)
 text_input = st.text_area("Input Text", placeholder="Enter text to analyze...", height=100)
@@ -62,7 +63,6 @@ if st.button("Analyze Sentiment"):
             st.subheader("📊 Results")
             cols = st.columns(2)
             cols[0].metric("Predicted Sentiment", output_names[predicted_class])
             with cols[1]:
                 st.markdown("**Confidence Scores**")
                 for label, score in zip(output_names, probabilities):
@@ -76,19 +76,30 @@ if st.button("Analyze Sentiment"):
             🔵 Lower negative values → Decreases sentiment
             """)
             shap_values = explainer([text_input])
             # Create tabs for each sentiment class
             tabs = st.tabs(output_names)
             for i, tab in enumerate(tabs):
                 with tab:
-                    # Create a bar plot of SHAP values
-                    fig, ax = plt.subplots(figsize=(8, 4))
-                    shap.plots.bar(shap_values[:, :, i], show=False)
-                    # Display the plot in Streamlit
                     st.pyplot(fig)
-                    plt.close(fig)  # Free memory after rendering
     else:
         st.warning("Please enter some text to analyze")

 import torch
 import numpy as np
 import matplotlib.pyplot as plt
+import pandas as pd
 # Load model and tokenizer with caching
 @st.cache_resource
 def predict(texts):
     processed_texts = []
     for text in texts:
+        processed_texts.append(text if not isinstance(text, list)
+                               else tokenizer.convert_tokens_to_string(text))
     inputs = tokenizer(
         processed_texts,
 st.title("🎯 BERT Sentiment Analysis with SHAP")
 st.markdown("""
 **How it works:**
+1. Enter text in the box below
+2. See predicted sentiment (1-5 stars)
+3. View confidence scores and word-level explanations
 """)
 text_input = st.text_area("Input Text", placeholder="Enter text to analyze...", height=100)
             st.subheader("📊 Results")
             cols = st.columns(2)
             cols[0].metric("Predicted Sentiment", output_names[predicted_class])
             with cols[1]:
                 st.markdown("**Confidence Scores**")
                 for label, score in zip(output_names, probabilities):
             🔵 Lower negative values → Decreases sentiment
             """)
+            # Get SHAP values for the input text
             shap_values = explainer([text_input])
             # Create tabs for each sentiment class
             tabs = st.tabs(output_names)
             for i, tab in enumerate(tabs):
                 with tab:
+                    # Extract the values and corresponding tokens for our single example.
+                    # shap_values is of shape (1, num_tokens, num_classes)
+                    values = shap_values.values[0, :, i]  # SHAP values for class i
+                    tokens = shap_values.data[0]          # Tokenized words
+                    # Create a DataFrame to sort and plot the tokens by importance
+                    df = pd.DataFrame({"token": tokens, "shap_value": values})
+                    # Sort tokens by the absolute SHAP value (smallest at the bottom for horizontal bar plot)
+                    df = df.sort_values("shap_value", key=lambda x: np.abs(x), ascending=True)
+                    # Create a horizontal bar plot
+                    fig, ax = plt.subplots(figsize=(8, max(4, len(tokens) * 0.3)))
+                    ax.barh(df["token"], df["shap_value"], color='skyblue')
+                    ax.set_xlabel("SHAP value")
+                    ax.set_title(f"SHAP bar plot for class '{output_names[i]}'")
                     st.pyplot(fig)
+                    plt.close(fig)
     else:
         st.warning("Please enter some text to analyze")