Spaces:

pratikshahp
/

Next-Word-Prediction-SHAP

Build error

App Files Community

pratikshahp committed on Aug 6, 2024

Commit

72a8b0c

verified ·

1 Parent(s): 427d981

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -35

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import numpy as np
 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 import shap
 # Load pre-trained model and tokenizer
 model_name = 'gpt2'
@@ -18,42 +19,20 @@ def predict_next_word_probabilities(text):
         predicted_probabilities = torch.nn.functional.softmax(logits[:, -1], dim=-1)
     return predicted_probabilities.squeeze().numpy()
-# Example sentence
-sentence = "The cat sat on the"
-predicted_prob = predict_next_word_probabilities(sentence)
-# Define a function to get SHAP values
-def shap_values(sentence, tokenizer, model):
-    # Create a set of perturbed versions of the input sentence
-    perturbed_sentences = [
-        sentence.replace("the", "", 1),    # Remove the first occurrence of "the"
-        sentence.replace("mat", "rug"),    # Replace "mat" with "rug" (if it was present)
-    ]
-    # Calculate the predicted probabilities for each perturbed sentence
-    base_prob = predict_next_word_probabilities(sentence)
-    perturbed_probs = [predict_next_word_probabilities(s) for s in perturbed_sentences]
-    # Compute SHAP values (simplified)
-    shap_values = {}
-    for idx, perturbed_sentence in enumerate(perturbed_sentences):
-        change = base_prob - perturbed_probs[idx]
-        shap_values[perturbed_sentence] = change.mean()
-    return shap_values
-# Compute SHAP values
-shap_values = shap_values(sentence, tokenizer, model)
-print("SHAP values:", shap_values)
-# Visualization using matplotlib (optional)
-import matplotlib.pyplot as plt
-words = list(shap_values.keys())
-values = list(shap_values.values())
-plt.bar(words, values)
-plt.xlabel('Perturbed Sentences')
-plt.ylabel('SHAP Value')
-plt.title('SHAP Values for Next-Word Prediction')
-plt.show()

 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 import shap
+import matplotlib.pyplot as plt
 # Load pre-trained model and tokenizer
 model_name = 'gpt2'
         predicted_probabilities = torch.nn.functional.softmax(logits[:, -1], dim=-1)
     return predicted_probabilities.squeeze().numpy()
+# Define a function to wrap the model prediction for SHAP
+def predict(texts):
+    return np.array([predict_next_word_probabilities(text)[tokenizer.encode("mat")[0]] for text in texts])
+# Define the initial text
+initial_text = "The cat sat on the"
+texts = [initial_text, initial_text.replace("the", "", 1), initial_text.replace("mat", "rug")]
+# Create an explainer object
+explainer = shap.Explainer(predict, texts)
+# Generate SHAP values
+shap_values = explainer(texts)
+# Visualization
+shap.initjs()
+shap.summary_plot(shap_values, texts)