"""Explain GPT-2's next-token probability for a target word using SHAP.

Runs GPT-2 on several variants of a prompt and attributes the model's
probability of predicting the target continuation token to the inputs.
"""

import numpy as np
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import shap
import matplotlib.pyplot as plt

# Load pre-trained model and tokenizer.
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()  # inference mode: disables dropout

# GPT-2's BPE vocabulary distinguishes "mat" from " mat" (leading space).
# The word that FOLLOWS a prompt is tokenized with a leading space, so the
# target token must be encoded as " mat".  The original encoded "mat",
# which is a different (wrong) vocabulary id for this use case.
TARGET_TOKEN_ID = tokenizer.encode(" mat")[0]


def predict_next_word_probabilities(text):
    """Return GPT-2's probability distribution over the next token.

    Args:
        text: Prompt string to condition on.

    Returns:
        1-D numpy array of length vocab_size containing softmax
        probabilities for the token following ``text``.
    """
    inputs = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():
        # No ``labels`` argument: we only need the logits, and passing
        # labels would also compute an unused language-modeling loss.
        outputs = model(inputs)
    logits = outputs.logits
    # Softmax over the vocabulary at the final position (next-token head).
    predicted_probabilities = torch.nn.functional.softmax(logits[:, -1], dim=-1)
    return predicted_probabilities.squeeze().numpy()


def predict(texts):
    """SHAP-compatible wrapper: target-token probability for each text."""
    return np.array(
        [predict_next_word_probabilities(text)[TARGET_TOKEN_ID] for text in texts]
    )


# Prompt variants whose attributions we want to compare.
initial_text = "The cat sat on the"
texts = [
    initial_text,
    initial_text.replace("the", "", 1),
    # NOTE(review): ``initial_text`` contains no "mat", so this replace is a
    # no-op (duplicate of initial_text) — kept for backward compatibility,
    # but it was probably intended to edit a longer prompt ending in "mat".
    initial_text.replace("mat", "rug"),
]

# Use an explicit Text masker so SHAP knows how to perturb raw strings.
# The original passed the list of texts as the masker argument, which is
# not a supported masker for a text-to-scalar model function.
explainer = shap.Explainer(predict, shap.maskers.Text(tokenizer))

# Generate SHAP values for each prompt variant.
shap_values = explainer(texts)

# Visualization.  NOTE(review): summary_plot targets tabular features;
# for token-level text attributions shap.plots.text(shap_values) is the
# usual choice — confirm which rendering is wanted.
shap.initjs()
shap.summary_plot(shap_values, texts)