Spaces:
Sleeping
Sleeping
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import plotly.express as px

# Checkpoint to visualize; any causal LM that returns attentions should work.
model_name = 'Qwen/Qwen2-1.5B'
# Prefer GPU when available; attention extraction is slow on CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
@st.cache_resource(show_spinner='Loading model...')
def load_model():
    """Load the causal LM once and cache it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without ``st.cache_resource`` the multi-GB model would be reloaded
    on each rerun.

    Returns:
        The pretrained model in bfloat16, moved to ``device`` and set to
        eval mode (inference only — disables dropout and similar layers).
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        token=st.secrets['hf_token'],  # access token for gated/private repos
    ).to(device)
    return model.eval()
@st.cache_resource(show_spinner='Loading tokenizer...')
def load_tokenizer():
    """Load the tokenizer once and cache it across Streamlit reruns.

    Mirrors ``load_model``: without caching, every widget interaction
    would re-fetch the tokenizer files.

    Returns:
        The pretrained tokenizer matching ``model_name``.
    """
    return AutoTokenizer.from_pretrained(
        model_name,
        token=st.secrets['hf_token'],  # access token for gated/private repos
    )
def get_attention_weights_and_tokens(text):
    """Run the model over *text* and return its attention maps plus tokens.

    Args:
        text: raw user input to tokenize and feed through the model.

    Returns:
        attentions: list of per-layer attention tensors (one per hidden
            layer), upcast to float32 since bfloat16 cannot be converted
            to numpy for plotting.
        tokens: the decoded string of each input token, used as the
            row/column labels of the attention heatmap.
    """
    encoded = tokenizer(text, return_tensors='pt')
    # Decode token-by-token so each label aligns with one heatmap axis index.
    tokens = [tokenizer.decode(tok) for tok in encoded.input_ids[0]]
    # no_grad: this is visualization only — building the autograd graph
    # wastes memory and leaves grad-requiring tensors that crash the
    # implicit numpy conversion in plotly downstream.
    with torch.no_grad():
        output = model(**encoded.to(device), output_attentions=True)
    attentions = [attention.to(torch.float32) for attention in output.attentions]
    return attentions, tokens
# Instantiate model and tokenizer up front; the widgets below read the
# model config for their ranges.
model = load_model()
tokenizer = load_tokenizer()

st.title('Attention visualizer')

text = st.text_area('Write your text here and see attention weights.')

# Layers are presented 1-based to the user; convert to a 0-based index.
selected_layer = st.slider(
    'Which layer do you want to see?',
    min_value=1,
    max_value=model.config.num_hidden_layers,
)
layer = selected_layer - 1

# 'Average' means "mean over all heads"; otherwise a 1-based head number.
head = st.select_slider(
    'Which head do you want to see?',
    options=['Average'] + list(range(1, model.config.num_attention_heads + 1)),
)
if text:
    attentions, tokens = get_attention_weights_and_tokens(text)
    # detach(): attention tensors may carry autograd history; a tensor
    # with requires_grad raises on the implicit numpy conversion that
    # plotly performs inside px.imshow.
    layer_attention = attentions[layer][0].detach().cpu()
    if head == 'Average':
        # Mean over the head dimension -> one (seq, seq) matrix.
        weights = layer_attention.mean(dim=0)
    else:
        weights = layer_attention[head - 1]  # head is 1-based in the UI
    fig = px.imshow(weights)
    # Label both axes with the decoded tokens, one tick per position.
    tick_labels = {
        'ticktext': tokens,
        'tickvals': list(range(len(tokens))),
    }
    fig.update_layout(
        xaxis=tick_labels,
        yaxis=dict(tick_labels),  # separate copy; plotly mutates axis dicts
        height=800,
    )
    st.plotly_chart(fig)