# app.py — Streamlit sentiment-analysis demo (Hugging Face Space, commit b231981)
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import pandas as pd
# Generic path: any Hub model that works with the sentiment-analysis pipeline.
def sentiment_analysis(text, model_name):
    """Classify *text* with a Hugging Face sentiment-analysis pipeline.

    Args:
        text: Input string to classify.
        model_name: Hub id of a sequence-classification model.

    Returns:
        (label, score) tuple from the pipeline's top prediction.
    """
    classifier = pipeline(
        'sentiment-analysis',
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
        tokenizer=AutoTokenizer.from_pretrained(model_name),
    )
    prediction = classifier(text)[0]
    return prediction['label'], prediction['score']
#for my finetuned model
def finetune(text):
    """Classify *text* with the fine-tuned DistilBERT toxicity model and
    render the result as a Streamlit table.

    Args:
        text: Input string to classify.

    Returns:
        A single-row pandas DataFrame with the top predicted label/score and
        a secondary label/score drawn from classes 2..5.
    """
    # Load tokenizer and model, and switch to evaluation mode (disables dropout).
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
    model.eval()

    # Index -> human-readable class name for this 6-class toxicity model.
    label_names = {
        0: 'toxic',
        1: 'severe_toxic',
        2: 'obscene',
        3: 'threat',
        4: 'insult',
        5: 'identity_hate',
    }

    # Encode the input and add a batch dimension of 1.
    encoding = tokenizer(text, truncation=True, padding=True, max_length=128)
    input_ids = torch.tensor(encoding["input_ids"]).unsqueeze(0)
    attention_mask = torch.tensor(encoding["attention_mask"]).unsqueeze(0)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1)

    # Top prediction over all six classes — usually 'toxic'.
    score = probabilities[0].max().item()
    predicted_label = label_names[torch.argmax(probabilities, dim=1).item()]

    # Secondary prediction restricted to classes 2..5 (obscene..identity_hate).
    # argmax on the slice (offset by 2) instead of torch.where(prob == value):
    # the equality search crashed with .item() whenever two entries tied.
    confidence = probabilities[0][2:6].max().item()
    other_idx = torch.argmax(probabilities[0][2:6]).item() + 2
    other_label = label_names[other_idx]

    df = pd.DataFrame({
        'Text': text,
        'Main Label': predicted_label,
        'Score1': score,
        'Second Label': other_label,
        'Score2': confidence,
    }, index=[0])
    # Bug fix: st.table() used to sit AFTER the return and never executed;
    # render the table before returning the DataFrame.
    st.table(df)
    return df
#experiment/backup prediction function
def predict(text, tokenizer=None, model=None):
    """Return the argmax class index for *text* (backup prediction path).

    Bug fix: the original body read module-level ``tokenizer``/``model``
    globals that were never defined (both are locals of other functions),
    so every call raised NameError. They are now optional parameters that
    default to the fine-tuned toxicity model.

    Args:
        text: Input string to classify.
        tokenizer: Optional pre-loaded tokenizer; loaded from the Hub if None.
        model: Optional pre-loaded model; loaded from the Hub if None.

    Returns:
        The predicted class index as an int.
    """
    if tokenizer is None:
        tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    if model is None:
        model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
        model.eval()
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    _, prediction = torch.max(outputs.logits, dim=1)
    return prediction.item()
def main():
    """Streamlit entry point: collect text, pick a model, show the result."""
    st.title("Sentiment Analysis App")
    text = st.text_input("Enter text:", value="You're great!")
    model_options = [
        "psychedelicbunny/bertfinetuned",
        "bert-base-uncased",
        "finiteautomata/bertweet-base-sentiment-analysis",
        "roberta-base",
    ]
    model_name = st.selectbox("Select a pretrained model", model_options)
    if st.button("Analyze"):
        with st.spinner('Analyzing...'):
            # The fine-tuned model has its own inference path; everything
            # else goes through the generic pipeline helper.
            if model_name == "psychedelicbunny/bertfinetuned":
                sentiment = finetune(text)
            else:
                sentiment = sentiment_analysis(text, model_name)
        st.write("Sentiment:", sentiment)


if __name__ == '__main__':
    main()