Spaces:
Running
Running
| import streamlit as st | |
| import torch | |
| import pandas as pd | |
| import numpy as np | |
| from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
st.title('Sentiment Analysis with Streamlit')

# Default text shown in the text area: the bundled sample tweet,
# flattened onto a single line.
speech = ""
# Explicit encoding avoids platform-dependent default decoding
# (e.g. cp1252 on Windows) when the sample file contains non-ASCII text.
with open("tweet.txt", encoding="utf-8") as file:
    speech = "".join(line.rstrip() for line in file)

data = st.text_area(label="Text for Sentiment Analysis", value=speech)

# Hugging Face checkpoints the user can choose between. The first one is
# the custom multi-label toxicity model; the rest are off-the-shelf
# sentiment models driven through the pipeline API.
models = ["sachiniyer/tweet_toxicity",
          "distilbert-base-uncased-finetuned-sst-2-english",
          "Ghost1/bert-base-uncased-finetuned_for_sentiment_analysis1-sst2",
          "Seethal/sentiment_analysis_generic_dataset",
          "sbcBI/sentiment_analysis_model",
          "juliensimon/reviews-sentiment-analysis"]
model_name = st.selectbox(
    'Which model do you want to use',
    models)

# Label names for the toxicity model, index-aligned with its output logits.
labels = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
def score(item):
    """Key function: pull the numeric confidence out of a result dict."""
    value = item['score']
    return value
def get_tokens(data, model):
    """Tokenize *data* into PyTorch tensors for the toxicity model.

    NOTE(review): *model* is currently unused — the tokenizer checkpoint is
    hard-coded to match the only caller (the tweet_toxicity branch). Kept in
    the signature for interface compatibility.
    """
    tokenizer = AutoTokenizer.from_pretrained("sachiniyer/tweet_toxicity")
    # Truncate to the model's maximum sequence length so that inputs longer
    # than the positional-embedding limit don't crash the forward pass.
    tokens = tokenizer(data, return_tensors="pt", truncation=True)
    return tokens
def get_out(tokens, model):
    """Run the model's forward pass on an already-tokenized input."""
    return model(**tokens)
def get_perc(output):
    """Map the first row of model logits to per-label probabilities.

    Each logit is squashed independently with a sigmoid (rather than a
    softmax over the whole row), then returned as a 1-D numpy array.
    """
    probs = torch.sigmoid(output.logits)
    return probs.detach().numpy()[0]
def get_dict(percs, data):
    """Build a one-row summary of the two highest-scoring labels.

    *percs* is index-aligned with the module-level ``labels`` list; the
    returned dict holds the input text plus the top-2 label names and
    their probabilities rounded to 3 places (as strings).
    """
    top_two = np.argsort(percs)[-2:]  # ascending order: [runner-up, best]
    best = top_two[1]
    runner_up = top_two[0]
    return {"text": data,
            "label 1": labels[best],
            "perc 1": str(round(percs[best], 3)),
            "label 2": labels[runner_up],
            "perc 2": str(round(percs[runner_up], 3))}
def get(data, model):
    """Full scoring path: tokenize, infer, convert to probabilities,
    and return a single-row DataFrame summarizing the top labels."""
    tokenized = get_tokens(data, model)
    percs = get_perc(get_out(tokenized, model))
    return pd.DataFrame([get_dict(percs, data)])
if st.button('Run model'):
    if model_name == "sachiniyer/tweet_toxicity":
        # Custom multi-label path: load the toxicity model and run the
        # manual tokenize/forward/sigmoid pipeline, shown as a table.
        model = AutoModelForSequenceClassification.from_pretrained("sachiniyer/tweet_toxicity")
        d = get(data, model)
        st.table(d)
    else:
        # Off-the-shelf sentiment models go through the pipeline API.
        generator = pipeline(model=model_name)
        # BUG FIX: classify the user's text area input, not the model's
        # own name string (the original passed `model_name` here).
        st.markdown(generator(data))