# app.py — Streamlit sentiment-analysis demo (Hugging Face Space, commit b231981)
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import pandas as pd
# Generic path: any Hub model that works with the sentiment-analysis pipeline.
def sentiment_analysis(text, model_name):
    """Classify *text* with a Hugging Face sentiment-analysis pipeline.

    Args:
        text: Input string to classify.
        model_name: Hub id of a sequence-classification model.

    Returns:
        (label, score) tuple from the pipeline's top prediction.
    """
    classifier = pipeline(
        'sentiment-analysis',
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
        tokenizer=AutoTokenizer.from_pretrained(model_name),
    )
    prediction = classifier(text)[0]
    return prediction['label'], prediction['score']
#for my finetuned model
def finetune(text):
    """Classify *text* with the fine-tuned DistilBERT toxicity model and
    render the result as a Streamlit table.

    Args:
        text: Input string to classify.

    Returns:
        A single-row pandas DataFrame with the top predicted label/score and
        a secondary label/score drawn from classes 2..5.
    """
    # Load tokenizer and model, and switch to evaluation mode (disables dropout).
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
    model.eval()

    # Index -> human-readable class name for this 6-class toxicity model.
    label_names = {
        0: 'toxic',
        1: 'severe_toxic',
        2: 'obscene',
        3: 'threat',
        4: 'insult',
        5: 'identity_hate',
    }

    # Encode the input and add a batch dimension of 1.
    encoding = tokenizer(text, truncation=True, padding=True, max_length=128)
    input_ids = torch.tensor(encoding["input_ids"]).unsqueeze(0)
    attention_mask = torch.tensor(encoding["attention_mask"]).unsqueeze(0)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1)

    # Top prediction over all six classes — usually 'toxic'.
    score = probabilities[0].max().item()
    predicted_label = label_names[torch.argmax(probabilities, dim=1).item()]

    # Secondary prediction restricted to classes 2..5 (obscene..identity_hate).
    # argmax on the slice (offset by 2) instead of torch.where(prob == value):
    # the equality search crashed with .item() whenever two entries tied.
    confidence = probabilities[0][2:6].max().item()
    other_idx = torch.argmax(probabilities[0][2:6]).item() + 2
    other_label = label_names[other_idx]

    df = pd.DataFrame({
        'Text': text,
        'Main Label': predicted_label,
        'Score1': score,
        'Second Label': other_label,
        'Score2': confidence,
    }, index=[0])
    # Bug fix: st.table() used to sit AFTER the return and never executed;
    # render the table before returning the DataFrame.
    st.table(df)
    return df
#experiment/backup prediction function
def predict(text, tokenizer=None, model=None):
    """Return the argmax class index for *text* (backup prediction path).

    Bug fix: the original body read module-level ``tokenizer``/``model``
    globals that were never defined (both are locals of other functions),
    so every call raised NameError. They are now optional parameters that
    default to the fine-tuned toxicity model.

    Args:
        text: Input string to classify.
        tokenizer: Optional pre-loaded tokenizer; loaded from the Hub if None.
        model: Optional pre-loaded model; loaded from the Hub if None.

    Returns:
        The predicted class index as an int.
    """
    if tokenizer is None:
        tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    if model is None:
        model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
        model.eval()
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    _, prediction = torch.max(outputs.logits, dim=1)
    return prediction.item()
def main():
    """Streamlit entry point: collect text, pick a model, show the result."""
    st.title("Sentiment Analysis App")
    text = st.text_input("Enter text:", value="You're great!")
    model_options = [
        "psychedelicbunny/bertfinetuned",
        "bert-base-uncased",
        "finiteautomata/bertweet-base-sentiment-analysis",
        "roberta-base",
    ]
    model_name = st.selectbox("Select a pretrained model", model_options)
    if st.button("Analyze"):
        with st.spinner('Analyzing...'):
            # The fine-tuned model has its own inference path; everything
            # else goes through the generic pipeline helper.
            if model_name == "psychedelicbunny/bertfinetuned":
                sentiment = finetune(text)
            else:
                sentiment = sentiment_analysis(text, model_name)
        st.write("Sentiment:", sentiment)


if __name__ == '__main__':
    main()