# Streamlit sentiment/toxicity analysis app (Hugging Face Space).
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import pandas as pd
#for all other models
def sentiment_analysis(text, model_name):
    """Classify *text* with a pretrained Hugging Face sentiment model.

    Loads the model and tokenizer named by *model_name*, wraps them in a
    `sentiment-analysis` pipeline, and returns the top prediction as a
    ``(label, score)`` tuple.
    """
    mdl = AutoModelForSequenceClassification.from_pretrained(model_name)
    tok = AutoTokenizer.from_pretrained(model_name)
    analyzer = pipeline('sentiment-analysis', model=mdl, tokenizer=tok)
    top = analyzer(text)[0]
    return top['label'], top['score']
#for my finetuned model
def finetune(text):
    """Classify *text* with the fine-tuned DistilBERT toxicity model.

    Runs "psychedelicbunny/bertfinetuned" (a 6-way toxicity classifier) on
    the input and returns a single-row pandas DataFrame with the top
    predicted label/score plus a secondary label/score drawn only from the
    more specific classes (obscene/threat/insult/identity_hate).

    Fixes vs. previous version:
    - removed unreachable `st.table(df)` after the `return` statement;
    - secondary label index now found with `argmax` over the slice instead
      of a brittle float-equality `torch.where` lookup.
    """
    # defining tokenizer and loading model
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
    model.eval()  # evaluation mode: disables dropout for deterministic inference
    label_names = {
        0: 'toxic',
        1: 'severe_toxic',
        2: 'obscene',
        3: 'threat',
        4: 'insult',
        5: 'identity_hate'
    }
    # creating encodings from input text; unsqueeze adds the batch dimension
    encoding = tokenizer(text, truncation=True, padding=True, max_length=128)
    input_ids = torch.tensor(encoding["input_ids"]).unsqueeze(0)
    attention_mask = torch.tensor(encoding["attention_mask"]).unsqueeze(0)
    with torch.no_grad():
        # running the model
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # score and label for first predicted label - usually 'toxic'
    predicted_class_idx = torch.argmax(probabilities[0]).item()
    score = probabilities[0, predicted_class_idx].item()
    predicted_label = label_names[predicted_class_idx]
    # score and label for secondary predicted label, restricted to classes 2..5
    rel_idx = torch.argmax(probabilities[0, 2:6]).item()
    confidence = probabilities[0, 2 + rel_idx].item()
    other_label = label_names[2 + rel_idx]
    # creating single-row result table
    df = pd.DataFrame({
        'Text': text,
        'Main Label': predicted_label,
        'Score1': score,
        'Second Label': other_label,
        'Score2': confidence
    }, index=[0])
    return df
#experiment/backup prediction function
#experiment/backup prediction function
def predict(text):
    """Return the argmax class index for *text*.

    NOTE(review): relies on module-level globals ``tokenizer`` and ``model``,
    which are never defined at module scope in this file (both appear only as
    locals inside other functions). Calling this as-is raises NameError —
    presumably leftover from an experiment; confirm before use.
    """
    # encode to a batched PyTorch tensor dict and forward through the model
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    outputs = model(**inputs)
    # torch.max over the class dimension yields (values, indices); keep indices
    _, prediction = torch.max(outputs.logits, dim=1)
    return prediction.item()
def main():
    """Render the Streamlit UI: collect text, pick a model, show the result."""
    st.title("Sentiment Analysis App")
    user_text = st.text_input("Enter text:", value = "You're great!")
    model_choices = ["psychedelicbunny/bertfinetuned", "bert-base-uncased", "finiteautomata/bertweet-base-sentiment-analysis", "roberta-base"]
    chosen_model = st.selectbox("Select a pretrained model", model_choices)
    if st.button("Analyze"):
        with st.spinner('Analyzing...'):
            # the fine-tuned toxicity model has its own inference path;
            # everything else goes through the generic pipeline helper
            if chosen_model != "psychedelicbunny/bertfinetuned":
                result = sentiment_analysis(user_text, chosen_model)
            else:
                result = finetune(user_text)
            st.write("Sentiment:", result)
if __name__ == '__main__':
    main()