import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import pandas as pd


# For all other (hub-hosted) models: run a standard sentiment-analysis pipeline.
def sentiment_analysis(text, model_name):
    """Classify `text` with the pretrained `model_name` via a HF pipeline.

    Returns a (label, score) tuple for the top prediction.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
    result = classifier(text)
    return result[0]['label'], result[0]['score']


# For my finetuned model.
def finetune(text):
    """Run the finetuned DistilBERT toxicity classifier on `text`.

    Renders a one-row summary table in Streamlit and returns it as a
    DataFrame with the primary predicted label/score and a secondary
    label/score drawn from classes 2-5.
    """
    # Load tokenizer and model, then switch to evaluation mode (no dropout).
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
    model.eval()

    label_names = {
        0: 'toxic',
        1: 'severe_toxic',
        2: 'obscene',
        3: 'threat',
        4: 'insult',
        5: 'identity_hate'
    }

    # Encode the input text and add a batch dimension.
    encoding = tokenizer(text, truncation=True, padding=True, max_length=128)
    input_ids = torch.tensor(encoding["input_ids"]).unsqueeze(0)
    attention_mask = torch.tensor(encoding["attention_mask"]).unsqueeze(0)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    # NOTE(review): toxic-comment heads are often multi-label (sigmoid per
    # class); softmax is kept here to preserve existing behavior — confirm
    # against the training objective.
    probabilities = torch.softmax(outputs.logits, dim=1)

    # Primary prediction: highest-probability class overall (usually 'toxic').
    predicted_class_idx = torch.argmax(probabilities[0]).item()
    score = probabilities[0][predicted_class_idx].item()
    predicted_label = label_names[predicted_class_idx]

    # Secondary prediction: best class among indices 2-5.
    # BUG FIX: the original recovered this index by float equality
    # (torch.where(probabilities[0] == confidence)) after an .item()
    # round-trip, which can fail on precision; argmax on the slice plus an
    # offset is exact and equivalent.
    other_index = torch.argmax(probabilities[0][2:6]).item() + 2
    confidence = probabilities[0][other_index].item()
    other_label = label_names[other_index]

    # One-row summary table of both predictions.
    df = pd.DataFrame({
        'Text': text,
        'Main Label': predicted_label,
        'Score1': score,
        'Second Label': other_label,
        'Score2': confidence
    }, index=[0])
    # BUG FIX: the original placed `return df` before `st.table(df)`, so the
    # table was never rendered; render first, then return.
    st.table(df)
    return df


# Experiment/backup prediction function.
def predict(text):
    """Return the argmax class index for `text` from the finetuned model.

    BUG FIX: the original referenced module-level `tokenizer` and `model`
    that were never defined (NameError on call); they are now loaded here.
    """
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")
    model.eval()
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    _, prediction = torch.max(outputs.logits, dim=1)
    return prediction.item()


def main():
    """Streamlit entry point: collect text + model choice, show the result."""
    st.title("Sentiment Analysis App")
    text = st.text_input("Enter text:", value="You're great!")
    model_name = st.selectbox("Select a pretrained model", [
        "psychedelicbunny/bertfinetuned",
        "bert-base-uncased",
        "finiteautomata/bertweet-base-sentiment-analysis",
        "roberta-base"
    ])
    if st.button("Analyze"):
        with st.spinner('Analyzing...'):
            # The finetuned model has its own multi-label reporting path.
            if model_name == "psychedelicbunny/bertfinetuned":
                sentiment = finetune(text)
            else:
                sentiment = sentiment_analysis(text, model_name)
        st.write("Sentiment:", sentiment)


if __name__ == '__main__':
    main()