File size: 3,294 Bytes
2391bdc
 
a3a7bbb
2391bdc
68b0057
71be7dc
2391bdc
b231981
2391bdc
 
 
 
 
 
 
34ad7ba
b231981
 
a3a7bbb
b231981
a3a7bbb
 
b231981
a3a7bbb
 
 
 
 
 
 
 
 
b231981
1127b19
2fd8db3
 
b231981
a3a7bbb
b231981
2fd8db3
a3a7bbb
 
b231981
 
c4a7b1a
a3a7bbb
8c59820
b231981
8c59820
 
 
a3a7bbb
b231981
71be7dc
 
 
280d5f4
71be7dc
280d5f4
8c59820
 
71be7dc
b231981
 
a3a7bbb
 
 
 
 
 
 
2391bdc
 
2e4165d
ff8683d
2391bdc
 
 
f45fa9b
a730dbb
 
 
34ad7ba
2391bdc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import pandas as pd

#for all other models
def sentiment_analysis(text, model_name):
    """Classify *text* with a Hugging Face sentiment pipeline built from *model_name*.

    Returns a ``(label, score)`` tuple taken from the pipeline's top prediction.
    """
    # Load the checkpoint's model/tokenizer pair from the Hub (or local cache).
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Run the text through a ready-made sentiment-analysis pipeline and
    # take the first (and only) prediction for this single input.
    analyzer = pipeline('sentiment-analysis', model=loaded_model, tokenizer=loaded_tokenizer)
    prediction = analyzer(text)[0]

    return prediction['label'], prediction['score']
    
#for my finetuned model
def finetune(text):
    """Run the fine-tuned DistilBERT toxicity classifier on *text*.

    Returns a one-row pandas DataFrame containing the input text, the top
    predicted label with its probability, and a secondary label/probability
    drawn from the obscene..identity_hate classes (indices 2-5).
    """
    #defining tokenizer and loading model
    tokenizer = DistilBertTokenizerFast.from_pretrained("psychedelicbunny/bertfinetuned")
    model = DistilBertForSequenceClassification.from_pretrained("psychedelicbunny/bertfinetuned")

    model.eval()  # switch model to evaluation mode (disables dropout)
    label_names = {
        0: 'toxic',
        1: 'severe_toxic',
        2: 'obscene',
        3: 'threat',
        4: 'insult',
        5: 'identity_hate'
    }
    #creating encodings from input text
    encoding = tokenizer(text, truncation=True, padding=True, max_length=128)
    input_ids = torch.tensor(encoding["input_ids"]).unsqueeze(0)
    attention_mask = torch.tensor(encoding["attention_mask"]).unsqueeze(0)

    with torch.no_grad():
        #running the model
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.softmax(outputs.logits, dim=1)

        #score and label for first predicted label - usually 'toxic'
        #torch.max returns (value, index) in one pass
        score, predicted_class_idx = torch.max(probabilities[0], dim=0)
        predicted_label = label_names[predicted_class_idx.item()]

        #score and label for secondary predicted label; torch.max over the
        #slice yields the index directly, replacing the original fragile
        #float-equality lookup via torch.where (which could match more than
        #one position and crash on .item())
        confidence, other_rel_idx = torch.max(probabilities[0][2:6], dim=0)
        other_label = label_names[other_rel_idx.item() + 2]  # offset back into full label space

        #creating table
        df = pd.DataFrame({
            'Text': text,
            'Main Label': predicted_label,
            'Score1': score.item(),
            'Second Label': other_label,
            'Score2': confidence.item()
        }, index=[0])
        #NOTE: the original had an unreachable st.table(df) after this return;
        #main() renders the returned DataFrame itself, so that line was removed
        return df

#experiment/backup prediction function
def predict(text, model_name="psychedelicbunny/bertfinetuned"):
    """Return the argmax class index for *text* as a plain int.

    Bug fix: the original body referenced module-level ``tokenizer`` and
    ``model`` names that are never defined at module scope, so every call
    raised NameError. They are now loaded here from *model_name*, which
    defaults to the fine-tuned checkpoint (backward-compatible: the old
    one-argument call form still works).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()  # inference mode: disable dropout

    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():  # no gradients needed for inference
        outputs = model(**inputs)
    _, prediction = torch.max(outputs.logits, dim=1)
    return prediction.item()
        

def main():
    """Streamlit UI: collect text and a model choice, then display the analysis."""
    st.title("Sentiment Analysis App")
    text = st.text_input("Enter text:", value="You're great!")
    model_choices = [
        "psychedelicbunny/bertfinetuned",
        "bert-base-uncased",
        "finiteautomata/bertweet-base-sentiment-analysis",
        "roberta-base",
    ]
    model_name = st.selectbox("Select a pretrained model", model_choices)

    # Guard clause: nothing to do until the user clicks the button.
    if not st.button("Analyze"):
        return

    with st.spinner('Analyzing...'):
        # The fine-tuned checkpoint has its own dedicated inference path.
        if model_name == "psychedelicbunny/bertfinetuned":
            sentiment = finetune(text)
        else:
            sentiment = sentiment_analysis(text, model_name)
        st.write("Sentiment:", sentiment)

if __name__ == '__main__':
    main()