Ptato committed on
Commit
5cf11d3
·
1 Parent(s): b22a50f
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/Sentiment-Analysis.iml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ <component name="PyDocumentationSettings">
11
+ <option name="format" value="PLAIN" />
12
+ <option name="myDocStringFormat" value="Plain" />
13
+ </component>
14
+ </module>
.idea/aws.xml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="accountSettings">
4
+ <option name="activeRegion" value="us-east-1" />
5
+ <option name="recentlyUsedRegions">
6
+ <list>
7
+ <option value="us-east-1" />
8
+ </list>
9
+ </option>
10
+ </component>
11
+ </project>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Sentiment-Analysis)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" filepath="$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1,18 +1,23 @@
1
  import streamlit as st
2
  import time
3
  from transformers import pipeline
 
4
  import os
5
-
 
6
  os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
7
 
8
 
9
 
10
  st.title("Sentiment Analysis App")
11
 
 
 
12
  form = st.form(key='Sentiment Analysis')
13
  box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
14
  'distilbert-base-uncased-finetuned-sst-2-english',
15
- 'twitter-roberta-base-sentiment'
 
16
  ], key=1)
17
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
18
  submit = form.form_submit_button(label='Submit')
@@ -20,44 +25,99 @@ submit = form.form_submit_button(label='Submit')
20
  if submit and tweet:
21
  with st.spinner('Analyzing...'):
22
  time.sleep(1)
23
- # st.header(tweet)
24
 
25
  if tweet is not None:
26
- col1, col2, col3 = st.columns(3)
 
 
 
27
  if box == 'bertweet-base-sentiment-analysis':
28
  pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
29
- elif box == 'twitter-xlm-roberta-base-sentiment':
30
  pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
31
- else:
32
  pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
33
- predictions = pipeline(tweet)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  print(predictions)
 
35
  col1.header("Tweet")
36
- col1.subheader(tweet)
37
- col2.header("Judgement")
38
  col3.header("Probability")
 
 
39
  for p in predictions:
40
  if box == 'bertweet-base-sentiment-analysis':
41
  if p['label'] == "POS":
42
- col2.success(f"{ p['label'] }")
43
  col3.success(f"{ round(p['score'] * 100, 1)}%")
44
  elif p['label'] == "NEU":
45
  col2.warning(f"{ p['label'] }")
46
  col3.warning(f"{round(p['score'] * 100, 1)}%")
47
  else:
48
- col2.error(f"{p['label']}")
49
  col3.error(f"{round(p['score'] * 100, 1)}%")
50
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
51
  if p['label'] == "POSITIVE":
52
- col2.success(f"{p['label']}")
53
  col3.success(f"{round(p['score'] * 100, 1)}%")
54
  else:
55
- col2.error(f"{p['label']}")
56
  col3.error(f"{round(p['score'] * 100, 1)}%")
57
- else:
58
- if p['label'] == "POSITIVE":
59
- col2.success(f"{p['label']}")
60
  col3.success(f"{round(p['score'] * 100, 1)}%")
 
 
 
 
 
 
 
 
 
 
 
 
61
  else:
62
- col2.error(f"{p['label']}")
63
- col3.error(f"{round(p['score'] * 100, 1)}%")
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import time
3
  from transformers import pipeline
4
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
  import os
6
+ import torch
7
+ import numpy as np
8
  os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
9
 
10
 
11
 
12
  st.title("Sentiment Analysis App")
13
 
14
+ labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
15
+
16
  form = st.form(key='Sentiment Analysis')
17
  box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
18
  'distilbert-base-uncased-finetuned-sst-2-english',
19
+ 'twitter-roberta-base-sentiment',
20
+ 'Modified Bert Toxicity Classification'
21
  ], key=1)
22
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
23
  submit = form.form_submit_button(label='Submit')
 
25
  if submit and tweet:
26
  with st.spinner('Analyzing...'):
27
  time.sleep(1)
 
28
 
29
  if tweet is not None:
30
+ if box != 'Modified Bert Toxicity Classification':
31
+ col1, col2, col3 = st.columns(3)
32
+ else:
33
+ col1, col2, col3, col4, col5 = st.columns(5)
34
  if box == 'bertweet-base-sentiment-analysis':
35
  pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
36
+ elif box == 'twitter-roberta-base-sentiment':
37
  pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
38
+ elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
39
  pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
40
+
41
+
42
+ # <--- Unnecessary Testing --->
43
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
44
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
45
+ encoding = tokenizer(tweet, return_tensors="pt")
46
+ encoding = {k: v.to(model.device) for k,v in encoding.items()}
47
+ predictions = model(**encoding)
48
+
49
+ logits = predictions.logits
50
+ sigmoid = torch.nn.Sigmoid()
51
+ probs = sigmoid(logits.squeeze().cpu())
52
+ print(probs)
53
+ predictions = np.zeros(probs.shape)
54
+ predictions[np.where(probs >= 0.5)] = 1
55
+ # turn predicted IDs into actual label names
56
+ id2label = {idx: label for idx, label in enumerate(labels)}
57
+ predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
58
+ print(predicted_labels)
59
+ print(predictions[0])
60
+ else:
61
+ model = AutoModelForSequenceClassification.from_pretrained('./model')
62
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
63
+ encoding = tokenizer(tweet, return_tensors="pt")
64
+ encoding = {k: v.to(model.device) for k,v in encoding.items()}
65
+ predictions = model(**encoding)
66
+ print(predictions)
67
+ col4
68
+ if pipeline:
69
+ predictions = pipeline(tweet)
70
+ col2.header("Judgement")
71
+ else:
72
+ col2.header("Toxic?")
73
+ col4.header("Toxicity Type")
74
+ col5.header("Probability")
75
  print(predictions)
76
+
77
  col1.header("Tweet")
 
 
78
  col3.header("Probability")
79
+
80
+ col1.subheader(tweet)
81
  for p in predictions:
82
  if box == 'bertweet-base-sentiment-analysis':
83
  if p['label'] == "POS":
84
+ col2.success("POSITIVE")
85
  col3.success(f"{ round(p['score'] * 100, 1)}%")
86
  elif p['label'] == "NEU":
87
  col2.warning(f"{ p['label'] }")
88
  col3.warning(f"{round(p['score'] * 100, 1)}%")
89
  else:
90
+ col2.error("NEGATIVE")
91
  col3.error(f"{round(p['score'] * 100, 1)}%")
92
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
93
  if p['label'] == "POSITIVE":
94
+ col2.success("POSITIVE")
95
  col3.success(f"{round(p['score'] * 100, 1)}%")
96
  else:
97
+ col2.error("NEGATIVE")
98
  col3.error(f"{round(p['score'] * 100, 1)}%")
99
+ elif box == 'twitter-roberta-base-sentiment':
100
+ if p['label'] == "LABEL_2":
101
+ col2.success("POSITIVE")
102
  col3.success(f"{round(p['score'] * 100, 1)}%")
103
+ elif p['label'] == "LABEL_0":
104
+ col2.error("NEGATIVE")
105
+ col3.error(f"{round(p['score'] * 100, 1)}%")
106
+ else:
107
+ col2.warning("NEUTRAL")
108
+ col3.warning(f"{round(p['score'] * 100, 1)}%")
109
+ else:
110
+ if predictions[0] == 0:
111
+ col2.success("NO TOXICITY")
112
+ col3.success(f"{100 - round(probs[0] * 100, 1)}%")
113
+ col4.success("N/A")
114
+ col5.success("N/A")
115
  else:
116
+ col2.error("TOXIC")
117
+ col3.error(f"{round(probs[0] * 100, 1)}%")
118
+ _max = 1
119
+ for i in range(2, len(predictions)):
120
+ if probs[i] > probs[_max]:
121
+ _max = i
122
+ col4.error(labels[_max])
123
+ col5.error(f"{round(probs[_max] * 100, 1)}%")