Spaces:
Sleeping
Sleeping
MS3
Browse files- .idea/.gitignore +3 -0
- .idea/Sentiment-Analysis.iml +14 -0
- .idea/aws.xml +11 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- app.py +78 -18
.idea/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
.idea/Sentiment-Analysis.iml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$">
|
| 5 |
+
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
| 6 |
+
</content>
|
| 7 |
+
<orderEntry type="inheritedJdk" />
|
| 8 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 9 |
+
</component>
|
| 10 |
+
<component name="PyDocumentationSettings">
|
| 11 |
+
<option name="format" value="PLAIN" />
|
| 12 |
+
<option name="myDocStringFormat" value="Plain" />
|
| 13 |
+
</component>
|
| 14 |
+
</module>
|
.idea/aws.xml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="accountSettings">
|
| 4 |
+
<option name="activeRegion" value="us-east-1" />
|
| 5 |
+
<option name="recentlyUsedRegions">
|
| 6 |
+
<list>
|
| 7 |
+
<option value="us-east-1" />
|
| 8 |
+
</list>
|
| 9 |
+
</option>
|
| 10 |
+
</component>
|
| 11 |
+
</project>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Sentiment-Analysis)" project-jdk-type="Python SDK" />
|
| 4 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" filepath="$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
app.py
CHANGED
|
@@ -1,18 +1,23 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import time
|
| 3 |
from transformers import pipeline
|
|
|
|
| 4 |
import os
|
| 5 |
-
|
|
|
|
| 6 |
os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
|
| 7 |
|
| 8 |
|
| 9 |
|
| 10 |
st.title("Sentiment Analysis App")
|
| 11 |
|
|
|
|
|
|
|
| 12 |
form = st.form(key='Sentiment Analysis')
|
| 13 |
box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
|
| 14 |
'distilbert-base-uncased-finetuned-sst-2-english',
|
| 15 |
-
'twitter-roberta-base-sentiment'
|
|
|
|
| 16 |
], key=1)
|
| 17 |
tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
|
| 18 |
submit = form.form_submit_button(label='Submit')
|
|
@@ -20,44 +25,99 @@ submit = form.form_submit_button(label='Submit')
|
|
| 20 |
if submit and tweet:
|
| 21 |
with st.spinner('Analyzing...'):
|
| 22 |
time.sleep(1)
|
| 23 |
-
# st.header(tweet)
|
| 24 |
|
| 25 |
if tweet is not None:
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
| 27 |
if box == 'bertweet-base-sentiment-analysis':
|
| 28 |
pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
|
| 29 |
-
elif box == 'twitter-
|
| 30 |
pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
| 31 |
-
|
| 32 |
pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
print(predictions)
|
|
|
|
| 35 |
col1.header("Tweet")
|
| 36 |
-
col1.subheader(tweet)
|
| 37 |
-
col2.header("Judgement")
|
| 38 |
col3.header("Probability")
|
|
|
|
|
|
|
| 39 |
for p in predictions:
|
| 40 |
if box == 'bertweet-base-sentiment-analysis':
|
| 41 |
if p['label'] == "POS":
|
| 42 |
-
col2.success(
|
| 43 |
col3.success(f"{ round(p['score'] * 100, 1)}%")
|
| 44 |
elif p['label'] == "NEU":
|
| 45 |
col2.warning(f"{ p['label'] }")
|
| 46 |
col3.warning(f"{round(p['score'] * 100, 1)}%")
|
| 47 |
else:
|
| 48 |
-
col2.error(
|
| 49 |
col3.error(f"{round(p['score'] * 100, 1)}%")
|
| 50 |
elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
|
| 51 |
if p['label'] == "POSITIVE":
|
| 52 |
-
col2.success(
|
| 53 |
col3.success(f"{round(p['score'] * 100, 1)}%")
|
| 54 |
else:
|
| 55 |
-
col2.error(
|
| 56 |
col3.error(f"{round(p['score'] * 100, 1)}%")
|
| 57 |
-
|
| 58 |
-
if p['label'] == "
|
| 59 |
-
col2.success(
|
| 60 |
col3.success(f"{round(p['score'] * 100, 1)}%")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
else:
|
| 62 |
-
col2.error(
|
| 63 |
-
col3.error(f"{round(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import time
|
| 3 |
from transformers import pipeline
|
| 4 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 5 |
import os
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
|
| 9 |
|
| 10 |
|
| 11 |
|
| 12 |
st.title("Sentiment Analysis App")
|
| 13 |
|
| 14 |
+
labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
|
| 15 |
+
|
| 16 |
form = st.form(key='Sentiment Analysis')
|
| 17 |
box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
|
| 18 |
'distilbert-base-uncased-finetuned-sst-2-english',
|
| 19 |
+
'twitter-roberta-base-sentiment',
|
| 20 |
+
'Modified Bert Toxicity Classification'
|
| 21 |
], key=1)
|
| 22 |
tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
|
| 23 |
submit = form.form_submit_button(label='Submit')
|
|
|
|
| 25 |
if submit and tweet:
|
| 26 |
with st.spinner('Analyzing...'):
|
| 27 |
time.sleep(1)
|
|
|
|
| 28 |
|
| 29 |
if tweet is not None:
|
| 30 |
+
if box != 'Modified Bert Toxicity Classification':
|
| 31 |
+
col1, col2, col3 = st.columns(3)
|
| 32 |
+
else:
|
| 33 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
| 34 |
if box == 'bertweet-base-sentiment-analysis':
|
| 35 |
pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
|
| 36 |
+
elif box == 'twitter-roberta-base-sentiment':
|
| 37 |
pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
| 38 |
+
elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
|
| 39 |
pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# <--- Unecessary Testing --->
|
| 43 |
+
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
|
| 44 |
+
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
|
| 45 |
+
encoding = tokenizer(tweet, return_tensors="pt")
|
| 46 |
+
encoding = {k: v.to(model.device) for k,v in encoding.items()}
|
| 47 |
+
predictions = model(**encoding)
|
| 48 |
+
|
| 49 |
+
logits = predictions.logits
|
| 50 |
+
sigmoid = torch.nn.Sigmoid()
|
| 51 |
+
probs = sigmoid(logits.squeeze().cpu())
|
| 52 |
+
print(probs)
|
| 53 |
+
predictions = np.zeros(probs.shape)
|
| 54 |
+
predictions[np.where(probs >= 0.5)] = 1
|
| 55 |
+
# turn predicted id's into actual label names
|
| 56 |
+
id2label = {idx: label for idx, label in enumerate(labels)}
|
| 57 |
+
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
|
| 58 |
+
print(predicted_labels)
|
| 59 |
+
print(predictions[0])
|
| 60 |
+
else:
|
| 61 |
+
model = AutoModelForSequenceClassification.from_pretrained('./model')
|
| 62 |
+
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
| 63 |
+
encoding = tokenizer(tweet, return_tensors="pt")
|
| 64 |
+
encoding = {k: v.to(model.device) for k,v in encoding.items()}
|
| 65 |
+
predictions = model(**encoding)
|
| 66 |
+
print(predictions)
|
| 67 |
+
col4
|
| 68 |
+
if pipeline:
|
| 69 |
+
predictions = pipeline(tweet)
|
| 70 |
+
col2.header("Judgement")
|
| 71 |
+
else:
|
| 72 |
+
col2.header("Toxic?")
|
| 73 |
+
col4.header("Toxicity Type")
|
| 74 |
+
col5.header("Probability")
|
| 75 |
print(predictions)
|
| 76 |
+
|
| 77 |
col1.header("Tweet")
|
|
|
|
|
|
|
| 78 |
col3.header("Probability")
|
| 79 |
+
|
| 80 |
+
col1.subheader(tweet)
|
| 81 |
for p in predictions:
|
| 82 |
if box == 'bertweet-base-sentiment-analysis':
|
| 83 |
if p['label'] == "POS":
|
| 84 |
+
col2.success("POSITIVE")
|
| 85 |
col3.success(f"{ round(p['score'] * 100, 1)}%")
|
| 86 |
elif p['label'] == "NEU":
|
| 87 |
col2.warning(f"{ p['label'] }")
|
| 88 |
col3.warning(f"{round(p['score'] * 100, 1)}%")
|
| 89 |
else:
|
| 90 |
+
col2.error("NEGATIVE")
|
| 91 |
col3.error(f"{round(p['score'] * 100, 1)}%")
|
| 92 |
elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
|
| 93 |
if p['label'] == "POSITIVE":
|
| 94 |
+
col2.success("POSITIVE")
|
| 95 |
col3.success(f"{round(p['score'] * 100, 1)}%")
|
| 96 |
else:
|
| 97 |
+
col2.error("NEGATIVE")
|
| 98 |
col3.error(f"{round(p['score'] * 100, 1)}%")
|
| 99 |
+
elif box == 'twitter-roberta-base-sentiment':
|
| 100 |
+
if p['label'] == "LABEL_2":
|
| 101 |
+
col2.success("POSITIVE")
|
| 102 |
col3.success(f"{round(p['score'] * 100, 1)}%")
|
| 103 |
+
elif p['label'] == "LABEL_0":
|
| 104 |
+
col2.error("NEGATIVE")
|
| 105 |
+
col3.error(f"{round(p['score'] * 100, 1)}%")
|
| 106 |
+
else:
|
| 107 |
+
col2.warning("NEUTRAL")
|
| 108 |
+
col3.warning(f"{round(p['score'] * 100, 1)}%")
|
| 109 |
+
else:
|
| 110 |
+
if predictions[0] == 0:
|
| 111 |
+
col2.success("NO TOXICITY")
|
| 112 |
+
col3.success(f"{100 - round(probs[0] * 100, 1)}%")
|
| 113 |
+
col4.success("N/A")
|
| 114 |
+
col5.success("N/A")
|
| 115 |
else:
|
| 116 |
+
col2.error("TOXIC")
|
| 117 |
+
col3.error(f"{round(probs[0] * 100, 1)}%")
|
| 118 |
+
_max = 1
|
| 119 |
+
for i in range(2, len(predictions)):
|
| 120 |
+
if probs[i] > probs[_max]:
|
| 121 |
+
_max = i
|
| 122 |
+
col4.error(labels[_max])
|
| 123 |
+
col5.error(f"{round(probs[_max] * 100, 1)}%")
|