Spaces:

Ptato
/

Sentiment-Analysis

Sleeping

App Files Files Community

Ptato committed on Apr 26, 2023

Commit

5cf11d3

1 Parent(s): b22a50f

MS3

Browse files

Files changed (8) hide show

.idea/.gitignore +3 -0
.idea/Sentiment-Analysis.iml +14 -0
.idea/aws.xml +11 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
app.py +78 -18

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/Sentiment-Analysis.iml ADDED Viewed

	@@ -0,0 +1,14 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>

.idea/aws.xml ADDED Viewed

	@@ -0,0 +1,11 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="accountSettings">
+    <option name="activeRegion" value="us-east-1" />
+    <option name="recentlyUsedRegions">
+      <list>
+        <option value="us-east-1" />
+      </list>
+    </option>
+  </component>
+</project>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Sentiment-Analysis)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" filepath="$PROJECT_DIR$/.idea/Sentiment-Analysis.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

app.py CHANGED Viewed

@@ -1,18 +1,23 @@
 import streamlit as st
 import time
 from transformers import pipeline
 import os
 os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
 st.title("Sentiment Analysis App")
 form = st.form(key='Sentiment Analysis')
 box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
                                                    'distilbert-base-uncased-finetuned-sst-2-english',
-                                                   'twitter-roberta-base-sentiment'
                                                    ], key=1)
 tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
 submit = form.form_submit_button(label='Submit')
@@ -20,44 +25,99 @@ submit = form.form_submit_button(label='Submit')
 if submit and tweet:
     with st.spinner('Analyzing...'):
         time.sleep(1)
-    # st.header(tweet)
     if tweet is not None:
-        col1, col2, col3 = st.columns(3)
         if box == 'bertweet-base-sentiment-analysis':
             pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
-        elif box == 'twitter-xlm-roberta-base-sentiment':
             pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
-        else:
             pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-        predictions = pipeline(tweet)
         print(predictions)
         col1.header("Tweet")
-        col1.subheader(tweet)
-        col2.header("Judgement")
         col3.header("Probability")
         for p in predictions:
             if box == 'bertweet-base-sentiment-analysis':
                 if p['label'] == "POS":
-                    col2.success(f"{ p['label'] }")
                     col3.success(f"{ round(p['score'] * 100, 1)}%")
                 elif p['label'] == "NEU":
                     col2.warning(f"{ p['label'] }")
                     col3.warning(f"{round(p['score'] * 100, 1)}%")
                 else:
-                    col2.error(f"{p['label']}")
                     col3.error(f"{round(p['score'] * 100, 1)}%")
             elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
                 if p['label'] == "POSITIVE":
-                    col2.success(f"{p['label']}")
                     col3.success(f"{round(p['score'] * 100, 1)}%")
                 else:
-                    col2.error(f"{p['label']}")
                     col3.error(f"{round(p['score'] * 100, 1)}%")
-            else:
-                if p['label'] == "POSITIVE":
-                    col2.success(f"{p['label']}")
                     col3.success(f"{round(p['score'] * 100, 1)}%")
                 else:
-                    col2.error(f"{p['label']}")
-                    col3.error(f"{round(p['score'] * 100, 1)}%")

 import streamlit as st
 import time
 from transformers import pipeline
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import os
+import torch
+import numpy as np
 os.environ['KMP_DUPLICATE_LIB_OK'] = "True"
 st.title("Sentiment Analysis App")
+labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
 form = st.form(key='Sentiment Analysis')
 box = form.selectbox('Select Pre-trained Model:', ['bertweet-base-sentiment-analysis',
                                                    'distilbert-base-uncased-finetuned-sst-2-english',
+                                                   'twitter-roberta-base-sentiment',
+                                                   'Modified Bert Toxicity Classification'
                                                    ], key=1)
 tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
 submit = form.form_submit_button(label='Submit')
 if submit and tweet:
     with st.spinner('Analyzing...'):
         time.sleep(1)
     if tweet is not None:
+        if box != 'Modified Bert Toxicity Classification':
+            col1, col2, col3 = st.columns(3)
+        else:
+            col1, col2, col3, col4, col5 = st.columns(5)
         if box == 'bertweet-base-sentiment-analysis':
             pipeline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
+        elif box == 'twitter-roberta-base-sentiment':
             pipeline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
+        elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
             pipeline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+            # <--- Unnecessary Testing --->
+            model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+            tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+            encoding = tokenizer(tweet, return_tensors="pt")
+            encoding = {k: v.to(model.device) for k,v in encoding.items()}
+            predictions = model(**encoding)
+            logits = predictions.logits
+            sigmoid = torch.nn.Sigmoid()
+            probs = sigmoid(logits.squeeze().cpu())
+            print(probs)
+            predictions = np.zeros(probs.shape)
+            predictions[np.where(probs >= 0.5)] = 1
+            # turn predicted IDs into actual label names
+            id2label = {idx: label for idx, label in enumerate(labels)}
+            predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
+            print(predicted_labels)
+            print(predictions[0])
+        else:
+            model = AutoModelForSequenceClassification.from_pretrained('./model')
+            tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+            encoding = tokenizer(tweet, return_tensors="pt")
+            encoding = {k: v.to(model.device) for k,v in encoding.items()}
+            predictions = model(**encoding)
+            print(predictions)
+            col4
+        if pipeline:
+            predictions = pipeline(tweet)
+            col2.header("Judgement")
+        else:
+            col2.header("Toxic?")
+            col4.header("Toxicity Type")
+            col5.header("Probability")
         print(predictions)
         col1.header("Tweet")
         col3.header("Probability")
+        col1.subheader(tweet)
         for p in predictions:
             if box == 'bertweet-base-sentiment-analysis':
                 if p['label'] == "POS":
+                    col2.success("POSITIVE")
                     col3.success(f"{ round(p['score'] * 100, 1)}%")
                 elif p['label'] == "NEU":
                     col2.warning(f"{ p['label'] }")
                     col3.warning(f"{round(p['score'] * 100, 1)}%")
                 else:
+                    col2.error("NEGATIVE")
                     col3.error(f"{round(p['score'] * 100, 1)}%")
             elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
                 if p['label'] == "POSITIVE":
+                    col2.success("POSITIVE")
                     col3.success(f"{round(p['score'] * 100, 1)}%")
                 else:
+                    col2.error("NEGATIVE")
                     col3.error(f"{round(p['score'] * 100, 1)}%")
+            elif box == 'twitter-roberta-base-sentiment':
+                if p['label'] == "LABEL_2":
+                    col2.success("POSITIVE")
                     col3.success(f"{round(p['score'] * 100, 1)}%")
+                elif p['label'] == "LABEL_0":
+                    col2.error("NEGATIVE")
+                    col3.error(f"{round(p['score'] * 100, 1)}%")
+                else:
+                    col2.warning("NEUTRAL")
+                    col3.warning(f"{round(p['score'] * 100, 1)}%")
+            else:
+                if predictions[0] == 0:
+                    col2.success("NO TOXICITY")
+                    col3.success(f"{100 - round(probs[0] * 100, 1)}%")
+                    col4.success("N/A")
+                    col5.success("N/A")
                 else:
+                    col2.error("TOXIC")
+                    col3.error(f"{round(probs[0] * 100, 1)}%")
+                    _max = 1
+                    for i in range(2, len(predictions)):
+                        if probs[i] > probs[_max]:
+                            _max = i
+                    col4.error(labels[_max])
+                    col5.error(f"{round(probs[_max] * 100, 1)}%")