Spaces:

julesy
/

toxic-tweets

Build error

App Files Files Community

julesy committed on Apr 24, 2023

Commit

4895da9

1 Parent(s): f78315e

Initial commit for milestone-3: use the fine-tuned model and some sample toxic tweets

Browse files

Files changed (2) hide show

.github/workflows/Huggingface_Spaces.yml +1 -1
app.py +18 -13

.github/workflows/Huggingface_Spaces.yml CHANGED Viewed

@@ -1,7 +1,7 @@
 name: Sync to Hugging Face hub
 on:
   push:
-    branches: [milestone-2]
   # to run this workflow manually from the Actions tab
   workflow_dispatch:

 name: Sync to Hugging Face hub
 on:
   push:
+    branches: [milestone-2, milestone-3]
   # to run this workflow manually from the Actions tab
   workflow_dispatch:

app.py CHANGED Viewed

@@ -1,40 +1,45 @@
 import streamlit as st
 import numpy as np
 #https://huggingface.co/course/chapter2/2?fw=pt
 from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-import tensorflow as tf
 models = {
-    "DistilBERT": "distilbert-base-uncased-finetuned-sst-2-english",
-    "RoBERTa": "roberta-large-mnli",
-    "XLM-RoBERTa": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
-    "ELECTRA": "bhadresh-savani/electra-base-emotion"
 }
 title = "Toxic Tweets"
 st.title(title)
 options = np.array( list(models.keys()) )
-choice = str(st.selectbox("Select Base Model:", options))
 pre_model = models[choice]
-st.write("Model Used: ", pre_model)
 tokenizer = AutoTokenizer.from_pretrained(pre_model)
 model = TFAutoModelForSequenceClassification.from_pretrained(pre_model)
 response = st.text_input("Enter Text to Analyse:", "I am excited to begin working on this Project!")
 if st.button("Submit"):
     st.write(":blue[=== Results ===]")
-    tokens = tokenizer(response, return_tensors='tf')
-    outputs = model(tokens)
-    predictions = tf.nn.softmax(outputs.logits, axis=-1)
-    predicted_amount = float(tf.math.reduce_max(predictions, axis=-1)[0])
-    predicted_class_id = int(tf.math.argmax(predictions, axis=-1)[0])
-    st.markdown("This sentence can be interpreted as: _:green[{:.2%}] {}_".format(predicted_amount, model.config.id2label[predicted_class_id]))

 import streamlit as st
 import numpy as np
+import tensorflow as tf
 #https://huggingface.co/course/chapter2/2?fw=pt
 from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
 models = {  # display name -> model identifier passed to from_pretrained() below
+    "toxic-comments-distilbert": "julesy/toxic-comments-distilbert"
 }
 title = "Toxic Tweets"
 st.title(title)
 options = np.array( list(models.keys()) )  # display names offered in the selectbox
+choice = str(st.selectbox("Finetuned Model:", options))
 pre_model = models[choice]  # model identifier for the chosen display name
+st.write("Fine Tuned DistilBert model for identifying Toxic Comments.")
 tokenizer = AutoTokenizer.from_pretrained(pre_model)
 model = TFAutoModelForSequenceClassification.from_pretrained(pre_model)  # NOTE(review): no caching here; presumably reloaded whenever the script re-executes - confirm
 response = st.text_input("Enter Text to Analyse:", "I am excited to begin working on this Project!")
+sample_tweets = ["I hate you!", "you're a dumbass", "thats pretty bad", "damn, i wish i didnt do that"]  # fixed example inputs iterated by the Submit handler
 if st.button("Submit"):
     st.write(":blue[=== Results ===]")
+    col1, col2, col3 = st.columns(3)
+    for i in sample_tweets:
+        tokens = tokenizer(response, return_tensors='tf')
+        outputs = model(tokens)
+        predictions = tf.nn.softmax(outputs.logits, axis=-1)
+        predicted_amount = float(tf.math.reduce_max(predictions, axis=-1)[0])
+        predicted_class_id = int(tf.math.argmax(predictions, axis=-1)[0])
+        col1.write(i)
+        col2.write(model.config.id2label[predicted_class_id])
+        col3.write(predicted_amount)
+        #st.markdown("This sentence can be interpreted as: _:green[{:.2%}] {}_".format(predicted_amount, model.config.id2label[predicted_class_id]))