julesy committed on
Commit
4895da9
·
1 Parent(s): f78315e

Initial commit for milestone-3: use the fine-tuned model and some sample toxic tweets

Browse files
Files changed (2) hide show
  1. .github/workflows/Huggingface_Spaces.yml +1 -1
  2. app.py +18 -13
.github/workflows/Huggingface_Spaces.yml CHANGED
@@ -1,7 +1,7 @@
1
  name: Sync to Hugging Face hub
2
  on:
3
  push:
4
- branches: [milestone-2]
5
 
6
  # to run this workflow manually from the Actions tab
7
  workflow_dispatch:
 
1
  name: Sync to Hugging Face hub
2
  on:
3
  push:
4
+ branches: [milestone-2, milestone-3]
5
 
6
  # to run this workflow manually from the Actions tab
7
  workflow_dispatch:
app.py CHANGED
@@ -1,40 +1,45 @@
# Streamlit page: pick one of several pretrained checkpoints and classify a
# single sentence typed by the user.
import streamlit as st
import numpy as np

#https://huggingface.co/course/chapter2/2?fw=pt
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

# Display name shown in the select box -> checkpoint id given to from_pretrained().
models = {
    "DistilBERT": "distilbert-base-uncased-finetuned-sst-2-english",
    "RoBERTa": "roberta-large-mnli",
    "XLM-RoBERTa": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
    "ELECTRA": "bhadresh-savani/electra-base-emotion",
}

title = "Toxic Tweets"
st.title(title)

# Offer every configured model name and resolve the selection to its checkpoint id.
options = np.array(list(models.keys()))
choice = str(st.selectbox("Select Base Model:", options))
pre_model = models[choice]

st.write("Model Used: ", pre_model)

# Tokenizer and classifier are both loaded from the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pre_model)
model = TFAutoModelForSequenceClassification.from_pretrained(pre_model)

response = st.text_input(
    "Enter Text to Analyse:",
    "I am excited to begin working on this Project!",
)

if st.button("Submit"):
    st.write(":blue[=== Results ===]")

    encoded = tokenizer(response, return_tensors='tf')
    result = model(encoded)

    # Turn the logits into probabilities over the last axis.
    probabilities = tf.nn.softmax(result.logits, axis=-1)

    # Highest probability and the index of the class that achieved it.
    predicted_amount = float(tf.math.reduce_max(probabilities, axis=-1)[0])
    predicted_class_id = int(tf.math.argmax(probabilities, axis=-1)[0])

    predicted_label = model.config.id2label[predicted_class_id]
    st.markdown("This sentence can be interpreted as: _:green[{:.2%}] {}_".format(predicted_amount, predicted_label))
 
# Streamlit page: classify the user's text and a few sample tweets with a
# fine-tuned DistilBERT toxic-comment model.
import streamlit as st
import numpy as np
import tensorflow as tf

#https://huggingface.co/course/chapter2/2?fw=pt
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Display name shown in the select box -> checkpoint id given to from_pretrained().
models = {
    "toxic-comments-distilbert": "julesy/toxic-comments-distilbert",
}

title = "Toxic Tweets"
st.title(title)

# Offer every configured model name and resolve the selection to its checkpoint id.
options = np.array(list(models.keys()))
choice = str(st.selectbox("Finetuned Model:", options))
pre_model = models[choice]

st.write("Fine Tuned DistilBert model for identifying Toxic Comments.")

# Tokenizer and classifier are both loaded from the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pre_model)
model = TFAutoModelForSequenceClassification.from_pretrained(pre_model)

response = st.text_input("Enter Text to Analyse:", "I am excited to begin working on this Project!")
sample_tweets = ["I hate you!", "you're a dumbass", "thats pretty bad", "damn, i wish i didnt do that"]

if st.button("Submit"):
    st.write(":blue[=== Results ===]")

    # Three side-by-side columns: text | predicted label | probability.
    col1, col2, col3 = st.columns(3)

    # One result row per text: the user's input first, then the sample tweets.
    for text in [response, *sample_tweets]:
        # Tokenize the text shown in this row. Tokenizing `response` on every
        # iteration would pair each sample tweet with the prediction for the
        # text-box input instead of its own prediction.
        tokens = tokenizer(text, return_tensors='tf')
        outputs = model(tokens)

        # Turn the logits into probabilities over the last axis.
        predictions = tf.nn.softmax(outputs.logits, axis=-1)

        # Highest probability and the index of the class that achieved it.
        predicted_amount = float(tf.math.reduce_max(predictions, axis=-1)[0])
        predicted_class_id = int(tf.math.argmax(predictions, axis=-1)[0])

        col1.write(text)
        col2.write(model.config.id2label[predicted_class_id])
        col3.write(predicted_amount)

    #st.markdown("This sentence can be interpreted as: _:green[{:.2%}] {}_".format(predicted_amount, model.config.id2label[predicted_class_id]))