CodyJiang committed
Commit 147d336 · 1 Parent(s): ec9433f

Update app.py

Files changed (1): app.py +18 -14
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 from scipy.special import softmax
 import torch
 
+
 def preprocess(text):
     new_text = []
     for t in text.split(" "):
@@ -13,19 +14,16 @@ def preprocess(text):
         new_text.append(t)
     return " ".join(new_text)
 
-# Define toxicity class labels
-toxicity_labels = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
 
 st.title("Toxicity Classification App")
 
 user_input = st.text_input("Input texts to analyze", "Cody Jiang is a fantastic student in CS-UY-4613!")
 
-# Define the models to choose from
 model_names = ['distilbert-base-uncased-finetuned-sst-2-english', 'bert-base-uncased', 'roberta-base', 'Codys-Finetuning-Language-Model']
 model_name = st.selectbox("Select a pretrained model", model_names)
 
 if model_name == "Codys-Finetuning-Language-Model":
-    model = transformers.BertForSequenceClassification.from_pretrained("./myModel/")
+    model = transformers.BertForSequenceClassification.from_pretrained("./model/")
     tokenizer = transformers.BertTokenizerFast.from_pretrained("bert-base-uncased")
 else:
     tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
@@ -38,10 +36,11 @@ if st.button("Submit"):
         output_val = model(**input_val)
         probabilities = torch.sigmoid(output_val.logits)
         result_list = probabilities.tolist()[0]
-        toxicity_class = toxicity_labels[np.argmax(result_list)]
-        toxicity_prob = np.max(result_list)
-        df_result = pd.DataFrame([[user_input, toxicity_class, toxicity_prob]], columns=["Tweet", "Toxicity Class", "Probability"])
-        st.table(df_result)
+        columns = ["Tweet", "Toxicity Class", "Probability"]
+        toxicity_class = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
+        result_df = pd.DataFrame(columns=columns)
+        result_df.loc[0] = [user_input, toxicity_class[result_list.index(max(result_list))], max(result_list)]
+        st.table(result_df)
 
     else:
         text = preprocess(user_input)
@@ -50,12 +49,17 @@ if st.button("Submit"):
         scores = output[0][0].detach().numpy()
         scores = softmax(scores)
 
+        ranking = np.argsort(scores)
+        ranking = ranking[::-1]
+        columns = ["Tweet", "Toxicity Class", "Probability"]
+        toxicity_class = config.id2label
         result_list = []
+        columns_list = []
         for i in range(scores.shape[0]):
-            l = toxicity_labels[i]
-            s = scores[i]
+            l = toxicity_class[ranking[i]]
+            s = scores[ranking[i]]
             result_list.append(s)
+            columns_list.append(l)
-        toxicity_class = toxicity_labels[np.argmax(result_list)]
-        toxicity_prob = np.max(result_list)
-        df_result = pd.DataFrame([[user_input, toxicity_class, toxicity_prob]], columns=["Tweet", "Toxicity Class", "Probability"])
-        st.table(df_result)
+        result_df = pd.DataFrame(columns=columns)
+        result_df.loc[0] = [user_input, toxicity_class[result_list.index(max(result_list))], max(result_list)]
+        st.table(result_df)
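
Note on the fine-tuned branch: torch.sigmoid turns each of the six logits into an independent probability (multi-label classification), and the added lines pick the largest one for the Streamlit table. A minimal sketch of that table-building logic, assuming result_list holds the six sigmoid probabilities in the same order as the hard-coded label list (build_result_table is an illustrative helper, not part of the commit):

    import pandas as pd

    # Hard-coded label order; assumed to match the order used during fine-tuning.
    toxicity_class = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]

    def build_result_table(tweet, result_list):
        # Index of the highest per-label probability.
        top = result_list.index(max(result_list))
        result_df = pd.DataFrame(columns=["Tweet", "Toxicity Class", "Probability"])
        result_df.loc[0] = [tweet, toxicity_class[top], max(result_list)]
        return result_df

    # Example with dummy probabilities: "insult" (index 4) wins.
    print(build_result_table("some tweet", [0.1, 0.05, 0.2, 0.02, 0.9, 0.3]))

Building an empty DataFrame from a columns list and assigning result_df.loc[0] produces a single-row table, the same pattern the commit now uses in both branches.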
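One hedged observation on the pre-trained branch: result_list is appended in ranking order (descending), so result_list.index(max(result_list)) is always 0, and the lookup resolves to config.id2label[0] rather than the label of the top-ranked id. Indexing through ranking[0] (equivalently np.argmax(scores)) maps back to the right label; a small sketch under that assumption (top_label is a hypothetical helper, and config is assumed to be the model's AutoConfig with an id2label mapping):

    import numpy as np

    def top_label(scores, id2label):
        ranking = np.argsort(scores)[::-1]   # label ids, highest score first
        top_id = int(ranking[0])             # equivalent to int(np.argmax(scores))
        return id2label[top_id], float(scores[top_id])

    # Example: the top-ranked id is 2, not 0.
    scores = np.array([0.1, 0.2, 0.7])
    print(top_label(scores, {0: "toxic", 1: "obscene", 2: "insult"}))  # ('insult', 0.7)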