Hugging Face Spaces commit page (scraped): commit "Update app.py", addressing a build error.
File changed: app.py
Diff (old lines marked "-", new lines marked "+"; "| N |" entries are the diff gutter's line numbers):
|
@@ -5,6 +5,7 @@ import pandas as pd
|
|
| 5 |
from scipy.special import softmax
|
| 6 |
import torch
|
| 7 |
|
|
|
|
| 8 |
def preprocess(text):
|
| 9 |
new_text = []
|
| 10 |
for t in text.split(" "):
|
|
@@ -13,19 +14,16 @@ def preprocess(text):
|
|
| 13 |
new_text.append(t)
|
| 14 |
return " ".join(new_text)
|
| 15 |
|
| 16 |
-
# Define toxicity class labels
|
| 17 |
-
toxicity_labels = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
|
| 18 |
|
| 19 |
st.title("Toxicity Classification App")
|
| 20 |
|
| 21 |
user_input = st.text_input("Input texts to analyze", "Cody Jiang is a fantastic student in CS-UY-4613!")
|
| 22 |
|
| 23 |
-
# Define the models to choose from
|
| 24 |
model_names = ['distilbert-base-uncased-finetuned-sst-2-english', 'bert-base-uncased', 'roberta-base', 'Codys-Finetuning-Language-Model']
|
| 25 |
model_name = st.selectbox("Select a pretrained model", model_names)
|
| 26 |
|
| 27 |
if model_name == "Codys-Finetuning-Language-Model":
|
| 28 |
-
model = transformers.BertForSequenceClassification.from_pretrained("./
|
| 29 |
tokenizer = transformers.BertTokenizerFast.from_pretrained("bert-base-uncased")
|
| 30 |
else:
|
| 31 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
|
|
@@ -38,10 +36,11 @@ if st.button("Submit"):
|
|
| 38 |
output_val = model(**input_val)
|
| 39 |
probabilities = torch.sigmoid(output_val.logits)
|
| 40 |
result_list = probabilities.tolist()[0]
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
| 45 |
|
| 46 |
else:
|
| 47 |
text = preprocess(user_input)
|
|
@@ -50,12 +49,17 @@ if st.button("Submit"):
|
|
| 50 |
scores = output[0][0].detach().numpy()
|
| 51 |
scores = softmax(scores)
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
result_list = []
|
|
|
|
| 54 |
for i in range(scores.shape[0]):
|
| 55 |
-
l =
|
| 56 |
-
s = scores[i]
|
| 57 |
result_list.append(s)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
st.table(
|
|
|
|
| 5 |
from scipy.special import softmax
|
| 6 |
import torch
|
| 7 |
|
| 8 |
+
|
| 9 |
def preprocess(text):
|
| 10 |
new_text = []
|
| 11 |
for t in text.split(" "):
|
|
|
|
| 14 |
new_text.append(t)
|
| 15 |
return " ".join(new_text)
|
| 16 |
|
|
|
|
|
|
|
| 17 |
|
| 18 |
st.title("Toxicity Classification App")
|
| 19 |
|
| 20 |
user_input = st.text_input("Input texts to analyze", "Cody Jiang is a fantastic student in CS-UY-4613!")
|
| 21 |
|
|
|
|
| 22 |
model_names = ['distilbert-base-uncased-finetuned-sst-2-english', 'bert-base-uncased', 'roberta-base', 'Codys-Finetuning-Language-Model']
|
| 23 |
model_name = st.selectbox("Select a pretrained model", model_names)
|
| 24 |
|
| 25 |
if model_name == "Codys-Finetuning-Language-Model":
|
| 26 |
+
model = transformers.BertForSequenceClassification.from_pretrained("./model/")
|
| 27 |
tokenizer = transformers.BertTokenizerFast.from_pretrained("bert-base-uncased")
|
| 28 |
else:
|
| 29 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
|
|
|
|
| 36 |
output_val = model(**input_val)
|
| 37 |
probabilities = torch.sigmoid(output_val.logits)
|
| 38 |
result_list = probabilities.tolist()[0]
|
| 39 |
+
columns = ["Tweet", "Toxicity Class", "Probability"]
|
| 40 |
+
toxicity_class = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
|
| 41 |
+
result_df = pd.DataFrame(columns=columns)
|
| 42 |
+
result_df.loc[0] = [user_input, toxicity_class[result_list.index(max(result_list))], max(result_list)]
|
| 43 |
+
st.table(result_df)
|
| 44 |
|
| 45 |
else:
|
| 46 |
text = preprocess(user_input)
|
|
|
|
| 49 |
scores = output[0][0].detach().numpy()
|
| 50 |
scores = softmax(scores)
|
| 51 |
|
| 52 |
+
ranking = np.argsort(scores)
|
| 53 |
+
ranking = ranking[::-1]
|
| 54 |
+
columns = ["Tweet", "Toxicity Class", "Probability"]
|
| 55 |
+
toxicity_class = config.id2label
|
| 56 |
result_list = []
|
| 57 |
+
columns_list = []
|
| 58 |
for i in range(scores.shape[0]):
|
| 59 |
+
l = toxicity_class[ranking[i]]
|
| 60 |
+
s = scores[ranking[i]]
|
| 61 |
result_list.append(s)
|
| 62 |
+
columns_list.append(l)
|
| 63 |
+
result_df = pd.DataFrame(columns=columns)
|
| 64 |
+
result_df.loc[0] = [user_input, toxicity_class[result_list.index(max(result_list))], max(result_list)]
|
| 65 |
+
st.table(result_df)
|