Spaces:
Build error
Build error
| import streamlit as st | |
| import transformers | |
| import numpy as np | |
| import pandas as pd | |
| from scipy.special import softmax | |
| import torch | |
def preprocess(text: str) -> str:
    """Mask user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces and each token is rewritten:

    * a token starting with ``@`` that is longer than one character
      becomes ``@user``;
    * a token starting with ``http`` becomes ``http``;
    * every other token is kept unchanged.

    Tokens are re-joined with single spaces, so runs of spaces in the input
    are preserved (they show up as empty tokens between separators).

    Args:
        text: Raw input text.

    Returns:
        The text with mentions and links replaced by placeholders.
    """

    def _mask(token: str) -> str:
        # A bare "@" is not a mention, hence the length check.
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
# --- Page header and user inputs --------------------------------------------
st.title("Toxicity Classification App")
user_input = st.text_input("Input texts to analyze", "Cody Jiang is a happy boy!")

# The first entry is passed straight to from_pretrained(); the second is a
# display name that is mapped to the local "./myModel/" checkpoint below.
model_names = [
    'distilbert-base-uncased-finetuned-sst-2-english',
    'Codys-Finetuning-Language-Model',
]
model_name = st.selectbox("Select a pretrained model", model_names)

# --- Model / tokenizer loading ----------------------------------------------
# NOTE(review): Streamlit re-executes this script on every widget
# interaction, so the selected model is re-loaded each time. Consider
# wrapping the loading in a cached function (e.g. st.cache_resource) if the
# installed Streamlit version provides it.
if model_name == "Codys-Finetuning-Language-Model":
    # Fine-tuned weights come from the local directory; the tokenizer is the
    # stock "bert-base-uncased" one.
    model = transformers.BertForSequenceClassification.from_pretrained("./myModel/")
    tokenizer = transformers.BertTokenizerFast.from_pretrained("bert-base-uncased")
    # Bind config in this branch too so the name exists whichever model is
    # selected.
    config = model.config
else:
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    config = transformers.AutoConfig.from_pretrained(model_name)
    model = transformers.AutoModelForSequenceClassification.from_pretrained(model_name)
def _single_row_table(tweet, label, probability):
    """Build the one-row result table (input text, top label, its score)."""
    table = pd.DataFrame(columns=["Tweet", "Toxicity Class", "Probability"])
    table.loc[0] = [tweet, label, probability]
    return table


# Run inference only when the user presses the button; show the single
# highest-scoring class for the entered text.
if st.button("Submit"):
    if model_name == "Codys-Finetuning-Language-Model":
        input_val = tokenizer(
            user_input,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output_val = model(**input_val)
        # Sigmoid scores each label independently (not a softmax over labels).
        result_list = torch.sigmoid(output_val.logits).tolist()[0]
        # Assumes the checkpoint emits exactly these six labels in this
        # order -- TODO confirm against the training script.
        toxicity_class = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
        top_probability = max(result_list)
        top_label = toxicity_class[result_list.index(top_probability)]
        st.table(_single_row_table(user_input, top_label, top_probability))
    else:
        text = preprocess(user_input)
        # truncation=True keeps long input within the model's maximum
        # sequence length; without it the forward pass can fail on long text.
        encoded_input = tokenizer(text, truncation=True, return_tensors='pt')
        with torch.no_grad():
            output = model(**encoded_input)
        # output[0] holds the logits; [0] selects the only item in the batch.
        scores = softmax(output[0][0].numpy())
        # Only the best class is displayed, so argmax replaces a full sort.
        best = int(np.argmax(scores))
        st.table(_single_row_table(user_input, config.id2label[best], scores[best]))