import streamlit as st
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch
|
|
# Load the tokenizer and model once; st.cache_resource keeps them across Streamlit reruns
# instead of reloading BERT on every interaction.
# Note: 'bert-base-uncased' ships without a fine-tuned classification head, so the
# six-label head below is randomly initialised; swap in a checkpoint fine-tuned on the
# toxic-comment labels for meaningful scores.
@st.cache_resource
def load_model():
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
    model.eval()
    return tokenizer, model


tokenizer, model = load_model()
|
|
def classify_text(text):
    # Tokenize, truncating to BERT's 512-token input limit.
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=512)
    input_tensor = torch.tensor([input_ids])

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        logits = model(input_tensor).logits

    # Independent sigmoid per label: each of the six toxicity labels is scored separately.
    predicted_labels = torch.sigmoid(logits).numpy()
    return predicted_labels
|
|
# Accumulate results in Streamlit's session state; a plain module-level DataFrame
# would be recreated (and emptied) on every rerun of the script.
if 'results_df' not in st.session_state:
    st.session_state.results_df = pd.DataFrame(
        columns=['Text', 'Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate'])
|
|
def app():
    st.title("Toxicity Classification App")
    st.write("Enter text below to classify its toxicity.")

    user_input = st.text_area("Enter text here:", "", key='user_input')

    if st.button("Classify"):
        labels = classify_text(user_input)

        st.write("Classification Results:")
        st.write("Toxic: {:.2%}".format(labels[0][0]))
        st.write("Severe Toxic: {:.2%}".format(labels[0][1]))
        st.write("Obscene: {:.2%}".format(labels[0][2]))
        st.write("Threat: {:.2%}".format(labels[0][3]))
        st.write("Insult: {:.2%}".format(labels[0][4]))
        st.write("Identity Hate: {:.2%}".format(labels[0][5]))

        # Append this result to the session-level history and display it.
        st.session_state.results_df.loc[len(st.session_state.results_df)] = [
            user_input, labels[0][0], labels[0][1], labels[0][2],
            labels[0][3], labels[0][4], labels[0][5],
        ]

        st.write("Classification Results DataFrame:")
        st.write(st.session_state.results_df)
|
|
if __name__ == "__main__":
    app()
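# To launch the app locally (the file name here is an assumption; use whatever
# name this script is saved under):
#   streamlit run app.py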
|
|