Update app.py
app.py
CHANGED
@@ -3,40 +3,54 @@ import pandas as pd
 from transformers import BertTokenizer, BertForSequenceClassification
 import torch
 
-# Load pre-trained BERT model
+# Load pre-trained BERT model and tokenizer
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
 model.eval()
 
-#
-classified_data = pd.DataFrame(columns=['Text', 'Toxicity'])
-
+# Function to classify text using the pre-trained BERT model
 def classify_text(text):
-    # Tokenize
-    input_ids = …
-    …
-    logits = outputs.logits
-    predicted_class = torch.argmax(logits, dim=1).item()
-    toxicity = "Toxic" if predicted_class == 1 else "Non-Toxic"
-    return toxicity
+    # Tokenize input text
+    input_ids = tokenizer.encode(text, add_special_tokens=True)
+    # Convert tokenized input to tensor
+    input_tensor = torch.tensor([input_ids])
+    # Get model predictions
+    with torch.no_grad():
+        logits = model(input_tensor)[0]
+    # Get predicted labels
+    predicted_labels = torch.sigmoid(logits).numpy()
+    return predicted_labels
 
+# Create a persistent DataFrame to store classification results
+results_df = pd.DataFrame(columns=['Text', 'Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate'])
+
 # Streamlit app
-def …
-    st.title("Toxicity …
-    …
+def app():
+    st.title("Toxicity Classification App")
+    st.write("Enter text below to classify its toxicity.")
+
+    # User input
+    user_input = st.text_area("Enter text here:", "", key='user_input')
+
+    # Classification
     if st.button("Classify"):
-        …
-        st.write(…)
+        # Perform classification
+        labels = classify_text(user_input)
+        # Print classification results
+        st.write("Classification Results:")
+        st.write("Toxic: {:.2%}".format(labels[0][0]))
+        st.write("Severe Toxic: {:.2%}".format(labels[0][1]))
+        st.write("Obscene: {:.2%}".format(labels[0][2]))
+        st.write("Threat: {:.2%}".format(labels[0][3]))
+        st.write("Insult: {:.2%}".format(labels[0][4]))
+        st.write("Identity Hate: {:.2%}".format(labels[0][5]))
+        # Add results to persistent DataFrame
+        results_df.loc[len(results_df)] = [user_input, labels[0][0], labels[0][1], labels[0][2], labels[0][3], labels[0][4], labels[0][5]]
+
+        # Show results DataFrame
+        st.write("Classification Results DataFrame:")
+        st.write(results_df)
 
+# Run the app
 if __name__ == "__main__":
-    …
+    app()
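For a quick sanity check, the new classify_text can be exercised outside the Streamlit UI. Below is a minimal sketch that reproduces the setup from this commit; the example string is arbitrary. One caveat: loading bert-base-uncased with num_labels=6 attaches a freshly initialized classification head, so the sigmoid scores are not meaningful until that head is fine-tuned on a labeled toxicity dataset (the six columns mirror the Jigsaw toxic-comment labels).

import torch
from transformers import BertTokenizer, BertForSequenceClassification

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# num_labels=6 adds an untrained classification head on the pre-trained encoder
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
model.eval()

LABELS = ['Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate']

def classify_text(text):
    # Tokenize and wrap in a batch dimension, as in app.py
    input_ids = tokenizer.encode(text, add_special_tokens=True)
    input_tensor = torch.tensor([input_ids])
    with torch.no_grad():
        logits = model(input_tensor)[0]  # shape: (1, 6)
    # Sigmoid scores each label independently (multi-label setup)
    return torch.sigmoid(logits).numpy()

scores = classify_text("This is an example comment.")[0]
for name, score in zip(LABELS, scores):
    print("{}: {:.2%}".format(name, score))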