| |
|
|
| import streamlit as st |
|
|
| import datetime |
| print(datetime.datetime.now(),"Program start.") |
|
|
| |
| |
| import re |
| |
| |
|
|
| import pickle |
| |
| |
| import numpy as np |
| |
|
|
| |
| |
|
|
| |
| from tensorflow.keras.preprocessing.text import Tokenizer |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
|
| |
| |
| |
| |
|
|
# Load the pickled classifier once per browser session and cache it in
# Streamlit's session_state, so script reruns (every widget interaction)
# skip the expensive unpickling step.
if "model_loaded" in st.session_state:
    clf2 = st.session_state.model_loaded
else:
    # NOTE(review): pickle.load is unsafe on untrusted files — model.pkl
    # must come from a trusted source.
    with open('model.pkl', 'rb') as f:
        clf2 = pickle.load(f)
    st.session_state.model_loaded = clf2


print(datetime.datetime.now(), "Finished import.")
# Minimal UI: a static title and one free-text input for the sentence.
st.text("Hate Speech Detector")
sentence=st.text_input('Sentence to analyze')


# Class names indexed by the model's output position: labels[argmax(pred)]
# is the predicted class.  Presumably this order matches the label encoding
# used at training time — TODO confirm against the training pipeline.
labels=['Homophobe', 'Sexist', 'OtherHate', 'NotHate', 'Religion', 'Racist']
|
|
| |
def clean(text):
    """Strip every character except ASCII letters and spaces, then lowercase.

    Digits, punctuation and non-ASCII letters are removed entirely —
    presumably mirroring the preprocessing used at training time; TODO
    confirm against the training pipeline.
    """
    # Removed the dead `global str_punc` declaration: str_punc is never
    # defined or referenced in this module.
    text = re.sub(r'[^a-zA-Z ]', '', text)
    return text.lower()
|
|
# BUG(review): this Tokenizer is freshly constructed and never fitted on the
# training vocabulary (no fit_on_texts / no pickled tokenizer loaded), so
# texts_to_sequences() drops every word and the model always receives an
# all-zero padded sequence.  The fitted tokenizer from training must be
# saved and loaded here — cannot be fixed from this file alone.
tokenizer = Tokenizer()


print(datetime.datetime.now(),"Program. About to load the model.")


print(datetime.datetime.now(),"Program. Finished loading the model.")
if sentence:
    print("*************\nSentence:", sentence)
    # Preprocess: normalize text, convert to token ids, pad/truncate to the
    # fixed input length (256) the model expects.
    sentence = clean(sentence)
    sentence = tokenizer.texts_to_sequences([sentence])
    sentence = pad_sequences(sentence, maxlen=256, truncating='pre')
    # Predict once and derive both the label and its confidence from the
    # same output (the original called clf2.predict() a second time just
    # to compute the probability).
    p = clf2.predict(sentence)
    print("Prediction:", p)
    a = np.argmax(p)
    print("ArgMax:", a)
    result = labels[a]
    proba = np.max(p)
    print(f"{result} : {proba}\n\n")
    st.text(f"{result}")
print(datetime.datetime.now(),"Program end.")
|
|