File size: 2,121 Bytes
378aae4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/local/bin/python3.9
# Streamlit app: hate-speech detection over a pickled Keras classifier.

import streamlit as st

import datetime
print(datetime.datetime.now(),"Program start.")  # startup timing marker

#import nltk
#from nltk.corpus import stopwords
import re
#import pandas as pd
#import nltk

import pickle
#import re
#import string
import numpy as np
#import pandas as pd

#from sklearn.preprocessing import LabelEncoder
#from sklearn.model_selection import train_test_split

#from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

#from tensorflow.keras.optimizers import Adam
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.callbacks import EarlyStopping
#from tensorflow.keras.layers import Dense, LSTM, Embedding, Bidirectional
# Load the classifier once per browser session; Streamlit re-runs this whole
# script on every widget interaction, so the model is cached in session_state
# to avoid re-unpickling on each rerun.
# SECURITY NOTE(review): pickle.load executes arbitrary code from model.pkl —
# only safe if that file is trusted/locally produced; confirm provenance.
if "model_loaded" not in st.session_state:
  with open('model.pkl', 'rb') as f:
    clf2 = pickle.load(f)
  st.session_state.model_loaded=clf2
else:
  clf2=st.session_state.model_loaded

print(datetime.datetime.now(),"Finished import.")  # timing marker
st.text("Hate Speech Detector")
sentence=st.text_input('Sentence to analyze')

# Class labels indexed by the model's output argmax position.
# NOTE(review): this ordering must match the LabelEncoder used at training
# time (see the commented-out `le` lines below) — verify against training code.
labels=['Homophobe', 'Sexist', 'OtherHate', 'NotHate', 'Religion', 'Racist']

# LIB LIB str_punc = string.punctuation.replace(',', '').replace("'",'')
def clean(text):
    """Normalize raw input text for the classifier.

    Strips every character that is not an ASCII letter or a space,
    then lowercases the result.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, lowercased string (may be empty).
    """
    # The dead `global str_punc` declaration was removed: str_punc is only
    # defined in a commented-out line and was never read in this function.
    text = re.sub(r'[^a-zA-Z ]', '', text)
    return text.lower()

# NOTE(review): this Tokenizer is freshly constructed and never fitted on the
# training vocabulary (no fit_on_texts / no pickled tokenizer is loaded), so
# texts_to_sequences below will map every word to nothing and produce empty
# sequences. The model would then always see all-zero padded input. Confirm
# whether a fitted tokenizer should be loaded alongside model.pkl.
tokenizer = Tokenizer()
# LIB LIB le = LabelEncoder()

print(datetime.datetime.now(),"Program. About to load the model.")

print(datetime.datetime.now(),"Program. Finished loading the model.")
# Run inference when the user has entered a sentence.
if sentence:
  print("*************\nSentence:",sentence)
  # Normalize, tokenize, and pad to the fixed sequence length the model expects.
  sentence = clean(sentence)
  sentence = tokenizer.texts_to_sequences([sentence])
  sentence = pad_sequences(sentence, maxlen=256, truncating='pre')
  # FIX: predict once and reuse the result — the original called
  # clf2.predict() a second time just to compute the confidence, doubling
  # the cost of every inference for no benefit.
  p=clf2.predict(sentence)
  print("Prediction:",p)
  a=np.argmax(p)          # index of the most probable class
  print("ArgMax:",a)
  result=labels[a]        # human-readable label for that class
  proba = np.max(p)       # confidence score of the predicted class
  print(f"{result} : {proba}\n\n")
  st.text(f"{result}")
  print(datetime.datetime.now(),"Program end.")