File size: 1,443 Bytes
b305d96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pickle
import streamlit as st
from string import punctuation
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize

# Fetch the NLTK resources that clean_text relies on: the stopword list,
# the tokenizer models used by word_tokenize, and the WordNet data used by
# WordNetLemmatizer.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of the
# pickled 'punkt' models; fetching both keeps word_tokenize working on
# either side of that change.
nltk.download('punkt_tab')
nltk.download('wordnet')


# NOTE: unpickling executes code embedded in the file, so model.pkl and
# vectorizer.pkl must come from a trusted source.
# The context managers close the file handles as soon as loading finishes
# instead of leaving them open for the lifetime of the process.
with open('model.pkl', 'rb') as _model_file:
    model = pickle.load(_model_file)
with open('vectorizer.pkl', 'rb') as _vectorizer_file:
    vectorizer = pickle.load(_vectorizer_file)

def clean_text(text):
    """Return *text* reduced to lemmatised, lower-cased word tokens.

    The text is tokenised with ``word_tokenize``; tokens that are not purely
    alphabetic (punctuation, numbers, mixed tokens) and tokens found in
    NLTK's English stopword list are dropped. Each remaining token is
    lower-cased and lemmatised with ``WordNetLemmatizer``.

    Args:
        text: The raw input string.

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        no token survives.
    """
    # A frozenset gives O(1) membership tests per token. Punctuation needs no
    # separate filter: a token that passes isalpha() cannot be punctuation.
    stop_words = frozenset(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    # NOTE(review): the stopword test sees the token in its original case, so
    # capitalised stopwords such as "The" are kept. This is left unchanged
    # because the pickled vectorizer was presumably fitted on text cleaned
    # the same way - confirm against the training code before changing it.
    kept = [
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token.isalpha() and token not in stop_words
    ]
    return ' '.join(kept)


def main():
    """Render the Streamlit page and classify the submitted text.

    Shows a text area and a Submit button. Once the button is pressed, the
    text is cleaned with ``clean_text``, transformed by the module-level
    ``vectorizer`` and classified by the module-level ``model``. Label 0 is
    reported as hate speech, label 1 as offensive language, and any other
    label as neither.
    """
    st.set_page_config(page_title='Hate Speech Detector', page_icon='🤖')
    st.header("Hate Speech Detection System")
    text = st.text_area('Enter Text')

    if not st.button('Submit'):
        return

    # Classifying an empty submission would only report whatever the model
    # predicts for an all-zero feature vector, so ask for input instead.
    if not text.strip():
        st.warning('Please enter some text to analyse.')
        return

    features = vectorizer.transform([clean_text(text)])
    label = model.predict(features)[0]

    if label == 0:
        verdict = 'Hate Speech'
    elif label == 1:
        verdict = 'Offensive Language'
    else:
        verdict = 'No hate speech or offensive language detected'
    st.write(verdict)

# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()