# freak360's picture
# Update app.py
# b305d96 verified
import pickle
import streamlit as st
from string import punctuation
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
# Fetch the NLTK resources this script relies on: the stop-word corpus,
# the "punkt" tokenizer data and the WordNet data used for lemmatization.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

# Load the pickled model and vectorizer from the working directory.
# The context managers close each file as soon as unpickling finishes;
# a bare pickle.load(open(...)) leaves the handle open until it happens
# to be garbage-collected.
# NOTE(review): unpickling can execute arbitrary code embedded in the
# file, so model.pkl and vectorizer.pkl must come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
def clean_text(text):
    """Reduce raw text to a space-separated string of lemmatized words.

    The text is tokenized with NLTK's ``word_tokenize``. Tokens that are
    not purely alphabetic (punctuation, numbers, mixed tokens) are dropped,
    as are tokens found in NLTK's English stop-word list. Every surviving
    token is lowercased and lemmatized with ``WordNetLemmatizer``.

    Args:
        text: The raw input string.

    Returns:
        The cleaned tokens joined by single spaces; an empty string when
        no token survives the filtering.
    """
    # A set gives O(1) membership tests; scanning a list of stop words for
    # every token is needlessly linear.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # isalpha() already rejects every punctuation token, so no separate
    # punctuation filter is needed.
    # NOTE(review): the stop-word test is applied to the token as typed,
    # before lowercasing, so capitalized stop words such as "The" or "I"
    # are kept. That behavior is deliberately preserved here: the pickled
    # vectorizer may have been fitted on text cleaned the same way, and
    # changing it only at inference time could skew predictions. Confirm
    # against the training code before altering it.
    return ' '.join(
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token.isalpha() and token not in stop_words
    )
def main():
    """Render the Streamlit UI and classify the text the user submits.

    The page shows a text area and a Submit button. When the button is
    pressed, the text is cleaned with ``clean_text``, transformed by the
    module-level ``vectorizer`` and passed to the module-level ``model``.
    The predicted label is shown as:

    * ``0`` -> "Hate Speech"
    * ``1`` -> "Offensive Language"
    * anything else -> "No hate speech or offensive language detected"
    """
    st.set_page_config(page_title='Hate Speech Detector', page_icon='🤖')
    st.header("Hate Speech Detection System")
    text = st.text_area('Enter Text')
    submit = st.button('Submit')

    # Nothing to do until the user presses the button.
    if not submit:
        return

    # Guard against blank input: without this, an empty document is sent
    # to the model and whatever label it returns is shown as if it were a
    # verdict on real text.
    if not text or not text.strip():
        st.warning('Please enter some text to analyze.')
        return

    itext = clean_text(text)
    # transform() expects an iterable of documents, hence the one-item list;
    # predict() returns one label per document, so take the first.
    vectorized_text = vectorizer.transform([itext])
    response = model.predict(vectorized_text)[0]

    if response == 0:
        st.write('Hate Speech')
    elif response == 1:
        st.write('Offensive Language')
    else:
        st.write('No hate speech or offensive language detected')
# Script entry point: build the UI only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()