import re
import html
import string
import pickle

import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Corpora required by word_tokenize, stopwords and WordNetLemmatizer;
# these are no-ops when the data is already present
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)

stop_words = stopwords.words('english')
def remove_special_chars(text):
    """Undo common HTML-escape and tokenization artifacts, then collapse runs of spaces."""
    re1 = re.compile(r' +')
    x1 = text.lower().replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
        'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
        '<br />', "\n").replace('\\"', '"').replace('<unk>', 'u_n').replace(' @.@ ', '.').replace(
        ' @-@ ', '-').replace('\\', ' \\ ')
    return re1.sub(' ', html.unescape(x1))
def to_lowercase(text):
    return text.lower()
def remove_punctuation(text):
    """Remove punctuation characters from a string."""
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)
def replace_numbers(text):
    """Remove all digit sequences from a string."""
    return re.sub(r'\d+', '', text)
def remove_whitespaces(text):
    return text.strip()
def remove_stopwords(words, stop_words):
    return [word for word in words if word not in stop_words]
def stem_words(words):
    """Stem words in text"""
    stemmer = PorterStemmer()
    return [stemmer.stem(word) for word in words]
def lemmatize_words(words):
    """Lemmatize words in text"""
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word) for word in words]
def lemmatize_verbs(words):
    """Lemmatize verbs and join the tokens back into a single string."""
    lemmatizer = WordNetLemmatizer()
    return ' '.join([lemmatizer.lemmatize(word, pos='v') for word in words])
def text2words(text):
    return word_tokenize(text)
def clean_text(text):
    text = remove_special_chars(text)
    text = remove_punctuation(text)
    text = to_lowercase(text)
    text = replace_numbers(text)
    words = text2words(text)
    words = remove_stopwords(words, stop_words)
    # words = stem_words(words)  # either stem or lemmatize, not both
    words = lemmatize_words(words)
    # lemmatize_verbs already joins the tokens back into a string
    return lemmatize_verbs(words)
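# Illustrative behaviour (exact output depends on the local NLTK stopword
# list and WordNet data), e.g.:
#   clean_text("This movie was <br /> 100% AWESOME!!!")
#   -> roughly 'movie awesome'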
# Training-time preprocessing, kept for reference:
# df = pd.read_csv('train.csv.zip')
# df['comment_text'] = df['comment_text'].apply(lambda x: clean_text(x))

# A Keras model saved as .h5 must be loaded with load_model, not pickle
model = load_model('tox_model.h5')
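# The tokenizer has to be the one fitted on the training corpus: a fresh,
# unfitted Tokenizer would map every word to the OOV index and the model
# would only ever see 'UNK'. A minimal sketch, assuming the fitted
# tokenizer was pickled at training time as 'tokenizer.pkl' (hypothetical
# file name; see the training-side sketch at the bottom):
with open('tokenizer.pkl', 'rb') as f:
    tok = pickle.load(f)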
st.title('Toxic comment classification')

# 'user_input' rather than 'input' to avoid shadowing the built-in;
# st.text_area returns a plain string, so it has no .apply() method
user_input = st.text_area('Enter your comment')

if user_input:
    cleaned = clean_text(user_input)
    # texts_to_sequences (plural) expects a list of strings
    x_test = tok.texts_to_sequences([cleaned])
    input_seq = pad_sequences(x_test,
                              maxlen=50,
                              truncating='post',
                              padding='post')
    out = model.predict(input_seq)
    st.json(out.tolist())  # ndarrays are not JSON-serializable
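# For completeness, a sketch of the training-time side this app assumes
# (hypothetical; the Space only shows the inference script):
#
#   from tensorflow.keras.preprocessing.text import Tokenizer
#   tok = Tokenizer(num_words=1000, oov_token='UNK')
#   tok.fit_on_texts(df['comment_text'])
#   with open('tokenizer.pkl', 'wb') as f:
#       pickle.dump(tok, f)
#   model.save('tox_model.h5')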