# Streamlit spam-message detection app (GRU classifier + NLTK preprocessing)
import ast
import re
import string

import joblib
import nltk
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# NLTK resources required by word_tokenize / stop-word filtering / lemmatizer.
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('punkt')

# Chat-word expansions (e.g. "LOL" -> "laughing out loud"), stored on disk as
# a Python dict literal; ast.literal_eval parses literals only (no code exec).
with open('chatwords.txt', encoding='utf-8') as f:
    chatwords = ast.literal_eval(f.read())

# Abbreviation expansions, same dict-literal file format.
with open('abbreviation.txt', encoding='utf-8') as abb:
    abbreviation = ast.literal_eval(abb.read())

# set instead of list: O(1) membership tests in the stop-word filter.
stop_words = set(stopwords.words('english'))

lem = WordNetLemmatizer()

# Trained GRU spam classifier; use the load_model already imported above
# (the original redundantly spelled out tf.keras.models.load_model).
final_gru = load_model('model_gru')
| # import functions | |
def check_chatwords(text):
    """Expand chat slang tokens using the module-level `chatwords` map.

    Lookup is case-insensitive (the map's keys are upper-case); tokens
    without an entry pass through unchanged.
    """
    expanded = [chatwords.get(word.upper(), word) for word in text.split()]
    return " ".join(expanded)
def lower(text):
    """Return *text* converted to lower case."""
    return text.lower()
def check_abbr(text):
    """Expand tokens found in the module-level `abbreviation` map.

    Case-sensitive lookup; unknown tokens are kept as-is.
    """
    return " ".join(abbreviation.get(token, token) for token in text.split())
def check_punctuation(text):
    """Normalize *text* for the model.

    Steps:
      1. drop bracketed spans such as "[chorus]" entirely;
      2. replace every non-alphabetic character with a space;
      3. collapse all whitespace runs (including newlines) to single
         spaces and trim the ends.

    Bug fixed: the original applied the bracket pattern as "[[^]]*]" — an
    unescaped, meaningless character class — and only *after* step 2 had
    already removed the brackets, so bracketed content was never stripped.
    """
    data = re.sub(r"\[[^]]*\]", ' ', text)   # remove [...] spans first
    data = re.sub(r"[^a-zA-Z]", ' ', data)   # keep letters only
    # split()/join subsumes the original's \n substitution and strip().
    return ' '.join(data.split())
def token_stopwords_lemma(text):
    """Tokenize *text*, drop English stop words, lemmatize the remainder.

    Returns the surviving tokens re-joined with single spaces. (The
    original joined the filtered tokens and re-split them before
    lemmatizing; since tokens contain no spaces, that round-trip is a
    no-op and is omitted here.)
    """
    kept = (tok for tok in word_tokenize(text) if tok not in stop_words)
    return ' '.join(lem.lemmatize(tok) for tok in kept)
st.title("SPAM Message Detection")

message = st.text_input('Please input your message here (in English):')
st.write('Message:', message)

# Build a one-row frame and apply the same cleaning pipeline used at
# training time, in the same order.
df_inf1 = pd.DataFrame({'message': [message]})
df_inf1['message'] = (
    df_inf1['message']
    .apply(check_chatwords)
    .apply(lower)
    .apply(check_abbr)
    .apply(check_punctuation)
    .apply(token_stopwords_lemma)
)

# Run inference only when the user asks. The original also called
# final_gru.predict at module level on every Streamlit rerun (before the
# button was pressed) and built an unused df_combined — both removed.
if st.button('Predict'):
    y_pred_inf = final_gru.predict(df_inf1['message'])
    # Threshold the sigmoid output at 0.5: 0 = ham, 1 = spam.
    y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
    if y_pred_inf[0][0] == 0:
        st.success("Your message is not spam.")
    else:
        st.error("Your message is spam")