# capstone_main / inference.py
# Repository page residue: "vikranth1111's picture", "Upload 16 files", a4b94b2
import pickle
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import config
import preprocessing as pp
def predict_test(model: str, test_data: str = config.MODIFIED_TEST):
    """Predict class labels for a CSV of texts using a saved Word2Vec-based model.

    Args:
        model: Model name (e.g. ``"LSTM"``). It selects the
            ``PRETRAIN_WORD2VEC_{model}`` directory under ``config.MODEL_DIR``,
            which must hold ``tokenizer.pkl`` and the saved classifier file.
        test_data: Path to the CSV file to score (anything ``pd.read_csv``
            accepts). The file must contain the ``config.TEXT`` column.

    Returns:
        A NumPy array of predicted class labels, one entry per input row.
        When the model's last output axis has more than one unit the labels
        are the argmax over that axis; otherwise the single output is
        thresholded at 0.5 and returned as ``int32`` with the same shape as
        the model output.
    """
    # Directory holding the tokenizer and the trained classifier.
    model_path = f"{config.MODEL_DIR}/PRETRAIN_WORD2VEC_{model}/"

    # Read the raw data and add a cleaned-text column.
    df_test = pd.read_csv(test_data)
    df_test[config.CLEANED_TEXT] = df_test[config.TEXT].apply(pp.clean_tweet)

    # NOTE: pickle.load can execute arbitrary code; only load tokenizer
    # files that were produced by this project's own training run.
    with open(f"{model_path}tokenizer.pkl", "rb") as handle:
        tokenizer = pickle.load(handle)

    # Convert the cleaned texts to integer sequences and pad them.
    sequences = tokenizer.texts_to_sequences(df_test[config.CLEANED_TEXT].values)
    X_padded = pad_sequences(sequences, maxlen=config.MAXLEN)

    # NOTE(review): the file name contains a space before ".h5"; kept as-is
    # because it presumably matches the name used when the model was saved
    # -- confirm against the training script.
    clf = load_model(f"{model_path}{model}_Word2Vec .h5")

    # `Sequential.predict_classes` was removed in TensorFlow 2.6, so derive
    # the labels from `predict` using the same rule it applied.
    probabilities = clf.predict(X_padded, verbose=0)
    if probabilities.shape[-1] > 1:
        return probabilities.argmax(axis=-1)
    return (probabilities > 0.5).astype("int32")
if __name__ == "__main__":
    # Score the default test set with the LSTM model and write the
    # predictions into a copy of the sample submission file.
    submission = predict_test(model="LSTM")
    sample_sub = pd.read_csv(config.SUBMISSION)

    # Flatten so a column-vector prediction of shape (n, 1) is assigned as a
    # plain 1-D column; a 1-D prediction is unaffected.
    sample_sub.loc[:, config.TARGET] = np.ravel(submission)

    # Use the same "<MODEL_DIR>/PRETRAIN_WORD2VEC_<model>/" layout that
    # predict_test reads from; the separator after MODEL_DIR was missing here.
    sample_sub.to_csv(
        f"{config.MODEL_DIR}/PRETRAIN_WORD2VEC_LSTM/LSTM.csv", index=False
    )