| import json | |
| import string | |
| import random | |
| import nltk | |
| import os | |
| import numpy as np | |
| from nltk.stem import WordNetLemmatizer | |
| import tensorflow as tf | |
| from keras import Sequential | |
| from keras.layers import Dense, Dropout | |
| from nltk.tokenize import word_tokenize | |
| """ | |
| nltk.download('omw-1.4') | |
| nltk.download("stopwords", quiet=True) | |
| nltk.download("punkt", quiet=True) | |
| nltk.download("wordnet", quiet=True) | |
| """ | |
class ModeleDeepLearning:
    """Bag-of-words intent classifier for a chatbot, trained from a JSON file.

    The JSON file must contain an ``"intents"`` list whose items have a
    ``"tag"``, a list of example ``"patterns"`` and a list of ``"responses"``.
    Constructing an instance loads the file and trains the network
    immediately (see :meth:`run`).

    Tokenisation and lemmatisation rely on NLTK (``word_tokenize`` and
    ``WordNetLemmatizer``), so the corresponding NLTK data packages must be
    available at runtime.

    Attributes:
        file_path: Path of the intents JSON file.
        epochs: Number of training epochs passed to ``model.fit``.
        model: The trained Keras model, or ``None`` before training.
        data: Parsed content of the intents file.
        words: Sorted vocabulary (lower-cased, lemmatised, no punctuation).
        classes: Sorted list of intent tags that have at least one pattern.
        fallback_response: Reply returned when no intent can be resolved.
    """

    # Reply used when no intent clears the threshold or the tag is unknown.
    # Defined at class level so it can be overridden per instance.
    fallback_response = "Sorry, I did not understand that."

    # Character set used to recognise punctuation-only tokens.
    _PUNCTUATION = frozenset(string.punctuation)

    def __init__(self, file_path, epochs=200, *, fallback_response=None):
        """Load the intents file and train the model.

        Args:
            file_path: Path of the intents JSON file.
            epochs: Number of training epochs.
            fallback_response: Optional reply to use when no intent can be
                resolved; defaults to the class-level ``fallback_response``.
        """
        self.file_path = file_path
        self.epochs = epochs
        self.model = None
        self.data = None
        self.words = []
        self.classes = []
        self.train_X = None
        self.train_y = None
        if fallback_response is not None:
            self.fallback_response = fallback_response
        self.lemmatizer = WordNetLemmatizer()
        self.run()

    def importer(self):
        """Read and return the parsed JSON content of ``self.file_path``."""
        with open(self.file_path, encoding="utf-8") as f:
            return json.load(f)

    def _is_punctuation(self, token):
        """Return True when *token* consists only of punctuation characters."""
        return all(char in self._PUNCTUATION for char in token)

    def _encode_tokens(self, tokens):
        """Return the binary bag-of-words vector of *tokens* over ``self.words``.

        Position ``i`` is 1 when ``self.words[i]`` occurs as a whole token in
        *tokens* and 0 otherwise.
        """
        present = set(tokens)
        return np.array([1 if word in present else 0 for word in self.words])

    def preprocess_data(self):
        """Build the vocabulary, the class list and the training matrices.

        Training patterns go through the same ``clean_text`` /
        ``_encode_tokens`` pipeline as user input at prediction time, so the
        features seen during training match the ones seen during inference.

        Returns:
            A ``(train_X, train_y)`` tuple of NumPy arrays: one bag-of-words
            row per pattern and the matching one-hot encoded intent tag.

        Raises:
            ValueError: If the intents file contains no pattern at all.
        """
        samples = []  # (tokens, tag) for every training pattern
        vocabulary = set()
        tags = set()
        for intent in self.data["intents"]:
            tag = intent["tag"]
            for pattern in intent["patterns"]:
                tokens = self.clean_text(pattern)
                vocabulary.update(
                    token for token in tokens if not self._is_punctuation(token)
                )
                samples.append((tokens, tag))
                # Only tags with at least one pattern become output classes.
                tags.add(tag)

        if not samples:
            raise ValueError(
                f"No training patterns found in {self.file_path!r}"
            )

        # Rebuilt from scratch so repeated run() calls do not accumulate state.
        self.words = sorted(vocabulary)
        self.classes = sorted(tags)

        random.shuffle(samples)
        class_index = {tag: idx for idx, tag in enumerate(self.classes)}

        train_X = np.array([self._encode_tokens(tokens) for tokens, _ in samples])
        train_y = np.zeros((len(samples), len(self.classes)), dtype=int)
        for row, (_, tag) in enumerate(samples):
            train_y[row, class_index[tag]] = 1
        return train_X, train_y

    def build_model(self, input_shape, output_shape):
        """Create and compile the feed-forward classification network.

        Args:
            input_shape: Shape tuple of one input sample (vocabulary size,).
            output_shape: Number of output classes.

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(128, input_shape=input_shape, activation="relu"))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(output_shape, activation="softmax"))
        adam = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(
            loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"]
        )
        return model

    def train_model(self):
        """Build the network sized for the training data and fit it."""
        input_shape = (len(self.train_X[0]),)
        output_shape = len(self.train_y[0])
        self.model = self.build_model(input_shape, output_shape)
        self.model.fit(
            x=self.train_X, y=self.train_y, epochs=self.epochs, verbose=1
        )

    def clean_text(self, text):
        """Tokenise *text* and return its lower-cased, lemmatised tokens.

        Lower-casing is required because the vocabulary is stored in lower
        case; without it capitalised input would never match a known word.
        """
        return [
            self.lemmatizer.lemmatize(token.lower())
            for token in word_tokenize(text)
        ]

    def bag_of_words(self, text):
        """Return the binary bag-of-words vector of *text* as a NumPy array."""
        return self._encode_tokens(self.clean_text(text))

    def predict_class(self, text, threshold=0.2):
        """Return the intent tags predicted for *text*, most probable first.

        Args:
            text: The user utterance.
            threshold: Minimum probability a class must exceed to be kept.

        Returns:
            A list of tags, possibly empty when no class exceeds *threshold*.
        """
        features = self.bag_of_words(text)
        scores = self.model.predict(np.array([features]))[0]
        candidates = [
            (idx, score) for idx, score in enumerate(scores) if score > threshold
        ]
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        return [self.classes[idx] for idx, _ in candidates]

    def get_response(self, intents_list):
        """Return a random response for the first tag of *intents_list*.

        Falls back to ``self.fallback_response`` when *intents_list* is
        empty, when the tag is not present in the intents data, or when the
        matching intent has no responses.
        """
        if not intents_list:
            return self.fallback_response
        tag = intents_list[0]
        for intent in self.data["intents"]:
            if intent["tag"] == tag:
                responses = intent["responses"]
                if responses:
                    return random.choice(responses)
                break
        return self.fallback_response

    def predict(self, question):
        """Return the chatbot's reply to *question*."""
        intents = self.predict_class(question)
        return self.get_response(intents)

    def run(self):
        """Load the intents file, build the training data and train the model."""
        self.data = self.importer()
        self.train_X, self.train_y = self.preprocess_data()
        self.train_model()