{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# CHATBOTS - Using Natural Language Processing and Tensorflow" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "# In this Jupyter Notebook, We are going to Build a Chatbot that Understands the Context of Sentense and Respond accordingly.\n", "These are the Things that we are going to do in this Project -\n", "1. Transforming the Conversational Intents into Tensorflow model (Neural Network using TFLEARN) using NLP and Save it as Pickle also.\n", "2. Load the Same Pickle and Model to Build the Framework to Process the Responses.\n", "3. At Last, We Show How the Inputs are Processed and Give the Reponses.\n", "-------------------------------------------------------------------------------------------------------\n", "\n", "##### TFLEARN - TFlearn is a modular and transparent deep learning library built on top of Tensorflow. It was designed to provide a higher-level API to TensorFlow in order to facilitate and speed-up experimentations, while remaining fully transparent and compatible with it. (http://tflearn.org/)\n", "-------------------------------------------------------------------------------------------------------\n", "##### TENSORFLOW - TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML powered applications.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\meghn\\AppData\\Local\\Temp\\ipykernel_29544\\870218512.py:4: The name tf.disable_v2_behavior is deprecated. Please use tf.compat.v1.disable_v2_behavior instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\compat\\v2_compat.py:108: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "non-resource variables are not supported in the long term\n", "curses is not supported on this machine (please install/reinstall curses for an optimal experience)\n", "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. Please use tf.compat.v1.summary.merge instead.\n", "\n" ] } ], "source": [ "#Used in Tensorflow Model\n", "import numpy as np\n", "import tensorflow.compat.v1 as tf\n", "tf.disable_v2_behavior()\n", "import tflearn\n", "import random\n", "\n", "#Usde to for Contextualisation and Other NLP Tasks.\n", "import nltk\n", "from nltk.stem.lancaster import LancasterStemmer\n", "stemmer = LancasterStemmer()\n", "\n", "#Other\n", "import json\n", "import pickle\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processing the Intents.....\n" ] } ], "source": [ "print(\"Processing the Intents.....\")\n", "with open('intents.json') as json_data:\n", " intents = json.load(json_data)\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looping through the Intents to Convert them to words, classes, documents and ignore_words.......\n" ] } ], "source": [ "words = []\n", "classes = []\n", "documents = []\n", "ignore_words = ['?']\n", "print(\"Looping through the Intents to Convert them to words, classes, documents and ignore_words.......\")\n", "for intent in intents['intents']:\n", " for pattern in intent['patterns']:\n", " # tokenize each word in the sentence\n", " w = nltk.word_tokenize(pattern)\n", " # add to our words list\n", " words.extend(w)\n", " # add to documents in our corpus\n", " documents.append((w, intent['tag']))\n", " # add to our classes list\n", " if intent['tag'] not in classes:\n", " classes.append(intent['tag'])\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stemming, Lowering and Removing Duplicates.......\n", "98 documents\n", "30 classes ['application_dates', 'ba', 'baallb', 'bba', 'bdesign', 'blu_embers_timings', 'bsc', 'btech', 'cafes', 'cost_of_study', 'courses', 'cup_of_joe_timings', 'doctors', 'eligibility_criteria', 'emergency', 'exchange_program', 'faculty', 'goodbye', 'greeting', 'hours', 'mba', 'meal_menu', 'meal_timings', 'new_embers_timings', 'online_payments', 'other_requirements', 'phd', 'restaurants', 'rise_timings', 'thanks']\n", "125 unique stemmed words [\"'\", \"'s\", '.', 'a', 'about', 'abroad', 'addit', 'am', 'anyon', 'apply', 'ar', 'assocy', 'at', 'avail', 'ba', 'bba', 'bdesign', 'beyond', 'blu', 'breakfast', 'bsc', 'btech', 'bye', 'caf', 'can', 'cas', 'coff', 'contact', 'cost', 'cours', 'criter', 'cup', 'dat', 'day', 'deadlin', 'degr', 'detail', 'din', 'do', 'doct', 'doe', 'eat', 'elig', 'els', 'emb', 'emerg', 'entail', 'exchang', 'expens', 'facul', 'for', 'get', 'giv', 'good', 'goodby', 'hello', 'help', 'hi', 'hono', 'hospit', 'hour', 'how', 'i', 'in', 'inform', 'is', 'it', 'joe', 'lat', 'lik', 'list', 'llb', 'lunch', 'mba', 'me', 'meal', 'memb', 'menu', 'method', 'mor', 'much', 'nee', 'new', 'numb', 'of', 'off', 'on', 'onlin', 'op', 'opt', 'oth', 'particip', 'pay', 'phd', 'plac', 'profess', 'program', 'requir', 'resta', 'ris', 'see', 'shop', 'should', 'stud', 'study', 'sunday', 'tel', 'thank', 'that', 'the', 'ther', 'tim', 'timelin', 'to', 'today', 'univers', 'what', 'when', 'wher', 'who', 'with', 'work', 'woxs', 'yo', 'you']\n" ] } ], "source": [ "print(\"Stemming, Lowering and Removing Duplicates.......\")\n", "words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]\n", "words = sorted(list(set(words)))\n", "\n", "# remove duplicates\n", "classes = sorted(list(set(classes)))\n", "\n", "print (len(documents), \"documents\")\n", "print (len(classes), \"classes\", classes)\n", "print (len(words), \"unique stemmed words\", words)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Creating the Data for our Model.....\n", "Creating an List (Empty) for Output.....\n", "Creating Training Set, Bag of Words for our Model....\n", "Shuffling Randomly and Converting into Numpy Array for Faster Processing......\n", "Creating Train and Test Lists.....\n", "Building Neural Network for Our Chatbot to be Contextual....\n", "Resetting graph data....\n" ] } ], "source": [ "print(\"Creating the Data for our Model.....\")\n", "training = []\n", "output = []\n", "print(\"Creating an List (Empty) for Output.....\")\n", "output_empty = [0] * len(classes)\n", "\n", "print(\"Creating Training Set, Bag of Words for our Model....\")\n", "for doc in documents:\n", " # Initialize our bag of words\n", " bag = []\n", " # List of tokenized words for the pattern\n", " pattern_words = doc[0]\n", " # Stem each word\n", " pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n", " \n", " # Create our bag of words array\n", " for w in words:\n", " bag.append(1) if w in pattern_words else bag.append(0)\n", "\n", " # Output is a '0' for each tag and '1' for current tag\n", " output_row = list(output_empty)\n", " output_row[classes.index(doc[1])] = 1\n", "\n", " # Append the feature vector and output row as a tuple\n", " training.append((bag, output_row))\n", "\n", "print(\"Shuffling Randomly and Converting into Numpy Array for Faster Processing......\")\n", "random.shuffle(training)\n", "\n", "# Separate feature vectors and output rows into separate lists\n", "train_x = np.array([x[0] for x in training])\n", "train_y = np.array([x[1] for x in training])\n", "\n", "print(\"Creating Train and Test Lists.....\")\n", "\n", "print(\"Building Neural Network for Our Chatbot to be Contextual....\")\n", "print(\"Resetting graph data....\")\n", "tf.reset_default_graph()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\initializations.py:164: calling TruncatedNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Call initializer instance with the dtype argument instead of passing it to the constructor\n", "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\optimizers.py:238: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.\n", "\n", "Training....\n" ] } ], "source": [ "net = tflearn.input_data(shape=[None, len(train_x[0])])\n", "net = tflearn.fully_connected(net, 8)\n", "net = tflearn.fully_connected(net, 8)\n", "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n", "net = tflearn.regression(net)\n", "print(\"Training....\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Step: 12999 | total loss: \u001b[1m\u001b[32m0.00966\u001b[0m\u001b[0m | time: 0.050s\n", "| Adam | epoch: 1000 | loss: 0.00966 - acc: 0.9996 -- iter: 96/98\n", "Training Step: 13000 | total loss: \u001b[1m\u001b[32m0.00887\u001b[0m\u001b[0m | time: 0.054s\n", "| Adam | epoch: 1000 | loss: 0.00887 - acc: 0.9997 -- iter: 98/98\n", "--\n", "Saving the Model.......\n", "INFO:tensorflow:C:\\Users\\meghn\\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.\n" ] } ], "source": [ "print(\"Training the Model.......\")\n", "model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)\n", "print(\"Saving the Model.......\")\n", "model.save('model.tflearn')\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pickle is also Saved..........\n" ] } ], "source": [ "print(\"Pickle is also Saved..........\")\n", "#pickling \n", "pickle.dump( {'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, open( \"training_data\", \"wb\" ) )" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading Pickle.....\n", "Loading the Model......\n", "INFO:tensorflow:Restoring parameters from C:\\Users\\meghn\\model.tflearn\n" ] } ], "source": [ "print(\"Loading Pickle.....\")\n", "data = pickle.load( open( \"training_data\", \"rb\" ) )#serializes the dta (convert in byte stream)\n", "words = data['words']\n", "classes = data['classes']\n", "train_x = data['train_x']\n", "train_y = data['train_y']\n", "\n", "\n", "with open('intents.json') as json_data:\n", " intents = json.load(json_data)\n", " \n", "print(\"Loading the Model......\")\n", "# load our saved model\n", "model.load('./model.tflearn')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ERROR_THRESHOLD = 0.25\n" ] } ], "source": [ "def clean_up_sentence(sentence):\n", " # It Tokenize or Break it into the constituents parts of Sentense.\n", " sentence_words = nltk.word_tokenize(sentence)\n", " # Stemming means to find the root of the word.\n", " sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n", " return sentence_words\n", "\n", "# Return the Array of Bag of Words: True or False and 0 or 1 for each word of bag that exists in the Sentence\n", "def bow(sentence, words, show_details=False):\n", " sentence_words = clean_up_sentence(sentence)\n", " bag = [0]*len(words)\n", " for s in sentence_words:\n", " for i,w in enumerate(words):\n", " if w == s:\n", " bag[i] = 1\n", " if show_details:\n", " print (\"found in bag: %s\" % w)\n", " return(np.array(bag))\n", "\n", "ERROR_THRESHOLD = 0.25\n", "print(\"ERROR_THRESHOLD = 0.25\")\n", "\n", "def classify(sentence):\n", " # Prediction or To Get the Posibility or Probability from the Model\n", " results = model.predict([bow(sentence, words)])[0]\n", " # Exclude those results which are Below Threshold\n", " results = [[i,r] for i,r in enumerate(results) if r>ERROR_THRESHOLD]\n", " # Sorting is Done because heigher Confidence Answer comes first.\n", " results.sort(key=lambda x: x[1], reverse=True)\n", " return_list = []\n", " for r in results:\n", " return_list.append((classes[r[0]], r[1])) #Tuppl -> Intent and Probability\n", " return return_list\n", "\n", "def response(sentence, userID='123', show_details=False):\n", " results = classify(sentence)\n", " if results:\n", " while results:\n", " for i in intents['intents']:\n", " if i['tag'] == results[0][0]:\n", " # Return a random response from the list of responses for the matching intent\n", " return random.choice(i['responses'])\n", " results.pop(0)\n", " # If no matching intent was found, return a default response\n", " return \"Sorry, I didn't understand that.\"\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7871\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import gradio as gr\n", "\n", "def chat_response(message):\n", " return response(message) # Return the response from the chatbot\n", "\n", "gr.Interface(fn=chat_response, inputs=\"text\", outputs=\"text\").launch()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }