Upload 2 files
- chatbot.ipynb +502 -0
- requirements.txt +4 -0
chatbot.ipynb
ADDED
@@ -0,0 +1,502 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CHATBOTS - Using Natural Language Processing and TensorFlow"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "# In this Jupyter Notebook, we are going to build a chatbot that understands the context of a sentence and responds accordingly.\n",
    "These are the things we are going to do in this project:\n",
    "1. Transform the conversational intents into a TensorFlow model (a neural network built with TFLearn) using NLP, and save the preprocessed data as a pickle.\n",
    "2. Load that same pickle and model to build the framework that processes the responses.\n",
    "3. Finally, show how the inputs are processed and the responses are given.\n",
    "-------------------------------------------------------------------------------------------------------\n",
    "\n",
    "##### TFLEARN - TFLearn is a modular and transparent deep learning library built on top of TensorFlow. It was designed to provide a higher-level API to TensorFlow in order to facilitate and speed up experimentation, while remaining fully transparent and compatible with it. (http://tflearn.org/)\n",
    "-------------------------------------------------------------------------------------------------------\n",
    "##### TENSORFLOW - TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML-powered applications.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
      "\n",
      "WARNING:tensorflow:From C:\\Users\\meghn\\AppData\\Local\\Temp\\ipykernel_29544\\870218512.py:4: The name tf.disable_v2_behavior is deprecated. Please use tf.compat.v1.disable_v2_behavior instead.\n",
      "\n",
      "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\compat\\v2_compat.py:108: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "non-resource variables are not supported in the long term\n",
      "curses is not supported on this machine (please install/reinstall curses for an optimal experience)\n",
      "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. Please use tf.compat.v1.summary.merge instead.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Used in the TensorFlow model\n",
    "import numpy as np\n",
    "import tensorflow.compat.v1 as tf\n",
    "tf.disable_v2_behavior()\n",
    "import tflearn\n",
    "import random\n",
    "\n",
    "# Used for contextualisation and other NLP tasks\n",
    "import nltk\n",
    "from nltk.stem.lancaster import LancasterStemmer\n",
    "stemmer = LancasterStemmer()\n",
    "\n",
    "# Other\n",
    "import json\n",
    "import pickle\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n"
   ]
  },
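  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: `nltk.word_tokenize` (used below) relies on NLTK's `punkt` tokenizer models. If they are not already installed on your machine, a one-time download may be needed, as sketched in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-time download of the tokenizer models used by nltk.word_tokenize.\n",
    "# Safe to re-run: nltk skips data that is already present.\n",
    "nltk.download('punkt')"
   ]
  },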
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing the Intents.....\n"
     ]
    }
   ],
   "source": [
    "print(\"Processing the Intents.....\")\n",
    "with open('intents.json') as json_data:\n",
    "    intents = json.load(json_data)\n",
    "\n"
   ]
  },
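  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `intents.json` file itself is not part of this commit. For illustration only, the next cell sketches the minimal structure this notebook assumes (a list of intents, each with a `tag`, `patterns` and `responses`); the tags are real ones from the classes printed below, but the patterns and responses are made up."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical sketch of the intents.json structure; the real file\n",
    "# defines 30 intents (see the classes printed in a later cell).\n",
    "sample_intents = {\n",
    "    \"intents\": [\n",
    "        {\"tag\": \"greeting\",\n",
    "         \"patterns\": [\"Hi\", \"Hello\", \"Good day\"],\n",
    "         \"responses\": [\"Hello!\", \"Hi there, how can I help?\"]},\n",
    "        {\"tag\": \"goodbye\",\n",
    "         \"patterns\": [\"Bye\", \"See you later\"],\n",
    "         \"responses\": [\"Goodbye!\", \"Talk to you later.\"]}\n",
    "    ]\n",
    "}"
   ]
  },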
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looping through the Intents to Convert them to words, classes, documents and ignore_words.......\n"
     ]
    }
   ],
   "source": [
    "words = []\n",
    "classes = []\n",
    "documents = []\n",
    "ignore_words = ['?']\n",
    "print(\"Looping through the Intents to Convert them to words, classes, documents and ignore_words.......\")\n",
    "for intent in intents['intents']:\n",
    "    for pattern in intent['patterns']:\n",
    "        # tokenize each word in the sentence\n",
    "        w = nltk.word_tokenize(pattern)\n",
    "        # add to our words list\n",
    "        words.extend(w)\n",
    "        # add to documents in our corpus\n",
    "        documents.append((w, intent['tag']))\n",
    "        # add to our classes list\n",
    "        if intent['tag'] not in classes:\n",
    "            classes.append(intent['tag'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stemming, Lowering and Removing Duplicates.......\n",
      "98 documents\n",
      "30 classes ['application_dates', 'ba', 'baallb', 'bba', 'bdesign', 'blu_embers_timings', 'bsc', 'btech', 'cafes', 'cost_of_study', 'courses', 'cup_of_joe_timings', 'doctors', 'eligibility_criteria', 'emergency', 'exchange_program', 'faculty', 'goodbye', 'greeting', 'hours', 'mba', 'meal_menu', 'meal_timings', 'new_embers_timings', 'online_payments', 'other_requirements', 'phd', 'restaurants', 'rise_timings', 'thanks']\n",
      "125 unique stemmed words [\"'\", \"'s\", '.', 'a', 'about', 'abroad', 'addit', 'am', 'anyon', 'apply', 'ar', 'assocy', 'at', 'avail', 'ba', 'bba', 'bdesign', 'beyond', 'blu', 'breakfast', 'bsc', 'btech', 'bye', 'caf', 'can', 'cas', 'coff', 'contact', 'cost', 'cours', 'criter', 'cup', 'dat', 'day', 'deadlin', 'degr', 'detail', 'din', 'do', 'doct', 'doe', 'eat', 'elig', 'els', 'emb', 'emerg', 'entail', 'exchang', 'expens', 'facul', 'for', 'get', 'giv', 'good', 'goodby', 'hello', 'help', 'hi', 'hono', 'hospit', 'hour', 'how', 'i', 'in', 'inform', 'is', 'it', 'joe', 'lat', 'lik', 'list', 'llb', 'lunch', 'mba', 'me', 'meal', 'memb', 'menu', 'method', 'mor', 'much', 'nee', 'new', 'numb', 'of', 'off', 'on', 'onlin', 'op', 'opt', 'oth', 'particip', 'pay', 'phd', 'plac', 'profess', 'program', 'requir', 'resta', 'ris', 'see', 'shop', 'should', 'stud', 'study', 'sunday', 'tel', 'thank', 'that', 'the', 'ther', 'tim', 'timelin', 'to', 'today', 'univers', 'what', 'when', 'wher', 'who', 'with', 'work', 'woxs', 'yo', 'you']\n"
     ]
    }
   ],
   "source": [
    "print(\"Stemming, Lowering and Removing Duplicates.......\")\n",
    "words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]\n",
    "words = sorted(list(set(words)))\n",
    "\n",
    "# remove duplicates\n",
    "classes = sorted(list(set(classes)))\n",
    "\n",
    "print(len(documents), \"documents\")\n",
    "print(len(classes), \"classes\", classes)\n",
    "print(len(words), \"unique stemmed words\", words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating the Data for our Model.....\n",
      "Creating an Empty List for Output.....\n",
      "Creating Training Set, Bag of Words for our Model....\n",
      "Shuffling Randomly and Converting into Numpy Array for Faster Processing......\n",
      "Creating Train and Test Lists.....\n",
      "Building Neural Network for Our Chatbot to be Contextual....\n",
      "Resetting graph data....\n"
     ]
    }
   ],
   "source": [
    "print(\"Creating the Data for our Model.....\")\n",
    "training = []\n",
    "output = []\n",
    "print(\"Creating an Empty List for Output.....\")\n",
    "output_empty = [0] * len(classes)\n",
    "\n",
    "print(\"Creating Training Set, Bag of Words for our Model....\")\n",
    "for doc in documents:\n",
    "    # Initialize our bag of words\n",
    "    bag = []\n",
    "    # List of tokenized words for the pattern\n",
    "    pattern_words = doc[0]\n",
    "    # Stem each word\n",
    "    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n",
    "\n",
    "    # Create our bag-of-words array\n",
    "    for w in words:\n",
    "        bag.append(1 if w in pattern_words else 0)\n",
    "\n",
    "    # Output is a '0' for each tag and '1' for the current tag\n",
    "    output_row = list(output_empty)\n",
    "    output_row[classes.index(doc[1])] = 1\n",
    "\n",
    "    # Append the feature vector and output row as a tuple\n",
    "    training.append((bag, output_row))\n",
    "\n",
    "print(\"Shuffling Randomly and Converting into Numpy Array for Faster Processing......\")\n",
    "random.shuffle(training)\n",
    "\n",
    "# Separate feature vectors and output rows into separate lists\n",
    "train_x = np.array([x[0] for x in training])\n",
    "train_y = np.array([x[1] for x in training])\n",
    "\n",
    "print(\"Creating Train and Test Lists.....\")\n",
    "\n",
    "print(\"Building Neural Network for Our Chatbot to be Contextual....\")\n",
    "print(\"Resetting graph data....\")\n",
    "tf.reset_default_graph()\n"
   ]
  },
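  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To make the encoding above concrete: each training example is a bag-of-words vector over the stemmed vocabulary, paired with a one-hot row over the classes. The next cell is a tiny self-contained illustration with a made-up three-word vocabulary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustration only (made-up vocabulary and classes): how one tokenized,\n",
    "# stemmed pattern becomes a feature vector and a one-hot output row.\n",
    "toy_words = ['good', 'hello', 'hi']      # stemmed vocabulary\n",
    "toy_classes = ['goodbye', 'greeting']\n",
    "toy_pattern = ['hello']                  # tokenized + stemmed pattern\n",
    "toy_bag = [1 if w in toy_pattern else 0 for w in toy_words]\n",
    "toy_output = [0] * len(toy_classes)\n",
    "toy_output[toy_classes.index('greeting')] = 1\n",
    "print(toy_bag, toy_output)               # [0, 1, 0] [0, 1]\n"
   ]
  },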
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\initializations.py:164: calling TruncatedNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n",
      "WARNING:tensorflow:From C:\\Users\\meghn\\anaconda3\\Lib\\site-packages\\tflearn\\optimizers.py:238: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.\n",
      "\n",
      "Training....\n"
     ]
    }
   ],
   "source": [
    "net = tflearn.input_data(shape=[None, len(train_x[0])])\n",
    "net = tflearn.fully_connected(net, 8)\n",
    "net = tflearn.fully_connected(net, 8)\n",
    "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n",
    "net = tflearn.regression(net)\n",
    "print(\"Training....\")"
   ]
  },
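  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Given the data above, the input layer has one unit per unique stemmed word (125 here) and the softmax output has one unit per class (30 here); the next cell simply double-checks those shapes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check of the layer sizes implied by the data:\n",
    "# 125 input features (unique stemmed words) and 30 output classes.\n",
    "print(len(train_x[0]), len(train_y[0]))"
   ]
  },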
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Step: 12999 | total loss: \u001b[1m\u001b[32m0.00966\u001b[0m\u001b[0m | time: 0.050s\n",
      "| Adam | epoch: 1000 | loss: 0.00966 - acc: 0.9996 -- iter: 96/98\n",
      "Training Step: 13000 | total loss: \u001b[1m\u001b[32m0.00887\u001b[0m\u001b[0m | time: 0.054s\n",
      "| Adam | epoch: 1000 | loss: 0.00887 - acc: 0.9997 -- iter: 98/98\n",
      "--\n",
      "Saving the Model.......\n",
      "INFO:tensorflow:C:\\Users\\meghn\\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.\n"
     ]
    }
   ],
   "source": [
    "print(\"Training the Model.......\")\n",
    "model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)\n",
    "print(\"Saving the Model.......\")\n",
    "model.save('model.tflearn')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pickle is also Saved..........\n"
     ]
    }
   ],
   "source": [
    "print(\"Pickle is also Saved..........\")\n",
    "# Pickle the words, classes and training data for later reuse\n",
    "pickle.dump( {'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, open( \"training_data\", \"wb\" ) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading Pickle.....\n",
      "Loading the Model......\n",
      "INFO:tensorflow:Restoring parameters from C:\\Users\\meghn\\model.tflearn\n"
     ]
    }
   ],
   "source": [
    "print(\"Loading Pickle.....\")\n",
    "data = pickle.load( open( \"training_data\", \"rb\" ) )  # deserializes the data (converts the byte stream back into Python objects)\n",
    "words = data['words']\n",
    "classes = data['classes']\n",
    "train_x = data['train_x']\n",
    "train_y = data['train_y']\n",
    "\n",
    "\n",
    "with open('intents.json') as json_data:\n",
    "    intents = json.load(json_data)\n",
    "\n",
    "print(\"Loading the Model......\")\n",
    "# load our saved model\n",
    "model.load('./model.tflearn')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ERROR_THRESHOLD = 0.25\n"
     ]
    }
   ],
   "source": [
    "def clean_up_sentence(sentence):\n",
    "    # Tokenize, i.e. break the sentence into its constituent words\n",
    "    sentence_words = nltk.word_tokenize(sentence)\n",
    "    # Stem each word, i.e. reduce it to its root form\n",
    "    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n",
    "    return sentence_words\n",
    "\n",
    "# Return the bag-of-words array: 0 or 1 for each known word that appears in the sentence\n",
    "def bow(sentence, words, show_details=False):\n",
    "    sentence_words = clean_up_sentence(sentence)\n",
    "    bag = [0]*len(words)\n",
    "    for s in sentence_words:\n",
    "        for i,w in enumerate(words):\n",
    "            if w == s:\n",
    "                bag[i] = 1\n",
    "                if show_details:\n",
    "                    print(\"found in bag: %s\" % w)\n",
    "    return np.array(bag)\n",
    "\n",
    "ERROR_THRESHOLD = 0.25\n",
    "print(\"ERROR_THRESHOLD = 0.25\")\n",
    "\n",
    "def classify(sentence):\n",
    "    # Get the class probabilities from the model\n",
    "    results = model.predict([bow(sentence, words)])[0]\n",
    "    # Exclude results that fall below the threshold\n",
    "    results = [[i,r] for i,r in enumerate(results) if r>ERROR_THRESHOLD]\n",
    "    # Sort so that the highest-confidence answer comes first\n",
    "    results.sort(key=lambda x: x[1], reverse=True)\n",
    "    return_list = []\n",
    "    for r in results:\n",
    "        return_list.append((classes[r[0]], r[1]))  # tuple -> (intent, probability)\n",
    "    return return_list\n",
    "\n",
    "def response(sentence, userID='123', show_details=False):\n",
    "    results = classify(sentence)\n",
    "    if results:\n",
    "        while results:\n",
    "            for i in intents['intents']:\n",
    "                if i['tag'] == results[0][0]:\n",
    "                    # Return a random response from the list of responses for the matching intent\n",
    "                    return random.choice(i['responses'])\n",
    "            results.pop(0)\n",
    "    # If no matching intent was found, return a default response\n",
    "    return \"Sorry, I didn't understand that.\"\n"
   ]
  },
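  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick way to exercise the two helpers above (the exact intent, probability and reply depend on the trained model and on `intents.json`):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example usage; actual values depend on the trained model.\n",
    "print(classify(\"Hello\"))   # e.g. a list of (tag, probability) tuples\n",
    "print(response(\"Hello\"))   # a reply drawn from the matching intent\n"
   ]
  },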
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL: http://127.0.0.1:7871\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7871/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gradio as gr\n",
    "\n",
    "def chat_response(message):\n",
    "    return response(message)  # Return the response from the chatbot\n",
    "\n",
    "gr.Interface(fn=chat_response, inputs=\"text\", outputs=\"text\").launch()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
requirements.txt
ADDED
@@ -0,0 +1,4 @@
numpy
tensorflow  # specify the version you are using; this notebook ran on TF 2.x via tf.compat.v1 (TF 1.15 has no Python 3.11 build)
tflearn
nltk