# csp-chat/app.py
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import tflearn
import random
import gradio as gr
# Used for contextualisation and other NLP tasks.
import nltk
# Download the 'punkt' tokenizer data
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
# Other
import json
import pickle
import warnings
warnings.filterwarnings("ignore")
# In[6]:
print("Processing the Intents.....")
with open('intents.json') as json_data:
    intents = json.load(json_data)
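# For reference, the code below expects each entry in intents.json to carry 'tag',
# 'patterns' and 'responses' keys. A hypothetical example (not this Space's actual data):
# {
#   "intents": [
#     {"tag": "greeting",
#      "patterns": ["Hi", "Hello", "How are you?"],
#      "responses": ["Hello!", "Hi there, how can I help?"]}
#   ]
# }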
# In[7]:
words = []
classes = []
documents = []
ignore_words = ['?']
print("Looping through the Intents to Convert them to words, classes, documents and ignore_words.......")
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize each word in the sentence
        w = nltk.word_tokenize(pattern)
        # Add to our words list
        words.extend(w)
        # Add to documents in our corpus
        documents.append((w, intent['tag']))
        # Add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
# In[8]:
print("Stemming, Lowering and Removing Duplicates.......")
words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))
# remove duplicates
classes = sorted(list(set(classes)))
print (len(documents), "documents")
print (len(classes), "classes", classes)
print (len(words), "unique stemmed words", words)
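# As a rough illustration (assumed words, not taken from the real vocabulary), the
# Lancaster stemmer maps inflected forms to a common root, e.g. "booked"/"booking"
# -> "book", so differently phrased patterns share bag-of-words features.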
# In[9]:
print("Creating the Data for our Model.....")
training = []
output = []
print("Creating an List (Empty) for Output.....")
output_empty = [0] * len(classes)
print("Creating Training Set, Bag of Words for our Model....")
for doc in documents:
    # Initialize our bag of words
    bag = []
    # List of tokenized words for the pattern
    pattern_words = doc[0]
    # Stem each word
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    # Create our bag-of-words array
    for w in words:
        bag.append(1 if w in pattern_words else 0)
    # Output is a '0' for each tag and '1' for the current tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    # Append the feature vector and output row as a tuple
    training.append((bag, output_row))
print("Shuffling Randomly and Converting into Numpy Array for Faster Processing......")
random.shuffle(training)
# Separate feature vectors and output rows into separate lists
train_x = np.array([x[0] for x in training])
train_y = np.array([x[1] for x in training])
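# At this point train_x has shape (len(documents), len(words)): one bag-of-words row
# per pattern. train_y has shape (len(documents), len(classes)): one one-hot row per pattern.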
print("Creating Train and Test Lists.....")
print("Building Neural Network for Our Chatbot to be Contextual....")
print("Resetting graph data....")
tf.reset_default_graph()
# In[10]:
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net)
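# The architecture: an input layer sized to the vocabulary, two fully connected
# hidden layers of 8 units each, and a softmax output with one unit per intent class.
# tflearn.regression is left at its defaults here (to the best of my knowledge, the
# Adam optimizer with categorical cross-entropy loss).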
print("Training....")
# In[11]:
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
# In[12]:
print("Training the Model.......")
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
print("Saving the Model.......")
model.save('model.tflearn')
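# Note: model.save goes through a TensorFlow Saver, so it writes checkpoint files
# (model.tflearn.meta, model.tflearn.index, model.tflearn.data-*) next to this script.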
# In[13]:
print("Pickle is also Saved..........")
#pickling
pickle.dump( {'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, open( "training_data", "wb" ) )
# In[14]:
print("Loading Pickle.....")
data = pickle.load(open("training_data", "rb"))  # deserialize the data (read it back from the byte stream)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']
with open('intents.json') as json_data:
    intents = json.load(json_data)
print("Loading the Model......")
# load our saved model
model.load('./model.tflearn')
# In[30]:
def clean_up_sentence(sentence):
    # Tokenize, i.e. break the sentence into its constituent words
    sentence_words = nltk.word_tokenize(sentence)
    # Stemming means reducing each word to its root form
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words
# Return the bag-of-words array: 1 for each vocabulary word that exists in the sentence, 0 otherwise
def bow(sentence, words, show_details=False):
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
                if show_details:
                    print("found in bag: %s" % w)
    return np.array(bag)
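# Hypothetical usage sketch (the exact vector depends on the learned vocabulary):
#   bow("Hello there", words)
#   -> array([0, 1, 0, ..., 1, 0])  # 1s at the indices of stemmed words found in the sentence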
ERROR_THRESHOLD = 0.25
print("ERROR_THRESHOLD = 0.25")
def classify(sentence):
    # Get the probability for each class from the model's prediction
    results = model.predict([bow(sentence, words)])[0]
    # Exclude results that fall below the threshold
    results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
    # Sort so that the highest-confidence answer comes first
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append((classes[r[0]], r[1]))  # tuple -> (intent, probability)
    return return_list
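# Hypothetical usage sketch (tags and probabilities depend on intents.json and training):
#   classify("Hi, how are you?")
#   -> [('greeting', 0.93)]  # intents scoring below ERROR_THRESHOLD are dropped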
def response(sentence, userID='123', show_details=False):
    results = classify(sentence)
    if results:
        while results:
            for i in intents['intents']:
                if i['tag'] == results[0][0]:
                    # Return a random response from the list of responses for the matching intent
                    return random.choice(i['responses'])
            results.pop(0)
    # If no matching intent was found, return a default response
    return "Sorry, I didn't understand that."
# In[31]:
def chatbot_response(message, chat_history):
    # Classify the message, pick a reply, and append the exchange to the history
    response_text = f"Chatbot: {response(message)}"
    chat_history = chat_history or []  # Gradio passes None before the first turn
    chat_history.append((message, response_text))
    return chat_history
# Define the Gradio interface
interface = gr.Interface(
    fn=chatbot_response,  # Function that handles chatbot responses
    inputs=[
        gr.Textbox(lines=1, label="You"),  # Input box for user input
        gr.Chatbot(label="Chat History")   # Chat history passed back in with each message
    ],
    outputs=[
        gr.Chatbot(label="Chat History")   # Chat history as output to display the conversation
    ],
    title="Chatbot with History",
)
# Launch the Gradio interface
interface.launch()