ierhon
/

neural-chatbot

ierhon committed on Jul 29, 2023

Commit

d12bf65

1 Parent(s): d3cb051

sleepy hon forgot how to use a tokenizer

Files changed (1) hide show

test.py CHANGED Viewed

@@ -7,13 +7,17 @@ from vecs import *
 with open("dataset.json", "r") as f:
     dset = json.load(f)
 model = load_model("chatbot.keras", custom_objects={"SeqSelfAttention": SeqSelfAttention})
 def find_line_number(array):
     """Return the index of the largest value in ``array``.

     When the largest value occurs more than once, the index of its first
     occurrence is returned.

     Args:
         array: A non-empty iterable of mutually comparable values.

     Returns:
         The zero-based position of the largest value.

     Raises:
         IndexError: If ``array`` is empty.
     """
     values = list(array)
     if not values:
         # Keep the exception type the sort-based version raised on empty input.
         raise IndexError("find_line_number() needs a non-empty array")
     # One linear scan instead of sorting every (value, index) pair; max()
     # keeps the first of several equal maxima, as the stable sort did.
     return max(range(len(values)), key=values.__getitem__)
 def generate(text):
 if __name__ == "__main__": # if this code is not being imported, open the chat
     while True:

 with open("dataset.json", "r") as f:
     dset = json.load(f)
+tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
+tokenizer.fit_on_texts(list(dset.keys()))
 model = load_model("chatbot.keras", custom_objects={"SeqSelfAttention": SeqSelfAttention})
 def find_line_number(array):
     """Return the index of the largest value in ``array``.

     When the largest value occurs more than once, the index of its first
     occurrence is returned.

     Args:
         array: A non-empty iterable of mutually comparable values.

     Returns:
         The zero-based position of the largest value.

     Raises:
         IndexError: If ``array`` is empty.
     """
     values = list(array)
     if not values:
         # Keep the exception type the sort-based version raised on empty input.
         raise IndexError("find_line_number() needs a non-empty array")
     # One linear scan instead of sorting every (value, index) pair; max()
     # keeps the first of several equal maxima, as the stable sort did.
     return max(range(len(values)), key=values.__getitem__)
 def generate(text):
+    tokens = list(tokenizer.texts_to_sequences([text,])[0]) # text into tokens (almost words)
+    tokens =
 if __name__ == "__main__": # if this code is not being imported, open the chat
     while True: