Use new dataset.json
Browse files
train.py
CHANGED
|
@@ -8,10 +8,12 @@ from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
|
|
| 8 |
from model_settings import *
|
| 9 |
|
| 10 |
|
| 11 |
-
with open("dataset.json", "r") as f:
|
| 12 |
dset = json.load(f)
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
tokenizer = Tokenizer() # a tokenizer splits text into words; it may also normalize the text, e.g. by making all the letters lowercase
|
| 16 |
tokenizer.fit_on_texts(list(dset.keys()))
|
| 17 |
|
|
@@ -31,11 +33,11 @@ model.add(Dense(dset_size, activation="linear")) # TBH it doesn't matter that mu
|
|
| 31 |
X = [] # we're loading the training data into input X
|
| 32 |
y = [] # and output y
|
| 33 |
|
| 34 |
-
for
|
| 35 |
tokens = tokenizer.texts_to_sequences([key,])[0]
|
| 36 |
X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len])) # right-pad the token ids with zeros, then cut to exactly inp_len entries (done by hand instead of with pad_sequences)
|
| 37 |
output_array = np.zeros(dset_size)
|
| 38 |
-
output_array[
|
| 39 |
y.append(output_array)
|
| 40 |
|
| 41 |
X = np.array(X) # plain Python lists are way slower than numpy arrays (remember, a list and an array are not the same thing; an array is far more limited)
|
|
|
|
| 8 |
from model_settings import *
|
| 9 |
|
| 10 |
|
| 11 |
+
with open("dataset.json", "r") as f:
|
| 12 |
dset = json.load(f)
|
| 13 |
|
| 14 |
+
with open("responses.txt", "r") as f:
|
| 15 |
+
dset_size = len(f.readlines())
|
| 16 |
+
|
| 17 |
tokenizer = Tokenizer() # a tokenizer splits text into words; it may also normalize the text, e.g. by making all the letters lowercase
|
| 18 |
tokenizer.fit_on_texts(list(dset.keys()))
|
| 19 |
|
|
|
|
| 33 |
X = [] # we're loading the training data into input X
|
| 34 |
y = [] # and output y
|
| 35 |
|
| 36 |
+
for key in dset:
|
| 37 |
tokens = tokenizer.texts_to_sequences([key,])[0]
|
| 38 |
X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len])) # right-pad the token ids with zeros, then cut to exactly inp_len entries (done by hand instead of with pad_sequences)
|
| 39 |
output_array = np.zeros(dset_size)
|
| 40 |
+
output_array[dset[key]] = 1 # one-hot target, e.g. 0 0 0 1 0 0 0 0 0: only the output neuron at index dset[key] is set to 1 (presumably the index of the correct response line; confirm against dataset.json)
|
| 41 |
y.append(output_array)
|
| 42 |
|
| 43 |
X = np.array(X) # plain Python lists are way slower than numpy arrays (remember, a list and an array are not the same thing; an array is far more limited)
|