Fix vocab_size and a NameError (numpy used instead of np)
Browse files
train.py
CHANGED
|
@@ -15,7 +15,7 @@ dset_size = len(dset)
|
|
| 15 |
tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
|
| 16 |
tokenizer.fit_on_texts(list(dset.keys()))
|
| 17 |
|
| 18 |
-
vocab_size = len(tokenizer.
|
| 19 |
|
| 20 |
model = Sequential()
|
| 21 |
model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
|
|
@@ -33,7 +33,7 @@ y = [] # and output y
|
|
| 33 |
|
| 34 |
for line, key in enumerate(dset):
|
| 35 |
tokens = tokenizer.texts_to_sequences([key,])[0]
|
| 36 |
-
X.append(
|
| 37 |
output_array = np.zeros(dset_size)
|
| 38 |
output_array[line] = 1 # 0 0 0 1 0 0 0 0 0, the neuron for each line activates for the correct response
|
| 39 |
y.append(output_array)
|
|
|
|
| 15 |
tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
|
| 16 |
tokenizer.fit_on_texts(list(dset.keys()))
|
| 17 |
|
| 18 |
+
vocab_size = len(tokenizer.word_index)
|
| 19 |
|
| 20 |
model = Sequential()
|
| 21 |
model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
|
|
|
|
| 33 |
|
| 34 |
for line, key in enumerate(dset):
|
| 35 |
tokens = tokenizer.texts_to_sequences([key,])[0]
|
| 36 |
+
X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len])) # refusing to use pad_sequences for an unspecified reason and creating the worst line of code
|
| 37 |
output_array = np.zeros(dset_size)
|
| 38 |
output_array[line] = 1 # 0 0 0 1 0 0 0 0 0, the neuron for each line activates for the correct response
|
| 39 |
y.append(output_array)
|