Update train.py
Browse files
Fix the tokenizer learning
train.py
CHANGED
@@ -10,7 +10,7 @@ with open("dataset.json", "r") as f:
     dset = json.load(f)

     tokenizer = Tokenizer()
-    tokenizer.fit_on_texts(dset)
+    tokenizer.fit_on_texts(list(dset.keys()))

     emb_size = 128 # how big are the word vectors in the input (how much information can be fit into one word)
     vocab_size = len(tokenizer.get_vocabulary())