Update train.py
Browse files
train.py
CHANGED
|
@@ -23,7 +23,7 @@ num_predict = 250
|
|
| 23 |
|
| 24 |
infer_every = 100
|
| 25 |
reset_state_every = 16
|
| 26 |
-
validate_every =
|
| 27 |
|
| 28 |
|
| 29 |
|
|
@@ -72,7 +72,7 @@ print(f"Dataset has {tokenized_datasets['train'].num_rows} rows of {batch_size}
|
|
| 72 |
# ============================
|
| 73 |
# Split Dataset into Train and Validation
|
| 74 |
# ============================
|
| 75 |
-
split_dataset = tokenized_datasets['train'].train_test_split(test_size
|
| 76 |
train_dataset = split_dataset['train']
|
| 77 |
valid_dataset = split_dataset['test']
|
| 78 |
|
|
|
|
| 23 |
|
| 24 |
infer_every = 100
|
| 25 |
reset_state_every = 16
|
| 26 |
+
validate_every = 100 # Perform validation every 100 training steps; its reciprocal (1/validate_every) is also passed as test_size when splitting the dataset
|
| 27 |
|
| 28 |
|
| 29 |
|
|
|
|
| 72 |
# ============================
|
| 73 |
# Split Dataset into Train and Validation
|
| 74 |
# ============================
|
| 75 |
+
# Hold out a 1/validate_every share of the 'train' split for validation
# (test_size is tied to the validation interval defined above in this file).
split_dataset = tokenized_datasets['train'].train_test_split(
    test_size=1 / validate_every,
)
|
| 76 |
train_dataset = split_dataset['train']
|
| 77 |
valid_dataset = split_dataset['test']
|
| 78 |
|