Commit ·
18b4250
1
Parent(s): 58c3693
Add random print sample when evaluating
Browse files
main.py
CHANGED
|
@@ -150,7 +150,7 @@ if __name__ == "__main__":
|
|
| 150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
| 151 |
'cache-train-shard-{}.arrow'.format(
|
| 152 |
train_dataset_shard_idx))
|
| 153 |
-
)
|
| 154 |
# load test shard subset
|
| 155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
| 156 |
'shard_{}'.format(test_dataset_shard_idx)),
|
|
@@ -172,9 +172,6 @@ if __name__ == "__main__":
|
|
| 172 |
callbacks=[BreakEachEpoch()] # Manually break at the end of each epoch, since each epoch loops over a single shard
|
| 173 |
)
|
| 174 |
|
| 175 |
-
# training_args.num_train_epochs = epoch_idx + 1
|
| 176 |
-
|
| 177 |
-
logging.get_logger().info('Train epoch {}'.format(training_args.num_train_epochs))
|
| 178 |
logging.get_logger().info('Train shard idx: {} / {}'.format(train_dataset_shard_idx + 1, num_train_shards))
|
| 179 |
logging.get_logger().info(
|
| 180 |
'Valid shard idx: {} / {} sub_shard: {}'.format(test_dataset_shard_idx + 1, num_test_shards, idx_sub_shard))
|
|
|
|
| 150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
| 151 |
'cache-train-shard-{}.arrow'.format(
|
| 152 |
train_dataset_shard_idx))
|
| 153 |
+
) # .shard(1000, 0) # Remove the shard split when training
|
| 154 |
# load test shard subset
|
| 155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
| 156 |
'shard_{}'.format(test_dataset_shard_idx)),
|
|
|
|
| 172 |
callbacks=[BreakEachEpoch()] # Manually break at the end of each epoch, since each epoch loops over a single shard
|
| 173 |
)
|
| 174 |
|
|
|
|
|
|
|
|
|
|
| 175 |
logging.get_logger().info('Train shard idx: {} / {}'.format(train_dataset_shard_idx + 1, num_train_shards))
|
| 176 |
logging.get_logger().info(
|
| 177 |
'Valid shard idx: {} / {} sub_shard: {}'.format(test_dataset_shard_idx + 1, num_test_shards, idx_sub_shard))
|