Upload folder using huggingface_hub
Browse files- added_tokens.json +4 -0
- all_results.json +1 -0
- config.json +31 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- optimizer.bin +3 -0
- random_states_0.pkl +3 -0
- scheduler.bin +3 -0
- special_tokens_map.json +23 -0
- tokenizer_config.json +55 -0
- training.log +119 -0
- vocab.json +0 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<eos>": 32769,
|
| 3 |
+
"<s>": 32768
|
| 4 |
+
}
|
all_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"perplexity": 46.621519896025845}
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_function": "gelu_new",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"attn_pdrop": 0.1,
|
| 7 |
+
"bos_token_id": 32768,
|
| 8 |
+
"embd_pdrop": 0.1,
|
| 9 |
+
"eos_token_id": 32769,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"layer_norm_epsilon": 1e-05,
|
| 12 |
+
"model_type": "gpt2",
|
| 13 |
+
"n_embd": 512,
|
| 14 |
+
"n_head": 8,
|
| 15 |
+
"n_inner": null,
|
| 16 |
+
"n_layer": 4,
|
| 17 |
+
"n_positions": 1024,
|
| 18 |
+
"reorder_and_upcast_attn": false,
|
| 19 |
+
"resid_pdrop": 0.1,
|
| 20 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 21 |
+
"scale_attn_weights": true,
|
| 22 |
+
"summary_activation": null,
|
| 23 |
+
"summary_first_dropout": 0.1,
|
| 24 |
+
"summary_proj_to_labels": true,
|
| 25 |
+
"summary_type": "cls_index",
|
| 26 |
+
"summary_use_proj": true,
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.37.0",
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 32770
|
| 31 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 32768,
|
| 4 |
+
"eos_token_id": 32769,
|
| 5 |
+
"transformers_version": "4.37.0"
|
| 6 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18b4ab73576c573fca5f2a0031b5aedceb42663a6671f1be610ace94175fa282
|
| 3 |
+
size 119657520
|
optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cda813a90dd360d75ff3b9b125c659c1ddcec68607da467d56a2ed505c89b44
|
| 3 |
+
size 239348538
|
random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:370951e930d9408bb4cfe983040435451862e89032ea88f1270c1517f870d3f1
|
| 3 |
+
size 14280
|
scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2791ba824345c6824fda951b2a4a38eb48fca9be1cef8273296717a13459166
|
| 3 |
+
size 1064
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<eos>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"unk_token": {
|
| 17 |
+
"content": "<|endoftext|>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
}
|
| 23 |
+
}
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<unk>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<pad>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "<|endoftext|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"32768": {
|
| 30 |
+
"content": "<s>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"32769": {
|
| 38 |
+
"content": "<eos>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"bos_token": "<s>",
|
| 47 |
+
"clean_up_tokenization_spaces": true,
|
| 48 |
+
"eos_token": "<eos>",
|
| 49 |
+
"errors": "replace",
|
| 50 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 51 |
+
"pad_token": null,
|
| 52 |
+
"token": null,
|
| 53 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 54 |
+
"unk_token": "<|endoftext|>"
|
| 55 |
+
}
|
training.log
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
01/08/2026 03:36:38 - INFO - __main__ - Distributed environment: DistributedType.NO
|
| 2 |
+
Num processes: 1
|
| 3 |
+
Process index: 0
|
| 4 |
+
Local process index: 0
|
| 5 |
+
Device: cuda
|
| 6 |
+
|
| 7 |
+
Mixed precision type: no
|
| 8 |
+
|
| 9 |
+
01/08/2026 03:36:38 - INFO - __main__ - Arguments:
|
| 10 |
+
01/08/2026 03:36:38 - INFO - __main__ - Namespace(train_file='data/preprocessed/dependency/train.sequential=False.random=False.convert_method=exponential.jsonl', validation_file='data/preprocessed/dependency/val.sequential=False.random=False.convert_method=exponential.jsonl', model_name_or_path=None, per_device_train_batch_size=32, per_device_eval_batch_size=32, learning_rate=0.0001, weight_decay=0.0, num_train_epochs=10, max_train_steps=None, gradient_accumulation_steps=1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, num_warmup_steps=0, output_dir='experiments/128/', seed=128, block_size=512, preprocessing_num_workers=None, overwrite_cache=False, trust_remote_code=False, checkpointing_steps='epoch', resume_from_checkpoint=None, with_tracking=True, report_to='wandb', low_cpu_mem_usage=False, n_positions=1024, n_embd=512, n_layer=4, n_head=8, n_inner=None, activation_function='gelu_new', resid_pdrop=0.1, embd_pdrop=0.1, attn_pdrop=0.1, layer_norm_epsilon=1e-05, initializer_range=0.02, attn_loss_weight=0.5, attn_loss_layers=[3], attn_loss_heads=[0], attn_loss_reduction='none')
|
| 11 |
+
01/08/2026 03:36:38 - INFO - __main__ - Training new model from scratch
|
| 12 |
+
01/08/2026 03:36:39 - INFO - __main__ - Sample 496287 of the training set: {'token_ids': [32768, 6067, 272, 515, 485, 318, 1388, 291, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17487770452710943, 0.17487770452710943, 0.17487770452710943, 0.4753668864186718, 0.0, 0.0, 0.0, 0.0], [0.14884758120207758, 0.14884758120207758, 0.14884758120207758, 0.4046096751916898, 0.14884758120207758, 0.0, 0.0, 0.0], [0.07088509576696811, 0.07088509576696811, 0.07088509576696811, 0.1926856677319286, 0.07088509576696811, 0.523773949200199, 0.0, 0.0], [0.12366807520346387, 0.12366807520346387, 0.12366807520346387, 0.3361646815860826, 0.12366807520346387, 0.12366807520346387, 0.045494942396598195, 0.0], [0.1100574786562937, 0.1100574786562937, 0.1100574786562937, 0.2991672443174225, 0.1100574786562937, 0.1100574786562937, 0.04048788374481527, 0.1100574786562937]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 6067, 272, 515, 485, 318, 1388, 291, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False], [False, False, True, False, False, False, False, False], [False, False, False, True, False, False, False, False], [False, False, False, False, True, False, False, False], [False, False, False, False, False, True, False, False], [False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False], [False, False, True, False, False, False, False, False], [False, False, False, True, False, False, False, False], [False, False, False, False, True, False, False, False], [False, False, False, False, False, True, False, False], [False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, True]]}.
|
| 13 |
+
01/08/2026 03:36:39 - INFO - __main__ - Sample 850736 of the training set: {'token_ids': [32768, 43, 1049, 314, 6556, 318, 267, 3950, 269, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17487770452710943, 0.17487770452710943, 0.17487770452710943, 0.4753668864186718, 0.0, 0.0, 0.0, 0.0], [0.07629314247787525, 0.07629314247787525, 0.07629314247787525, 0.20738626283364517, 0.5637343097327291, 0.0, 0.0, 0.0], [0.04878906985862268, 0.04878906985862268, 0.04878906985862268, 0.13262244202411294, 0.36050517420000955, 0.36050517420000955, 0.0, 0.0], [0.13847545210043435, 0.13847545210043435, 0.13847545210043435, 0.37641530513226173, 0.13847545210043435, 0.018740614531330278, 0.050942271934670616, 0.0], [0.12163235653869706, 0.12163235653869706, 0.12163235653869706, 0.33063102453179205, 0.12163235653869706, 0.016461149422901226, 0.0447460433518215, 0.12163235653869706]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False], [False, False, True, True, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 43, 1049, 314, 6556, 318, 267, 3950, 269, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, True, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False], [False, False, True, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]]}.
|
| 14 |
+
01/08/2026 03:36:39 - INFO - __main__ - Sample 1072717 of the training set: {'token_ids': [32768, 1162, 1594, 313, 27833, 3442, 397, 1207, 1702, 485, 2260, 566, 872, 298, 2168, 311, 267, 1474, 365, 1707, 1159, 365, 408, 267, 29500, 764, 9869, 365, 298, 861, 405, 560, 10219, 269, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26894142136999505, 0.731058578630005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09003057317038045, 0.2447284710547976, 0.665240955774822, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03205860328008498, 0.08714431874203254, 0.23688281808991007, 0.6439142598879724, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06745080586634483, 0.18335029990140392, 0.49839778846450256, 0.18335029990140392, 0.06745080586634483, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04501528658519023, 0.12236423552739882, 0.33262047788741095, 0.12236423552739882, 0.04501528658519023, 0.33262047788741095, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03377952487759491, 0.09182226864874651, 0.24959880431577242, 0.09182226864874651, 0.03377952487759491, 0.24959880431577242, 0.24959880431577242, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02012506971027169, 0.054705611289903075, 0.14870526908408752, 0.054705611289903075, 0.02012506971027169, 0.14870526908408752, 0.14870526908408752, 0.4042228307473879, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009588884358741272, 0.02606529010756156, 0.0708528044528979, 0.02606529010756156, 0.009588884358741272, 0.0708528044528979, 0.0708528044528979, 0.1925978908396745, 0.5235353468690261, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27065309628634215, 0.0995677098131401, 0.27065309628634215, 0.0995677098131401, 0.03662891344477831, 0.03662891344477831, 0.03662891344477831, 0.0995677098131401, 0.03662891344477831, 0.013475024208782176, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15593208485436483, 0.057364208236921656, 0.15593208485436483, 0.057364208236921656, 0.021103112869440988, 0.021103112869440988, 0.021103112869440988, 0.057364208236921656, 0.021103112869440988, 0.007763401369387824, 0.4238673527333538, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10951306984813354, 0.0402876069367005, 0.10951306984813354, 0.0402876069367005, 0.014820982326008102, 0.014820982326008102, 0.014820982326008102, 0.0402876069367005, 0.014820982326008102, 0.005452334695703684, 0.2976873877469477, 0.2976873877469477, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06053127227234526, 0.022268210616946796, 0.06053127227234526, 0.022268210616946796, 0.008192016877650367, 0.008192016877650367, 0.008192016877650367, 0.022268210616946796, 0.008192016877650367, 0.003013674591017041, 0.16454105747142297, 0.16454105747142297, 0.44726896656000464, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12271734636543012, 0.04514518880295676, 0.12271734636543012, 0.04514518880295676, 0.016607986828410987, 0.016607986828410987, 0.016607986828410987, 0.04514518880295676, 0.016607986828410987, 0.006109736913418509, 0.33358033266186343, 0.04514518880295676, 0.12271734636543012, 0.04514518880295676, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06435891335043971, 0.02367632107776104, 0.06435891335043971, 0.02367632107776104, 0.008710031767082373, 0.008710031767082373, 0.008710031767082373, 0.02367632107776104, 0.008710031767082373, 0.003204241619059774, 0.17494566465987052, 0.02367632107776104, 0.06435891335043971, 0.02367632107776104, 0.47555162121261596, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02807143598771085, 0.010326904184038983, 0.02807143598771085, 0.010326904184038983, 0.0037990557402552906, 0.0037990557402552906, 0.0037990557402552906, 0.010326904184038983, 0.0037990557402552906, 0.0013975945027042763, 0.07630607434414569, 0.010326904184038983, 0.02807143598771085, 0.010326904184038983, 0.20742141529073618, 0.5638298640180653, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01795044117880241, 0.006603598269638677, 0.01795044117880241, 0.006603598269638677, 0.00242932804115538, 0.00242932804115538, 0.00242932804115538, 0.006603598269638677, 0.00242932804115538, 0.0008936998422023558, 0.048794358069161554, 0.006603598269638677, 0.01795044117880241, 0.006603598269638677, 0.13263681687072582, 0.3605442490843441, 0.3605442490843441, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013193573961951758, 0.004853644616176905, 0.013193573961951758, 0.004853644616176905, 0.0017855560690439394, 0.0017855560690439394, 0.0017855560690439394, 0.004853644616176905, 0.0017855560690439394, 0.0006568693688601616, 0.03586385235320387, 0.004853644616176905, 0.013193573961951758, 0.004853644616176905, 0.09748805815025224, 0.2650000169615894, 0.2650000169615894, 0.2650000169615894, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010429702596875434, 0.0038368731629228755, 0.010429702596875434, 0.0038368731629228755, 0.0014115067550217718, 0.0014115067550217718, 0.0014115067550217718, 0.0038368731629228755, 0.0014115067550217718, 0.0005192643162471252, 0.028350871045318606, 0.0038368731629228755, 0.010429702596875434, 0.0038368731629228755, 0.07706565758347526, 0.20948617660740781, 0.20948617660740781, 0.20948617660740781, 0.20948617660740781, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0086232507643292, 0.003172316672262639, 0.0086232507643292, 0.003172316672262639, 0.001167030084610829, 0.001167030084610829, 0.001167030084610829, 0.003172316672262639, 0.001167030084610829, 0.0004293263753568929, 0.023440425854921634, 0.003172316672262639, 0.0086232507643292, 0.003172316672262639, 0.06371768365277505, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04824470580633239, 0.0177482354115142, 0.04824470580633239, 0.0177482354115142, 0.006529210924967047, 0.006529210924967047, 0.006529210924967047, 0.0177482354115142, 0.006529210924967047, 0.0024019624663673533, 0.13114270711270593, 0.0177482354115142, 0.04824470580633239, 0.0177482354115142, 0.35648283767939537, 0.13114270711270593, 0.0177482354115142, 0.0177482354115142, 0.0177482354115142, 0.0177482354115142, 0.04824470580633239, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.042651298994341065, 0.01569053603927429, 0.042651298994341065, 0.01569053603927429, 0.005772225629808603, 0.005772225629808603, 0.005772225629808603, 0.01569053603927429, 0.005772225629808603, 0.002123483139009465, 0.11593825101649086, 0.01569053603927429, 0.042651298994341065, 0.01569053603927429, 0.31515284096145063, 0.11593825101649086, 0.01569053603927429, 0.01569053603927429, 0.01569053603927429, 0.01569053603927429, 0.042651298994341065, 0.11593825101649086, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.022971880611964247, 0.008450882602186497, 0.022971880611964247, 0.008450882602186497, 0.003108905969097833, 0.003108905969097833, 0.003108905969097833, 0.008450882602186497, 0.003108905969097833, 0.001143702590566272, 0.06244404563303305, 0.008450882602186497, 0.022971880611964247, 0.008450882602186497, 0.16974051453974115, 0.06244404563303305, 0.008450882602186497, 0.008450882602186497, 0.008450882602186497, 0.008450882602186497, 0.022971880611964247, 0.06244404563303305, 0.4614025562266667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015719064205880078, 0.005782720555797185, 0.015719064205880078, 0.005782720555797185, 0.002127344006517281, 0.002127344006517281, 0.002127344006517281, 0.005782720555797185, 0.002127344006517281, 0.0007826061242969943, 0.04272884659122482, 0.005782720555797185, 0.015719064205880078, 0.005782720555797185, 0.11614904723994066, 0.04272884659122482, 0.005782720555797185, 0.005782720555797185, 0.005782720555797185, 0.005782720555797185, 0.015719064205880078, 0.04272884659122482, 0.3157258445051619, 0.3157258445051619, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011947066534815951, 0.004395080160466132, 0.011947066534815951, 0.004395080160466132, 0.0016168596333359737, 0.0016168596333359737, 0.0016168596333359737, 0.004395080160466132, 0.0016168596333359737, 0.0005948094183643012, 0.03247549386498137, 0.004395080160466132, 0.011947066534815951, 0.004395080160466132, 0.08827754484341206, 0.03247549386498137, 0.004395080160466132, 0.004395080160466132, 0.004395080160466132, 0.004395080160466132, 0.011947066534815951, 0.03247549386498137, 0.23996324600882557, 0.23996324600882557, 0.23996324600882557, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09240574017894794, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.012505757019806761, 0.012505757019806761, 0.012505757019806761, 0.03399417205806486, 0.012505757019806761, 0.004600610903872354, 0.25118484437374217, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.004600610903872354, 0.004600610903872354, 0.004600610903872354, 0.004600610903872354, 0.012505757019806761, 0.03399417205806486, 0.03399417205806486, 0.004600610903872354, 0.004600610903872354, 0.012505757019806761, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08458921148090166, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.011447904894529556, 0.011447904894529556, 0.011447904894529556, 0.031118631848727055, 0.011447904894529556, 0.0042114488551833525, 0.2299373164522143, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.0042114488551833525, 0.0042114488551833525, 0.0042114488551833525, 0.0042114488551833525, 0.011447904894529556, 0.031118631848727055, 0.031118631848727055, 0.0042114488551833525, 0.0042114488551833525, 0.011447904894529556, 0.08458921148090166, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07799193518198774, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.010555060638025849, 0.010555060638025849, 0.010555060638025849, 0.028691629530629002, 0.010555060638025849, 0.003882989809047637, 0.21200406017155304, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.003882989809047637, 0.003882989809047637, 0.003882989809047637, 0.003882989809047637, 0.010555060638025849, 0.028691629530629002, 0.028691629530629002, 0.003882989809047637, 0.003882989809047637, 0.010555060638025849, 0.07799193518198774, 0.07799193518198774, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04947826496904969, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.006696155003642512, 0.006696155003642512, 0.006696155003642512, 0.018202036466946552, 0.006696155003642512, 0.0024633777607373647, 0.13449586856904952, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.0024633777607373647, 0.0024633777607373647, 0.0024633777607373647, 0.0024633777607373647, 0.006696155003642512, 0.018202036466946552, 0.018202036466946552, 0.0024633777607373647, 0.0024633777607373647, 0.006696155003642512, 0.04947826496904969, 0.04947826496904969, 0.3655976755340633, 0.0, 0.0, 0.0, 0.0], [0.03623194873241091, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.004903461043915262, 0.004903461043915262, 0.004903461043915262, 0.013328989052231675, 0.004903461043915262, 0.0018038825086414835, 0.09848864784897231, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.0018038825086414835, 0.0018038825086414835, 0.0018038825086414835, 0.0018038825086414835, 0.004903461043915262, 0.013328989052231675, 0.013328989052231675, 0.0018038825086414835, 0.0018038825086414835, 0.004903461043915262, 0.03623194873241091, 0.03623194873241091, 0.2677199017573635, 0.2677199017573635, 0.0, 0.0, 0.0], [0.028580405405156724, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.003867937260524102, 0.003867937260524102, 0.003867937260524102, 0.010514143568902325, 0.003867937260524102, 0.0014229345978878062, 0.0776895966628302, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.0014229345978878062, 0.0014229345978878062, 0.0014229345978878062, 0.0014229345978878062, 0.003867937260524102, 0.010514143568902325, 0.010514143568902325, 0.0014229345978878062, 0.0014229345978878062, 0.003867937260524102, 0.028580405405156724, 0.028580405405156724, 0.21118221886888386, 0.21118221886888386, 0.21118221886888386, 0.0, 0.0], [0.06915443502737553, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.009359035051497799, 0.009359035051497799, 0.009359035051497799, 0.02544049491239773, 0.009359035051497799, 0.0034429965846489515, 0.18798124409226669, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.0034429965846489515, 0.0034429965846489515, 0.0034429965846489515, 0.0034429965846489515, 0.009359035051497799, 0.02544049491239773, 0.02544049491239773, 0.0034429965846489515, 0.0034429965846489515, 0.009359035051497799, 0.06915443502737553, 0.06915443502737553, 0.06915443502737553, 0.009359035051497799, 0.009359035051497799, 0.02544049491239773, 0.0], [0.06468142745496337, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.008753679304765885, 0.008753679304765885, 0.008753679304765885, 0.02379496738630311, 0.008753679304765885, 0.0032202986508312937, 0.17582234888961898, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.0032202986508312937, 0.0032202986508312937, 0.0032202986508312937, 0.0032202986508312937, 0.008753679304765885, 0.02379496738630311, 0.02379496738630311, 0.0032202986508312937, 0.0032202986508312937, 0.008753679304765885, 0.06468142745496337, 0.06468142745496337, 0.06468142745496337, 0.008753679304765885, 0.008753679304765885, 0.02379496738630311, 0.06468142745496337]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 1162, 1594, 313, 27833, 3442, 397, 1207, 1702, 485, 2260, 566, 872, 298, 2168, 311, 267, 1474, 365, 1707, 1159, 365, 408, 267, 29500, 764, 9869, 365, 298, 861, 405, 560, 10219, 269, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]]}.
|
| 15 |
+
01/08/2026 03:36:39 - INFO - __main__ - ***** Running training *****
|
| 16 |
+
01/08/2026 03:36:39 - INFO - __main__ - Num examples = 1132837
|
| 17 |
+
01/08/2026 03:36:39 - INFO - __main__ - Num Epochs = 10
|
| 18 |
+
01/08/2026 03:36:39 - INFO - __main__ - Instantaneous batch size per device = 32
|
| 19 |
+
01/08/2026 03:36:39 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32
|
| 20 |
+
01/08/2026 03:36:39 - INFO - __main__ - Gradient Accumulation steps = 1
|
| 21 |
+
01/08/2026 03:36:39 - INFO - __main__ - Total optimization steps = 354020
|
| 22 |
+
01/08/2026 03:36:39 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_0_init
|
| 23 |
+
01/08/2026 03:36:39 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 24 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_0_init/model.safetensors
|
| 25 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_0_init/optimizer.bin
|
| 26 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_0_init/scheduler.bin
|
| 27 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_0_init/sampler.bin
|
| 28 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_0_init/sampler_1.bin
|
| 29 |
+
01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_0_init/random_states_0.pkl
|
| 30 |
+
01/08/2026 03:53:49 - INFO - __main__ - epoch 0: perplexity: 61.45426781123526 eval_loss: 4.183549404144287 eval_nwp_loss: 4.118293285369873 eval_attn_loss: 0.13051171600818634
|
| 31 |
+
01/08/2026 03:53:49 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_1
|
| 32 |
+
01/08/2026 03:53:49 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 33 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_1/model.safetensors
|
| 34 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_1/optimizer.bin
|
| 35 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_1/scheduler.bin
|
| 36 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_1/sampler.bin
|
| 37 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_1/sampler_1.bin
|
| 38 |
+
01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_1/random_states_0.pkl
|
| 39 |
+
01/08/2026 04:12:15 - INFO - __main__ - epoch 1: perplexity: 54.16680628732148 eval_loss: 4.053577899932861 eval_nwp_loss: 3.992068290710449 eval_attn_loss: 0.12301884591579437
|
| 40 |
+
01/08/2026 04:12:15 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_2
|
| 41 |
+
01/08/2026 04:12:15 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 42 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_2/model.safetensors
|
| 43 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_2/optimizer.bin
|
| 44 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_2/scheduler.bin
|
| 45 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_2/sampler.bin
|
| 46 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_2/sampler_1.bin
|
| 47 |
+
01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_2/random_states_0.pkl
|
| 48 |
+
01/08/2026 04:30:23 - INFO - __main__ - epoch 2: perplexity: 51.11190373140643 eval_loss: 3.9937267303466797 eval_nwp_loss: 3.9340174198150635 eval_attn_loss: 0.11941886693239212
|
| 49 |
+
01/08/2026 04:30:23 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_3
|
| 50 |
+
01/08/2026 04:30:23 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 51 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_3/model.safetensors
|
| 52 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_3/optimizer.bin
|
| 53 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_3/scheduler.bin
|
| 54 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_3/sampler.bin
|
| 55 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_3/sampler_1.bin
|
| 56 |
+
01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_3/random_states_0.pkl
|
| 57 |
+
01/08/2026 04:49:04 - INFO - __main__ - epoch 3: perplexity: 49.0719534463731 eval_loss: 3.9519906044006348 eval_nwp_loss: 3.8932876586914062 eval_attn_loss: 0.11740673333406448
|
| 58 |
+
01/08/2026 04:49:04 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_4
|
| 59 |
+
01/08/2026 04:49:04 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 60 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_4/model.safetensors
|
| 61 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_4/optimizer.bin
|
| 62 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_4/scheduler.bin
|
| 63 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_4/sampler.bin
|
| 64 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_4/sampler_1.bin
|
| 65 |
+
01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_4/random_states_0.pkl
|
| 66 |
+
01/08/2026 05:07:52 - INFO - __main__ - epoch 4: perplexity: 48.164000025201815 eval_loss: 3.932680130004883 eval_nwp_loss: 3.8746118545532227 eval_attn_loss: 0.11613597720861435
|
| 67 |
+
01/08/2026 05:07:52 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_5
|
| 68 |
+
01/08/2026 05:07:52 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 69 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_5/model.safetensors
|
| 70 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_5/optimizer.bin
|
| 71 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_5/scheduler.bin
|
| 72 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_5/sampler.bin
|
| 73 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_5/sampler_1.bin
|
| 74 |
+
01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_5/random_states_0.pkl
|
| 75 |
+
01/08/2026 05:26:02 - INFO - __main__ - epoch 5: perplexity: 47.37661658975665 eval_loss: 3.9156994819641113 eval_nwp_loss: 3.858128786087036 eval_attn_loss: 0.11514072865247726
|
| 76 |
+
01/08/2026 05:26:02 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_6
|
| 77 |
+
01/08/2026 05:26:02 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 78 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_6/model.safetensors
|
| 79 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_6/optimizer.bin
|
| 80 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_6/scheduler.bin
|
| 81 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_6/sampler.bin
|
| 82 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_6/sampler_1.bin
|
| 83 |
+
01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_6/random_states_0.pkl
|
| 84 |
+
01/08/2026 05:44:03 - INFO - __main__ - epoch 6: perplexity: 47.01068597593352 eval_loss: 3.9075894355773926 eval_nwp_loss: 3.850374937057495 eval_attn_loss: 0.11442875117063522
|
| 85 |
+
01/08/2026 05:44:03 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_7
|
| 86 |
+
01/08/2026 05:44:03 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 87 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_7/model.safetensors
|
| 88 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_7/optimizer.bin
|
| 89 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_7/scheduler.bin
|
| 90 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_7/sampler.bin
|
| 91 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_7/sampler_1.bin
|
| 92 |
+
01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_7/random_states_0.pkl
|
| 93 |
+
01/08/2026 06:02:27 - INFO - __main__ - epoch 7: perplexity: 46.709559367074704 eval_loss: 3.900867223739624 eval_nwp_loss: 3.8439488410949707 eval_attn_loss: 0.11383768171072006
|
| 94 |
+
01/08/2026 06:02:27 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_8
|
| 95 |
+
01/08/2026 06:02:27 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 96 |
+
01/08/2026 06:02:27 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_8/model.safetensors
|
| 97 |
+
01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_8/optimizer.bin
|
| 98 |
+
01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_8/scheduler.bin
|
| 99 |
+
01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_8/sampler.bin
|
| 100 |
+
01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_8/sampler_1.bin
|
| 101 |
+
01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_8/random_states_0.pkl
|
| 102 |
+
01/08/2026 06:17:03 - INFO - __main__ - epoch 8: perplexity: 46.623820848165956 eval_loss: 3.898854970932007 eval_nwp_loss: 3.842111587524414 eval_attn_loss: 0.11348710209131241
|
| 103 |
+
01/08/2026 06:17:03 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_9
|
| 104 |
+
01/08/2026 06:17:03 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 105 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_9/model.safetensors
|
| 106 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_9/optimizer.bin
|
| 107 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_9/scheduler.bin
|
| 108 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_9/sampler.bin
|
| 109 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_9/sampler_1.bin
|
| 110 |
+
01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_9/random_states_0.pkl
|
| 111 |
+
01/08/2026 06:33:33 - INFO - __main__ - epoch 9: perplexity: 46.621519896025845 eval_loss: 3.898709297180176 eval_nwp_loss: 3.84206223487854 eval_attn_loss: 0.11329396814107895
|
| 112 |
+
01/08/2026 06:33:33 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_10
|
| 113 |
+
01/08/2026 06:33:33 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
| 114 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_10/model.safetensors
|
| 115 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_10/optimizer.bin
|
| 116 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_10/scheduler.bin
|
| 117 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_10/sampler.bin
|
| 118 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_10/sampler_1.bin
|
| 119 |
+
01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_10/random_states_0.pkl
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|