xiulinyang commited on
Commit
923f483
·
verified ·
1 Parent(s): 7b9ba02

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<eos>": 32769,
3
+ "<s>": 32768
4
+ }
all_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"perplexity": 46.621519896025845}
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 32768,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 32769,
10
+ "initializer_range": 0.02,
11
+ "layer_norm_epsilon": 1e-05,
12
+ "model_type": "gpt2",
13
+ "n_embd": 512,
14
+ "n_head": 8,
15
+ "n_inner": null,
16
+ "n_layer": 4,
17
+ "n_positions": 1024,
18
+ "reorder_and_upcast_attn": false,
19
+ "resid_pdrop": 0.1,
20
+ "scale_attn_by_inverse_layer_idx": false,
21
+ "scale_attn_weights": true,
22
+ "summary_activation": null,
23
+ "summary_first_dropout": 0.1,
24
+ "summary_proj_to_labels": true,
25
+ "summary_type": "cls_index",
26
+ "summary_use_proj": true,
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.37.0",
29
+ "use_cache": true,
30
+ "vocab_size": 32770
31
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 32768,
4
+ "eos_token_id": 32769,
5
+ "transformers_version": "4.37.0"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b4ab73576c573fca5f2a0031b5aedceb42663a6671f1be610ace94175fa282
3
+ size 119657520
optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cda813a90dd360d75ff3b9b125c659c1ddcec68607da467d56a2ed505c89b44
3
+ size 239348538
random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370951e930d9408bb4cfe983040435451862e89032ea88f1270c1517f870d3f1
3
+ size 14280
scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2791ba824345c6824fda951b2a4a38eb48fca9be1cef8273296717a13459166
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<eos>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<pad>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<|endoftext|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "32768": {
30
+ "content": "<s>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32769": {
38
+ "content": "<eos>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ }
45
+ },
46
+ "bos_token": "<s>",
47
+ "clean_up_tokenization_spaces": true,
48
+ "eos_token": "<eos>",
49
+ "errors": "replace",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": null,
52
+ "token": null,
53
+ "tokenizer_class": "GPT2Tokenizer",
54
+ "unk_token": "<|endoftext|>"
55
+ }
training.log ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 01/08/2026 03:36:38 - INFO - __main__ - Distributed environment: DistributedType.NO
2
+ Num processes: 1
3
+ Process index: 0
4
+ Local process index: 0
5
+ Device: cuda
6
+
7
+ Mixed precision type: no
8
+
9
+ 01/08/2026 03:36:38 - INFO - __main__ - Arguments:
10
+ 01/08/2026 03:36:38 - INFO - __main__ - Namespace(train_file='data/preprocessed/dependency/train.sequential=False.random=False.convert_method=exponential.jsonl', validation_file='data/preprocessed/dependency/val.sequential=False.random=False.convert_method=exponential.jsonl', model_name_or_path=None, per_device_train_batch_size=32, per_device_eval_batch_size=32, learning_rate=0.0001, weight_decay=0.0, num_train_epochs=10, max_train_steps=None, gradient_accumulation_steps=1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, num_warmup_steps=0, output_dir='experiments/128/', seed=128, block_size=512, preprocessing_num_workers=None, overwrite_cache=False, trust_remote_code=False, checkpointing_steps='epoch', resume_from_checkpoint=None, with_tracking=True, report_to='wandb', low_cpu_mem_usage=False, n_positions=1024, n_embd=512, n_layer=4, n_head=8, n_inner=None, activation_function='gelu_new', resid_pdrop=0.1, embd_pdrop=0.1, attn_pdrop=0.1, layer_norm_epsilon=1e-05, initializer_range=0.02, attn_loss_weight=0.5, attn_loss_layers=[3], attn_loss_heads=[0], attn_loss_reduction='none')
11
+ 01/08/2026 03:36:38 - INFO - __main__ - Training new model from scratch
12
+ 01/08/2026 03:36:39 - INFO - __main__ - Sample 496287 of the training set: {'token_ids': [32768, 6067, 272, 515, 485, 318, 1388, 291, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17487770452710943, 0.17487770452710943, 0.17487770452710943, 0.4753668864186718, 0.0, 0.0, 0.0, 0.0], [0.14884758120207758, 0.14884758120207758, 0.14884758120207758, 0.4046096751916898, 0.14884758120207758, 0.0, 0.0, 0.0], [0.07088509576696811, 0.07088509576696811, 0.07088509576696811, 0.1926856677319286, 0.07088509576696811, 0.523773949200199, 0.0, 0.0], [0.12366807520346387, 0.12366807520346387, 0.12366807520346387, 0.3361646815860826, 0.12366807520346387, 0.12366807520346387, 0.045494942396598195, 0.0], [0.1100574786562937, 0.1100574786562937, 0.1100574786562937, 0.2991672443174225, 0.1100574786562937, 0.1100574786562937, 0.04048788374481527, 0.1100574786562937]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 6067, 272, 515, 485, 318, 1388, 291, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False], [False, False, True, False, False, False, False, False], [False, False, False, True, False, False, False, False], [False, False, False, False, True, False, False, False], [False, False, False, False, False, True, False, False], [False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False], [False, False, True, False, False, False, False, False], [False, False, False, True, False, False, False, False], [False, False, False, False, True, False, False, False], [False, False, False, False, False, True, False, False], [False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, True]]}.
13
+ 01/08/2026 03:36:39 - INFO - __main__ - Sample 850736 of the training set: {'token_ids': [32768, 43, 1049, 314, 6556, 318, 267, 3950, 269, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17487770452710943, 0.17487770452710943, 0.17487770452710943, 0.4753668864186718, 0.0, 0.0, 0.0, 0.0], [0.07629314247787525, 0.07629314247787525, 0.07629314247787525, 0.20738626283364517, 0.5637343097327291, 0.0, 0.0, 0.0], [0.04878906985862268, 0.04878906985862268, 0.04878906985862268, 0.13262244202411294, 0.36050517420000955, 0.36050517420000955, 0.0, 0.0], [0.13847545210043435, 0.13847545210043435, 0.13847545210043435, 0.37641530513226173, 0.13847545210043435, 0.018740614531330278, 0.050942271934670616, 0.0], [0.12163235653869706, 0.12163235653869706, 0.12163235653869706, 0.33063102453179205, 0.12163235653869706, 0.016461149422901226, 0.0447460433518215, 0.12163235653869706]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False], [False, False, True, True, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 43, 1049, 314, 6556, 318, 267, 3950, 269, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, True, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False], [False, False, True, True, False, False, False, False, False], [False, False, False, False, True, False, False, False, False], [False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, True]]}.
14
+ 01/08/2026 03:36:39 - INFO - __main__ - Sample 1072717 of the training set: {'token_ids': [32768, 1162, 1594, 313, 27833, 3442, 397, 1207, 1702, 485, 2260, 566, 872, 298, 2168, 311, 267, 1474, 365, 1707, 1159, 365, 408, 267, 29500, 764, 9869, 365, 298, 861, 405, 560, 10219, 269, 32769], 'attn_matrix': [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26894142136999505, 0.731058578630005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09003057317038045, 0.2447284710547976, 0.665240955774822, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03205860328008498, 0.08714431874203254, 0.23688281808991007, 0.6439142598879724, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06745080586634483, 0.18335029990140392, 0.49839778846450256, 0.18335029990140392, 0.06745080586634483, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04501528658519023, 0.12236423552739882, 0.33262047788741095, 0.12236423552739882, 0.04501528658519023, 0.33262047788741095, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03377952487759491, 0.09182226864874651, 0.24959880431577242, 0.09182226864874651, 0.03377952487759491, 0.24959880431577242, 0.24959880431577242, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02012506971027169, 0.054705611289903075, 0.14870526908408752, 0.054705611289903075, 0.02012506971027169, 0.14870526908408752, 0.14870526908408752, 0.4042228307473879, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009588884358741272, 0.02606529010756156, 0.0708528044528979, 0.02606529010756156, 0.009588884358741272, 0.0708528044528979, 0.0708528044528979, 0.1925978908396745, 0.5235353468690261, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27065309628634215, 0.0995677098131401, 0.27065309628634215, 0.0995677098131401, 0.03662891344477831, 0.03662891344477831, 0.03662891344477831, 0.0995677098131401, 0.03662891344477831, 0.013475024208782176, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15593208485436483, 0.057364208236921656, 0.15593208485436483, 0.057364208236921656, 0.021103112869440988, 0.021103112869440988, 0.021103112869440988, 0.057364208236921656, 0.021103112869440988, 0.007763401369387824, 0.4238673527333538, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10951306984813354, 0.0402876069367005, 0.10951306984813354, 0.0402876069367005, 0.014820982326008102, 0.014820982326008102, 0.014820982326008102, 0.0402876069367005, 0.014820982326008102, 0.005452334695703684, 0.2976873877469477, 0.2976873877469477, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06053127227234526, 0.022268210616946796, 0.06053127227234526, 0.022268210616946796, 0.008192016877650367, 0.008192016877650367, 0.008192016877650367, 0.022268210616946796, 0.008192016877650367, 0.003013674591017041, 0.16454105747142297, 0.16454105747142297, 0.44726896656000464, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12271734636543012, 0.04514518880295676, 0.12271734636543012, 0.04514518880295676, 0.016607986828410987, 0.016607986828410987, 0.016607986828410987, 0.04514518880295676, 0.016607986828410987, 0.006109736913418509, 0.33358033266186343, 0.04514518880295676, 0.12271734636543012, 0.04514518880295676, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06435891335043971, 0.02367632107776104, 0.06435891335043971, 0.02367632107776104, 0.008710031767082373, 0.008710031767082373, 0.008710031767082373, 0.02367632107776104, 0.008710031767082373, 0.003204241619059774, 0.17494566465987052, 0.02367632107776104, 0.06435891335043971, 0.02367632107776104, 0.47555162121261596, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02807143598771085, 0.010326904184038983, 0.02807143598771085, 0.010326904184038983, 0.0037990557402552906, 0.0037990557402552906, 0.0037990557402552906, 0.010326904184038983, 0.0037990557402552906, 0.0013975945027042763, 0.07630607434414569, 0.010326904184038983, 0.02807143598771085, 0.010326904184038983, 0.20742141529073618, 0.5638298640180653, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01795044117880241, 0.006603598269638677, 0.01795044117880241, 0.006603598269638677, 0.00242932804115538, 0.00242932804115538, 0.00242932804115538, 0.006603598269638677, 0.00242932804115538, 0.0008936998422023558, 0.048794358069161554, 0.006603598269638677, 0.01795044117880241, 0.006603598269638677, 0.13263681687072582, 0.3605442490843441, 0.3605442490843441, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013193573961951758, 0.004853644616176905, 0.013193573961951758, 0.004853644616176905, 0.0017855560690439394, 0.0017855560690439394, 0.0017855560690439394, 0.004853644616176905, 0.0017855560690439394, 0.0006568693688601616, 0.03586385235320387, 0.004853644616176905, 0.013193573961951758, 0.004853644616176905, 0.09748805815025224, 0.2650000169615894, 0.2650000169615894, 0.2650000169615894, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010429702596875434, 0.0038368731629228755, 0.010429702596875434, 0.0038368731629228755, 0.0014115067550217718, 0.0014115067550217718, 0.0014115067550217718, 0.0038368731629228755, 0.0014115067550217718, 0.0005192643162471252, 0.028350871045318606, 0.0038368731629228755, 0.010429702596875434, 0.0038368731629228755, 0.07706565758347526, 0.20948617660740781, 0.20948617660740781, 0.20948617660740781, 0.20948617660740781, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0086232507643292, 0.003172316672262639, 0.0086232507643292, 0.003172316672262639, 0.001167030084610829, 0.001167030084610829, 0.001167030084610829, 0.003172316672262639, 0.001167030084610829, 0.0004293263753568929, 0.023440425854921634, 0.003172316672262639, 0.0086232507643292, 0.003172316672262639, 0.06371768365277505, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.17320262162484046, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04824470580633239, 0.0177482354115142, 0.04824470580633239, 0.0177482354115142, 0.006529210924967047, 0.006529210924967047, 0.006529210924967047, 0.0177482354115142, 0.006529210924967047, 0.0024019624663673533, 0.13114270711270593, 0.0177482354115142, 0.04824470580633239, 0.0177482354115142, 0.35648283767939537, 0.13114270711270593, 0.0177482354115142, 0.0177482354115142, 0.0177482354115142, 0.0177482354115142, 0.04824470580633239, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.042651298994341065, 0.01569053603927429, 0.042651298994341065, 0.01569053603927429, 0.005772225629808603, 0.005772225629808603, 0.005772225629808603, 0.01569053603927429, 0.005772225629808603, 0.002123483139009465, 0.11593825101649086, 0.01569053603927429, 0.042651298994341065, 0.01569053603927429, 0.31515284096145063, 0.11593825101649086, 0.01569053603927429, 0.01569053603927429, 0.01569053603927429, 0.01569053603927429, 0.042651298994341065, 0.11593825101649086, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.022971880611964247, 0.008450882602186497, 0.022971880611964247, 0.008450882602186497, 0.003108905969097833, 0.003108905969097833, 0.003108905969097833, 0.008450882602186497, 0.003108905969097833, 0.001143702590566272, 0.06244404563303305, 0.008450882602186497, 0.022971880611964247, 0.008450882602186497, 0.16974051453974115, 0.06244404563303305, 0.008450882602186497, 0.008450882602186497, 0.008450882602186497, 0.008450882602186497, 0.022971880611964247, 0.06244404563303305, 0.4614025562266667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015719064205880078, 0.005782720555797185, 0.015719064205880078, 0.005782720555797185, 0.002127344006517281, 0.002127344006517281, 0.002127344006517281, 0.005782720555797185, 0.002127344006517281, 0.0007826061242969943, 0.04272884659122482, 0.005782720555797185, 0.015719064205880078, 0.005782720555797185, 0.11614904723994066, 0.04272884659122482, 0.005782720555797185, 0.005782720555797185, 0.005782720555797185, 0.005782720555797185, 0.015719064205880078, 0.04272884659122482, 0.3157258445051619, 0.3157258445051619, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011947066534815951, 0.004395080160466132, 0.011947066534815951, 0.004395080160466132, 0.0016168596333359737, 0.0016168596333359737, 0.0016168596333359737, 0.004395080160466132, 0.0016168596333359737, 0.0005948094183643012, 0.03247549386498137, 0.004395080160466132, 0.011947066534815951, 0.004395080160466132, 0.08827754484341206, 0.03247549386498137, 0.004395080160466132, 0.004395080160466132, 0.004395080160466132, 0.004395080160466132, 0.011947066534815951, 0.03247549386498137, 0.23996324600882557, 0.23996324600882557, 0.23996324600882557, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09240574017894794, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.012505757019806761, 0.012505757019806761, 0.012505757019806761, 0.03399417205806486, 0.012505757019806761, 0.004600610903872354, 0.25118484437374217, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.09240574017894794, 0.03399417205806486, 0.004600610903872354, 0.004600610903872354, 0.004600610903872354, 0.004600610903872354, 0.012505757019806761, 0.03399417205806486, 0.03399417205806486, 0.004600610903872354, 0.004600610903872354, 0.012505757019806761, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08458921148090166, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.011447904894529556, 0.011447904894529556, 0.011447904894529556, 0.031118631848727055, 0.011447904894529556, 0.0042114488551833525, 0.2299373164522143, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.08458921148090166, 0.031118631848727055, 0.0042114488551833525, 0.0042114488551833525, 0.0042114488551833525, 0.0042114488551833525, 0.011447904894529556, 0.031118631848727055, 0.031118631848727055, 0.0042114488551833525, 0.0042114488551833525, 0.011447904894529556, 0.08458921148090166, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07799193518198774, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.010555060638025849, 0.010555060638025849, 0.010555060638025849, 0.028691629530629002, 0.010555060638025849, 0.003882989809047637, 0.21200406017155304, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.07799193518198774, 0.028691629530629002, 0.003882989809047637, 0.003882989809047637, 0.003882989809047637, 0.003882989809047637, 0.010555060638025849, 0.028691629530629002, 0.028691629530629002, 0.003882989809047637, 0.003882989809047637, 0.010555060638025849, 0.07799193518198774, 0.07799193518198774, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04947826496904969, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.006696155003642512, 0.006696155003642512, 0.006696155003642512, 0.018202036466946552, 0.006696155003642512, 0.0024633777607373647, 0.13449586856904952, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.04947826496904969, 0.018202036466946552, 0.0024633777607373647, 0.0024633777607373647, 0.0024633777607373647, 0.0024633777607373647, 0.006696155003642512, 0.018202036466946552, 0.018202036466946552, 0.0024633777607373647, 0.0024633777607373647, 0.006696155003642512, 0.04947826496904969, 0.04947826496904969, 0.3655976755340633, 0.0, 0.0, 0.0, 0.0], [0.03623194873241091, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.004903461043915262, 0.004903461043915262, 0.004903461043915262, 0.013328989052231675, 0.004903461043915262, 0.0018038825086414835, 0.09848864784897231, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.03623194873241091, 0.013328989052231675, 0.0018038825086414835, 0.0018038825086414835, 0.0018038825086414835, 0.0018038825086414835, 0.004903461043915262, 0.013328989052231675, 0.013328989052231675, 0.0018038825086414835, 0.0018038825086414835, 0.004903461043915262, 0.03623194873241091, 0.03623194873241091, 0.2677199017573635, 0.2677199017573635, 0.0, 0.0, 0.0], [0.028580405405156724, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.003867937260524102, 0.003867937260524102, 0.003867937260524102, 0.010514143568902325, 0.003867937260524102, 0.0014229345978878062, 0.0776895966628302, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.028580405405156724, 0.010514143568902325, 0.0014229345978878062, 0.0014229345978878062, 0.0014229345978878062, 0.0014229345978878062, 0.003867937260524102, 0.010514143568902325, 0.010514143568902325, 0.0014229345978878062, 0.0014229345978878062, 0.003867937260524102, 0.028580405405156724, 0.028580405405156724, 0.21118221886888386, 0.21118221886888386, 0.21118221886888386, 0.0, 0.0], [0.06915443502737553, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.009359035051497799, 0.009359035051497799, 0.009359035051497799, 0.02544049491239773, 0.009359035051497799, 0.0034429965846489515, 0.18798124409226669, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.06915443502737553, 0.02544049491239773, 0.0034429965846489515, 0.0034429965846489515, 0.0034429965846489515, 0.0034429965846489515, 0.009359035051497799, 0.02544049491239773, 0.02544049491239773, 0.0034429965846489515, 0.0034429965846489515, 0.009359035051497799, 0.06915443502737553, 0.06915443502737553, 0.06915443502737553, 0.009359035051497799, 0.009359035051497799, 0.02544049491239773, 0.0], [0.06468142745496337, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.008753679304765885, 0.008753679304765885, 0.008753679304765885, 0.02379496738630311, 0.008753679304765885, 0.0032202986508312937, 0.17582234888961898, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.06468142745496337, 0.02379496738630311, 0.0032202986508312937, 0.0032202986508312937, 0.0032202986508312937, 0.0032202986508312937, 0.008753679304765885, 0.02379496738630311, 0.02379496738630311, 0.0032202986508312937, 0.0032202986508312937, 0.008753679304765885, 0.06468142745496337, 0.06468142745496337, 0.06468142745496337, 0.008753679304765885, 0.008753679304765885, 0.02379496738630311, 0.06468142745496337]], 'word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]], 'input_ids': [32768, 1162, 1594, 313, 27833, 3442, 397, 1207, 1702, 485, 2260, 566, 872, 298, 2168, 311, 267, 1474, 365, 1707, 1159, 365, 408, 267, 29500, 764, 9869, 365, 298, 861, 405, 560, 10219, 269, 32769], 'row_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]], 'col_word_token_membership_mask': [[True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False], [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True]]}.
15
+ 01/08/2026 03:36:39 - INFO - __main__ - ***** Running training *****
16
+ 01/08/2026 03:36:39 - INFO - __main__ - Num examples = 1132837
17
+ 01/08/2026 03:36:39 - INFO - __main__ - Num Epochs = 10
18
+ 01/08/2026 03:36:39 - INFO - __main__ - Instantaneous batch size per device = 32
19
+ 01/08/2026 03:36:39 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32
20
+ 01/08/2026 03:36:39 - INFO - __main__ - Gradient Accumulation steps = 1
21
+ 01/08/2026 03:36:39 - INFO - __main__ - Total optimization steps = 354020
22
+ 01/08/2026 03:36:39 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_0_init
23
+ 01/08/2026 03:36:39 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
24
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_0_init/model.safetensors
25
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_0_init/optimizer.bin
26
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_0_init/scheduler.bin
27
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_0_init/sampler.bin
28
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_0_init/sampler_1.bin
29
+ 01/08/2026 03:36:39 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_0_init/random_states_0.pkl
30
+ 01/08/2026 03:53:49 - INFO - __main__ - epoch 0: perplexity: 61.45426781123526 eval_loss: 4.183549404144287 eval_nwp_loss: 4.118293285369873 eval_attn_loss: 0.13051171600818634
31
+ 01/08/2026 03:53:49 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_1
32
+ 01/08/2026 03:53:49 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
33
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_1/model.safetensors
34
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_1/optimizer.bin
35
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_1/scheduler.bin
36
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_1/sampler.bin
37
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_1/sampler_1.bin
38
+ 01/08/2026 03:53:49 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_1/random_states_0.pkl
39
+ 01/08/2026 04:12:15 - INFO - __main__ - epoch 1: perplexity: 54.16680628732148 eval_loss: 4.053577899932861 eval_nwp_loss: 3.992068290710449 eval_attn_loss: 0.12301884591579437
40
+ 01/08/2026 04:12:15 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_2
41
+ 01/08/2026 04:12:15 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
42
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_2/model.safetensors
43
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_2/optimizer.bin
44
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_2/scheduler.bin
45
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_2/sampler.bin
46
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_2/sampler_1.bin
47
+ 01/08/2026 04:12:15 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_2/random_states_0.pkl
48
+ 01/08/2026 04:30:23 - INFO - __main__ - epoch 2: perplexity: 51.11190373140643 eval_loss: 3.9937267303466797 eval_nwp_loss: 3.9340174198150635 eval_attn_loss: 0.11941886693239212
49
+ 01/08/2026 04:30:23 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_3
50
+ 01/08/2026 04:30:23 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
51
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_3/model.safetensors
52
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_3/optimizer.bin
53
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_3/scheduler.bin
54
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_3/sampler.bin
55
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_3/sampler_1.bin
56
+ 01/08/2026 04:30:23 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_3/random_states_0.pkl
57
+ 01/08/2026 04:49:04 - INFO - __main__ - epoch 3: perplexity: 49.0719534463731 eval_loss: 3.9519906044006348 eval_nwp_loss: 3.8932876586914062 eval_attn_loss: 0.11740673333406448
58
+ 01/08/2026 04:49:04 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_4
59
+ 01/08/2026 04:49:04 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
60
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_4/model.safetensors
61
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_4/optimizer.bin
62
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_4/scheduler.bin
63
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_4/sampler.bin
64
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_4/sampler_1.bin
65
+ 01/08/2026 04:49:04 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_4/random_states_0.pkl
66
+ 01/08/2026 05:07:52 - INFO - __main__ - epoch 4: perplexity: 48.164000025201815 eval_loss: 3.932680130004883 eval_nwp_loss: 3.8746118545532227 eval_attn_loss: 0.11613597720861435
67
+ 01/08/2026 05:07:52 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_5
68
+ 01/08/2026 05:07:52 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
69
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_5/model.safetensors
70
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_5/optimizer.bin
71
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_5/scheduler.bin
72
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_5/sampler.bin
73
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_5/sampler_1.bin
74
+ 01/08/2026 05:07:52 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_5/random_states_0.pkl
75
+ 01/08/2026 05:26:02 - INFO - __main__ - epoch 5: perplexity: 47.37661658975665 eval_loss: 3.9156994819641113 eval_nwp_loss: 3.858128786087036 eval_attn_loss: 0.11514072865247726
76
+ 01/08/2026 05:26:02 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_6
77
+ 01/08/2026 05:26:02 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
78
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_6/model.safetensors
79
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_6/optimizer.bin
80
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_6/scheduler.bin
81
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_6/sampler.bin
82
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_6/sampler_1.bin
83
+ 01/08/2026 05:26:02 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_6/random_states_0.pkl
84
+ 01/08/2026 05:44:03 - INFO - __main__ - epoch 6: perplexity: 47.01068597593352 eval_loss: 3.9075894355773926 eval_nwp_loss: 3.850374937057495 eval_attn_loss: 0.11442875117063522
85
+ 01/08/2026 05:44:03 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_7
86
+ 01/08/2026 05:44:03 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
87
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_7/model.safetensors
88
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_7/optimizer.bin
89
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_7/scheduler.bin
90
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_7/sampler.bin
91
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_7/sampler_1.bin
92
+ 01/08/2026 05:44:03 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_7/random_states_0.pkl
93
+ 01/08/2026 06:02:27 - INFO - __main__ - epoch 7: perplexity: 46.709559367074704 eval_loss: 3.900867223739624 eval_nwp_loss: 3.8439488410949707 eval_attn_loss: 0.11383768171072006
94
+ 01/08/2026 06:02:27 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_8
95
+ 01/08/2026 06:02:27 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
96
+ 01/08/2026 06:02:27 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_8/model.safetensors
97
+ 01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_8/optimizer.bin
98
+ 01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_8/scheduler.bin
99
+ 01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_8/sampler.bin
100
+ 01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_8/sampler_1.bin
101
+ 01/08/2026 06:02:28 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_8/random_states_0.pkl
102
+ 01/08/2026 06:17:03 - INFO - __main__ - epoch 8: perplexity: 46.623820848165956 eval_loss: 3.898854970932007 eval_nwp_loss: 3.842111587524414 eval_attn_loss: 0.11348710209131241
103
+ 01/08/2026 06:17:03 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_9
104
+ 01/08/2026 06:17:03 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
105
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_9/model.safetensors
106
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_9/optimizer.bin
107
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_9/scheduler.bin
108
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_9/sampler.bin
109
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_9/sampler_1.bin
110
+ 01/08/2026 06:17:03 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_9/random_states_0.pkl
111
+ 01/08/2026 06:33:33 - INFO - __main__ - epoch 9: perplexity: 46.621519896025845 eval_loss: 3.898709297180176 eval_nwp_loss: 3.84206223487854 eval_attn_loss: 0.11329396814107895
112
+ 01/08/2026 06:33:33 - INFO - accelerate.accelerator - Saving current state to experiments/128/epoch_10
113
+ 01/08/2026 06:33:33 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
114
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Model weights saved in experiments/128/epoch_10/model.safetensors
115
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Optimizer state saved in experiments/128/epoch_10/optimizer.bin
116
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Scheduler state saved in experiments/128/epoch_10/scheduler.bin
117
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in experiments/128/epoch_10/sampler.bin
118
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in experiments/128/epoch_10/sampler_1.bin
119
+ 01/08/2026 06:33:33 - INFO - accelerate.checkpointing - Random states saved in experiments/128/epoch_10/random_states_0.pkl
vocab.json ADDED
The diff for this file is too large to render. See raw diff