schreon committed on
Commit
8a5b222
·
1 Parent(s): b4a2537

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BigBirdForCausalLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "attention_type": "block_sparse",
7
+ "block_size": 64,
8
+ "bos_token_id": 1,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu_new",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "is_decoder": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 4096,
19
+ "model_type": "big_bird",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "num_random_blocks": 3,
23
+ "pad_token_id": 0,
24
+ "rescale_embeddings": false,
25
+ "sep_token_id": 66,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.24.0",
28
+ "type_vocab_size": 2,
29
+ "use_bias": true,
30
+ "use_cache": true,
31
+ "vocab_size": 40000
32
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f55f85b43d485e9131b499b44f16582f7e1f873f30a3e5298987e1d0601a26d7
3
+ size 480686657
runs/Jan31_10-58-33_tardis/1675159121.616173/events.out.tfevents.1675159121.tardis.1781751.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddad2bb9f1195ba788237bf45913fc43dd3aeb08829ed2bc900c420df6d9191
3
+ size 5479
runs/Jan31_10-58-33_tardis/events.out.tfevents.1675159121.tardis.1781751.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6747c4c3f05e70ea4d462e9eaa7bc88565b36fb3806c8c69c982c6446226c078
3
+ size 3759
runs/Jan31_10-59-47_tardis/1675159192.0490732/events.out.tfevents.1675159192.tardis.1782105.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e03eb4080ca453664ba313e23b231b65ed6e06614d65299e791d87a3e8f7e3
3
+ size 5479
runs/Jan31_10-59-47_tardis/events.out.tfevents.1675159192.tardis.1782105.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebcc8751955e90278b9064fdf0aacc0cfb37fa4104bddf44071c03805e2d7a4
3
+ size 3781
runs/Jan31_11-01-39_tardis/1675159304.356033/events.out.tfevents.1675159304.tardis.1783035.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f26f4891cd319865dc1f425d99510d6d3426a27d9a891317c4cad912512fd58f
3
+ size 5479
runs/Jan31_11-01-39_tardis/events.out.tfevents.1675159304.tardis.1783035.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27b50f37895469bcd0290fdf96bcb7162e5825d12ca6d2a14f07d0daf4d1303b
3
+ size 3781
runs/Jan31_11-02-13_tardis/1675159337.6714537/events.out.tfevents.1675159337.tardis.1783627.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e79ae0c89969c1bacdff4b512cdd2f4dafafded3155e0d0ac63912b735205bd
3
+ size 5479
runs/Jan31_11-02-13_tardis/events.out.tfevents.1675159337.tardis.1783627.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5428bbe33712a06f870a918617d40a994563e77581c6d19e750373140329c0b6
3
+ size 3938
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<cls>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "</s>",
7
+ "sep_token": "<sep>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "name_or_path": "/home/ma/s/schroederl/XNEXT/xnext/data/tokenizer_fast",
3
+ "special_tokens_map_file": "/home/ma/s/schroederl/XNEXT/xnext/data/tokenizer_fast/special_tokens_map.json",
4
+ "tokenizer_class": "PreTrainedTokenizerFast"
5
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:295fdccb4b1230676641397539dc00887e6b1de30611c21910bcf99fcd9cc6d6
3
+ size 3451