accesscreate012 committed
Commit c8df0db · verified · 1 parent: 6ba795b

Upload folder using huggingface_hub

checkpoint-105/config.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPTNeoForCausalLM"
+   ],
+   "attention_dropout": 0,
+   "attention_layers": [
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local"
+   ],
+   "attention_types": [
+     [
+       [
+         "global",
+         "local"
+       ],
+       6
+     ]
+   ],
+   "bos_token_id": 50256,
+   "classifier_dropout": 0.1,
+   "embed_dropout": 0,
+   "eos_token_id": 50256,
+   "gradient_checkpointing": false,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": null,
+   "layer_norm_epsilon": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "gpt_neo",
+   "num_heads": 12,
+   "num_layers": 12,
+   "resid_dropout": 0,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.2",
+   "use_cache": true,
+   "vocab_size": 50257,
+   "window_size": 256
+ }
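
This config describes a 12-layer, 12-head GPT-Neo with hidden size 768 and alternating global/local attention (window 256), i.e. the standard 125M-parameter GPT-Neo layout. A minimal sketch of inspecting it with transformers; the local path "./checkpoint-105" is an assumption, substitute the repo id or your download location:

# Sketch: inspect the checkpoint's architecture from config.json.
# "./checkpoint-105" is an assumed local path to the files shown above.
from transformers import GPTNeoConfig

config = GPTNeoConfig.from_pretrained("./checkpoint-105")
print(config.num_layers, config.num_heads, config.hidden_size)  # 12 12 768
print(config.attention_layers)  # alternating "global"/"local" per layer
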
checkpoint-105/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.50.2"
+ }
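
The generation config carries only defaults derived from the model config (matching bos/eos ids). A hedged sketch of loading the checkpoint and overriding those defaults per call, again assuming the "./checkpoint-105" local path:

# Sketch: generate from the checkpoint, overriding saved generation defaults.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./checkpoint-105")
model = AutoModelForCausalLM.from_pretrained("./checkpoint-105")  # GPTNeoForCausalLM

inputs = tokenizer("Once upon a time", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=40, do_sample=True, top_p=0.9)
print(tokenizer.decode(out[0], skip_special_tokens=True))
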
checkpoint-105/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-105/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c8a6ca0689e9b1885700d5442f4807d28afc18b0b3d32070e2f1965ac8a6c83
+ size 500811336
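
The entry above is a Git LFS pointer, not the weights themselves: the ~500 MB safetensors blob lives in LFS storage, keyed by its SHA-256. A small sketch for verifying a downloaded file against the pointer:

# Sketch: check a downloaded model.safetensors against the LFS pointer's oid.
import hashlib

EXPECTED = "1c8a6ca0689e9b1885700d5442f4807d28afc18b0b3d32070e2f1965ac8a6c83"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "checksum mismatch"
print("hash OK")
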
checkpoint-105/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc31fed0f5fb57b5ffe148cce7de6c9ede1b3a2d929670f98de7b827bf27c4b6
+ size 1001718458
checkpoint-105/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3df57e80c63a0180993780917c38b61dd5642f2c1a380eae6d9f8237592ed69
+ size 13990
checkpoint-105/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5decee26d5a7d5e4d272e70cda798b2acecef18713985c3d216533d43fc029f8
+ size 1064
checkpoint-105/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-105/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-105/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 2048,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
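
This is the stock GPT-2 BPE tokenizer: a single special token, <|endoftext|> (id 50256), doubles as bos, eos, pad, and unk. A quick check, assuming the same local path as above:

# Sketch: confirm the single-special-token setup from tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint-105")
print(tok.eos_token, tok.eos_token_id)        # <|endoftext|> 50256
print(tok.pad_token_id == tok.eos_token_id)   # True: pad reuses eos
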
checkpoint-105/trainer_state.json ADDED
@@ -0,0 +1,145 @@
+ {
+   "best_global_step": null,
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 13.133333333333333,
+   "eval_steps": 500,
+   "global_step": 105,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_loss": 2.1946465969085693,
+       "eval_runtime": 17.2738,
+       "eval_samples_per_second": 0.405,
+       "eval_steps_per_second": 0.058,
+       "step": 8
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 1.9301021099090576,
+       "eval_runtime": 17.1725,
+       "eval_samples_per_second": 0.408,
+       "eval_steps_per_second": 0.058,
+       "step": 16
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 1.8589046001434326,
+       "eval_runtime": 18.5191,
+       "eval_samples_per_second": 0.378,
+       "eval_steps_per_second": 0.054,
+       "step": 24
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 1.839167833328247,
+       "eval_runtime": 17.3086,
+       "eval_samples_per_second": 0.404,
+       "eval_steps_per_second": 0.058,
+       "step": 32
+     },
+     {
+       "epoch": 5.0,
+       "eval_loss": 1.8779523372650146,
+       "eval_runtime": 17.3997,
+       "eval_samples_per_second": 0.402,
+       "eval_steps_per_second": 0.057,
+       "step": 40
+     },
+     {
+       "epoch": 6.0,
+       "eval_loss": 1.9238650798797607,
+       "eval_runtime": 18.6686,
+       "eval_samples_per_second": 0.375,
+       "eval_steps_per_second": 0.054,
+       "step": 48
+     },
+     {
+       "epoch": 7.0,
+       "eval_loss": 1.9594440460205078,
+       "eval_runtime": 17.6292,
+       "eval_samples_per_second": 0.397,
+       "eval_steps_per_second": 0.057,
+       "step": 56
+     },
+     {
+       "epoch": 8.0,
+       "eval_loss": 1.9980494976043701,
+       "eval_runtime": 17.4652,
+       "eval_samples_per_second": 0.401,
+       "eval_steps_per_second": 0.057,
+       "step": 64
+     },
+     {
+       "epoch": 9.0,
+       "eval_loss": 2.038095712661743,
+       "eval_runtime": 18.2047,
+       "eval_samples_per_second": 0.385,
+       "eval_steps_per_second": 0.055,
+       "step": 72
+     },
+     {
+       "epoch": 10.0,
+       "eval_loss": 2.095003366470337,
+       "eval_runtime": 18.5195,
+       "eval_samples_per_second": 0.378,
+       "eval_steps_per_second": 0.054,
+       "step": 80
+     },
+     {
+       "epoch": 11.0,
+       "eval_loss": 2.120067834854126,
+       "eval_runtime": 17.4162,
+       "eval_samples_per_second": 0.402,
+       "eval_steps_per_second": 0.057,
+       "step": 88
+     },
+     {
+       "epoch": 12.0,
+       "eval_loss": 2.1349480152130127,
+       "eval_runtime": 18.8636,
+       "eval_samples_per_second": 0.371,
+       "eval_steps_per_second": 0.053,
+       "step": 96
+     },
+     {
+       "epoch": 12.533333333333333,
+       "grad_norm": 3.702388286590576,
+       "learning_rate": 2.3809523809523808e-06,
+       "loss": 0.9068,
+       "step": 100
+     },
+     {
+       "epoch": 13.0,
+       "eval_loss": 2.1458475589752197,
+       "eval_runtime": 17.4429,
+       "eval_samples_per_second": 0.401,
+       "eval_steps_per_second": 0.057,
+       "step": 104
+     }
+   ],
+   "logging_steps": 100,
+   "max_steps": 105,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 15,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 202435500441600.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
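
The log is worth reading: eval_loss bottoms out at epoch 4 (1.839, step 32) and then climbs monotonically to 2.146 by epoch 13, while train loss sits near 0.91 at step 100, a classic overfitting curve. best_model_checkpoint is null, so this run kept the last weights rather than the best ones. If retraining, a setup along these lines (an assumption, not the author's actual arguments from training_args.bin) would retain the epoch-4 model:

# Sketch (assumed settings, not recovered from training_args.bin):
# track eval_loss per epoch and restore the best checkpoint at the end.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",
    num_train_epochs=15,
    per_device_train_batch_size=4,   # matches train_batch_size above
    eval_strategy="epoch",           # the log shows per-epoch evals
    save_strategy="epoch",
    load_best_model_at_end=True,     # would pick the epoch-4 weights here
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)
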
checkpoint-105/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bed61593b0a710d8846c0282e583515f8b1328535a1ef6295dbacd43e2b30cf3
+ size 5304
checkpoint-105/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPTNeoForCausalLM"
+   ],
+   "attention_dropout": 0,
+   "attention_layers": [
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local",
+     "global",
+     "local"
+   ],
+   "attention_types": [
+     [
+       [
+         "global",
+         "local"
+       ],
+       6
+     ]
+   ],
+   "bos_token_id": 50256,
+   "classifier_dropout": 0.1,
+   "embed_dropout": 0,
+   "eos_token_id": 50256,
+   "gradient_checkpointing": false,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": null,
+   "layer_norm_epsilon": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "gpt_neo",
+   "num_heads": 12,
+   "num_layers": 12,
+   "resid_dropout": 0,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.2",
+   "use_cache": true,
+   "vocab_size": 50257,
+   "window_size": 256
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.50.2"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c8a6ca0689e9b1885700d5442f4807d28afc18b0b3d32070e2f1965ac8a6c83
+ size 500811336
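
The weights are duplicated at the repo root (same oid as checkpoint-105/model.safetensors), so the model loads without pointing at the checkpoint directory. The 500,811,336-byte float32 file is consistent with a ~125M-parameter model at 4 bytes per weight. A sketch of peeking at the tensors directly with the safetensors library, assuming the file has been downloaded:

# Sketch: list tensor names/shapes without instantiating the model.
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in list(f.keys())[:5]:
        print(name, f.get_tensor(name).shape)
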
runs/Apr01_02-12-31_1aa2ffaf3dee/events.out.tfevents.1743473578.1aa2ffaf3dee.186.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea7386e03ac3b6307d0f28eafea8a9333d3490ac554ef78dea175432633c2007
+ size 5485
runs/Apr01_02-16-14_1aa2ffaf3dee/events.out.tfevents.1743473785.1aa2ffaf3dee.186.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e850894c8eb7ea8afa4fa1dbd560389f812a436be983a8e62c55545757b7f1f5
+ size 5485
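
These two files under runs/ are the TensorBoard event logs the Trainer wrote for its two launches. A sketch of reading the logged scalars back out; the tag names are an assumption based on the Trainer's usual "train/..." and "eval/..." naming:

# Sketch: read logged scalars from a tfevents run directory.
# Assumes `pip install tensorboard` and the runs/ directory from this commit.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Apr01_02-16-14_1aa2ffaf3dee")
ea.Reload()
print(ea.Tags()["scalars"])           # available tags, e.g. eval/loss
for event in ea.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
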
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 2048,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bed61593b0a710d8846c0282e583515f8b1328535a1ef6295dbacd43e2b30cf3
+ size 5304
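
training_args.bin (identical oid to the checkpoint copy) is the pickled TrainingArguments object the Trainer saves alongside each run. A sketch of recovering the actual hyperparameters from it; weights_only=False is needed on recent torch because the file is a pickled object, not a plain tensor file:

# Sketch: recover the run's TrainingArguments from training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
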
vocab.json ADDED
The diff for this file is too large to render. See raw diff