mllm-dev commited on
Commit
537a746
·
verified ·
1 Parent(s): 5491f4f

Upload folder using huggingface_hub

Browse files
Files changed (49) hide show
  1. checkpoint-1563/config.json +40 -0
  2. checkpoint-1563/generation_config.json +6 -0
  3. checkpoint-1563/merges.txt +0 -0
  4. checkpoint-1563/model.safetensors +3 -0
  5. checkpoint-1563/optimizer.pt +3 -0
  6. checkpoint-1563/rng_state.pth +3 -0
  7. checkpoint-1563/scheduler.pt +3 -0
  8. checkpoint-1563/special_tokens_map.json +24 -0
  9. checkpoint-1563/tokenizer_config.json +22 -0
  10. checkpoint-1563/trainer_state.json +50 -0
  11. checkpoint-1563/training_args.bin +3 -0
  12. checkpoint-1563/vocab.json +0 -0
  13. checkpoint-3126/config.json +40 -0
  14. checkpoint-3126/generation_config.json +6 -0
  15. checkpoint-3126/merges.txt +0 -0
  16. checkpoint-3126/model.safetensors +3 -0
  17. checkpoint-3126/optimizer.pt +3 -0
  18. checkpoint-3126/rng_state.pth +3 -0
  19. checkpoint-3126/scheduler.pt +3 -0
  20. checkpoint-3126/special_tokens_map.json +24 -0
  21. checkpoint-3126/tokenizer_config.json +22 -0
  22. checkpoint-3126/trainer_state.json +79 -0
  23. checkpoint-3126/training_args.bin +3 -0
  24. checkpoint-3126/vocab.json +0 -0
  25. checkpoint-4689/config.json +40 -0
  26. checkpoint-4689/generation_config.json +6 -0
  27. checkpoint-4689/merges.txt +0 -0
  28. checkpoint-4689/model.safetensors +3 -0
  29. checkpoint-4689/optimizer.pt +3 -0
  30. checkpoint-4689/rng_state.pth +3 -0
  31. checkpoint-4689/scheduler.pt +3 -0
  32. checkpoint-4689/special_tokens_map.json +24 -0
  33. checkpoint-4689/tokenizer_config.json +22 -0
  34. checkpoint-4689/trainer_state.json +108 -0
  35. checkpoint-4689/training_args.bin +3 -0
  36. checkpoint-4689/vocab.json +0 -0
  37. checkpoint-6252/config.json +40 -0
  38. checkpoint-6252/generation_config.json +6 -0
  39. checkpoint-6252/merges.txt +0 -0
  40. checkpoint-6252/model.safetensors +3 -0
  41. checkpoint-6252/optimizer.pt +3 -0
  42. checkpoint-6252/rng_state.pth +3 -0
  43. checkpoint-6252/scheduler.pt +3 -0
  44. checkpoint-6252/special_tokens_map.json +24 -0
  45. checkpoint-6252/tokenizer_config.json +22 -0
  46. checkpoint-6252/trainer_state.json +137 -0
  47. checkpoint-6252/training_args.bin +3 -0
  48. checkpoint-6252/vocab.json +0 -0
  49. runs/Mar12_23-21-03_lambda-hyperplane04/events.out.tfevents.1710310870.lambda-hyperplane04.1144229.0 +2 -2
checkpoint-1563/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.38.2",
38
+ "use_cache": true,
39
+ "vocab_size": 50257
40
+ }
checkpoint-1563/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.38.2"
6
+ }
checkpoint-1563/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1563/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb85d57c39860aa18c37cf5e5720ae3f19ac29308fa2cec26cd4827ff2d531bc
3
+ size 497774208
checkpoint-1563/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82844e18d287a245376d4bb0693f6411fbc8f92cdcd650bc79f1bd6c23b223de
3
+ size 995641861
checkpoint-1563/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:203de03d4fb0e79953aaa759e54343ca2c10d16450bd626b02caed60084de2f4
3
+ size 14575
checkpoint-1563/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be24028fc24c3aa38f2d5bda82b0de76fff44a634e409ea5c8f177f4c69b4d7
3
+ size 627
checkpoint-1563/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1563/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
checkpoint-1563/trainer_state.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.0289978981018066,
3
+ "best_model_checkpoint": "gen_test/checkpoint-1563",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1563,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.32,
13
+ "grad_norm": 318423.84375,
14
+ "learning_rate": 1.8400511836212414e-05,
15
+ "loss": 2.1469,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.64,
20
+ "grad_norm": 273106.9375,
21
+ "learning_rate": 1.6801023672424827e-05,
22
+ "loss": 1.9333,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.96,
27
+ "grad_norm": 250709.375,
28
+ "learning_rate": 1.5201535508637238e-05,
29
+ "loss": 1.8887,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_loss": 2.0289978981018066,
35
+ "eval_runtime": 56.4689,
36
+ "eval_samples_per_second": 177.089,
37
+ "eval_steps_per_second": 2.78,
38
+ "step": 1563
39
+ }
40
+ ],
41
+ "logging_steps": 500,
42
+ "max_steps": 6252,
43
+ "num_input_tokens_seen": 0,
44
+ "num_train_epochs": 4,
45
+ "save_steps": 500,
46
+ "total_flos": 5.22584064e+16,
47
+ "train_batch_size": 64,
48
+ "trial_name": null,
49
+ "trial_params": null
50
+ }
checkpoint-1563/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693f0a20c38fd336f1834d29d5277963b0697a0c00490b4f0dfb5fd656990312
3
+ size 4539
checkpoint-1563/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3126/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.38.2",
38
+ "use_cache": true,
39
+ "vocab_size": 50257
40
+ }
checkpoint-3126/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.38.2"
6
+ }
checkpoint-3126/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3126/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af44cf3877daea2fd5bffe271bd1d5709de4f91584c9098fb5b687fbe5f33dab
3
+ size 497774208
checkpoint-3126/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf191cb1f4d24fca2319015fb572394437490a283a3b7737b6d7f55bd4b7e65
3
+ size 995641861
checkpoint-3126/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab87e4b7614cd84e2ce43212e2906891080ecaa9c0601f398122e9a662e267ee
3
+ size 14575
checkpoint-3126/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d283301a2d1426e7b18dfa2bf011d4893d17ce9d31855fba77e59fd5b6c60ae
3
+ size 627
checkpoint-3126/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-3126/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
checkpoint-3126/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.0003182888031006,
3
+ "best_model_checkpoint": "gen_test/checkpoint-3126",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3126,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.32,
13
+ "grad_norm": 318423.84375,
14
+ "learning_rate": 1.8400511836212414e-05,
15
+ "loss": 2.1469,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.64,
20
+ "grad_norm": 273106.9375,
21
+ "learning_rate": 1.6801023672424827e-05,
22
+ "loss": 1.9333,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.96,
27
+ "grad_norm": 250709.375,
28
+ "learning_rate": 1.5201535508637238e-05,
29
+ "loss": 1.8887,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_loss": 2.0289978981018066,
35
+ "eval_runtime": 56.4689,
36
+ "eval_samples_per_second": 177.089,
37
+ "eval_steps_per_second": 2.78,
38
+ "step": 1563
39
+ },
40
+ {
41
+ "epoch": 1.28,
42
+ "grad_norm": 241871.234375,
43
+ "learning_rate": 1.3602047344849649e-05,
44
+ "loss": 1.8677,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.6,
49
+ "grad_norm": 229897.359375,
50
+ "learning_rate": 1.2002559181062061e-05,
51
+ "loss": 1.8518,
52
+ "step": 2500
53
+ },
54
+ {
55
+ "epoch": 1.92,
56
+ "grad_norm": 201821.484375,
57
+ "learning_rate": 1.0403071017274472e-05,
58
+ "loss": 1.8464,
59
+ "step": 3000
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "eval_loss": 2.0003182888031006,
64
+ "eval_runtime": 56.3343,
65
+ "eval_samples_per_second": 177.512,
66
+ "eval_steps_per_second": 2.787,
67
+ "step": 3126
68
+ }
69
+ ],
70
+ "logging_steps": 500,
71
+ "max_steps": 6252,
72
+ "num_input_tokens_seen": 0,
73
+ "num_train_epochs": 4,
74
+ "save_steps": 500,
75
+ "total_flos": 1.045168128e+17,
76
+ "train_batch_size": 64,
77
+ "trial_name": null,
78
+ "trial_params": null
79
+ }
checkpoint-3126/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693f0a20c38fd336f1834d29d5277963b0697a0c00490b4f0dfb5fd656990312
3
+ size 4539
checkpoint-3126/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4689/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.38.2",
38
+ "use_cache": true,
39
+ "vocab_size": 50257
40
+ }
checkpoint-4689/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.38.2"
6
+ }
checkpoint-4689/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4689/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681640960f3e4b6501e6ccb12a26ff351e57df38cb903b5719bb531592e71c9f
3
+ size 497774208
checkpoint-4689/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad6d80dab21a9278f5661ad4a8d805f5485a5bc18fc8987086c54093b5c3f6b
3
+ size 995641861
checkpoint-4689/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74670ec4c7e987f2a2d77b219935c544c9cd3f04304ad232c70ac492b376a3ec
3
+ size 14575
checkpoint-4689/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e832ef5b80067ad112b7700a196861f01317cb96208f02175085e397786cdca
3
+ size 627
checkpoint-4689/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-4689/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
checkpoint-4689/trainer_state.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.9800028800964355,
3
+ "best_model_checkpoint": "gen_test/checkpoint-4689",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4689,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.32,
13
+ "grad_norm": 318423.84375,
14
+ "learning_rate": 1.8400511836212414e-05,
15
+ "loss": 2.1469,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.64,
20
+ "grad_norm": 273106.9375,
21
+ "learning_rate": 1.6801023672424827e-05,
22
+ "loss": 1.9333,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.96,
27
+ "grad_norm": 250709.375,
28
+ "learning_rate": 1.5201535508637238e-05,
29
+ "loss": 1.8887,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_loss": 2.0289978981018066,
35
+ "eval_runtime": 56.4689,
36
+ "eval_samples_per_second": 177.089,
37
+ "eval_steps_per_second": 2.78,
38
+ "step": 1563
39
+ },
40
+ {
41
+ "epoch": 1.28,
42
+ "grad_norm": 241871.234375,
43
+ "learning_rate": 1.3602047344849649e-05,
44
+ "loss": 1.8677,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.6,
49
+ "grad_norm": 229897.359375,
50
+ "learning_rate": 1.2002559181062061e-05,
51
+ "loss": 1.8518,
52
+ "step": 2500
53
+ },
54
+ {
55
+ "epoch": 1.92,
56
+ "grad_norm": 201821.484375,
57
+ "learning_rate": 1.0403071017274472e-05,
58
+ "loss": 1.8464,
59
+ "step": 3000
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "eval_loss": 2.0003182888031006,
64
+ "eval_runtime": 56.3343,
65
+ "eval_samples_per_second": 177.512,
66
+ "eval_steps_per_second": 2.787,
67
+ "step": 3126
68
+ },
69
+ {
70
+ "epoch": 2.24,
71
+ "grad_norm": 229470.453125,
72
+ "learning_rate": 8.803582853486885e-06,
73
+ "loss": 1.8306,
74
+ "step": 3500
75
+ },
76
+ {
77
+ "epoch": 2.56,
78
+ "grad_norm": 209711.5,
79
+ "learning_rate": 7.204094689699297e-06,
80
+ "loss": 1.8225,
81
+ "step": 4000
82
+ },
83
+ {
84
+ "epoch": 2.88,
85
+ "grad_norm": 202330.5,
86
+ "learning_rate": 5.6046065259117085e-06,
87
+ "loss": 1.8148,
88
+ "step": 4500
89
+ },
90
+ {
91
+ "epoch": 3.0,
92
+ "eval_loss": 1.9800028800964355,
93
+ "eval_runtime": 56.3231,
94
+ "eval_samples_per_second": 177.547,
95
+ "eval_steps_per_second": 2.787,
96
+ "step": 4689
97
+ }
98
+ ],
99
+ "logging_steps": 500,
100
+ "max_steps": 6252,
101
+ "num_input_tokens_seen": 0,
102
+ "num_train_epochs": 4,
103
+ "save_steps": 500,
104
+ "total_flos": 1.567752192e+17,
105
+ "train_batch_size": 64,
106
+ "trial_name": null,
107
+ "trial_params": null
108
+ }
checkpoint-4689/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693f0a20c38fd336f1834d29d5277963b0697a0c00490b4f0dfb5fd656990312
3
+ size 4539
checkpoint-4689/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6252/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.38.2",
38
+ "use_cache": true,
39
+ "vocab_size": 50257
40
+ }
checkpoint-6252/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.38.2"
6
+ }
checkpoint-6252/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6252/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06eb0e1e353da72baea37e7ad65a656d6421269fb59c9b9e85e6bcc457c374a9
3
+ size 497774208
checkpoint-6252/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09fa6a9430a1b66ff093b5439bb9a92ab23c95acf48add4102f108c9c6fcbe5
3
+ size 995641861
checkpoint-6252/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2420a0f6c139bd077592bf0e0ff0d798cc72a3f1a329fd4bc850d093eaeb879
3
+ size 14575
checkpoint-6252/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993fad062cecf1a7759fdc6ac23e23927e084399ee6737eb9c586b4b410bcd33
3
+ size 627
checkpoint-6252/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-6252/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
checkpoint-6252/trainer_state.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.9688726663589478,
3
+ "best_model_checkpoint": "gen_test/checkpoint-6252",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 6252,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.32,
13
+ "grad_norm": 318423.84375,
14
+ "learning_rate": 1.8400511836212414e-05,
15
+ "loss": 2.1469,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.64,
20
+ "grad_norm": 273106.9375,
21
+ "learning_rate": 1.6801023672424827e-05,
22
+ "loss": 1.9333,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.96,
27
+ "grad_norm": 250709.375,
28
+ "learning_rate": 1.5201535508637238e-05,
29
+ "loss": 1.8887,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_loss": 2.0289978981018066,
35
+ "eval_runtime": 56.4689,
36
+ "eval_samples_per_second": 177.089,
37
+ "eval_steps_per_second": 2.78,
38
+ "step": 1563
39
+ },
40
+ {
41
+ "epoch": 1.28,
42
+ "grad_norm": 241871.234375,
43
+ "learning_rate": 1.3602047344849649e-05,
44
+ "loss": 1.8677,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.6,
49
+ "grad_norm": 229897.359375,
50
+ "learning_rate": 1.2002559181062061e-05,
51
+ "loss": 1.8518,
52
+ "step": 2500
53
+ },
54
+ {
55
+ "epoch": 1.92,
56
+ "grad_norm": 201821.484375,
57
+ "learning_rate": 1.0403071017274472e-05,
58
+ "loss": 1.8464,
59
+ "step": 3000
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "eval_loss": 2.0003182888031006,
64
+ "eval_runtime": 56.3343,
65
+ "eval_samples_per_second": 177.512,
66
+ "eval_steps_per_second": 2.787,
67
+ "step": 3126
68
+ },
69
+ {
70
+ "epoch": 2.24,
71
+ "grad_norm": 229470.453125,
72
+ "learning_rate": 8.803582853486885e-06,
73
+ "loss": 1.8306,
74
+ "step": 3500
75
+ },
76
+ {
77
+ "epoch": 2.56,
78
+ "grad_norm": 209711.5,
79
+ "learning_rate": 7.204094689699297e-06,
80
+ "loss": 1.8225,
81
+ "step": 4000
82
+ },
83
+ {
84
+ "epoch": 2.88,
85
+ "grad_norm": 202330.5,
86
+ "learning_rate": 5.6046065259117085e-06,
87
+ "loss": 1.8148,
88
+ "step": 4500
89
+ },
90
+ {
91
+ "epoch": 3.0,
92
+ "eval_loss": 1.9800028800964355,
93
+ "eval_runtime": 56.3231,
94
+ "eval_samples_per_second": 177.547,
95
+ "eval_steps_per_second": 2.787,
96
+ "step": 4689
97
+ },
98
+ {
99
+ "epoch": 3.2,
100
+ "grad_norm": 202021.6875,
101
+ "learning_rate": 4.005118362124121e-06,
102
+ "loss": 1.8124,
103
+ "step": 5000
104
+ },
105
+ {
106
+ "epoch": 3.52,
107
+ "grad_norm": 201338.140625,
108
+ "learning_rate": 2.4056301983365325e-06,
109
+ "loss": 1.801,
110
+ "step": 5500
111
+ },
112
+ {
113
+ "epoch": 3.84,
114
+ "grad_norm": 212086.015625,
115
+ "learning_rate": 8.061420345489445e-07,
116
+ "loss": 1.8055,
117
+ "step": 6000
118
+ },
119
+ {
120
+ "epoch": 4.0,
121
+ "eval_loss": 1.9688726663589478,
122
+ "eval_runtime": 56.3874,
123
+ "eval_samples_per_second": 177.345,
124
+ "eval_steps_per_second": 2.784,
125
+ "step": 6252
126
+ }
127
+ ],
128
+ "logging_steps": 500,
129
+ "max_steps": 6252,
130
+ "num_input_tokens_seen": 0,
131
+ "num_train_epochs": 4,
132
+ "save_steps": 500,
133
+ "total_flos": 2.090336256e+17,
134
+ "train_batch_size": 64,
135
+ "trial_name": null,
136
+ "trial_params": null
137
+ }
checkpoint-6252/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693f0a20c38fd336f1834d29d5277963b0697a0c00490b4f0dfb5fd656990312
3
+ size 4539
checkpoint-6252/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
runs/Mar12_23-21-03_lambda-hyperplane04/events.out.tfevents.1710310870.lambda-hyperplane04.1144229.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ac14bdb86167bd2bb963a4b0145f862930b5b799766826bac1f33f38d9de3d4
3
- size 5261
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e721cc5fa816734506dff126b7a443e22fc96ae3e12c571687ea4878fe122783
3
+ size 5472