mllm-dev committed on
Commit
245201c
·
verified ·
1 Parent(s): d731d7b

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. checkpoint-1240/config.json +39 -0
  2. checkpoint-1240/generation_config.json +5 -0
  3. checkpoint-1240/merges.txt +0 -0
  4. checkpoint-1240/model.safetensors +3 -0
  5. checkpoint-1240/optimizer.pt +3 -0
  6. checkpoint-1240/rng_state.pth +3 -0
  7. checkpoint-1240/scheduler.pt +3 -0
  8. checkpoint-1240/special_tokens_map.json +6 -0
  9. checkpoint-1240/tokenizer.json +0 -0
  10. checkpoint-1240/tokenizer_config.json +21 -0
  11. checkpoint-1240/trainer_state.json +100 -0
  12. checkpoint-1240/training_args.bin +3 -0
  13. checkpoint-1240/vocab.json +0 -0
  14. checkpoint-496/config.json +39 -0
  15. checkpoint-496/generation_config.json +5 -0
  16. checkpoint-496/merges.txt +0 -0
  17. checkpoint-496/model.safetensors +3 -0
  18. checkpoint-496/optimizer.pt +3 -0
  19. checkpoint-496/rng_state.pth +3 -0
  20. checkpoint-496/scheduler.pt +3 -0
  21. checkpoint-496/special_tokens_map.json +6 -0
  22. checkpoint-496/tokenizer.json +0 -0
  23. checkpoint-496/tokenizer_config.json +21 -0
  24. checkpoint-496/trainer_state.json +47 -0
  25. checkpoint-496/training_args.bin +3 -0
  26. checkpoint-496/vocab.json +0 -0
  27. checkpoint-992/config.json +39 -0
  28. checkpoint-992/generation_config.json +5 -0
  29. checkpoint-992/merges.txt +0 -0
  30. checkpoint-992/model.safetensors +3 -0
  31. checkpoint-992/optimizer.pt +3 -0
  32. checkpoint-992/rng_state.pth +3 -0
  33. checkpoint-992/scheduler.pt +3 -0
  34. checkpoint-992/special_tokens_map.json +6 -0
  35. checkpoint-992/tokenizer.json +0 -0
  36. checkpoint-992/tokenizer_config.json +21 -0
  37. checkpoint-992/trainer_state.json +80 -0
  38. checkpoint-992/training_args.bin +3 -0
  39. checkpoint-992/vocab.json +0 -0
checkpoint-1240/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-1240/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-1240/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1240/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00f7d905caae3db5a4658614e8462e59a14d10a3b645e0816ff02663e7a050d
3
+ size 497774208
checkpoint-1240/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d4d234a45feb900bb322acda566345fae2dc886a73e635afc097536e03d523
3
+ size 995641861
checkpoint-1240/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a5011585a447b6ae3855a9f622af5bf659cdcd458755132024f2840d9d0fd2
3
+ size 14575
checkpoint-1240/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee8969ce733a27aaa14f3a07ad328d6ff83e779c0eb2b8d332e21e28100e3ea7
3
+ size 627
checkpoint-1240/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-1240/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1240/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-1240/trainer_state.json ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 5.6794586181640625,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_3/checkpoint-496",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1240,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 25.0,
14
+ "eval_loss": 5.72403621673584,
15
+ "eval_rouge1": 0.1624,
16
+ "eval_rouge2": 0.0178,
17
+ "eval_rougeL": 0.1416,
18
+ "eval_rougeLsum": 0.1514,
19
+ "eval_runtime": 5.8118,
20
+ "eval_samples_per_second": 42.672,
21
+ "eval_steps_per_second": 10.668,
22
+ "step": 248
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 25.0,
27
+ "eval_loss": 5.6794586181640625,
28
+ "eval_rouge1": 0.1736,
29
+ "eval_rouge2": 0.0188,
30
+ "eval_rougeL": 0.1512,
31
+ "eval_rougeLsum": 0.1618,
32
+ "eval_runtime": 5.905,
33
+ "eval_samples_per_second": 41.998,
34
+ "eval_steps_per_second": 10.499,
35
+ "step": 496
36
+ },
37
+ {
38
+ "epoch": 2.02,
39
+ "grad_norm": 1349184.375,
40
+ "learning_rate": 3.580645161290323e-05,
41
+ "loss": 5.8134,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "eval_gen_len": 25.0,
47
+ "eval_loss": 5.689182281494141,
48
+ "eval_rouge1": 0.1636,
49
+ "eval_rouge2": 0.0174,
50
+ "eval_rougeL": 0.1451,
51
+ "eval_rougeLsum": 0.156,
52
+ "eval_runtime": 6.059,
53
+ "eval_samples_per_second": 40.931,
54
+ "eval_steps_per_second": 10.233,
55
+ "step": 744
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "eval_gen_len": 25.0,
60
+ "eval_loss": 5.705079078674316,
61
+ "eval_rouge1": 0.1625,
62
+ "eval_rouge2": 0.0178,
63
+ "eval_rougeL": 0.1444,
64
+ "eval_rougeLsum": 0.1549,
65
+ "eval_runtime": 5.7853,
66
+ "eval_samples_per_second": 42.868,
67
+ "eval_steps_per_second": 10.717,
68
+ "step": 992
69
+ },
70
+ {
71
+ "epoch": 4.03,
72
+ "grad_norm": 1369836.5,
73
+ "learning_rate": 1.1612903225806451e-05,
74
+ "loss": 5.3454,
75
+ "step": 1000
76
+ },
77
+ {
78
+ "epoch": 5.0,
79
+ "eval_gen_len": 25.0,
80
+ "eval_loss": 5.706656455993652,
81
+ "eval_rouge1": 0.1625,
82
+ "eval_rouge2": 0.0178,
83
+ "eval_rougeL": 0.1444,
84
+ "eval_rougeLsum": 0.1549,
85
+ "eval_runtime": 5.7722,
86
+ "eval_samples_per_second": 42.965,
87
+ "eval_steps_per_second": 10.741,
88
+ "step": 1240
89
+ }
90
+ ],
91
+ "logging_steps": 500,
92
+ "max_steps": 1240,
93
+ "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 5,
95
+ "save_steps": 500,
96
+ "total_flos": 50472230400000.0,
97
+ "train_batch_size": 4,
98
+ "trial_name": null,
99
+ "trial_params": null
100
+ }
checkpoint-1240/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091658d7b0b7435f21557e101ac1b5c52bb0268d57417626332a8488d3d185c9
3
+ size 4795
checkpoint-1240/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-496/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-496/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-496/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-496/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ced7ace1db6d903afa32427a70c58c4258798f27921a09922147a64b746e32
3
+ size 497774208
checkpoint-496/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e017cf8f0c9de82cb80fb21273e525d12bbc57daf3bb73b2dbf025e2af4d29a8
3
+ size 995641861
checkpoint-496/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0581ed48902907f0f3fdc04acacb678e2ada9451c7c08f7af3b65d47aa9356
3
+ size 14575
checkpoint-496/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf4754391dc4166c4434f493e5670dfe2485abfd70a0ca812d8f9c16664b7e97
3
+ size 627
checkpoint-496/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-496/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-496/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-496/trainer_state.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 5.6794586181640625,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_3/checkpoint-496",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 496,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 25.0,
14
+ "eval_loss": 5.72403621673584,
15
+ "eval_rouge1": 0.1624,
16
+ "eval_rouge2": 0.0178,
17
+ "eval_rougeL": 0.1416,
18
+ "eval_rougeLsum": 0.1514,
19
+ "eval_runtime": 5.8118,
20
+ "eval_samples_per_second": 42.672,
21
+ "eval_steps_per_second": 10.668,
22
+ "step": 248
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 25.0,
27
+ "eval_loss": 5.6794586181640625,
28
+ "eval_rouge1": 0.1736,
29
+ "eval_rouge2": 0.0188,
30
+ "eval_rougeL": 0.1512,
31
+ "eval_rougeLsum": 0.1618,
32
+ "eval_runtime": 5.905,
33
+ "eval_samples_per_second": 41.998,
34
+ "eval_steps_per_second": 10.499,
35
+ "step": 496
36
+ }
37
+ ],
38
+ "logging_steps": 500,
39
+ "max_steps": 1240,
40
+ "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 5,
42
+ "save_steps": 500,
43
+ "total_flos": 20188892160000.0,
44
+ "train_batch_size": 4,
45
+ "trial_name": null,
46
+ "trial_params": null
47
+ }
checkpoint-496/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091658d7b0b7435f21557e101ac1b5c52bb0268d57417626332a8488d3d185c9
3
+ size 4795
checkpoint-496/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-992/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai-community/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-992/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "transformers_version": "4.40.0.dev0"
5
+ }
checkpoint-992/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-992/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449a5427d1b891e64a84fbdb288d708dd2b7c21e99c4064aa3a628bf3eafd386
3
+ size 497774208
checkpoint-992/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1595432495d587d855287eb81c5c39c1b97ef406cac3b3316267678262d7ee49
3
+ size 995641861
checkpoint-992/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3454b806249938d46c59ab21db23c34d64f21485a32e805780fc8734681b3198
3
+ size 14575
checkpoint-992/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57b116427d0f2260458083634536dd885f3c688293a7d7d21ed73fdacd607a9
3
+ size 627
checkpoint-992/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
checkpoint-992/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-992/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "padding_side": "left",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
checkpoint-992/trainer_state.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 5.6794586181640625,
3
+ "best_model_checkpoint": "bill_sum_finetune_test_gpt2_3/checkpoint-496",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 992,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 25.0,
14
+ "eval_loss": 5.72403621673584,
15
+ "eval_rouge1": 0.1624,
16
+ "eval_rouge2": 0.0178,
17
+ "eval_rougeL": 0.1416,
18
+ "eval_rougeLsum": 0.1514,
19
+ "eval_runtime": 5.8118,
20
+ "eval_samples_per_second": 42.672,
21
+ "eval_steps_per_second": 10.668,
22
+ "step": 248
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_gen_len": 25.0,
27
+ "eval_loss": 5.6794586181640625,
28
+ "eval_rouge1": 0.1736,
29
+ "eval_rouge2": 0.0188,
30
+ "eval_rougeL": 0.1512,
31
+ "eval_rougeLsum": 0.1618,
32
+ "eval_runtime": 5.905,
33
+ "eval_samples_per_second": 41.998,
34
+ "eval_steps_per_second": 10.499,
35
+ "step": 496
36
+ },
37
+ {
38
+ "epoch": 2.02,
39
+ "grad_norm": 1349184.375,
40
+ "learning_rate": 3.580645161290323e-05,
41
+ "loss": 5.8134,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "eval_gen_len": 25.0,
47
+ "eval_loss": 5.689182281494141,
48
+ "eval_rouge1": 0.1636,
49
+ "eval_rouge2": 0.0174,
50
+ "eval_rougeL": 0.1451,
51
+ "eval_rougeLsum": 0.156,
52
+ "eval_runtime": 6.059,
53
+ "eval_samples_per_second": 40.931,
54
+ "eval_steps_per_second": 10.233,
55
+ "step": 744
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "eval_gen_len": 25.0,
60
+ "eval_loss": 5.705079078674316,
61
+ "eval_rouge1": 0.1625,
62
+ "eval_rouge2": 0.0178,
63
+ "eval_rougeL": 0.1444,
64
+ "eval_rougeLsum": 0.1549,
65
+ "eval_runtime": 5.7853,
66
+ "eval_samples_per_second": 42.868,
67
+ "eval_steps_per_second": 10.717,
68
+ "step": 992
69
+ }
70
+ ],
71
+ "logging_steps": 500,
72
+ "max_steps": 1240,
73
+ "num_input_tokens_seen": 0,
74
+ "num_train_epochs": 5,
75
+ "save_steps": 500,
76
+ "total_flos": 40377784320000.0,
77
+ "train_batch_size": 4,
78
+ "trial_name": null,
79
+ "trial_params": null
80
+ }
checkpoint-992/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091658d7b0b7435f21557e101ac1b5c52bb0268d57417626332a8488d3d185c9
3
+ size 4795
checkpoint-992/vocab.json ADDED
The diff for this file is too large to render. See raw diff