ayhm23 commited on
Commit
c151ed0
·
verified ·
1 Parent(s): 2c87b16

Upload folder using huggingface_hub

Browse files
Files changed (29) hide show
  1. phase4_50steps_smoke/checkpoint-25/added_tokens.json +5 -0
  2. phase4_50steps_smoke/checkpoint-25/config.json +29 -0
  3. phase4_50steps_smoke/checkpoint-25/generation_config.json +14 -0
  4. phase4_50steps_smoke/checkpoint-25/merges.txt +0 -0
  5. phase4_50steps_smoke/checkpoint-25/model.safetensors +3 -0
  6. phase4_50steps_smoke/checkpoint-25/optimizer.pt +3 -0
  7. phase4_50steps_smoke/checkpoint-25/rng_state.pth +3 -0
  8. phase4_50steps_smoke/checkpoint-25/scheduler.pt +3 -0
  9. phase4_50steps_smoke/checkpoint-25/special_tokens_map.json +14 -0
  10. phase4_50steps_smoke/checkpoint-25/tokenizer.json +3 -0
  11. phase4_50steps_smoke/checkpoint-25/tokenizer_config.json +44 -0
  12. phase4_50steps_smoke/checkpoint-25/trainer_state.json +93 -0
  13. phase4_50steps_smoke/checkpoint-25/training_args.bin +3 -0
  14. phase4_50steps_smoke/checkpoint-25/vocab.json +0 -0
  15. phase4_50steps_smoke/checkpoint-50/added_tokens.json +5 -0
  16. phase4_50steps_smoke/checkpoint-50/config.json +29 -0
  17. phase4_50steps_smoke/checkpoint-50/generation_config.json +14 -0
  18. phase4_50steps_smoke/checkpoint-50/merges.txt +0 -0
  19. phase4_50steps_smoke/checkpoint-50/model.safetensors +3 -0
  20. phase4_50steps_smoke/checkpoint-50/optimizer.pt +3 -0
  21. phase4_50steps_smoke/checkpoint-50/rng_state.pth +3 -0
  22. phase4_50steps_smoke/checkpoint-50/scheduler.pt +3 -0
  23. phase4_50steps_smoke/checkpoint-50/special_tokens_map.json +14 -0
  24. phase4_50steps_smoke/checkpoint-50/tokenizer.json +3 -0
  25. phase4_50steps_smoke/checkpoint-50/tokenizer_config.json +44 -0
  26. phase4_50steps_smoke/checkpoint-50/trainer_state.json +153 -0
  27. phase4_50steps_smoke/checkpoint-50/training_args.bin +3 -0
  28. phase4_50steps_smoke/checkpoint-50/vocab.json +0 -0
  29. training_log_phase4.json +131 -0
phase4_50steps_smoke/checkpoint-25/added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
phase4_50steps_smoke/checkpoint-25/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 14,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.47.1",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 151936
29
+ }
phase4_50steps_smoke/checkpoint-25/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.47.1"
14
+ }
phase4_50steps_smoke/checkpoint-25/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
phase4_50steps_smoke/checkpoint-25/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8cf44f285aa7ccf92adffcfebd3e138106614d299892c96bd5998835e7061ce
3
+ size 988097824
phase4_50steps_smoke/checkpoint-25/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e32ab6bb959cfb92f7e33b01a73ddab6db6850036e7a2812bd617f5c8a4e0cc
3
+ size 1976374202
phase4_50steps_smoke/checkpoint-25/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a225a70a9f40867bb4b43f22cb2f00d38476632e75acae9f866ea52253ec7eb
3
+ size 14244
phase4_50steps_smoke/checkpoint-25/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0147ee358028ddb9cb7d88061e4ce5373f63bb85dc349b4dd3f9fceb5e922b44
3
+ size 1064
phase4_50steps_smoke/checkpoint-25/special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": "<|im_end|>"
14
+ }
phase4_50steps_smoke/checkpoint-25/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17db16dda2e1867e40857a8ff53ecb50be68a9d5b5763bf7994271cc3b73dcb1
3
+ size 11418430
phase4_50steps_smoke/checkpoint-25/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "extra_special_tokens": {},
39
+ "model_max_length": 32768,
40
+ "pad_token": "<|im_end|>",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
phase4_50steps_smoke/checkpoint-25/trainer_state.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.8,
5
+ "eval_steps": 500,
6
+ "global_step": 25,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "completion_length": 87.5125,
13
+ "epoch": 0.16,
14
+ "grad_norm": 33.0,
15
+ "kl": 0.0008317360421642661,
16
+ "learning_rate": 5e-07,
17
+ "loss": 0.0,
18
+ "reward": -0.08881250247359276,
19
+ "reward_std": 0.1590106257237494,
20
+ "rewards/real_reward_fn": -0.08881250247359276,
21
+ "step": 5
22
+ },
23
+ {
24
+ "completion_length": 85.86875,
25
+ "epoch": 0.32,
26
+ "grad_norm": 55.25,
27
+ "kl": 0.001102981199801434,
28
+ "learning_rate": 5e-07,
29
+ "loss": 0.0,
30
+ "reward": -0.08131250264123083,
31
+ "reward_std": 0.1880020027048886,
32
+ "rewards/real_reward_fn": -0.08131250264123083,
33
+ "step": 10
34
+ },
35
+ {
36
+ "completion_length": 83.225,
37
+ "epoch": 0.48,
38
+ "grad_norm": 39.0,
39
+ "kl": 0.001013497701205779,
40
+ "learning_rate": 5e-07,
41
+ "loss": 0.0,
42
+ "reward": 0.02462499784305692,
43
+ "reward_std": 0.13523416444659234,
44
+ "rewards/real_reward_fn": 0.02462499784305692,
45
+ "step": 15
46
+ },
47
+ {
48
+ "completion_length": 80.8125,
49
+ "epoch": 0.64,
50
+ "grad_norm": 27.125,
51
+ "kl": 0.0011422165334806778,
52
+ "learning_rate": 5e-07,
53
+ "loss": 0.0,
54
+ "reward": -0.08068750295788049,
55
+ "reward_std": 0.18747167382389307,
56
+ "rewards/real_reward_fn": -0.08068750295788049,
57
+ "step": 20
58
+ },
59
+ {
60
+ "completion_length": 80.29375,
61
+ "epoch": 0.8,
62
+ "grad_norm": 30.5,
63
+ "kl": 0.0010913557125604711,
64
+ "learning_rate": 5e-07,
65
+ "loss": 0.0,
66
+ "reward": -0.03431250210851431,
67
+ "reward_std": 0.18764845319092274,
68
+ "rewards/real_reward_fn": -0.03431250210851431,
69
+ "step": 25
70
+ }
71
+ ],
72
+ "logging_steps": 5,
73
+ "max_steps": 50,
74
+ "num_input_tokens_seen": 0,
75
+ "num_train_epochs": 2,
76
+ "save_steps": 25,
77
+ "stateful_callbacks": {
78
+ "TrainerControl": {
79
+ "args": {
80
+ "should_epoch_stop": false,
81
+ "should_evaluate": false,
82
+ "should_log": false,
83
+ "should_save": true,
84
+ "should_training_stop": false
85
+ },
86
+ "attributes": {}
87
+ }
88
+ },
89
+ "total_flos": 0.0,
90
+ "train_batch_size": 2,
91
+ "trial_name": null,
92
+ "trial_params": null
93
+ }
phase4_50steps_smoke/checkpoint-25/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3ee00a3958bd8e3dd9fd7996d93814add2487f8f288a5fd88363fd1225305d
3
+ size 5560
phase4_50steps_smoke/checkpoint-25/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
phase4_50steps_smoke/checkpoint-50/added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
phase4_50steps_smoke/checkpoint-50/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 14,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.47.1",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 151936
29
+ }
phase4_50steps_smoke/checkpoint-50/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.47.1"
14
+ }
phase4_50steps_smoke/checkpoint-50/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
phase4_50steps_smoke/checkpoint-50/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b6a26050b73c52cc643bddf5c0eb6780a4ebfeb59dffafa5f33b314e1a4249
3
+ size 988097824
phase4_50steps_smoke/checkpoint-50/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6c37c3d737412b0ea3a5186370ad1d09dce1f7227f23cdf723423eec5baafd
3
+ size 1976374202
phase4_50steps_smoke/checkpoint-50/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a6bb635f72161d724e2a0bc9be0fe4f34e8ab268b3521b539c9c44508c3fc2
3
+ size 14244
phase4_50steps_smoke/checkpoint-50/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236d066e7dadb3b02c6a940d84a1d2ac49885f42b02673c132b47bc8e889c5e6
3
+ size 1064
phase4_50steps_smoke/checkpoint-50/special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": "<|im_end|>"
14
+ }
phase4_50steps_smoke/checkpoint-50/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17db16dda2e1867e40857a8ff53ecb50be68a9d5b5763bf7994271cc3b73dcb1
3
+ size 11418430
phase4_50steps_smoke/checkpoint-50/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "extra_special_tokens": {},
39
+ "model_max_length": 32768,
40
+ "pad_token": "<|im_end|>",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
phase4_50steps_smoke/checkpoint-50/trainer_state.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.576,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "completion_length": 87.5125,
13
+ "epoch": 0.16,
14
+ "grad_norm": 33.0,
15
+ "kl": 0.0008317360421642661,
16
+ "learning_rate": 5e-07,
17
+ "loss": 0.0,
18
+ "reward": -0.08881250247359276,
19
+ "reward_std": 0.1590106257237494,
20
+ "rewards/real_reward_fn": -0.08881250247359276,
21
+ "step": 5
22
+ },
23
+ {
24
+ "completion_length": 85.86875,
25
+ "epoch": 0.32,
26
+ "grad_norm": 55.25,
27
+ "kl": 0.001102981199801434,
28
+ "learning_rate": 5e-07,
29
+ "loss": 0.0,
30
+ "reward": -0.08131250264123083,
31
+ "reward_std": 0.1880020027048886,
32
+ "rewards/real_reward_fn": -0.08131250264123083,
33
+ "step": 10
34
+ },
35
+ {
36
+ "completion_length": 83.225,
37
+ "epoch": 0.48,
38
+ "grad_norm": 39.0,
39
+ "kl": 0.001013497701205779,
40
+ "learning_rate": 5e-07,
41
+ "loss": 0.0,
42
+ "reward": 0.02462499784305692,
43
+ "reward_std": 0.13523416444659234,
44
+ "rewards/real_reward_fn": 0.02462499784305692,
45
+ "step": 15
46
+ },
47
+ {
48
+ "completion_length": 80.8125,
49
+ "epoch": 0.64,
50
+ "grad_norm": 27.125,
51
+ "kl": 0.0011422165334806778,
52
+ "learning_rate": 5e-07,
53
+ "loss": 0.0,
54
+ "reward": -0.08068750295788049,
55
+ "reward_std": 0.18747167382389307,
56
+ "rewards/real_reward_fn": -0.08068750295788049,
57
+ "step": 20
58
+ },
59
+ {
60
+ "completion_length": 80.29375,
61
+ "epoch": 0.8,
62
+ "grad_norm": 30.5,
63
+ "kl": 0.0010913557125604711,
64
+ "learning_rate": 5e-07,
65
+ "loss": 0.0,
66
+ "reward": -0.03431250210851431,
67
+ "reward_std": 0.18764845319092274,
68
+ "rewards/real_reward_fn": -0.03431250210851431,
69
+ "step": 25
70
+ },
71
+ {
72
+ "completion_length": 80.575,
73
+ "epoch": 0.96,
74
+ "grad_norm": 58.0,
75
+ "kl": 0.0010487768857274204,
76
+ "learning_rate": 5e-07,
77
+ "loss": 0.0,
78
+ "reward": 0.024874999094754456,
79
+ "reward_std": 0.1568009190261364,
80
+ "rewards/real_reward_fn": 0.024874999094754456,
81
+ "step": 30
82
+ },
83
+ {
84
+ "completion_length": 76.90441176470588,
85
+ "epoch": 1.096,
86
+ "grad_norm": 35.0,
87
+ "kl": 0.0010246854611015057,
88
+ "learning_rate": 5e-07,
89
+ "loss": 0.0,
90
+ "reward": -0.05720588389564963,
91
+ "reward_std": 0.14079742714324417,
92
+ "rewards/real_reward_fn": -0.05720588389564963,
93
+ "step": 35
94
+ },
95
+ {
96
+ "completion_length": 83.84375,
97
+ "epoch": 1.256,
98
+ "grad_norm": 28.125,
99
+ "kl": 0.001108163884782698,
100
+ "learning_rate": 5e-07,
101
+ "loss": 0.0,
102
+ "reward": -0.02056250227615237,
103
+ "reward_std": 0.14504527123644947,
104
+ "rewards/real_reward_fn": -0.02056250227615237,
105
+ "step": 40
106
+ },
107
+ {
108
+ "completion_length": 79.25,
109
+ "epoch": 1.416,
110
+ "grad_norm": 32.0,
111
+ "kl": 0.0010782188706798478,
112
+ "learning_rate": 5e-07,
113
+ "loss": 0.0,
114
+ "reward": 0.021437497437000276,
115
+ "reward_std": 0.17562763625755906,
116
+ "rewards/real_reward_fn": 0.021437497437000276,
117
+ "step": 45
118
+ },
119
+ {
120
+ "completion_length": 86.08125,
121
+ "epoch": 1.576,
122
+ "grad_norm": 35.75,
123
+ "kl": 0.0010450664689415135,
124
+ "learning_rate": 5e-07,
125
+ "loss": 0.0,
126
+ "reward": -0.032250001840293405,
127
+ "reward_std": 0.16334165595471858,
128
+ "rewards/real_reward_fn": -0.032250001840293405,
129
+ "step": 50
130
+ }
131
+ ],
132
+ "logging_steps": 5,
133
+ "max_steps": 50,
134
+ "num_input_tokens_seen": 0,
135
+ "num_train_epochs": 2,
136
+ "save_steps": 25,
137
+ "stateful_callbacks": {
138
+ "TrainerControl": {
139
+ "args": {
140
+ "should_epoch_stop": false,
141
+ "should_evaluate": false,
142
+ "should_log": false,
143
+ "should_save": true,
144
+ "should_training_stop": true
145
+ },
146
+ "attributes": {}
147
+ }
148
+ },
149
+ "total_flos": 0.0,
150
+ "train_batch_size": 2,
151
+ "trial_name": null,
152
+ "trial_params": null
153
+ }
phase4_50steps_smoke/checkpoint-50/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3ee00a3958bd8e3dd9fd7996d93814add2487f8f288a5fd88363fd1225305d
3
+ size 5560
phase4_50steps_smoke/checkpoint-50/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
training_log_phase4.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 0.0,
4
+ "grad_norm": 33.0,
5
+ "learning_rate": 5e-07,
6
+ "completion_length": 87.5125,
7
+ "rewards/real_reward_fn": -0.08881250247359276,
8
+ "reward": -0.08881250247359276,
9
+ "reward_std": 0.1590106257237494,
10
+ "kl": 0.0008317360421642661,
11
+ "epoch": 0.16,
12
+ "step": 5
13
+ },
14
+ {
15
+ "loss": 0.0,
16
+ "grad_norm": 55.25,
17
+ "learning_rate": 5e-07,
18
+ "completion_length": 85.86875,
19
+ "rewards/real_reward_fn": -0.08131250264123083,
20
+ "reward": -0.08131250264123083,
21
+ "reward_std": 0.1880020027048886,
22
+ "kl": 0.001102981199801434,
23
+ "epoch": 0.32,
24
+ "step": 10
25
+ },
26
+ {
27
+ "loss": 0.0,
28
+ "grad_norm": 39.0,
29
+ "learning_rate": 5e-07,
30
+ "completion_length": 83.225,
31
+ "rewards/real_reward_fn": 0.02462499784305692,
32
+ "reward": 0.02462499784305692,
33
+ "reward_std": 0.13523416444659234,
34
+ "kl": 0.001013497701205779,
35
+ "epoch": 0.48,
36
+ "step": 15
37
+ },
38
+ {
39
+ "loss": 0.0,
40
+ "grad_norm": 27.125,
41
+ "learning_rate": 5e-07,
42
+ "completion_length": 80.8125,
43
+ "rewards/real_reward_fn": -0.08068750295788049,
44
+ "reward": -0.08068750295788049,
45
+ "reward_std": 0.18747167382389307,
46
+ "kl": 0.0011422165334806778,
47
+ "epoch": 0.64,
48
+ "step": 20
49
+ },
50
+ {
51
+ "loss": 0.0,
52
+ "grad_norm": 30.5,
53
+ "learning_rate": 5e-07,
54
+ "completion_length": 80.29375,
55
+ "rewards/real_reward_fn": -0.03431250210851431,
56
+ "reward": -0.03431250210851431,
57
+ "reward_std": 0.18764845319092274,
58
+ "kl": 0.0010913557125604711,
59
+ "epoch": 0.8,
60
+ "step": 25
61
+ },
62
+ {
63
+ "loss": 0.0,
64
+ "grad_norm": 58.0,
65
+ "learning_rate": 5e-07,
66
+ "completion_length": 80.575,
67
+ "rewards/real_reward_fn": 0.024874999094754456,
68
+ "reward": 0.024874999094754456,
69
+ "reward_std": 0.1568009190261364,
70
+ "kl": 0.0010487768857274204,
71
+ "epoch": 0.96,
72
+ "step": 30
73
+ },
74
+ {
75
+ "loss": 0.0,
76
+ "grad_norm": 35.0,
77
+ "learning_rate": 5e-07,
78
+ "completion_length": 76.90441176470588,
79
+ "rewards/real_reward_fn": -0.05720588389564963,
80
+ "reward": -0.05720588389564963,
81
+ "reward_std": 0.14079742714324417,
82
+ "kl": 0.0010246854611015057,
83
+ "epoch": 1.096,
84
+ "step": 35
85
+ },
86
+ {
87
+ "loss": 0.0,
88
+ "grad_norm": 28.125,
89
+ "learning_rate": 5e-07,
90
+ "completion_length": 83.84375,
91
+ "rewards/real_reward_fn": -0.02056250227615237,
92
+ "reward": -0.02056250227615237,
93
+ "reward_std": 0.14504527123644947,
94
+ "kl": 0.001108163884782698,
95
+ "epoch": 1.256,
96
+ "step": 40
97
+ },
98
+ {
99
+ "loss": 0.0,
100
+ "grad_norm": 32.0,
101
+ "learning_rate": 5e-07,
102
+ "completion_length": 79.25,
103
+ "rewards/real_reward_fn": 0.021437497437000276,
104
+ "reward": 0.021437497437000276,
105
+ "reward_std": 0.17562763625755906,
106
+ "kl": 0.0010782188706798478,
107
+ "epoch": 1.416,
108
+ "step": 45
109
+ },
110
+ {
111
+ "loss": 0.0,
112
+ "grad_norm": 35.75,
113
+ "learning_rate": 5e-07,
114
+ "completion_length": 86.08125,
115
+ "rewards/real_reward_fn": -0.032250001840293405,
116
+ "reward": -0.032250001840293405,
117
+ "reward_std": 0.16334165595471858,
118
+ "kl": 0.0010450664689415135,
119
+ "epoch": 1.576,
120
+ "step": 50
121
+ },
122
+ {
123
+ "train_runtime": 1158.1834,
124
+ "train_samples_per_second": 0.691,
125
+ "train_steps_per_second": 0.043,
126
+ "total_flos": 0.0,
127
+ "train_loss": 4.1337845323141666e-05,
128
+ "epoch": 1.576,
129
+ "step": 50
130
+ }
131
+ ]