SimonWSY committed
Commit c62e26d · 1 Parent(s): b2666e5

Upload 167 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. hackduke1/README.md +9 -0
  2. hackduke1/adapter_config.json +24 -0
  3. hackduke1/adapter_model.bin +3 -0
  4. hackduke1/all_results.json +7 -0
  5. hackduke1/checkpoint-100/README.md +9 -0
  6. hackduke1/checkpoint-100/adapter_config.json +24 -0
  7. hackduke1/checkpoint-100/adapter_model.bin +3 -0
  8. hackduke1/checkpoint-100/finetuning_args.json +19 -0
  9. hackduke1/checkpoint-100/optimizer.pt +3 -0
  10. hackduke1/checkpoint-100/rng_state.pth +3 -0
  11. hackduke1/checkpoint-100/scheduler.pt +3 -0
  12. hackduke1/checkpoint-100/trainer_state.json +79 -0
  13. hackduke1/checkpoint-100/training_args.bin +3 -0
  14. hackduke1/checkpoint-1000/README.md +9 -0
  15. hackduke1/checkpoint-1000/adapter_config.json +24 -0
  16. hackduke1/checkpoint-1000/adapter_model.bin +3 -0
  17. hackduke1/checkpoint-1000/finetuning_args.json +19 -0
  18. hackduke1/checkpoint-1000/optimizer.pt +3 -0
  19. hackduke1/checkpoint-1000/rng_state.pth +3 -0
  20. hackduke1/checkpoint-1000/scheduler.pt +3 -0
  21. hackduke1/checkpoint-1000/trainer_state.json +619 -0
  22. hackduke1/checkpoint-1000/training_args.bin +3 -0
  23. hackduke1/checkpoint-1100/README.md +9 -0
  24. hackduke1/checkpoint-1100/adapter_config.json +24 -0
  25. hackduke1/checkpoint-1100/adapter_model.bin +3 -0
  26. hackduke1/checkpoint-1100/finetuning_args.json +19 -0
  27. hackduke1/checkpoint-1100/optimizer.pt +3 -0
  28. hackduke1/checkpoint-1100/rng_state.pth +3 -0
  29. hackduke1/checkpoint-1100/scheduler.pt +3 -0
  30. hackduke1/checkpoint-1100/trainer_state.json +679 -0
  31. hackduke1/checkpoint-1100/training_args.bin +3 -0
  32. hackduke1/checkpoint-200/README.md +9 -0
  33. hackduke1/checkpoint-200/adapter_config.json +24 -0
  34. hackduke1/checkpoint-200/adapter_model.bin +3 -0
  35. hackduke1/checkpoint-200/finetuning_args.json +19 -0
  36. hackduke1/checkpoint-200/optimizer.pt +3 -0
  37. hackduke1/checkpoint-200/rng_state.pth +3 -0
  38. hackduke1/checkpoint-200/scheduler.pt +3 -0
  39. hackduke1/checkpoint-200/trainer_state.json +139 -0
  40. hackduke1/checkpoint-200/training_args.bin +3 -0
  41. hackduke1/checkpoint-300/README.md +9 -0
  42. hackduke1/checkpoint-300/adapter_config.json +24 -0
  43. hackduke1/checkpoint-300/adapter_model.bin +3 -0
  44. hackduke1/checkpoint-300/finetuning_args.json +19 -0
  45. hackduke1/checkpoint-300/optimizer.pt +3 -0
  46. hackduke1/checkpoint-300/rng_state.pth +3 -0
  47. hackduke1/checkpoint-300/scheduler.pt +3 -0
  48. hackduke1/checkpoint-300/trainer_state.json +199 -0
  49. hackduke1/checkpoint-300/training_args.bin +3 -0
  50. hackduke1/checkpoint-400/README.md +9 -0
hackduke1/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
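As a point of reference, here is a minimal sketch of how an adapter with this configuration could be attached to the base model using PEFT. The local paths are assumptions taken from `base_model_name_or_path` above and from this repo's layout; they are not part of this commit.

```python
# Sketch only: assumes the Qwen base weights and this adapter directory exist locally.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_path = "Models/Qwen-7B-chat"   # from base_model_name_or_path above (assumed local path)
adapter_path = "hackduke1"          # directory holding adapter_config.json / adapter_model.bin

tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(base_path, trust_remote_code=True)

# Wrap the base model with the LoRA adapter (r=16, alpha=32, dropout=0.1 per the config).
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()
```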
hackduke1/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:116af431e95ed2f09284b37c37fc71f713d8d4f96fd3c1efc52dead97ca87b4c
+ size 33576177
hackduke1/all_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 9.93,
+ "train_loss": 1.5241096448090117,
+ "train_runtime": 23566.1012,
+ "train_samples_per_second": 1.614,
+ "train_steps_per_second": 0.05
+ }
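As a rough sanity check, this run summary is consistent with the `max_steps` value recorded in the checkpoints' `trainer_state.json` files; this is plain arithmetic on the numbers shown above, nothing more.

```python
# Back-of-the-envelope check of all_results.json: steps/sec * runtime should
# roughly match the max_steps recorded in the trainer_state files (1180).
train_runtime = 23566.1012           # seconds, from all_results.json
train_steps_per_second = 0.05        # from all_results.json
approx_total_steps = train_steps_per_second * train_runtime
print(approx_total_steps)            # ~1178, in line with max_steps = 1180
```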
hackduke1/checkpoint-100/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
hackduke1/checkpoint-100/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55deaa7606be5a01e5039dc34669b4348778b72310cc1e0d9e243ac10b19d5a4
+ size 33576177
hackduke1/checkpoint-100/finetuning_args.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "dpo_beta": 0.1,
+ "finetuning_type": "lora",
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "lora_rank": 16,
+ "lora_target": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "name_module_trainable": "mlp",
+ "num_hidden_layers": 32,
+ "num_layer_trainable": 3,
+ "ppo_score_norm": false,
+ "resume_lora_training": true
+ }
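These fields describe a standard LoRA setup; a hedged sketch of the roughly equivalent `LoraConfig` follows. The field-name mapping is an assumption for illustration, not taken from the training code that produced this file.

```python
# Approximate LoraConfig implied by finetuning_args.json (field mapping assumed).
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                      # lora_rank
    lora_alpha=32,             # lora_alpha (32.0 in the file)
    lora_dropout=0.1,          # lora_dropout
    target_modules=["c_attn", "o_proj", "down_proj", "up_proj", "gate_proj"],
    task_type="CAUSAL_LM",
)
```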
hackduke1/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ac0a142daf62a2a49db3546027f1df8b629f14a1e994e936547e398899f2122
+ size 67147589
hackduke1/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db50077f1eb9cbedc143814b21b2be4cafe9bbc984e227373d92c61ebf315465
+ size 14575
hackduke1/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d041a7e96c17819841a9a0c03258454e444965f215930b8240eeecb6ee97e88
+ size 627
hackduke1/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,79 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.8412197686645636,
+ "eval_steps": 100.0,
+ "global_step": 100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.08,
+ "learning_rate": 5e-06,
+ "loss": 4.1311,
+ "step": 10
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 1e-05,
+ "loss": 4.0983,
+ "step": 20
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 1.5e-05,
+ "loss": 3.9944,
+ "step": 30
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 2e-05,
+ "loss": 3.8495,
+ "step": 40
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 2.5e-05,
+ "loss": 3.5944,
+ "step": 50
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 3e-05,
+ "loss": 3.1649,
+ "step": 60
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 3.5e-05,
+ "loss": 3.0072,
+ "step": 70
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 4e-05,
+ "loss": 2.9697,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.5e-05,
+ "loss": 2.8497,
+ "step": 90
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 5e-05,
+ "loss": 2.8376,
+ "step": 100
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1180,
+ "num_train_epochs": 10,
+ "save_steps": 100,
+ "total_flos": 8.641986753645773e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
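The `log_history` entries above follow the usual Hugging Face `Trainer` schema, so the loss curve can be pulled straight out of any of these files. A minimal sketch, assuming the checkpoint layout shown in this commit:

```python
# Sketch: read a checkpoint's trainer_state.json and extract step/loss pairs.
import json

with open("hackduke1/checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]
print(list(zip(steps, losses))[:3])   # e.g. [(10, 4.1311), (20, 4.0983), (30, 3.9944)]
```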
hackduke1/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53de37053b6032a74e6fda2a983e6126718c64bf2e866e225af8abfa727bba20
+ size 3347
hackduke1/checkpoint-1000/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
hackduke1/checkpoint-1000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77465051c0e3b489a82396200a03f3aaa589feb7dfeb82a0afdee4ee67e02905
+ size 33576177
hackduke1/checkpoint-1000/finetuning_args.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "dpo_beta": 0.1,
+ "finetuning_type": "lora",
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "lora_rank": 16,
+ "lora_target": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "name_module_trainable": "mlp",
+ "num_hidden_layers": 32,
+ "num_layer_trainable": 3,
+ "ppo_score_norm": false,
+ "resume_lora_training": true
+ }
hackduke1/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d03d67f3f8b3c98c291f9aaf18ab103185885eaac4648280aa3722c64b81998d
+ size 67147653
hackduke1/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1989b96431be4c9d668275f57ec5ec884da4ce3dcd28562a0ab40aae8fb83e14
+ size 14575
hackduke1/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a1d5d6425646b33604eed6cb67a751cd38fe0fd43bbe4c65d1ec71837a4a2b3
+ size 627
hackduke1/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,619 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 8.412197686645635,
5
+ "eval_steps": 100.0,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "learning_rate": 5e-06,
14
+ "loss": 4.1311,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.17,
19
+ "learning_rate": 1e-05,
20
+ "loss": 4.0983,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.25,
25
+ "learning_rate": 1.5e-05,
26
+ "loss": 3.9944,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.34,
31
+ "learning_rate": 2e-05,
32
+ "loss": 3.8495,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.42,
37
+ "learning_rate": 2.5e-05,
38
+ "loss": 3.5944,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.5,
43
+ "learning_rate": 3e-05,
44
+ "loss": 3.1649,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.59,
49
+ "learning_rate": 3.5e-05,
50
+ "loss": 3.0072,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.67,
55
+ "learning_rate": 4e-05,
56
+ "loss": 2.9697,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.76,
61
+ "learning_rate": 4.5e-05,
62
+ "loss": 2.8497,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.84,
67
+ "learning_rate": 5e-05,
68
+ "loss": 2.8376,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.93,
73
+ "learning_rate": 4.998942375205502e-05,
74
+ "loss": 2.8255,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 1.01,
79
+ "learning_rate": 4.995770395678171e-05,
80
+ "loss": 2.7066,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 1.09,
85
+ "learning_rate": 4.990486745229364e-05,
86
+ "loss": 2.6717,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 1.18,
91
+ "learning_rate": 4.983095894354858e-05,
92
+ "loss": 2.6093,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 1.26,
97
+ "learning_rate": 4.973604096452361e-05,
98
+ "loss": 2.588,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 1.35,
103
+ "learning_rate": 4.962019382530521e-05,
104
+ "loss": 2.5881,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 1.43,
109
+ "learning_rate": 4.948351554413879e-05,
110
+ "loss": 2.4645,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 1.51,
115
+ "learning_rate": 4.9326121764495596e-05,
116
+ "loss": 2.4118,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 1.6,
121
+ "learning_rate": 4.914814565722671e-05,
122
+ "loss": 2.5078,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 1.68,
127
+ "learning_rate": 4.894973780788722e-05,
128
+ "loss": 2.3461,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 1.77,
133
+ "learning_rate": 4.873106608932585e-05,
134
+ "loss": 2.3559,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 1.85,
139
+ "learning_rate": 4.849231551964771e-05,
140
+ "loss": 2.4097,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 1.93,
145
+ "learning_rate": 4.823368810567056e-05,
146
+ "loss": 2.2607,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 2.02,
151
+ "learning_rate": 4.7955402672006854e-05,
152
+ "loss": 2.1208,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 2.1,
157
+ "learning_rate": 4.765769467591625e-05,
158
+ "loss": 2.112,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 2.19,
163
+ "learning_rate": 4.734081600808531e-05,
164
+ "loss": 2.1259,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 2.27,
169
+ "learning_rate": 4.700503477950278e-05,
170
+ "loss": 2.1712,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 2.36,
175
+ "learning_rate": 4.665063509461097e-05,
176
+ "loss": 1.9872,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 2.44,
181
+ "learning_rate": 4.627791681092499e-05,
182
+ "loss": 1.9918,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 2.52,
187
+ "learning_rate": 4.588719528532342e-05,
188
+ "loss": 2.0882,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 2.61,
193
+ "learning_rate": 4.54788011072248e-05,
194
+ "loss": 1.9361,
195
+ "step": 310
196
+ },
197
+ {
198
+ "epoch": 2.69,
199
+ "learning_rate": 4.50530798188761e-05,
200
+ "loss": 1.9715,
201
+ "step": 320
202
+ },
203
+ {
204
+ "epoch": 2.78,
205
+ "learning_rate": 4.4610391622989396e-05,
206
+ "loss": 1.8659,
207
+ "step": 330
208
+ },
209
+ {
210
+ "epoch": 2.86,
211
+ "learning_rate": 4.415111107797445e-05,
212
+ "loss": 1.8453,
213
+ "step": 340
214
+ },
215
+ {
216
+ "epoch": 2.94,
217
+ "learning_rate": 4.36756267810249e-05,
218
+ "loss": 1.9347,
219
+ "step": 350
220
+ },
221
+ {
222
+ "epoch": 3.03,
223
+ "learning_rate": 4.318434103932622e-05,
224
+ "loss": 1.8471,
225
+ "step": 360
226
+ },
227
+ {
228
+ "epoch": 3.11,
229
+ "learning_rate": 4.267766952966369e-05,
230
+ "loss": 1.7312,
231
+ "step": 370
232
+ },
233
+ {
234
+ "epoch": 3.2,
235
+ "learning_rate": 4.215604094671835e-05,
236
+ "loss": 1.5693,
237
+ "step": 380
238
+ },
239
+ {
240
+ "epoch": 3.28,
241
+ "learning_rate": 4.1619896640348445e-05,
242
+ "loss": 1.7114,
243
+ "step": 390
244
+ },
245
+ {
246
+ "epoch": 3.36,
247
+ "learning_rate": 4.1069690242163484e-05,
248
+ "loss": 1.5693,
249
+ "step": 400
250
+ },
251
+ {
252
+ "epoch": 3.45,
253
+ "learning_rate": 4.05058872817065e-05,
254
+ "loss": 1.588,
255
+ "step": 410
256
+ },
257
+ {
258
+ "epoch": 3.53,
259
+ "learning_rate": 3.9928964792569655e-05,
260
+ "loss": 1.6776,
261
+ "step": 420
262
+ },
263
+ {
264
+ "epoch": 3.62,
265
+ "learning_rate": 3.933941090877615e-05,
266
+ "loss": 1.633,
267
+ "step": 430
268
+ },
269
+ {
270
+ "epoch": 3.7,
271
+ "learning_rate": 3.873772445177015e-05,
272
+ "loss": 1.5435,
273
+ "step": 440
274
+ },
275
+ {
276
+ "epoch": 3.79,
277
+ "learning_rate": 3.8124414508364e-05,
278
+ "loss": 1.522,
279
+ "step": 450
280
+ },
281
+ {
282
+ "epoch": 3.87,
283
+ "learning_rate": 3.7500000000000003e-05,
284
+ "loss": 1.4307,
285
+ "step": 460
286
+ },
287
+ {
288
+ "epoch": 3.95,
289
+ "learning_rate": 3.686500924369101e-05,
290
+ "loss": 1.6171,
291
+ "step": 470
292
+ },
293
+ {
294
+ "epoch": 4.04,
295
+ "learning_rate": 3.621997950501156e-05,
296
+ "loss": 1.482,
297
+ "step": 480
298
+ },
299
+ {
300
+ "epoch": 4.12,
301
+ "learning_rate": 3.556545654351749e-05,
302
+ "loss": 1.4552,
303
+ "step": 490
304
+ },
305
+ {
306
+ "epoch": 4.21,
307
+ "learning_rate": 3.490199415097892e-05,
308
+ "loss": 1.2726,
309
+ "step": 500
310
+ },
311
+ {
312
+ "epoch": 4.29,
313
+ "learning_rate": 3.423015368281711e-05,
314
+ "loss": 1.312,
315
+ "step": 510
316
+ },
317
+ {
318
+ "epoch": 4.37,
319
+ "learning_rate": 3.355050358314172e-05,
320
+ "loss": 1.3236,
321
+ "step": 520
322
+ },
323
+ {
324
+ "epoch": 4.46,
325
+ "learning_rate": 3.2863618903790346e-05,
326
+ "loss": 1.2786,
327
+ "step": 530
328
+ },
329
+ {
330
+ "epoch": 4.54,
331
+ "learning_rate": 3.217008081777726e-05,
332
+ "loss": 1.2587,
333
+ "step": 540
334
+ },
335
+ {
336
+ "epoch": 4.63,
337
+ "learning_rate": 3.147047612756302e-05,
338
+ "loss": 1.3509,
339
+ "step": 550
340
+ },
341
+ {
342
+ "epoch": 4.71,
343
+ "learning_rate": 3.076539676856101e-05,
344
+ "loss": 1.2632,
345
+ "step": 560
346
+ },
347
+ {
348
+ "epoch": 4.79,
349
+ "learning_rate": 3.0055439308300952e-05,
350
+ "loss": 1.2215,
351
+ "step": 570
352
+ },
353
+ {
354
+ "epoch": 4.88,
355
+ "learning_rate": 2.9341204441673266e-05,
356
+ "loss": 1.2773,
357
+ "step": 580
358
+ },
359
+ {
360
+ "epoch": 4.96,
361
+ "learning_rate": 2.8623296482681166e-05,
362
+ "loss": 1.3122,
363
+ "step": 590
364
+ },
365
+ {
366
+ "epoch": 5.05,
367
+ "learning_rate": 2.7902322853130757e-05,
368
+ "loss": 1.2999,
369
+ "step": 600
370
+ },
371
+ {
372
+ "epoch": 5.13,
373
+ "learning_rate": 2.717889356869146e-05,
374
+ "loss": 1.13,
375
+ "step": 610
376
+ },
377
+ {
378
+ "epoch": 5.22,
379
+ "learning_rate": 2.6453620722761896e-05,
380
+ "loss": 1.1283,
381
+ "step": 620
382
+ },
383
+ {
384
+ "epoch": 5.3,
385
+ "learning_rate": 2.5727117968577784e-05,
386
+ "loss": 1.0922,
387
+ "step": 630
388
+ },
389
+ {
390
+ "epoch": 5.38,
391
+ "learning_rate": 2.5e-05,
392
+ "loss": 1.0549,
393
+ "step": 640
394
+ },
395
+ {
396
+ "epoch": 5.47,
397
+ "learning_rate": 2.4272882031422215e-05,
398
+ "loss": 1.0966,
399
+ "step": 650
400
+ },
401
+ {
402
+ "epoch": 5.55,
403
+ "learning_rate": 2.3546379277238107e-05,
404
+ "loss": 1.2114,
405
+ "step": 660
406
+ },
407
+ {
408
+ "epoch": 5.64,
409
+ "learning_rate": 2.2821106431308544e-05,
410
+ "loss": 0.9685,
411
+ "step": 670
412
+ },
413
+ {
414
+ "epoch": 5.72,
415
+ "learning_rate": 2.2097677146869242e-05,
416
+ "loss": 0.9688,
417
+ "step": 680
418
+ },
419
+ {
420
+ "epoch": 5.8,
421
+ "learning_rate": 2.1376703517318837e-05,
422
+ "loss": 1.0943,
423
+ "step": 690
424
+ },
425
+ {
426
+ "epoch": 5.89,
427
+ "learning_rate": 2.0658795558326743e-05,
428
+ "loss": 0.9363,
429
+ "step": 700
430
+ },
431
+ {
432
+ "epoch": 5.97,
433
+ "learning_rate": 1.9944560691699057e-05,
434
+ "loss": 1.1054,
435
+ "step": 710
436
+ },
437
+ {
438
+ "epoch": 6.06,
439
+ "learning_rate": 1.9234603231438995e-05,
440
+ "loss": 1.0781,
441
+ "step": 720
442
+ },
443
+ {
444
+ "epoch": 6.14,
445
+ "learning_rate": 1.852952387243698e-05,
446
+ "loss": 0.8869,
447
+ "step": 730
448
+ },
449
+ {
450
+ "epoch": 6.23,
451
+ "learning_rate": 1.7829919182222752e-05,
452
+ "loss": 0.8957,
453
+ "step": 740
454
+ },
455
+ {
456
+ "epoch": 6.31,
457
+ "learning_rate": 1.7136381096209664e-05,
458
+ "loss": 1.0628,
459
+ "step": 750
460
+ },
461
+ {
462
+ "epoch": 6.39,
463
+ "learning_rate": 1.6449496416858284e-05,
464
+ "loss": 1.0005,
465
+ "step": 760
466
+ },
467
+ {
468
+ "epoch": 6.48,
469
+ "learning_rate": 1.5769846317182893e-05,
470
+ "loss": 0.9747,
471
+ "step": 770
472
+ },
473
+ {
474
+ "epoch": 6.56,
475
+ "learning_rate": 1.509800584902108e-05,
476
+ "loss": 1.0524,
477
+ "step": 780
478
+ },
479
+ {
480
+ "epoch": 6.65,
481
+ "learning_rate": 1.443454345648252e-05,
482
+ "loss": 1.0064,
483
+ "step": 790
484
+ },
485
+ {
486
+ "epoch": 6.73,
487
+ "learning_rate": 1.3780020494988446e-05,
488
+ "loss": 0.979,
489
+ "step": 800
490
+ },
491
+ {
492
+ "epoch": 6.81,
493
+ "learning_rate": 1.313499075630899e-05,
494
+ "loss": 0.9495,
495
+ "step": 810
496
+ },
497
+ {
498
+ "epoch": 6.9,
499
+ "learning_rate": 1.2500000000000006e-05,
500
+ "loss": 0.8098,
501
+ "step": 820
502
+ },
503
+ {
504
+ "epoch": 6.98,
505
+ "learning_rate": 1.1875585491636e-05,
506
+ "loss": 0.9074,
507
+ "step": 830
508
+ },
509
+ {
510
+ "epoch": 7.07,
511
+ "learning_rate": 1.126227554822985e-05,
512
+ "loss": 0.8522,
513
+ "step": 840
514
+ },
515
+ {
516
+ "epoch": 7.15,
517
+ "learning_rate": 1.0660589091223855e-05,
518
+ "loss": 0.8949,
519
+ "step": 850
520
+ },
521
+ {
522
+ "epoch": 7.23,
523
+ "learning_rate": 1.0071035207430352e-05,
524
+ "loss": 0.9344,
525
+ "step": 860
526
+ },
527
+ {
528
+ "epoch": 7.32,
529
+ "learning_rate": 9.494112718293501e-06,
530
+ "loss": 0.7258,
531
+ "step": 870
532
+ },
533
+ {
534
+ "epoch": 7.4,
535
+ "learning_rate": 8.930309757836517e-06,
536
+ "loss": 0.8315,
537
+ "step": 880
538
+ },
539
+ {
540
+ "epoch": 7.49,
541
+ "learning_rate": 8.380103359651553e-06,
542
+ "loss": 0.9103,
543
+ "step": 890
544
+ },
545
+ {
546
+ "epoch": 7.57,
547
+ "learning_rate": 7.843959053281663e-06,
548
+ "loss": 0.8161,
549
+ "step": 900
550
+ },
551
+ {
552
+ "epoch": 7.66,
553
+ "learning_rate": 7.3223304703363135e-06,
554
+ "loss": 0.8657,
555
+ "step": 910
556
+ },
557
+ {
558
+ "epoch": 7.74,
559
+ "learning_rate": 6.815658960673782e-06,
560
+ "loss": 0.8315,
561
+ "step": 920
562
+ },
563
+ {
564
+ "epoch": 7.82,
565
+ "learning_rate": 6.324373218975105e-06,
566
+ "loss": 0.8273,
567
+ "step": 930
568
+ },
569
+ {
570
+ "epoch": 7.91,
571
+ "learning_rate": 5.848888922025553e-06,
572
+ "loss": 0.8765,
573
+ "step": 940
574
+ },
575
+ {
576
+ "epoch": 7.99,
577
+ "learning_rate": 5.389608377010608e-06,
578
+ "loss": 0.8816,
579
+ "step": 950
580
+ },
581
+ {
582
+ "epoch": 8.08,
583
+ "learning_rate": 4.946920181123904e-06,
584
+ "loss": 0.8582,
585
+ "step": 960
586
+ },
587
+ {
588
+ "epoch": 8.16,
589
+ "learning_rate": 4.521198892775203e-06,
590
+ "loss": 0.7583,
591
+ "step": 970
592
+ },
593
+ {
594
+ "epoch": 8.24,
595
+ "learning_rate": 4.112804714676594e-06,
596
+ "loss": 0.862,
597
+ "step": 980
598
+ },
599
+ {
600
+ "epoch": 8.33,
601
+ "learning_rate": 3.7220831890750067e-06,
602
+ "loss": 0.8618,
603
+ "step": 990
604
+ },
605
+ {
606
+ "epoch": 8.41,
607
+ "learning_rate": 3.3493649053890326e-06,
608
+ "loss": 0.6312,
609
+ "step": 1000
610
+ }
611
+ ],
612
+ "logging_steps": 10,
613
+ "max_steps": 1180,
614
+ "num_train_epochs": 10,
615
+ "save_steps": 100,
616
+ "total_flos": 8.63087876644012e+17,
617
+ "trial_name": null,
618
+ "trial_params": null
619
+ }
hackduke1/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53de37053b6032a74e6fda2a983e6126718c64bf2e866e225af8abfa727bba20
+ size 3347
hackduke1/checkpoint-1100/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/checkpoint-1100/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
hackduke1/checkpoint-1100/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbc59d19d1ca82c85201de069cfc037a61f048a13dd3c481b1b04e5691e660de
+ size 33576177
hackduke1/checkpoint-1100/finetuning_args.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "dpo_beta": 0.1,
+ "finetuning_type": "lora",
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "lora_rank": 16,
+ "lora_target": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "name_module_trainable": "mlp",
+ "num_hidden_layers": 32,
+ "num_layer_trainable": 3,
+ "ppo_score_norm": false,
+ "resume_lora_training": true
+ }
hackduke1/checkpoint-1100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80d4cf7300a93886bdfdaabcd8ea4203feb38cdba1dd85d1be25f29cd41e305a
+ size 67147653
hackduke1/checkpoint-1100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9e3b3f700cfedc34d382047ee40e12ae29301b40125f7c8fbdc132d03520170
+ size 14575
hackduke1/checkpoint-1100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57781510400d40de02766b75b37cd6fbec2419b43e817559e3a8cf8ed6822b95
+ size 627
hackduke1/checkpoint-1100/trainer_state.json ADDED
@@ -0,0 +1,679 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.253417455310199,
5
+ "eval_steps": 100.0,
6
+ "global_step": 1100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "learning_rate": 5e-06,
14
+ "loss": 4.1311,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.17,
19
+ "learning_rate": 1e-05,
20
+ "loss": 4.0983,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.25,
25
+ "learning_rate": 1.5e-05,
26
+ "loss": 3.9944,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.34,
31
+ "learning_rate": 2e-05,
32
+ "loss": 3.8495,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.42,
37
+ "learning_rate": 2.5e-05,
38
+ "loss": 3.5944,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.5,
43
+ "learning_rate": 3e-05,
44
+ "loss": 3.1649,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.59,
49
+ "learning_rate": 3.5e-05,
50
+ "loss": 3.0072,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.67,
55
+ "learning_rate": 4e-05,
56
+ "loss": 2.9697,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.76,
61
+ "learning_rate": 4.5e-05,
62
+ "loss": 2.8497,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.84,
67
+ "learning_rate": 5e-05,
68
+ "loss": 2.8376,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.93,
73
+ "learning_rate": 4.998942375205502e-05,
74
+ "loss": 2.8255,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 1.01,
79
+ "learning_rate": 4.995770395678171e-05,
80
+ "loss": 2.7066,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 1.09,
85
+ "learning_rate": 4.990486745229364e-05,
86
+ "loss": 2.6717,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 1.18,
91
+ "learning_rate": 4.983095894354858e-05,
92
+ "loss": 2.6093,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 1.26,
97
+ "learning_rate": 4.973604096452361e-05,
98
+ "loss": 2.588,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 1.35,
103
+ "learning_rate": 4.962019382530521e-05,
104
+ "loss": 2.5881,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 1.43,
109
+ "learning_rate": 4.948351554413879e-05,
110
+ "loss": 2.4645,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 1.51,
115
+ "learning_rate": 4.9326121764495596e-05,
116
+ "loss": 2.4118,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 1.6,
121
+ "learning_rate": 4.914814565722671e-05,
122
+ "loss": 2.5078,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 1.68,
127
+ "learning_rate": 4.894973780788722e-05,
128
+ "loss": 2.3461,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 1.77,
133
+ "learning_rate": 4.873106608932585e-05,
134
+ "loss": 2.3559,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 1.85,
139
+ "learning_rate": 4.849231551964771e-05,
140
+ "loss": 2.4097,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 1.93,
145
+ "learning_rate": 4.823368810567056e-05,
146
+ "loss": 2.2607,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 2.02,
151
+ "learning_rate": 4.7955402672006854e-05,
152
+ "loss": 2.1208,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 2.1,
157
+ "learning_rate": 4.765769467591625e-05,
158
+ "loss": 2.112,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 2.19,
163
+ "learning_rate": 4.734081600808531e-05,
164
+ "loss": 2.1259,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 2.27,
169
+ "learning_rate": 4.700503477950278e-05,
170
+ "loss": 2.1712,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 2.36,
175
+ "learning_rate": 4.665063509461097e-05,
176
+ "loss": 1.9872,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 2.44,
181
+ "learning_rate": 4.627791681092499e-05,
182
+ "loss": 1.9918,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 2.52,
187
+ "learning_rate": 4.588719528532342e-05,
188
+ "loss": 2.0882,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 2.61,
193
+ "learning_rate": 4.54788011072248e-05,
194
+ "loss": 1.9361,
195
+ "step": 310
196
+ },
197
+ {
198
+ "epoch": 2.69,
199
+ "learning_rate": 4.50530798188761e-05,
200
+ "loss": 1.9715,
201
+ "step": 320
202
+ },
203
+ {
204
+ "epoch": 2.78,
205
+ "learning_rate": 4.4610391622989396e-05,
206
+ "loss": 1.8659,
207
+ "step": 330
208
+ },
209
+ {
210
+ "epoch": 2.86,
211
+ "learning_rate": 4.415111107797445e-05,
212
+ "loss": 1.8453,
213
+ "step": 340
214
+ },
215
+ {
216
+ "epoch": 2.94,
217
+ "learning_rate": 4.36756267810249e-05,
218
+ "loss": 1.9347,
219
+ "step": 350
220
+ },
221
+ {
222
+ "epoch": 3.03,
223
+ "learning_rate": 4.318434103932622e-05,
224
+ "loss": 1.8471,
225
+ "step": 360
226
+ },
227
+ {
228
+ "epoch": 3.11,
229
+ "learning_rate": 4.267766952966369e-05,
230
+ "loss": 1.7312,
231
+ "step": 370
232
+ },
233
+ {
234
+ "epoch": 3.2,
235
+ "learning_rate": 4.215604094671835e-05,
236
+ "loss": 1.5693,
237
+ "step": 380
238
+ },
239
+ {
240
+ "epoch": 3.28,
241
+ "learning_rate": 4.1619896640348445e-05,
242
+ "loss": 1.7114,
243
+ "step": 390
244
+ },
245
+ {
246
+ "epoch": 3.36,
247
+ "learning_rate": 4.1069690242163484e-05,
248
+ "loss": 1.5693,
249
+ "step": 400
250
+ },
251
+ {
252
+ "epoch": 3.45,
253
+ "learning_rate": 4.05058872817065e-05,
254
+ "loss": 1.588,
255
+ "step": 410
256
+ },
257
+ {
258
+ "epoch": 3.53,
259
+ "learning_rate": 3.9928964792569655e-05,
260
+ "loss": 1.6776,
261
+ "step": 420
262
+ },
263
+ {
264
+ "epoch": 3.62,
265
+ "learning_rate": 3.933941090877615e-05,
266
+ "loss": 1.633,
267
+ "step": 430
268
+ },
269
+ {
270
+ "epoch": 3.7,
271
+ "learning_rate": 3.873772445177015e-05,
272
+ "loss": 1.5435,
273
+ "step": 440
274
+ },
275
+ {
276
+ "epoch": 3.79,
277
+ "learning_rate": 3.8124414508364e-05,
278
+ "loss": 1.522,
279
+ "step": 450
280
+ },
281
+ {
282
+ "epoch": 3.87,
283
+ "learning_rate": 3.7500000000000003e-05,
284
+ "loss": 1.4307,
285
+ "step": 460
286
+ },
287
+ {
288
+ "epoch": 3.95,
289
+ "learning_rate": 3.686500924369101e-05,
290
+ "loss": 1.6171,
291
+ "step": 470
292
+ },
293
+ {
294
+ "epoch": 4.04,
295
+ "learning_rate": 3.621997950501156e-05,
296
+ "loss": 1.482,
297
+ "step": 480
298
+ },
299
+ {
300
+ "epoch": 4.12,
301
+ "learning_rate": 3.556545654351749e-05,
302
+ "loss": 1.4552,
303
+ "step": 490
304
+ },
305
+ {
306
+ "epoch": 4.21,
307
+ "learning_rate": 3.490199415097892e-05,
308
+ "loss": 1.2726,
309
+ "step": 500
310
+ },
311
+ {
312
+ "epoch": 4.29,
313
+ "learning_rate": 3.423015368281711e-05,
314
+ "loss": 1.312,
315
+ "step": 510
316
+ },
317
+ {
318
+ "epoch": 4.37,
319
+ "learning_rate": 3.355050358314172e-05,
320
+ "loss": 1.3236,
321
+ "step": 520
322
+ },
323
+ {
324
+ "epoch": 4.46,
325
+ "learning_rate": 3.2863618903790346e-05,
326
+ "loss": 1.2786,
327
+ "step": 530
328
+ },
329
+ {
330
+ "epoch": 4.54,
331
+ "learning_rate": 3.217008081777726e-05,
332
+ "loss": 1.2587,
333
+ "step": 540
334
+ },
335
+ {
336
+ "epoch": 4.63,
337
+ "learning_rate": 3.147047612756302e-05,
338
+ "loss": 1.3509,
339
+ "step": 550
340
+ },
341
+ {
342
+ "epoch": 4.71,
343
+ "learning_rate": 3.076539676856101e-05,
344
+ "loss": 1.2632,
345
+ "step": 560
346
+ },
347
+ {
348
+ "epoch": 4.79,
349
+ "learning_rate": 3.0055439308300952e-05,
350
+ "loss": 1.2215,
351
+ "step": 570
352
+ },
353
+ {
354
+ "epoch": 4.88,
355
+ "learning_rate": 2.9341204441673266e-05,
356
+ "loss": 1.2773,
357
+ "step": 580
358
+ },
359
+ {
360
+ "epoch": 4.96,
361
+ "learning_rate": 2.8623296482681166e-05,
362
+ "loss": 1.3122,
363
+ "step": 590
364
+ },
365
+ {
366
+ "epoch": 5.05,
367
+ "learning_rate": 2.7902322853130757e-05,
368
+ "loss": 1.2999,
369
+ "step": 600
370
+ },
371
+ {
372
+ "epoch": 5.13,
373
+ "learning_rate": 2.717889356869146e-05,
374
+ "loss": 1.13,
375
+ "step": 610
376
+ },
377
+ {
378
+ "epoch": 5.22,
379
+ "learning_rate": 2.6453620722761896e-05,
380
+ "loss": 1.1283,
381
+ "step": 620
382
+ },
383
+ {
384
+ "epoch": 5.3,
385
+ "learning_rate": 2.5727117968577784e-05,
386
+ "loss": 1.0922,
387
+ "step": 630
388
+ },
389
+ {
390
+ "epoch": 5.38,
391
+ "learning_rate": 2.5e-05,
392
+ "loss": 1.0549,
393
+ "step": 640
394
+ },
395
+ {
396
+ "epoch": 5.47,
397
+ "learning_rate": 2.4272882031422215e-05,
398
+ "loss": 1.0966,
399
+ "step": 650
400
+ },
401
+ {
402
+ "epoch": 5.55,
403
+ "learning_rate": 2.3546379277238107e-05,
404
+ "loss": 1.2114,
405
+ "step": 660
406
+ },
407
+ {
408
+ "epoch": 5.64,
409
+ "learning_rate": 2.2821106431308544e-05,
410
+ "loss": 0.9685,
411
+ "step": 670
412
+ },
413
+ {
414
+ "epoch": 5.72,
415
+ "learning_rate": 2.2097677146869242e-05,
416
+ "loss": 0.9688,
417
+ "step": 680
418
+ },
419
+ {
420
+ "epoch": 5.8,
421
+ "learning_rate": 2.1376703517318837e-05,
422
+ "loss": 1.0943,
423
+ "step": 690
424
+ },
425
+ {
426
+ "epoch": 5.89,
427
+ "learning_rate": 2.0658795558326743e-05,
428
+ "loss": 0.9363,
429
+ "step": 700
430
+ },
431
+ {
432
+ "epoch": 5.97,
433
+ "learning_rate": 1.9944560691699057e-05,
434
+ "loss": 1.1054,
435
+ "step": 710
436
+ },
437
+ {
438
+ "epoch": 6.06,
439
+ "learning_rate": 1.9234603231438995e-05,
440
+ "loss": 1.0781,
441
+ "step": 720
442
+ },
443
+ {
444
+ "epoch": 6.14,
445
+ "learning_rate": 1.852952387243698e-05,
446
+ "loss": 0.8869,
447
+ "step": 730
448
+ },
449
+ {
450
+ "epoch": 6.23,
451
+ "learning_rate": 1.7829919182222752e-05,
452
+ "loss": 0.8957,
453
+ "step": 740
454
+ },
455
+ {
456
+ "epoch": 6.31,
457
+ "learning_rate": 1.7136381096209664e-05,
458
+ "loss": 1.0628,
459
+ "step": 750
460
+ },
461
+ {
462
+ "epoch": 6.39,
463
+ "learning_rate": 1.6449496416858284e-05,
464
+ "loss": 1.0005,
465
+ "step": 760
466
+ },
467
+ {
468
+ "epoch": 6.48,
469
+ "learning_rate": 1.5769846317182893e-05,
470
+ "loss": 0.9747,
471
+ "step": 770
472
+ },
473
+ {
474
+ "epoch": 6.56,
475
+ "learning_rate": 1.509800584902108e-05,
476
+ "loss": 1.0524,
477
+ "step": 780
478
+ },
479
+ {
480
+ "epoch": 6.65,
481
+ "learning_rate": 1.443454345648252e-05,
482
+ "loss": 1.0064,
483
+ "step": 790
484
+ },
485
+ {
486
+ "epoch": 6.73,
487
+ "learning_rate": 1.3780020494988446e-05,
488
+ "loss": 0.979,
489
+ "step": 800
490
+ },
491
+ {
492
+ "epoch": 6.81,
493
+ "learning_rate": 1.313499075630899e-05,
494
+ "loss": 0.9495,
495
+ "step": 810
496
+ },
497
+ {
498
+ "epoch": 6.9,
499
+ "learning_rate": 1.2500000000000006e-05,
500
+ "loss": 0.8098,
501
+ "step": 820
502
+ },
503
+ {
504
+ "epoch": 6.98,
505
+ "learning_rate": 1.1875585491636e-05,
506
+ "loss": 0.9074,
507
+ "step": 830
508
+ },
509
+ {
510
+ "epoch": 7.07,
511
+ "learning_rate": 1.126227554822985e-05,
512
+ "loss": 0.8522,
513
+ "step": 840
514
+ },
515
+ {
516
+ "epoch": 7.15,
517
+ "learning_rate": 1.0660589091223855e-05,
518
+ "loss": 0.8949,
519
+ "step": 850
520
+ },
521
+ {
522
+ "epoch": 7.23,
523
+ "learning_rate": 1.0071035207430352e-05,
524
+ "loss": 0.9344,
525
+ "step": 860
526
+ },
527
+ {
528
+ "epoch": 7.32,
529
+ "learning_rate": 9.494112718293501e-06,
530
+ "loss": 0.7258,
531
+ "step": 870
532
+ },
533
+ {
534
+ "epoch": 7.4,
535
+ "learning_rate": 8.930309757836517e-06,
536
+ "loss": 0.8315,
537
+ "step": 880
538
+ },
539
+ {
540
+ "epoch": 7.49,
541
+ "learning_rate": 8.380103359651553e-06,
542
+ "loss": 0.9103,
543
+ "step": 890
544
+ },
545
+ {
546
+ "epoch": 7.57,
547
+ "learning_rate": 7.843959053281663e-06,
548
+ "loss": 0.8161,
549
+ "step": 900
550
+ },
551
+ {
552
+ "epoch": 7.66,
553
+ "learning_rate": 7.3223304703363135e-06,
554
+ "loss": 0.8657,
555
+ "step": 910
556
+ },
557
+ {
558
+ "epoch": 7.74,
559
+ "learning_rate": 6.815658960673782e-06,
560
+ "loss": 0.8315,
561
+ "step": 920
562
+ },
563
+ {
564
+ "epoch": 7.82,
565
+ "learning_rate": 6.324373218975105e-06,
566
+ "loss": 0.8273,
567
+ "step": 930
568
+ },
569
+ {
570
+ "epoch": 7.91,
571
+ "learning_rate": 5.848888922025553e-06,
572
+ "loss": 0.8765,
573
+ "step": 940
574
+ },
575
+ {
576
+ "epoch": 7.99,
577
+ "learning_rate": 5.389608377010608e-06,
578
+ "loss": 0.8816,
579
+ "step": 950
580
+ },
581
+ {
582
+ "epoch": 8.08,
583
+ "learning_rate": 4.946920181123904e-06,
584
+ "loss": 0.8582,
585
+ "step": 960
586
+ },
587
+ {
588
+ "epoch": 8.16,
589
+ "learning_rate": 4.521198892775203e-06,
590
+ "loss": 0.7583,
591
+ "step": 970
592
+ },
593
+ {
594
+ "epoch": 8.24,
595
+ "learning_rate": 4.112804714676594e-06,
596
+ "loss": 0.862,
597
+ "step": 980
598
+ },
599
+ {
600
+ "epoch": 8.33,
601
+ "learning_rate": 3.7220831890750067e-06,
602
+ "loss": 0.8618,
603
+ "step": 990
604
+ },
605
+ {
606
+ "epoch": 8.41,
607
+ "learning_rate": 3.3493649053890326e-06,
608
+ "loss": 0.6312,
609
+ "step": 1000
610
+ },
611
+ {
612
+ "epoch": 8.5,
613
+ "learning_rate": 2.9949652204972254e-06,
614
+ "loss": 0.8316,
615
+ "step": 1010
616
+ },
617
+ {
618
+ "epoch": 8.58,
619
+ "learning_rate": 2.659183991914696e-06,
620
+ "loss": 0.8118,
621
+ "step": 1020
622
+ },
623
+ {
624
+ "epoch": 8.66,
625
+ "learning_rate": 2.3423053240837515e-06,
626
+ "loss": 0.8255,
627
+ "step": 1030
628
+ },
629
+ {
630
+ "epoch": 8.75,
631
+ "learning_rate": 2.044597327993153e-06,
632
+ "loss": 0.7131,
633
+ "step": 1040
634
+ },
635
+ {
636
+ "epoch": 8.83,
637
+ "learning_rate": 1.7663118943294366e-06,
638
+ "loss": 0.8605,
639
+ "step": 1050
640
+ },
641
+ {
642
+ "epoch": 8.92,
643
+ "learning_rate": 1.5076844803522922e-06,
644
+ "loss": 0.7849,
645
+ "step": 1060
646
+ },
647
+ {
648
+ "epoch": 9.0,
649
+ "learning_rate": 1.2689339106741527e-06,
650
+ "loss": 0.8457,
651
+ "step": 1070
652
+ },
653
+ {
654
+ "epoch": 9.09,
655
+ "learning_rate": 1.0502621921127776e-06,
656
+ "loss": 0.7074,
657
+ "step": 1080
658
+ },
659
+ {
660
+ "epoch": 9.17,
661
+ "learning_rate": 8.51854342773295e-07,
662
+ "loss": 0.7921,
663
+ "step": 1090
664
+ },
665
+ {
666
+ "epoch": 9.25,
667
+ "learning_rate": 6.738782355044049e-07,
668
+ "loss": 0.7745,
669
+ "step": 1100
670
+ }
671
+ ],
672
+ "logging_steps": 10,
673
+ "max_steps": 1180,
674
+ "num_train_epochs": 10,
675
+ "save_steps": 100,
676
+ "total_flos": 9.486456470161981e+17,
677
+ "trial_name": null,
678
+ "trial_params": null
679
+ }
hackduke1/checkpoint-1100/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53de37053b6032a74e6fda2a983e6126718c64bf2e866e225af8abfa727bba20
+ size 3347
hackduke1/checkpoint-200/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
hackduke1/checkpoint-200/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffc38b0fc94d5a1f32262242e8bfa00dcce919950c0d2395cd70b6e8d37e2082
+ size 33576177
hackduke1/checkpoint-200/finetuning_args.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "dpo_beta": 0.1,
+ "finetuning_type": "lora",
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "lora_rank": 16,
+ "lora_target": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "name_module_trainable": "mlp",
+ "num_hidden_layers": 32,
+ "num_layer_trainable": 3,
+ "ppo_score_norm": false,
+ "resume_lora_training": true
+ }
hackduke1/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:863092e962bafa936ac7809c037a3f8d0734753f6f4329a4ec99edf773fd08b5
+ size 67147589
hackduke1/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ebf53d5d68fb637ac98b718cd5102b8def2c6ed4594c45d75daad7b25554081
+ size 14575
hackduke1/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f3b91ae626c54779c8c48b864855fc030493662d74d678106549aa69ee92904
+ size 627
hackduke1/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,139 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.6824395373291272,
+ "eval_steps": 100.0,
+ "global_step": 200,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.08,
+ "learning_rate": 5e-06,
+ "loss": 4.1311,
+ "step": 10
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 1e-05,
+ "loss": 4.0983,
+ "step": 20
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 1.5e-05,
+ "loss": 3.9944,
+ "step": 30
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 2e-05,
+ "loss": 3.8495,
+ "step": 40
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 2.5e-05,
+ "loss": 3.5944,
+ "step": 50
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 3e-05,
+ "loss": 3.1649,
+ "step": 60
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 3.5e-05,
+ "loss": 3.0072,
+ "step": 70
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 4e-05,
+ "loss": 2.9697,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.5e-05,
+ "loss": 2.8497,
+ "step": 90
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 5e-05,
+ "loss": 2.8376,
+ "step": 100
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 4.998942375205502e-05,
+ "loss": 2.8255,
+ "step": 110
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 4.995770395678171e-05,
+ "loss": 2.7066,
+ "step": 120
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 4.990486745229364e-05,
+ "loss": 2.6717,
+ "step": 130
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 4.983095894354858e-05,
+ "loss": 2.6093,
+ "step": 140
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 4.973604096452361e-05,
+ "loss": 2.588,
+ "step": 150
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 4.962019382530521e-05,
+ "loss": 2.5881,
+ "step": 160
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 4.948351554413879e-05,
+ "loss": 2.4645,
+ "step": 170
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 4.9326121764495596e-05,
+ "loss": 2.4118,
+ "step": 180
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 4.914814565722671e-05,
+ "loss": 2.5078,
+ "step": 190
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 4.894973780788722e-05,
+ "loss": 2.3461,
+ "step": 200
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1180,
+ "num_train_epochs": 10,
+ "save_steps": 100,
+ "total_flos": 1.733059225580667e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
hackduke1/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53de37053b6032a74e6fda2a983e6126718c64bf2e866e225af8abfa727bba20
+ size 3347
hackduke1/checkpoint-300/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
hackduke1/checkpoint-300/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "Models/Qwen-7B-chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
hackduke1/checkpoint-300/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c7c5504ba98370ec6e102751572e15826d1dfcf25c3137242366ea11ff021ef
+ size 33576177
hackduke1/checkpoint-300/finetuning_args.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "dpo_beta": 0.1,
+ "finetuning_type": "lora",
+ "lora_alpha": 32.0,
+ "lora_dropout": 0.1,
+ "lora_rank": 16,
+ "lora_target": [
+ "c_attn",
+ "o_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "name_module_trainable": "mlp",
+ "num_hidden_layers": 32,
+ "num_layer_trainable": 3,
+ "ppo_score_norm": false,
+ "resume_lora_training": true
+ }
hackduke1/checkpoint-300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90559a30008bb929e83d33121e2eff56956e57267dc8ebd2da0146e970821bd6
+ size 67147653
hackduke1/checkpoint-300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36db46935b973ab40f77c618c613f66b954aa6d0944c46d67697e4dc5e9e0136
+ size 14575
hackduke1/checkpoint-300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e0e8ef46b0af798d6a98afd906d7eda3caa728904822a41e829a48ad5e7aa72
+ size 627
hackduke1/checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,199 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.5236593059936907,
+ "eval_steps": 100.0,
+ "global_step": 300,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.08,
+ "learning_rate": 5e-06,
+ "loss": 4.1311,
+ "step": 10
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 1e-05,
+ "loss": 4.0983,
+ "step": 20
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 1.5e-05,
+ "loss": 3.9944,
+ "step": 30
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 2e-05,
+ "loss": 3.8495,
+ "step": 40
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 2.5e-05,
+ "loss": 3.5944,
+ "step": 50
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 3e-05,
+ "loss": 3.1649,
+ "step": 60
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 3.5e-05,
+ "loss": 3.0072,
+ "step": 70
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 4e-05,
+ "loss": 2.9697,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.5e-05,
+ "loss": 2.8497,
+ "step": 90
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 5e-05,
+ "loss": 2.8376,
+ "step": 100
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 4.998942375205502e-05,
+ "loss": 2.8255,
+ "step": 110
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 4.995770395678171e-05,
+ "loss": 2.7066,
+ "step": 120
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 4.990486745229364e-05,
+ "loss": 2.6717,
+ "step": 130
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 4.983095894354858e-05,
+ "loss": 2.6093,
+ "step": 140
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 4.973604096452361e-05,
+ "loss": 2.588,
+ "step": 150
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 4.962019382530521e-05,
+ "loss": 2.5881,
+ "step": 160
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 4.948351554413879e-05,
+ "loss": 2.4645,
+ "step": 170
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 4.9326121764495596e-05,
+ "loss": 2.4118,
+ "step": 180
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 4.914814565722671e-05,
+ "loss": 2.5078,
+ "step": 190
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 4.894973780788722e-05,
+ "loss": 2.3461,
+ "step": 200
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 4.873106608932585e-05,
+ "loss": 2.3559,
+ "step": 210
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 4.849231551964771e-05,
+ "loss": 2.4097,
+ "step": 220
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 4.823368810567056e-05,
+ "loss": 2.2607,
+ "step": 230
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 4.7955402672006854e-05,
+ "loss": 2.1208,
+ "step": 240
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 4.765769467591625e-05,
+ "loss": 2.112,
+ "step": 250
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 4.734081600808531e-05,
+ "loss": 2.1259,
+ "step": 260
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 4.700503477950278e-05,
+ "loss": 2.1712,
+ "step": 270
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 4.665063509461097e-05,
+ "loss": 1.9872,
+ "step": 280
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 4.627791681092499e-05,
+ "loss": 1.9918,
+ "step": 290
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 4.588719528532342e-05,
+ "loss": 2.0882,
+ "step": 300
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1180,
+ "num_train_epochs": 10,
+ "save_steps": 100,
+ "total_flos": 2.5973073683329843e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
hackduke1/checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53de37053b6032a74e6fda2a983e6126718c64bf2e866e225af8abfa727bba20
+ size 3347
hackduke1/checkpoint-400/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0