jackkuo committed on
Commit
c852879
·
1 Parent(s): 1dba2fa

Upload 9 files

Browse files
README.md CHANGED
@@ -1,3 +1,10 @@
1
  ---
2
- license: bigscience-openrail-m
3
  ---
 
 
 
 
 
 
 
 
1
  ---
2
+ library_name: peft
3
  ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+ - PEFT 0.4.0
9
+
10
+ - PEFT 0.4.0
adapter_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/HOME/jack/model/chatglm-6b/",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "query_key_value"
18
+ ],
19
+ "task_type": "CAUSAL_LM"
20
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676b20b731081c69a9deae3f0111c5fa0ecb2ba49b132d34c223b7dd607c8ebf
3
+ size 14700057
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.29589878051511703,
4
+ "train_runtime": 277.1475,
5
+ "train_samples_per_second": 2.219,
6
+ "train_steps_per_second": 0.559
7
+ }
finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "query_key_value"
8
+ ],
9
+ "name_module_trainable": "mlp",
10
+ "num_layer_trainable": 3,
11
+ "pre_seq_len": 64,
12
+ "prefix_projection": false
13
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.29589878051511703,
4
+ "train_runtime": 277.1475,
5
+ "train_samples_per_second": 2.219,
6
+ "train_steps_per_second": 0.559
7
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 155, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0009897649706262473, "epoch": 0.32, "percentage": 6.45, "elapsed_time": "0:00:35", "remaining_time": "0:08:28"}
2
+ {"current_steps": 20, "total_steps": 155, "loss": 0.4332, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0009594789058101153, "epoch": 0.65, "percentage": 12.9, "elapsed_time": "0:01:05", "remaining_time": "0:07:24"}
3
+ {"current_steps": 30, "total_steps": 155, "loss": 0.41, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0009103817206036382, "epoch": 0.97, "percentage": 19.35, "elapsed_time": "0:01:28", "remaining_time": "0:06:10"}
4
+ {"current_steps": 40, "total_steps": 155, "loss": 0.4098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0008444834595378434, "epoch": 1.29, "percentage": 25.81, "elapsed_time": "0:01:43", "remaining_time": "0:04:58"}
5
+ {"current_steps": 50, "total_steps": 155, "loss": 0.308, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0007644820051634812, "epoch": 1.61, "percentage": 32.26, "elapsed_time": "0:01:58", "remaining_time": "0:04:08"}
6
+ {"current_steps": 60, "total_steps": 155, "loss": 0.3254, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0006736526264224101, "epoch": 1.94, "percentage": 38.71, "elapsed_time": "0:02:13", "remaining_time": "0:03:31"}
7
+ {"current_steps": 70, "total_steps": 155, "loss": 0.1859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0005757138887522884, "epoch": 2.26, "percentage": 45.16, "elapsed_time": "0:02:27", "remaining_time": "0:02:59"}
8
+ {"current_steps": 80, "total_steps": 155, "loss": 0.2975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0004746754155806437, "epoch": 2.58, "percentage": 51.61, "elapsed_time": "0:02:42", "remaining_time": "0:02:32"}
9
+ {"current_steps": 90, "total_steps": 155, "loss": 0.2897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.00037467373387063967, "epoch": 2.9, "percentage": 58.06, "elapsed_time": "0:02:58", "remaining_time": "0:02:09"}
10
+ {"current_steps": 100, "total_steps": 155, "loss": 0.1815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0002798029242211828, "epoch": 3.23, "percentage": 64.52, "elapsed_time": "0:03:13", "remaining_time": "0:01:46"}
11
+ {"current_steps": 110, "total_steps": 155, "loss": 0.2632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.00019394700872616856, "epoch": 3.55, "percentage": 70.97, "elapsed_time": "0:03:29", "remaining_time": "0:01:25"}
12
+ {"current_steps": 120, "total_steps": 155, "loss": 0.109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.00012062093865360457, "epoch": 3.87, "percentage": 77.42, "elapsed_time": "0:03:44", "remaining_time": "0:01:05"}
13
+ {"current_steps": 130, "total_steps": 155, "loss": 0.2943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.282669192770896e-05, "epoch": 4.19, "percentage": 83.87, "elapsed_time": "0:03:59", "remaining_time": "0:00:46"}
14
+ {"current_steps": 140, "total_steps": 155, "loss": 0.0821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2930371799975592e-05, "epoch": 4.52, "percentage": 90.32, "elapsed_time": "0:04:14", "remaining_time": "0:00:27"}
15
+ {"current_steps": 150, "total_steps": 155, "loss": 0.1791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5653383040524227e-06, "epoch": 4.84, "percentage": 96.77, "elapsed_time": "0:04:29", "remaining_time": "0:00:08"}
16
+ {"current_steps": 155, "total_steps": 155, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:04:37", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 155,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.32,
12
+ "learning_rate": 0.0009897649706262473,
13
+ "loss": 0.7217,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.65,
18
+ "learning_rate": 0.0009594789058101153,
19
+ "loss": 0.4332,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.97,
24
+ "learning_rate": 0.0009103817206036382,
25
+ "loss": 0.41,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 1.29,
30
+ "learning_rate": 0.0008444834595378434,
31
+ "loss": 0.4098,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.61,
36
+ "learning_rate": 0.0007644820051634812,
37
+ "loss": 0.308,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 1.94,
42
+ "learning_rate": 0.0006736526264224101,
43
+ "loss": 0.3254,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 2.26,
48
+ "learning_rate": 0.0005757138887522884,
49
+ "loss": 0.1859,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 2.58,
54
+ "learning_rate": 0.0004746754155806437,
55
+ "loss": 0.2975,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 2.9,
60
+ "learning_rate": 0.00037467373387063967,
61
+ "loss": 0.2897,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 3.23,
66
+ "learning_rate": 0.0002798029242211828,
67
+ "loss": 0.1815,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 3.55,
72
+ "learning_rate": 0.00019394700872616856,
73
+ "loss": 0.2632,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 3.87,
78
+ "learning_rate": 0.00012062093865360457,
79
+ "loss": 0.109,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 4.19,
84
+ "learning_rate": 6.282669192770896e-05,
85
+ "loss": 0.2943,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 4.52,
90
+ "learning_rate": 2.2930371799975592e-05,
91
+ "loss": 0.0821,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 4.84,
96
+ "learning_rate": 2.5653383040524227e-06,
97
+ "loss": 0.1791,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 5.0,
102
+ "step": 155,
103
+ "total_flos": 1.7897091891019776e+16,
104
+ "train_loss": 0.29589878051511703,
105
+ "train_runtime": 277.1475,
106
+ "train_samples_per_second": 2.219,
107
+ "train_steps_per_second": 0.559
108
+ }
109
+ ],
110
+ "max_steps": 155,
111
+ "num_train_epochs": 5,
112
+ "total_flos": 1.7897091891019776e+16,
113
+ "trial_name": null,
114
+ "trial_params": null
115
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e31fb62b9330340d05178325de3b580880975dceb0d71e8057bea364092e24e
3
+ size 3326