besimray committed on
Commit
15f854f
·
verified ·
1 Parent(s): 1deee84

Training in progress, step 5, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
- "down_proj",
25
  "v_proj",
26
  "q_proj",
 
27
  "gate_proj",
28
- "up_proj",
29
- "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "up_proj",
 
24
  "v_proj",
25
  "q_proj",
26
+ "k_proj",
27
  "gate_proj",
28
+ "o_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a34986d516a8123dc79eaa153c61fe787189362d8d2b3bdd79c43a19022fbb41
3
  size 67662840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9880f9a14e7e1b16646467ab0d161d117f1598c30881f29672a2b60ced9327ee
3
  size 67662840
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5183c8fbde9b90b9c4373d8d4dd017eae4d483dba61449434e7d1c4c1f5248f2
3
  size 34607610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb444bf5ba451c33cc607315d448f7fa02a9ff9b3bbe6e66bca96ebcbf0fa021
3
  size 34607610
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:340f6406b96d7e0f17315436c2bccb605d75c9d254ec958f91a4f659e4ffcdd7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca66d4a74945f7af156bf782963ee63f197435058aae7c08031504382213d7e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bedc6307a5e33c9ada0599345089662b71de7faf79562d8c952efe37fda8ce81
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e9b4e25668eb135b65c8a2efd824713d5ec7e9a404d37d2fd3021c6e5610a3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,162 +1,87 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.14705882352941177,
5
  "eval_steps": 1,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.014705882352941176,
13
- "grad_norm": 1.309019684791565,
14
- "learning_rate": 5e-06,
15
  "loss": 1.3327,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.014705882352941176,
20
  "eval_loss": 1.269364356994629,
21
- "eval_runtime": 2.5609,
22
- "eval_samples_per_second": 39.05,
23
- "eval_steps_per_second": 5.857,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.029411764705882353,
28
- "grad_norm": 1.260735034942627,
29
- "learning_rate": 1e-05,
30
  "loss": 1.1887,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.029411764705882353,
35
- "eval_loss": 1.270459771156311,
36
- "eval_runtime": 2.4757,
37
- "eval_samples_per_second": 40.392,
38
- "eval_steps_per_second": 6.059,
39
  "step": 2
40
  },
41
  {
42
  "epoch": 0.04411764705882353,
43
- "grad_norm": 1.3263417482376099,
44
- "learning_rate": 1.5e-05,
45
- "loss": 1.5717,
46
  "step": 3
47
  },
48
  {
49
  "epoch": 0.04411764705882353,
50
- "eval_loss": 1.265608310699463,
51
- "eval_runtime": 2.4864,
52
- "eval_samples_per_second": 40.218,
53
- "eval_steps_per_second": 6.033,
54
  "step": 3
55
  },
56
  {
57
  "epoch": 0.058823529411764705,
58
- "grad_norm": 1.1789089441299438,
59
- "learning_rate": 2e-05,
60
- "loss": 1.3113,
61
  "step": 4
62
  },
63
  {
64
  "epoch": 0.058823529411764705,
65
- "eval_loss": 1.2618610858917236,
66
- "eval_runtime": 2.4817,
67
- "eval_samples_per_second": 40.295,
68
- "eval_steps_per_second": 6.044,
69
  "step": 4
70
  },
71
  {
72
  "epoch": 0.07352941176470588,
73
- "grad_norm": 1.2095729112625122,
74
- "learning_rate": 2.5e-05,
75
- "loss": 1.3671,
76
  "step": 5
77
  },
78
  {
79
  "epoch": 0.07352941176470588,
80
- "eval_loss": 1.2535583972930908,
81
- "eval_runtime": 2.5307,
82
- "eval_samples_per_second": 39.515,
83
- "eval_steps_per_second": 5.927,
84
  "step": 5
85
- },
86
- {
87
- "epoch": 0.08823529411764706,
88
- "grad_norm": 1.2121447324752808,
89
- "learning_rate": 3e-05,
90
- "loss": 1.4151,
91
- "step": 6
92
- },
93
- {
94
- "epoch": 0.08823529411764706,
95
- "eval_loss": 1.2435675859451294,
96
- "eval_runtime": 2.5551,
97
- "eval_samples_per_second": 39.137,
98
- "eval_steps_per_second": 5.871,
99
- "step": 6
100
- },
101
- {
102
- "epoch": 0.10294117647058823,
103
- "grad_norm": 0.8902429938316345,
104
- "learning_rate": 3.5e-05,
105
- "loss": 1.2607,
106
- "step": 7
107
- },
108
- {
109
- "epoch": 0.10294117647058823,
110
- "eval_loss": 1.2300522327423096,
111
- "eval_runtime": 2.5552,
112
- "eval_samples_per_second": 39.136,
113
- "eval_steps_per_second": 5.87,
114
- "step": 7
115
- },
116
- {
117
- "epoch": 0.11764705882352941,
118
- "grad_norm": 1.3416252136230469,
119
- "learning_rate": 4e-05,
120
- "loss": 1.4189,
121
- "step": 8
122
- },
123
- {
124
- "epoch": 0.11764705882352941,
125
- "eval_loss": 1.225598931312561,
126
- "eval_runtime": 2.529,
127
- "eval_samples_per_second": 39.541,
128
- "eval_steps_per_second": 5.931,
129
- "step": 8
130
- },
131
- {
132
- "epoch": 0.1323529411764706,
133
- "grad_norm": 0.9632052779197693,
134
- "learning_rate": 4.5e-05,
135
- "loss": 1.3843,
136
- "step": 9
137
- },
138
- {
139
- "epoch": 0.1323529411764706,
140
- "eval_loss": 1.223665475845337,
141
- "eval_runtime": 2.5804,
142
- "eval_samples_per_second": 38.753,
143
- "eval_steps_per_second": 5.813,
144
- "step": 9
145
- },
146
- {
147
- "epoch": 0.14705882352941177,
148
- "grad_norm": 0.9364734292030334,
149
- "learning_rate": 5e-05,
150
- "loss": 1.3753,
151
- "step": 10
152
- },
153
- {
154
- "epoch": 0.14705882352941177,
155
- "eval_loss": 1.2201740741729736,
156
- "eval_runtime": 2.4614,
157
- "eval_samples_per_second": 40.628,
158
- "eval_steps_per_second": 6.094,
159
- "step": 10
160
  }
161
  ],
162
  "logging_steps": 1,
@@ -171,12 +96,12 @@
171
  "should_evaluate": false,
172
  "should_log": false,
173
  "should_save": true,
174
- "should_training_stop": true
175
  },
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 628058829619200.0,
180
  "train_batch_size": 7,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07352941176470588,
5
  "eval_steps": 1,
6
+ "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.014705882352941176,
13
+ "grad_norm": 1.3115363121032715,
14
+ "learning_rate": 5.000000000000001e-07,
15
  "loss": 1.3327,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.014705882352941176,
20
  "eval_loss": 1.269364356994629,
21
+ "eval_runtime": 2.5109,
22
+ "eval_samples_per_second": 39.826,
23
+ "eval_steps_per_second": 5.974,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.029411764705882353,
28
+ "grad_norm": 1.260526180267334,
29
+ "learning_rate": 1.0000000000000002e-06,
30
  "loss": 1.1887,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.029411764705882353,
35
+ "eval_loss": 1.267616868019104,
36
+ "eval_runtime": 2.4914,
37
+ "eval_samples_per_second": 40.138,
38
+ "eval_steps_per_second": 6.021,
39
  "step": 2
40
  },
41
  {
42
  "epoch": 0.04411764705882353,
43
+ "grad_norm": 1.3265948295593262,
44
+ "learning_rate": 1.5e-06,
45
+ "loss": 1.5761,
46
  "step": 3
47
  },
48
  {
49
  "epoch": 0.04411764705882353,
50
+ "eval_loss": 1.267909288406372,
51
+ "eval_runtime": 2.4844,
52
+ "eval_samples_per_second": 40.251,
53
+ "eval_steps_per_second": 6.038,
54
  "step": 3
55
  },
56
  {
57
  "epoch": 0.058823529411764705,
58
+ "grad_norm": 1.2248256206512451,
59
+ "learning_rate": 2.0000000000000003e-06,
60
+ "loss": 1.3197,
61
  "step": 4
62
  },
63
  {
64
  "epoch": 0.058823529411764705,
65
+ "eval_loss": 1.2693428993225098,
66
+ "eval_runtime": 3.2428,
67
+ "eval_samples_per_second": 30.838,
68
+ "eval_steps_per_second": 4.626,
69
  "step": 4
70
  },
71
  {
72
  "epoch": 0.07352941176470588,
73
+ "grad_norm": 1.2737431526184082,
74
+ "learning_rate": 2.5e-06,
75
+ "loss": 1.3721,
76
  "step": 5
77
  },
78
  {
79
  "epoch": 0.07352941176470588,
80
+ "eval_loss": 1.267417311668396,
81
+ "eval_runtime": 3.3541,
82
+ "eval_samples_per_second": 29.814,
83
+ "eval_steps_per_second": 4.472,
84
  "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 1,
 
96
  "should_evaluate": false,
97
  "should_log": false,
98
  "should_save": true,
99
+ "should_training_stop": false
100
  },
101
  "attributes": {}
102
  }
103
  },
104
+ "total_flos": 314029414809600.0,
105
  "train_batch_size": 7,
106
  "trial_name": null,
107
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75498497562a7e64e3f8e51308001a48b87d7bec418686da140d400a6112b73f
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a08ac483867b17b481761ba9b309a0d55e03c0ec5de5cd02764b9f24cd7d6afe
3
  size 6648