tokhey committed on
Commit
be623c3
·
verified ·
1 Parent(s): 97bd105

Training in progress, step 114, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4627a384ccfb90bc59a01e8a25eb50a60e5c9d9a7cbc4781a18e1ed2574854e
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91556df02a9492e4ed535604154a309f209fa3a7e2fcf4bd56b539e4e83f286a
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca2150c51d63f89d1bc1d324172bc8d004bf72d5ec3681b8fa7f23855405bf6f
3
  size 148053627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf915dd720836012c07e2ba039e6167731cfebbcc2ab85674c1bd895783e189
3
  size 148053627
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97ac64e05ebf46af43b314fca9573cea163c2749e9f4c0fcee8ac426f60f5872
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9840a2e654651667352c6dcbb85523a16aa0b41cfd2a4a5b46fb280d4019f8a8
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:254aa974d264413ca1b60d5980136d8531f6cb9e9b28340b9a2daa2496d48c4c
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9f4d1e39b6645a7b25d344c5b4f804a6f6c61e0865871d9bb1dbfa28a7cc9d
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f784d5566457719ec24fd9e3c13871706202c23a784efd0979d8609621573572
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d9b13455c65c7b9f204af76375fe10bffaade07e56cd2645998a3d0cb0ca5ff
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.1066666666666665,
6
  "eval_steps": 100,
7
- "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -120,6 +120,56 @@
120
  "learning_rate": 0.0001317266107909975,
121
  "loss": 0.6193,
122
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "logging_steps": 5,
@@ -134,12 +184,12 @@
134
  "should_evaluate": false,
135
  "should_log": false,
136
  "should_save": true,
137
- "should_training_stop": false
138
  },
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 5159761905451008.0,
143
  "train_batch_size": 1,
144
  "trial_name": null,
145
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 100,
7
+ "global_step": 114,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
120
  "learning_rate": 0.0001317266107909975,
121
  "loss": 0.6193,
122
  "step": 80
123
+ },
124
+ {
125
+ "epoch": 2.24,
126
+ "grad_norm": 0.19134873151779175,
127
+ "learning_rate": 9.934134090518593e-05,
128
+ "loss": 0.5563,
129
+ "step": 85
130
+ },
131
+ {
132
+ "epoch": 2.3733333333333335,
133
+ "grad_norm": 0.19149252772331238,
134
+ "learning_rate": 7.052201923388955e-05,
135
+ "loss": 0.5774,
136
+ "step": 90
137
+ },
138
+ {
139
+ "epoch": 2.506666666666667,
140
+ "grad_norm": 0.2055027186870575,
141
+ "learning_rate": 4.59507719109446e-05,
142
+ "loss": 0.5472,
143
+ "step": 95
144
+ },
145
+ {
146
+ "epoch": 2.64,
147
+ "grad_norm": 0.20931023359298706,
148
+ "learning_rate": 2.6209177161234443e-05,
149
+ "loss": 0.5133,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 2.64,
154
+ "eval_loss": 0.8378572463989258,
155
+ "eval_runtime": 6.3722,
156
+ "eval_samples_per_second": 2.668,
157
+ "eval_steps_per_second": 2.668,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 2.7733333333333334,
162
+ "grad_norm": 0.19245073199272156,
163
+ "learning_rate": 1.1764499893210877e-05,
164
+ "loss": 0.5179,
165
+ "step": 105
166
+ },
167
+ {
168
+ "epoch": 2.9066666666666667,
169
+ "grad_norm": 0.20502211153507233,
170
+ "learning_rate": 2.9586319796851556e-06,
171
+ "loss": 0.525,
172
+ "step": 110
173
  }
174
  ],
175
  "logging_steps": 5,
 
184
  "should_evaluate": false,
185
  "should_log": false,
186
  "should_save": true,
187
+ "should_training_stop": true
188
  },
189
  "attributes": {}
190
  }
191
  },
192
+ "total_flos": 7347762207129600.0,
193
  "train_batch_size": 1,
194
  "trial_name": null,
195
  "trial_params": null