Sabbir772 commited on
Commit
4bdb542
·
verified ·
1 Parent(s): a45fb67

Training in progress, epoch 28, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727012ff11d243aa3f50de8258ad959513310b901aef61c0f3c57fba9b72cea7
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1bdcc22122be16bcad201b13438cbcd5bb3a61bbd2cb3d243f13927651c8ef3
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc975ad13f0fa89a81b5fa79d6bf3b2541171d36c7eb13e67661a24c57543b1
3
  size 1980541387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bba534d6f1ca378d37f9424710e1fbb0cf4f775c1e889d437a4390c2e6da59
3
  size 1980541387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a55fd7ac78bf4b5e6b13f1c5a1f5fb7258744c10b012de8c0c0b5edc12e58da
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c33bcc6689ffa514c871162fcc88c5e26610e3e356b556757408394db2158e3
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c84c0eceb8506ba57b63c709b65cc1be3871eb480c744855060e8151b4276a67
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37380fd84c1c4a4c2909f470440f6cf70cc0f0dbedd46d88c29bfc45ff95dfcc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 27.0,
6
  "eval_steps": 500,
7
- "global_step": 41553,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3129,6 +3129,119 @@
3129
  "eval_samples_per_second": 22.098,
3130
  "eval_steps_per_second": 2.762,
3131
  "step": 41553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3132
  }
3133
  ],
3134
  "logging_steps": 100,
@@ -3148,7 +3261,7 @@
3148
  "attributes": {}
3149
  }
3150
  },
3151
- "total_flos": 5.931144958338662e+16,
3152
  "train_batch_size": 8,
3153
  "trial_name": null,
3154
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 28.0,
6
  "eval_steps": 500,
7
+ "global_step": 43092,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3129
  "eval_samples_per_second": 22.098,
3130
  "eval_steps_per_second": 2.762,
3131
  "step": 41553
3132
+ },
3133
+ {
3134
+ "epoch": 27.030539311241064,
3135
+ "grad_norm": 4.933349609375,
3136
+ "learning_rate": 4.9501841022308866e-06,
3137
+ "loss": 1.3968,
3138
+ "step": 41600
3139
+ },
3140
+ {
3141
+ "epoch": 27.09551656920078,
3142
+ "grad_norm": 5.9000444412231445,
3143
+ "learning_rate": 4.841888672298029e-06,
3144
+ "loss": 1.484,
3145
+ "step": 41700
3146
+ },
3147
+ {
3148
+ "epoch": 27.160493827160494,
3149
+ "grad_norm": 4.207714080810547,
3150
+ "learning_rate": 4.7335932423651726e-06,
3151
+ "loss": 1.3794,
3152
+ "step": 41800
3153
+ },
3154
+ {
3155
+ "epoch": 27.22547108512021,
3156
+ "grad_norm": 3.1242659091949463,
3157
+ "learning_rate": 4.625297812432316e-06,
3158
+ "loss": 1.4728,
3159
+ "step": 41900
3160
+ },
3161
+ {
3162
+ "epoch": 27.290448343079923,
3163
+ "grad_norm": 5.078500270843506,
3164
+ "learning_rate": 4.5170023824994586e-06,
3165
+ "loss": 1.4554,
3166
+ "step": 42000
3167
+ },
3168
+ {
3169
+ "epoch": 27.355425601039634,
3170
+ "grad_norm": 4.096863269805908,
3171
+ "learning_rate": 4.408706952566602e-06,
3172
+ "loss": 1.453,
3173
+ "step": 42100
3174
+ },
3175
+ {
3176
+ "epoch": 27.42040285899935,
3177
+ "grad_norm": 3.733389377593994,
3178
+ "learning_rate": 4.300411522633745e-06,
3179
+ "loss": 1.4627,
3180
+ "step": 42200
3181
+ },
3182
+ {
3183
+ "epoch": 27.485380116959064,
3184
+ "grad_norm": 4.735873222351074,
3185
+ "learning_rate": 4.192116092700888e-06,
3186
+ "loss": 1.4533,
3187
+ "step": 42300
3188
+ },
3189
+ {
3190
+ "epoch": 27.55035737491878,
3191
+ "grad_norm": 5.443370819091797,
3192
+ "learning_rate": 4.083820662768031e-06,
3193
+ "loss": 1.4395,
3194
+ "step": 42400
3195
+ },
3196
+ {
3197
+ "epoch": 27.615334632878493,
3198
+ "grad_norm": 5.1538238525390625,
3199
+ "learning_rate": 3.975525232835175e-06,
3200
+ "loss": 1.3913,
3201
+ "step": 42500
3202
+ },
3203
+ {
3204
+ "epoch": 27.680311890838208,
3205
+ "grad_norm": 6.172743797302246,
3206
+ "learning_rate": 3.867229802902317e-06,
3207
+ "loss": 1.5319,
3208
+ "step": 42600
3209
+ },
3210
+ {
3211
+ "epoch": 27.74528914879792,
3212
+ "grad_norm": 2.933137893676758,
3213
+ "learning_rate": 3.758934372969461e-06,
3214
+ "loss": 1.4268,
3215
+ "step": 42700
3216
+ },
3217
+ {
3218
+ "epoch": 27.810266406757634,
3219
+ "grad_norm": 4.5866475105285645,
3220
+ "learning_rate": 3.6506389430366043e-06,
3221
+ "loss": 1.4041,
3222
+ "step": 42800
3223
+ },
3224
+ {
3225
+ "epoch": 27.87524366471735,
3226
+ "grad_norm": 4.22698450088501,
3227
+ "learning_rate": 3.542343513103747e-06,
3228
+ "loss": 1.4158,
3229
+ "step": 42900
3230
+ },
3231
+ {
3232
+ "epoch": 27.940220922677064,
3233
+ "grad_norm": 4.885856628417969,
3234
+ "learning_rate": 3.4340480831708903e-06,
3235
+ "loss": 1.4515,
3236
+ "step": 43000
3237
+ },
3238
+ {
3239
+ "epoch": 28.0,
3240
+ "eval_loss": 1.3683773279190063,
3241
+ "eval_runtime": 61.9894,
3242
+ "eval_samples_per_second": 22.068,
3243
+ "eval_steps_per_second": 2.759,
3244
+ "step": 43092
3245
  }
3246
  ],
3247
  "logging_steps": 100,
 
3261
  "attributes": {}
3262
  }
3263
  },
3264
+ "total_flos": 6.352643822557594e+16,
3265
  "train_batch_size": 8,
3266
  "trial_name": null,
3267
  "trial_params": null