FredericFan commited on
Commit
13775f7
·
verified ·
1 Parent(s): 8ca4447

Training in progress, step 20500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1998440dc1fd1017b8e8ae1d999fce13a94dffb0b42736c732ef0d40ee60bd0
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f364037fe3d6208b2c05dda635ce09c71590d8662e232f0b7b434a1610e5c6b
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd973c109381ca03cc4c7ff8271e54697feff8b75e9d2abe1f7ad064426cfc27
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b999ec0e9bc401face62bc16ac08f4e745f2cd6b0ffba6a9f05615f8c9650e5f
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eec7922e0e8f954a67c405890db8015d9bb8a0c99cdf61294b3077009dcff9eb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d0e4637157719f5fcffcd5d4a99e903acaab012174cc7599b33a508d13c5ca
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf75de0e462da04981b7e7eaad4e35f0906a2b31e58f69cacf60ddca173fc0ea
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3a5ed47396b325271b233c59cffa14dc5086d4af5c552b3c7216a7a0ac3fa86
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
- "epoch": 1.6,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3127,6 +3127,84 @@
3127
  "eval_samples_per_second": 22.716,
3128
  "eval_steps_per_second": 5.679,
3129
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3130
  }
3131
  ],
3132
  "logging_steps": 50,
@@ -3146,7 +3224,7 @@
3146
  "attributes": {}
3147
  }
3148
  },
3149
- "total_flos": 4.87166312448e+16,
3150
  "train_batch_size": 4,
3151
  "trial_name": null,
3152
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
+ "epoch": 1.6400000000000001,
5
  "eval_steps": 500,
6
+ "global_step": 20500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3127
  "eval_samples_per_second": 22.716,
3128
  "eval_steps_per_second": 5.679,
3129
  "step": 20000
3130
+ },
3131
+ {
3132
+ "epoch": 1.604,
3133
+ "grad_norm": 0.18323849141597748,
3134
+ "learning_rate": 5.9472e-06,
3135
+ "loss": 0.0555,
3136
+ "step": 20050
3137
+ },
3138
+ {
3139
+ "epoch": 1.608,
3140
+ "grad_norm": 0.10411707311868668,
3141
+ "learning_rate": 5.8872e-06,
3142
+ "loss": 0.0506,
3143
+ "step": 20100
3144
+ },
3145
+ {
3146
+ "epoch": 1.612,
3147
+ "grad_norm": 0.18998867273330688,
3148
+ "learning_rate": 5.8272e-06,
3149
+ "loss": 0.0522,
3150
+ "step": 20150
3151
+ },
3152
+ {
3153
+ "epoch": 1.616,
3154
+ "grad_norm": 0.159571573138237,
3155
+ "learning_rate": 5.7672e-06,
3156
+ "loss": 0.0527,
3157
+ "step": 20200
3158
+ },
3159
+ {
3160
+ "epoch": 1.62,
3161
+ "grad_norm": 0.10246822983026505,
3162
+ "learning_rate": 5.7072e-06,
3163
+ "loss": 0.0517,
3164
+ "step": 20250
3165
+ },
3166
+ {
3167
+ "epoch": 1.624,
3168
+ "grad_norm": 0.17232735455036163,
3169
+ "learning_rate": 5.6472e-06,
3170
+ "loss": 0.0559,
3171
+ "step": 20300
3172
+ },
3173
+ {
3174
+ "epoch": 1.6280000000000001,
3175
+ "grad_norm": 0.13078981637954712,
3176
+ "learning_rate": 5.5872e-06,
3177
+ "loss": 0.0497,
3178
+ "step": 20350
3179
+ },
3180
+ {
3181
+ "epoch": 1.6320000000000001,
3182
+ "grad_norm": 0.06083545461297035,
3183
+ "learning_rate": 5.527199999999999e-06,
3184
+ "loss": 0.0502,
3185
+ "step": 20400
3186
+ },
3187
+ {
3188
+ "epoch": 1.6360000000000001,
3189
+ "grad_norm": 0.1311408132314682,
3190
+ "learning_rate": 5.467200000000001e-06,
3191
+ "loss": 0.0572,
3192
+ "step": 20450
3193
+ },
3194
+ {
3195
+ "epoch": 1.6400000000000001,
3196
+ "grad_norm": 0.16154557466506958,
3197
+ "learning_rate": 5.4072000000000005e-06,
3198
+ "loss": 0.0545,
3199
+ "step": 20500
3200
+ },
3201
+ {
3202
+ "epoch": 1.6400000000000001,
3203
+ "eval_loss": 0.08202797174453735,
3204
+ "eval_runtime": 87.9943,
3205
+ "eval_samples_per_second": 22.729,
3206
+ "eval_steps_per_second": 5.682,
3207
+ "step": 20500
3208
  }
3209
  ],
3210
  "logging_steps": 50,
 
3224
  "attributes": {}
3225
  }
3226
  },
3227
+ "total_flos": 4.993454702592e+16,
3228
  "train_batch_size": 4,
3229
  "trial_name": null,
3230
  "trial_params": null