FredericFan commited on
Commit
babb761
·
verified ·
1 Parent(s): 85dd772

Training in progress, step 21000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f364037fe3d6208b2c05dda635ce09c71590d8662e232f0b7b434a1610e5c6b
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a93f403a198a0abf134a3fd5cbeca3aa8c16276f10e0b35daa2bc2bf8a2a6957
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b999ec0e9bc401face62bc16ac08f4e745f2cd6b0ffba6a9f05615f8c9650e5f
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18372560aadc54215809cfae0eaf7225bb168ffc940aa3b172c422f28f9cfff5
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91d0e4637157719f5fcffcd5d4a99e903acaab012174cc7599b33a508d13c5ca
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9c5f7443e1222c25c8a224aeec2cab3e754343ab09e424a8f337440ada3c79
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3a5ed47396b325271b233c59cffa14dc5086d4af5c552b3c7216a7a0ac3fa86
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a88db37888ba561bfce26ae8fef54113ba48b68f86826f4ed7d7cb198ed4fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08186879754066467,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
- "epoch": 1.6400000000000001,
5
  "eval_steps": 500,
6
- "global_step": 20500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3205,6 +3205,84 @@
3205
  "eval_samples_per_second": 22.729,
3206
  "eval_steps_per_second": 5.682,
3207
  "step": 20500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3208
  }
3209
  ],
3210
  "logging_steps": 50,
@@ -3224,7 +3302,7 @@
3224
  "attributes": {}
3225
  }
3226
  },
3227
- "total_flos": 4.993454702592e+16,
3228
  "train_batch_size": 4,
3229
  "trial_name": null,
3230
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0817028358578682,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-21000",
4
+ "epoch": 1.6800000000000002,
5
  "eval_steps": 500,
6
+ "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3205
  "eval_samples_per_second": 22.729,
3206
  "eval_steps_per_second": 5.682,
3207
  "step": 20500
3208
+ },
3209
+ {
3210
+ "epoch": 1.6440000000000001,
3211
+ "grad_norm": 0.13027295470237732,
3212
+ "learning_rate": 5.3472e-06,
3213
+ "loss": 0.0551,
3214
+ "step": 20550
3215
+ },
3216
+ {
3217
+ "epoch": 1.6480000000000001,
3218
+ "grad_norm": 0.1394919753074646,
3219
+ "learning_rate": 5.2872e-06,
3220
+ "loss": 0.054,
3221
+ "step": 20600
3222
+ },
3223
+ {
3224
+ "epoch": 1.6520000000000001,
3225
+ "grad_norm": 0.16753709316253662,
3226
+ "learning_rate": 5.2272000000000005e-06,
3227
+ "loss": 0.0501,
3228
+ "step": 20650
3229
+ },
3230
+ {
3231
+ "epoch": 1.6560000000000001,
3232
+ "grad_norm": 0.1509876549243927,
3233
+ "learning_rate": 5.1672e-06,
3234
+ "loss": 0.0527,
3235
+ "step": 20700
3236
+ },
3237
+ {
3238
+ "epoch": 1.6600000000000001,
3239
+ "grad_norm": 0.13625292479991913,
3240
+ "learning_rate": 5.1072e-06,
3241
+ "loss": 0.0508,
3242
+ "step": 20750
3243
+ },
3244
+ {
3245
+ "epoch": 1.6640000000000001,
3246
+ "grad_norm": 0.1552583873271942,
3247
+ "learning_rate": 5.0472000000000006e-06,
3248
+ "loss": 0.0548,
3249
+ "step": 20800
3250
+ },
3251
+ {
3252
+ "epoch": 1.6680000000000001,
3253
+ "grad_norm": 0.1763962060213089,
3254
+ "learning_rate": 4.9872e-06,
3255
+ "loss": 0.0585,
3256
+ "step": 20850
3257
+ },
3258
+ {
3259
+ "epoch": 1.6720000000000002,
3260
+ "grad_norm": 0.11216771602630615,
3261
+ "learning_rate": 4.9272e-06,
3262
+ "loss": 0.0567,
3263
+ "step": 20900
3264
+ },
3265
+ {
3266
+ "epoch": 1.6760000000000002,
3267
+ "grad_norm": 0.08550629019737244,
3268
+ "learning_rate": 4.8672e-06,
3269
+ "loss": 0.0523,
3270
+ "step": 20950
3271
+ },
3272
+ {
3273
+ "epoch": 1.6800000000000002,
3274
+ "grad_norm": 0.11488083750009537,
3275
+ "learning_rate": 4.8072e-06,
3276
+ "loss": 0.0503,
3277
+ "step": 21000
3278
+ },
3279
+ {
3280
+ "epoch": 1.6800000000000002,
3281
+ "eval_loss": 0.0817028358578682,
3282
+ "eval_runtime": 88.097,
3283
+ "eval_samples_per_second": 22.702,
3284
+ "eval_steps_per_second": 5.676,
3285
+ "step": 21000
3286
  }
3287
  ],
3288
  "logging_steps": 50,
 
3302
  "attributes": {}
3303
  }
3304
  },
3305
+ "total_flos": 5.115246280704e+16,
3306
  "train_batch_size": 4,
3307
  "trial_name": null,
3308
  "trial_params": null