FredericFan commited on
Commit
0821fea
·
verified ·
1 Parent(s): fda6f64

Training in progress, step 21500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a93f403a198a0abf134a3fd5cbeca3aa8c16276f10e0b35daa2bc2bf8a2a6957
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46faaddaed72c5d090165795ebfd05b22b125c42c26c4875edbc091a5e362e43
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18372560aadc54215809cfae0eaf7225bb168ffc940aa3b172c422f28f9cfff5
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035583387e98c556efc70cb4c61a29b96c0d5a81f688d104a9d1131166859bd1
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d9c5f7443e1222c25c8a224aeec2cab3e754343ab09e424a8f337440ada3c79
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4dd066f1fef3b26a8b4d2763e6247adb5ad5bebee331f997006d84e56e797cb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67a88db37888ba561bfce26ae8fef54113ba48b68f86826f4ed7d7cb198ed4fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed9e6e0ddb2712997439a77e00b8aab494233fbe2995e22f0fcce2c0862afad
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0817028358578682,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-21000",
4
- "epoch": 1.6800000000000002,
5
  "eval_steps": 500,
6
- "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3283,6 +3283,84 @@
3283
  "eval_samples_per_second": 22.702,
3284
  "eval_steps_per_second": 5.676,
3285
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3286
  }
3287
  ],
3288
  "logging_steps": 50,
@@ -3302,7 +3380,7 @@
3302
  "attributes": {}
3303
  }
3304
  },
3305
- "total_flos": 5.115246280704e+16,
3306
  "train_batch_size": 4,
3307
  "trial_name": null,
3308
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0817028358578682,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-21000",
4
+ "epoch": 1.72,
5
  "eval_steps": 500,
6
+ "global_step": 21500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3283
  "eval_samples_per_second": 22.702,
3284
  "eval_steps_per_second": 5.676,
3285
  "step": 21000
3286
+ },
3287
+ {
3288
+ "epoch": 1.6840000000000002,
3289
+ "grad_norm": 0.1464158296585083,
3290
+ "learning_rate": 4.7472e-06,
3291
+ "loss": 0.0557,
3292
+ "step": 21050
3293
+ },
3294
+ {
3295
+ "epoch": 1.688,
3296
+ "grad_norm": 0.1558839976787567,
3297
+ "learning_rate": 4.6872e-06,
3298
+ "loss": 0.0527,
3299
+ "step": 21100
3300
+ },
3301
+ {
3302
+ "epoch": 1.692,
3303
+ "grad_norm": 0.093449167907238,
3304
+ "learning_rate": 4.6271999999999995e-06,
3305
+ "loss": 0.0544,
3306
+ "step": 21150
3307
+ },
3308
+ {
3309
+ "epoch": 1.696,
3310
+ "grad_norm": 0.1413930058479309,
3311
+ "learning_rate": 4.5672e-06,
3312
+ "loss": 0.0571,
3313
+ "step": 21200
3314
+ },
3315
+ {
3316
+ "epoch": 1.7,
3317
+ "grad_norm": 0.19774900376796722,
3318
+ "learning_rate": 4.507200000000001e-06,
3319
+ "loss": 0.0474,
3320
+ "step": 21250
3321
+ },
3322
+ {
3323
+ "epoch": 1.704,
3324
+ "grad_norm": 0.092808298766613,
3325
+ "learning_rate": 4.4472e-06,
3326
+ "loss": 0.05,
3327
+ "step": 21300
3328
+ },
3329
+ {
3330
+ "epoch": 1.708,
3331
+ "grad_norm": 0.09530337899923325,
3332
+ "learning_rate": 4.3872e-06,
3333
+ "loss": 0.0564,
3334
+ "step": 21350
3335
+ },
3336
+ {
3337
+ "epoch": 1.712,
3338
+ "grad_norm": 0.1850968301296234,
3339
+ "learning_rate": 4.327200000000001e-06,
3340
+ "loss": 0.0541,
3341
+ "step": 21400
3342
+ },
3343
+ {
3344
+ "epoch": 1.716,
3345
+ "grad_norm": 0.23416727781295776,
3346
+ "learning_rate": 4.2672e-06,
3347
+ "loss": 0.0576,
3348
+ "step": 21450
3349
+ },
3350
+ {
3351
+ "epoch": 1.72,
3352
+ "grad_norm": 0.11343374848365784,
3353
+ "learning_rate": 4.2072e-06,
3354
+ "loss": 0.0529,
3355
+ "step": 21500
3356
+ },
3357
+ {
3358
+ "epoch": 1.72,
3359
+ "eval_loss": 0.08182183653116226,
3360
+ "eval_runtime": 88.0562,
3361
+ "eval_samples_per_second": 22.713,
3362
+ "eval_steps_per_second": 5.678,
3363
+ "step": 21500
3364
  }
3365
  ],
3366
  "logging_steps": 50,
 
3380
  "attributes": {}
3381
  }
3382
  },
3383
+ "total_flos": 5.237037858816e+16,
3384
  "train_batch_size": 4,
3385
  "trial_name": null,
3386
  "trial_params": null