FredericFan commited on
Commit
0acec4d
·
verified ·
1 Parent(s): 55cc817

Training in progress, step 15500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8b87db48356f50aba644c7f62751e63e8a04ec2268f84e4eba37dd50c1a0411
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538d7daf5a013ebf60e116edb876eb64dea7f4748e30f57ec47e2c9572446b86
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85a117e1ecba729ee34a78722b6baa410d7ae44754ab32c14568e8bd85298764
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e768d48c93dac8fb1cb5f2fa432f2e55b47df8d6600926440ac628e20aa6cf2
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cb981e2bdbf592329578c66e6b81722ec222bc355b0ddd3bdc9f7689a385e7a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd5917ddffeb704126b23aed843c5f451e0f433388bd43b50327c7ee53765e0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0054105d8fb5cdfddd8876e0968830f9a7aa658759cebb2ad97ea276facb582b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2432b3f1647ef3becac2600c0e8902c31d4989fbe6245d5565a62826aeca81
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08243728429079056,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
- "epoch": 1.2,
5
  "eval_steps": 500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2347,6 +2347,84 @@
2347
  "eval_samples_per_second": 22.719,
2348
  "eval_steps_per_second": 5.68,
2349
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2350
  }
2351
  ],
2352
  "logging_steps": 50,
@@ -2366,7 +2444,7 @@
2366
  "attributes": {}
2367
  }
2368
  },
2369
- "total_flos": 3.65374734336e+16,
2370
  "train_batch_size": 4,
2371
  "trial_name": null,
2372
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08243728429079056,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
+ "epoch": 1.24,
5
  "eval_steps": 500,
6
+ "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2347
  "eval_samples_per_second": 22.719,
2348
  "eval_steps_per_second": 5.68,
2349
  "step": 15000
2350
+ },
2351
+ {
2352
+ "epoch": 1.204,
2353
+ "grad_norm": 0.15492039918899536,
2354
+ "learning_rate": 1.19436e-05,
2355
+ "loss": 0.0535,
2356
+ "step": 15050
2357
+ },
2358
+ {
2359
+ "epoch": 1.208,
2360
+ "grad_norm": 0.0744326263666153,
2361
+ "learning_rate": 1.1883600000000001e-05,
2362
+ "loss": 0.0572,
2363
+ "step": 15100
2364
+ },
2365
+ {
2366
+ "epoch": 1.212,
2367
+ "grad_norm": 0.20596753060817719,
2368
+ "learning_rate": 1.1823600000000001e-05,
2369
+ "loss": 0.0539,
2370
+ "step": 15150
2371
+ },
2372
+ {
2373
+ "epoch": 1.216,
2374
+ "grad_norm": 0.15914830565452576,
2375
+ "learning_rate": 1.17636e-05,
2376
+ "loss": 0.0583,
2377
+ "step": 15200
2378
+ },
2379
+ {
2380
+ "epoch": 1.22,
2381
+ "grad_norm": 0.16685660183429718,
2382
+ "learning_rate": 1.1703600000000001e-05,
2383
+ "loss": 0.0566,
2384
+ "step": 15250
2385
+ },
2386
+ {
2387
+ "epoch": 1.224,
2388
+ "grad_norm": 0.11385693401098251,
2389
+ "learning_rate": 1.1643600000000001e-05,
2390
+ "loss": 0.0542,
2391
+ "step": 15300
2392
+ },
2393
+ {
2394
+ "epoch": 1.228,
2395
+ "grad_norm": 0.12850815057754517,
2396
+ "learning_rate": 1.15836e-05,
2397
+ "loss": 0.0622,
2398
+ "step": 15350
2399
+ },
2400
+ {
2401
+ "epoch": 1.232,
2402
+ "grad_norm": 0.17093044519424438,
2403
+ "learning_rate": 1.1523600000000001e-05,
2404
+ "loss": 0.0536,
2405
+ "step": 15400
2406
+ },
2407
+ {
2408
+ "epoch": 1.236,
2409
+ "grad_norm": 0.18593502044677734,
2410
+ "learning_rate": 1.1463600000000002e-05,
2411
+ "loss": 0.0541,
2412
+ "step": 15450
2413
+ },
2414
+ {
2415
+ "epoch": 1.24,
2416
+ "grad_norm": 0.16509443521499634,
2417
+ "learning_rate": 1.14036e-05,
2418
+ "loss": 0.0501,
2419
+ "step": 15500
2420
+ },
2421
+ {
2422
+ "epoch": 1.24,
2423
+ "eval_loss": 0.08269675076007843,
2424
+ "eval_runtime": 88.0811,
2425
+ "eval_samples_per_second": 22.706,
2426
+ "eval_steps_per_second": 5.677,
2427
+ "step": 15500
2428
  }
2429
  ],
2430
  "logging_steps": 50,
 
2444
  "attributes": {}
2445
  }
2446
  },
2447
+ "total_flos": 3.775538921472e+16,
2448
  "train_batch_size": 4,
2449
  "trial_name": null,
2450
  "trial_params": null