FredericFan committed on
Commit
0d4f9ec
·
verified ·
1 Parent(s): 04e62af

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a37da1bec04a539e9083a0690c020dd6b5a85ca6ba96130597ced0a592b992f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c001192b49ac33ee8f47a5309a370cfdfa4c3a70dda715a94b188da6ca70162
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b600d17da29b382922c79850abe38cc3a0b9b7af51d7d358a1ba2bfc872d80d7
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7637d351cab3d505a7c81380e8e251c1b0641e2bf3a5583d4012a983c8e533e9
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d49a79be8c359f422cb59c77ee0154f081dfd7e588f93c61b503afdc15d8e88d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0233e0c60d0eeda67a49aa9719818a64279caead7bb9a9439f543fe78e64a894
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33bcf6e84bd960ce66a36a7bd45e4c58615ca69233e24115c69f6a6b57693ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e79009f389b4fe7467240d87a72862340c60d6549edf4f6a20e75882e0962455
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08841572701931,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
4
- "epoch": 0.2,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,6 +397,84 @@
397
  "eval_samples_per_second": 22.791,
398
  "eval_steps_per_second": 5.698,
399
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 50,
@@ -416,7 +494,7 @@
416
  "attributes": {}
417
  }
418
  },
419
- "total_flos": 6089578905600000.0,
420
  "train_batch_size": 4,
421
  "trial_name": null,
422
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08755213767290115,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
4
+ "epoch": 0.24,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "eval_samples_per_second": 22.791,
398
  "eval_steps_per_second": 5.698,
399
  "step": 2500
400
+ },
401
+ {
402
+ "epoch": 0.204,
403
+ "grad_norm": 0.14937053620815277,
404
+ "learning_rate": 2.69412e-05,
405
+ "loss": 0.0727,
406
+ "step": 2550
407
+ },
408
+ {
409
+ "epoch": 0.208,
410
+ "grad_norm": 0.15660254657268524,
411
+ "learning_rate": 2.68812e-05,
412
+ "loss": 0.0656,
413
+ "step": 2600
414
+ },
415
+ {
416
+ "epoch": 0.212,
417
+ "grad_norm": 0.09593763947486877,
418
+ "learning_rate": 2.68212e-05,
419
+ "loss": 0.0726,
420
+ "step": 2650
421
+ },
422
+ {
423
+ "epoch": 0.216,
424
+ "grad_norm": 0.25192323327064514,
425
+ "learning_rate": 2.67624e-05,
426
+ "loss": 0.0753,
427
+ "step": 2700
428
+ },
429
+ {
430
+ "epoch": 0.22,
431
+ "grad_norm": 0.1267642378807068,
432
+ "learning_rate": 2.67024e-05,
433
+ "loss": 0.0707,
434
+ "step": 2750
435
+ },
436
+ {
437
+ "epoch": 0.224,
438
+ "grad_norm": 0.13844658434391022,
439
+ "learning_rate": 2.66424e-05,
440
+ "loss": 0.0711,
441
+ "step": 2800
442
+ },
443
+ {
444
+ "epoch": 0.228,
445
+ "grad_norm": 0.15095186233520508,
446
+ "learning_rate": 2.65824e-05,
447
+ "loss": 0.0696,
448
+ "step": 2850
449
+ },
450
+ {
451
+ "epoch": 0.232,
452
+ "grad_norm": 0.09553442895412445,
453
+ "learning_rate": 2.65224e-05,
454
+ "loss": 0.0709,
455
+ "step": 2900
456
+ },
457
+ {
458
+ "epoch": 0.236,
459
+ "grad_norm": 0.21425922214984894,
460
+ "learning_rate": 2.64624e-05,
461
+ "loss": 0.0662,
462
+ "step": 2950
463
+ },
464
+ {
465
+ "epoch": 0.24,
466
+ "grad_norm": 0.10706017166376114,
467
+ "learning_rate": 2.64024e-05,
468
+ "loss": 0.0721,
469
+ "step": 3000
470
+ },
471
+ {
472
+ "epoch": 0.24,
473
+ "eval_loss": 0.08755213767290115,
474
+ "eval_runtime": 87.7619,
475
+ "eval_samples_per_second": 22.789,
476
+ "eval_steps_per_second": 5.697,
477
+ "step": 3000
478
  }
479
  ],
480
  "logging_steps": 50,
 
494
  "attributes": {}
495
  }
496
  },
497
+ "total_flos": 7307494686720000.0,
498
  "train_batch_size": 4,
499
  "trial_name": null,
500
  "trial_params": null