FredericFan committed on
Commit
9af97c8
·
verified ·
1 Parent(s): 9d62705

Training in progress, step 16500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d7224364f596ebfca3a99c1d3b9d449fb04ce1b01252f00b468ebb7f2583738
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08df383ddf8539b1c156d553a75182fefc1bd9cb8d1be80a3bb2d48549c72268
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c146dfaf517426e21001d58ecb591e013237c1f0457fc2b2e18c8128376dd232
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4942b168045dd01e1507ab88999a6fa88c39bef7244ce53f7c7f5c00a6c5e0d5
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e2dd927672719d15b56be948c4d341bf0d4717fdddd441106e1853c80a9c881
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7de248f1039c222833b4241b4257bf2afda51b5d7ba778b660b2c2050b7e97
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc65b1cc8bef515a746903c5c0ceccfd01df11517dd7fe9004c63360d9c9a98b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c923446f6a9040a7fff312fb9744a6058c7cd7ec45e59a8a878177b2e96073c7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08243728429079056,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
- "epoch": 1.28,
5
  "eval_steps": 500,
6
- "global_step": 16000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2503,6 +2503,84 @@
2503
  "eval_samples_per_second": 22.708,
2504
  "eval_steps_per_second": 5.677,
2505
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2506
  }
2507
  ],
2508
  "logging_steps": 50,
@@ -2522,7 +2600,7 @@
2522
  "attributes": {}
2523
  }
2524
  },
2525
- "total_flos": 3.897330499584e+16,
2526
  "train_batch_size": 4,
2527
  "trial_name": null,
2528
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08243728429079056,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
+ "epoch": 1.32,
5
  "eval_steps": 500,
6
+ "global_step": 16500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2503
  "eval_samples_per_second": 22.708,
2504
  "eval_steps_per_second": 5.677,
2505
  "step": 16000
2506
+ },
2507
+ {
2508
+ "epoch": 1.284,
2509
+ "grad_norm": 0.14214898645877838,
2510
+ "learning_rate": 1.07448e-05,
2511
+ "loss": 0.056,
2512
+ "step": 16050
2513
+ },
2514
+ {
2515
+ "epoch": 1.288,
2516
+ "grad_norm": 0.13236215710639954,
2517
+ "learning_rate": 1.06848e-05,
2518
+ "loss": 0.0566,
2519
+ "step": 16100
2520
+ },
2521
+ {
2522
+ "epoch": 1.292,
2523
+ "grad_norm": 0.1475580334663391,
2524
+ "learning_rate": 1.06248e-05,
2525
+ "loss": 0.0507,
2526
+ "step": 16150
2527
+ },
2528
+ {
2529
+ "epoch": 1.296,
2530
+ "grad_norm": 0.13150113821029663,
2531
+ "learning_rate": 1.05648e-05,
2532
+ "loss": 0.0488,
2533
+ "step": 16200
2534
+ },
2535
+ {
2536
+ "epoch": 1.3,
2537
+ "grad_norm": 0.0869784876704216,
2538
+ "learning_rate": 1.0504800000000001e-05,
2539
+ "loss": 0.0498,
2540
+ "step": 16250
2541
+ },
2542
+ {
2543
+ "epoch": 1.304,
2544
+ "grad_norm": 0.15695451200008392,
2545
+ "learning_rate": 1.04448e-05,
2546
+ "loss": 0.0517,
2547
+ "step": 16300
2548
+ },
2549
+ {
2550
+ "epoch": 1.308,
2551
+ "grad_norm": 0.1383635401725769,
2552
+ "learning_rate": 1.0384800000000001e-05,
2553
+ "loss": 0.0509,
2554
+ "step": 16350
2555
+ },
2556
+ {
2557
+ "epoch": 1.312,
2558
+ "grad_norm": 0.17300955951213837,
2559
+ "learning_rate": 1.0324800000000002e-05,
2560
+ "loss": 0.0587,
2561
+ "step": 16400
2562
+ },
2563
+ {
2564
+ "epoch": 1.316,
2565
+ "grad_norm": 0.1608356237411499,
2566
+ "learning_rate": 1.02648e-05,
2567
+ "loss": 0.0523,
2568
+ "step": 16450
2569
+ },
2570
+ {
2571
+ "epoch": 1.32,
2572
+ "grad_norm": 0.1598045974969864,
2573
+ "learning_rate": 1.0204800000000001e-05,
2574
+ "loss": 0.0524,
2575
+ "step": 16500
2576
+ },
2577
+ {
2578
+ "epoch": 1.32,
2579
+ "eval_loss": 0.08258219808340073,
2580
+ "eval_runtime": 88.0858,
2581
+ "eval_samples_per_second": 22.705,
2582
+ "eval_steps_per_second": 5.676,
2583
+ "step": 16500
2584
  }
2585
  ],
2586
  "logging_steps": 50,
 
2600
  "attributes": {}
2601
  }
2602
  },
2603
+ "total_flos": 4.019122077696e+16,
2604
  "train_batch_size": 4,
2605
  "trial_name": null,
2606
  "trial_params": null