Baselhany committed on
Commit
22ba004
·
verified ·
1 Parent(s): 9e6b85b

Training in progress, step 33000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:411df52b26563077a81a7b08db369d27d102503f690c603f3564427326c2c3fb
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cfc2369854afc94fdfa5a772fefa0c2c28012dcdc0bc0ea80f23b73f346c34a
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315eea51d967b636aee06c9fab61b28d456ea20155b0f905d9a2894b1cd4bb1c
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a0c5737e43b8c351af0642dc5e297ba4cfc310d2333fb3515ae84242ec7206
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2615f31c2bde960c4cad72e76e8275160a20b219557b38cecad80fda50c2154
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:576380a256a4340c462ffc82306c52abbab7f38e65a7b12e9ab2634163e0fae7
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e91277e0d15d4cadb5461d7609fda108b605cd367d55a41118269783b39513b9
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5680c8a2f1d27c25e08e493d045a3f651f13f689a05908b8e76432cb1f7ace3b
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06016ca44c3c2f7f6ce102c4b35b77805b28e5c34909f86718e837c850c7f578
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb6ef00c1c865d5fe6826f62f53b17ea0c3488cfae31fdb4898bd3e20b4343e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
- "epoch": 18.746338605741066,
6
  "eval_steps": 1000,
7
- "global_step": 32000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2536,6 +2536,85 @@
2536
  "eval_steps_per_second": 0.433,
2537
  "eval_wer": 0.18376068376068377,
2538
  "step": 32000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2539
  }
2540
  ],
2541
  "logging_steps": 100,
@@ -2555,7 +2634,7 @@
2555
  "attributes": {}
2556
  }
2557
  },
2558
- "total_flos": 4.162335931367424e+19,
2559
  "train_batch_size": 8,
2560
  "trial_name": null,
2561
  "trial_params": null
 
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
+ "epoch": 19.332161687170476,
6
  "eval_steps": 1000,
7
+ "global_step": 33000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2536
  "eval_steps_per_second": 0.433,
2537
  "eval_wer": 0.18376068376068377,
2538
  "step": 32000
2539
+ },
2540
+ {
2541
+ "epoch": 18.804920913884008,
2542
+ "grad_norm": 11.049666404724121,
2543
+ "learning_rate": 1.4764937658552383e-05,
2544
+ "loss": 0.7523,
2545
+ "step": 32100
2546
+ },
2547
+ {
2548
+ "epoch": 18.86350322202695,
2549
+ "grad_norm": 5.7121782302856445,
2550
+ "learning_rate": 1.4495061261942031e-05,
2551
+ "loss": 0.7462,
2552
+ "step": 32200
2553
+ },
2554
+ {
2555
+ "epoch": 18.922085530169888,
2556
+ "grad_norm": 6.261909008026123,
2557
+ "learning_rate": 1.4225184865331678e-05,
2558
+ "loss": 0.6894,
2559
+ "step": 32300
2560
+ },
2561
+ {
2562
+ "epoch": 18.98066783831283,
2563
+ "grad_norm": 9.238897323608398,
2564
+ "learning_rate": 1.3955308468721326e-05,
2565
+ "loss": 0.7625,
2566
+ "step": 32400
2567
+ },
2568
+ {
2569
+ "epoch": 19.03925014645577,
2570
+ "grad_norm": 5.913239002227783,
2571
+ "learning_rate": 1.3685432072110973e-05,
2572
+ "loss": 0.7324,
2573
+ "step": 32500
2574
+ },
2575
+ {
2576
+ "epoch": 19.097832454598713,
2577
+ "grad_norm": 6.142359733581543,
2578
+ "learning_rate": 1.3415555675500621e-05,
2579
+ "loss": 0.7275,
2580
+ "step": 32600
2581
+ },
2582
+ {
2583
+ "epoch": 19.15641476274165,
2584
+ "grad_norm": 8.006820678710938,
2585
+ "learning_rate": 1.314567927889027e-05,
2586
+ "loss": 0.7335,
2587
+ "step": 32700
2588
+ },
2589
+ {
2590
+ "epoch": 19.214997070884593,
2591
+ "grad_norm": 10.434711456298828,
2592
+ "learning_rate": 1.2875802882279916e-05,
2593
+ "loss": 0.7093,
2594
+ "step": 32800
2595
+ },
2596
+ {
2597
+ "epoch": 19.273579379027534,
2598
+ "grad_norm": 7.119588375091553,
2599
+ "learning_rate": 1.2605926485669564e-05,
2600
+ "loss": 0.7213,
2601
+ "step": 32900
2602
+ },
2603
+ {
2604
+ "epoch": 19.332161687170476,
2605
+ "grad_norm": 6.306182861328125,
2606
+ "learning_rate": 1.2336050089059211e-05,
2607
+ "loss": 0.7008,
2608
+ "step": 33000
2609
+ },
2610
+ {
2611
+ "epoch": 19.332161687170476,
2612
+ "eval_loss": 0.08348561823368073,
2613
+ "eval_runtime": 148.0399,
2614
+ "eval_samples_per_second": 3.377,
2615
+ "eval_steps_per_second": 0.426,
2616
+ "eval_wer": 0.18641320365458297,
2617
+ "step": 33000
2618
  }
2619
  ],
2620
  "logging_steps": 100,
 
2634
  "attributes": {}
2635
  }
2636
  },
2637
+ "total_flos": 4.292401815355392e+19,
2638
  "train_batch_size": 8,
2639
  "trial_name": null,
2640
  "trial_params": null