Baselhany commited on
Commit
e581c5c
·
verified ·
1 Parent(s): 7895496

Training in progress, step 33740, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35630aaf8d25704318c506c5652dcc7989eb3675f345f27f818f2ea9f5a27b3e
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:892f5770babae72f86fbe7676dc8bcf97db73722f0f63eb0b42e15455a57d7d2
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2919ac48beca92f152cebfc8ede282362c2a9fbcf84e083ca71a4741ea04a4
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d045ce4d30a954f6ce12c0249fd382623d3fffc707129bca5a2387cf707b5a
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:576380a256a4340c462ffc82306c52abbab7f38e65a7b12e9ab2634163e0fae7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a999e0010ea13513073a8d97dfabf347a8d02268da2cd188d9e2ffb955a37853
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:466cefc251c3bff2bf1a7b8f775c63823fd9874f5797306ccc17b28b5d55b904
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb779461c157e920976902f7e10da553ef08690b8cb1e678f1ad0ceb27b3b0a1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bade4422d18437c221217e78bc3118e350dee6be4d722036a744d311fb763b10
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce728e16d5ac03050b00527a8e1a90dc74db0b69a4e3e2d0415dc2af3e021509
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 12000,
3
  "best_metric": 0.1958443854995579,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-12000",
5
- "epoch": 19.561351511558982,
6
  "eval_steps": 1000,
7
- "global_step": 33000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2615,6 +2615,55 @@
2615
  "eval_steps_per_second": 0.426,
2616
  "eval_wer": 0.2076333628057766,
2617
  "step": 33000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2618
  }
2619
  ],
2620
  "logging_steps": 100,
@@ -2629,12 +2678,12 @@
2629
  "should_evaluate": false,
2630
  "should_log": false,
2631
  "should_save": true,
2632
- "should_training_stop": false
2633
  },
2634
  "attributes": {}
2635
  }
2636
  },
2637
- "total_flos": 4.292170106535936e+19,
2638
  "train_batch_size": 8,
2639
  "trial_name": null,
2640
  "trial_params": null
 
2
  "best_global_step": 12000,
3
  "best_metric": 0.1958443854995579,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-12000",
5
+ "epoch": 20.0,
6
  "eval_steps": 1000,
7
+ "global_step": 33740,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2615
  "eval_steps_per_second": 0.426,
2616
  "eval_wer": 0.2076333628057766,
2617
  "step": 33000
2618
+ },
2619
+ {
2620
+ "epoch": 19.62062833432128,
2621
+ "grad_norm": 6.8571367263793945,
2622
+ "learning_rate": 1.976534296028881e-06,
2623
+ "loss": 0.8777,
2624
+ "step": 33100
2625
+ },
2626
+ {
2627
+ "epoch": 19.67990515708358,
2628
+ "grad_norm": 8.974102020263672,
2629
+ "learning_rate": 1.6756919374247895e-06,
2630
+ "loss": 0.8382,
2631
+ "step": 33200
2632
+ },
2633
+ {
2634
+ "epoch": 19.73918197984588,
2635
+ "grad_norm": 8.73436450958252,
2636
+ "learning_rate": 1.377858002406739e-06,
2637
+ "loss": 0.854,
2638
+ "step": 33300
2639
+ },
2640
+ {
2641
+ "epoch": 19.79845880260818,
2642
+ "grad_norm": 11.128707885742188,
2643
+ "learning_rate": 1.0770156438026475e-06,
2644
+ "loss": 0.8173,
2645
+ "step": 33400
2646
+ },
2647
+ {
2648
+ "epoch": 19.85773562537048,
2649
+ "grad_norm": 6.551008701324463,
2650
+ "learning_rate": 7.76173285198556e-07,
2651
+ "loss": 0.8194,
2652
+ "step": 33500
2653
+ },
2654
+ {
2655
+ "epoch": 19.91701244813278,
2656
+ "grad_norm": 7.966579914093018,
2657
+ "learning_rate": 4.753309265944645e-07,
2658
+ "loss": 0.8342,
2659
+ "step": 33600
2660
+ },
2661
+ {
2662
+ "epoch": 19.97628927089508,
2663
+ "grad_norm": 7.3733978271484375,
2664
+ "learning_rate": 1.7448856799037304e-07,
2665
+ "loss": 0.8733,
2666
+ "step": 33700
2667
  }
2668
  ],
2669
  "logging_steps": 100,
 
2678
  "should_evaluate": false,
2679
  "should_log": false,
2680
  "should_save": true,
2681
+ "should_training_stop": true
2682
  },
2683
  "attributes": {}
2684
  }
2685
  },
2686
+ "total_flos": 4.38840243781632e+19,
2687
  "train_batch_size": 8,
2688
  "trial_name": null,
2689
  "trial_params": null