Baselhany commited on
Commit
b58e9ba
·
verified ·
1 Parent(s): db33a03

Training in progress, step 32000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:869a0e57d1d1ee9a2fb1d0c01f9524bb4c80a900cd8117beeba130d80f1bf321
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:411df52b26563077a81a7b08db369d27d102503f690c603f3564427326c2c3fb
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb63c0ce7275a1c9ee86300c27e661af044d42bb26561a6ccd9d69c137ff500d
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:315eea51d967b636aee06c9fab61b28d456ea20155b0f905d9a2894b1cd4bb1c
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc6f8b922ca5c3d24f7537f685a761eded3c26ca293345adbe507125769101a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2615f31c2bde960c4cad72e76e8275160a20b219557b38cecad80fda50c2154
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3931a748c5d903224857a1ab9abba558951fac8f6dc32f736599e211fe96e5ce
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e91277e0d15d4cadb5461d7609fda108b605cd367d55a41118269783b39513b9
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:def63641a6ca9901171b06d19cec2f06e49d8d9307e1c178a24fd97f04041d2a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06016ca44c3c2f7f6ce102c4b35b77805b28e5c34909f86718e837c850c7f578
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
- "epoch": 18.16051552431166,
6
  "eval_steps": 1000,
7
- "global_step": 31000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2457,6 +2457,85 @@
2457
  "eval_steps_per_second": 0.427,
2458
  "eval_wer": 0.1905393457117595,
2459
  "step": 31000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2460
  }
2461
  ],
2462
  "logging_steps": 100,
@@ -2476,7 +2555,7 @@
2476
  "attributes": {}
2477
  }
2478
  },
2479
- "total_flos": 4.032253787111424e+19,
2480
  "train_batch_size": 8,
2481
  "trial_name": null,
2482
  "trial_params": null
 
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
+ "epoch": 18.746338605741066,
6
  "eval_steps": 1000,
7
+ "global_step": 32000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2457
  "eval_steps_per_second": 0.427,
2458
  "eval_wer": 0.1905393457117595,
2459
  "step": 31000
2460
+ },
2461
+ {
2462
+ "epoch": 18.219097832454597,
2463
+ "grad_norm": 6.72420597076416,
2464
+ "learning_rate": 1.74583040967237e-05,
2465
+ "loss": 0.7028,
2466
+ "step": 31100
2467
+ },
2468
+ {
2469
+ "epoch": 18.27768014059754,
2470
+ "grad_norm": 5.8699140548706055,
2471
+ "learning_rate": 1.7188427700113348e-05,
2472
+ "loss": 0.7548,
2473
+ "step": 31200
2474
+ },
2475
+ {
2476
+ "epoch": 18.33626244874048,
2477
+ "grad_norm": 6.714815139770508,
2478
+ "learning_rate": 1.6921250067469098e-05,
2479
+ "loss": 0.7619,
2480
+ "step": 31300
2481
+ },
2482
+ {
2483
+ "epoch": 18.394844756883423,
2484
+ "grad_norm": 9.205123901367188,
2485
+ "learning_rate": 1.6651373670858748e-05,
2486
+ "loss": 0.7146,
2487
+ "step": 31400
2488
+ },
2489
+ {
2490
+ "epoch": 18.45342706502636,
2491
+ "grad_norm": 7.359625339508057,
2492
+ "learning_rate": 1.6381497274248395e-05,
2493
+ "loss": 0.7425,
2494
+ "step": 31500
2495
+ },
2496
+ {
2497
+ "epoch": 18.512009373169303,
2498
+ "grad_norm": 12.034134864807129,
2499
+ "learning_rate": 1.6114319641604145e-05,
2500
+ "loss": 0.7325,
2501
+ "step": 31600
2502
+ },
2503
+ {
2504
+ "epoch": 18.570591681312244,
2505
+ "grad_norm": 12.600486755371094,
2506
+ "learning_rate": 1.584444324499379e-05,
2507
+ "loss": 0.7628,
2508
+ "step": 31700
2509
+ },
2510
+ {
2511
+ "epoch": 18.629173989455186,
2512
+ "grad_norm": 5.2866530418396,
2513
+ "learning_rate": 1.557456684838344e-05,
2514
+ "loss": 0.7636,
2515
+ "step": 31800
2516
+ },
2517
+ {
2518
+ "epoch": 18.687756297598124,
2519
+ "grad_norm": 5.383816719055176,
2520
+ "learning_rate": 1.5304690451773088e-05,
2521
+ "loss": 0.727,
2522
+ "step": 31900
2523
+ },
2524
+ {
2525
+ "epoch": 18.746338605741066,
2526
+ "grad_norm": 8.094857215881348,
2527
+ "learning_rate": 1.5034814055162735e-05,
2528
+ "loss": 0.7976,
2529
+ "step": 32000
2530
+ },
2531
+ {
2532
+ "epoch": 18.746338605741066,
2533
+ "eval_loss": 0.08411071449518204,
2534
+ "eval_runtime": 145.6438,
2535
+ "eval_samples_per_second": 3.433,
2536
+ "eval_steps_per_second": 0.433,
2537
+ "eval_wer": 0.18376068376068377,
2538
+ "step": 32000
2539
  }
2540
  ],
2541
  "logging_steps": 100,
 
2555
  "attributes": {}
2556
  }
2557
  },
2558
+ "total_flos": 4.162335931367424e+19,
2559
  "train_batch_size": 8,
2560
  "trial_name": null,
2561
  "trial_params": null