Baselhany commited on
Commit
879c2de
·
verified ·
1 Parent(s): d2d3023

Training in progress, step 24800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:871e1a0f89abd57c35a7dbf130d5524fd292047262616b92ffd261d640fea12d
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c736fd594c380dd002a6342fa0604bc644313aed80303796815ff4b03e46442e
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d4051e416251f823f3179b18d1769dda91712dedabff1b2dc39dc8e904b7e65
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3e2b9eff304abf490d045d1420af73881798301b076f854de745fb8bb32c5f5
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdf8ed808c01d757c6e38911720948b5f9db18cf7ee348b7781f51b3aebdcb61
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2eaa69a43fe9ca74e4bd1e79b0bbd6af7b0c407425e6e1e3b18bd47b766849b
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6467edd96925a2153b86ff151f4249484a23bca6d403095f0ddb5893900e1b8a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15e488492623c161675a7bb297a1a2ecfcf210cea79271160fd0a257b5fa31e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d6539ca7d27098218a78f61589dfb4cc73931bd4c3c71ec49d7760c7d425bf5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd454cefdd5cc5bafc5dee97883508d8606ab9e8e2795ebeb6c8caeb65187032
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 4000,
3
  "best_metric": 0.18950781019746538,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-4000",
5
- "epoch": 14.463544754001186,
6
  "eval_steps": 400,
7
- "global_step": 24400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2265,6 +2265,43 @@
2265
  "eval_steps_per_second": 0.427,
2266
  "eval_wer": 0.20247568523430592,
2267
  "step": 24400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2268
  }
2269
  ],
2270
  "logging_steps": 100,
@@ -2284,7 +2321,7 @@
2284
  "attributes": {}
2285
  }
2286
  },
2287
- "total_flos": 3.173605943279616e+19,
2288
  "train_batch_size": 8,
2289
  "trial_name": null,
2290
  "trial_params": null
 
2
  "best_global_step": 4000,
3
  "best_metric": 0.18950781019746538,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-4000",
5
+ "epoch": 14.700652045050385,
6
  "eval_steps": 400,
7
+ "global_step": 24800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2265
  "eval_steps_per_second": 0.427,
2266
  "eval_wer": 0.20247568523430592,
2267
  "step": 24400
2268
+ },
2269
+ {
2270
+ "epoch": 14.522821576763485,
2271
+ "grad_norm": 8.844592094421387,
2272
+ "learning_rate": 3.301753678693812e-06,
2273
+ "loss": 0.941,
2274
+ "step": 24500
2275
+ },
2276
+ {
2277
+ "epoch": 14.582098399525785,
2278
+ "grad_norm": 10.255487442016602,
2279
+ "learning_rate": 2.89860915138077e-06,
2280
+ "loss": 0.9698,
2281
+ "step": 24600
2282
+ },
2283
+ {
2284
+ "epoch": 14.641375222288085,
2285
+ "grad_norm": 8.5720796585083,
2286
+ "learning_rate": 2.4954646240677284e-06,
2287
+ "loss": 0.9443,
2288
+ "step": 24700
2289
+ },
2290
+ {
2291
+ "epoch": 14.700652045050385,
2292
+ "grad_norm": 7.877289295196533,
2293
+ "learning_rate": 2.0923200967546868e-06,
2294
+ "loss": 0.922,
2295
+ "step": 24800
2296
+ },
2297
+ {
2298
+ "epoch": 14.700652045050385,
2299
+ "eval_loss": 0.0880463495850563,
2300
+ "eval_runtime": 146.9212,
2301
+ "eval_samples_per_second": 3.403,
2302
+ "eval_steps_per_second": 0.429,
2303
+ "eval_wer": 0.20085470085470086,
2304
+ "step": 24800
2305
  }
2306
  ],
2307
  "logging_steps": 100,
 
2321
  "attributes": {}
2322
  }
2323
  },
2324
+ "total_flos": 3.225638800982016e+19,
2325
  "train_batch_size": 8,
2326
  "trial_name": null,
2327
  "trial_params": null