Baselhany committed on
Commit
92abf26
·
verified ·
1 Parent(s): a084d73

Training in progress, step 27000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3d8f77727b5b4ef950b93d62c1b07493ebd7d4bea85543ea1654b59f3c483f3
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3400935acbe3cf5f906085ec93d6116aa5d1e99af0633a73d3e706f2cd16a6
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f224cb291da070e142cb6ea868df08935add46875a306306495bd60678bf1d
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdd076291545d47b6464e519aaf46f4e9dfe2f8be4e7566d7c31c135a1c9323
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66432b4a08797e94140bc153b8542d6959b5455a4424229f739a5d8c8a8076a0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f831aeb48368d39337afe7a48ffe4212a4e417eb7aff215653e8d3fa1dcee0
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c33ea924ad35e1a1156b5f295ad34a99c2c9f60887c2357eaef9fdd16be7edee
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc8355cb3493000d81a40060fd9086d3994b8013129a708704b751a5d7398bbe
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f76f154dd8dd104593482c38bad816b91e8cdf002cfd8b0ab080c2e1683fb35d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae2ff98708f58a5d722c91db9dc346e6b11df9cbdf8ea9de78b33a14ae6319e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 25000,
3
  "best_metric": 0.18243442381373415,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-25000",
5
- "epoch": 15.231400117164617,
6
  "eval_steps": 1000,
7
- "global_step": 26000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2062,6 +2062,85 @@
2062
  "eval_steps_per_second": 0.43,
2063
  "eval_wer": 0.1825817860300619,
2064
  "step": 26000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2065
  }
2066
  ],
2067
  "logging_steps": 100,
@@ -2081,7 +2160,7 @@
2081
  "attributes": {}
2082
  }
2083
  },
2084
- "total_flos": 3.38189184663552e+19,
2085
  "train_batch_size": 8,
2086
  "trial_name": null,
2087
  "trial_params": null
 
2
  "best_global_step": 25000,
3
  "best_metric": 0.18243442381373415,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-25000",
5
+ "epoch": 15.817223198594025,
6
  "eval_steps": 1000,
7
+ "global_step": 27000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2062
  "eval_steps_per_second": 0.43,
2063
  "eval_wer": 0.1825817860300619,
2064
  "step": 26000
2065
+ },
2066
+ {
2067
+ "epoch": 15.289982425307556,
2068
+ "grad_norm": 6.403947353363037,
2069
+ "learning_rate": 4.572579442040878e-06,
2070
+ "loss": 0.728,
2071
+ "step": 26100
2072
+ },
2073
+ {
2074
+ "epoch": 15.348564733450498,
2075
+ "grad_norm": 6.236737251281738,
2076
+ "learning_rate": 4.199612113978816e-06,
2077
+ "loss": 0.7462,
2078
+ "step": 26200
2079
+ },
2080
+ {
2081
+ "epoch": 15.407147041593438,
2082
+ "grad_norm": 5.554813385009766,
2083
+ "learning_rate": 3.826644785916754e-06,
2084
+ "loss": 0.7403,
2085
+ "step": 26300
2086
+ },
2087
+ {
2088
+ "epoch": 15.46572934973638,
2089
+ "grad_norm": 4.909285068511963,
2090
+ "learning_rate": 3.453677457854692e-06,
2091
+ "loss": 0.7653,
2092
+ "step": 26400
2093
+ },
2094
+ {
2095
+ "epoch": 15.52431165787932,
2096
+ "grad_norm": 5.502344608306885,
2097
+ "learning_rate": 3.0807101297926304e-06,
2098
+ "loss": 0.7319,
2099
+ "step": 26500
2100
+ },
2101
+ {
2102
+ "epoch": 15.582893966022262,
2103
+ "grad_norm": 7.525850772857666,
2104
+ "learning_rate": 2.7077428017305685e-06,
2105
+ "loss": 0.7295,
2106
+ "step": 26600
2107
+ },
2108
+ {
2109
+ "epoch": 15.641476274165202,
2110
+ "grad_norm": 7.245991230010986,
2111
+ "learning_rate": 2.3347754736685067e-06,
2112
+ "loss": 0.7382,
2113
+ "step": 26700
2114
+ },
2115
+ {
2116
+ "epoch": 15.700058582308143,
2117
+ "grad_norm": 5.762548923492432,
2118
+ "learning_rate": 1.961808145606445e-06,
2119
+ "loss": 0.7253,
2120
+ "step": 26800
2121
+ },
2122
+ {
2123
+ "epoch": 15.758640890451083,
2124
+ "grad_norm": 6.127166271209717,
2125
+ "learning_rate": 1.5888408175443833e-06,
2126
+ "loss": 0.7399,
2127
+ "step": 26900
2128
+ },
2129
+ {
2130
+ "epoch": 15.817223198594025,
2131
+ "grad_norm": 6.195973873138428,
2132
+ "learning_rate": 1.2158734894823213e-06,
2133
+ "loss": 0.7379,
2134
+ "step": 27000
2135
+ },
2136
+ {
2137
+ "epoch": 15.817223198594025,
2138
+ "eval_loss": 0.08341451734304428,
2139
+ "eval_runtime": 146.5011,
2140
+ "eval_samples_per_second": 3.413,
2141
+ "eval_steps_per_second": 0.43,
2142
+ "eval_wer": 0.18287651046271736,
2143
+ "step": 27000
2144
  }
2145
  ],
2146
  "logging_steps": 100,
 
2160
  "attributes": {}
2161
  }
2162
  },
2163
+ "total_flos": 3.51197399089152e+19,
2164
  "train_batch_size": 8,
2165
  "trial_name": null,
2166
  "trial_params": null