NairaRahim commited on
Commit
a7ef385
·
verified ·
1 Parent(s): e30483a

Training in progress, epoch 22, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:759e11911622505bdb5a77d511b901f277ce0bbc06c416da623a53f9c0f0b663
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6753c78b7c0ef33c0e06df98fc2b17af826195a3d977ff76dfe9dc01bc829d36
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c251d2bf032c0284e58db70391eefabbbff2c2fb3bc7e50e9d6ba42d7c0345d5
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017a44d3ac93ea4bfa9fd21994bffd93889bffa070c7758ab5420752aaad3c5b
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4db8bac039de345d3f184975bc589d15c0b93c070db1287ed5147dba9f5ba405
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd02ebed0dfeff533894ca511f1b34ff916fe512d4945de5b16f69750a67af48
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7adf3363afe258d5bb46d306ca21d84331e969bff032715bb85737194722fbc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4565350eeb4b76edcdc222b4a803476b9ee67ea5c3c110e2c10f3af6e808474b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.54485321044922,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
- "epoch": 21.0,
5
  "eval_steps": 500,
6
- "global_step": 27405,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2093,6 +2093,105 @@
2093
  "eval_samples_per_second": 26.452,
2094
  "eval_steps_per_second": 3.324,
2095
  "step": 27405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2096
  }
2097
  ],
2098
  "logging_steps": 100,
@@ -2107,7 +2206,7 @@
2107
  "early_stopping_threshold": 0.0
2108
  },
2109
  "attributes": {
2110
- "early_stopping_patience_counter": 5
2111
  }
2112
  },
2113
  "TrainerControl": {
@@ -2121,7 +2220,7 @@
2121
  "attributes": {}
2122
  }
2123
  },
2124
- "total_flos": 2.955352896399053e+16,
2125
  "train_batch_size": 8,
2126
  "trial_name": null,
2127
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.53865432739258,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
4
+ "epoch": 22.0,
5
  "eval_steps": 500,
6
+ "global_step": 28710,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2093
  "eval_samples_per_second": 26.452,
2094
  "eval_steps_per_second": 3.324,
2095
  "step": 27405
2096
+ },
2097
+ {
2098
+ "epoch": 21.0727969348659,
2099
+ "grad_norm": 2.287121534347534,
2100
+ "learning_rate": 3.683572796934866e-05,
2101
+ "loss": 32.9962,
2102
+ "step": 27500
2103
+ },
2104
+ {
2105
+ "epoch": 21.149425287356323,
2106
+ "grad_norm": 2.5622124671936035,
2107
+ "learning_rate": 3.678783524904214e-05,
2108
+ "loss": 33.2565,
2109
+ "step": 27600
2110
+ },
2111
+ {
2112
+ "epoch": 21.226053639846743,
2113
+ "grad_norm": 2.2134974002838135,
2114
+ "learning_rate": 3.673994252873563e-05,
2115
+ "loss": 33.7442,
2116
+ "step": 27700
2117
+ },
2118
+ {
2119
+ "epoch": 21.302681992337163,
2120
+ "grad_norm": 2.574054002761841,
2121
+ "learning_rate": 3.669204980842912e-05,
2122
+ "loss": 33.7998,
2123
+ "step": 27800
2124
+ },
2125
+ {
2126
+ "epoch": 21.379310344827587,
2127
+ "grad_norm": 2.8479721546173096,
2128
+ "learning_rate": 3.6644157088122604e-05,
2129
+ "loss": 33.2015,
2130
+ "step": 27900
2131
+ },
2132
+ {
2133
+ "epoch": 21.455938697318008,
2134
+ "grad_norm": 4.845319747924805,
2135
+ "learning_rate": 3.659626436781609e-05,
2136
+ "loss": 33.7904,
2137
+ "step": 28000
2138
+ },
2139
+ {
2140
+ "epoch": 21.532567049808428,
2141
+ "grad_norm": 2.353726863861084,
2142
+ "learning_rate": 3.6548371647509584e-05,
2143
+ "loss": 33.7207,
2144
+ "step": 28100
2145
+ },
2146
+ {
2147
+ "epoch": 21.60919540229885,
2148
+ "grad_norm": 3.003556966781616,
2149
+ "learning_rate": 3.650047892720307e-05,
2150
+ "loss": 33.297,
2151
+ "step": 28200
2152
+ },
2153
+ {
2154
+ "epoch": 21.685823754789272,
2155
+ "grad_norm": 4.815252304077148,
2156
+ "learning_rate": 3.645258620689656e-05,
2157
+ "loss": 33.3036,
2158
+ "step": 28300
2159
+ },
2160
+ {
2161
+ "epoch": 21.762452107279692,
2162
+ "grad_norm": 3.0622081756591797,
2163
+ "learning_rate": 3.640469348659004e-05,
2164
+ "loss": 33.3661,
2165
+ "step": 28400
2166
+ },
2167
+ {
2168
+ "epoch": 21.839080459770116,
2169
+ "grad_norm": 3.3728883266448975,
2170
+ "learning_rate": 3.6356800766283525e-05,
2171
+ "loss": 32.8782,
2172
+ "step": 28500
2173
+ },
2174
+ {
2175
+ "epoch": 21.915708812260537,
2176
+ "grad_norm": 2.2338080406188965,
2177
+ "learning_rate": 3.630890804597701e-05,
2178
+ "loss": 33.0412,
2179
+ "step": 28600
2180
+ },
2181
+ {
2182
+ "epoch": 21.992337164750957,
2183
+ "grad_norm": 3.717360019683838,
2184
+ "learning_rate": 3.62610153256705e-05,
2185
+ "loss": 33.0318,
2186
+ "step": 28700
2187
+ },
2188
+ {
2189
+ "epoch": 22.0,
2190
+ "eval_loss": 34.53865432739258,
2191
+ "eval_runtime": 49.3318,
2192
+ "eval_samples_per_second": 26.454,
2193
+ "eval_steps_per_second": 3.324,
2194
+ "step": 28710
2195
  }
2196
  ],
2197
  "logging_steps": 100,
 
2206
  "early_stopping_threshold": 0.0
2207
  },
2208
  "attributes": {
2209
+ "early_stopping_patience_counter": 0
2210
  }
2211
  },
2212
  "TrainerControl": {
 
2220
  "attributes": {}
2221
  }
2222
  },
2223
+ "total_flos": 3.0960839867037696e+16,
2224
  "train_batch_size": 8,
2225
  "trial_name": null,
2226
  "trial_params": null