NairaRahim commited on
Commit
44ca499
·
verified ·
1 Parent(s): 8cc9427

Training in progress, epoch 12, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db4b5d9091a6dbab9d2b4be7cf992134ba4a3e0d729e96284bc4512ac0932620
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d087532161fc3d3113f958d4327ca8ab76fb93d1b9005d7b72d8341648a7f95e
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d9dbc9961f1b825d07e327826af5885ae6801dfe3867c659b03e9c90764c433
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d359385b3376fb641197873abbd6f199bc67d84ad37382d398095c1f51b664a9
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1671761f2a32f97e49b389d83fe64fe54fae391ec682766d59ea01e911801f0d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4d71a933e8a99a1b5e03ca178837d4af39c5cb9255b1959f57ce6925e566d0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a8f171c30ec70e8b7de39e28734b3eb14c402c92c5675eccaa14ecf588e6cff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f18daae1b94bcadba9e921cdd5d160fa2fe3e4c34c14e032eed270d5a8a3cca
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 14355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1096,6 +1096,105 @@
1096
  "eval_samples_per_second": 26.474,
1097
  "eval_steps_per_second": 3.327,
1098
  "step": 14355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1099
  }
1100
  ],
1101
  "logging_steps": 100,
@@ -1110,7 +1209,7 @@
1110
  "early_stopping_threshold": 0.0
1111
  },
1112
  "attributes": {
1113
- "early_stopping_patience_counter": 1
1114
  }
1115
  },
1116
  "TrainerControl": {
@@ -1124,7 +1223,7 @@
1124
  "attributes": {}
1125
  }
1126
  },
1127
- "total_flos": 1.5480419933518848e+16,
1128
  "train_batch_size": 8,
1129
  "trial_name": null,
1130
  "trial_params": null
 
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 15660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1096
  "eval_samples_per_second": 26.474,
1097
  "eval_steps_per_second": 3.327,
1098
  "step": 14355
1099
+ },
1100
+ {
1101
+ "epoch": 11.03448275862069,
1102
+ "grad_norm": 2.7724273204803467,
1103
+ "learning_rate": 4.310632183908046e-05,
1104
+ "loss": 33.7759,
1105
+ "step": 14400
1106
+ },
1107
+ {
1108
+ "epoch": 11.11111111111111,
1109
+ "grad_norm": 3.9663071632385254,
1110
+ "learning_rate": 4.305842911877395e-05,
1111
+ "loss": 33.6063,
1112
+ "step": 14500
1113
+ },
1114
+ {
1115
+ "epoch": 11.187739463601533,
1116
+ "grad_norm": 2.53495717048645,
1117
+ "learning_rate": 4.3010536398467435e-05,
1118
+ "loss": 32.9251,
1119
+ "step": 14600
1120
+ },
1121
+ {
1122
+ "epoch": 11.264367816091955,
1123
+ "grad_norm": 3.928633689880371,
1124
+ "learning_rate": 4.296264367816092e-05,
1125
+ "loss": 33.41,
1126
+ "step": 14700
1127
+ },
1128
+ {
1129
+ "epoch": 11.340996168582375,
1130
+ "grad_norm": 1.888804316520691,
1131
+ "learning_rate": 4.291475095785441e-05,
1132
+ "loss": 33.147,
1133
+ "step": 14800
1134
+ },
1135
+ {
1136
+ "epoch": 11.417624521072797,
1137
+ "grad_norm": 3.151488780975342,
1138
+ "learning_rate": 4.2866858237547896e-05,
1139
+ "loss": 34.011,
1140
+ "step": 14900
1141
+ },
1142
+ {
1143
+ "epoch": 11.494252873563218,
1144
+ "grad_norm": 2.659867286682129,
1145
+ "learning_rate": 4.281896551724138e-05,
1146
+ "loss": 33.3559,
1147
+ "step": 15000
1148
+ },
1149
+ {
1150
+ "epoch": 11.57088122605364,
1151
+ "grad_norm": 4.092405319213867,
1152
+ "learning_rate": 4.277107279693487e-05,
1153
+ "loss": 33.2301,
1154
+ "step": 15100
1155
+ },
1156
+ {
1157
+ "epoch": 11.647509578544062,
1158
+ "grad_norm": 4.295740127563477,
1159
+ "learning_rate": 4.2723659003831415e-05,
1160
+ "loss": 33.1047,
1161
+ "step": 15200
1162
+ },
1163
+ {
1164
+ "epoch": 11.724137931034482,
1165
+ "grad_norm": 2.4472806453704834,
1166
+ "learning_rate": 4.26757662835249e-05,
1167
+ "loss": 33.8206,
1168
+ "step": 15300
1169
+ },
1170
+ {
1171
+ "epoch": 11.800766283524904,
1172
+ "grad_norm": 2.716550350189209,
1173
+ "learning_rate": 4.262787356321839e-05,
1174
+ "loss": 33.7173,
1175
+ "step": 15400
1176
+ },
1177
+ {
1178
+ "epoch": 11.877394636015326,
1179
+ "grad_norm": 3.1278491020202637,
1180
+ "learning_rate": 4.257998084291188e-05,
1181
+ "loss": 34.0344,
1182
+ "step": 15500
1183
+ },
1184
+ {
1185
+ "epoch": 11.954022988505747,
1186
+ "grad_norm": 2.4835212230682373,
1187
+ "learning_rate": 4.253208812260537e-05,
1188
+ "loss": 33.8397,
1189
+ "step": 15600
1190
+ },
1191
+ {
1192
+ "epoch": 12.0,
1193
+ "eval_loss": 34.70100402832031,
1194
+ "eval_runtime": 49.2554,
1195
+ "eval_samples_per_second": 26.495,
1196
+ "eval_steps_per_second": 3.33,
1197
+ "step": 15660
1198
  }
1199
  ],
1200
  "logging_steps": 100,
 
1209
  "early_stopping_threshold": 0.0
1210
  },
1211
  "attributes": {
1212
+ "early_stopping_patience_counter": 2
1213
  }
1214
  },
1215
  "TrainerControl": {
 
1223
  "attributes": {}
1224
  }
1225
  },
1226
+ "total_flos": 1.6887730836566016e+16,
1227
  "train_batch_size": 8,
1228
  "trial_name": null,
1229
  "trial_params": null