error577 commited on
Commit
a72f51e
·
verified ·
1 Parent(s): 9a3645c

Training in progress, step 3200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79e591185c5787608005de8d8831090ba6f8a6c00a5984c86cdfacfc02b9b6ae
3
  size 140815952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac64ee4e0bd3e418c59a709a7c428ec4b44f8460b46d24fcd19a23cc0c24348
3
  size 140815952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f2c8d71fdea890cdd7280a63c893d6a6973e58651c83a53957c4ae4afa7a1cf
3
  size 71878996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:535c31667b572c7c3bd2e6a0105994a38f0e527ba03bd2d1bde2921d11fed6a7
3
  size 71878996
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0137d0597034267edc3e3565a05c0ee1e39a9556a01de18abcbfcef3e9d971ce
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a3933da542d8dfc5421dd39329fbc22a08897e0013e8ccf4211b3870efde85
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1057b10206a8bc19b3240669fb0d495c051dd5cbd952d8300b67a072578e46d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b86e2ef4af2ecabfaf8859176e0c352600a43b6f303ea279fbdaec73765bdc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8309042784869501,
5
  "eval_steps": 100,
6
- "global_step": 3100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21963,6 +21963,714 @@
21963
  "eval_samples_per_second": 39.073,
21964
  "eval_steps_per_second": 9.768,
21965
  "step": 3100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21966
  }
21967
  ],
21968
  "logging_steps": 1,
@@ -21982,7 +22690,7 @@
21982
  "attributes": {}
21983
  }
21984
  },
21985
- "total_flos": 1.197914776928256e+17,
21986
  "train_batch_size": 4,
21987
  "trial_name": null,
21988
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8577076423091098,
5
  "eval_steps": 100,
6
+ "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21963
  "eval_samples_per_second": 39.073,
21964
  "eval_steps_per_second": 9.768,
21965
  "step": 3100
21966
+ },
21967
+ {
21968
+ "epoch": 0.8311723121251717,
21969
+ "grad_norm": 3.765991687774658,
21970
+ "learning_rate": 8.251519339467657e-05,
21971
+ "loss": 3.3596,
21972
+ "step": 3101
21973
+ },
21974
+ {
21975
+ "epoch": 0.8314403457633933,
21976
+ "grad_norm": 2.9936914443969727,
21977
+ "learning_rate": 8.250449951301103e-05,
21978
+ "loss": 2.7545,
21979
+ "step": 3102
21980
+ },
21981
+ {
21982
+ "epoch": 0.8317083794016149,
21983
+ "grad_norm": 2.693584442138672,
21984
+ "learning_rate": 8.249380305552949e-05,
21985
+ "loss": 2.6637,
21986
+ "step": 3103
21987
+ },
21988
+ {
21989
+ "epoch": 0.8319764130398365,
21990
+ "grad_norm": 3.2070119380950928,
21991
+ "learning_rate": 8.248310402307961e-05,
21992
+ "loss": 2.9294,
21993
+ "step": 3104
21994
+ },
21995
+ {
21996
+ "epoch": 0.8322444466780581,
21997
+ "grad_norm": 3.2056386470794678,
21998
+ "learning_rate": 8.247240241650918e-05,
21999
+ "loss": 2.9327,
22000
+ "step": 3105
22001
+ },
22002
+ {
22003
+ "epoch": 0.8325124803162797,
22004
+ "grad_norm": 2.5587897300720215,
22005
+ "learning_rate": 8.24616982366663e-05,
22006
+ "loss": 2.7247,
22007
+ "step": 3106
22008
+ },
22009
+ {
22010
+ "epoch": 0.8327805139545013,
22011
+ "grad_norm": 3.0326316356658936,
22012
+ "learning_rate": 8.24509914843992e-05,
22013
+ "loss": 2.8525,
22014
+ "step": 3107
22015
+ },
22016
+ {
22017
+ "epoch": 0.8330485475927228,
22018
+ "grad_norm": 3.1906213760375977,
22019
+ "learning_rate": 8.244028216055634e-05,
22020
+ "loss": 2.9844,
22021
+ "step": 3108
22022
+ },
22023
+ {
22024
+ "epoch": 0.8333165812309444,
22025
+ "grad_norm": 3.0366783142089844,
22026
+ "learning_rate": 8.242957026598638e-05,
22027
+ "loss": 2.8466,
22028
+ "step": 3109
22029
+ },
22030
+ {
22031
+ "epoch": 0.8335846148691661,
22032
+ "grad_norm": 2.767413377761841,
22033
+ "learning_rate": 8.241885580153818e-05,
22034
+ "loss": 2.773,
22035
+ "step": 3110
22036
+ },
22037
+ {
22038
+ "epoch": 0.8338526485073877,
22039
+ "grad_norm": 2.5532186031341553,
22040
+ "learning_rate": 8.240813876806079e-05,
22041
+ "loss": 2.4941,
22042
+ "step": 3111
22043
+ },
22044
+ {
22045
+ "epoch": 0.8341206821456093,
22046
+ "grad_norm": 2.848140001296997,
22047
+ "learning_rate": 8.239741916640351e-05,
22048
+ "loss": 2.8352,
22049
+ "step": 3112
22050
+ },
22051
+ {
22052
+ "epoch": 0.8343887157838309,
22053
+ "grad_norm": 2.937314987182617,
22054
+ "learning_rate": 8.23866969974158e-05,
22055
+ "loss": 2.6597,
22056
+ "step": 3113
22057
+ },
22058
+ {
22059
+ "epoch": 0.8346567494220525,
22060
+ "grad_norm": 2.6668167114257812,
22061
+ "learning_rate": 8.237597226194733e-05,
22062
+ "loss": 2.6354,
22063
+ "step": 3114
22064
+ },
22065
+ {
22066
+ "epoch": 0.8349247830602741,
22067
+ "grad_norm": 3.1095848083496094,
22068
+ "learning_rate": 8.236524496084801e-05,
22069
+ "loss": 2.8998,
22070
+ "step": 3115
22071
+ },
22072
+ {
22073
+ "epoch": 0.8351928166984957,
22074
+ "grad_norm": 2.8646724224090576,
22075
+ "learning_rate": 8.235451509496789e-05,
22076
+ "loss": 2.7162,
22077
+ "step": 3116
22078
+ },
22079
+ {
22080
+ "epoch": 0.8354608503367172,
22081
+ "grad_norm": 3.2314741611480713,
22082
+ "learning_rate": 8.234378266515727e-05,
22083
+ "loss": 2.963,
22084
+ "step": 3117
22085
+ },
22086
+ {
22087
+ "epoch": 0.8357288839749388,
22088
+ "grad_norm": 2.798070192337036,
22089
+ "learning_rate": 8.233304767226663e-05,
22090
+ "loss": 2.7093,
22091
+ "step": 3118
22092
+ },
22093
+ {
22094
+ "epoch": 0.8359969176131604,
22095
+ "grad_norm": 3.209622383117676,
22096
+ "learning_rate": 8.23223101171467e-05,
22097
+ "loss": 3.0303,
22098
+ "step": 3119
22099
+ },
22100
+ {
22101
+ "epoch": 0.836264951251382,
22102
+ "grad_norm": 3.1647841930389404,
22103
+ "learning_rate": 8.231157000064833e-05,
22104
+ "loss": 2.9435,
22105
+ "step": 3120
22106
+ },
22107
+ {
22108
+ "epoch": 0.8365329848896036,
22109
+ "grad_norm": 3.2297987937927246,
22110
+ "learning_rate": 8.230082732362264e-05,
22111
+ "loss": 3.0019,
22112
+ "step": 3121
22113
+ },
22114
+ {
22115
+ "epoch": 0.8368010185278253,
22116
+ "grad_norm": 2.8666510581970215,
22117
+ "learning_rate": 8.229008208692093e-05,
22118
+ "loss": 2.7266,
22119
+ "step": 3122
22120
+ },
22121
+ {
22122
+ "epoch": 0.8370690521660469,
22123
+ "grad_norm": 2.94901442527771,
22124
+ "learning_rate": 8.227933429139471e-05,
22125
+ "loss": 2.9068,
22126
+ "step": 3123
22127
+ },
22128
+ {
22129
+ "epoch": 0.8373370858042685,
22130
+ "grad_norm": 3.306313991546631,
22131
+ "learning_rate": 8.226858393789571e-05,
22132
+ "loss": 2.9639,
22133
+ "step": 3124
22134
+ },
22135
+ {
22136
+ "epoch": 0.83760511944249,
22137
+ "grad_norm": 3.987175464630127,
22138
+ "learning_rate": 8.22578310272758e-05,
22139
+ "loss": 2.7636,
22140
+ "step": 3125
22141
+ },
22142
+ {
22143
+ "epoch": 0.8378731530807116,
22144
+ "grad_norm": 2.8015623092651367,
22145
+ "learning_rate": 8.22470755603871e-05,
22146
+ "loss": 2.7572,
22147
+ "step": 3126
22148
+ },
22149
+ {
22150
+ "epoch": 0.8381411867189332,
22151
+ "grad_norm": 2.992177724838257,
22152
+ "learning_rate": 8.223631753808195e-05,
22153
+ "loss": 2.8723,
22154
+ "step": 3127
22155
+ },
22156
+ {
22157
+ "epoch": 0.8384092203571548,
22158
+ "grad_norm": 3.115063190460205,
22159
+ "learning_rate": 8.222555696121283e-05,
22160
+ "loss": 3.0286,
22161
+ "step": 3128
22162
+ },
22163
+ {
22164
+ "epoch": 0.8386772539953764,
22165
+ "grad_norm": 3.0285449028015137,
22166
+ "learning_rate": 8.22147938306325e-05,
22167
+ "loss": 2.826,
22168
+ "step": 3129
22169
+ },
22170
+ {
22171
+ "epoch": 0.838945287633598,
22172
+ "grad_norm": 3.2307591438293457,
22173
+ "learning_rate": 8.220402814719387e-05,
22174
+ "loss": 2.8402,
22175
+ "step": 3130
22176
+ },
22177
+ {
22178
+ "epoch": 0.8392133212718196,
22179
+ "grad_norm": 2.8563008308410645,
22180
+ "learning_rate": 8.219325991175009e-05,
22181
+ "loss": 2.5679,
22182
+ "step": 3131
22183
+ },
22184
+ {
22185
+ "epoch": 0.8394813549100412,
22186
+ "grad_norm": 2.7312331199645996,
22187
+ "learning_rate": 8.218248912515442e-05,
22188
+ "loss": 2.6174,
22189
+ "step": 3132
22190
+ },
22191
+ {
22192
+ "epoch": 0.8397493885482628,
22193
+ "grad_norm": 2.7081151008605957,
22194
+ "learning_rate": 8.217171578826046e-05,
22195
+ "loss": 2.9534,
22196
+ "step": 3133
22197
+ },
22198
+ {
22199
+ "epoch": 0.8400174221864845,
22200
+ "grad_norm": 2.9271397590637207,
22201
+ "learning_rate": 8.21609399019219e-05,
22202
+ "loss": 3.0468,
22203
+ "step": 3134
22204
+ },
22205
+ {
22206
+ "epoch": 0.840285455824706,
22207
+ "grad_norm": 3.3061866760253906,
22208
+ "learning_rate": 8.21501614669927e-05,
22209
+ "loss": 3.0003,
22210
+ "step": 3135
22211
+ },
22212
+ {
22213
+ "epoch": 0.8405534894629276,
22214
+ "grad_norm": 3.312804937362671,
22215
+ "learning_rate": 8.213938048432697e-05,
22216
+ "loss": 3.0239,
22217
+ "step": 3136
22218
+ },
22219
+ {
22220
+ "epoch": 0.8408215231011492,
22221
+ "grad_norm": 2.8543965816497803,
22222
+ "learning_rate": 8.212859695477906e-05,
22223
+ "loss": 2.5733,
22224
+ "step": 3137
22225
+ },
22226
+ {
22227
+ "epoch": 0.8410895567393708,
22228
+ "grad_norm": 3.0889506340026855,
22229
+ "learning_rate": 8.211781087920353e-05,
22230
+ "loss": 2.9983,
22231
+ "step": 3138
22232
+ },
22233
+ {
22234
+ "epoch": 0.8413575903775924,
22235
+ "grad_norm": 3.14336895942688,
22236
+ "learning_rate": 8.210702225845511e-05,
22237
+ "loss": 2.8062,
22238
+ "step": 3139
22239
+ },
22240
+ {
22241
+ "epoch": 0.841625624015814,
22242
+ "grad_norm": 2.58722186088562,
22243
+ "learning_rate": 8.209623109338871e-05,
22244
+ "loss": 2.7192,
22245
+ "step": 3140
22246
+ },
22247
+ {
22248
+ "epoch": 0.8418936576540356,
22249
+ "grad_norm": 3.014845371246338,
22250
+ "learning_rate": 8.20854373848595e-05,
22251
+ "loss": 3.005,
22252
+ "step": 3141
22253
+ },
22254
+ {
22255
+ "epoch": 0.8421616912922572,
22256
+ "grad_norm": 2.933680534362793,
22257
+ "learning_rate": 8.207464113372283e-05,
22258
+ "loss": 2.97,
22259
+ "step": 3142
22260
+ },
22261
+ {
22262
+ "epoch": 0.8424297249304787,
22263
+ "grad_norm": 2.8684470653533936,
22264
+ "learning_rate": 8.206384234083427e-05,
22265
+ "loss": 2.5981,
22266
+ "step": 3143
22267
+ },
22268
+ {
22269
+ "epoch": 0.8426977585687003,
22270
+ "grad_norm": 3.119670867919922,
22271
+ "learning_rate": 8.205304100704953e-05,
22272
+ "loss": 2.8613,
22273
+ "step": 3144
22274
+ },
22275
+ {
22276
+ "epoch": 0.8429657922069219,
22277
+ "grad_norm": 2.9081950187683105,
22278
+ "learning_rate": 8.204223713322457e-05,
22279
+ "loss": 2.7478,
22280
+ "step": 3145
22281
+ },
22282
+ {
22283
+ "epoch": 0.8432338258451436,
22284
+ "grad_norm": 2.779106855392456,
22285
+ "learning_rate": 8.203143072021556e-05,
22286
+ "loss": 2.9309,
22287
+ "step": 3146
22288
+ },
22289
+ {
22290
+ "epoch": 0.8435018594833652,
22291
+ "grad_norm": 2.790799379348755,
22292
+ "learning_rate": 8.202062176887883e-05,
22293
+ "loss": 2.7294,
22294
+ "step": 3147
22295
+ },
22296
+ {
22297
+ "epoch": 0.8437698931215868,
22298
+ "grad_norm": 3.0613210201263428,
22299
+ "learning_rate": 8.200981028007095e-05,
22300
+ "loss": 2.876,
22301
+ "step": 3148
22302
+ },
22303
+ {
22304
+ "epoch": 0.8440379267598084,
22305
+ "grad_norm": 2.910881280899048,
22306
+ "learning_rate": 8.199899625464867e-05,
22307
+ "loss": 2.5908,
22308
+ "step": 3149
22309
+ },
22310
+ {
22311
+ "epoch": 0.84430596039803,
22312
+ "grad_norm": 2.8150923252105713,
22313
+ "learning_rate": 8.198817969346894e-05,
22314
+ "loss": 2.7145,
22315
+ "step": 3150
22316
+ },
22317
+ {
22318
+ "epoch": 0.8445739940362516,
22319
+ "grad_norm": 2.830840587615967,
22320
+ "learning_rate": 8.197736059738894e-05,
22321
+ "loss": 2.74,
22322
+ "step": 3151
22323
+ },
22324
+ {
22325
+ "epoch": 0.8448420276744731,
22326
+ "grad_norm": 2.8052470684051514,
22327
+ "learning_rate": 8.196653896726601e-05,
22328
+ "loss": 2.6825,
22329
+ "step": 3152
22330
+ },
22331
+ {
22332
+ "epoch": 0.8451100613126947,
22333
+ "grad_norm": 2.8668699264526367,
22334
+ "learning_rate": 8.19557148039577e-05,
22335
+ "loss": 2.9702,
22336
+ "step": 3153
22337
+ },
22338
+ {
22339
+ "epoch": 0.8453780949509163,
22340
+ "grad_norm": 2.7870476245880127,
22341
+ "learning_rate": 8.194488810832179e-05,
22342
+ "loss": 2.951,
22343
+ "step": 3154
22344
+ },
22345
+ {
22346
+ "epoch": 0.8456461285891379,
22347
+ "grad_norm": 3.0203659534454346,
22348
+ "learning_rate": 8.193405888121622e-05,
22349
+ "loss": 2.8264,
22350
+ "step": 3155
22351
+ },
22352
+ {
22353
+ "epoch": 0.8459141622273595,
22354
+ "grad_norm": 3.251181125640869,
22355
+ "learning_rate": 8.192322712349917e-05,
22356
+ "loss": 3.0736,
22357
+ "step": 3156
22358
+ },
22359
+ {
22360
+ "epoch": 0.8461821958655811,
22361
+ "grad_norm": 2.867091417312622,
22362
+ "learning_rate": 8.1912392836029e-05,
22363
+ "loss": 2.5752,
22364
+ "step": 3157
22365
+ },
22366
+ {
22367
+ "epoch": 0.8464502295038028,
22368
+ "grad_norm": 3.3381619453430176,
22369
+ "learning_rate": 8.190155601966427e-05,
22370
+ "loss": 2.9668,
22371
+ "step": 3158
22372
+ },
22373
+ {
22374
+ "epoch": 0.8467182631420244,
22375
+ "grad_norm": 3.1242899894714355,
22376
+ "learning_rate": 8.189071667526373e-05,
22377
+ "loss": 2.8379,
22378
+ "step": 3159
22379
+ },
22380
+ {
22381
+ "epoch": 0.846986296780246,
22382
+ "grad_norm": 3.4567618370056152,
22383
+ "learning_rate": 8.187987480368637e-05,
22384
+ "loss": 3.0123,
22385
+ "step": 3160
22386
+ },
22387
+ {
22388
+ "epoch": 0.8472543304184675,
22389
+ "grad_norm": 2.864577293395996,
22390
+ "learning_rate": 8.186903040579131e-05,
22391
+ "loss": 2.5651,
22392
+ "step": 3161
22393
+ },
22394
+ {
22395
+ "epoch": 0.8475223640566891,
22396
+ "grad_norm": 3.081716775894165,
22397
+ "learning_rate": 8.185818348243796e-05,
22398
+ "loss": 2.775,
22399
+ "step": 3162
22400
+ },
22401
+ {
22402
+ "epoch": 0.8477903976949107,
22403
+ "grad_norm": 2.9339306354522705,
22404
+ "learning_rate": 8.184733403448585e-05,
22405
+ "loss": 2.4889,
22406
+ "step": 3163
22407
+ },
22408
+ {
22409
+ "epoch": 0.8480584313331323,
22410
+ "grad_norm": 3.1311564445495605,
22411
+ "learning_rate": 8.183648206279475e-05,
22412
+ "loss": 2.7713,
22413
+ "step": 3164
22414
+ },
22415
+ {
22416
+ "epoch": 0.8483264649713539,
22417
+ "grad_norm": 3.117518663406372,
22418
+ "learning_rate": 8.182562756822464e-05,
22419
+ "loss": 2.8101,
22420
+ "step": 3165
22421
+ },
22422
+ {
22423
+ "epoch": 0.8485944986095755,
22424
+ "grad_norm": 3.273800849914551,
22425
+ "learning_rate": 8.181477055163567e-05,
22426
+ "loss": 2.7604,
22427
+ "step": 3166
22428
+ },
22429
+ {
22430
+ "epoch": 0.8488625322477971,
22431
+ "grad_norm": 3.348430871963501,
22432
+ "learning_rate": 8.18039110138882e-05,
22433
+ "loss": 2.7476,
22434
+ "step": 3167
22435
+ },
22436
+ {
22437
+ "epoch": 0.8491305658860187,
22438
+ "grad_norm": 3.052398204803467,
22439
+ "learning_rate": 8.17930489558428e-05,
22440
+ "loss": 2.8796,
22441
+ "step": 3168
22442
+ },
22443
+ {
22444
+ "epoch": 0.8493985995242402,
22445
+ "grad_norm": 3.4873738288879395,
22446
+ "learning_rate": 8.178218437836023e-05,
22447
+ "loss": 2.7686,
22448
+ "step": 3169
22449
+ },
22450
+ {
22451
+ "epoch": 0.8496666331624619,
22452
+ "grad_norm": 3.0493202209472656,
22453
+ "learning_rate": 8.177131728230148e-05,
22454
+ "loss": 2.8739,
22455
+ "step": 3170
22456
+ },
22457
+ {
22458
+ "epoch": 0.8499346668006835,
22459
+ "grad_norm": 2.8752858638763428,
22460
+ "learning_rate": 8.176044766852766e-05,
22461
+ "loss": 2.7826,
22462
+ "step": 3171
22463
+ },
22464
+ {
22465
+ "epoch": 0.8502027004389051,
22466
+ "grad_norm": 3.091642379760742,
22467
+ "learning_rate": 8.174957553790014e-05,
22468
+ "loss": 2.8947,
22469
+ "step": 3172
22470
+ },
22471
+ {
22472
+ "epoch": 0.8504707340771267,
22473
+ "grad_norm": 3.901031255722046,
22474
+ "learning_rate": 8.173870089128053e-05,
22475
+ "loss": 3.2836,
22476
+ "step": 3173
22477
+ },
22478
+ {
22479
+ "epoch": 0.8507387677153483,
22480
+ "grad_norm": 3.0523319244384766,
22481
+ "learning_rate": 8.172782372953055e-05,
22482
+ "loss": 3.0609,
22483
+ "step": 3174
22484
+ },
22485
+ {
22486
+ "epoch": 0.8510068013535699,
22487
+ "grad_norm": 2.8478736877441406,
22488
+ "learning_rate": 8.171694405351216e-05,
22489
+ "loss": 2.7312,
22490
+ "step": 3175
22491
+ },
22492
+ {
22493
+ "epoch": 0.8512748349917915,
22494
+ "grad_norm": 3.260422706604004,
22495
+ "learning_rate": 8.170606186408755e-05,
22496
+ "loss": 3.0813,
22497
+ "step": 3176
22498
+ },
22499
+ {
22500
+ "epoch": 0.851542868630013,
22501
+ "grad_norm": 3.0320775508880615,
22502
+ "learning_rate": 8.169517716211902e-05,
22503
+ "loss": 2.6246,
22504
+ "step": 3177
22505
+ },
22506
+ {
22507
+ "epoch": 0.8518109022682346,
22508
+ "grad_norm": 3.255002021789551,
22509
+ "learning_rate": 8.168428994846919e-05,
22510
+ "loss": 2.6883,
22511
+ "step": 3178
22512
+ },
22513
+ {
22514
+ "epoch": 0.8520789359064562,
22515
+ "grad_norm": 2.799616813659668,
22516
+ "learning_rate": 8.167340022400078e-05,
22517
+ "loss": 2.4905,
22518
+ "step": 3179
22519
+ },
22520
+ {
22521
+ "epoch": 0.8523469695446778,
22522
+ "grad_norm": 3.230668067932129,
22523
+ "learning_rate": 8.166250798957676e-05,
22524
+ "loss": 2.7647,
22525
+ "step": 3180
22526
+ },
22527
+ {
22528
+ "epoch": 0.8526150031828994,
22529
+ "grad_norm": 2.8910818099975586,
22530
+ "learning_rate": 8.165161324606026e-05,
22531
+ "loss": 3.0522,
22532
+ "step": 3181
22533
+ },
22534
+ {
22535
+ "epoch": 0.8528830368211211,
22536
+ "grad_norm": 2.608095407485962,
22537
+ "learning_rate": 8.164071599431467e-05,
22538
+ "loss": 2.9134,
22539
+ "step": 3182
22540
+ },
22541
+ {
22542
+ "epoch": 0.8531510704593427,
22543
+ "grad_norm": 3.173567533493042,
22544
+ "learning_rate": 8.162981623520352e-05,
22545
+ "loss": 3.1328,
22546
+ "step": 3183
22547
+ },
22548
+ {
22549
+ "epoch": 0.8534191040975643,
22550
+ "grad_norm": 3.0655722618103027,
22551
+ "learning_rate": 8.161891396959057e-05,
22552
+ "loss": 2.6358,
22553
+ "step": 3184
22554
+ },
22555
+ {
22556
+ "epoch": 0.8536871377357859,
22557
+ "grad_norm": 2.8257834911346436,
22558
+ "learning_rate": 8.160800919833973e-05,
22559
+ "loss": 2.8067,
22560
+ "step": 3185
22561
+ },
22562
+ {
22563
+ "epoch": 0.8539551713740074,
22564
+ "grad_norm": 3.129920721054077,
22565
+ "learning_rate": 8.15971019223152e-05,
22566
+ "loss": 2.9928,
22567
+ "step": 3186
22568
+ },
22569
+ {
22570
+ "epoch": 0.854223205012229,
22571
+ "grad_norm": 3.318128824234009,
22572
+ "learning_rate": 8.15861921423813e-05,
22573
+ "loss": 2.6948,
22574
+ "step": 3187
22575
+ },
22576
+ {
22577
+ "epoch": 0.8544912386504506,
22578
+ "grad_norm": 2.9098756313323975,
22579
+ "learning_rate": 8.157527985940258e-05,
22580
+ "loss": 2.833,
22581
+ "step": 3188
22582
+ },
22583
+ {
22584
+ "epoch": 0.8547592722886722,
22585
+ "grad_norm": 3.071049690246582,
22586
+ "learning_rate": 8.15643650742438e-05,
22587
+ "loss": 2.7623,
22588
+ "step": 3189
22589
+ },
22590
+ {
22591
+ "epoch": 0.8550273059268938,
22592
+ "grad_norm": 3.198852300643921,
22593
+ "learning_rate": 8.155344778776987e-05,
22594
+ "loss": 2.496,
22595
+ "step": 3190
22596
+ },
22597
+ {
22598
+ "epoch": 0.8552953395651154,
22599
+ "grad_norm": 2.8775556087493896,
22600
+ "learning_rate": 8.154252800084595e-05,
22601
+ "loss": 2.991,
22602
+ "step": 3191
22603
+ },
22604
+ {
22605
+ "epoch": 0.855563373203337,
22606
+ "grad_norm": 3.092869281768799,
22607
+ "learning_rate": 8.153160571433737e-05,
22608
+ "loss": 2.8313,
22609
+ "step": 3192
22610
+ },
22611
+ {
22612
+ "epoch": 0.8558314068415586,
22613
+ "grad_norm": 2.7808609008789062,
22614
+ "learning_rate": 8.152068092910967e-05,
22615
+ "loss": 2.8599,
22616
+ "step": 3193
22617
+ },
22618
+ {
22619
+ "epoch": 0.8560994404797803,
22620
+ "grad_norm": 2.8283629417419434,
22621
+ "learning_rate": 8.150975364602857e-05,
22622
+ "loss": 2.8706,
22623
+ "step": 3194
22624
+ },
22625
+ {
22626
+ "epoch": 0.8563674741180018,
22627
+ "grad_norm": 2.813171863555908,
22628
+ "learning_rate": 8.149882386596002e-05,
22629
+ "loss": 2.8007,
22630
+ "step": 3195
22631
+ },
22632
+ {
22633
+ "epoch": 0.8566355077562234,
22634
+ "grad_norm": 2.916139602661133,
22635
+ "learning_rate": 8.148789158977013e-05,
22636
+ "loss": 2.973,
22637
+ "step": 3196
22638
+ },
22639
+ {
22640
+ "epoch": 0.856903541394445,
22641
+ "grad_norm": 2.494389533996582,
22642
+ "learning_rate": 8.147695681832523e-05,
22643
+ "loss": 2.702,
22644
+ "step": 3197
22645
+ },
22646
+ {
22647
+ "epoch": 0.8571715750326666,
22648
+ "grad_norm": 2.7285983562469482,
22649
+ "learning_rate": 8.146601955249188e-05,
22650
+ "loss": 2.757,
22651
+ "step": 3198
22652
+ },
22653
+ {
22654
+ "epoch": 0.8574396086708882,
22655
+ "grad_norm": 3.0228183269500732,
22656
+ "learning_rate": 8.145507979313675e-05,
22657
+ "loss": 2.8903,
22658
+ "step": 3199
22659
+ },
22660
+ {
22661
+ "epoch": 0.8577076423091098,
22662
+ "grad_norm": 2.881899118423462,
22663
+ "learning_rate": 8.14441375411268e-05,
22664
+ "loss": 2.89,
22665
+ "step": 3200
22666
+ },
22667
+ {
22668
+ "epoch": 0.8577076423091098,
22669
+ "eval_loss": 2.834207057952881,
22670
+ "eval_runtime": 15.3558,
22671
+ "eval_samples_per_second": 39.073,
22672
+ "eval_steps_per_second": 9.768,
22673
+ "step": 3200
22674
  }
22675
  ],
22676
  "logging_steps": 1,
 
22690
  "attributes": {}
22691
  }
22692
  },
22693
+ "total_flos": 1.236557189087232e+17,
22694
  "train_batch_size": 4,
22695
  "trial_name": null,
22696
  "trial_params": null