shulijia commited on
Commit
311067b
·
verified ·
1 Parent(s): fc97b8d

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f7f960c526aaddf3dc6988e73942fac836299ec8e275266c1eba5701ae94d95
3
  size 2384234968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85553c2cc4b71cc764d219a255a3d7c329d548c46a05c1b60f352b7a9a28b2a1
3
  size 2384234968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1ba615b7681549fb237aead953796280a2ad4be16081ccbd5f79689ec8c3f9c
3
  size 4768663315
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cef3423760a08b83e2c1f1529056dce5e88b5150c5b965e4bf1c35daa74b70f
3
  size 4768663315
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de69a2834426ff9ef8199d077e00892579278af31d8969d77f98235b5cfc010a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edf1be9c157afb4ca46e7843711b38a681d679ee3bcd0c31f21d197c72d6bbf2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1bf8ba09c7a33e82766bf9f5af704c56a2c04ffb9328ada50fa2f824e9badd
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9574580227392515,
6
  "eval_steps": 100,
7
- "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5858,6 +5858,456 @@
5858
  "mean_token_accuracy": 0.7979574371129274,
5859
  "num_tokens": 53241856.0,
5860
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5861
  }
5862
  ],
5863
  "logging_steps": 10,
@@ -5877,7 +6327,7 @@
5877
  "attributes": {}
5878
  }
5879
  },
5880
- "total_flos": 1.4070782919023002e+17,
5881
  "train_batch_size": 2,
5882
  "trial_name": null,
5883
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.107823206083879,
6
  "eval_steps": 100,
7
+ "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5858
  "mean_token_accuracy": 0.7979574371129274,
5859
  "num_tokens": 53241856.0,
5860
  "step": 6500
5861
+ },
5862
+ {
5863
+ "epoch": 1.9604698441382427,
5864
+ "grad_norm": 1.5861074924468994,
5865
+ "learning_rate": 3.852331028329244e-06,
5866
+ "loss": 0.1401,
5867
+ "mean_token_accuracy": 0.7667318984866143,
5868
+ "num_tokens": 53323776.0,
5869
+ "step": 6510
5870
+ },
5871
+ {
5872
+ "epoch": 1.9634816655372336,
5873
+ "grad_norm": 1.195090889930725,
5874
+ "learning_rate": 3.841177782734776e-06,
5875
+ "loss": 0.1359,
5876
+ "mean_token_accuracy": 0.7738136008381844,
5877
+ "num_tokens": 53405696.0,
5878
+ "step": 6520
5879
+ },
5880
+ {
5881
+ "epoch": 1.9664934869362247,
5882
+ "grad_norm": 1.410537600517273,
5883
+ "learning_rate": 3.830024537140309e-06,
5884
+ "loss": 0.1116,
5885
+ "mean_token_accuracy": 0.7794153623282909,
5886
+ "num_tokens": 53487616.0,
5887
+ "step": 6530
5888
+ },
5889
+ {
5890
+ "epoch": 1.9695053083352159,
5891
+ "grad_norm": 1.2453457117080688,
5892
+ "learning_rate": 3.81887129154584e-06,
5893
+ "loss": 0.1626,
5894
+ "mean_token_accuracy": 0.7636007871478796,
5895
+ "num_tokens": 53569536.0,
5896
+ "step": 6540
5897
+ },
5898
+ {
5899
+ "epoch": 1.9725171297342068,
5900
+ "grad_norm": 1.5458024740219116,
5901
+ "learning_rate": 3.8077180459513723e-06,
5902
+ "loss": 0.1225,
5903
+ "mean_token_accuracy": 0.7851883560419083,
5904
+ "num_tokens": 53651456.0,
5905
+ "step": 6550
5906
+ },
5907
+ {
5908
+ "epoch": 1.9755289511331977,
5909
+ "grad_norm": 1.335051417350769,
5910
+ "learning_rate": 3.7965648003569045e-06,
5911
+ "loss": 0.1244,
5912
+ "mean_token_accuracy": 0.7711105648428201,
5913
+ "num_tokens": 53733376.0,
5914
+ "step": 6560
5915
+ },
5916
+ {
5917
+ "epoch": 1.9785407725321889,
5918
+ "grad_norm": 1.1321961879730225,
5919
+ "learning_rate": 3.785411554762436e-06,
5920
+ "loss": 0.1145,
5921
+ "mean_token_accuracy": 0.7770547956228256,
5922
+ "num_tokens": 53815296.0,
5923
+ "step": 6570
5924
+ },
5925
+ {
5926
+ "epoch": 1.98155259393118,
5927
+ "grad_norm": 1.4666228294372559,
5928
+ "learning_rate": 3.7742583091679678e-06,
5929
+ "loss": 0.1128,
5930
+ "mean_token_accuracy": 0.8008316993713379,
5931
+ "num_tokens": 53897216.0,
5932
+ "step": 6580
5933
+ },
5934
+ {
5935
+ "epoch": 1.984564415330171,
5936
+ "grad_norm": 1.1132220029830933,
5937
+ "learning_rate": 3.7631050635735e-06,
5938
+ "loss": 0.1223,
5939
+ "mean_token_accuracy": 0.7956457916647196,
5940
+ "num_tokens": 53979136.0,
5941
+ "step": 6590
5942
+ },
5943
+ {
5944
+ "epoch": 1.9875762367291618,
5945
+ "grad_norm": 1.015281319618225,
5946
+ "learning_rate": 3.751951817979032e-06,
5947
+ "loss": 0.1115,
5948
+ "mean_token_accuracy": 0.7839285705238581,
5949
+ "num_tokens": 54061056.0,
5950
+ "step": 6600
5951
+ },
5952
+ {
5953
+ "epoch": 1.990588058128153,
5954
+ "grad_norm": 1.3019957542419434,
5955
+ "learning_rate": 3.740798572384564e-06,
5956
+ "loss": 0.1132,
5957
+ "mean_token_accuracy": 0.7918786682188511,
5958
+ "num_tokens": 54142976.0,
5959
+ "step": 6610
5960
+ },
5961
+ {
5962
+ "epoch": 1.9935998795271441,
5963
+ "grad_norm": 1.3737001419067383,
5964
+ "learning_rate": 3.729645326790096e-06,
5965
+ "loss": 0.12,
5966
+ "mean_token_accuracy": 0.7895425636321306,
5967
+ "num_tokens": 54224896.0,
5968
+ "step": 6620
5969
+ },
5970
+ {
5971
+ "epoch": 1.996611700926135,
5972
+ "grad_norm": 1.220357060432434,
5973
+ "learning_rate": 3.7184920811956282e-06,
5974
+ "loss": 0.1267,
5975
+ "mean_token_accuracy": 0.7734589025378227,
5976
+ "num_tokens": 54306816.0,
5977
+ "step": 6630
5978
+ },
5979
+ {
5980
+ "epoch": 1.999623522325126,
5981
+ "grad_norm": 0.9205222725868225,
5982
+ "learning_rate": 3.70733883560116e-06,
5983
+ "loss": 0.1376,
5984
+ "mean_token_accuracy": 0.77977005392313,
5985
+ "num_tokens": 54388736.0,
5986
+ "step": 6640
5987
+ },
5988
+ {
5989
+ "epoch": 2.002409457119193,
5990
+ "grad_norm": 1.058834433555603,
5991
+ "learning_rate": 3.6961855900066923e-06,
5992
+ "loss": 0.1066,
5993
+ "mean_token_accuracy": 0.7967816152282663,
5994
+ "num_tokens": 54464512.0,
5995
+ "step": 6650
5996
+ },
5997
+ {
5998
+ "epoch": 2.0054212785181837,
5999
+ "grad_norm": 1.4777971506118774,
6000
+ "learning_rate": 3.685032344412224e-06,
6001
+ "loss": 0.1153,
6002
+ "mean_token_accuracy": 0.783109100162983,
6003
+ "num_tokens": 54546432.0,
6004
+ "step": 6660
6005
+ },
6006
+ {
6007
+ "epoch": 2.008433099917175,
6008
+ "grad_norm": 1.3833023309707642,
6009
+ "learning_rate": 3.6738790988177564e-06,
6010
+ "loss": 0.1312,
6011
+ "mean_token_accuracy": 0.7731409035623074,
6012
+ "num_tokens": 54628352.0,
6013
+ "step": 6670
6014
+ },
6015
+ {
6016
+ "epoch": 2.011444921316166,
6017
+ "grad_norm": 1.062574028968811,
6018
+ "learning_rate": 3.6627258532232887e-06,
6019
+ "loss": 0.0978,
6020
+ "mean_token_accuracy": 0.7889799430966378,
6021
+ "num_tokens": 54710272.0,
6022
+ "step": 6680
6023
+ },
6024
+ {
6025
+ "epoch": 2.014456742715157,
6026
+ "grad_norm": 1.269668459892273,
6027
+ "learning_rate": 3.6515726076288205e-06,
6028
+ "loss": 0.1001,
6029
+ "mean_token_accuracy": 0.7908879652619362,
6030
+ "num_tokens": 54792192.0,
6031
+ "step": 6690
6032
+ },
6033
+ {
6034
+ "epoch": 2.017468564114148,
6035
+ "grad_norm": 1.7478396892547607,
6036
+ "learning_rate": 3.6404193620343527e-06,
6037
+ "loss": 0.1288,
6038
+ "mean_token_accuracy": 0.7696673195809126,
6039
+ "num_tokens": 54874112.0,
6040
+ "step": 6700
6041
+ },
6042
+ {
6043
+ "epoch": 2.0204803855131392,
6044
+ "grad_norm": 1.484840989112854,
6045
+ "learning_rate": 3.6292661164398846e-06,
6046
+ "loss": 0.1461,
6047
+ "mean_token_accuracy": 0.779011744260788,
6048
+ "num_tokens": 54956032.0,
6049
+ "step": 6710
6050
+ },
6051
+ {
6052
+ "epoch": 2.02349220691213,
6053
+ "grad_norm": 1.2291215658187866,
6054
+ "learning_rate": 3.618112870845416e-06,
6055
+ "loss": 0.1269,
6056
+ "mean_token_accuracy": 0.7728228956460953,
6057
+ "num_tokens": 55037952.0,
6058
+ "step": 6720
6059
+ },
6060
+ {
6061
+ "epoch": 2.026504028311121,
6062
+ "grad_norm": 1.2073824405670166,
6063
+ "learning_rate": 3.6069596252509482e-06,
6064
+ "loss": 0.1097,
6065
+ "mean_token_accuracy": 0.7927470624446868,
6066
+ "num_tokens": 55119872.0,
6067
+ "step": 6730
6068
+ },
6069
+ {
6070
+ "epoch": 2.029515849710112,
6071
+ "grad_norm": 1.3367125988006592,
6072
+ "learning_rate": 3.59580637965648e-06,
6073
+ "loss": 0.0825,
6074
+ "mean_token_accuracy": 0.8145425617694855,
6075
+ "num_tokens": 55201792.0,
6076
+ "step": 6740
6077
+ },
6078
+ {
6079
+ "epoch": 2.0325276711091034,
6080
+ "grad_norm": 0.9058095812797546,
6081
+ "learning_rate": 3.5846531340620123e-06,
6082
+ "loss": 0.1062,
6083
+ "mean_token_accuracy": 0.8042319010943174,
6084
+ "num_tokens": 55283712.0,
6085
+ "step": 6750
6086
+ },
6087
+ {
6088
+ "epoch": 2.0355394925080943,
6089
+ "grad_norm": 1.2049607038497925,
6090
+ "learning_rate": 3.573499888467544e-06,
6091
+ "loss": 0.1278,
6092
+ "mean_token_accuracy": 0.7739603724330664,
6093
+ "num_tokens": 55365632.0,
6094
+ "step": 6760
6095
+ },
6096
+ {
6097
+ "epoch": 2.038551313907085,
6098
+ "grad_norm": 1.4414746761322021,
6099
+ "learning_rate": 3.5623466428730764e-06,
6100
+ "loss": 0.0992,
6101
+ "mean_token_accuracy": 0.8063600823283196,
6102
+ "num_tokens": 55447552.0,
6103
+ "step": 6770
6104
+ },
6105
+ {
6106
+ "epoch": 2.041563135306076,
6107
+ "grad_norm": 1.0376569032669067,
6108
+ "learning_rate": 3.5511933972786083e-06,
6109
+ "loss": 0.1134,
6110
+ "mean_token_accuracy": 0.7815435409545899,
6111
+ "num_tokens": 55529472.0,
6112
+ "step": 6780
6113
+ },
6114
+ {
6115
+ "epoch": 2.0445749567050675,
6116
+ "grad_norm": 1.3576596975326538,
6117
+ "learning_rate": 3.5400401516841405e-06,
6118
+ "loss": 0.1019,
6119
+ "mean_token_accuracy": 0.7937255371361971,
6120
+ "num_tokens": 55611392.0,
6121
+ "step": 6790
6122
+ },
6123
+ {
6124
+ "epoch": 2.0475867781040584,
6125
+ "grad_norm": 0.9655880331993103,
6126
+ "learning_rate": 3.5288869060896724e-06,
6127
+ "loss": 0.1065,
6128
+ "mean_token_accuracy": 0.7986423678696155,
6129
+ "num_tokens": 55693312.0,
6130
+ "step": 6800
6131
+ },
6132
+ {
6133
+ "epoch": 2.0505985995030493,
6134
+ "grad_norm": 1.2648464441299438,
6135
+ "learning_rate": 3.5177336604952046e-06,
6136
+ "loss": 0.1086,
6137
+ "mean_token_accuracy": 0.795303326100111,
6138
+ "num_tokens": 55775232.0,
6139
+ "step": 6810
6140
+ },
6141
+ {
6142
+ "epoch": 2.0536104209020407,
6143
+ "grad_norm": 1.6027874946594238,
6144
+ "learning_rate": 3.5065804149007364e-06,
6145
+ "loss": 0.0982,
6146
+ "mean_token_accuracy": 0.7956213317811489,
6147
+ "num_tokens": 55857152.0,
6148
+ "step": 6820
6149
+ },
6150
+ {
6151
+ "epoch": 2.0566222423010316,
6152
+ "grad_norm": 1.4525415897369385,
6153
+ "learning_rate": 3.4954271693062687e-06,
6154
+ "loss": 0.1175,
6155
+ "mean_token_accuracy": 0.7873654570430517,
6156
+ "num_tokens": 55939072.0,
6157
+ "step": 6830
6158
+ },
6159
+ {
6160
+ "epoch": 2.0596340637000226,
6161
+ "grad_norm": 1.5248804092407227,
6162
+ "learning_rate": 3.4842739237118005e-06,
6163
+ "loss": 0.0992,
6164
+ "mean_token_accuracy": 0.7903008766472339,
6165
+ "num_tokens": 56020992.0,
6166
+ "step": 6840
6167
+ },
6168
+ {
6169
+ "epoch": 2.0626458850990135,
6170
+ "grad_norm": 1.1746339797973633,
6171
+ "learning_rate": 3.473120678117333e-06,
6172
+ "loss": 0.1205,
6173
+ "mean_token_accuracy": 0.7796355158090591,
6174
+ "num_tokens": 56102912.0,
6175
+ "step": 6850
6176
+ },
6177
+ {
6178
+ "epoch": 2.065657706498005,
6179
+ "grad_norm": 1.181340217590332,
6180
+ "learning_rate": 3.4619674325228646e-06,
6181
+ "loss": 0.1235,
6182
+ "mean_token_accuracy": 0.7802837561815977,
6183
+ "num_tokens": 56184832.0,
6184
+ "step": 6860
6185
+ },
6186
+ {
6187
+ "epoch": 2.0686695278969958,
6188
+ "grad_norm": 1.4108185768127441,
6189
+ "learning_rate": 3.450814186928396e-06,
6190
+ "loss": 0.1011,
6191
+ "mean_token_accuracy": 0.8037915851920843,
6192
+ "num_tokens": 56266752.0,
6193
+ "step": 6870
6194
+ },
6195
+ {
6196
+ "epoch": 2.0716813492959867,
6197
+ "grad_norm": 1.146896481513977,
6198
+ "learning_rate": 3.4396609413339283e-06,
6199
+ "loss": 0.1233,
6200
+ "mean_token_accuracy": 0.7929427601397038,
6201
+ "num_tokens": 56348672.0,
6202
+ "step": 6880
6203
+ },
6204
+ {
6205
+ "epoch": 2.0746931706949776,
6206
+ "grad_norm": 1.2894806861877441,
6207
+ "learning_rate": 3.42850769573946e-06,
6208
+ "loss": 0.1127,
6209
+ "mean_token_accuracy": 0.7803816046565771,
6210
+ "num_tokens": 56430592.0,
6211
+ "step": 6890
6212
+ },
6213
+ {
6214
+ "epoch": 2.077704992093969,
6215
+ "grad_norm": 0.9775878190994263,
6216
+ "learning_rate": 3.4173544501449924e-06,
6217
+ "loss": 0.1012,
6218
+ "mean_token_accuracy": 0.7998899217694998,
6219
+ "num_tokens": 56512512.0,
6220
+ "step": 6900
6221
+ },
6222
+ {
6223
+ "epoch": 2.08071681349296,
6224
+ "grad_norm": 1.141923427581787,
6225
+ "learning_rate": 3.4062012045505242e-06,
6226
+ "loss": 0.1032,
6227
+ "mean_token_accuracy": 0.7836839504539966,
6228
+ "num_tokens": 56594432.0,
6229
+ "step": 6910
6230
+ },
6231
+ {
6232
+ "epoch": 2.083728634891951,
6233
+ "grad_norm": 1.037724494934082,
6234
+ "learning_rate": 3.3950479589560565e-06,
6235
+ "loss": 0.1049,
6236
+ "mean_token_accuracy": 0.8032534249126911,
6237
+ "num_tokens": 56676352.0,
6238
+ "step": 6920
6239
+ },
6240
+ {
6241
+ "epoch": 2.0867404562909417,
6242
+ "grad_norm": 1.3930587768554688,
6243
+ "learning_rate": 3.3838947133615883e-06,
6244
+ "loss": 0.1065,
6245
+ "mean_token_accuracy": 0.786497063934803,
6246
+ "num_tokens": 56758272.0,
6247
+ "step": 6930
6248
+ },
6249
+ {
6250
+ "epoch": 2.089752277689933,
6251
+ "grad_norm": 0.9995868802070618,
6252
+ "learning_rate": 3.3727414677671206e-06,
6253
+ "loss": 0.1105,
6254
+ "mean_token_accuracy": 0.7776051837950945,
6255
+ "num_tokens": 56840192.0,
6256
+ "step": 6940
6257
+ },
6258
+ {
6259
+ "epoch": 2.092764099088924,
6260
+ "grad_norm": 1.704577088356018,
6261
+ "learning_rate": 3.3615882221726524e-06,
6262
+ "loss": 0.1174,
6263
+ "mean_token_accuracy": 0.7858610555529595,
6264
+ "num_tokens": 56922112.0,
6265
+ "step": 6950
6266
+ },
6267
+ {
6268
+ "epoch": 2.095775920487915,
6269
+ "grad_norm": 1.1011236906051636,
6270
+ "learning_rate": 3.3504349765781847e-06,
6271
+ "loss": 0.1084,
6272
+ "mean_token_accuracy": 0.7746819939464331,
6273
+ "num_tokens": 57004032.0,
6274
+ "step": 6960
6275
+ },
6276
+ {
6277
+ "epoch": 2.0987877418869063,
6278
+ "grad_norm": 0.932067334651947,
6279
+ "learning_rate": 3.3392817309837165e-06,
6280
+ "loss": 0.1242,
6281
+ "mean_token_accuracy": 0.7733732841908931,
6282
+ "num_tokens": 57085952.0,
6283
+ "step": 6970
6284
+ },
6285
+ {
6286
+ "epoch": 2.1017995632858972,
6287
+ "grad_norm": 0.9481123685836792,
6288
+ "learning_rate": 3.3281284853892487e-06,
6289
+ "loss": 0.1079,
6290
+ "mean_token_accuracy": 0.7982142839580775,
6291
+ "num_tokens": 57167872.0,
6292
+ "step": 6980
6293
+ },
6294
+ {
6295
+ "epoch": 2.104811384684888,
6296
+ "grad_norm": 1.3651145696640015,
6297
+ "learning_rate": 3.3169752397947806e-06,
6298
+ "loss": 0.1265,
6299
+ "mean_token_accuracy": 0.7904231909662485,
6300
+ "num_tokens": 57249792.0,
6301
+ "step": 6990
6302
+ },
6303
+ {
6304
+ "epoch": 2.107823206083879,
6305
+ "grad_norm": 1.0314269065856934,
6306
+ "learning_rate": 3.305821994200313e-06,
6307
+ "loss": 0.1519,
6308
+ "mean_token_accuracy": 0.7659491188824177,
6309
+ "num_tokens": 57331712.0,
6310
+ "step": 7000
6311
  }
6312
  ],
6313
  "logging_steps": 10,
 
6327
  "attributes": {}
6328
  }
6329
  },
6330
+ "total_flos": 1.5151651999658803e+17,
6331
  "train_batch_size": 2,
6332
  "trial_name": null,
6333
  "trial_params": null