new2

Browse files

Files changed (7) hide show

model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +239 -197
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26f736f87452b4150295584bbcab1fe84d5ecafe14a279bb3c8e0b320856b973
 size 500919936

 version https://git-lfs.github.com/spec/v1
+oid sha256:c315e975b88b1a4451f38d8fe92965799e8b364c835be4dbdd1762b3823d977c
 size 500919936

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48fe01390dab2921ca8d3d1c18947d9e7911303736141637d0346c1d93221e33
 size 1001933754

 version https://git-lfs.github.com/spec/v1
+oid sha256:a12ef0b50a2b87fd317fa9ec54fc2777fa083b52c989fb4555b47b9d2d28d3b1
 size 1001933754

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e16ad8ca36309e69b7bca3cd12ec8eb8069ac7ffe9b311cbfd202b730f9505e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3267e805d95f4b264d4ce34da080d7a5029ce4ac21071ebde8326f60745dd768
 size 14244

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5da4ea0c1bcacc6c536f51e41f20fb1c9301dc84cb8e04333e56f06168b8cb83
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:42d56a21a2ea57f9d44b815019499ae7e6f44931fb0bfae7a8f5927e0d13f9a4
 size 988

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db1b074aa330c0b0803c07179be61b9ac93aa3e1585ced1974992df5aeedb9c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:adf5bfaed0cc0ebd3b22f80394894ecc910b5c9558c3f5d87418070a45b84240
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,354 +2,396 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.8237082066869301,
   "eval_steps": 500,
-  "global_step": 4800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.037993920972644375,
-      "grad_norm": 0.5791582465171814,
-      "learning_rate": 0.0004952982522796353,
-      "loss": 1.9579,
       "step": 100
     },
     {
-      "epoch": 0.07598784194528875,
-      "grad_norm": 0.578816831111908,
-      "learning_rate": 0.0004905490121580547,
-      "loss": 1.5938,
       "step": 200
     },
     {
-      "epoch": 0.11398176291793313,
-      "grad_norm": 0.7128555774688721,
-      "learning_rate": 0.00048579977203647417,
-      "loss": 1.6012,
       "step": 300
     },
     {
-      "epoch": 0.1519756838905775,
-      "grad_norm": 0.5568099021911621,
-      "learning_rate": 0.0004810505319148936,
-      "loss": 1.6271,
       "step": 400
     },
     {
-      "epoch": 0.1899696048632219,
-      "grad_norm": 0.5209512114524841,
-      "learning_rate": 0.0004763012917933131,
-      "loss": 1.5465,
       "step": 500
     },
     {
-      "epoch": 0.22796352583586627,
-      "grad_norm": 0.739372968673706,
-      "learning_rate": 0.00047155205167173257,
-      "loss": 1.5851,
       "step": 600
     },
     {
-      "epoch": 0.26595744680851063,
-      "grad_norm": 0.48252037167549133,
-      "learning_rate": 0.000466802811550152,
-      "loss": 1.5936,
       "step": 700
     },
     {
-      "epoch": 0.303951367781155,
-      "grad_norm": 0.5574118494987488,
-      "learning_rate": 0.0004620535714285715,
-      "loss": 1.547,
       "step": 800
     },
     {
-      "epoch": 0.34194528875379937,
-      "grad_norm": 0.4584231674671173,
-      "learning_rate": 0.0004573043313069909,
-      "loss": 1.5526,
       "step": 900
     },
     {
-      "epoch": 0.3799392097264438,
-      "grad_norm": 0.5431011319160461,
-      "learning_rate": 0.00045255509118541034,
-      "loss": 1.5691,
       "step": 1000
     },
     {
-      "epoch": 0.41793313069908816,
-      "grad_norm": 0.5586347579956055,
-      "learning_rate": 0.00044780585106382983,
-      "loss": 1.5839,
       "step": 1100
     },
     {
-      "epoch": 0.45592705167173253,
-      "grad_norm": 0.4354408085346222,
-      "learning_rate": 0.00044305661094224926,
-      "loss": 1.55,
       "step": 1200
     },
     {
-      "epoch": 0.4939209726443769,
-      "grad_norm": 0.5265607237815857,
-      "learning_rate": 0.0004383073708206687,
-      "loss": 1.5339,
       "step": 1300
     },
     {
-      "epoch": 0.5319148936170213,
-      "grad_norm": 0.4391303062438965,
-      "learning_rate": 0.0004335581306990882,
-      "loss": 1.5296,
       "step": 1400
     },
     {
-      "epoch": 0.5699088145896657,
-      "grad_norm": 0.7169055342674255,
-      "learning_rate": 0.0004288088905775076,
-      "loss": 1.5336,
       "step": 1500
     },
     {
-      "epoch": 0.60790273556231,
-      "grad_norm": 0.5826978087425232,
-      "learning_rate": 0.00042405965045592704,
-      "loss": 1.5803,
       "step": 1600
     },
     {
-      "epoch": 0.6458966565349544,
-      "grad_norm": 0.6483295559883118,
-      "learning_rate": 0.0004193104103343465,
-      "loss": 1.5538,
       "step": 1700
     },
     {
-      "epoch": 0.6838905775075987,
-      "grad_norm": 0.4572024941444397,
-      "learning_rate": 0.00041456117021276595,
-      "loss": 1.5207,
       "step": 1800
     },
     {
-      "epoch": 0.7218844984802432,
-      "grad_norm": 0.509039580821991,
-      "learning_rate": 0.0004098119300911854,
-      "loss": 1.535,
       "step": 1900
     },
     {
-      "epoch": 0.7598784194528876,
-      "grad_norm": 0.45722949504852295,
-      "learning_rate": 0.00040506268996960487,
-      "loss": 1.5684,
       "step": 2000
     },
     {
-      "epoch": 0.7978723404255319,
-      "grad_norm": 0.4910150170326233,
-      "learning_rate": 0.0004003134498480243,
-      "loss": 1.581,
       "step": 2100
     },
     {
-      "epoch": 0.8358662613981763,
-      "grad_norm": 0.4546220600605011,
-      "learning_rate": 0.0003955642097264438,
-      "loss": 1.5244,
       "step": 2200
     },
     {
-      "epoch": 0.8738601823708206,
-      "grad_norm": 0.5551008582115173,
-      "learning_rate": 0.0003908149696048632,
-      "loss": 1.5148,
       "step": 2300
     },
     {
-      "epoch": 0.9118541033434651,
-      "grad_norm": 0.5794001817703247,
-      "learning_rate": 0.00038606572948328264,
-      "loss": 1.5236,
       "step": 2400
     },
     {
-      "epoch": 0.9498480243161094,
-      "grad_norm": 0.4178274869918823,
-      "learning_rate": 0.0003813164893617022,
-      "loss": 1.4909,
       "step": 2500
     },
     {
-      "epoch": 0.9878419452887538,
-      "grad_norm": 0.4754296541213989,
-      "learning_rate": 0.0003765672492401216,
-      "loss": 1.5292,
       "step": 2600
     },
     {
-      "epoch": 1.0258358662613982,
-      "grad_norm": 0.6230200529098511,
-      "learning_rate": 0.00037181800911854104,
-      "loss": 1.3509,
       "step": 2700
     },
     {
-      "epoch": 1.0638297872340425,
-      "grad_norm": 0.6134634017944336,
-      "learning_rate": 0.00036706876899696053,
-      "loss": 1.2918,
       "step": 2800
     },
     {
-      "epoch": 1.1018237082066868,
-      "grad_norm": 0.41924381256103516,
-      "learning_rate": 0.00036231952887537996,
-      "loss": 1.2676,
       "step": 2900
     },
     {
-      "epoch": 1.1398176291793314,
-      "grad_norm": 0.43015140295028687,
-      "learning_rate": 0.0003575702887537994,
-      "loss": 1.3059,
       "step": 3000
     },
     {
-      "epoch": 1.1778115501519757,
-      "grad_norm": 0.4945460855960846,
-      "learning_rate": 0.0003528210486322189,
-      "loss": 1.2662,
       "step": 3100
     },
     {
-      "epoch": 1.21580547112462,
-      "grad_norm": 0.529432475566864,
-      "learning_rate": 0.0003480718085106383,
-      "loss": 1.3159,
       "step": 3200
     },
     {
-      "epoch": 1.2537993920972643,
-      "grad_norm": 0.4257758557796478,
-      "learning_rate": 0.00034332256838905773,
-      "loss": 1.332,
       "step": 3300
     },
     {
-      "epoch": 1.2917933130699089,
-      "grad_norm": 0.5150781869888306,
-      "learning_rate": 0.0003385733282674772,
-      "loss": 1.3296,
       "step": 3400
     },
     {
-      "epoch": 1.3297872340425532,
-      "grad_norm": 0.6550915241241455,
-      "learning_rate": 0.00033382408814589665,
-      "loss": 1.3199,
       "step": 3500
     },
     {
-      "epoch": 1.3677811550151975,
-      "grad_norm": 0.5338163375854492,
-      "learning_rate": 0.00032907484802431613,
-      "loss": 1.3259,
       "step": 3600
     },
     {
-      "epoch": 1.405775075987842,
-      "grad_norm": 0.47377586364746094,
-      "learning_rate": 0.00032432560790273556,
-      "loss": 1.3242,
       "step": 3700
     },
     {
-      "epoch": 1.4437689969604863,
-      "grad_norm": 0.5152885317802429,
-      "learning_rate": 0.000319576367781155,
-      "loss": 1.3159,
       "step": 3800
     },
     {
-      "epoch": 1.4817629179331306,
-      "grad_norm": 0.539071261882782,
-      "learning_rate": 0.0003148271276595745,
-      "loss": 1.2949,
       "step": 3900
     },
     {
-      "epoch": 1.5197568389057752,
-      "grad_norm": 0.48675107955932617,
-      "learning_rate": 0.0003100778875379939,
-      "loss": 1.3048,
       "step": 4000
     },
     {
-      "epoch": 1.5577507598784195,
-      "grad_norm": 0.49419140815734863,
-      "learning_rate": 0.00030532864741641334,
-      "loss": 1.3517,
       "step": 4100
     },
     {
-      "epoch": 1.5957446808510638,
-      "grad_norm": 0.5641041398048401,
-      "learning_rate": 0.0003005794072948328,
-      "loss": 1.3546,
       "step": 4200
     },
     {
-      "epoch": 1.6337386018237083,
-      "grad_norm": 0.5435624122619629,
-      "learning_rate": 0.0002958301671732523,
-      "loss": 1.3001,
       "step": 4300
     },
     {
-      "epoch": 1.6717325227963524,
-      "grad_norm": 0.5236312747001648,
-      "learning_rate": 0.00029108092705167174,
-      "loss": 1.3159,
       "step": 4400
     },
     {
-      "epoch": 1.709726443768997,
-      "grad_norm": 0.5345861315727234,
-      "learning_rate": 0.0002863316869300912,
-      "loss": 1.3137,
       "step": 4500
     },
     {
-      "epoch": 1.7477203647416415,
-      "grad_norm": 0.6117902994155884,
-      "learning_rate": 0.00028158244680851066,
-      "loss": 1.3116,
       "step": 4600
     },
     {
-      "epoch": 1.7857142857142856,
-      "grad_norm": 0.45284005999565125,
-      "learning_rate": 0.0002768332066869301,
-      "loss": 1.3079,
       "step": 4700
     },
     {
-      "epoch": 1.8237082066869301,
-      "grad_norm": 0.46459677815437317,
-      "learning_rate": 0.00027208396656534957,
-      "loss": 1.3346,
       "step": 4800
     }
   ],
   "logging_steps": 100,
-  "max_steps": 10528,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 300,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -363,7 +405,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.7625268731904e+16,
   "train_batch_size": 15,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6827233074151337,
   "eval_steps": 500,
+  "global_step": 5400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.012643024211391365,
+      "grad_norm": 0.8759542107582092,
+      "learning_rate": 0.0004987484197218711,
+      "loss": 3.8722,
       "step": 100
     },
     {
+      "epoch": 0.02528604842278273,
+      "grad_norm": 0.9236809015274048,
+      "learning_rate": 0.0004974841972187105,
+      "loss": 3.4858,
       "step": 200
     },
     {
+      "epoch": 0.037929072634174096,
+      "grad_norm": 0.8505849242210388,
+      "learning_rate": 0.0004962199747155499,
+      "loss": 3.3438,
       "step": 300
     },
     {
+      "epoch": 0.05057209684556546,
+      "grad_norm": 0.8044902682304382,
+      "learning_rate": 0.0004949557522123893,
+      "loss": 3.2837,
       "step": 400
     },
     {
+      "epoch": 0.06321512105695683,
+      "grad_norm": 0.7873915433883667,
+      "learning_rate": 0.0004936915297092288,
+      "loss": 3.194,
       "step": 500
     },
     {
+      "epoch": 0.07585814526834819,
+      "grad_norm": 0.7622674107551575,
+      "learning_rate": 0.0004924273072060682,
+      "loss": 3.126,
       "step": 600
     },
     {
+      "epoch": 0.08850116947973956,
+      "grad_norm": 0.8418383002281189,
+      "learning_rate": 0.0004911630847029077,
+      "loss": 3.0518,
       "step": 700
     },
     {
+      "epoch": 0.10114419369113092,
+      "grad_norm": 0.7434802055358887,
+      "learning_rate": 0.0004898988621997471,
+      "loss": 3.0434,
       "step": 800
     },
     {
+      "epoch": 0.11378721790252229,
+      "grad_norm": 0.8024940490722656,
+      "learning_rate": 0.0004886346396965867,
+      "loss": 2.9942,
       "step": 900
     },
     {
+      "epoch": 0.12643024211391365,
+      "grad_norm": 0.8081286549568176,
+      "learning_rate": 0.00048737041719342606,
+      "loss": 2.9878,
       "step": 1000
     },
     {
+      "epoch": 0.139073266325305,
+      "grad_norm": 0.7084025144577026,
+      "learning_rate": 0.0004861061946902655,
+      "loss": 2.9314,
       "step": 1100
     },
     {
+      "epoch": 0.15171629053669639,
+      "grad_norm": 0.7388598322868347,
+      "learning_rate": 0.000484841972187105,
+      "loss": 2.9152,
       "step": 1200
     },
     {
+      "epoch": 0.16435931474808774,
+      "grad_norm": 0.7991167306900024,
+      "learning_rate": 0.0004835777496839444,
+      "loss": 2.917,
       "step": 1300
     },
     {
+      "epoch": 0.17700233895947912,
+      "grad_norm": 0.7912219762802124,
+      "learning_rate": 0.0004823135271807838,
+      "loss": 2.8725,
       "step": 1400
     },
     {
+      "epoch": 0.18964536317087047,
+      "grad_norm": 0.8445726633071899,
+      "learning_rate": 0.00048104930467762324,
+      "loss": 2.8843,
       "step": 1500
     },
     {
+      "epoch": 0.20228838738226185,
+      "grad_norm": 0.7209933400154114,
+      "learning_rate": 0.0004797850821744627,
+      "loss": 2.8298,
       "step": 1600
     },
     {
+      "epoch": 0.2149314115936532,
+      "grad_norm": 0.7905689477920532,
+      "learning_rate": 0.00047852085967130215,
+      "loss": 2.862,
       "step": 1700
     },
     {
+      "epoch": 0.22757443580504458,
+      "grad_norm": 0.745158314704895,
+      "learning_rate": 0.0004772566371681416,
+      "loss": 2.781,
       "step": 1800
     },
     {
+      "epoch": 0.24021746001643593,
+      "grad_norm": 0.7118976712226868,
+      "learning_rate": 0.00047599241466498107,
+      "loss": 2.7783,
       "step": 1900
     },
     {
+      "epoch": 0.2528604842278273,
+      "grad_norm": 0.7946869730949402,
+      "learning_rate": 0.0004747281921618205,
+      "loss": 2.7825,
       "step": 2000
     },
     {
+      "epoch": 0.26550350843921866,
+      "grad_norm": 0.7247060537338257,
+      "learning_rate": 0.00047346396965865993,
+      "loss": 2.7839,
       "step": 2100
     },
     {
+      "epoch": 0.27814653265061,
+      "grad_norm": 0.7256483435630798,
+      "learning_rate": 0.0004721997471554994,
+      "loss": 2.7731,
       "step": 2200
     },
     {
+      "epoch": 0.29078955686200136,
+      "grad_norm": 0.7218326926231384,
+      "learning_rate": 0.0004709355246523388,
+      "loss": 2.8133,
       "step": 2300
     },
     {
+      "epoch": 0.30343258107339277,
+      "grad_norm": 0.7010550498962402,
+      "learning_rate": 0.00046967130214917825,
+      "loss": 2.7432,
       "step": 2400
     },
     {
+      "epoch": 0.3160756052847841,
+      "grad_norm": 0.7964794635772705,
+      "learning_rate": 0.0004684070796460177,
+      "loss": 2.7811,
       "step": 2500
     },
     {
+      "epoch": 0.3287186294961755,
+      "grad_norm": 0.8072954416275024,
+      "learning_rate": 0.00046714285714285716,
+      "loss": 2.7089,
       "step": 2600
     },
     {
+      "epoch": 0.3413616537075668,
+      "grad_norm": 0.6594070196151733,
+      "learning_rate": 0.0004658786346396966,
+      "loss": 2.7161,
       "step": 2700
     },
     {
+      "epoch": 0.35400467791895823,
+      "grad_norm": 0.704298734664917,
+      "learning_rate": 0.000464614412136536,
+      "loss": 2.698,
       "step": 2800
     },
     {
+      "epoch": 0.3666477021303496,
+      "grad_norm": 0.7253355383872986,
+      "learning_rate": 0.0004633501896333755,
+      "loss": 2.696,
       "step": 2900
     },
     {
+      "epoch": 0.37929072634174094,
+      "grad_norm": 0.7043545246124268,
+      "learning_rate": 0.00046208596713021493,
+      "loss": 2.6807,
       "step": 3000
     },
     {
+      "epoch": 0.3919337505531323,
+      "grad_norm": 0.6532794237136841,
+      "learning_rate": 0.0004608217446270544,
+      "loss": 2.6985,
       "step": 3100
     },
     {
+      "epoch": 0.4045767747645237,
+      "grad_norm": 0.7272788286209106,
+      "learning_rate": 0.0004595575221238938,
+      "loss": 2.6767,
       "step": 3200
     },
     {
+      "epoch": 0.41721979897591505,
+      "grad_norm": 0.695071280002594,
+      "learning_rate": 0.00045829329962073325,
+      "loss": 2.6609,
       "step": 3300
     },
     {
+      "epoch": 0.4298628231873064,
+      "grad_norm": 0.7230761051177979,
+      "learning_rate": 0.0004570290771175727,
+      "loss": 2.6488,
       "step": 3400
     },
     {
+      "epoch": 0.44250584739869775,
+      "grad_norm": 0.7420136332511902,
+      "learning_rate": 0.00045576485461441217,
+      "loss": 2.6507,
       "step": 3500
     },
     {
+      "epoch": 0.45514887161008916,
+      "grad_norm": 0.7115824222564697,
+      "learning_rate": 0.00045450063211125157,
+      "loss": 2.644,
       "step": 3600
     },
     {
+      "epoch": 0.4677918958214805,
+      "grad_norm": 0.6667810678482056,
+      "learning_rate": 0.000453236409608091,
+      "loss": 2.6841,
       "step": 3700
     },
     {
+      "epoch": 0.48043492003287186,
+      "grad_norm": 0.6836283802986145,
+      "learning_rate": 0.0004519721871049305,
+      "loss": 2.6462,
       "step": 3800
     },
     {
+      "epoch": 0.4930779442442632,
+      "grad_norm": 0.7117214202880859,
+      "learning_rate": 0.00045070796460176994,
+      "loss": 2.6201,
       "step": 3900
     },
     {
+      "epoch": 0.5057209684556546,
+      "grad_norm": 0.6085230708122253,
+      "learning_rate": 0.0004494437420986094,
+      "loss": 2.6198,
       "step": 4000
     },
     {
+      "epoch": 0.5183639926670459,
+      "grad_norm": 0.663446843624115,
+      "learning_rate": 0.0004481795195954488,
+      "loss": 2.5972,
       "step": 4100
     },
     {
+      "epoch": 0.5310070168784373,
+      "grad_norm": 0.670093297958374,
+      "learning_rate": 0.00044691529709228826,
+      "loss": 2.6052,
       "step": 4200
     },
     {
+      "epoch": 0.5436500410898287,
+      "grad_norm": 0.6052363514900208,
+      "learning_rate": 0.00044565107458912766,
+      "loss": 2.6038,
       "step": 4300
     },
     {
+      "epoch": 0.55629306530122,
+      "grad_norm": 0.6686172485351562,
+      "learning_rate": 0.0004443868520859671,
+      "loss": 2.5484,
       "step": 4400
     },
     {
+      "epoch": 0.5689360895126114,
+      "grad_norm": 0.6228762865066528,
+      "learning_rate": 0.0004431226295828066,
+      "loss": 2.6119,
       "step": 4500
     },
     {
+      "epoch": 0.5815791137240027,
+      "grad_norm": 0.6712014079093933,
+      "learning_rate": 0.00044185840707964603,
+      "loss": 2.581,
       "step": 4600
     },
     {
+      "epoch": 0.5942221379353941,
+      "grad_norm": 0.6657222509384155,
+      "learning_rate": 0.0004405941845764855,
+      "loss": 2.5822,
       "step": 4700
     },
     {
+      "epoch": 0.6068651621467855,
+      "grad_norm": 0.639202356338501,
+      "learning_rate": 0.00043932996207332494,
+      "loss": 2.5736,
       "step": 4800
+    },
+    {
+      "epoch": 0.6195081863581768,
+      "grad_norm": 0.654742419719696,
+      "learning_rate": 0.0004380657395701644,
+      "loss": 2.5515,
+      "step": 4900
+    },
+    {
+      "epoch": 0.6321512105695682,
+      "grad_norm": 0.704134464263916,
+      "learning_rate": 0.0004368015170670038,
+      "loss": 2.5499,
+      "step": 5000
+    },
+    {
+      "epoch": 0.6447942347809597,
+      "grad_norm": 0.6817001104354858,
+      "learning_rate": 0.0004355372945638432,
+      "loss": 2.611,
+      "step": 5100
+    },
+    {
+      "epoch": 0.657437258992351,
+      "grad_norm": 0.6351118087768555,
+      "learning_rate": 0.00043427307206068266,
+      "loss": 2.566,
+      "step": 5200
+    },
+    {
+      "epoch": 0.6700802832037424,
+      "grad_norm": 0.6755563020706177,
+      "learning_rate": 0.0004330088495575221,
+      "loss": 2.5771,
+      "step": 5300
+    },
+    {
+      "epoch": 0.6827233074151337,
+      "grad_norm": 0.6010642647743225,
+      "learning_rate": 0.0004317446270543616,
+      "loss": 2.5216,
+      "step": 5400
     }
   ],
   "logging_steps": 100,
+  "max_steps": 39550,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 300,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.2329309184e+16,
   "train_batch_size": 15,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2531b729b2c525145b70199c5dd822cae4174318f25eaeb788a247f5ac17d01f
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:799f468600eb7d15c4e6dadcb31f6a81d66af87e10abcb142c666aa51a0864bd
 size 5304