| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.017889087656529516, |
| "grad_norm": 1.506656527519226, |
| "learning_rate": 1.7142857142857143e-06, |
| "loss": 1.2584, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03577817531305903, |
| "grad_norm": 0.9500389695167542, |
| "learning_rate": 3.857142857142857e-06, |
| "loss": 1.2642, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05366726296958855, |
| "grad_norm": 0.7999431490898132, |
| "learning_rate": 6e-06, |
| "loss": 1.3278, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07155635062611806, |
| "grad_norm": 0.679018497467041, |
| "learning_rate": 8.142857142857142e-06, |
| "loss": 1.2183, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08944543828264759, |
| "grad_norm": 0.5348811745643616, |
| "learning_rate": 1.0285714285714286e-05, |
| "loss": 1.2252, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1073345259391771, |
| "grad_norm": 0.6638433337211609, |
| "learning_rate": 1.242857142857143e-05, |
| "loss": 1.2385, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1252236135957066, |
| "grad_norm": 0.7031165361404419, |
| "learning_rate": 1.4571428571428571e-05, |
| "loss": 1.2, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.14311270125223613, |
| "grad_norm": 0.39686235785484314, |
| "learning_rate": 1.6714285714285716e-05, |
| "loss": 1.161, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16100178890876565, |
| "grad_norm": 0.5343768000602722, |
| "learning_rate": 1.8857142857142856e-05, |
| "loss": 1.2075, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.17889087656529518, |
| "grad_norm": 0.47290146350860596, |
| "learning_rate": 2.1e-05, |
| "loss": 1.186, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1967799642218247, |
| "grad_norm": 0.5593580007553101, |
| "learning_rate": 2.3142857142857145e-05, |
| "loss": 1.1161, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2146690518783542, |
| "grad_norm": 0.4979853332042694, |
| "learning_rate": 2.5285714285714285e-05, |
| "loss": 1.176, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 0.5528405904769897, |
| "learning_rate": 2.7428571428571428e-05, |
| "loss": 1.1656, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2504472271914132, |
| "grad_norm": 0.46704745292663574, |
| "learning_rate": 2.9571428571428575e-05, |
| "loss": 1.1207, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.26833631484794274, |
| "grad_norm": 0.6753780245780945, |
| "learning_rate": 2.99993304631594e-05, |
| "loss": 1.0555, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.28622540250447226, |
| "grad_norm": 0.4959678649902344, |
| "learning_rate": 2.999661057218302e-05, |
| "loss": 1.0949, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3041144901610018, |
| "grad_norm": 0.5355138778686523, |
| "learning_rate": 2.999179886011389e-05, |
| "loss": 1.0936, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3220035778175313, |
| "grad_norm": 0.5441014766693115, |
| "learning_rate": 2.9984895998119723e-05, |
| "loss": 1.0513, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.33989266547406083, |
| "grad_norm": 0.5444539785385132, |
| "learning_rate": 2.99759029490549e-05, |
| "loss": 1.0729, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.35778175313059035, |
| "grad_norm": 0.5886111855506897, |
| "learning_rate": 2.996482096732619e-05, |
| "loss": 1.0365, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3756708407871199, |
| "grad_norm": 0.6149312853813171, |
| "learning_rate": 2.9951651598717757e-05, |
| "loss": 1.0499, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3935599284436494, |
| "grad_norm": 0.5841829180717468, |
| "learning_rate": 2.9936396680175547e-05, |
| "loss": 0.9816, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.41144901610017887, |
| "grad_norm": 0.7359195947647095, |
| "learning_rate": 2.9919058339551068e-05, |
| "loss": 0.9981, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4293381037567084, |
| "grad_norm": 0.6694313883781433, |
| "learning_rate": 2.9899638995304575e-05, |
| "loss": 0.955, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4472271914132379, |
| "grad_norm": 0.7220502495765686, |
| "learning_rate": 2.9878141356167725e-05, |
| "loss": 0.8753, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.6825446486473083, |
| "learning_rate": 2.9854568420765768e-05, |
| "loss": 0.961, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.48300536672629696, |
| "grad_norm": 0.7498815655708313, |
| "learning_rate": 2.982892347719925e-05, |
| "loss": 0.9166, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5008944543828264, |
| "grad_norm": 0.7284743189811707, |
| "learning_rate": 2.9801210102585393e-05, |
| "loss": 0.9351, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.518783542039356, |
| "grad_norm": 0.8820511102676392, |
| "learning_rate": 2.9771432162559113e-05, |
| "loss": 0.9124, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5366726296958855, |
| "grad_norm": 0.7539326548576355, |
| "learning_rate": 2.973959381073384e-05, |
| "loss": 0.846, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.554561717352415, |
| "grad_norm": 0.7277606129646301, |
| "learning_rate": 2.970569948812214e-05, |
| "loss": 0.9432, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5724508050089445, |
| "grad_norm": 0.7533125281333923, |
| "learning_rate": 2.966975392251624e-05, |
| "loss": 0.8557, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.590339892665474, |
| "grad_norm": 0.7943282723426819, |
| "learning_rate": 2.9631762127828584e-05, |
| "loss": 0.8635, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6082289803220036, |
| "grad_norm": 0.8900126814842224, |
| "learning_rate": 2.9591729403392447e-05, |
| "loss": 0.856, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6261180679785331, |
| "grad_norm": 0.8534295558929443, |
| "learning_rate": 2.9549661333222764e-05, |
| "loss": 0.8737, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6440071556350626, |
| "grad_norm": 0.7521919012069702, |
| "learning_rate": 2.950556378523723e-05, |
| "loss": 0.8129, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6618962432915921, |
| "grad_norm": 0.8599235415458679, |
| "learning_rate": 2.9459442910437798e-05, |
| "loss": 0.7975, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6797853309481217, |
| "grad_norm": 0.9698217511177063, |
| "learning_rate": 2.9411305142052725e-05, |
| "loss": 0.8128, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.9345831871032715, |
| "learning_rate": 2.9361157194639184e-05, |
| "loss": 0.7551, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7155635062611807, |
| "grad_norm": 0.7919726967811584, |
| "learning_rate": 2.9309006063146716e-05, |
| "loss": 0.7606, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7334525939177102, |
| "grad_norm": 0.8289109468460083, |
| "learning_rate": 2.925485902194151e-05, |
| "loss": 0.7793, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7513416815742398, |
| "grad_norm": 0.9304677248001099, |
| "learning_rate": 2.9198723623791724e-05, |
| "loss": 0.711, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 1.0459802150726318, |
| "learning_rate": 2.9140607698814e-05, |
| "loss": 0.737, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7871198568872988, |
| "grad_norm": 0.9233027696609497, |
| "learning_rate": 2.9080519353381243e-05, |
| "loss": 0.7234, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8050089445438283, |
| "grad_norm": 0.969067394733429, |
| "learning_rate": 2.9018466968991913e-05, |
| "loss": 0.712, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8228980322003577, |
| "grad_norm": 0.8197952508926392, |
| "learning_rate": 2.8954459201100916e-05, |
| "loss": 0.7021, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8407871198568873, |
| "grad_norm": 0.9914915561676025, |
| "learning_rate": 2.8888504977912284e-05, |
| "loss": 0.7463, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8586762075134168, |
| "grad_norm": 1.0100288391113281, |
| "learning_rate": 2.8820613499133814e-05, |
| "loss": 0.7186, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8765652951699463, |
| "grad_norm": 0.9397743344306946, |
| "learning_rate": 2.875079423469384e-05, |
| "loss": 0.704, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8944543828264758, |
| "grad_norm": 1.116220235824585, |
| "learning_rate": 2.8679056923420294e-05, |
| "loss": 0.705, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9123434704830053, |
| "grad_norm": 0.9566684365272522, |
| "learning_rate": 2.8605411571682295e-05, |
| "loss": 0.6645, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.884859561920166, |
| "learning_rate": 2.8529868451994387e-05, |
| "loss": 0.6604, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9481216457960644, |
| "grad_norm": 1.0066081285476685, |
| "learning_rate": 2.8452438101583648e-05, |
| "loss": 0.6933, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9660107334525939, |
| "grad_norm": 0.992435097694397, |
| "learning_rate": 2.8373131320919936e-05, |
| "loss": 0.6576, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9838998211091234, |
| "grad_norm": 1.1710649728775024, |
| "learning_rate": 2.8291959172209314e-05, |
| "loss": 0.6487, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4341217279434204, |
| "learning_rate": 2.820893297785107e-05, |
| "loss": 0.6293, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0178890876565294, |
| "grad_norm": 1.1913830041885376, |
| "learning_rate": 2.812406431885838e-05, |
| "loss": 0.5798, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.035778175313059, |
| "grad_norm": 0.9532171487808228, |
| "learning_rate": 2.8037365033242917e-05, |
| "loss": 0.5752, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0536672629695885, |
| "grad_norm": 1.3926500082015991, |
| "learning_rate": 2.794884721436361e-05, |
| "loss": 0.6038, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.071556350626118, |
| "grad_norm": 1.0542120933532715, |
| "learning_rate": 2.7858523209239785e-05, |
| "loss": 0.5491, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0894454382826475, |
| "grad_norm": 1.1710113286972046, |
| "learning_rate": 2.7766405616828938e-05, |
| "loss": 0.5183, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.1073345259391771, |
| "grad_norm": 0.9291325211524963, |
| "learning_rate": 2.7672507286269332e-05, |
| "loss": 0.5193, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1252236135957066, |
| "grad_norm": 0.9673988223075867, |
| "learning_rate": 2.7576841315087744e-05, |
| "loss": 0.4956, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.1431127012522362, |
| "grad_norm": 1.3483326435089111, |
| "learning_rate": 2.747942104737252e-05, |
| "loss": 0.5407, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.1610017889087656, |
| "grad_norm": 0.9651196002960205, |
| "learning_rate": 2.738026007191226e-05, |
| "loss": 0.5261, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.1788908765652952, |
| "grad_norm": 1.0171095132827759, |
| "learning_rate": 2.727937222030039e-05, |
| "loss": 0.4733, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.1967799642218246, |
| "grad_norm": 1.0522226095199585, |
| "learning_rate": 2.7176771565005804e-05, |
| "loss": 0.496, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.2146690518783543, |
| "grad_norm": 1.230334758758545, |
| "learning_rate": 2.7072472417410002e-05, |
| "loss": 0.5143, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2325581395348837, |
| "grad_norm": 1.0268306732177734, |
| "learning_rate": 2.6966489325810793e-05, |
| "loss": 0.5221, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.250447227191413, |
| "grad_norm": 0.9560613632202148, |
| "learning_rate": 2.685883707339305e-05, |
| "loss": 0.5079, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2683363148479427, |
| "grad_norm": 1.0826609134674072, |
| "learning_rate": 2.6749530676166633e-05, |
| "loss": 0.4486, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.2862254025044724, |
| "grad_norm": 1.1852221488952637, |
| "learning_rate": 2.663858538087188e-05, |
| "loss": 0.5147, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3041144901610018, |
| "grad_norm": 1.1893274784088135, |
| "learning_rate": 2.6526016662852887e-05, |
| "loss": 0.4878, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.3220035778175312, |
| "grad_norm": 1.0597422122955322, |
| "learning_rate": 2.6411840223898902e-05, |
| "loss": 0.4802, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3398926654740608, |
| "grad_norm": 1.0407767295837402, |
| "learning_rate": 2.6296071990054167e-05, |
| "loss": 0.4368, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.3577817531305905, |
| "grad_norm": 1.2436342239379883, |
| "learning_rate": 2.6178728109396413e-05, |
| "loss": 0.4877, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.3756708407871199, |
| "grad_norm": 1.279242753982544, |
| "learning_rate": 2.6059824949784474e-05, |
| "loss": 0.4394, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.3935599284436493, |
| "grad_norm": 1.060095191001892, |
| "learning_rate": 2.5939379096575156e-05, |
| "loss": 0.4371, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.411449016100179, |
| "grad_norm": 1.1264623403549194, |
| "learning_rate": 2.5817407350309825e-05, |
| "loss": 0.4361, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.4293381037567083, |
| "grad_norm": 1.0541681051254272, |
| "learning_rate": 2.5693926724370958e-05, |
| "loss": 0.4431, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.447227191413238, |
| "grad_norm": 1.0380645990371704, |
| "learning_rate": 2.5568954442609016e-05, |
| "loss": 0.4025, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.4651162790697674, |
| "grad_norm": 1.122382640838623, |
| "learning_rate": 2.544250793693995e-05, |
| "loss": 0.4379, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.483005366726297, |
| "grad_norm": 1.0751020908355713, |
| "learning_rate": 2.531460484491368e-05, |
| "loss": 0.4328, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.5008944543828264, |
| "grad_norm": 1.234891653060913, |
| "learning_rate": 2.5185263007253912e-05, |
| "loss": 0.4375, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.518783542039356, |
| "grad_norm": 1.4189293384552002, |
| "learning_rate": 2.5054500465369597e-05, |
| "loss": 0.4321, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.5366726296958855, |
| "grad_norm": 1.1898143291473389, |
| "learning_rate": 2.4922335458838397e-05, |
| "loss": 0.4428, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.5545617173524149, |
| "grad_norm": 0.9852802753448486, |
| "learning_rate": 2.478878642286253e-05, |
| "loss": 0.4321, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.5724508050089445, |
| "grad_norm": 1.0897939205169678, |
| "learning_rate": 2.465387198569729e-05, |
| "loss": 0.4173, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.5903398926654742, |
| "grad_norm": 1.1859195232391357, |
| "learning_rate": 2.4517610966052682e-05, |
| "loss": 0.3929, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.6082289803220036, |
| "grad_norm": 1.2279844284057617, |
| "learning_rate": 2.4380022370468464e-05, |
| "loss": 0.4279, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.626118067978533, |
| "grad_norm": 1.1750357151031494, |
| "learning_rate": 2.4241125390662982e-05, |
| "loss": 0.3863, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.6440071556350626, |
| "grad_norm": 1.1458582878112793, |
| "learning_rate": 2.4100939400856216e-05, |
| "loss": 0.4131, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6618962432915922, |
| "grad_norm": 1.0942723751068115, |
| "learning_rate": 2.395948395506731e-05, |
| "loss": 0.3943, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.6797853309481217, |
| "grad_norm": 0.9955680966377258, |
| "learning_rate": 2.3816778784387097e-05, |
| "loss": 0.3774, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.697674418604651, |
| "grad_norm": 1.2159160375595093, |
| "learning_rate": 2.367284379422584e-05, |
| "loss": 0.3674, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.7155635062611807, |
| "grad_norm": 1.143467664718628, |
| "learning_rate": 2.3527699061536726e-05, |
| "loss": 0.3777, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7334525939177103, |
| "grad_norm": 1.0947456359863281, |
| "learning_rate": 2.338136483201539e-05, |
| "loss": 0.3947, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.7513416815742398, |
| "grad_norm": 1.0965192317962646, |
| "learning_rate": 2.323386151727595e-05, |
| "loss": 0.3732, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 1.1917636394500732, |
| "learning_rate": 2.3085209692003836e-05, |
| "loss": 0.369, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.7871198568872988, |
| "grad_norm": 1.117609977722168, |
| "learning_rate": 2.2935430091085904e-05, |
| "loss": 0.3933, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8050089445438284, |
| "grad_norm": 1.2263697385787964, |
| "learning_rate": 2.2784543606718227e-05, |
| "loss": 0.4066, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.8228980322003578, |
| "grad_norm": 1.1519149541854858, |
| "learning_rate": 2.263257128549191e-05, |
| "loss": 0.3776, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8407871198568873, |
| "grad_norm": 1.001869797706604, |
| "learning_rate": 2.2479534325457374e-05, |
| "loss": 0.3576, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.8586762075134167, |
| "grad_norm": 1.0438801050186157, |
| "learning_rate": 2.2325454073167518e-05, |
| "loss": 0.3951, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8765652951699463, |
| "grad_norm": 1.1146963834762573, |
| "learning_rate": 2.2170352020700187e-05, |
| "loss": 0.3637, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.894454382826476, |
| "grad_norm": 1.0773694515228271, |
| "learning_rate": 2.2014249802660297e-05, |
| "loss": 0.3596, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9123434704830053, |
| "grad_norm": 1.0742276906967163, |
| "learning_rate": 2.185716919316212e-05, |
| "loss": 0.3641, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.9302325581395348, |
| "grad_norm": 1.118898630142212, |
| "learning_rate": 2.16991321027921e-05, |
| "loss": 0.3598, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.9481216457960644, |
| "grad_norm": 1.1367541551589966, |
| "learning_rate": 2.1540160575552604e-05, |
| "loss": 0.3532, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.966010733452594, |
| "grad_norm": 1.0174490213394165, |
| "learning_rate": 2.138027678578712e-05, |
| "loss": 0.3277, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9838998211091234, |
| "grad_norm": 1.1238727569580078, |
| "learning_rate": 2.1219503035087202e-05, |
| "loss": 0.302, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.766844391822815, |
| "learning_rate": 2.1057861749181743e-05, |
| "loss": 0.3191, |
| "step": 560 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.135566147023012e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |