{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500.0, "global_step": 469, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021321961620469083, "grad_norm": 43.0, "learning_rate": 1.3333333333333334e-06, "loss": 2.9051132202148438, "step": 1, "token_acc": 0.7277310924369748 }, { "epoch": 0.021321961620469083, "grad_norm": 22.625, "learning_rate": 1.3333333333333333e-05, "loss": 2.441012912326389, "step": 10, "token_acc": 0.7579183406521536 }, { "epoch": 0.042643923240938165, "grad_norm": 5.4375, "learning_rate": 1.9994015129908345e-05, "loss": 0.8593629837036133, "step": 20, "token_acc": 0.7911111111111111 }, { "epoch": 0.06396588486140725, "grad_norm": 4.625, "learning_rate": 1.9946179143004327e-05, "loss": 0.6798801422119141, "step": 30, "token_acc": 0.8122553191489361 }, { "epoch": 0.08528784648187633, "grad_norm": 4.84375, "learning_rate": 1.9850736134393288e-05, "loss": 0.6295328140258789, "step": 40, "token_acc": 0.8224060022167278 }, { "epoch": 0.10660980810234541, "grad_norm": 5.28125, "learning_rate": 1.970814293853572e-05, "loss": 0.6145264625549316, "step": 50, "token_acc": 0.8277070335868729 }, { "epoch": 0.1279317697228145, "grad_norm": 5.71875, "learning_rate": 1.951908207253421e-05, "loss": 0.5874358177185058, "step": 60, "token_acc": 0.832194705380017 }, { "epoch": 0.14925373134328357, "grad_norm": 4.84375, "learning_rate": 1.9284458469290354e-05, "loss": 0.5833245754241944, "step": 70, "token_acc": 0.8283689140156949 }, { "epoch": 0.17057569296375266, "grad_norm": 5.3125, "learning_rate": 1.9005395146076618e-05, "loss": 0.588797664642334, "step": 80, "token_acc": 0.8285130996937734 }, { "epoch": 0.19189765458422176, "grad_norm": 4.9375, "learning_rate": 1.8683227829255333e-05, "loss": 0.5961318969726562, "step": 90, "token_acc": 0.8281675749318801 }, { "epoch": 0.21321961620469082, "grad_norm": 5.625, "learning_rate": 1.8319498560873475e-05, "loss": 0.5879468441009521, "step": 100, "token_acc": 0.8282130056323604 }, { "epoch": 0.2345415778251599, "grad_norm": 4.9375, "learning_rate": 1.791594831773494e-05, "loss": 0.5458186626434326, "step": 110, "token_acc": 0.8368572395128552 }, { "epoch": 0.255863539445629, "grad_norm": 4.8125, "learning_rate": 1.7474508678278914e-05, "loss": 0.5461445331573487, "step": 120, "token_acc": 0.8417710802573654 }, { "epoch": 0.2771855010660981, "grad_norm": 4.8125, "learning_rate": 1.699729257715053e-05, "loss": 0.5506069183349609, "step": 130, "token_acc": 0.8393039049235993 }, { "epoch": 0.29850746268656714, "grad_norm": 4.59375, "learning_rate": 1.648658419171666e-05, "loss": 0.56076021194458, "step": 140, "token_acc": 0.832009565291656 }, { "epoch": 0.31982942430703626, "grad_norm": 4.375, "learning_rate": 1.594482800893474e-05, "loss": 0.5413748741149902, "step": 150, "token_acc": 0.8403925186614107 }, { "epoch": 0.3411513859275053, "grad_norm": 4.84375, "learning_rate": 1.5374617124905565e-05, "loss": 0.5552651405334472, "step": 160, "token_acc": 0.8403886776510351 }, { "epoch": 0.3624733475479744, "grad_norm": 17.625, "learning_rate": 1.4778680833113928e-05, "loss": 0.5687627792358398, "step": 170, "token_acc": 0.8335312075983717 }, { "epoch": 0.3837953091684435, "grad_norm": 4.65625, "learning_rate": 1.4159871560765432e-05, "loss": 0.5532018184661865, "step": 180, "token_acc": 0.8383640846866763 }, { "epoch": 0.4051172707889126, "grad_norm": 4.84375, "learning_rate": 1.352115121574829e-05, "loss": 0.5434678077697754, "step": 190, "token_acc": 0.8393686165273909 }, { "epoch": 0.42643923240938164, "grad_norm": 5.625, "learning_rate": 1.2865577009569825e-05, "loss": 0.5481713294982911, "step": 200, "token_acc": 0.8371288798920378 }, { "epoch": 0.44776119402985076, "grad_norm": 7.6875, "learning_rate": 1.2196286824125728e-05, "loss": 0.5360486030578613, "step": 210, "token_acc": 0.8407146451937595 }, { "epoch": 0.4690831556503198, "grad_norm": 4.53125, "learning_rate": 1.1516484192343425e-05, "loss": 0.5320311546325683, "step": 220, "token_acc": 0.8416673741404194 }, { "epoch": 0.4904051172707889, "grad_norm": 5.3125, "learning_rate": 1.082942296458922e-05, "loss": 0.5446446418762207, "step": 230, "token_acc": 0.8414119236615436 }, { "epoch": 0.511727078891258, "grad_norm": 5.09375, "learning_rate": 1.0138391734232832e-05, "loss": 0.5085601806640625, "step": 240, "token_acc": 0.8489312782082943 }, { "epoch": 0.5330490405117271, "grad_norm": 4.75, "learning_rate": 9.446698096915848e-06, "loss": 0.5495187759399414, "step": 250, "token_acc": 0.8368993037867211 }, { "epoch": 0.5543710021321961, "grad_norm": 4.9375, "learning_rate": 8.75765281886647e-06, "loss": 0.5613573551177978, "step": 260, "token_acc": 0.8375316990701606 }, { "epoch": 0.5756929637526652, "grad_norm": 4.8125, "learning_rate": 8.074553990038396e-06, "loss": 0.5430341720581054, "step": 270, "token_acc": 0.8400202719824309 }, { "epoch": 0.5970149253731343, "grad_norm": 4.5625, "learning_rate": 7.400671237924202e-06, "loss": 0.5135886192321777, "step": 280, "token_acc": 0.8479239195041182 }, { "epoch": 0.6183368869936035, "grad_norm": 5.875, "learning_rate": 6.739230077603259e-06, "loss": 0.5393872261047363, "step": 290, "token_acc": 0.8358839942504439 }, { "epoch": 0.6396588486140725, "grad_norm": 4.71875, "learning_rate": 6.093396472932103e-06, "loss": 0.5145103454589843, "step": 300, "token_acc": 0.841726618705036 }, { "epoch": 0.6609808102345416, "grad_norm": 4.8125, "learning_rate": 5.466261682774519e-06, "loss": 0.5321601867675781, "step": 310, "token_acc": 0.8421635434412266 }, { "epoch": 0.6823027718550106, "grad_norm": 4.78125, "learning_rate": 4.860827464804383e-06, "loss": 0.53086256980896, "step": 320, "token_acc": 0.841112047177759 }, { "epoch": 0.7036247334754797, "grad_norm": 5.0, "learning_rate": 4.2799917077026394e-06, "loss": 0.5327719688415528, "step": 330, "token_acc": 0.8390072111353346 }, { "epoch": 0.7249466950959488, "grad_norm": 5.03125, "learning_rate": 3.7265345605193814e-06, "loss": 0.5248814582824707, "step": 340, "token_acc": 0.8462834990729816 }, { "epoch": 0.746268656716418, "grad_norm": 5.0, "learning_rate": 3.2031051255924082e-06, "loss": 0.5124996662139892, "step": 350, "token_acc": 0.8435970110796187 }, { "epoch": 0.767590618336887, "grad_norm": 5.09375, "learning_rate": 2.7122087787160168e-06, "loss": 0.5450184345245361, "step": 360, "token_acc": 0.8357380688124306 }, { "epoch": 0.7889125799573561, "grad_norm": 4.90625, "learning_rate": 2.256195177251659e-06, "loss": 0.5345516204833984, "step": 370, "token_acc": 0.8386361718288463 }, { "epoch": 0.8102345415778252, "grad_norm": 5.09375, "learning_rate": 1.8372470135791344e-06, "loss": 0.5218245029449463, "step": 380, "token_acc": 0.8449079962689732 }, { "epoch": 0.8315565031982942, "grad_norm": 5.03125, "learning_rate": 1.457369567719581e-06, "loss": 0.5236470222473144, "step": 390, "token_acc": 0.8448715767987753 }, { "epoch": 0.8528784648187633, "grad_norm": 4.90625, "learning_rate": 1.118381109136174e-06, "loss": 0.49837121963500974, "step": 400, "token_acc": 0.8512574027980431 }, { "epoch": 0.8742004264392325, "grad_norm": 4.8125, "learning_rate": 8.219041936540395e-07, "loss": 0.5413351058959961, "step": 410, "token_acc": 0.841102587878277 }, { "epoch": 0.8955223880597015, "grad_norm": 4.84375, "learning_rate": 5.693578971562963e-07, "loss": 0.5475930213928223, "step": 420, "token_acc": 0.8388290713324361 }, { "epoch": 0.9168443496801706, "grad_norm": 5.0, "learning_rate": 3.6195102322937545e-07, "loss": 0.5307172775268555, "step": 430, "token_acc": 0.8429056924384027 }, { "epoch": 0.9381663113006397, "grad_norm": 5.0625, "learning_rate": 2.006763172689996e-07, "loss": 0.5404001712799072, "step": 440, "token_acc": 0.8392032826124124 }, { "epoch": 0.9594882729211087, "grad_norm": 4.75, "learning_rate": 8.630571474074312e-08, "loss": 0.5237876892089843, "step": 450, "token_acc": 0.8413437129801997 }, { "epoch": 0.9808102345415778, "grad_norm": 4.65625, "learning_rate": 1.9386646339238925e-08, "loss": 0.534755277633667, "step": 460, "token_acc": 0.8429147755925366 } ], "logging_steps": 10, "max_steps": 469, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.015212785725604e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }