{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500.0,
  "global_step": 469,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021321961620469083,
      "grad_norm": 43.0,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 2.9051132202148438,
      "step": 1,
      "token_acc": 0.7277310924369748
    },
    {
      "epoch": 0.021321961620469083,
      "grad_norm": 22.625,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.441012912326389,
      "step": 10,
      "token_acc": 0.7579183406521536
    },
    {
      "epoch": 0.042643923240938165,
      "grad_norm": 5.4375,
      "learning_rate": 1.9994015129908345e-05,
      "loss": 0.8593629837036133,
      "step": 20,
      "token_acc": 0.7911111111111111
    },
    {
      "epoch": 0.06396588486140725,
      "grad_norm": 4.625,
      "learning_rate": 1.9946179143004327e-05,
      "loss": 0.6798801422119141,
      "step": 30,
      "token_acc": 0.8122553191489361
    },
    {
      "epoch": 0.08528784648187633,
      "grad_norm": 4.84375,
      "learning_rate": 1.9850736134393288e-05,
      "loss": 0.6295328140258789,
      "step": 40,
      "token_acc": 0.8224060022167278
    },
    {
      "epoch": 0.10660980810234541,
      "grad_norm": 5.28125,
      "learning_rate": 1.970814293853572e-05,
      "loss": 0.6145264625549316,
      "step": 50,
      "token_acc": 0.8277070335868729
    },
    {
      "epoch": 0.1279317697228145,
      "grad_norm": 5.71875,
      "learning_rate": 1.951908207253421e-05,
      "loss": 0.5874358177185058,
      "step": 60,
      "token_acc": 0.832194705380017
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 4.84375,
      "learning_rate": 1.9284458469290354e-05,
      "loss": 0.5833245754241944,
      "step": 70,
      "token_acc": 0.8283689140156949
    },
    {
      "epoch": 0.17057569296375266,
      "grad_norm": 5.3125,
      "learning_rate": 1.9005395146076618e-05,
      "loss": 0.588797664642334,
      "step": 80,
      "token_acc": 0.8285130996937734
    },
    {
      "epoch": 0.19189765458422176,
      "grad_norm": 4.9375,
      "learning_rate": 1.8683227829255333e-05,
      "loss": 0.5961318969726562,
      "step": 90,
      "token_acc": 0.8281675749318801
    },
    {
      "epoch": 0.21321961620469082,
      "grad_norm": 5.625,
      "learning_rate": 1.8319498560873475e-05,
      "loss": 0.5879468441009521,
      "step": 100,
      "token_acc": 0.8282130056323604
    },
    {
      "epoch": 0.2345415778251599,
      "grad_norm": 4.9375,
      "learning_rate": 1.791594831773494e-05,
      "loss": 0.5458186626434326,
      "step": 110,
      "token_acc": 0.8368572395128552
    },
    {
      "epoch": 0.255863539445629,
      "grad_norm": 4.8125,
      "learning_rate": 1.7474508678278914e-05,
      "loss": 0.5461445331573487,
      "step": 120,
      "token_acc": 0.8417710802573654
    },
    {
      "epoch": 0.2771855010660981,
      "grad_norm": 4.8125,
      "learning_rate": 1.699729257715053e-05,
      "loss": 0.5506069183349609,
      "step": 130,
      "token_acc": 0.8393039049235993
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 4.59375,
      "learning_rate": 1.648658419171666e-05,
      "loss": 0.56076021194458,
      "step": 140,
      "token_acc": 0.832009565291656
    },
    {
      "epoch": 0.31982942430703626,
      "grad_norm": 4.375,
      "learning_rate": 1.594482800893474e-05,
      "loss": 0.5413748741149902,
      "step": 150,
      "token_acc": 0.8403925186614107
    },
    {
      "epoch": 0.3411513859275053,
      "grad_norm": 4.84375,
      "learning_rate": 1.5374617124905565e-05,
      "loss": 0.5552651405334472,
      "step": 160,
      "token_acc": 0.8403886776510351
    },
    {
      "epoch": 0.3624733475479744,
      "grad_norm": 17.625,
      "learning_rate": 1.4778680833113928e-05,
      "loss": 0.5687627792358398,
      "step": 170,
      "token_acc": 0.8335312075983717
    },
    {
      "epoch": 0.3837953091684435,
      "grad_norm": 4.65625,
      "learning_rate": 1.4159871560765432e-05,
      "loss": 0.5532018184661865,
      "step": 180,
      "token_acc": 0.8383640846866763
    },
    {
      "epoch": 0.4051172707889126,
      "grad_norm": 4.84375,
      "learning_rate": 1.352115121574829e-05,
      "loss": 0.5434678077697754,
      "step": 190,
      "token_acc": 0.8393686165273909
    },
    {
      "epoch": 0.42643923240938164,
      "grad_norm": 5.625,
      "learning_rate": 1.2865577009569825e-05,
      "loss": 0.5481713294982911,
      "step": 200,
      "token_acc": 0.8371288798920378
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 7.6875,
      "learning_rate": 1.2196286824125728e-05,
      "loss": 0.5360486030578613,
      "step": 210,
      "token_acc": 0.8407146451937595
    },
    {
      "epoch": 0.4690831556503198,
      "grad_norm": 4.53125,
      "learning_rate": 1.1516484192343425e-05,
      "loss": 0.5320311546325683,
      "step": 220,
      "token_acc": 0.8416673741404194
    },
    {
      "epoch": 0.4904051172707889,
      "grad_norm": 5.3125,
      "learning_rate": 1.082942296458922e-05,
      "loss": 0.5446446418762207,
      "step": 230,
      "token_acc": 0.8414119236615436
    },
    {
      "epoch": 0.511727078891258,
      "grad_norm": 5.09375,
      "learning_rate": 1.0138391734232832e-05,
      "loss": 0.5085601806640625,
      "step": 240,
      "token_acc": 0.8489312782082943
    },
    {
      "epoch": 0.5330490405117271,
      "grad_norm": 4.75,
      "learning_rate": 9.446698096915848e-06,
      "loss": 0.5495187759399414,
      "step": 250,
      "token_acc": 0.8368993037867211
    },
    {
      "epoch": 0.5543710021321961,
      "grad_norm": 4.9375,
      "learning_rate": 8.75765281886647e-06,
      "loss": 0.5613573551177978,
      "step": 260,
      "token_acc": 0.8375316990701606
    },
    {
      "epoch": 0.5756929637526652,
      "grad_norm": 4.8125,
      "learning_rate": 8.074553990038396e-06,
      "loss": 0.5430341720581054,
      "step": 270,
      "token_acc": 0.8400202719824309
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 4.5625,
      "learning_rate": 7.400671237924202e-06,
      "loss": 0.5135886192321777,
      "step": 280,
      "token_acc": 0.8479239195041182
    },
    {
      "epoch": 0.6183368869936035,
      "grad_norm": 5.875,
      "learning_rate": 6.739230077603259e-06,
      "loss": 0.5393872261047363,
      "step": 290,
      "token_acc": 0.8358839942504439
    },
    {
      "epoch": 0.6396588486140725,
      "grad_norm": 4.71875,
      "learning_rate": 6.093396472932103e-06,
      "loss": 0.5145103454589843,
      "step": 300,
      "token_acc": 0.841726618705036
    },
    {
      "epoch": 0.6609808102345416,
      "grad_norm": 4.8125,
      "learning_rate": 5.466261682774519e-06,
      "loss": 0.5321601867675781,
      "step": 310,
      "token_acc": 0.8421635434412266
    },
    {
      "epoch": 0.6823027718550106,
      "grad_norm": 4.78125,
      "learning_rate": 4.860827464804383e-06,
      "loss": 0.53086256980896,
      "step": 320,
      "token_acc": 0.841112047177759
    },
    {
      "epoch": 0.7036247334754797,
      "grad_norm": 5.0,
      "learning_rate": 4.2799917077026394e-06,
      "loss": 0.5327719688415528,
      "step": 330,
      "token_acc": 0.8390072111353346
    },
    {
      "epoch": 0.7249466950959488,
      "grad_norm": 5.03125,
      "learning_rate": 3.7265345605193814e-06,
      "loss": 0.5248814582824707,
      "step": 340,
      "token_acc": 0.8462834990729816
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 5.0,
      "learning_rate": 3.2031051255924082e-06,
      "loss": 0.5124996662139892,
      "step": 350,
      "token_acc": 0.8435970110796187
    },
    {
      "epoch": 0.767590618336887,
      "grad_norm": 5.09375,
      "learning_rate": 2.7122087787160168e-06,
      "loss": 0.5450184345245361,
      "step": 360,
      "token_acc": 0.8357380688124306
    },
    {
      "epoch": 0.7889125799573561,
      "grad_norm": 4.90625,
      "learning_rate": 2.256195177251659e-06,
      "loss": 0.5345516204833984,
      "step": 370,
      "token_acc": 0.8386361718288463
    },
    {
      "epoch": 0.8102345415778252,
      "grad_norm": 5.09375,
      "learning_rate": 1.8372470135791344e-06,
      "loss": 0.5218245029449463,
      "step": 380,
      "token_acc": 0.8449079962689732
    },
    {
      "epoch": 0.8315565031982942,
      "grad_norm": 5.03125,
      "learning_rate": 1.457369567719581e-06,
      "loss": 0.5236470222473144,
      "step": 390,
      "token_acc": 0.8448715767987753
    },
    {
      "epoch": 0.8528784648187633,
      "grad_norm": 4.90625,
      "learning_rate": 1.118381109136174e-06,
      "loss": 0.49837121963500974,
      "step": 400,
      "token_acc": 0.8512574027980431
    },
    {
      "epoch": 0.8742004264392325,
      "grad_norm": 4.8125,
      "learning_rate": 8.219041936540395e-07,
      "loss": 0.5413351058959961,
      "step": 410,
      "token_acc": 0.841102587878277
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 4.84375,
      "learning_rate": 5.693578971562963e-07,
      "loss": 0.5475930213928223,
      "step": 420,
      "token_acc": 0.8388290713324361
    },
    {
      "epoch": 0.9168443496801706,
      "grad_norm": 5.0,
      "learning_rate": 3.6195102322937545e-07,
      "loss": 0.5307172775268555,
      "step": 430,
      "token_acc": 0.8429056924384027
    },
    {
      "epoch": 0.9381663113006397,
      "grad_norm": 5.0625,
      "learning_rate": 2.006763172689996e-07,
      "loss": 0.5404001712799072,
      "step": 440,
      "token_acc": 0.8392032826124124
    },
    {
      "epoch": 0.9594882729211087,
      "grad_norm": 4.75,
      "learning_rate": 8.630571474074312e-08,
      "loss": 0.5237876892089843,
      "step": 450,
      "token_acc": 0.8413437129801997
    },
    {
      "epoch": 0.9808102345415778,
      "grad_norm": 4.65625,
      "learning_rate": 1.9386646339238925e-08,
      "loss": 0.534755277633667,
      "step": 460,
      "token_acc": 0.8429147755925366
    }
  ],
  "logging_steps": 10,
  "max_steps": 469,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.015212785725604e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}