| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 25, | |
| "global_step": 750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 1.6712856006622314, | |
| "epoch": 0.1, | |
| "grad_norm": 0.6304148435592651, | |
| "learning_rate": 0.0001999990663152786, | |
| "loss": 2.0491, | |
| "mean_token_accuracy": 0.6684889650344848, | |
| "num_tokens": 122580.0, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_entropy": 0.9992010481655598, | |
| "eval_loss": 0.9745309948921204, | |
| "eval_mean_token_accuracy": 0.815976720303297, | |
| "eval_num_tokens": 122580.0, | |
| "eval_runtime": 66.8951, | |
| "eval_samples_per_second": 29.898, | |
| "eval_steps_per_second": 0.478, | |
| "step": 25 | |
| }, | |
| { | |
| "entropy": 0.9771250176429749, | |
| "epoch": 0.2, | |
| "grad_norm": 0.4267440736293793, | |
| "learning_rate": 0.0001993694918299864, | |
| "loss": 0.9676, | |
| "mean_token_accuracy": 0.8179380083084107, | |
| "num_tokens": 244723.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_entropy": 0.9376886244863272, | |
| "eval_loss": 0.940742015838623, | |
| "eval_mean_token_accuracy": 0.8212726972997189, | |
| "eval_num_tokens": 244723.0, | |
| "eval_runtime": 66.3094, | |
| "eval_samples_per_second": 30.162, | |
| "eval_steps_per_second": 0.483, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.9467579674720764, | |
| "epoch": 0.3, | |
| "grad_norm": 0.39123401045799255, | |
| "learning_rate": 0.0001975812958575343, | |
| "loss": 0.9473, | |
| "mean_token_accuracy": 0.820980327129364, | |
| "num_tokens": 366803.0, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_entropy": 0.930314002558589, | |
| "eval_loss": 0.9318345785140991, | |
| "eval_mean_token_accuracy": 0.8218902982771397, | |
| "eval_num_tokens": 366803.0, | |
| "eval_runtime": 66.1507, | |
| "eval_samples_per_second": 30.234, | |
| "eval_steps_per_second": 0.484, | |
| "step": 75 | |
| }, | |
| { | |
| "entropy": 0.9574691414833069, | |
| "epoch": 0.4, | |
| "grad_norm": 0.3710257112979889, | |
| "learning_rate": 0.00019465532828090735, | |
| "loss": 0.9545, | |
| "mean_token_accuracy": 0.8186049485206603, | |
| "num_tokens": 489374.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_entropy": 0.9250399190932512, | |
| "eval_loss": 0.927577555179596, | |
| "eval_mean_token_accuracy": 0.8226032145321369, | |
| "eval_num_tokens": 489374.0, | |
| "eval_runtime": 66.6912, | |
| "eval_samples_per_second": 29.989, | |
| "eval_steps_per_second": 0.48, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.9415939354896545, | |
| "epoch": 0.5, | |
| "grad_norm": 0.3609308898448944, | |
| "learning_rate": 0.00019062570509327992, | |
| "loss": 0.938, | |
| "mean_token_accuracy": 0.8219008255004883, | |
| "num_tokens": 611509.0, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_entropy": 0.9134028796106577, | |
| "eval_loss": 0.9236319661140442, | |
| "eval_mean_token_accuracy": 0.8233284279704094, | |
| "eval_num_tokens": 611509.0, | |
| "eval_runtime": 67.9585, | |
| "eval_samples_per_second": 29.43, | |
| "eval_steps_per_second": 0.471, | |
| "step": 125 | |
| }, | |
| { | |
| "entropy": 0.9313359212875366, | |
| "epoch": 0.6, | |
| "grad_norm": 0.3689591586589813, | |
| "learning_rate": 0.00018553941061473218, | |
| "loss": 0.9277, | |
| "mean_token_accuracy": 0.8244240188598633, | |
| "num_tokens": 733437.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_entropy": 0.9121778700500727, | |
| "eval_loss": 0.9206869006156921, | |
| "eval_mean_token_accuracy": 0.8243093993514776, | |
| "eval_num_tokens": 733437.0, | |
| "eval_runtime": 68.185, | |
| "eval_samples_per_second": 29.332, | |
| "eval_steps_per_second": 0.469, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 0.9311716175079345, | |
| "epoch": 0.7, | |
| "grad_norm": 0.3817085027694702, | |
| "learning_rate": 0.00017945574966774376, | |
| "loss": 0.9292, | |
| "mean_token_accuracy": 0.8232095694541931, | |
| "num_tokens": 855571.0, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_entropy": 0.9240057598799467, | |
| "eval_loss": 0.9190523028373718, | |
| "eval_mean_token_accuracy": 0.8243635054677725, | |
| "eval_num_tokens": 855571.0, | |
| "eval_runtime": 68.1745, | |
| "eval_samples_per_second": 29.336, | |
| "eval_steps_per_second": 0.469, | |
| "step": 175 | |
| }, | |
| { | |
| "entropy": 0.9368235039710998, | |
| "epoch": 0.8, | |
| "grad_norm": 0.343488872051239, | |
| "learning_rate": 0.00017244565609895074, | |
| "loss": 0.9328, | |
| "mean_token_accuracy": 0.822948260307312, | |
| "num_tokens": 977886.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_entropy": 0.9235651567578316, | |
| "eval_loss": 0.9163441061973572, | |
| "eval_mean_token_accuracy": 0.8248479198664427, | |
| "eval_num_tokens": 977886.0, | |
| "eval_runtime": 68.3524, | |
| "eval_samples_per_second": 29.26, | |
| "eval_steps_per_second": 0.468, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 0.9266206288337707, | |
| "epoch": 0.9, | |
| "grad_norm": 0.34697458148002625, | |
| "learning_rate": 0.00016459086570961594, | |
| "loss": 0.9247, | |
| "mean_token_accuracy": 0.8257838773727417, | |
| "num_tokens": 1099649.0, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_entropy": 0.9170300494879484, | |
| "eval_loss": 0.9154583215713501, | |
| "eval_mean_token_accuracy": 0.8250364065170288, | |
| "eval_num_tokens": 1099649.0, | |
| "eval_runtime": 67.9598, | |
| "eval_samples_per_second": 29.429, | |
| "eval_steps_per_second": 0.471, | |
| "step": 225 | |
| }, | |
| { | |
| "entropy": 0.9486314964294433, | |
| "epoch": 1.0, | |
| "grad_norm": 0.38791951537132263, | |
| "learning_rate": 0.00015598296323822024, | |
| "loss": 0.9457, | |
| "mean_token_accuracy": 0.8199513030052185, | |
| "num_tokens": 1222434.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_entropy": 0.9278132077306509, | |
| "eval_loss": 0.9133847951889038, | |
| "eval_mean_token_accuracy": 0.8252693247050047, | |
| "eval_num_tokens": 1222434.0, | |
| "eval_runtime": 67.957, | |
| "eval_samples_per_second": 29.43, | |
| "eval_steps_per_second": 0.471, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 0.8935548496246338, | |
| "epoch": 1.1, | |
| "grad_norm": 0.3913302421569824, | |
| "learning_rate": 0.00014672231450710066, | |
| "loss": 0.8794, | |
| "mean_token_accuracy": 0.8301347184181214, | |
| "num_tokens": 1344769.0, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_entropy": 0.8964261263608932, | |
| "eval_loss": 0.918248176574707, | |
| "eval_mean_token_accuracy": 0.8246872704476118, | |
| "eval_num_tokens": 1344769.0, | |
| "eval_runtime": 67.949, | |
| "eval_samples_per_second": 29.434, | |
| "eval_steps_per_second": 0.471, | |
| "step": 275 | |
| }, | |
| { | |
| "entropy": 0.8727394843101501, | |
| "epoch": 1.2, | |
| "grad_norm": 0.39644864201545715, | |
| "learning_rate": 0.00013691689618401835, | |
| "loss": 0.8669, | |
| "mean_token_accuracy": 0.8310828638076783, | |
| "num_tokens": 1466893.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_entropy": 0.8782581854611635, | |
| "eval_loss": 0.917015552520752, | |
| "eval_mean_token_accuracy": 0.8250990845263004, | |
| "eval_num_tokens": 1466893.0, | |
| "eval_runtime": 67.8451, | |
| "eval_samples_per_second": 29.479, | |
| "eval_steps_per_second": 0.472, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 0.8778977513313293, | |
| "epoch": 1.3, | |
| "grad_norm": 0.3395286500453949, | |
| "learning_rate": 0.00012668103680332012, | |
| "loss": 0.876, | |
| "mean_token_accuracy": 0.8312993144989014, | |
| "num_tokens": 1589094.0, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_entropy": 0.8799433764070272, | |
| "eval_loss": 0.9174618124961853, | |
| "eval_mean_token_accuracy": 0.8249436803162098, | |
| "eval_num_tokens": 1589094.0, | |
| "eval_runtime": 68.1596, | |
| "eval_samples_per_second": 29.343, | |
| "eval_steps_per_second": 0.469, | |
| "step": 325 | |
| }, | |
| { | |
| "entropy": 0.8876111268997192, | |
| "epoch": 1.4, | |
| "grad_norm": 0.364728182554245, | |
| "learning_rate": 0.00011613408372604825, | |
| "loss": 0.8773, | |
| "mean_token_accuracy": 0.8295953154563904, | |
| "num_tokens": 1711583.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_entropy": 0.85817476734519, | |
| "eval_loss": 0.9163104295730591, | |
| "eval_mean_token_accuracy": 0.8252090867608786, | |
| "eval_num_tokens": 1711583.0, | |
| "eval_runtime": 68.1812, | |
| "eval_samples_per_second": 29.334, | |
| "eval_steps_per_second": 0.469, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 0.88423011302948, | |
| "epoch": 1.5, | |
| "grad_norm": 0.38693588972091675, | |
| "learning_rate": 0.00010539901158188398, | |
| "loss": 0.883, | |
| "mean_token_accuracy": 0.8286035037040711, | |
| "num_tokens": 1834553.0, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_entropy": 0.873553803190589, | |
| "eval_loss": 0.9151167273521423, | |
| "eval_mean_token_accuracy": 0.8254394326359034, | |
| "eval_num_tokens": 1834553.0, | |
| "eval_runtime": 68.1992, | |
| "eval_samples_per_second": 29.326, | |
| "eval_steps_per_second": 0.469, | |
| "step": 375 | |
| }, | |
| { | |
| "entropy": 0.8715341877937317, | |
| "epoch": 1.6, | |
| "grad_norm": 0.3770897686481476, | |
| "learning_rate": 9.460098841811601e-05, | |
| "loss": 0.87, | |
| "mean_token_accuracy": 0.8320170021057129, | |
| "num_tokens": 1956574.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_entropy": 0.87935302965343, | |
| "eval_loss": 0.9151723980903625, | |
| "eval_mean_token_accuracy": 0.8255144450813532, | |
| "eval_num_tokens": 1956574.0, | |
| "eval_runtime": 67.9177, | |
| "eval_samples_per_second": 29.447, | |
| "eval_steps_per_second": 0.471, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 0.8710586881637573, | |
| "epoch": 1.7, | |
| "grad_norm": 0.3885301351547241, | |
| "learning_rate": 8.386591627395173e-05, | |
| "loss": 0.8672, | |
| "mean_token_accuracy": 0.8299804759025574, | |
| "num_tokens": 2078811.0, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_entropy": 0.8739563841372728, | |
| "eval_loss": 0.9149895906448364, | |
| "eval_mean_token_accuracy": 0.8253704849630594, | |
| "eval_num_tokens": 2078811.0, | |
| "eval_runtime": 66.7099, | |
| "eval_samples_per_second": 29.981, | |
| "eval_steps_per_second": 0.48, | |
| "step": 425 | |
| }, | |
| { | |
| "entropy": 0.8689111661911011, | |
| "epoch": 1.8, | |
| "grad_norm": 0.38322678208351135, | |
| "learning_rate": 7.33189631966799e-05, | |
| "loss": 0.865, | |
| "mean_token_accuracy": 0.8322654938697815, | |
| "num_tokens": 2200571.0, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_entropy": 0.8741269316524267, | |
| "eval_loss": 0.9147102236747742, | |
| "eval_mean_token_accuracy": 0.8253954574465752, | |
| "eval_num_tokens": 2200571.0, | |
| "eval_runtime": 66.1144, | |
| "eval_samples_per_second": 30.251, | |
| "eval_steps_per_second": 0.484, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 0.8786772465705872, | |
| "epoch": 1.9, | |
| "grad_norm": 0.3754963278770447, | |
| "learning_rate": 6.308310381598168e-05, | |
| "loss": 0.8755, | |
| "mean_token_accuracy": 0.8297365355491638, | |
| "num_tokens": 2322788.0, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_entropy": 0.8773031029850245, | |
| "eval_loss": 0.9134438633918762, | |
| "eval_mean_token_accuracy": 0.8253685813397169, | |
| "eval_num_tokens": 2322788.0, | |
| "eval_runtime": 66.1468, | |
| "eval_samples_per_second": 30.236, | |
| "eval_steps_per_second": 0.484, | |
| "step": 475 | |
| }, | |
| { | |
| "entropy": 0.8698205542564392, | |
| "epoch": 2.0, | |
| "grad_norm": 0.37209320068359375, | |
| "learning_rate": 5.327768549289934e-05, | |
| "loss": 0.8654, | |
| "mean_token_accuracy": 0.8314353656768799, | |
| "num_tokens": 2444868.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_entropy": 0.874303799122572, | |
| "eval_loss": 0.9136784076690674, | |
| "eval_mean_token_accuracy": 0.8255054354667664, | |
| "eval_num_tokens": 2444868.0, | |
| "eval_runtime": 65.7882, | |
| "eval_samples_per_second": 30.401, | |
| "eval_steps_per_second": 0.486, | |
| "step": 500 | |
| }, | |
| { | |
| "entropy": 0.8443045258522034, | |
| "epoch": 2.1, | |
| "grad_norm": 0.3855106830596924, | |
| "learning_rate": 4.4017036761779787e-05, | |
| "loss": 0.8248, | |
| "mean_token_accuracy": 0.8369424676895142, | |
| "num_tokens": 2567342.0, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_entropy": 0.8267646413296461, | |
| "eval_loss": 0.9248229265213013, | |
| "eval_mean_token_accuracy": 0.8247568681836128, | |
| "eval_num_tokens": 2567342.0, | |
| "eval_runtime": 66.4418, | |
| "eval_samples_per_second": 30.102, | |
| "eval_steps_per_second": 0.482, | |
| "step": 525 | |
| }, | |
| { | |
| "entropy": 0.8175929665565491, | |
| "epoch": 2.2, | |
| "grad_norm": 0.3969684839248657, | |
| "learning_rate": 3.540913429038407e-05, | |
| "loss": 0.8159, | |
| "mean_token_accuracy": 0.8389404201507569, | |
| "num_tokens": 2689296.0, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_entropy": 0.8400543294847012, | |
| "eval_loss": 0.9235786199569702, | |
| "eval_mean_token_accuracy": 0.8250540643930435, | |
| "eval_num_tokens": 2689296.0, | |
| "eval_runtime": 66.682, | |
| "eval_samples_per_second": 29.993, | |
| "eval_steps_per_second": 0.48, | |
| "step": 550 | |
| }, | |
| { | |
| "entropy": 0.8221414375305176, | |
| "epoch": 2.3, | |
| "grad_norm": 0.4103279709815979, | |
| "learning_rate": 2.7554343901049294e-05, | |
| "loss": 0.8108, | |
| "mean_token_accuracy": 0.8397244310379028, | |
| "num_tokens": 2811102.0, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_entropy": 0.8320044614374638, | |
| "eval_loss": 0.9250128269195557, | |
| "eval_mean_token_accuracy": 0.8249792046844959, | |
| "eval_num_tokens": 2811102.0, | |
| "eval_runtime": 68.2399, | |
| "eval_samples_per_second": 29.308, | |
| "eval_steps_per_second": 0.469, | |
| "step": 575 | |
| }, | |
| { | |
| "entropy": 0.8239590454101563, | |
| "epoch": 2.4, | |
| "grad_norm": 0.4046800434589386, | |
| "learning_rate": 2.0544250332256276e-05, | |
| "loss": 0.8193, | |
| "mean_token_accuracy": 0.8385440015792847, | |
| "num_tokens": 2933467.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_entropy": 0.8278196454048157, | |
| "eval_loss": 0.9243917465209961, | |
| "eval_mean_token_accuracy": 0.82486542314291, | |
| "eval_num_tokens": 2933467.0, | |
| "eval_runtime": 68.1284, | |
| "eval_samples_per_second": 29.356, | |
| "eval_steps_per_second": 0.47, | |
| "step": 600 | |
| }, | |
| { | |
| "entropy": 0.8189333271980286, | |
| "epoch": 2.5, | |
| "grad_norm": 0.38442716002464294, | |
| "learning_rate": 1.4460589385267842e-05, | |
| "loss": 0.8178, | |
| "mean_token_accuracy": 0.8380931878089904, | |
| "num_tokens": 3055276.0, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_entropy": 0.8392351847141981, | |
| "eval_loss": 0.9240784645080566, | |
| "eval_mean_token_accuracy": 0.8248794414103031, | |
| "eval_num_tokens": 3055276.0, | |
| "eval_runtime": 68.2247, | |
| "eval_samples_per_second": 29.315, | |
| "eval_steps_per_second": 0.469, | |
| "step": 625 | |
| }, | |
| { | |
| "entropy": 0.8184775829315185, | |
| "epoch": 2.6, | |
| "grad_norm": 0.42365893721580505, | |
| "learning_rate": 9.374294906720082e-06, | |
| "loss": 0.8096, | |
| "mean_token_accuracy": 0.8411558413505554, | |
| "num_tokens": 3176654.0, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_entropy": 0.8347331862896681, | |
| "eval_loss": 0.9245245456695557, | |
| "eval_mean_token_accuracy": 0.8248291350901127, | |
| "eval_num_tokens": 3176654.0, | |
| "eval_runtime": 67.8139, | |
| "eval_samples_per_second": 29.492, | |
| "eval_steps_per_second": 0.472, | |
| "step": 650 | |
| }, | |
| { | |
| "entropy": 0.8422538375854492, | |
| "epoch": 2.7, | |
| "grad_norm": 0.410686194896698, | |
| "learning_rate": 5.344671719092664e-06, | |
| "loss": 0.8453, | |
| "mean_token_accuracy": 0.832388162612915, | |
| "num_tokens": 3299985.0, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_entropy": 0.8364100754261017, | |
| "eval_loss": 0.9244425296783447, | |
| "eval_mean_token_accuracy": 0.8246717043220997, | |
| "eval_num_tokens": 3299985.0, | |
| "eval_runtime": 67.9993, | |
| "eval_samples_per_second": 29.412, | |
| "eval_steps_per_second": 0.471, | |
| "step": 675 | |
| }, | |
| { | |
| "entropy": 0.8317058253288269, | |
| "epoch": 2.8, | |
| "grad_norm": 0.45394909381866455, | |
| "learning_rate": 2.418704142465722e-06, | |
| "loss": 0.8301, | |
| "mean_token_accuracy": 0.8355569648742676, | |
| "num_tokens": 3422725.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_entropy": 0.8373962622135878, | |
| "eval_loss": 0.924216091632843, | |
| "eval_mean_token_accuracy": 0.8247099295258522, | |
| "eval_num_tokens": 3422725.0, | |
| "eval_runtime": 67.6852, | |
| "eval_samples_per_second": 29.549, | |
| "eval_steps_per_second": 0.473, | |
| "step": 700 | |
| }, | |
| { | |
| "entropy": 0.8333729934692383, | |
| "epoch": 2.9, | |
| "grad_norm": 0.4409957230091095, | |
| "learning_rate": 6.305081700136328e-07, | |
| "loss": 0.8241, | |
| "mean_token_accuracy": 0.83688227891922, | |
| "num_tokens": 3545132.0, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_entropy": 0.8377762027084827, | |
| "eval_loss": 0.924144446849823, | |
| "eval_mean_token_accuracy": 0.824684476479888, | |
| "eval_num_tokens": 3545132.0, | |
| "eval_runtime": 68.0427, | |
| "eval_samples_per_second": 29.393, | |
| "eval_steps_per_second": 0.47, | |
| "step": 725 | |
| }, | |
| { | |
| "entropy": 0.8257460308074951, | |
| "epoch": 3.0, | |
| "grad_norm": 0.40077298879623413, | |
| "learning_rate": 9.336847214269639e-10, | |
| "loss": 0.8146, | |
| "mean_token_accuracy": 0.8383808851242065, | |
| "num_tokens": 3667302.0, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_entropy": 0.8377156592905521, | |
| "eval_loss": 0.9241430759429932, | |
| "eval_mean_token_accuracy": 0.8246781621128321, | |
| "eval_num_tokens": 3667302.0, | |
| "eval_runtime": 67.5858, | |
| "eval_samples_per_second": 29.592, | |
| "eval_steps_per_second": 0.473, | |
| "step": 750 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.177111226528563e+16, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |