| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.7659103486441614, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05534034311012728, |
| "grad_norm": 0.7568074464797974, |
| "learning_rate": 9.8e-05, |
| "loss": 1.0759, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11068068622025456, |
| "grad_norm": 0.14674387872219086, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 0.1883, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16602102933038185, |
| "grad_norm": 0.13491740822792053, |
| "learning_rate": 0.0001998263839556516, |
| "loss": 0.1823, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.22136137244050913, |
| "grad_norm": 0.11964733898639679, |
| "learning_rate": 0.00019929192281085555, |
| "loss": 0.1812, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.27670171555063644, |
| "grad_norm": 0.14810685813426971, |
| "learning_rate": 0.0001983984765530473, |
| "loss": 0.181, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3320420586607637, |
| "grad_norm": 0.11917376518249512, |
| "learning_rate": 0.0001971492753936756, |
| "loss": 0.1801, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.387382401770891, |
| "grad_norm": 0.11992548406124115, |
| "learning_rate": 0.0001955488357587162, |
| "loss": 0.1777, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.44272274488101826, |
| "grad_norm": 0.09655219316482544, |
| "learning_rate": 0.00019360294395975392, |
| "loss": 0.1778, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.49806308799114557, |
| "grad_norm": 0.08890487998723984, |
| "learning_rate": 0.00019131863527385433, |
| "loss": 0.1776, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5534034311012729, |
| "grad_norm": 0.09338568150997162, |
| "learning_rate": 0.0001887041685078625, |
| "loss": 0.1756, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6087437742114001, |
| "grad_norm": 0.10447146743535995, |
| "learning_rate": 0.0001857689961390886, |
| "loss": 0.1769, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6640841173215274, |
| "grad_norm": 0.08940507471561432, |
| "learning_rate": 0.00018252373014033646, |
| "loss": 0.1767, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7194244604316546, |
| "grad_norm": 0.1015540286898613, |
| "learning_rate": 0.0001789801036128327, |
| "loss": 0.1749, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.774764803541782, |
| "grad_norm": 0.0866508036851883, |
| "learning_rate": 0.0001751509283657702, |
| "loss": 0.1765, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8301051466519093, |
| "grad_norm": 0.08853679150342941, |
| "learning_rate": 0.00017105004859583578, |
| "loss": 0.1757, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8854454897620365, |
| "grad_norm": 0.09371698647737503, |
| "learning_rate": 0.00016669229083419114, |
| "loss": 0.1766, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9407858328721638, |
| "grad_norm": 0.8600781559944153, |
| "learning_rate": 0.00016209341034187125, |
| "loss": 0.342, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9961261759822911, |
| "grad_norm": 0.0968112051486969, |
| "learning_rate": 0.00015727003414740492, |
| "loss": 0.1816, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0509131156613172, |
| "grad_norm": 0.092947818338871, |
| "learning_rate": 0.00015223960093260294, |
| "loss": 0.1753, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.1062534587714443, |
| "grad_norm": 0.07544200122356415, |
| "learning_rate": 0.00014702029798385264, |
| "loss": 0.1747, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1615938018815717, |
| "grad_norm": 0.06907663494348526, |
| "learning_rate": 0.00014163099543686964, |
| "loss": 0.1741, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.2169341449916988, |
| "grad_norm": 0.07450341433286667, |
| "learning_rate": 0.00013609117805264063, |
| "loss": 0.1754, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2722744881018262, |
| "grad_norm": 0.09569600224494934, |
| "learning_rate": 0.0001304208747712189, |
| "loss": 0.1732, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.3276148312119536, |
| "grad_norm": 0.08210264891386032, |
| "learning_rate": 0.00012464058629806633, |
| "loss": 0.1716, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3829551743220807, |
| "grad_norm": 0.06857864558696747, |
| "learning_rate": 0.00011877121098475106, |
| "loss": 0.1728, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.438295517432208, |
| "grad_norm": 0.11595187336206436, |
| "learning_rate": 0.00011283396927197472, |
| "loss": 0.174, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4936358605423354, |
| "grad_norm": 0.07529956847429276, |
| "learning_rate": 0.00010685032696810226, |
| "loss": 0.1733, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5489762036524626, |
| "grad_norm": 0.0644264817237854, |
| "learning_rate": 0.00010084191764057676, |
| "loss": 0.1738, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.60431654676259, |
| "grad_norm": 0.060298092663288116, |
| "learning_rate": 9.483046440080949e-05, |
| "loss": 0.1717, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6596568898727173, |
| "grad_norm": 0.059512991458177567, |
| "learning_rate": 8.883770136532834e-05, |
| "loss": 0.1735, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.7149972329828445, |
| "grad_norm": 0.0744907408952713, |
| "learning_rate": 8.288529507713752e-05, |
| "loss": 0.1722, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7703375760929718, |
| "grad_norm": 0.05729057267308235, |
| "learning_rate": 7.699476617138598e-05, |
| "loss": 0.1728, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8256779192030992, |
| "grad_norm": 0.06146302446722984, |
| "learning_rate": 7.118741156855904e-05, |
| "loss": 0.1714, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8810182623132263, |
| "grad_norm": 0.08402097970247269, |
| "learning_rate": 6.548422747649902e-05, |
| "loss": 0.1711, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.9363586054233535, |
| "grad_norm": 0.06482277065515518, |
| "learning_rate": 5.990583347963793e-05, |
| "loss": 0.1714, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.991698948533481, |
| "grad_norm": 0.06089329719543457, |
| "learning_rate": 5.44723979898939e-05, |
| "loss": 0.1711, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0464858882125068, |
| "grad_norm": 0.053777534514665604, |
| "learning_rate": 4.9203565328759604e-05, |
| "loss": 0.1708, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.1018262313226344, |
| "grad_norm": 0.052074234932661057, |
| "learning_rate": 4.411838470421454e-05, |
| "loss": 0.169, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.1571665744327615, |
| "grad_norm": 0.04878537356853485, |
| "learning_rate": 3.923524133924069e-05, |
| "loss": 0.1705, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.2125069175428886, |
| "grad_norm": 0.06310058385133743, |
| "learning_rate": 3.4571790000943973e-05, |
| "loss": 0.1706, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.2678472606530162, |
| "grad_norm": 0.05678916722536087, |
| "learning_rate": 3.014489117060344e-05, |
| "loss": 0.1692, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.3231876037631434, |
| "grad_norm": 0.05413331836462021, |
| "learning_rate": 2.5970550085421773e-05, |
| "loss": 0.1702, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.3785279468732705, |
| "grad_norm": 0.05526156723499298, |
| "learning_rate": 2.206385887236956e-05, |
| "loss": 0.1698, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.4338682899833977, |
| "grad_norm": 0.05364658683538437, |
| "learning_rate": 1.8438941983334958e-05, |
| "loss": 0.1712, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.4892086330935252, |
| "grad_norm": 0.06608569622039795, |
| "learning_rate": 1.5108905128855443e-05, |
| "loss": 0.1692, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.5445489762036524, |
| "grad_norm": 0.07030092924833298, |
| "learning_rate": 1.208578789505943e-05, |
| "loss": 0.1692, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.59988931931378, |
| "grad_norm": 0.059825293719768524, |
| "learning_rate": 9.380520215128841e-06, |
| "loss": 0.1684, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.655229662423907, |
| "grad_norm": 0.06272447854280472, |
| "learning_rate": 7.002882852657677e-06, |
| "loss": 0.1688, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.7105700055340343, |
| "grad_norm": 0.0538700707256794, |
| "learning_rate": 4.961472039777459e-06, |
| "loss": 0.1681, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.7659103486441614, |
| "grad_norm": 0.05325184017419815, |
| "learning_rate": 3.2636683978985116e-06, |
| "loss": 0.1688, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2712, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.176743776873759e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|