{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1907032181168058,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.023837902264600714,
      "grad_norm": 8.34217357635498,
      "learning_rate": 1.6e-07,
      "loss": 3.2614,
      "step": 10
    },
    {
      "epoch": 0.04767580452920143,
      "grad_norm": 10.162466049194336,
      "learning_rate": 3.6e-07,
      "loss": 3.2005,
      "step": 20
    },
    {
      "epoch": 0.07151370679380215,
      "grad_norm": 8.322413444519043,
      "learning_rate": 5.6e-07,
      "loss": 3.3002,
      "step": 30
    },
    {
      "epoch": 0.09535160905840286,
      "grad_norm": 6.742789268493652,
      "learning_rate": 7.599999999999999e-07,
      "loss": 3.1868,
      "step": 40
    },
    {
      "epoch": 0.11918951132300358,
      "grad_norm": 6.748029708862305,
      "learning_rate": 9.6e-07,
      "loss": 3.1789,
      "step": 50
    },
    {
      "epoch": 0.1430274135876043,
      "grad_norm": 5.005261421203613,
      "learning_rate": 9.992203820909905e-07,
      "loss": 3.0969,
      "step": 60
    },
    {
      "epoch": 0.16686531585220502,
      "grad_norm": 6.2596893310546875,
      "learning_rate": 9.960573506572389e-07,
      "loss": 3.021,
      "step": 70
    },
    {
      "epoch": 0.1907032181168057,
      "grad_norm": 5.589771747589111,
      "learning_rate": 9.904775776745956e-07,
      "loss": 3.1221,
      "step": 80
    },
    {
      "epoch": 0.21454112038140644,
      "grad_norm": 7.189596176147461,
      "learning_rate": 9.825082472361556e-07,
      "loss": 3.0537,
      "step": 90
    },
    {
      "epoch": 0.23837902264600716,
      "grad_norm": 5.899924278259277,
      "learning_rate": 9.721881851187405e-07,
      "loss": 3.0154,
      "step": 100
    },
    {
      "epoch": 0.2622169249106079,
      "grad_norm": 4.845108509063721,
      "learning_rate": 9.595676696276171e-07,
      "loss": 3.0747,
      "step": 110
    },
    {
      "epoch": 0.2860548271752086,
      "grad_norm": 5.564320087432861,
      "learning_rate": 9.447081866456487e-07,
      "loss": 2.9519,
      "step": 120
    },
    {
      "epoch": 0.3098927294398093,
      "grad_norm": 6.226819038391113,
      "learning_rate": 9.276821300802533e-07,
      "loss": 3.0256,
      "step": 130
    },
    {
      "epoch": 0.33373063170441003,
      "grad_norm": 5.26694393157959,
      "learning_rate": 9.085724491675642e-07,
      "loss": 2.843,
      "step": 140
    },
    {
      "epoch": 0.3575685339690107,
      "grad_norm": 5.471522808074951,
      "learning_rate": 8.874722443520898e-07,
      "loss": 2.8078,
      "step": 150
    },
    {
      "epoch": 0.3814064362336114,
      "grad_norm": 5.0895819664001465,
      "learning_rate": 8.644843137107057e-07,
      "loss": 2.8523,
      "step": 160
    },
    {
      "epoch": 0.4052443384982122,
      "grad_norm": 4.7197794914245605,
      "learning_rate": 8.397206521307583e-07,
      "loss": 2.754,
      "step": 170
    },
    {
      "epoch": 0.42908224076281287,
      "grad_norm": 5.758249282836914,
      "learning_rate": 8.133019056822302e-07,
      "loss": 2.7133,
      "step": 180
    },
    {
      "epoch": 0.45292014302741357,
      "grad_norm": 4.340641021728516,
      "learning_rate": 7.853567838422159e-07,
      "loss": 2.6577,
      "step": 190
    },
    {
      "epoch": 0.4767580452920143,
      "grad_norm": 4.795559406280518,
      "learning_rate": 7.560214324352858e-07,
      "loss": 2.7179,
      "step": 200
    },
    {
      "epoch": 0.5005959475566151,
      "grad_norm": 5.540432929992676,
      "learning_rate": 7.254387703447153e-07,
      "loss": 2.6652,
      "step": 210
    },
    {
      "epoch": 0.5244338498212158,
      "grad_norm": 4.245532035827637,
      "learning_rate": 6.937577932260514e-07,
      "loss": 2.6331,
      "step": 220
    },
    {
      "epoch": 0.5482717520858165,
      "grad_norm": 4.427391052246094,
      "learning_rate": 6.611328476152556e-07,
      "loss": 2.6846,
      "step": 230
    },
    {
      "epoch": 0.5721096543504172,
      "grad_norm": 5.0143842697143555,
      "learning_rate": 6.277228789678953e-07,
      "loss": 2.605,
      "step": 240
    },
    {
      "epoch": 0.5959475566150179,
      "grad_norm": 5.8499603271484375,
      "learning_rate": 5.936906572928624e-07,
      "loss": 2.4651,
      "step": 250
    },
    {
      "epoch": 0.6197854588796186,
      "grad_norm": 4.577277183532715,
      "learning_rate": 5.592019841532506e-07,
      "loss": 2.4897,
      "step": 260
    },
    {
      "epoch": 0.6436233611442194,
      "grad_norm": 4.542954444885254,
      "learning_rate": 5.244248848978067e-07,
      "loss": 2.4942,
      "step": 270
    },
    {
      "epoch": 0.6674612634088201,
      "grad_norm": 5.428004741668701,
      "learning_rate": 4.895287900583216e-07,
      "loss": 2.5005,
      "step": 280
    },
    {
      "epoch": 0.6912991656734208,
      "grad_norm": 4.4296956062316895,
      "learning_rate": 4.5468370990110997e-07,
      "loss": 2.3997,
      "step": 290
    },
    {
      "epoch": 0.7151370679380215,
      "grad_norm": 4.045319557189941,
      "learning_rate": 4.200594061540826e-07,
      "loss": 2.3467,
      "step": 300
    },
    {
      "epoch": 0.7389749702026222,
      "grad_norm": 4.404695510864258,
      "learning_rate": 3.8582456494467206e-07,
      "loss": 2.4801,
      "step": 310
    },
    {
      "epoch": 0.7628128724672228,
      "grad_norm": 5.092370510101318,
      "learning_rate": 3.521459749779768e-07,
      "loss": 2.4103,
      "step": 320
    },
    {
      "epoch": 0.7866507747318237,
      "grad_norm": 5.434021472930908,
      "learning_rate": 3.191877149589539e-07,
      "loss": 2.401,
      "step": 330
    },
    {
      "epoch": 0.8104886769964244,
      "grad_norm": 4.2743353843688965,
      "learning_rate": 2.8711035421746363e-07,
      "loss": 2.4399,
      "step": 340
    },
    {
      "epoch": 0.834326579261025,
      "grad_norm": 4.966851711273193,
      "learning_rate": 2.5607017043063353e-07,
      "loss": 2.3473,
      "step": 350
    },
    {
      "epoch": 0.8581644815256257,
      "grad_norm": 5.600205898284912,
      "learning_rate": 2.262183882537249e-07,
      "loss": 2.4595,
      "step": 360
    },
    {
      "epoch": 0.8820023837902264,
      "grad_norm": 4.966123580932617,
      "learning_rate": 1.9770044256881258e-07,
      "loss": 2.4161,
      "step": 370
    },
    {
      "epoch": 0.9058402860548271,
      "grad_norm": 4.942025184631348,
      "learning_rate": 1.7065526994065972e-07,
      "loss": 2.3971,
      "step": 380
    },
    {
      "epoch": 0.929678188319428,
      "grad_norm": 5.2323102951049805,
      "learning_rate": 1.4521463173173965e-07,
      "loss": 2.3786,
      "step": 390
    },
    {
      "epoch": 0.9535160905840286,
      "grad_norm": 5.717218399047852,
      "learning_rate": 1.2150247217412185e-07,
      "loss": 2.3473,
      "step": 400
    },
    {
      "epoch": 0.9773539928486293,
      "grad_norm": 5.0170416831970215,
      "learning_rate": 9.963431452563331e-08,
      "loss": 2.401,
      "step": 410
    },
    {
      "epoch": 1.0,
      "grad_norm": 7.488749980926514,
      "learning_rate": 7.971669825215787e-08,
      "loss": 2.3823,
      "step": 420
    },
    {
      "epoch": 1.0238379022646007,
      "grad_norm": 6.092284202575684,
      "learning_rate": 6.184665997806831e-08,
      "loss": 2.3511,
      "step": 430
    },
    {
      "epoch": 1.0476758045292014,
      "grad_norm": 5.5308709144592285,
      "learning_rate": 4.611126073354571e-08,
      "loss": 2.4092,
      "step": 440
    },
    {
      "epoch": 1.071513706793802,
      "grad_norm": 5.2230119705200195,
      "learning_rate": 3.258716180199278e-08,
      "loss": 2.3573,
      "step": 450
    },
    {
      "epoch": 1.0953516090584028,
      "grad_norm": 4.9154510498046875,
      "learning_rate": 2.1340251233966377e-08,
      "loss": 2.3055,
      "step": 460
    },
    {
      "epoch": 1.1191895113230035,
      "grad_norm": 4.627120494842529,
      "learning_rate": 1.2425322847218367e-08,
      "loss": 2.3723,
      "step": 470
    },
    {
      "epoch": 1.1430274135876042,
      "grad_norm": 6.616613864898682,
      "learning_rate": 5.8858092767236076e-09,
      "loss": 2.2949,
      "step": 480
    },
    {
      "epoch": 1.166865315852205,
      "grad_norm": 5.475731372833252,
      "learning_rate": 1.7535703752478147e-09,
      "loss": 2.4503,
      "step": 490
    },
    {
      "epoch": 1.1907032181168058,
      "grad_norm": 5.741678237915039,
      "learning_rate": 4.873799534788059e-11,
      "loss": 2.2666,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7169104310525952.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}