{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6827233074151337,
  "eval_steps": 500,
  "global_step": 5400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012643024211391365,
      "grad_norm": 0.8759542107582092,
      "learning_rate": 0.0004987484197218711,
      "loss": 3.8722,
      "step": 100
    },
    {
      "epoch": 0.02528604842278273,
      "grad_norm": 0.9236809015274048,
      "learning_rate": 0.0004974841972187105,
      "loss": 3.4858,
      "step": 200
    },
    {
      "epoch": 0.037929072634174096,
      "grad_norm": 0.8505849242210388,
      "learning_rate": 0.0004962199747155499,
      "loss": 3.3438,
      "step": 300
    },
    {
      "epoch": 0.05057209684556546,
      "grad_norm": 0.8044902682304382,
      "learning_rate": 0.0004949557522123893,
      "loss": 3.2837,
      "step": 400
    },
    {
      "epoch": 0.06321512105695683,
      "grad_norm": 0.7873915433883667,
      "learning_rate": 0.0004936915297092288,
      "loss": 3.194,
      "step": 500
    },
    {
      "epoch": 0.07585814526834819,
      "grad_norm": 0.7622674107551575,
      "learning_rate": 0.0004924273072060682,
      "loss": 3.126,
      "step": 600
    },
    {
      "epoch": 0.08850116947973956,
      "grad_norm": 0.8418383002281189,
      "learning_rate": 0.0004911630847029077,
      "loss": 3.0518,
      "step": 700
    },
    {
      "epoch": 0.10114419369113092,
      "grad_norm": 0.7434802055358887,
      "learning_rate": 0.0004898988621997471,
      "loss": 3.0434,
      "step": 800
    },
    {
      "epoch": 0.11378721790252229,
      "grad_norm": 0.8024940490722656,
      "learning_rate": 0.0004886346396965867,
      "loss": 2.9942,
      "step": 900
    },
    {
      "epoch": 0.12643024211391365,
      "grad_norm": 0.8081286549568176,
      "learning_rate": 0.00048737041719342606,
      "loss": 2.9878,
      "step": 1000
    },
    {
      "epoch": 0.139073266325305,
      "grad_norm": 0.7084025144577026,
      "learning_rate": 0.0004861061946902655,
      "loss": 2.9314,
      "step": 1100
    },
    {
      "epoch": 0.15171629053669639,
      "grad_norm": 0.7388598322868347,
      "learning_rate": 0.000484841972187105,
      "loss": 2.9152,
      "step": 1200
    },
    {
      "epoch": 0.16435931474808774,
      "grad_norm": 0.7991167306900024,
      "learning_rate": 0.0004835777496839444,
      "loss": 2.917,
      "step": 1300
    },
    {
      "epoch": 0.17700233895947912,
      "grad_norm": 0.7912219762802124,
      "learning_rate": 0.0004823135271807838,
      "loss": 2.8725,
      "step": 1400
    },
    {
      "epoch": 0.18964536317087047,
      "grad_norm": 0.8445726633071899,
      "learning_rate": 0.00048104930467762324,
      "loss": 2.8843,
      "step": 1500
    },
    {
      "epoch": 0.20228838738226185,
      "grad_norm": 0.7209933400154114,
      "learning_rate": 0.0004797850821744627,
      "loss": 2.8298,
      "step": 1600
    },
    {
      "epoch": 0.2149314115936532,
      "grad_norm": 0.7905689477920532,
      "learning_rate": 0.00047852085967130215,
      "loss": 2.862,
      "step": 1700
    },
    {
      "epoch": 0.22757443580504458,
      "grad_norm": 0.745158314704895,
      "learning_rate": 0.0004772566371681416,
      "loss": 2.781,
      "step": 1800
    },
    {
      "epoch": 0.24021746001643593,
      "grad_norm": 0.7118976712226868,
      "learning_rate": 0.00047599241466498107,
      "loss": 2.7783,
      "step": 1900
    },
    {
      "epoch": 0.2528604842278273,
      "grad_norm": 0.7946869730949402,
      "learning_rate": 0.0004747281921618205,
      "loss": 2.7825,
      "step": 2000
    },
    {
      "epoch": 0.26550350843921866,
      "grad_norm": 0.7247060537338257,
      "learning_rate": 0.00047346396965865993,
      "loss": 2.7839,
      "step": 2100
    },
    {
      "epoch": 0.27814653265061,
      "grad_norm": 0.7256483435630798,
      "learning_rate": 0.0004721997471554994,
      "loss": 2.7731,
      "step": 2200
    },
    {
      "epoch": 0.29078955686200136,
      "grad_norm": 0.7218326926231384,
      "learning_rate": 0.0004709355246523388,
      "loss": 2.8133,
      "step": 2300
    },
    {
      "epoch": 0.30343258107339277,
      "grad_norm": 0.7010550498962402,
      "learning_rate": 0.00046967130214917825,
      "loss": 2.7432,
      "step": 2400
    },
    {
      "epoch": 0.3160756052847841,
      "grad_norm": 0.7964794635772705,
      "learning_rate": 0.0004684070796460177,
      "loss": 2.7811,
      "step": 2500
    },
    {
      "epoch": 0.3287186294961755,
      "grad_norm": 0.8072954416275024,
      "learning_rate": 0.00046714285714285716,
      "loss": 2.7089,
      "step": 2600
    },
    {
      "epoch": 0.3413616537075668,
      "grad_norm": 0.6594070196151733,
      "learning_rate": 0.0004658786346396966,
      "loss": 2.7161,
      "step": 2700
    },
    {
      "epoch": 0.35400467791895823,
      "grad_norm": 0.704298734664917,
      "learning_rate": 0.000464614412136536,
      "loss": 2.698,
      "step": 2800
    },
    {
      "epoch": 0.3666477021303496,
      "grad_norm": 0.7253355383872986,
      "learning_rate": 0.0004633501896333755,
      "loss": 2.696,
      "step": 2900
    },
    {
      "epoch": 0.37929072634174094,
      "grad_norm": 0.7043545246124268,
      "learning_rate": 0.00046208596713021493,
      "loss": 2.6807,
      "step": 3000
    },
    {
      "epoch": 0.3919337505531323,
      "grad_norm": 0.6532794237136841,
      "learning_rate": 0.0004608217446270544,
      "loss": 2.6985,
      "step": 3100
    },
    {
      "epoch": 0.4045767747645237,
      "grad_norm": 0.7272788286209106,
      "learning_rate": 0.0004595575221238938,
      "loss": 2.6767,
      "step": 3200
    },
    {
      "epoch": 0.41721979897591505,
      "grad_norm": 0.695071280002594,
      "learning_rate": 0.00045829329962073325,
      "loss": 2.6609,
      "step": 3300
    },
    {
      "epoch": 0.4298628231873064,
      "grad_norm": 0.7230761051177979,
      "learning_rate": 0.0004570290771175727,
      "loss": 2.6488,
      "step": 3400
    },
    {
      "epoch": 0.44250584739869775,
      "grad_norm": 0.7420136332511902,
      "learning_rate": 0.00045576485461441217,
      "loss": 2.6507,
      "step": 3500
    },
    {
      "epoch": 0.45514887161008916,
      "grad_norm": 0.7115824222564697,
      "learning_rate": 0.00045450063211125157,
      "loss": 2.644,
      "step": 3600
    },
    {
      "epoch": 0.4677918958214805,
      "grad_norm": 0.6667810678482056,
      "learning_rate": 0.000453236409608091,
      "loss": 2.6841,
      "step": 3700
    },
    {
      "epoch": 0.48043492003287186,
      "grad_norm": 0.6836283802986145,
      "learning_rate": 0.0004519721871049305,
      "loss": 2.6462,
      "step": 3800
    },
    {
      "epoch": 0.4930779442442632,
      "grad_norm": 0.7117214202880859,
      "learning_rate": 0.00045070796460176994,
      "loss": 2.6201,
      "step": 3900
    },
    {
      "epoch": 0.5057209684556546,
      "grad_norm": 0.6085230708122253,
      "learning_rate": 0.0004494437420986094,
      "loss": 2.6198,
      "step": 4000
    },
    {
      "epoch": 0.5183639926670459,
      "grad_norm": 0.663446843624115,
      "learning_rate": 0.0004481795195954488,
      "loss": 2.5972,
      "step": 4100
    },
    {
      "epoch": 0.5310070168784373,
      "grad_norm": 0.670093297958374,
      "learning_rate": 0.00044691529709228826,
      "loss": 2.6052,
      "step": 4200
    },
    {
      "epoch": 0.5436500410898287,
      "grad_norm": 0.6052363514900208,
      "learning_rate": 0.00044565107458912766,
      "loss": 2.6038,
      "step": 4300
    },
    {
      "epoch": 0.55629306530122,
      "grad_norm": 0.6686172485351562,
      "learning_rate": 0.0004443868520859671,
      "loss": 2.5484,
      "step": 4400
    },
    {
      "epoch": 0.5689360895126114,
      "grad_norm": 0.6228762865066528,
      "learning_rate": 0.0004431226295828066,
      "loss": 2.6119,
      "step": 4500
    },
    {
      "epoch": 0.5815791137240027,
      "grad_norm": 0.6712014079093933,
      "learning_rate": 0.00044185840707964603,
      "loss": 2.581,
      "step": 4600
    },
    {
      "epoch": 0.5942221379353941,
      "grad_norm": 0.6657222509384155,
      "learning_rate": 0.0004405941845764855,
      "loss": 2.5822,
      "step": 4700
    },
    {
      "epoch": 0.6068651621467855,
      "grad_norm": 0.639202356338501,
      "learning_rate": 0.00043932996207332494,
      "loss": 2.5736,
      "step": 4800
    },
    {
      "epoch": 0.6195081863581768,
      "grad_norm": 0.654742419719696,
      "learning_rate": 0.0004380657395701644,
      "loss": 2.5515,
      "step": 4900
    },
    {
      "epoch": 0.6321512105695682,
      "grad_norm": 0.704134464263916,
      "learning_rate": 0.0004368015170670038,
      "loss": 2.5499,
      "step": 5000
    },
    {
      "epoch": 0.6447942347809597,
      "grad_norm": 0.6817001104354858,
      "learning_rate": 0.0004355372945638432,
      "loss": 2.611,
      "step": 5100
    },
    {
      "epoch": 0.657437258992351,
      "grad_norm": 0.6351118087768555,
      "learning_rate": 0.00043427307206068266,
      "loss": 2.566,
      "step": 5200
    },
    {
      "epoch": 0.6700802832037424,
      "grad_norm": 0.6755563020706177,
      "learning_rate": 0.0004330088495575221,
      "loss": 2.5771,
      "step": 5300
    },
    {
      "epoch": 0.6827233074151337,
      "grad_norm": 0.6010642647743225,
      "learning_rate": 0.0004317446270543616,
      "loss": 2.5216,
      "step": 5400
    }
  ],
  "logging_steps": 100,
  "max_steps": 39550,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.2329309184e+16,
  "train_batch_size": 15,
  "trial_name": null,
  "trial_params": null
}