{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6827233074151337,
  "eval_steps": 500,
  "global_step": 5400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012643024211391365,
      "grad_norm": 0.8759542107582092,
      "learning_rate": 0.0004987484197218711,
      "loss": 3.8722,
      "step": 100
    },
    {
      "epoch": 0.02528604842278273,
      "grad_norm": 0.9236809015274048,
      "learning_rate": 0.0004974841972187105,
      "loss": 3.4858,
      "step": 200
    },
    {
      "epoch": 0.037929072634174096,
      "grad_norm": 0.8505849242210388,
      "learning_rate": 0.0004962199747155499,
      "loss": 3.3438,
      "step": 300
    },
    {
      "epoch": 0.05057209684556546,
      "grad_norm": 0.8044902682304382,
      "learning_rate": 0.0004949557522123893,
      "loss": 3.2837,
      "step": 400
    },
    {
      "epoch": 0.06321512105695683,
      "grad_norm": 0.7873915433883667,
      "learning_rate": 0.0004936915297092288,
      "loss": 3.194,
      "step": 500
    },
    {
      "epoch": 0.07585814526834819,
      "grad_norm": 0.7622674107551575,
      "learning_rate": 0.0004924273072060682,
      "loss": 3.126,
      "step": 600
    },
    {
      "epoch": 0.08850116947973956,
      "grad_norm": 0.8418383002281189,
      "learning_rate": 0.0004911630847029077,
      "loss": 3.0518,
      "step": 700
    },
    {
      "epoch": 0.10114419369113092,
      "grad_norm": 0.7434802055358887,
      "learning_rate": 0.0004898988621997471,
      "loss": 3.0434,
      "step": 800
    },
    {
      "epoch": 0.11378721790252229,
      "grad_norm": 0.8024940490722656,
      "learning_rate": 0.0004886346396965867,
      "loss": 2.9942,
      "step": 900
    },
    {
      "epoch": 0.12643024211391365,
      "grad_norm": 0.8081286549568176,
      "learning_rate": 0.00048737041719342606,
      "loss": 2.9878,
      "step": 1000
    },
    {
      "epoch": 0.139073266325305,
      "grad_norm": 0.7084025144577026,
      "learning_rate": 0.0004861061946902655,
      "loss": 2.9314,
      "step": 1100
    },
    {
      "epoch": 0.15171629053669639,
      "grad_norm": 0.7388598322868347,
      "learning_rate": 0.000484841972187105,
      "loss": 2.9152,
      "step": 1200
    },
    {
      "epoch": 0.16435931474808774,
      "grad_norm": 0.7991167306900024,
      "learning_rate": 0.0004835777496839444,
      "loss": 2.917,
      "step": 1300
    },
    {
      "epoch": 0.17700233895947912,
      "grad_norm": 0.7912219762802124,
      "learning_rate": 0.0004823135271807838,
      "loss": 2.8725,
      "step": 1400
    },
    {
      "epoch": 0.18964536317087047,
      "grad_norm": 0.8445726633071899,
      "learning_rate": 0.00048104930467762324,
      "loss": 2.8843,
      "step": 1500
    },
    {
      "epoch": 0.20228838738226185,
      "grad_norm": 0.7209933400154114,
      "learning_rate": 0.0004797850821744627,
      "loss": 2.8298,
      "step": 1600
    },
    {
      "epoch": 0.2149314115936532,
      "grad_norm": 0.7905689477920532,
      "learning_rate": 0.00047852085967130215,
      "loss": 2.862,
      "step": 1700
    },
    {
      "epoch": 0.22757443580504458,
      "grad_norm": 0.745158314704895,
      "learning_rate": 0.0004772566371681416,
      "loss": 2.781,
      "step": 1800
    },
    {
      "epoch": 0.24021746001643593,
      "grad_norm": 0.7118976712226868,
      "learning_rate": 0.00047599241466498107,
      "loss": 2.7783,
      "step": 1900
    },
    {
      "epoch": 0.2528604842278273,
      "grad_norm": 0.7946869730949402,
      "learning_rate": 0.0004747281921618205,
      "loss": 2.7825,
      "step": 2000
    },
    {
      "epoch": 0.26550350843921866,
      "grad_norm": 0.7247060537338257,
      "learning_rate": 0.00047346396965865993,
      "loss": 2.7839,
      "step": 2100
    },
    {
      "epoch": 0.27814653265061,
      "grad_norm": 0.7256483435630798,
      "learning_rate": 0.0004721997471554994,
      "loss": 2.7731,
      "step": 2200
    },
    {
      "epoch": 0.29078955686200136,
      "grad_norm": 0.7218326926231384,
      "learning_rate": 0.0004709355246523388,
      "loss": 2.8133,
      "step": 2300
    },
    {
      "epoch": 0.30343258107339277,
      "grad_norm": 0.7010550498962402,
      "learning_rate": 0.00046967130214917825,
      "loss": 2.7432,
      "step": 2400
    },
    {
      "epoch": 0.3160756052847841,
      "grad_norm": 0.7964794635772705,
      "learning_rate": 0.0004684070796460177,
      "loss": 2.7811,
      "step": 2500
    },
    {
      "epoch": 0.3287186294961755,
      "grad_norm": 0.8072954416275024,
      "learning_rate": 0.00046714285714285716,
      "loss": 2.7089,
      "step": 2600
    },
    {
      "epoch": 0.3413616537075668,
      "grad_norm": 0.6594070196151733,
      "learning_rate": 0.0004658786346396966,
      "loss": 2.7161,
      "step": 2700
    },
    {
      "epoch": 0.35400467791895823,
      "grad_norm": 0.704298734664917,
      "learning_rate": 0.000464614412136536,
      "loss": 2.698,
      "step": 2800
    },
    {
      "epoch": 0.3666477021303496,
      "grad_norm": 0.7253355383872986,
      "learning_rate": 0.0004633501896333755,
      "loss": 2.696,
      "step": 2900
    },
    {
      "epoch": 0.37929072634174094,
      "grad_norm": 0.7043545246124268,
      "learning_rate": 0.00046208596713021493,
      "loss": 2.6807,
      "step": 3000
    },
    {
      "epoch": 0.3919337505531323,
      "grad_norm": 0.6532794237136841,
      "learning_rate": 0.0004608217446270544,
      "loss": 2.6985,
      "step": 3100
    },
    {
      "epoch": 0.4045767747645237,
      "grad_norm": 0.7272788286209106,
      "learning_rate": 0.0004595575221238938,
      "loss": 2.6767,
      "step": 3200
    },
    {
      "epoch": 0.41721979897591505,
      "grad_norm": 0.695071280002594,
      "learning_rate": 0.00045829329962073325,
      "loss": 2.6609,
      "step": 3300
    },
    {
      "epoch": 0.4298628231873064,
      "grad_norm": 0.7230761051177979,
      "learning_rate": 0.0004570290771175727,
      "loss": 2.6488,
      "step": 3400
    },
    {
      "epoch": 0.44250584739869775,
      "grad_norm": 0.7420136332511902,
      "learning_rate": 0.00045576485461441217,
      "loss": 2.6507,
      "step": 3500
    },
    {
      "epoch": 0.45514887161008916,
      "grad_norm": 0.7115824222564697,
      "learning_rate": 0.00045450063211125157,
      "loss": 2.644,
      "step": 3600
    },
    {
      "epoch": 0.4677918958214805,
      "grad_norm": 0.6667810678482056,
      "learning_rate": 0.000453236409608091,
      "loss": 2.6841,
      "step": 3700
    },
    {
      "epoch": 0.48043492003287186,
      "grad_norm": 0.6836283802986145,
      "learning_rate": 0.0004519721871049305,
      "loss": 2.6462,
      "step": 3800
    },
    {
      "epoch": 0.4930779442442632,
      "grad_norm": 0.7117214202880859,
      "learning_rate": 0.00045070796460176994,
      "loss": 2.6201,
      "step": 3900
    },
    {
      "epoch": 0.5057209684556546,
      "grad_norm": 0.6085230708122253,
      "learning_rate": 0.0004494437420986094,
      "loss": 2.6198,
      "step": 4000
    },
    {
      "epoch": 0.5183639926670459,
      "grad_norm": 0.663446843624115,
      "learning_rate": 0.0004481795195954488,
      "loss": 2.5972,
      "step": 4100
    },
    {
      "epoch": 0.5310070168784373,
      "grad_norm": 0.670093297958374,
      "learning_rate": 0.00044691529709228826,
      "loss": 2.6052,
      "step": 4200
    },
    {
      "epoch": 0.5436500410898287,
      "grad_norm": 0.6052363514900208,
      "learning_rate": 0.00044565107458912766,
      "loss": 2.6038,
      "step": 4300
    },
    {
      "epoch": 0.55629306530122,
      "grad_norm": 0.6686172485351562,
      "learning_rate": 0.0004443868520859671,
      "loss": 2.5484,
      "step": 4400
    },
    {
      "epoch": 0.5689360895126114,
      "grad_norm": 0.6228762865066528,
      "learning_rate": 0.0004431226295828066,
      "loss": 2.6119,
      "step": 4500
    },
    {
      "epoch": 0.5815791137240027,
      "grad_norm": 0.6712014079093933,
      "learning_rate": 0.00044185840707964603,
      "loss": 2.581,
      "step": 4600
    },
    {
      "epoch": 0.5942221379353941,
      "grad_norm": 0.6657222509384155,
      "learning_rate": 0.0004405941845764855,
      "loss": 2.5822,
      "step": 4700
    },
    {
      "epoch": 0.6068651621467855,
      "grad_norm": 0.639202356338501,
      "learning_rate": 0.00043932996207332494,
      "loss": 2.5736,
      "step": 4800
    },
    {
      "epoch": 0.6195081863581768,
      "grad_norm": 0.654742419719696,
      "learning_rate": 0.0004380657395701644,
      "loss": 2.5515,
      "step": 4900
    },
    {
      "epoch": 0.6321512105695682,
      "grad_norm": 0.704134464263916,
      "learning_rate": 0.0004368015170670038,
      "loss": 2.5499,
      "step": 5000
    },
    {
      "epoch": 0.6447942347809597,
      "grad_norm": 0.6817001104354858,
      "learning_rate": 0.0004355372945638432,
      "loss": 2.611,
      "step": 5100
    },
    {
      "epoch": 0.657437258992351,
      "grad_norm": 0.6351118087768555,
      "learning_rate": 0.00043427307206068266,
      "loss": 2.566,
      "step": 5200
    },
    {
      "epoch": 0.6700802832037424,
      "grad_norm": 0.6755563020706177,
      "learning_rate": 0.0004330088495575221,
      "loss": 2.5771,
      "step": 5300
    },
    {
      "epoch": 0.6827233074151337,
      "grad_norm": 0.6010642647743225,
      "learning_rate": 0.0004317446270543616,
      "loss": 2.5216,
      "step": 5400
    }
  ],
  "logging_steps": 100,
  "max_steps": 39550,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.2329309184e+16,
  "train_batch_size": 15,
  "trial_name": null,
  "trial_params": null
}