| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 237, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 1.8735829591751099, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 3.2478, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.7458922863006592, |
| "learning_rate": 1.9655172413793106e-05, |
| "loss": 3.1764, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 1.80453622341156, |
| "learning_rate": 1.9224137931034484e-05, |
| "loss": 2.9433, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 1.8682630062103271, |
| "learning_rate": 1.8793103448275863e-05, |
| "loss": 2.5689, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 2.233153820037842, |
| "learning_rate": 1.8362068965517245e-05, |
| "loss": 2.1608, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 1.8297864198684692, |
| "learning_rate": 1.7931034482758623e-05, |
| "loss": 1.7308, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 1.6991928815841675, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 1.3536, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5095541401273885, |
| "grad_norm": 1.4975662231445312, |
| "learning_rate": 1.706896551724138e-05, |
| "loss": 1.0481, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5732484076433121, |
| "grad_norm": 1.4604802131652832, |
| "learning_rate": 1.663793103448276e-05, |
| "loss": 0.8506, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6369426751592356, |
| "grad_norm": 0.740442156791687, |
| "learning_rate": 1.6206896551724137e-05, |
| "loss": 0.7286, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7006369426751592, |
| "grad_norm": 0.5034754276275635, |
| "learning_rate": 1.577586206896552e-05, |
| "loss": 0.6376, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7643312101910829, |
| "grad_norm": 0.5653797388076782, |
| "learning_rate": 1.5344827586206898e-05, |
| "loss": 0.5873, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8280254777070064, |
| "grad_norm": 0.6510421633720398, |
| "learning_rate": 1.4913793103448278e-05, |
| "loss": 0.5896, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.89171974522293, |
| "grad_norm": 0.4169202148914337, |
| "learning_rate": 1.4482758620689657e-05, |
| "loss": 0.5866, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9554140127388535, |
| "grad_norm": 0.5169458985328674, |
| "learning_rate": 1.4051724137931035e-05, |
| "loss": 0.6369, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.0127388535031847, |
| "grad_norm": 0.5400104522705078, |
| "learning_rate": 1.3620689655172414e-05, |
| "loss": 0.5814, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0764331210191083, |
| "grad_norm": 0.44644486904144287, |
| "learning_rate": 1.3189655172413794e-05, |
| "loss": 0.5647, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.1401273885350318, |
| "grad_norm": 0.5076264142990112, |
| "learning_rate": 1.2758620689655174e-05, |
| "loss": 0.5476, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.2038216560509554, |
| "grad_norm": 0.46874696016311646, |
| "learning_rate": 1.2327586206896553e-05, |
| "loss": 0.4927, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.267515923566879, |
| "grad_norm": 0.5135082006454468, |
| "learning_rate": 1.1896551724137933e-05, |
| "loss": 0.5551, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3312101910828025, |
| "grad_norm": 0.5747122168540955, |
| "learning_rate": 1.1465517241379311e-05, |
| "loss": 0.5505, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.394904458598726, |
| "grad_norm": 0.5480216145515442, |
| "learning_rate": 1.103448275862069e-05, |
| "loss": 0.5135, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.4585987261146496, |
| "grad_norm": 0.5516991019248962, |
| "learning_rate": 1.060344827586207e-05, |
| "loss": 0.5969, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.5222929936305731, |
| "grad_norm": 0.6291227340698242, |
| "learning_rate": 1.0172413793103449e-05, |
| "loss": 0.521, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.5859872611464967, |
| "grad_norm": 0.5867863893508911, |
| "learning_rate": 9.741379310344829e-06, |
| "loss": 0.5218, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.6496815286624202, |
| "grad_norm": 0.6907349824905396, |
| "learning_rate": 9.310344827586207e-06, |
| "loss": 0.5387, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7133757961783438, |
| "grad_norm": 0.7726341485977173, |
| "learning_rate": 8.879310344827588e-06, |
| "loss": 0.4955, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.7770700636942676, |
| "grad_norm": 0.6937519907951355, |
| "learning_rate": 8.448275862068966e-06, |
| "loss": 0.522, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.8407643312101911, |
| "grad_norm": 0.7441688776016235, |
| "learning_rate": 8.017241379310345e-06, |
| "loss": 0.4902, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.9044585987261147, |
| "grad_norm": 0.6664876937866211, |
| "learning_rate": 7.586206896551724e-06, |
| "loss": 0.5205, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.9681528662420382, |
| "grad_norm": 0.6684409976005554, |
| "learning_rate": 7.155172413793104e-06, |
| "loss": 0.5223, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.0254777070063694, |
| "grad_norm": 0.7442811727523804, |
| "learning_rate": 6.724137931034484e-06, |
| "loss": 0.5559, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.089171974522293, |
| "grad_norm": 0.7137691974639893, |
| "learning_rate": 6.293103448275862e-06, |
| "loss": 0.4983, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.1528662420382165, |
| "grad_norm": 0.7240712642669678, |
| "learning_rate": 5.862068965517242e-06, |
| "loss": 0.5343, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.21656050955414, |
| "grad_norm": 0.8344106078147888, |
| "learning_rate": 5.431034482758621e-06, |
| "loss": 0.4778, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.2802547770700636, |
| "grad_norm": 0.8354288935661316, |
| "learning_rate": 5e-06, |
| "loss": 0.4551, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.343949044585987, |
| "grad_norm": 0.8019914627075195, |
| "learning_rate": 4.56896551724138e-06, |
| "loss": 0.4619, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.4076433121019107, |
| "grad_norm": 0.8863728642463684, |
| "learning_rate": 4.137931034482759e-06, |
| "loss": 0.4663, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.4713375796178343, |
| "grad_norm": 0.8233481645584106, |
| "learning_rate": 3.7068965517241385e-06, |
| "loss": 0.4788, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.535031847133758, |
| "grad_norm": 0.9060497283935547, |
| "learning_rate": 3.2758620689655175e-06, |
| "loss": 0.4514, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.5987261146496814, |
| "grad_norm": 0.8957809805870056, |
| "learning_rate": 2.844827586206897e-06, |
| "loss": 0.4729, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.662420382165605, |
| "grad_norm": 0.9156747460365295, |
| "learning_rate": 2.4137931034482762e-06, |
| "loss": 0.5263, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.7261146496815285, |
| "grad_norm": 0.9548827409744263, |
| "learning_rate": 1.982758620689655e-06, |
| "loss": 0.4613, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.789808917197452, |
| "grad_norm": 0.9629204273223877, |
| "learning_rate": 1.5517241379310346e-06, |
| "loss": 0.507, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.853503184713376, |
| "grad_norm": 0.9625599980354309, |
| "learning_rate": 1.120689655172414e-06, |
| "loss": 0.4884, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.917197452229299, |
| "grad_norm": 0.9648425579071045, |
| "learning_rate": 6.896551724137931e-07, |
| "loss": 0.467, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.980891719745223, |
| "grad_norm": 0.93361496925354, |
| "learning_rate": 2.5862068965517245e-07, |
| "loss": 0.4868, |
| "step": 235 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 237, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.973223653835571e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|