| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25460956377173916, |
| "eval_steps": 500, |
| "global_step": 3800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 6.700251678203663e-05, |
| "grad_norm": 5.96875, |
| "learning_rate": 0.0002, |
| "loss": 2.3034, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0067002516782036625, |
| "grad_norm": 0.8203125, |
| "learning_rate": 0.00019867336683417085, |
| "loss": 1.0562, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.013400503356407325, |
| "grad_norm": 0.7734375, |
| "learning_rate": 0.00019733333333333335, |
| "loss": 0.9729, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02010075503461099, |
| "grad_norm": 0.75, |
| "learning_rate": 0.00019599329983249582, |
| "loss": 0.9693, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02680100671281465, |
| "grad_norm": 0.78125, |
| "learning_rate": 0.00019465326633165831, |
| "loss": 0.9563, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.033501258391018314, |
| "grad_norm": 0.72265625, |
| "learning_rate": 0.00019331323283082078, |
| "loss": 0.9593, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04020151006922198, |
| "grad_norm": 0.73828125, |
| "learning_rate": 0.00019197319932998325, |
| "loss": 0.9478, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.046901761747425635, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.00019063316582914575, |
| "loss": 0.9474, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0536020134256293, |
| "grad_norm": 0.7734375, |
| "learning_rate": 0.00018929313232830821, |
| "loss": 0.9438, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.060302265103832964, |
| "grad_norm": 0.70703125, |
| "learning_rate": 0.0001879530988274707, |
| "loss": 0.9297, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06700251678203663, |
| "grad_norm": 0.73828125, |
| "learning_rate": 0.00018661306532663318, |
| "loss": 0.9297, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07370276846024029, |
| "grad_norm": 0.69140625, |
| "learning_rate": 0.00018527303182579565, |
| "loss": 0.9218, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.08040302013844396, |
| "grad_norm": 0.73046875, |
| "learning_rate": 0.00018393299832495814, |
| "loss": 0.9129, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0871032718166476, |
| "grad_norm": 0.70703125, |
| "learning_rate": 0.0001825929648241206, |
| "loss": 0.9156, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.09380352349485127, |
| "grad_norm": 0.71484375, |
| "learning_rate": 0.00018125293132328308, |
| "loss": 0.9151, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.10050377517305494, |
| "grad_norm": 1.0625, |
| "learning_rate": 0.00017991289782244557, |
| "loss": 0.9115, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1072040268512586, |
| "grad_norm": 0.796875, |
| "learning_rate": 0.00017857286432160804, |
| "loss": 0.9114, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.11390427852946226, |
| "grad_norm": 0.6484375, |
| "learning_rate": 0.00017723283082077054, |
| "loss": 0.9064, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.12060453020766593, |
| "grad_norm": 0.69921875, |
| "learning_rate": 0.000175892797319933, |
| "loss": 0.9039, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.12730478188586958, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.00017455276381909548, |
| "loss": 0.8975, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.13400503356407326, |
| "grad_norm": 0.71484375, |
| "learning_rate": 0.00017321273031825794, |
| "loss": 0.8919, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1407052852422769, |
| "grad_norm": 0.80078125, |
| "learning_rate": 0.00017187269681742044, |
| "loss": 0.8902, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.14740553692048058, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.00017053266331658293, |
| "loss": 0.8941, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.15410578859868423, |
| "grad_norm": 0.6875, |
| "learning_rate": 0.0001691926298157454, |
| "loss": 0.8928, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.1608060402768879, |
| "grad_norm": 0.7578125, |
| "learning_rate": 0.00016785259631490787, |
| "loss": 0.8889, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.16750629195509156, |
| "grad_norm": 0.69921875, |
| "learning_rate": 0.00016651256281407034, |
| "loss": 0.8825, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.1742065436332952, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.00016517252931323284, |
| "loss": 0.8742, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.1809067953114989, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.00016383249581239533, |
| "loss": 0.878, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.18760704698970254, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001624924623115578, |
| "loss": 0.8679, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.19430729866790622, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.00016115242881072027, |
| "loss": 0.8618, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.20100755034610987, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.00015981239530988274, |
| "loss": 0.8695, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.20770780202431355, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.00015847236180904523, |
| "loss": 0.8769, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.2144080537025172, |
| "grad_norm": 0.6484375, |
| "learning_rate": 0.00015713232830820773, |
| "loss": 0.8665, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.22110830538072088, |
| "grad_norm": 0.7109375, |
| "learning_rate": 0.0001557922948073702, |
| "loss": 0.8572, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.22780855705892453, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.00015445226130653266, |
| "loss": 0.8696, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.23450880873712818, |
| "grad_norm": 0.7578125, |
| "learning_rate": 0.00015311222780569513, |
| "loss": 0.8543, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.24120906041533186, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.00015177219430485763, |
| "loss": 0.8618, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.2479093120935355, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.00015043216080402012, |
| "loss": 0.8553, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.25460956377173916, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001490921273031826, |
| "loss": 0.8541, |
| "step": 3800 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 14925, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0541408970364707e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|