{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.496,
  "eval_steps": 500,
  "global_step": 62,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008,
      "grad_norm": 25.248437881469727,
      "learning_rate": 0.0,
      "loss": 6.0663,
      "step": 1
    },
    {
      "epoch": 0.016,
      "grad_norm": 22.579456329345703,
      "learning_rate": 0.0001,
      "loss": 7.6342,
      "step": 2
    },
    {
      "epoch": 0.024,
      "grad_norm": 15.876864433288574,
      "learning_rate": 0.0002,
      "loss": 3.9035,
      "step": 3
    },
    {
      "epoch": 0.032,
      "grad_norm": 8.342036247253418,
      "learning_rate": 0.0001998629534754574,
      "loss": 2.2364,
      "step": 4
    },
    {
      "epoch": 0.04,
      "grad_norm": 5.403432369232178,
      "learning_rate": 0.00019945218953682734,
      "loss": 2.0261,
      "step": 5
    },
    {
      "epoch": 0.048,
      "grad_norm": 4.060673236846924,
      "learning_rate": 0.00019876883405951377,
      "loss": 1.6871,
      "step": 6
    },
    {
      "epoch": 0.056,
      "grad_norm": 2.3833701610565186,
      "learning_rate": 0.00019781476007338058,
      "loss": 0.9948,
      "step": 7
    },
    {
      "epoch": 0.064,
      "grad_norm": 3.367588996887207,
      "learning_rate": 0.00019659258262890683,
      "loss": 1.2749,
      "step": 8
    },
    {
      "epoch": 0.072,
      "grad_norm": 2.9617669582366943,
      "learning_rate": 0.00019510565162951537,
      "loss": 0.7384,
      "step": 9
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.604466438293457,
      "learning_rate": 0.00019335804264972018,
      "loss": 0.6554,
      "step": 10
    },
    {
      "epoch": 0.088,
      "grad_norm": 2.062431573867798,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.4422,
      "step": 11
    },
    {
      "epoch": 0.096,
      "grad_norm": 1.2651094198226929,
      "learning_rate": 0.0001891006524188368,
      "loss": 0.4698,
      "step": 12
    },
    {
      "epoch": 0.104,
      "grad_norm": 1.352691411972046,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.5757,
      "step": 13
    },
    {
      "epoch": 0.112,
      "grad_norm": 1.8969707489013672,
      "learning_rate": 0.00018386705679454242,
      "loss": 0.8865,
      "step": 14
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.659216284751892,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.5039,
      "step": 15
    },
    {
      "epoch": 0.128,
      "grad_norm": 1.4563014507293701,
      "learning_rate": 0.0001777145961456971,
      "loss": 0.5335,
      "step": 16
    },
    {
      "epoch": 0.136,
      "grad_norm": 1.4130172729492188,
      "learning_rate": 0.00017431448254773944,
      "loss": 0.4197,
      "step": 17
    },
    {
      "epoch": 0.144,
      "grad_norm": 2.0561697483062744,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.3086,
      "step": 18
    },
    {
      "epoch": 0.152,
      "grad_norm": 1.6160928010940552,
      "learning_rate": 0.00016691306063588583,
      "loss": 0.4929,
      "step": 19
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8215347528457642,
      "learning_rate": 0.00016293203910498376,
      "loss": 0.6025,
      "step": 20
    },
    {
      "epoch": 0.168,
      "grad_norm": 1.6622347831726074,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.2183,
      "step": 21
    },
    {
      "epoch": 0.176,
      "grad_norm": 0.9541563987731934,
      "learning_rate": 0.00015446390350150273,
      "loss": 0.2127,
      "step": 22
    },
    {
      "epoch": 0.184,
      "grad_norm": 2.495389938354492,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.5699,
      "step": 23
    },
    {
      "epoch": 0.192,
      "grad_norm": 1.2959003448486328,
      "learning_rate": 0.00014539904997395468,
      "loss": 0.4397,
      "step": 24
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8394997119903564,
      "learning_rate": 0.00014067366430758004,
      "loss": 0.3454,
      "step": 25
    },
    {
      "epoch": 0.208,
      "grad_norm": 1.2677969932556152,
      "learning_rate": 0.00013583679495453,
      "loss": 0.2386,
      "step": 26
    },
    {
      "epoch": 0.216,
      "grad_norm": 1.2156089544296265,
      "learning_rate": 0.00013090169943749476,
      "loss": 0.2735,
      "step": 27
    },
    {
      "epoch": 0.224,
      "grad_norm": 1.1582199335098267,
      "learning_rate": 0.00012588190451025207,
      "loss": 0.3418,
      "step": 28
    },
    {
      "epoch": 0.232,
      "grad_norm": 1.468334674835205,
      "learning_rate": 0.00012079116908177593,
      "loss": 0.3395,
      "step": 29
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.29418420791626,
      "learning_rate": 0.0001156434465040231,
      "loss": 0.3724,
      "step": 30
    },
    {
      "epoch": 0.248,
      "grad_norm": 1.1691845655441284,
      "learning_rate": 0.00011045284632676536,
      "loss": 0.3646,
      "step": 31
    },
    {
      "epoch": 0.256,
      "grad_norm": 1.6384227275848389,
      "learning_rate": 0.0001052335956242944,
      "loss": 0.4455,
      "step": 32
    },
    {
      "epoch": 0.264,
      "grad_norm": 2.050229072570801,
      "learning_rate": 0.0001,
      "loss": 0.4172,
      "step": 33
    },
    {
      "epoch": 0.272,
      "grad_norm": 0.9373909831047058,
      "learning_rate": 9.476640437570562e-05,
      "loss": 0.2038,
      "step": 34
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.8982837796211243,
      "learning_rate": 8.954715367323468e-05,
      "loss": 0.1809,
      "step": 35
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.8005573153495789,
      "learning_rate": 8.435655349597689e-05,
      "loss": 0.2386,
      "step": 36
    },
    {
      "epoch": 0.296,
      "grad_norm": 1.281983494758606,
      "learning_rate": 7.920883091822408e-05,
      "loss": 0.3369,
      "step": 37
    },
    {
      "epoch": 0.304,
      "grad_norm": 1.69044029712677,
      "learning_rate": 7.411809548974792e-05,
      "loss": 0.1734,
      "step": 38
    },
    {
      "epoch": 0.312,
      "grad_norm": 0.7630087733268738,
      "learning_rate": 6.909830056250527e-05,
      "loss": 0.211,
      "step": 39
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7337403297424316,
      "learning_rate": 6.416320504546997e-05,
      "loss": 0.1901,
      "step": 40
    },
    {
      "epoch": 0.328,
      "grad_norm": 2.1493542194366455,
      "learning_rate": 5.9326335692419995e-05,
      "loss": 0.2632,
      "step": 41
    },
    {
      "epoch": 0.336,
      "grad_norm": 1.6053903102874756,
      "learning_rate": 5.4600950026045326e-05,
      "loss": 0.2899,
      "step": 42
    },
    {
      "epoch": 0.344,
      "grad_norm": 1.245193362236023,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.2924,
      "step": 43
    },
    {
      "epoch": 0.352,
      "grad_norm": 1.2301777601242065,
      "learning_rate": 4.5536096498497295e-05,
      "loss": 0.3287,
      "step": 44
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8539015054702759,
      "learning_rate": 4.12214747707527e-05,
      "loss": 0.175,
      "step": 45
    },
    {
      "epoch": 0.368,
      "grad_norm": 0.803677499294281,
      "learning_rate": 3.7067960895016275e-05,
      "loss": 0.219,
      "step": 46
    },
    {
      "epoch": 0.376,
      "grad_norm": 0.8334856629371643,
      "learning_rate": 3.308693936411421e-05,
      "loss": 0.1989,
      "step": 47
    },
    {
      "epoch": 0.384,
      "grad_norm": 1.2251709699630737,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 0.4258,
      "step": 48
    },
    {
      "epoch": 0.392,
      "grad_norm": 2.798891067504883,
      "learning_rate": 2.5685517452260567e-05,
      "loss": 0.2843,
      "step": 49
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4188216924667358,
      "learning_rate": 2.2285403854302912e-05,
      "loss": 0.4029,
      "step": 50
    },
    {
      "epoch": 0.408,
      "grad_norm": 1.2292898893356323,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 0.2635,
      "step": 51
    },
    {
      "epoch": 0.416,
      "grad_norm": 1.7358604669570923,
      "learning_rate": 1.6132943205457606e-05,
      "loss": 0.5454,
      "step": 52
    },
    {
      "epoch": 0.424,
      "grad_norm": 0.9566776156425476,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.1983,
      "step": 53
    },
    {
      "epoch": 0.432,
      "grad_norm": 2.299311876296997,
      "learning_rate": 1.0899347581163221e-05,
      "loss": 0.493,
      "step": 54
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1702775955200195,
      "learning_rate": 8.645454235739903e-06,
      "loss": 0.2654,
      "step": 55
    },
    {
      "epoch": 0.448,
      "grad_norm": 1.6890839338302612,
      "learning_rate": 6.6419573502798374e-06,
      "loss": 0.269,
      "step": 56
    },
    {
      "epoch": 0.456,
      "grad_norm": 1.2613484859466553,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 0.3043,
      "step": 57
    },
    {
      "epoch": 0.464,
      "grad_norm": 1.2633836269378662,
      "learning_rate": 3.40741737109318e-06,
      "loss": 0.2219,
      "step": 58
    },
    {
      "epoch": 0.472,
      "grad_norm": 1.1540069580078125,
      "learning_rate": 2.1852399266194314e-06,
      "loss": 0.2728,
      "step": 59
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3534456491470337,
      "learning_rate": 1.231165940486234e-06,
      "loss": 0.3004,
      "step": 60
    },
    {
      "epoch": 0.488,
      "grad_norm": 1.2902393341064453,
      "learning_rate": 5.478104631726711e-07,
      "loss": 0.4588,
      "step": 61
    },
    {
      "epoch": 0.496,
      "grad_norm": 1.371500849723816,
      "learning_rate": 1.3704652454261668e-07,
      "loss": 0.3775,
      "step": 62
    }
  ],
  "logging_steps": 1,
  "max_steps": 62,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 62,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1902189176239968.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}