{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 243,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09917355371900827,
      "grad_norm": 0.05352747067809105,
      "learning_rate": 8.400000000000001e-05,
      "loss": 0.7596395015716553,
      "step": 8
    },
    {
      "epoch": 0.19834710743801653,
      "grad_norm": 0.024916447699069977,
      "learning_rate": 0.00018,
      "loss": 0.6222814321517944,
      "step": 16
    },
    {
      "epoch": 0.2975206611570248,
      "grad_norm": 0.016597295179963112,
      "learning_rate": 0.00017945376095861547,
      "loss": 0.5472516417503357,
      "step": 24
    },
    {
      "epoch": 0.39669421487603307,
      "grad_norm": 0.012275703251361847,
      "learning_rate": 0.00017782167443646923,
      "loss": 0.517052948474884,
      "step": 32
    },
    {
      "epoch": 0.49586776859504134,
      "grad_norm": 0.011337196454405785,
      "learning_rate": 0.00017512355175305713,
      "loss": 0.501731276512146,
      "step": 40
    },
    {
      "epoch": 0.5950413223140496,
      "grad_norm": 0.012592756189405918,
      "learning_rate": 0.000171392144462782,
      "loss": 0.5070059299468994,
      "step": 48
    },
    {
      "epoch": 0.6942148760330579,
      "grad_norm": 0.012089293450117111,
      "learning_rate": 0.00016667274679544943,
      "loss": 0.48694121837615967,
      "step": 56
    },
    {
      "epoch": 0.7933884297520661,
      "grad_norm": 0.01202303171157837,
      "learning_rate": 0.00016102264584567545,
      "loss": 0.4653348922729492,
      "step": 64
    },
    {
      "epoch": 0.8925619834710744,
      "grad_norm": 0.011642170138657093,
      "learning_rate": 0.00015451042618516063,
      "loss": 0.4749022126197815,
      "step": 72
    },
    {
      "epoch": 0.9917355371900827,
      "grad_norm": 0.01265915110707283,
      "learning_rate": 0.00014721513733889716,
      "loss": 0.475362092256546,
      "step": 80
    },
    {
      "epoch": 1.0867768595041323,
      "grad_norm": 0.013994473032653332,
      "learning_rate": 0.00013922533423101844,
      "loss": 0.4846913516521454,
      "step": 88
    },
    {
      "epoch": 1.1859504132231404,
      "grad_norm": 0.015602202154695988,
      "learning_rate": 0.00013063800224798005,
      "loss": 0.4634976089000702,
      "step": 96
    },
    {
      "epoch": 1.2851239669421488,
      "grad_norm": 0.015496148727834225,
      "learning_rate": 0.00012155737996734791,
      "loss": 0.4593273103237152,
      "step": 104
    },
    {
      "epoch": 1.384297520661157,
      "grad_norm": 0.014515814371407032,
      "learning_rate": 0.00011209369384267194,
      "loss": 0.47245365381240845,
      "step": 112
    },
    {
      "epoch": 1.4834710743801653,
      "grad_norm": 0.01587059162557125,
      "learning_rate": 0.00010236182020365675,
      "loss": 0.4527878761291504,
      "step": 120
    },
    {
      "epoch": 1.5826446280991735,
      "grad_norm": 0.016186628490686417,
      "learning_rate": 9.24798908131346e-05,
      "loss": 0.4529184401035309,
      "step": 128
    },
    {
      "epoch": 1.6818181818181817,
      "grad_norm": 0.015668360516428947,
      "learning_rate": 8.25678589074901e-05,
      "loss": 0.45313313603401184,
      "step": 136
    },
    {
      "epoch": 1.78099173553719,
      "grad_norm": 0.0156264491379261,
      "learning_rate": 7.274604312686357e-05,
      "loss": 0.44556480646133423,
      "step": 144
    },
    {
      "epoch": 1.8801652892561984,
      "grad_norm": 0.01666293293237686,
      "learning_rate": 6.313366700984752e-05,
      "loss": 0.44562897086143494,
      "step": 152
    },
    {
      "epoch": 1.9793388429752066,
      "grad_norm": 0.01604226417839527,
      "learning_rate": 5.384741178123277e-05,
      "loss": 0.459658145904541,
      "step": 160
    },
    {
      "epoch": 2.074380165289256,
      "grad_norm": 0.017228346318006516,
      "learning_rate": 4.500000000000002e-05,
      "loss": 0.435981810092926,
      "step": 168
    },
    {
      "epoch": 2.1735537190082646,
      "grad_norm": 0.01715254969894886,
      "learning_rate": 3.669882726015181e-05,
      "loss": 0.4425530731678009,
      "step": 176
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 0.016410550102591515,
      "learning_rate": 2.904465855368332e-05,
      "loss": 0.4361629784107208,
      "step": 184
    },
    {
      "epoch": 2.371900826446281,
      "grad_norm": 0.01654178649187088,
      "learning_rate": 2.213040512007935e-05,
      "loss": 0.4368935823440552,
      "step": 192
    },
    {
      "epoch": 2.4710743801652892,
      "grad_norm": 0.017943061888217926,
      "learning_rate": 1.6039996629727944e-05,
      "loss": 0.44572409987449646,
      "step": 200
    },
    {
      "epoch": 2.5702479338842976,
      "grad_norm": 0.01812249794602394,
      "learning_rate": 1.0847362391415993e-05,
      "loss": 0.4320479929447174,
      "step": 208
    },
    {
      "epoch": 2.669421487603306,
      "grad_norm": 0.017407618463039398,
      "learning_rate": 6.6155339506678416e-06,
      "loss": 0.43307027220726013,
      "step": 216
    },
    {
      "epoch": 2.768595041322314,
      "grad_norm": 0.017422957345843315,
      "learning_rate": 3.395879972167463e-06,
      "loss": 0.4369695782661438,
      "step": 224
    },
    {
      "epoch": 2.8677685950413223,
      "grad_norm": 0.018260862678289413,
      "learning_rate": 1.227482693754991e-06,
      "loss": 0.43731415271759033,
      "step": 232
    },
    {
      "epoch": 2.9669421487603307,
      "grad_norm": 0.018071839585900307,
      "learning_rate": 1.3666352100154435e-07,
      "loss": 0.4352337419986725,
      "step": 240
    }
  ],
  "logging_steps": 8,
  "max_steps": 243,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.997164774211256e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}