{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 324,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.030864197530864196,
      "grad_norm": 0.2539002488078261,
      "learning_rate": 1e-05,
      "loss": 0.4363,
      "num_input_tokens_seen": 0,
      "step": 10
    },
    {
      "epoch": 0.06172839506172839,
      "grad_norm": 0.2024392131106122,
      "learning_rate": 9.681528662420384e-06,
      "loss": 0.2931,
      "num_input_tokens_seen": 0,
      "step": 20
    },
    {
      "epoch": 0.09259259259259259,
      "grad_norm": 0.1711436526142984,
      "learning_rate": 9.363057324840765e-06,
      "loss": 0.2336,
      "num_input_tokens_seen": 0,
      "step": 30
    },
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 0.13488565924080936,
      "learning_rate": 9.044585987261148e-06,
      "loss": 0.2006,
      "num_input_tokens_seen": 0,
      "step": 40
    },
    {
      "epoch": 0.15432098765432098,
      "grad_norm": 0.13503015545972158,
      "learning_rate": 8.726114649681529e-06,
      "loss": 0.1855,
      "num_input_tokens_seen": 0,
      "step": 50
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 0.16484545379786694,
      "learning_rate": 8.407643312101912e-06,
      "loss": 0.1708,
      "num_input_tokens_seen": 0,
      "step": 60
    },
    {
      "epoch": 0.21604938271604937,
      "grad_norm": 0.16373587348563579,
      "learning_rate": 8.089171974522295e-06,
      "loss": 0.1622,
      "num_input_tokens_seen": 0,
      "step": 70
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 0.1524418787913334,
      "learning_rate": 7.770700636942676e-06,
      "loss": 0.1491,
      "num_input_tokens_seen": 0,
      "step": 80
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 0.13239482123815,
      "learning_rate": 7.452229299363057e-06,
      "loss": 0.1615,
      "num_input_tokens_seen": 0,
      "step": 90
    },
    {
      "epoch": 0.30864197530864196,
      "grad_norm": 0.21888788342573567,
      "learning_rate": 7.13375796178344e-06,
      "loss": 0.1531,
      "num_input_tokens_seen": 0,
      "step": 100
    },
    {
      "epoch": 0.3395061728395062,
      "grad_norm": 0.14773397182859288,
      "learning_rate": 6.815286624203822e-06,
      "loss": 0.1549,
      "num_input_tokens_seen": 0,
      "step": 110
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.14063670969844863,
      "learning_rate": 6.496815286624204e-06,
      "loss": 0.1451,
      "num_input_tokens_seen": 0,
      "step": 120
    },
    {
      "epoch": 0.4012345679012346,
      "grad_norm": 0.1483882380802712,
      "learning_rate": 6.178343949044586e-06,
      "loss": 0.1411,
      "num_input_tokens_seen": 0,
      "step": 130
    },
    {
      "epoch": 0.43209876543209874,
      "grad_norm": 0.19499270485543743,
      "learning_rate": 5.859872611464969e-06,
      "loss": 0.1435,
      "num_input_tokens_seen": 0,
      "step": 140
    },
    {
      "epoch": 0.46296296296296297,
      "grad_norm": 0.14731278284216748,
      "learning_rate": 5.541401273885351e-06,
      "loss": 0.1399,
      "num_input_tokens_seen": 0,
      "step": 150
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 0.14099478047141387,
      "learning_rate": 5.222929936305733e-06,
      "loss": 0.1375,
      "num_input_tokens_seen": 0,
      "step": 160
    },
    {
      "epoch": 0.5246913580246914,
      "grad_norm": 0.13980815390161488,
      "learning_rate": 4.904458598726115e-06,
      "loss": 0.1447,
      "num_input_tokens_seen": 0,
      "step": 170
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 0.15870399104525085,
      "learning_rate": 4.585987261146497e-06,
      "loss": 0.1349,
      "num_input_tokens_seen": 0,
      "step": 180
    },
    {
      "epoch": 0.5864197530864198,
      "grad_norm": 0.13469923079260826,
      "learning_rate": 4.26751592356688e-06,
      "loss": 0.1353,
      "num_input_tokens_seen": 0,
      "step": 190
    },
    {
      "epoch": 0.6172839506172839,
      "grad_norm": 0.16157642492469387,
      "learning_rate": 3.949044585987262e-06,
      "loss": 0.1436,
      "num_input_tokens_seen": 0,
      "step": 200
    },
    {
      "epoch": 0.6481481481481481,
      "grad_norm": 0.16708609597150495,
      "learning_rate": 3.6305732484076435e-06,
      "loss": 0.1264,
      "num_input_tokens_seen": 0,
      "step": 210
    },
    {
      "epoch": 0.6790123456790124,
      "grad_norm": 0.14770437980798296,
      "learning_rate": 3.3121019108280255e-06,
      "loss": 0.1228,
      "num_input_tokens_seen": 0,
      "step": 220
    },
    {
      "epoch": 0.7098765432098766,
      "grad_norm": 0.13900965859775397,
      "learning_rate": 2.993630573248408e-06,
      "loss": 0.1196,
      "num_input_tokens_seen": 0,
      "step": 230
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.12123168814466377,
      "learning_rate": 2.67515923566879e-06,
      "loss": 0.1291,
      "num_input_tokens_seen": 0,
      "step": 240
    },
    {
      "epoch": 0.7716049382716049,
      "grad_norm": 0.12106750215748638,
      "learning_rate": 2.356687898089172e-06,
      "loss": 0.1306,
      "num_input_tokens_seen": 0,
      "step": 250
    },
    {
      "epoch": 0.8024691358024691,
      "grad_norm": 0.1393388456599626,
      "learning_rate": 2.0382165605095544e-06,
      "loss": 0.1228,
      "num_input_tokens_seen": 0,
      "step": 260
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.18012074978813084,
      "learning_rate": 1.7197452229299363e-06,
      "loss": 0.1178,
      "num_input_tokens_seen": 0,
      "step": 270
    },
    {
      "epoch": 0.8641975308641975,
      "grad_norm": 0.12683446108621385,
      "learning_rate": 1.4012738853503185e-06,
      "loss": 0.1176,
      "num_input_tokens_seen": 0,
      "step": 280
    },
    {
      "epoch": 0.8950617283950617,
      "grad_norm": 0.14596666878755418,
      "learning_rate": 1.0828025477707007e-06,
      "loss": 0.1126,
      "num_input_tokens_seen": 0,
      "step": 290
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 0.1345601273071694,
      "learning_rate": 7.643312101910829e-07,
      "loss": 0.1236,
      "num_input_tokens_seen": 0,
      "step": 300
    },
    {
      "epoch": 0.9567901234567902,
      "grad_norm": 0.13789896926760864,
      "learning_rate": 4.45859872611465e-07,
      "loss": 0.1103,
      "num_input_tokens_seen": 0,
      "step": 310
    },
    {
      "epoch": 0.9876543209876543,
      "grad_norm": 0.13336398116511375,
      "learning_rate": 1.2738853503184715e-07,
      "loss": 0.1111,
      "num_input_tokens_seen": 0,
      "step": 320
    }
  ],
  "logging_steps": 10,
  "max_steps": 324,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 727599714140160.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}