| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.10713807419311638, |
| "eval_steps": 500, |
| "global_step": 50, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0021427614838623278, |
| "grad_norm": 64.89540762075667, |
| "learning_rate": 5e-06, |
| "loss": 3.2563, |
| "num_input_tokens_seen": 1048576, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0042855229677246556, |
| "grad_norm": 64.53587667772443, |
| "learning_rate": 1e-05, |
| "loss": 3.2442, |
| "num_input_tokens_seen": 2097152, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0064282844515869825, |
| "grad_norm": 45.91705534890451, |
| "learning_rate": 1.5e-05, |
| "loss": 2.7435, |
| "num_input_tokens_seen": 3145728, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008571045935449311, |
| "grad_norm": 9.616577532098649, |
| "learning_rate": 2e-05, |
| "loss": 2.0932, |
| "num_input_tokens_seen": 4194304, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010713807419311638, |
| "grad_norm": 22.677650894260427, |
| "learning_rate": 2.5e-05, |
| "loss": 2.1313, |
| "num_input_tokens_seen": 5242880, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012856568903173965, |
| "grad_norm": 16.200400277863025, |
| "learning_rate": 3e-05, |
| "loss": 2.1563, |
| "num_input_tokens_seen": 6291456, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014999330387036294, |
| "grad_norm": 7.7723602177379725, |
| "learning_rate": 3.5e-05, |
| "loss": 1.9378, |
| "num_input_tokens_seen": 7340032, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017142091870898622, |
| "grad_norm": 8.349008010722175, |
| "learning_rate": 4e-05, |
| "loss": 1.8095, |
| "num_input_tokens_seen": 8388608, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.019284853354760947, |
| "grad_norm": 4.24057283338546, |
| "learning_rate": 4.5e-05, |
| "loss": 1.6948, |
| "num_input_tokens_seen": 9437184, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.021427614838623276, |
| "grad_norm": 9.738414333035731, |
| "learning_rate": 5e-05, |
| "loss": 1.7145, |
| "num_input_tokens_seen": 10485760, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.023570376322485605, |
| "grad_norm": 9.427464720180852, |
| "learning_rate": 4.999999429436697e-05, |
| "loss": 1.7124, |
| "num_input_tokens_seen": 11534336, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02571313780634793, |
| "grad_norm": 3.2252035671130743, |
| "learning_rate": 4.9999977177470465e-05, |
| "loss": 1.6181, |
| "num_input_tokens_seen": 12582912, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02785589929021026, |
| "grad_norm": 5.389002593456943, |
| "learning_rate": 4.999994864931831e-05, |
| "loss": 1.5381, |
| "num_input_tokens_seen": 13631488, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.029998660774072587, |
| "grad_norm": 2.93969979997987, |
| "learning_rate": 4.999990870992352e-05, |
| "loss": 1.532, |
| "num_input_tokens_seen": 14680064, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03214142225793491, |
| "grad_norm": 3.0591292630760933, |
| "learning_rate": 4.999985735930432e-05, |
| "loss": 1.4952, |
| "num_input_tokens_seen": 15728640, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.034284183741797244, |
| "grad_norm": 3.324378449482722, |
| "learning_rate": 4.9999794597484165e-05, |
| "loss": 1.4567, |
| "num_input_tokens_seen": 16777216, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03642694522565957, |
| "grad_norm": 4.561130115369689, |
| "learning_rate": 4.999972042449169e-05, |
| "loss": 1.4686, |
| "num_input_tokens_seen": 17825792, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.038569706709521895, |
| "grad_norm": 1.780527582253664, |
| "learning_rate": 4.9999634840360755e-05, |
| "loss": 1.4052, |
| "num_input_tokens_seen": 18874368, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04071246819338423, |
| "grad_norm": 3.117995934114996, |
| "learning_rate": 4.9999537845130426e-05, |
| "loss": 1.4083, |
| "num_input_tokens_seen": 19922944, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04285522967724655, |
| "grad_norm": 2.848287146164459, |
| "learning_rate": 4.999942943884498e-05, |
| "loss": 1.3887, |
| "num_input_tokens_seen": 20971520, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04499799116110888, |
| "grad_norm": 1.69625375895056, |
| "learning_rate": 4.9999309621553894e-05, |
| "loss": 1.349, |
| "num_input_tokens_seen": 22020096, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04714075264497121, |
| "grad_norm": 2.567244377686529, |
| "learning_rate": 4.9999178393311855e-05, |
| "loss": 1.3423, |
| "num_input_tokens_seen": 23068672, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.049283514128833535, |
| "grad_norm": 1.7526016889237623, |
| "learning_rate": 4.999903575417877e-05, |
| "loss": 1.3301, |
| "num_input_tokens_seen": 24117248, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05142627561269586, |
| "grad_norm": 2.1556250824756282, |
| "learning_rate": 4.9998881704219745e-05, |
| "loss": 1.3152, |
| "num_input_tokens_seen": 25165824, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05356903709655819, |
| "grad_norm": 1.8871936642830933, |
| "learning_rate": 4.9998716243505096e-05, |
| "loss": 1.304, |
| "num_input_tokens_seen": 26214400, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05571179858042052, |
| "grad_norm": 1.674338621481819, |
| "learning_rate": 4.999853937211034e-05, |
| "loss": 1.2796, |
| "num_input_tokens_seen": 27262976, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05785456006428284, |
| "grad_norm": 1.761831320598704, |
| "learning_rate": 4.9998351090116226e-05, |
| "loss": 1.2732, |
| "num_input_tokens_seen": 28311552, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.059997321548145174, |
| "grad_norm": 1.7061574034058262, |
| "learning_rate": 4.9998151397608674e-05, |
| "loss": 1.2686, |
| "num_input_tokens_seen": 29360128, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0621400830320075, |
| "grad_norm": 1.5863747354870246, |
| "learning_rate": 4.999794029467886e-05, |
| "loss": 1.2613, |
| "num_input_tokens_seen": 30408704, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06428284451586982, |
| "grad_norm": 1.7274454226000222, |
| "learning_rate": 4.9997717781423114e-05, |
| "loss": 1.2526, |
| "num_input_tokens_seen": 31457280, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06642560599973216, |
| "grad_norm": 1.4317285831126387, |
| "learning_rate": 4.999748385794302e-05, |
| "loss": 1.2329, |
| "num_input_tokens_seen": 32505856, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06856836748359449, |
| "grad_norm": 1.8999621450491984, |
| "learning_rate": 4.999723852434535e-05, |
| "loss": 1.2436, |
| "num_input_tokens_seen": 33554432, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07071112896745681, |
| "grad_norm": 1.4448128803947724, |
| "learning_rate": 4.999698178074209e-05, |
| "loss": 1.2355, |
| "num_input_tokens_seen": 34603008, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07285389045131914, |
| "grad_norm": 2.144552654239913, |
| "learning_rate": 4.9996713627250426e-05, |
| "loss": 1.2217, |
| "num_input_tokens_seen": 35651584, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07499665193518147, |
| "grad_norm": 1.1224127832608906, |
| "learning_rate": 4.999643406399275e-05, |
| "loss": 1.2163, |
| "num_input_tokens_seen": 36700160, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07713941341904379, |
| "grad_norm": 2.0366823883396057, |
| "learning_rate": 4.9996143091096684e-05, |
| "loss": 1.2142, |
| "num_input_tokens_seen": 37748736, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07928217490290612, |
| "grad_norm": 1.296430607752612, |
| "learning_rate": 4.999584070869502e-05, |
| "loss": 1.2073, |
| "num_input_tokens_seen": 38797312, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08142493638676845, |
| "grad_norm": 1.4801029998241608, |
| "learning_rate": 4.999552691692581e-05, |
| "loss": 1.2124, |
| "num_input_tokens_seen": 39845888, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08356769787063077, |
| "grad_norm": 1.4660757543282248, |
| "learning_rate": 4.999520171593226e-05, |
| "loss": 1.1989, |
| "num_input_tokens_seen": 40894464, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0857104593544931, |
| "grad_norm": 1.7036809143512879, |
| "learning_rate": 4.999486510586282e-05, |
| "loss": 1.1902, |
| "num_input_tokens_seen": 41943040, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08785322083835544, |
| "grad_norm": 1.5061981122893944, |
| "learning_rate": 4.999451708687114e-05, |
| "loss": 1.1964, |
| "num_input_tokens_seen": 42991616, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08999598232221775, |
| "grad_norm": 1.050371458696268, |
| "learning_rate": 4.999415765911606e-05, |
| "loss": 1.1799, |
| "num_input_tokens_seen": 44040192, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09213874380608009, |
| "grad_norm": 1.6332624514974972, |
| "learning_rate": 4.9993786822761656e-05, |
| "loss": 1.1769, |
| "num_input_tokens_seen": 45088768, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09428150528994242, |
| "grad_norm": 1.351155620545513, |
| "learning_rate": 4.999340457797718e-05, |
| "loss": 1.1779, |
| "num_input_tokens_seen": 46137344, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09642426677380474, |
| "grad_norm": 1.2370952346467414, |
| "learning_rate": 4.999301092493712e-05, |
| "loss": 1.183, |
| "num_input_tokens_seen": 47185920, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09856702825766707, |
| "grad_norm": 1.4038096900765242, |
| "learning_rate": 4.999260586382116e-05, |
| "loss": 1.1645, |
| "num_input_tokens_seen": 48234496, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1007097897415294, |
| "grad_norm": 1.1452882430899725, |
| "learning_rate": 4.999218939481418e-05, |
| "loss": 1.1727, |
| "num_input_tokens_seen": 49283072, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10285255122539172, |
| "grad_norm": 1.3160375257186312, |
| "learning_rate": 4.999176151810629e-05, |
| "loss": 1.1574, |
| "num_input_tokens_seen": 50331648, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10499531270925405, |
| "grad_norm": 1.1507076301290393, |
| "learning_rate": 4.9991322233892784e-05, |
| "loss": 1.1581, |
| "num_input_tokens_seen": 51380224, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.10713807419311638, |
| "grad_norm": 1.6090478698286774, |
| "learning_rate": 4.999087154237418e-05, |
| "loss": 1.1568, |
| "num_input_tokens_seen": 52428800, |
| "step": 50 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4660, |
| "num_input_tokens_seen": 52428800, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 83647172444160.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|