| { |
| "best_metric": 3.913165807723999, |
| "best_model_checkpoint": "miner_id_24/checkpoint-50", |
| "epoch": 0.13635588886995056, |
| "eval_steps": 25, |
| "global_step": 50, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0027271177773990113, |
| "grad_norm": 1.8906813859939575, |
| "learning_rate": 5e-05, |
| "loss": 4.7933, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0027271177773990113, |
| "eval_loss": 6.023176670074463, |
| "eval_runtime": 81.9841, |
| "eval_samples_per_second": 30.128, |
| "eval_steps_per_second": 3.769, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005454235554798023, |
| "grad_norm": 1.9939332008361816, |
| "learning_rate": 0.0001, |
| "loss": 4.9141, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008181353332197034, |
| "grad_norm": 2.0799591541290283, |
| "learning_rate": 9.989294616193017e-05, |
| "loss": 4.9722, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010908471109596045, |
| "grad_norm": 2.5818052291870117, |
| "learning_rate": 9.957224306869053e-05, |
| "loss": 4.9123, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.013635588886995058, |
| "grad_norm": 2.5464484691619873, |
| "learning_rate": 9.903926402016153e-05, |
| "loss": 4.6455, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01636270666439407, |
| "grad_norm": 2.710998296737671, |
| "learning_rate": 9.829629131445342e-05, |
| "loss": 4.5686, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01908982444179308, |
| "grad_norm": 2.9093258380889893, |
| "learning_rate": 9.73465064747553e-05, |
| "loss": 4.5537, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02181694221919209, |
| "grad_norm": 3.1069393157958984, |
| "learning_rate": 9.619397662556435e-05, |
| "loss": 4.4152, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0245440599965911, |
| "grad_norm": 3.0680086612701416, |
| "learning_rate": 9.484363707663442e-05, |
| "loss": 4.2492, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.027271177773990116, |
| "grad_norm": 2.153977394104004, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 4.2606, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.029998295551389127, |
| "grad_norm": 2.850743055343628, |
| "learning_rate": 9.157348061512727e-05, |
| "loss": 4.0922, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03272541332878814, |
| "grad_norm": 6.232481956481934, |
| "learning_rate": 8.966766701456177e-05, |
| "loss": 4.3643, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03545253110618715, |
| "grad_norm": 4.058305263519287, |
| "learning_rate": 8.759199037394887e-05, |
| "loss": 4.1761, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03817964888358616, |
| "grad_norm": 4.460968017578125, |
| "learning_rate": 8.535533905932738e-05, |
| "loss": 4.0582, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.040906766660985174, |
| "grad_norm": 3.495102643966675, |
| "learning_rate": 8.296729075500344e-05, |
| "loss": 3.9237, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04363388443838418, |
| "grad_norm": 2.912580728530884, |
| "learning_rate": 8.043807145043604e-05, |
| "loss": 3.8686, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.046361002215783195, |
| "grad_norm": 2.6480324268341064, |
| "learning_rate": 7.777851165098012e-05, |
| "loss": 3.8619, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0490881199931822, |
| "grad_norm": 1.9118335247039795, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 3.9475, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05181523777058122, |
| "grad_norm": 1.7946391105651855, |
| "learning_rate": 7.211443451095007e-05, |
| "loss": 3.9611, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05454235554798023, |
| "grad_norm": 1.6800788640975952, |
| "learning_rate": 6.91341716182545e-05, |
| "loss": 3.971, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05726947332537924, |
| "grad_norm": 2.0760059356689453, |
| "learning_rate": 6.607197326515808e-05, |
| "loss": 3.8846, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05999659110277825, |
| "grad_norm": 2.401682138442993, |
| "learning_rate": 6.294095225512603e-05, |
| "loss": 4.1091, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06272370888017727, |
| "grad_norm": 2.5078513622283936, |
| "learning_rate": 5.9754516100806423e-05, |
| "loss": 4.0463, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06545082665757627, |
| "grad_norm": 2.7291085720062256, |
| "learning_rate": 5.6526309611002594e-05, |
| "loss": 4.0887, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06817794443497528, |
| "grad_norm": 4.357483863830566, |
| "learning_rate": 5.327015646150716e-05, |
| "loss": 4.0481, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06817794443497528, |
| "eval_loss": 4.011327743530273, |
| "eval_runtime": 82.0442, |
| "eval_samples_per_second": 30.106, |
| "eval_steps_per_second": 3.766, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0709050622123743, |
| "grad_norm": 2.2828495502471924, |
| "learning_rate": 5e-05, |
| "loss": 3.9172, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07363217998977331, |
| "grad_norm": 2.7300260066986084, |
| "learning_rate": 4.6729843538492847e-05, |
| "loss": 4.0181, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07635929776717232, |
| "grad_norm": 2.9719653129577637, |
| "learning_rate": 4.347369038899744e-05, |
| "loss": 4.0165, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07908641554457133, |
| "grad_norm": 2.9800491333007812, |
| "learning_rate": 4.0245483899193595e-05, |
| "loss": 3.8962, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08181353332197035, |
| "grad_norm": 2.593806266784668, |
| "learning_rate": 3.705904774487396e-05, |
| "loss": 3.9558, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08454065109936935, |
| "grad_norm": 2.301185131072998, |
| "learning_rate": 3.392802673484193e-05, |
| "loss": 3.9558, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.08726776887676836, |
| "grad_norm": 2.195345878601074, |
| "learning_rate": 3.086582838174551e-05, |
| "loss": 3.897, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08999488665416738, |
| "grad_norm": 1.9010220766067505, |
| "learning_rate": 2.7885565489049946e-05, |
| "loss": 3.9487, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09272200443156639, |
| "grad_norm": 2.0472588539123535, |
| "learning_rate": 2.500000000000001e-05, |
| "loss": 4.121, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0954491222089654, |
| "grad_norm": 2.0967180728912354, |
| "learning_rate": 2.2221488349019903e-05, |
| "loss": 3.9512, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0981762399863644, |
| "grad_norm": 2.0463292598724365, |
| "learning_rate": 1.9561928549563968e-05, |
| "loss": 4.0246, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.10090335776376343, |
| "grad_norm": 2.5716755390167236, |
| "learning_rate": 1.703270924499656e-05, |
| "loss": 4.1409, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10363047554116243, |
| "grad_norm": 1.0110124349594116, |
| "learning_rate": 1.4644660940672627e-05, |
| "loss": 3.745, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.10635759331856144, |
| "grad_norm": 1.0488909482955933, |
| "learning_rate": 1.2408009626051137e-05, |
| "loss": 3.7371, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10908471109596046, |
| "grad_norm": 0.9047977328300476, |
| "learning_rate": 1.0332332985438248e-05, |
| "loss": 3.8171, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11181182887335947, |
| "grad_norm": 0.9148534536361694, |
| "learning_rate": 8.426519384872733e-06, |
| "loss": 3.7319, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11453894665075848, |
| "grad_norm": 1.192430853843689, |
| "learning_rate": 6.698729810778065e-06, |
| "loss": 3.8528, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.11726606442815748, |
| "grad_norm": 1.1312968730926514, |
| "learning_rate": 5.156362923365588e-06, |
| "loss": 3.8156, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1199931822055565, |
| "grad_norm": 0.9511891603469849, |
| "learning_rate": 3.8060233744356633e-06, |
| "loss": 3.894, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.12272029998295551, |
| "grad_norm": 1.1418358087539673, |
| "learning_rate": 2.653493525244721e-06, |
| "loss": 3.7971, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12544741776035453, |
| "grad_norm": 1.1028140783309937, |
| "learning_rate": 1.70370868554659e-06, |
| "loss": 3.8487, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.12817453553775354, |
| "grad_norm": 1.3050944805145264, |
| "learning_rate": 9.607359798384785e-07, |
| "loss": 3.8828, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.13090165331515255, |
| "grad_norm": 1.4526429176330566, |
| "learning_rate": 4.277569313094809e-07, |
| "loss": 3.8323, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.13362877109255156, |
| "grad_norm": 1.9823384284973145, |
| "learning_rate": 1.0705383806982606e-07, |
| "loss": 3.9885, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.13635588886995056, |
| "grad_norm": 4.876001834869385, |
| "learning_rate": 0.0, |
| "loss": 4.1589, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13635588886995056, |
| "eval_loss": 3.913165807723999, |
| "eval_runtime": 82.0402, |
| "eval_samples_per_second": 30.107, |
| "eval_steps_per_second": 3.766, |
| "step": 50 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 50, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 1, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.968086838542336e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|