{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 8823,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 7.5471698113207555e-06,
      "loss": 6.6425,
      "step": 100
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.5094339622641511e-05,
      "loss": 4.8785,
      "step": 200
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9999174617418052e-05,
      "loss": 2.0576,
      "step": 300
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9987722672633802e-05,
      "loss": 1.7927,
      "step": 400
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9962812964571567e-05,
      "loss": 1.7317,
      "step": 500
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9924479057334537e-05,
      "loss": 1.7022,
      "step": 600
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9872772603202818e-05,
      "loss": 1.6741,
      "step": 700
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9807763273035574e-05,
      "loss": 1.6608,
      "step": 800
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9729538662394363e-05,
      "loss": 1.6597,
      "step": 900
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9638204173514217e-05,
      "loss": 1.6598,
      "step": 1000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.953388287328142e-05,
      "loss": 1.6501,
      "step": 1100
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9416715327409453e-05,
      "loss": 1.637,
      "step": 1200
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9286859411036396e-05,
      "loss": 1.6317,
      "step": 1300
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.914449009599919e-05,
      "loss": 1.6292,
      "step": 1400
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.898979921507119e-05,
      "loss": 1.6279,
      "step": 1500
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8822995203480823e-05,
      "loss": 1.6225,
      "step": 1600
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.86443028180596e-05,
      "loss": 1.6234,
      "step": 1700
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8453962834397847e-05,
      "loss": 1.6073,
      "step": 1800
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8252231722416328e-05,
      "loss": 1.6119,
      "step": 1900
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8039381300790812e-05,
      "loss": 1.5936,
      "step": 2000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.781569837069528e-05,
      "loss": 1.6175,
      "step": 2100
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.758148432935723e-05,
      "loss": 1.6093,
      "step": 2200
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7337054763945823e-05,
      "loss": 1.6081,
      "step": 2300
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7082739026340097e-05,
      "loss": 1.6064,
      "step": 2400
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6818879789350134e-05,
      "loss": 1.6016,
      "step": 2500
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.6545832584989235e-05,
      "loss": 1.6062,
      "step": 2600
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6263965325419206e-05,
      "loss": 1.6042,
      "step": 2700
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5973657807214245e-05,
      "loss": 1.5967,
      "step": 2800
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5675301199611413e-05,
      "loss": 1.6095,
      "step": 2900
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.536929751743723e-05,
      "loss": 1.5958,
      "step": 3000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.5056059079420575e-05,
      "loss": 1.6012,
      "step": 3100
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4736007952621852e-05,
      "loss": 1.5872,
      "step": 3200
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4409575383726852e-05,
      "loss": 1.5967,
      "step": 3300
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.4077201217971817e-05,
      "loss": 1.5913,
      "step": 3400
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3739333306482481e-05,
      "loss": 1.5902,
      "step": 3500
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.3396426902825753e-05,
      "loss": 1.5908,
      "step": 3600
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.3048944049587138e-05,
      "loss": 1.5883,
      "step": 3700
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2697352955800396e-05,
      "loss": 1.5907,
      "step": 3800
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.2342127366068364e-05,
      "loss": 1.5864,
      "step": 3900
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1983745922224985e-05,
      "loss": 1.5912,
      "step": 4000
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1622691518398636e-05,
      "loss": 1.59,
      "step": 4100
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1259450650345798e-05,
      "loss": 1.5837,
      "step": 4200
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0894512759931785e-05,
      "loss": 1.5805,
      "step": 4300
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0528369575641793e-05,
      "loss": 1.5916,
      "step": 4400
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0161514450010882e-05,
      "loss": 1.5975,
      "step": 4500
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.794441694865673e-06,
      "loss": 1.5885,
      "step": 4600
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.427645915273446e-06,
      "loss": 1.5849,
      "step": 4700
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.061621343096156e-06,
      "loss": 1.5754,
      "step": 4800
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.696861171047268e-06,
      "loss": 1.5906,
      "step": 4900
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.33385688814881e-06,
      "loss": 1.5837,
      "step": 5000
    },
    {
      "epoch": 1.73,
      "learning_rate": 7.97309761748402e-06,
      "loss": 1.5807,
      "step": 5100
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.615069457137927e-06,
      "loss": 1.5907,
      "step": 5200
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.260254825213902e-06,
      "loss": 1.5826,
      "step": 5300
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.909131809808755e-06,
      "loss": 1.5796,
      "step": 5400
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.562173524822188e-06,
      "loss": 1.5814,
      "step": 5500
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.219847472468641e-06,
      "loss": 1.5861,
      "step": 5600
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.882614913350499e-06,
      "loss": 1.5744,
      "step": 5700
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.550930244941448e-06,
      "loss": 1.5797,
      "step": 5800
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.2252403893173835e-06,
      "loss": 1.5857,
      "step": 5900
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.9059841909599456e-06,
      "loss": 1.5728,
      "step": 6000
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.593591825444028e-06,
      "loss": 1.5701,
      "step": 6100
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.288484219806016e-06,
      "loss": 1.5846,
      "step": 6200
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.991072485373858e-06,
      "loss": 1.5716,
      "step": 6300
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.7017573638230296e-06,
      "loss": 1.5798,
      "step": 6400
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.420928687204965e-06,
      "loss": 1.5739,
      "step": 6500
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.1489648526753913e-06,
      "loss": 1.5845,
      "step": 6600
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.8862323126304427e-06,
      "loss": 1.5808,
      "step": 6700
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.6330850809374685e-06,
      "loss": 1.5704,
      "step": 6800
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.389864255925913e-06,
      "loss": 1.5769,
      "step": 6900
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1568975607809895e-06,
      "loss": 1.5765,
      "step": 7000
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.934498901959424e-06,
      "loss": 1.5665,
      "step": 7100
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.722967946222277e-06,
      "loss": 1.5822,
      "step": 7200
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5225897168548032e-06,
      "loss": 1.5931,
      "step": 7300
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.3336342096173239e-06,
      "loss": 1.5855,
      "step": 7400
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1563560289446819e-06,
      "loss": 1.5736,
      "step": 7500
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.909940448844412e-07,
      "loss": 1.5942,
      "step": 7600
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.377710712360631e-07,
      "loss": 1.5792,
      "step": 7700
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.968935653247766e-07,
      "loss": 1.5715,
      "step": 7800
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.685513498146533e-07,
      "loss": 1.5777,
      "step": 7900
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.529173569357459e-07,
      "loss": 1.5795,
      "step": 8000
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.5014739546990087e-07,
      "loss": 1.5799,
      "step": 8100
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.603799408092389e-07,
      "loss": 1.5743,
      "step": 8200
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.8373594837017505e-07,
      "loss": 1.5779,
      "step": 8300
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.2031869061438494e-07,
      "loss": 1.5689,
      "step": 8400
    },
    {
      "epoch": 2.89,
      "learning_rate": 7.02136178963242e-08,
      "loss": 1.5854,
      "step": 8500
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.3488243324814044e-08,
      "loss": 1.5654,
      "step": 8600
    },
    {
      "epoch": 2.96,
      "learning_rate": 1.0192051793809221e-08,
      "loss": 1.5818,
      "step": 8700
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.5643330492995953e-10,
      "loss": 1.5853,
      "step": 8800
    },
    {
      "epoch": 3.0,
      "step": 8823,
      "total_flos": 6.175868485067866e+18,
      "train_loss": 1.6989395520347432,
      "train_runtime": 13309.6743,
      "train_samples_per_second": 21.208,
      "train_steps_per_second": 0.663
    }
  ],
  "max_steps": 8823,
  "num_train_epochs": 3,
  "total_flos": 6.175868485067866e+18,
  "trial_name": null,
  "trial_params": null
}