{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.99904,
  "global_step": 2343,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1e-05,
      "loss": 36.9779,
      "step": 50
    },
    {
      "epoch": 0.13,
      "learning_rate": 2e-05,
      "loss": 16.6502,
      "step": 100
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9554168524297815e-05,
      "loss": 13.7007,
      "step": 150
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9108337048595635e-05,
      "loss": 6.0772,
      "step": 200
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.8662505572893448e-05,
      "loss": 4.2448,
      "step": 250
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8216674097191264e-05,
      "loss": 5.0142,
      "step": 300
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.777084262148908e-05,
      "loss": 3.3927,
      "step": 350
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7325011145786894e-05,
      "loss": 3.4273,
      "step": 400
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.687917967008471e-05,
      "loss": 2.906,
      "step": 450
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.6433348194382527e-05,
      "loss": 2.6533,
      "step": 500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.598751671868034e-05,
      "loss": 2.5707,
      "step": 550
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.5541685242978156e-05,
      "loss": 2.5857,
      "step": 600
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.5095853767275971e-05,
      "loss": 2.5658,
      "step": 650
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.4650022291573786e-05,
      "loss": 2.7225,
      "step": 700
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.42041908158716e-05,
      "loss": 2.0151,
      "step": 750
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.88208,
      "eval_loss": 3.9117867946624756,
      "eval_rmse": 4.946967789532184,
      "eval_runtime": 621.0961,
      "eval_samples_per_second": 40.251,
      "eval_steps_per_second": 2.517,
      "step": 781
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3758359340169416e-05,
      "loss": 2.4671,
      "step": 800
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.3312527864467232e-05,
      "loss": 1.8725,
      "step": 850
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.2866696388765047e-05,
      "loss": 1.9538,
      "step": 900
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.2420864913062862e-05,
      "loss": 1.7274,
      "step": 950
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.197503343736068e-05,
      "loss": 1.9257,
      "step": 1000
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.1529201961658493e-05,
      "loss": 1.8435,
      "step": 1050
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.1083370485956308e-05,
      "loss": 1.6858,
      "step": 1100
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0637539010254126e-05,
      "loss": 1.9954,
      "step": 1150
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.019170753455194e-05,
      "loss": 1.7033,
      "step": 1200
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.745876058849756e-06,
      "loss": 1.4969,
      "step": 1250
    },
    {
      "epoch": 1.66,
      "learning_rate": 9.30004458314757e-06,
      "loss": 1.5146,
      "step": 1300
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.854213107445387e-06,
      "loss": 2.0415,
      "step": 1350
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.408381631743202e-06,
      "loss": 2.0009,
      "step": 1400
    },
    {
      "epoch": 1.86,
      "learning_rate": 7.962550156041017e-06,
      "loss": 1.7283,
      "step": 1450
    },
    {
      "epoch": 1.92,
      "learning_rate": 7.516718680338832e-06,
      "loss": 1.7819,
      "step": 1500
    },
    {
      "epoch": 1.98,
      "learning_rate": 7.070887204636649e-06,
      "loss": 1.5547,
      "step": 1550
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.95872,
      "eval_loss": 1.6247199773788452,
      "eval_rmse": 4.865195563248444,
      "eval_runtime": 619.8743,
      "eval_samples_per_second": 40.331,
      "eval_steps_per_second": 2.521,
      "step": 1562
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.625055728934463e-06,
      "loss": 1.4602,
      "step": 1600
    },
    {
      "epoch": 2.11,
      "learning_rate": 6.179224253232279e-06,
      "loss": 1.2891,
      "step": 1650
    },
    {
      "epoch": 2.18,
      "learning_rate": 5.733392777530095e-06,
      "loss": 1.2627,
      "step": 1700
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.28756130182791e-06,
      "loss": 1.4664,
      "step": 1750
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.841729826125725e-06,
      "loss": 1.2305,
      "step": 1800
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.39589835042354e-06,
      "loss": 1.2872,
      "step": 1850
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.950066874721356e-06,
      "loss": 1.2644,
      "step": 1900
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.5042353990191713e-06,
      "loss": 1.3754,
      "step": 1950
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.0584039233169866e-06,
      "loss": 1.1153,
      "step": 2000
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.612572447614802e-06,
      "loss": 1.3498,
      "step": 2050
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.1667409719126175e-06,
      "loss": 1.2545,
      "step": 2100
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.7209094962104325e-06,
      "loss": 1.0703,
      "step": 2150
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.2750780205082481e-06,
      "loss": 1.1519,
      "step": 2200
    },
    {
      "epoch": 2.88,
      "learning_rate": 8.292465448060634e-07,
      "loss": 1.0694,
      "step": 2250
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.834150691038788e-07,
      "loss": 0.9306,
      "step": 2300
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.96212,
      "eval_loss": 1.7369074821472168,
      "eval_rmse": 5.055060677999024,
      "eval_runtime": 635.5792,
      "eval_samples_per_second": 39.334,
      "eval_steps_per_second": 2.459,
      "step": 2343
    }
  ],
  "max_steps": 2343,
  "num_train_epochs": 3,
  "total_flos": 6.576905725215466e+16,
  "trial_name": null,
  "trial_params": null
}