{ "best_metric": 0.01995982974767685, "best_model_checkpoint": "trustworthiness-longformer/checkpoint-1995", "epoch": 3.0, "eval_steps": 500, "global_step": 1995, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03759398496240601, "grad_norm": 9.034588813781738, "learning_rate": 4.229323308270677e-06, "loss": 0.5584, "step": 25 }, { "epoch": 0.07518796992481203, "grad_norm": 1.967883586883545, "learning_rate": 8.1203007518797e-06, "loss": 0.1203, "step": 50 }, { "epoch": 0.11278195488721804, "grad_norm": 1.5318199396133423, "learning_rate": 1.2349624060150378e-05, "loss": 0.0532, "step": 75 }, { "epoch": 0.15037593984962405, "grad_norm": 0.6738603115081787, "learning_rate": 1.6578947368421053e-05, "loss": 0.0438, "step": 100 }, { "epoch": 0.18796992481203006, "grad_norm": 2.247737169265747, "learning_rate": 2.080827067669173e-05, "loss": 0.0443, "step": 125 }, { "epoch": 0.22556390977443608, "grad_norm": 1.5706892013549805, "learning_rate": 2.5037593984962406e-05, "loss": 0.0422, "step": 150 }, { "epoch": 0.2631578947368421, "grad_norm": 1.9436813592910767, "learning_rate": 2.9266917293233085e-05, "loss": 0.0403, "step": 175 }, { "epoch": 0.3007518796992481, "grad_norm": 1.7137566804885864, "learning_rate": 3.349624060150376e-05, "loss": 0.0389, "step": 200 }, { "epoch": 0.3383458646616541, "grad_norm": 1.5335218906402588, "learning_rate": 3.772556390977444e-05, "loss": 0.0369, "step": 225 }, { "epoch": 0.37593984962406013, "grad_norm": 2.1619019508361816, "learning_rate": 4.195488721804511e-05, "loss": 0.0357, "step": 250 }, { "epoch": 0.41353383458646614, "grad_norm": 0.8143567442893982, "learning_rate": 4.486842105263158e-05, "loss": 0.034, "step": 275 }, { "epoch": 0.45112781954887216, "grad_norm": 0.34969833493232727, "learning_rate": 4.4398496240601505e-05, "loss": 0.0309, "step": 300 }, { "epoch": 0.48872180451127817, "grad_norm": 0.6551300883293152, "learning_rate": 4.392857142857143e-05, "loss": 0.0315, "step": 325 }, { "epoch": 0.5263157894736842, "grad_norm": 1.1032406091690063, "learning_rate": 4.3458646616541354e-05, "loss": 0.0309, "step": 350 }, { "epoch": 0.5639097744360902, "grad_norm": 0.3101660907268524, "learning_rate": 4.2988721804511285e-05, "loss": 0.03, "step": 375 }, { "epoch": 0.6015037593984962, "grad_norm": 0.9715960621833801, "learning_rate": 4.251879699248121e-05, "loss": 0.029, "step": 400 }, { "epoch": 0.6390977443609023, "grad_norm": 1.2566590309143066, "learning_rate": 4.2048872180451127e-05, "loss": 0.0311, "step": 425 }, { "epoch": 0.6766917293233082, "grad_norm": 0.9328742027282715, "learning_rate": 4.157894736842105e-05, "loss": 0.0283, "step": 450 }, { "epoch": 0.7142857142857143, "grad_norm": 1.1680524349212646, "learning_rate": 4.110902255639098e-05, "loss": 0.0279, "step": 475 }, { "epoch": 0.7518796992481203, "grad_norm": 0.6984049081802368, "learning_rate": 4.0639097744360906e-05, "loss": 0.0276, "step": 500 }, { "epoch": 0.7894736842105263, "grad_norm": 0.7427186965942383, "learning_rate": 4.016917293233083e-05, "loss": 0.0284, "step": 525 }, { "epoch": 0.8270676691729323, "grad_norm": 0.9322423338890076, "learning_rate": 3.9699248120300755e-05, "loss": 0.0267, "step": 550 }, { "epoch": 0.8646616541353384, "grad_norm": 0.5467056035995483, "learning_rate": 3.922932330827068e-05, "loss": 0.0274, "step": 575 }, { "epoch": 0.9022556390977443, "grad_norm": 0.7492877840995789, "learning_rate": 3.87593984962406e-05, "loss": 0.0266, "step": 600 }, { "epoch": 0.9398496240601504, "grad_norm": 1.0156948566436768, "learning_rate": 3.828947368421053e-05, "loss": 0.0257, "step": 625 }, { "epoch": 0.9774436090225563, "grad_norm": 2.6121363639831543, "learning_rate": 3.781954887218045e-05, "loss": 0.0255, "step": 650 }, { "epoch": 1.0, "eval_explained_variance": 0.43071454763412476, "eval_loss": 0.021834315732121468, "eval_mae": 0.11838787794113159, "eval_mse": 0.021832076832652092, "eval_r2": 0.4306418299674988, "eval_rmse": 0.14775681653531958, "eval_runtime": 333.3711, "eval_samples_per_second": 63.818, "eval_steps_per_second": 0.999, "step": 665 }, { "epoch": 1.0150375939849625, "grad_norm": 1.001768708229065, "learning_rate": 3.734962406015038e-05, "loss": 0.0248, "step": 675 }, { "epoch": 1.0526315789473684, "grad_norm": 0.44310006499290466, "learning_rate": 3.687969924812031e-05, "loss": 0.0248, "step": 700 }, { "epoch": 1.0902255639097744, "grad_norm": 0.7214398384094238, "learning_rate": 3.6409774436090224e-05, "loss": 0.0241, "step": 725 }, { "epoch": 1.1278195488721805, "grad_norm": 0.6446682810783386, "learning_rate": 3.593984962406015e-05, "loss": 0.0238, "step": 750 }, { "epoch": 1.1654135338345863, "grad_norm": 0.539253830909729, "learning_rate": 3.546992481203008e-05, "loss": 0.0261, "step": 775 }, { "epoch": 1.2030075187969924, "grad_norm": 0.4309803247451782, "learning_rate": 3.5000000000000004e-05, "loss": 0.0243, "step": 800 }, { "epoch": 1.2406015037593985, "grad_norm": 0.2850622534751892, "learning_rate": 3.453007518796993e-05, "loss": 0.0239, "step": 825 }, { "epoch": 1.2781954887218046, "grad_norm": 0.4944869577884674, "learning_rate": 3.406015037593985e-05, "loss": 0.0248, "step": 850 }, { "epoch": 1.3157894736842106, "grad_norm": 0.26742395758628845, "learning_rate": 3.3590225563909776e-05, "loss": 0.0234, "step": 875 }, { "epoch": 1.3533834586466165, "grad_norm": 1.0373315811157227, "learning_rate": 3.31203007518797e-05, "loss": 0.0243, "step": 900 }, { "epoch": 1.3909774436090225, "grad_norm": 0.6992561221122742, "learning_rate": 3.2650375939849625e-05, "loss": 0.0234, "step": 925 }, { "epoch": 1.4285714285714286, "grad_norm": 0.6922001242637634, "learning_rate": 3.218045112781955e-05, "loss": 0.024, "step": 950 }, { "epoch": 1.4661654135338344, "grad_norm": 0.7946988940238953, "learning_rate": 3.171052631578948e-05, "loss": 0.0261, "step": 975 }, { "epoch": 1.5037593984962405, "grad_norm": 0.24744807183742523, "learning_rate": 3.1240601503759404e-05, "loss": 0.0234, "step": 1000 }, { "epoch": 1.5413533834586466, "grad_norm": 0.6126484870910645, "learning_rate": 3.077067669172932e-05, "loss": 0.0237, "step": 1025 }, { "epoch": 1.5789473684210527, "grad_norm": 0.9163932800292969, "learning_rate": 3.030075187969925e-05, "loss": 0.0251, "step": 1050 }, { "epoch": 1.6165413533834587, "grad_norm": 0.42940250039100647, "learning_rate": 2.9830827067669177e-05, "loss": 0.025, "step": 1075 }, { "epoch": 1.6541353383458648, "grad_norm": 0.21495333313941956, "learning_rate": 2.93609022556391e-05, "loss": 0.0229, "step": 1100 }, { "epoch": 1.6917293233082706, "grad_norm": 0.20642122626304626, "learning_rate": 2.8890977443609026e-05, "loss": 0.0223, "step": 1125 }, { "epoch": 1.7293233082706767, "grad_norm": 0.2935808002948761, "learning_rate": 2.8421052631578946e-05, "loss": 0.0228, "step": 1150 }, { "epoch": 1.7669172932330826, "grad_norm": 0.6718006730079651, "learning_rate": 2.7951127819548878e-05, "loss": 0.0228, "step": 1175 }, { "epoch": 1.8045112781954886, "grad_norm": 0.2039840817451477, "learning_rate": 2.74812030075188e-05, "loss": 0.0235, "step": 1200 }, { "epoch": 1.8421052631578947, "grad_norm": 0.6539952754974365, "learning_rate": 2.7011278195488723e-05, "loss": 0.0232, "step": 1225 }, { "epoch": 1.8796992481203008, "grad_norm": 0.20303431153297424, "learning_rate": 2.6541353383458647e-05, "loss": 0.0243, "step": 1250 }, { "epoch": 1.9172932330827068, "grad_norm": 1.0404738187789917, "learning_rate": 2.6071428571428574e-05, "loss": 0.0235, "step": 1275 }, { "epoch": 1.954887218045113, "grad_norm": 0.4455120861530304, "learning_rate": 2.56015037593985e-05, "loss": 0.0234, "step": 1300 }, { "epoch": 1.9924812030075187, "grad_norm": 0.29621434211730957, "learning_rate": 2.5131578947368423e-05, "loss": 0.0233, "step": 1325 }, { "epoch": 2.0, "eval_explained_variance": 0.4605059027671814, "eval_loss": 0.020697500556707382, "eval_mae": 0.11327706277370453, "eval_mse": 0.020693965256214142, "eval_r2": 0.4603225588798523, "eval_rmse": 0.14385397198622685, "eval_runtime": 332.1992, "eval_samples_per_second": 64.043, "eval_steps_per_second": 1.002, "step": 1330 }, { "epoch": 2.030075187969925, "grad_norm": 0.17958290874958038, "learning_rate": 2.4661654135338347e-05, "loss": 0.0235, "step": 1350 }, { "epoch": 2.0676691729323307, "grad_norm": 0.27772992849349976, "learning_rate": 2.4191729323308275e-05, "loss": 0.0223, "step": 1375 }, { "epoch": 2.1052631578947367, "grad_norm": 0.1573406457901001, "learning_rate": 2.37218045112782e-05, "loss": 0.0214, "step": 1400 }, { "epoch": 2.142857142857143, "grad_norm": 0.704254686832428, "learning_rate": 2.3251879699248123e-05, "loss": 0.0224, "step": 1425 }, { "epoch": 2.180451127819549, "grad_norm": 0.3219689726829529, "learning_rate": 2.2781954887218044e-05, "loss": 0.0219, "step": 1450 }, { "epoch": 2.218045112781955, "grad_norm": 1.3933064937591553, "learning_rate": 2.2312030075187972e-05, "loss": 0.022, "step": 1475 }, { "epoch": 2.255639097744361, "grad_norm": 0.4605341851711273, "learning_rate": 2.1842105263157896e-05, "loss": 0.0221, "step": 1500 }, { "epoch": 2.293233082706767, "grad_norm": 0.23276148736476898, "learning_rate": 2.137218045112782e-05, "loss": 0.0216, "step": 1525 }, { "epoch": 2.3308270676691727, "grad_norm": 0.602206289768219, "learning_rate": 2.0902255639097745e-05, "loss": 0.0211, "step": 1550 }, { "epoch": 2.3684210526315788, "grad_norm": 0.3370234966278076, "learning_rate": 2.0432330827067672e-05, "loss": 0.0208, "step": 1575 }, { "epoch": 2.406015037593985, "grad_norm": 0.3906821310520172, "learning_rate": 1.9962406015037593e-05, "loss": 0.0202, "step": 1600 }, { "epoch": 2.443609022556391, "grad_norm": 0.26114577054977417, "learning_rate": 1.949248120300752e-05, "loss": 0.0221, "step": 1625 }, { "epoch": 2.481203007518797, "grad_norm": 0.215437650680542, "learning_rate": 1.9022556390977445e-05, "loss": 0.023, "step": 1650 }, { "epoch": 2.518796992481203, "grad_norm": 0.7732388377189636, "learning_rate": 1.855263157894737e-05, "loss": 0.0217, "step": 1675 }, { "epoch": 2.556390977443609, "grad_norm": 0.2342628836631775, "learning_rate": 1.8082706766917293e-05, "loss": 0.0219, "step": 1700 }, { "epoch": 2.593984962406015, "grad_norm": 0.49042457342147827, "learning_rate": 1.761278195488722e-05, "loss": 0.0213, "step": 1725 }, { "epoch": 2.6315789473684212, "grad_norm": 0.6491354703903198, "learning_rate": 1.7142857142857142e-05, "loss": 0.0215, "step": 1750 }, { "epoch": 2.6691729323308273, "grad_norm": 0.4228556454181671, "learning_rate": 1.667293233082707e-05, "loss": 0.0211, "step": 1775 }, { "epoch": 2.706766917293233, "grad_norm": 0.25106143951416016, "learning_rate": 1.6203007518796994e-05, "loss": 0.0209, "step": 1800 }, { "epoch": 2.744360902255639, "grad_norm": 0.3744986355304718, "learning_rate": 1.5733082706766918e-05, "loss": 0.0214, "step": 1825 }, { "epoch": 2.781954887218045, "grad_norm": 1.0658032894134521, "learning_rate": 1.5263157894736842e-05, "loss": 0.022, "step": 1850 }, { "epoch": 2.819548872180451, "grad_norm": 0.6378421187400818, "learning_rate": 1.4793233082706768e-05, "loss": 0.0212, "step": 1875 }, { "epoch": 2.857142857142857, "grad_norm": 0.972565770149231, "learning_rate": 1.4323308270676692e-05, "loss": 0.0219, "step": 1900 }, { "epoch": 2.8947368421052633, "grad_norm": 0.43301036953926086, "learning_rate": 1.3853383458646618e-05, "loss": 0.0213, "step": 1925 }, { "epoch": 2.932330827067669, "grad_norm": 0.6031925082206726, "learning_rate": 1.3383458646616541e-05, "loss": 0.0217, "step": 1950 }, { "epoch": 2.969924812030075, "grad_norm": 0.20304885506629944, "learning_rate": 1.2913533834586467e-05, "loss": 0.0209, "step": 1975 }, { "epoch": 3.0, "eval_explained_variance": 0.4826643466949463, "eval_loss": 0.01995982974767685, "eval_mae": 0.11011234670877457, "eval_mse": 0.019954577088356018, "eval_r2": 0.4796050786972046, "eval_rmse": 0.14126067070616655, "eval_runtime": 332.8065, "eval_samples_per_second": 63.926, "eval_steps_per_second": 1.001, "step": 1995 } ], "logging_steps": 25, "max_steps": 2660, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.709296778916659e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }