{
  "best_metric": 0.01995982974767685,
  "best_model_checkpoint": "trustworthiness-longformer/checkpoint-1995",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1995,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03759398496240601,
      "grad_norm": 9.034588813781738,
      "learning_rate": 4.229323308270677e-06,
      "loss": 0.5584,
      "step": 25
    },
    {
      "epoch": 0.07518796992481203,
      "grad_norm": 1.967883586883545,
      "learning_rate": 8.1203007518797e-06,
      "loss": 0.1203,
      "step": 50
    },
    {
      "epoch": 0.11278195488721804,
      "grad_norm": 1.5318199396133423,
      "learning_rate": 1.2349624060150378e-05,
      "loss": 0.0532,
      "step": 75
    },
    {
      "epoch": 0.15037593984962405,
      "grad_norm": 0.6738603115081787,
      "learning_rate": 1.6578947368421053e-05,
      "loss": 0.0438,
      "step": 100
    },
    {
      "epoch": 0.18796992481203006,
      "grad_norm": 2.247737169265747,
      "learning_rate": 2.080827067669173e-05,
      "loss": 0.0443,
      "step": 125
    },
    {
      "epoch": 0.22556390977443608,
      "grad_norm": 1.5706892013549805,
      "learning_rate": 2.5037593984962406e-05,
      "loss": 0.0422,
      "step": 150
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 1.9436813592910767,
      "learning_rate": 2.9266917293233085e-05,
      "loss": 0.0403,
      "step": 175
    },
    {
      "epoch": 0.3007518796992481,
      "grad_norm": 1.7137566804885864,
      "learning_rate": 3.349624060150376e-05,
      "loss": 0.0389,
      "step": 200
    },
    {
      "epoch": 0.3383458646616541,
      "grad_norm": 1.5335218906402588,
      "learning_rate": 3.772556390977444e-05,
      "loss": 0.0369,
      "step": 225
    },
    {
      "epoch": 0.37593984962406013,
      "grad_norm": 2.1619019508361816,
      "learning_rate": 4.195488721804511e-05,
      "loss": 0.0357,
      "step": 250
    },
    {
      "epoch": 0.41353383458646614,
      "grad_norm": 0.8143567442893982,
      "learning_rate": 4.486842105263158e-05,
      "loss": 0.034,
      "step": 275
    },
    {
      "epoch": 0.45112781954887216,
      "grad_norm": 0.34969833493232727,
      "learning_rate": 4.4398496240601505e-05,
      "loss": 0.0309,
      "step": 300
    },
    {
      "epoch": 0.48872180451127817,
      "grad_norm": 0.6551300883293152,
      "learning_rate": 4.392857142857143e-05,
      "loss": 0.0315,
      "step": 325
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 1.1032406091690063,
      "learning_rate": 4.3458646616541354e-05,
      "loss": 0.0309,
      "step": 350
    },
    {
      "epoch": 0.5639097744360902,
      "grad_norm": 0.3101660907268524,
      "learning_rate": 4.2988721804511285e-05,
      "loss": 0.03,
      "step": 375
    },
    {
      "epoch": 0.6015037593984962,
      "grad_norm": 0.9715960621833801,
      "learning_rate": 4.251879699248121e-05,
      "loss": 0.029,
      "step": 400
    },
    {
      "epoch": 0.6390977443609023,
      "grad_norm": 1.2566590309143066,
      "learning_rate": 4.2048872180451127e-05,
      "loss": 0.0311,
      "step": 425
    },
    {
      "epoch": 0.6766917293233082,
      "grad_norm": 0.9328742027282715,
      "learning_rate": 4.157894736842105e-05,
      "loss": 0.0283,
      "step": 450
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 1.1680524349212646,
      "learning_rate": 4.110902255639098e-05,
      "loss": 0.0279,
      "step": 475
    },
    {
      "epoch": 0.7518796992481203,
      "grad_norm": 0.6984049081802368,
      "learning_rate": 4.0639097744360906e-05,
      "loss": 0.0276,
      "step": 500
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.7427186965942383,
      "learning_rate": 4.016917293233083e-05,
      "loss": 0.0284,
      "step": 525
    },
    {
      "epoch": 0.8270676691729323,
      "grad_norm": 0.9322423338890076,
      "learning_rate": 3.9699248120300755e-05,
      "loss": 0.0267,
      "step": 550
    },
    {
      "epoch": 0.8646616541353384,
      "grad_norm": 0.5467056035995483,
      "learning_rate": 3.922932330827068e-05,
      "loss": 0.0274,
      "step": 575
    },
    {
      "epoch": 0.9022556390977443,
      "grad_norm": 0.7492877840995789,
      "learning_rate": 3.87593984962406e-05,
      "loss": 0.0266,
      "step": 600
    },
    {
      "epoch": 0.9398496240601504,
      "grad_norm": 1.0156948566436768,
      "learning_rate": 3.828947368421053e-05,
      "loss": 0.0257,
      "step": 625
    },
    {
      "epoch": 0.9774436090225563,
      "grad_norm": 2.6121363639831543,
      "learning_rate": 3.781954887218045e-05,
      "loss": 0.0255,
      "step": 650
    },
    {
      "epoch": 1.0,
      "eval_explained_variance": 0.43071454763412476,
      "eval_loss": 0.021834315732121468,
      "eval_mae": 0.11838787794113159,
      "eval_mse": 0.021832076832652092,
      "eval_r2": 0.4306418299674988,
      "eval_rmse": 0.14775681653531958,
      "eval_runtime": 333.3711,
      "eval_samples_per_second": 63.818,
      "eval_steps_per_second": 0.999,
      "step": 665
    },
    {
      "epoch": 1.0150375939849625,
      "grad_norm": 1.001768708229065,
      "learning_rate": 3.734962406015038e-05,
      "loss": 0.0248,
      "step": 675
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.44310006499290466,
      "learning_rate": 3.687969924812031e-05,
      "loss": 0.0248,
      "step": 700
    },
    {
      "epoch": 1.0902255639097744,
      "grad_norm": 0.7214398384094238,
      "learning_rate": 3.6409774436090224e-05,
      "loss": 0.0241,
      "step": 725
    },
    {
      "epoch": 1.1278195488721805,
      "grad_norm": 0.6446682810783386,
      "learning_rate": 3.593984962406015e-05,
      "loss": 0.0238,
      "step": 750
    },
    {
      "epoch": 1.1654135338345863,
      "grad_norm": 0.539253830909729,
      "learning_rate": 3.546992481203008e-05,
      "loss": 0.0261,
      "step": 775
    },
    {
      "epoch": 1.2030075187969924,
      "grad_norm": 0.4309803247451782,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.0243,
      "step": 800
    },
    {
      "epoch": 1.2406015037593985,
      "grad_norm": 0.2850622534751892,
      "learning_rate": 3.453007518796993e-05,
      "loss": 0.0239,
      "step": 825
    },
    {
      "epoch": 1.2781954887218046,
      "grad_norm": 0.4944869577884674,
      "learning_rate": 3.406015037593985e-05,
      "loss": 0.0248,
      "step": 850
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.26742395758628845,
      "learning_rate": 3.3590225563909776e-05,
      "loss": 0.0234,
      "step": 875
    },
    {
      "epoch": 1.3533834586466165,
      "grad_norm": 1.0373315811157227,
      "learning_rate": 3.31203007518797e-05,
      "loss": 0.0243,
      "step": 900
    },
    {
      "epoch": 1.3909774436090225,
      "grad_norm": 0.6992561221122742,
      "learning_rate": 3.2650375939849625e-05,
      "loss": 0.0234,
      "step": 925
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.6922001242637634,
      "learning_rate": 3.218045112781955e-05,
      "loss": 0.024,
      "step": 950
    },
    {
      "epoch": 1.4661654135338344,
      "grad_norm": 0.7946988940238953,
      "learning_rate": 3.171052631578948e-05,
      "loss": 0.0261,
      "step": 975
    },
    {
      "epoch": 1.5037593984962405,
      "grad_norm": 0.24744807183742523,
      "learning_rate": 3.1240601503759404e-05,
      "loss": 0.0234,
      "step": 1000
    },
    {
      "epoch": 1.5413533834586466,
      "grad_norm": 0.6126484870910645,
      "learning_rate": 3.077067669172932e-05,
      "loss": 0.0237,
      "step": 1025
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.9163932800292969,
      "learning_rate": 3.030075187969925e-05,
      "loss": 0.0251,
      "step": 1050
    },
    {
      "epoch": 1.6165413533834587,
      "grad_norm": 0.42940250039100647,
      "learning_rate": 2.9830827067669177e-05,
      "loss": 0.025,
      "step": 1075
    },
    {
      "epoch": 1.6541353383458648,
      "grad_norm": 0.21495333313941956,
      "learning_rate": 2.93609022556391e-05,
      "loss": 0.0229,
      "step": 1100
    },
    {
      "epoch": 1.6917293233082706,
      "grad_norm": 0.20642122626304626,
      "learning_rate": 2.8890977443609026e-05,
      "loss": 0.0223,
      "step": 1125
    },
    {
      "epoch": 1.7293233082706767,
      "grad_norm": 0.2935808002948761,
      "learning_rate": 2.8421052631578946e-05,
      "loss": 0.0228,
      "step": 1150
    },
    {
      "epoch": 1.7669172932330826,
      "grad_norm": 0.6718006730079651,
      "learning_rate": 2.7951127819548878e-05,
      "loss": 0.0228,
      "step": 1175
    },
    {
      "epoch": 1.8045112781954886,
      "grad_norm": 0.2039840817451477,
      "learning_rate": 2.74812030075188e-05,
      "loss": 0.0235,
      "step": 1200
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.6539952754974365,
      "learning_rate": 2.7011278195488723e-05,
      "loss": 0.0232,
      "step": 1225
    },
    {
      "epoch": 1.8796992481203008,
      "grad_norm": 0.20303431153297424,
      "learning_rate": 2.6541353383458647e-05,
      "loss": 0.0243,
      "step": 1250
    },
    {
      "epoch": 1.9172932330827068,
      "grad_norm": 1.0404738187789917,
      "learning_rate": 2.6071428571428574e-05,
      "loss": 0.0235,
      "step": 1275
    },
    {
      "epoch": 1.954887218045113,
      "grad_norm": 0.4455120861530304,
      "learning_rate": 2.56015037593985e-05,
      "loss": 0.0234,
      "step": 1300
    },
    {
      "epoch": 1.9924812030075187,
      "grad_norm": 0.29621434211730957,
      "learning_rate": 2.5131578947368423e-05,
      "loss": 0.0233,
      "step": 1325
    },
    {
      "epoch": 2.0,
      "eval_explained_variance": 0.4605059027671814,
      "eval_loss": 0.020697500556707382,
      "eval_mae": 0.11327706277370453,
      "eval_mse": 0.020693965256214142,
      "eval_r2": 0.4603225588798523,
      "eval_rmse": 0.14385397198622685,
      "eval_runtime": 332.1992,
      "eval_samples_per_second": 64.043,
      "eval_steps_per_second": 1.002,
      "step": 1330
    },
    {
      "epoch": 2.030075187969925,
      "grad_norm": 0.17958290874958038,
      "learning_rate": 2.4661654135338347e-05,
      "loss": 0.0235,
      "step": 1350
    },
    {
      "epoch": 2.0676691729323307,
      "grad_norm": 0.27772992849349976,
      "learning_rate": 2.4191729323308275e-05,
      "loss": 0.0223,
      "step": 1375
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.1573406457901001,
      "learning_rate": 2.37218045112782e-05,
      "loss": 0.0214,
      "step": 1400
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 0.704254686832428,
      "learning_rate": 2.3251879699248123e-05,
      "loss": 0.0224,
      "step": 1425
    },
    {
      "epoch": 2.180451127819549,
      "grad_norm": 0.3219689726829529,
      "learning_rate": 2.2781954887218044e-05,
      "loss": 0.0219,
      "step": 1450
    },
    {
      "epoch": 2.218045112781955,
      "grad_norm": 1.3933064937591553,
      "learning_rate": 2.2312030075187972e-05,
      "loss": 0.022,
      "step": 1475
    },
    {
      "epoch": 2.255639097744361,
      "grad_norm": 0.4605341851711273,
      "learning_rate": 2.1842105263157896e-05,
      "loss": 0.0221,
      "step": 1500
    },
    {
      "epoch": 2.293233082706767,
      "grad_norm": 0.23276148736476898,
      "learning_rate": 2.137218045112782e-05,
      "loss": 0.0216,
      "step": 1525
    },
    {
      "epoch": 2.3308270676691727,
      "grad_norm": 0.602206289768219,
      "learning_rate": 2.0902255639097745e-05,
      "loss": 0.0211,
      "step": 1550
    },
    {
      "epoch": 2.3684210526315788,
      "grad_norm": 0.3370234966278076,
      "learning_rate": 2.0432330827067672e-05,
      "loss": 0.0208,
      "step": 1575
    },
    {
      "epoch": 2.406015037593985,
      "grad_norm": 0.3906821310520172,
      "learning_rate": 1.9962406015037593e-05,
      "loss": 0.0202,
      "step": 1600
    },
    {
      "epoch": 2.443609022556391,
      "grad_norm": 0.26114577054977417,
      "learning_rate": 1.949248120300752e-05,
      "loss": 0.0221,
      "step": 1625
    },
    {
      "epoch": 2.481203007518797,
      "grad_norm": 0.215437650680542,
      "learning_rate": 1.9022556390977445e-05,
      "loss": 0.023,
      "step": 1650
    },
    {
      "epoch": 2.518796992481203,
      "grad_norm": 0.7732388377189636,
      "learning_rate": 1.855263157894737e-05,
      "loss": 0.0217,
      "step": 1675
    },
    {
      "epoch": 2.556390977443609,
      "grad_norm": 0.2342628836631775,
      "learning_rate": 1.8082706766917293e-05,
      "loss": 0.0219,
      "step": 1700
    },
    {
      "epoch": 2.593984962406015,
      "grad_norm": 0.49042457342147827,
      "learning_rate": 1.761278195488722e-05,
      "loss": 0.0213,
      "step": 1725
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.6491354703903198,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 0.0215,
      "step": 1750
    },
    {
      "epoch": 2.6691729323308273,
      "grad_norm": 0.4228556454181671,
      "learning_rate": 1.667293233082707e-05,
      "loss": 0.0211,
      "step": 1775
    },
    {
      "epoch": 2.706766917293233,
      "grad_norm": 0.25106143951416016,
      "learning_rate": 1.6203007518796994e-05,
      "loss": 0.0209,
      "step": 1800
    },
    {
      "epoch": 2.744360902255639,
      "grad_norm": 0.3744986355304718,
      "learning_rate": 1.5733082706766918e-05,
      "loss": 0.0214,
      "step": 1825
    },
    {
      "epoch": 2.781954887218045,
      "grad_norm": 1.0658032894134521,
      "learning_rate": 1.5263157894736842e-05,
      "loss": 0.022,
      "step": 1850
    },
    {
      "epoch": 2.819548872180451,
      "grad_norm": 0.6378421187400818,
      "learning_rate": 1.4793233082706768e-05,
      "loss": 0.0212,
      "step": 1875
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.972565770149231,
      "learning_rate": 1.4323308270676692e-05,
      "loss": 0.0219,
      "step": 1900
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.43301036953926086,
      "learning_rate": 1.3853383458646618e-05,
      "loss": 0.0213,
      "step": 1925
    },
    {
      "epoch": 2.932330827067669,
      "grad_norm": 0.6031925082206726,
      "learning_rate": 1.3383458646616541e-05,
      "loss": 0.0217,
      "step": 1950
    },
    {
      "epoch": 2.969924812030075,
      "grad_norm": 0.20304885506629944,
      "learning_rate": 1.2913533834586467e-05,
      "loss": 0.0209,
      "step": 1975
    },
    {
      "epoch": 3.0,
      "eval_explained_variance": 0.4826643466949463,
      "eval_loss": 0.01995982974767685,
      "eval_mae": 0.11011234670877457,
      "eval_mse": 0.019954577088356018,
      "eval_r2": 0.4796050786972046,
      "eval_rmse": 0.14126067070616655,
      "eval_runtime": 332.8065,
      "eval_samples_per_second": 63.926,
      "eval_steps_per_second": 1.001,
      "step": 1995
    }
  ],
  "logging_steps": 25,
  "max_steps": 2660,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.709296778916659e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}