{
  "best_metric": 0.03224072605371475,
  "best_model_checkpoint": "/gs/fs/tgh-25IAK/ue02946/PLANT/Season_based_split_performance/final_full_250304_ver2_12_fixed_rp__s20000_lr1.00E-04_wd0.01_rwd0.005_csew0_csea0_sw0.2_csewv0_swv0.2_cw0.05_rid256_dp0.05_eid64_edp0.1_lgw0.001_bs16_facebookesm2_t33_650M_UR50D/trained_until_full/results/checkpoint-19000",
  "epoch": 9.336609336609337,
  "eval_steps": 1000,
  "global_step": 19000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2457002457002457,
      "grad_norm": 0.9868232607841492,
      "learning_rate": 1.2425e-05,
      "loss": 0.1679,
      "step": 500
    },
    {
      "epoch": 0.4914004914004914,
      "grad_norm": 0.6529730558395386,
      "learning_rate": 2.4925000000000003e-05,
      "loss": 0.0803,
      "step": 1000
    },
    {
      "epoch": 0.4914004914004914,
      "eval_loss": 0.06003545597195625,
      "eval_runtime": 174.4345,
      "eval_samples_per_second": 40.869,
      "eval_steps_per_second": 1.278,
      "step": 1000
    },
    {
      "epoch": 0.7371007371007371,
      "grad_norm": 0.24933239817619324,
      "learning_rate": 3.7425e-05,
      "loss": 0.0655,
      "step": 1500
    },
    {
      "epoch": 0.9828009828009828,
      "grad_norm": 0.7596633434295654,
      "learning_rate": 4.992500000000001e-05,
      "loss": 0.0544,
      "step": 2000
    },
    {
      "epoch": 0.9828009828009828,
      "eval_loss": 0.05782468616962433,
      "eval_runtime": 174.3269,
      "eval_samples_per_second": 40.894,
      "eval_steps_per_second": 1.279,
      "step": 2000
    },
    {
      "epoch": 1.2285012285012284,
      "grad_norm": 0.31257227063179016,
      "learning_rate": 4.8619444444444446e-05,
      "loss": 0.0454,
      "step": 2500
    },
    {
      "epoch": 1.4742014742014742,
      "grad_norm": 0.21421055495738983,
      "learning_rate": 4.723055555555555e-05,
      "loss": 0.043,
      "step": 3000
    },
    {
      "epoch": 1.4742014742014742,
      "eval_loss": 0.043084025382995605,
      "eval_runtime": 174.7024,
      "eval_samples_per_second": 40.807,
      "eval_steps_per_second": 1.276,
      "step": 3000
    },
    {
      "epoch": 1.71990171990172,
      "grad_norm": 0.26165878772735596,
      "learning_rate": 4.5841666666666665e-05,
      "loss": 0.0417,
      "step": 3500
    },
    {
      "epoch": 1.9656019656019657,
      "grad_norm": 0.3407607674598694,
      "learning_rate": 4.4452777777777785e-05,
      "loss": 0.0394,
      "step": 4000
    },
    {
      "epoch": 1.9656019656019657,
      "eval_loss": 0.037903204560279846,
      "eval_runtime": 175.2535,
      "eval_samples_per_second": 40.678,
      "eval_steps_per_second": 1.272,
      "step": 4000
    },
    {
      "epoch": 2.211302211302211,
      "grad_norm": 0.41331663727760315,
      "learning_rate": 4.306388888888889e-05,
      "loss": 0.0355,
      "step": 4500
    },
    {
      "epoch": 2.457002457002457,
      "grad_norm": 0.3084980845451355,
      "learning_rate": 4.1675e-05,
      "loss": 0.0372,
      "step": 5000
    },
    {
      "epoch": 2.457002457002457,
      "eval_loss": 0.042647846043109894,
      "eval_runtime": 175.1611,
      "eval_samples_per_second": 40.7,
      "eval_steps_per_second": 1.273,
      "step": 5000
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 0.1795072853565216,
      "learning_rate": 4.0286111111111116e-05,
      "loss": 0.0346,
      "step": 5500
    },
    {
      "epoch": 2.9484029484029484,
      "grad_norm": 0.2850644588470459,
      "learning_rate": 3.889722222222222e-05,
      "loss": 0.0329,
      "step": 6000
    },
    {
      "epoch": 2.9484029484029484,
      "eval_loss": 0.03940274938941002,
      "eval_runtime": 175.0589,
      "eval_samples_per_second": 40.723,
      "eval_steps_per_second": 1.274,
      "step": 6000
    },
    {
      "epoch": 3.194103194103194,
      "grad_norm": 0.196182519197464,
      "learning_rate": 3.7508333333333335e-05,
      "loss": 0.033,
      "step": 6500
    },
    {
      "epoch": 3.43980343980344,
      "grad_norm": 0.17266607284545898,
      "learning_rate": 3.611944444444445e-05,
      "loss": 0.0312,
      "step": 7000
    },
    {
      "epoch": 3.43980343980344,
      "eval_loss": 0.037528131157159805,
      "eval_runtime": 175.0252,
      "eval_samples_per_second": 40.731,
      "eval_steps_per_second": 1.274,
      "step": 7000
    },
    {
      "epoch": 3.6855036855036856,
      "grad_norm": 0.18219518661499023,
      "learning_rate": 3.4730555555555553e-05,
      "loss": 0.0312,
      "step": 7500
    },
    {
      "epoch": 3.9312039312039313,
      "grad_norm": 0.32914480566978455,
      "learning_rate": 3.3341666666666666e-05,
      "loss": 0.0311,
      "step": 8000
    },
    {
      "epoch": 3.9312039312039313,
      "eval_loss": 0.036644209176301956,
      "eval_runtime": 174.8439,
      "eval_samples_per_second": 40.774,
      "eval_steps_per_second": 1.275,
      "step": 8000
    },
    {
      "epoch": 4.176904176904177,
      "grad_norm": 0.1908116638660431,
      "learning_rate": 3.195277777777778e-05,
      "loss": 0.0294,
      "step": 8500
    },
    {
      "epoch": 4.422604422604422,
      "grad_norm": 0.23989616334438324,
      "learning_rate": 3.056388888888889e-05,
      "loss": 0.029,
      "step": 9000
    },
    {
      "epoch": 4.422604422604422,
      "eval_loss": 0.03631925955414772,
      "eval_runtime": 174.7772,
      "eval_samples_per_second": 40.789,
      "eval_steps_per_second": 1.276,
      "step": 9000
    },
    {
      "epoch": 4.6683046683046685,
      "grad_norm": 0.15896788239479065,
      "learning_rate": 2.9175e-05,
      "loss": 0.0294,
      "step": 9500
    },
    {
      "epoch": 4.914004914004914,
      "grad_norm": 0.17607934772968292,
      "learning_rate": 2.7786111111111114e-05,
      "loss": 0.0278,
      "step": 10000
    },
    {
      "epoch": 4.914004914004914,
      "eval_loss": 0.0335356779396534,
      "eval_runtime": 174.7415,
      "eval_samples_per_second": 40.797,
      "eval_steps_per_second": 1.276,
      "step": 10000
    },
    {
      "epoch": 5.15970515970516,
      "grad_norm": 0.1342485249042511,
      "learning_rate": 2.64e-05,
      "loss": 0.0285,
      "step": 10500
    },
    {
      "epoch": 5.405405405405405,
      "grad_norm": 0.32292836904525757,
      "learning_rate": 2.5011111111111114e-05,
      "loss": 0.0275,
      "step": 11000
    },
    {
      "epoch": 5.405405405405405,
      "eval_loss": 0.03467724099755287,
      "eval_runtime": 175.0323,
      "eval_samples_per_second": 40.73,
      "eval_steps_per_second": 1.274,
      "step": 11000
    },
    {
      "epoch": 5.651105651105651,
      "grad_norm": 0.4820247292518616,
      "learning_rate": 2.3622222222222223e-05,
      "loss": 0.028,
      "step": 11500
    },
    {
      "epoch": 5.896805896805897,
      "grad_norm": 0.29012107849121094,
      "learning_rate": 2.2233333333333333e-05,
      "loss": 0.0258,
      "step": 12000
    },
    {
      "epoch": 5.896805896805897,
      "eval_loss": 0.033297277987003326,
      "eval_runtime": 175.0904,
      "eval_samples_per_second": 40.716,
      "eval_steps_per_second": 1.274,
      "step": 12000
    },
    {
      "epoch": 6.142506142506143,
      "grad_norm": 0.1986035406589508,
      "learning_rate": 2.0847222222222224e-05,
      "loss": 0.026,
      "step": 12500
    },
    {
      "epoch": 6.388206388206388,
      "grad_norm": 0.258876770734787,
      "learning_rate": 1.9458333333333333e-05,
      "loss": 0.0254,
      "step": 13000
    },
    {
      "epoch": 6.388206388206388,
      "eval_loss": 0.034665919840335846,
      "eval_runtime": 174.8409,
      "eval_samples_per_second": 40.774,
      "eval_steps_per_second": 1.275,
      "step": 13000
    },
    {
      "epoch": 6.6339066339066335,
      "grad_norm": 0.6466019153594971,
      "learning_rate": 1.8069444444444446e-05,
      "loss": 0.026,
      "step": 13500
    },
    {
      "epoch": 6.87960687960688,
      "grad_norm": 0.23270685970783234,
      "learning_rate": 1.668055555555556e-05,
      "loss": 0.0256,
      "step": 14000
    },
    {
      "epoch": 6.87960687960688,
      "eval_loss": 0.03323034569621086,
      "eval_runtime": 174.9701,
      "eval_samples_per_second": 40.744,
      "eval_steps_per_second": 1.275,
      "step": 14000
    },
    {
      "epoch": 7.125307125307125,
      "grad_norm": 0.6487894058227539,
      "learning_rate": 1.5291666666666668e-05,
      "loss": 0.0254,
      "step": 14500
    },
    {
      "epoch": 7.371007371007371,
      "grad_norm": 0.5228617191314697,
      "learning_rate": 1.3905555555555555e-05,
      "loss": 0.0247,
      "step": 15000
    },
    {
      "epoch": 7.371007371007371,
      "eval_loss": 0.03286266699433327,
      "eval_runtime": 174.9609,
      "eval_samples_per_second": 40.746,
      "eval_steps_per_second": 1.275,
      "step": 15000
    },
    {
      "epoch": 7.616707616707616,
      "grad_norm": 0.4490284323692322,
      "learning_rate": 1.2516666666666668e-05,
      "loss": 0.0253,
      "step": 15500
    },
    {
      "epoch": 7.862407862407863,
      "grad_norm": 0.40585392713546753,
      "learning_rate": 1.112777777777778e-05,
      "loss": 0.0239,
      "step": 16000
    },
    {
      "epoch": 7.862407862407863,
      "eval_loss": 0.03392437845468521,
      "eval_runtime": 174.9232,
      "eval_samples_per_second": 40.755,
      "eval_steps_per_second": 1.275,
      "step": 16000
    },
    {
      "epoch": 8.108108108108109,
      "grad_norm": 0.14088092744350433,
      "learning_rate": 9.738888888888889e-06,
      "loss": 0.0238,
      "step": 16500
    },
    {
      "epoch": 8.353808353808354,
      "grad_norm": 0.10795734822750092,
      "learning_rate": 8.350000000000001e-06,
      "loss": 0.0241,
      "step": 17000
    },
    {
      "epoch": 8.353808353808354,
      "eval_loss": 0.033751752227544785,
      "eval_runtime": 174.9375,
      "eval_samples_per_second": 40.752,
      "eval_steps_per_second": 1.275,
      "step": 17000
    },
    {
      "epoch": 8.5995085995086,
      "grad_norm": 0.20282748341560364,
      "learning_rate": 6.963888888888889e-06,
      "loss": 0.0223,
      "step": 17500
    },
    {
      "epoch": 8.845208845208845,
      "grad_norm": 0.1708468645811081,
      "learning_rate": 5.575e-06,
      "loss": 0.0233,
      "step": 18000
    },
    {
      "epoch": 8.845208845208845,
      "eval_loss": 0.03310641273856163,
      "eval_runtime": 175.167,
      "eval_samples_per_second": 40.698,
      "eval_steps_per_second": 1.273,
      "step": 18000
    },
    {
      "epoch": 9.090909090909092,
      "grad_norm": 0.18372096121311188,
      "learning_rate": 4.186111111111112e-06,
      "loss": 0.0232,
      "step": 18500
    },
    {
      "epoch": 9.336609336609337,
      "grad_norm": 0.4115198254585266,
      "learning_rate": 2.797222222222222e-06,
      "loss": 0.0228,
      "step": 19000
    },
    {
      "epoch": 9.336609336609337,
      "eval_loss": 0.03224072605371475,
      "eval_runtime": 175.1142,
      "eval_samples_per_second": 40.711,
      "eval_steps_per_second": 1.273,
      "step": 19000
    }
  ],
  "logging_steps": 500,
  "max_steps": 20000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}