| { | |
| "best_metric": 1.6187845468521118, | |
| "best_model_checkpoint": "./Sustainability_model/checkpoint-2000", | |
| "epoch": 1.220703125, | |
| "eval_steps": 100, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01220703125, | |
| "grad_norm": 3.0088555812835693, | |
| "learning_rate": 2e-05, | |
| "loss": 2.1582, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0244140625, | |
| "grad_norm": 5.197660446166992, | |
| "learning_rate": 2e-05, | |
| "loss": 2.0856, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03662109375, | |
| "grad_norm": 3.234564781188965, | |
| "learning_rate": 2e-05, | |
| "loss": 1.9269, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.048828125, | |
| "grad_norm": 7.08390474319458, | |
| "learning_rate": 2e-05, | |
| "loss": 1.888, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.048828125, | |
| "eval_loss": 1.8261231184005737, | |
| "eval_runtime": 590.9102, | |
| "eval_samples_per_second": 3.468, | |
| "eval_steps_per_second": 0.435, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06103515625, | |
| "grad_norm": 3.1646361351013184, | |
| "learning_rate": 2e-05, | |
| "loss": 1.8649, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0732421875, | |
| "grad_norm": 6.104555130004883, | |
| "learning_rate": 2e-05, | |
| "loss": 1.742, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08544921875, | |
| "grad_norm": 2.9724113941192627, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7567, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.09765625, | |
| "grad_norm": 6.2468791007995605, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7452, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09765625, | |
| "eval_loss": 1.7315690517425537, | |
| "eval_runtime": 590.974, | |
| "eval_samples_per_second": 3.467, | |
| "eval_steps_per_second": 0.435, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10986328125, | |
| "grad_norm": 2.97963285446167, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6694, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1220703125, | |
| "grad_norm": 4.771264553070068, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6833, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13427734375, | |
| "grad_norm": 2.825491428375244, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6958, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.146484375, | |
| "grad_norm": 4.647068977355957, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7428, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.146484375, | |
| "eval_loss": 1.6999598741531372, | |
| "eval_runtime": 590.2857, | |
| "eval_samples_per_second": 3.471, | |
| "eval_steps_per_second": 0.435, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15869140625, | |
| "grad_norm": 3.1953535079956055, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7458, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.1708984375, | |
| "grad_norm": 5.5873799324035645, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6244, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18310546875, | |
| "grad_norm": 2.5425360202789307, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6862, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1953125, | |
| "grad_norm": 4.082971572875977, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6836, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1953125, | |
| "eval_loss": 1.6864606142044067, | |
| "eval_runtime": 589.1989, | |
| "eval_samples_per_second": 3.478, | |
| "eval_steps_per_second": 0.436, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.20751953125, | |
| "grad_norm": 2.6709253787994385, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6939, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2197265625, | |
| "grad_norm": 5.410455703735352, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5974, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23193359375, | |
| "grad_norm": 2.8631389141082764, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6609, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.244140625, | |
| "grad_norm": 3.2581229209899902, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6251, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.244140625, | |
| "eval_loss": 1.67488431930542, | |
| "eval_runtime": 589.2638, | |
| "eval_samples_per_second": 3.477, | |
| "eval_steps_per_second": 0.436, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25634765625, | |
| "grad_norm": 2.8811697959899902, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7135, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2685546875, | |
| "grad_norm": 5.96162748336792, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6709, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28076171875, | |
| "grad_norm": 2.4651806354522705, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6504, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.29296875, | |
| "grad_norm": 4.032615661621094, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7128, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.29296875, | |
| "eval_loss": 1.668798565864563, | |
| "eval_runtime": 589.1105, | |
| "eval_samples_per_second": 3.478, | |
| "eval_steps_per_second": 0.436, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.30517578125, | |
| "grad_norm": 2.694554328918457, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7093, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3173828125, | |
| "grad_norm": 4.213258743286133, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6899, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.32958984375, | |
| "grad_norm": 2.69679594039917, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6451, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.341796875, | |
| "grad_norm": 3.6988604068756104, | |
| "learning_rate": 2e-05, | |
| "loss": 1.631, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.341796875, | |
| "eval_loss": 1.662984013557434, | |
| "eval_runtime": 588.5535, | |
| "eval_samples_per_second": 3.481, | |
| "eval_steps_per_second": 0.437, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.35400390625, | |
| "grad_norm": 2.6815237998962402, | |
| "learning_rate": 2e-05, | |
| "loss": 1.688, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.3662109375, | |
| "grad_norm": 5.819088459014893, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6649, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.37841796875, | |
| "grad_norm": 2.524092674255371, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6305, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.390625, | |
| "grad_norm": 4.0569963455200195, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6493, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.390625, | |
| "eval_loss": 1.6568603515625, | |
| "eval_runtime": 588.2081, | |
| "eval_samples_per_second": 3.483, | |
| "eval_steps_per_second": 0.437, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.40283203125, | |
| "grad_norm": 2.565763473510742, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6983, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.4150390625, | |
| "grad_norm": 6.5800676345825195, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6565, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.42724609375, | |
| "grad_norm": 2.1741669178009033, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7585, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.439453125, | |
| "grad_norm": 3.838252305984497, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6141, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.439453125, | |
| "eval_loss": 1.6529587507247925, | |
| "eval_runtime": 588.0827, | |
| "eval_samples_per_second": 3.484, | |
| "eval_steps_per_second": 0.437, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.45166015625, | |
| "grad_norm": 4.486364841461182, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6489, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.4638671875, | |
| "grad_norm": 3.693453311920166, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6026, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.47607421875, | |
| "grad_norm": 2.4286513328552246, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5639, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.48828125, | |
| "grad_norm": 3.9820656776428223, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6621, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.48828125, | |
| "eval_loss": 1.6506658792495728, | |
| "eval_runtime": 588.1468, | |
| "eval_samples_per_second": 3.484, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.50048828125, | |
| "grad_norm": 2.915191411972046, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6281, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5126953125, | |
| "grad_norm": 4.406491756439209, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7108, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.52490234375, | |
| "grad_norm": 2.6505398750305176, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7151, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.537109375, | |
| "grad_norm": 3.872833728790283, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5925, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.537109375, | |
| "eval_loss": 1.6442919969558716, | |
| "eval_runtime": 588.2624, | |
| "eval_samples_per_second": 3.483, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.54931640625, | |
| "grad_norm": 2.210282802581787, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5845, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.5615234375, | |
| "grad_norm": 3.7344298362731934, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5994, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.57373046875, | |
| "grad_norm": 2.3247945308685303, | |
| "learning_rate": 2e-05, | |
| "loss": 1.622, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.5859375, | |
| "grad_norm": 4.974765300750732, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6571, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5859375, | |
| "eval_loss": 1.6453276872634888, | |
| "eval_runtime": 588.5916, | |
| "eval_samples_per_second": 3.481, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.59814453125, | |
| "grad_norm": 2.6029038429260254, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6854, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6103515625, | |
| "grad_norm": 3.8252599239349365, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6875, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.62255859375, | |
| "grad_norm": 2.5335938930511475, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5917, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.634765625, | |
| "grad_norm": 3.6627395153045654, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6078, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.634765625, | |
| "eval_loss": 1.638580322265625, | |
| "eval_runtime": 588.7972, | |
| "eval_samples_per_second": 3.48, | |
| "eval_steps_per_second": 0.436, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.64697265625, | |
| "grad_norm": 2.5015482902526855, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6793, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.6591796875, | |
| "grad_norm": 3.70072340965271, | |
| "learning_rate": 2e-05, | |
| "loss": 1.661, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.67138671875, | |
| "grad_norm": 2.6039609909057617, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6349, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.68359375, | |
| "grad_norm": 3.3291618824005127, | |
| "learning_rate": 2e-05, | |
| "loss": 1.616, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.68359375, | |
| "eval_loss": 1.6347644329071045, | |
| "eval_runtime": 588.5837, | |
| "eval_samples_per_second": 3.481, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.69580078125, | |
| "grad_norm": 2.6853315830230713, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7087, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.7080078125, | |
| "grad_norm": 3.296851396560669, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6676, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.72021484375, | |
| "grad_norm": 2.3841185569763184, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6212, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.732421875, | |
| "grad_norm": 3.612088441848755, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6473, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.732421875, | |
| "eval_loss": 1.6339186429977417, | |
| "eval_runtime": 588.3073, | |
| "eval_samples_per_second": 3.483, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.74462890625, | |
| "grad_norm": 2.6555330753326416, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6643, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.7568359375, | |
| "grad_norm": 4.533504486083984, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6236, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.76904296875, | |
| "grad_norm": 2.2276220321655273, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6783, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "grad_norm": 3.533113956451416, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6123, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "eval_loss": 1.628023386001587, | |
| "eval_runtime": 588.6386, | |
| "eval_samples_per_second": 3.481, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.79345703125, | |
| "grad_norm": 2.2332117557525635, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6795, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.8056640625, | |
| "grad_norm": 4.059207916259766, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5915, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.81787109375, | |
| "grad_norm": 2.46692156791687, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6456, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.830078125, | |
| "grad_norm": 3.602611780166626, | |
| "learning_rate": 2e-05, | |
| "loss": 1.564, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.830078125, | |
| "eval_loss": 1.6274890899658203, | |
| "eval_runtime": 588.2617, | |
| "eval_samples_per_second": 3.483, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.84228515625, | |
| "grad_norm": 2.20896315574646, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6469, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.8544921875, | |
| "grad_norm": 4.329638481140137, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5571, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.86669921875, | |
| "grad_norm": 1.9945570230484009, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6461, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.87890625, | |
| "grad_norm": 3.428687334060669, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6564, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.87890625, | |
| "eval_loss": 1.6232744455337524, | |
| "eval_runtime": 588.0784, | |
| "eval_samples_per_second": 3.484, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.89111328125, | |
| "grad_norm": 2.5266592502593994, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5607, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.9033203125, | |
| "grad_norm": 3.4067883491516113, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6394, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.91552734375, | |
| "grad_norm": 2.0028152465820312, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6908, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.927734375, | |
| "grad_norm": 2.8983733654022217, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5646, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.927734375, | |
| "eval_loss": 1.6202832460403442, | |
| "eval_runtime": 587.8115, | |
| "eval_samples_per_second": 3.486, | |
| "eval_steps_per_second": 0.437, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.93994140625, | |
| "grad_norm": 2.6408419609069824, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5905, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.9521484375, | |
| "grad_norm": 3.899275302886963, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6138, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.96435546875, | |
| "grad_norm": 2.338137149810791, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6963, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.9765625, | |
| "grad_norm": 3.6352951526641846, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5849, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9765625, | |
| "eval_loss": 1.6187845468521118, | |
| "eval_runtime": 587.8791, | |
| "eval_samples_per_second": 3.485, | |
| "eval_steps_per_second": 0.437, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.98876953125, | |
| "grad_norm": 2.4254846572875977, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6391, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.0009765625, | |
| "grad_norm": 2.079317569732666, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6238, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.01318359375, | |
| "grad_norm": 2.1677002906799316, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5543, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.025390625, | |
| "grad_norm": 2.4266505241394043, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4812, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.025390625, | |
| "eval_loss": 1.6256210803985596, | |
| "eval_runtime": 585.954, | |
| "eval_samples_per_second": 3.497, | |
| "eval_steps_per_second": 0.439, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.03759765625, | |
| "grad_norm": 2.4697976112365723, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5147, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.0498046875, | |
| "grad_norm": 2.3185527324676514, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5198, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.06201171875, | |
| "grad_norm": 2.7304463386535645, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5237, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.07421875, | |
| "grad_norm": 2.616072177886963, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5598, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.07421875, | |
| "eval_loss": 1.623382568359375, | |
| "eval_runtime": 586.1381, | |
| "eval_samples_per_second": 3.496, | |
| "eval_steps_per_second": 0.438, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.08642578125, | |
| "grad_norm": 2.7308809757232666, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5691, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.0986328125, | |
| "grad_norm": 2.6916451454162598, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5102, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.11083984375, | |
| "grad_norm": 2.960580348968506, | |
| "learning_rate": 2e-05, | |
| "loss": 1.539, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.123046875, | |
| "grad_norm": 2.5936009883880615, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5657, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.123046875, | |
| "eval_loss": 1.6226788759231567, | |
| "eval_runtime": 586.4284, | |
| "eval_samples_per_second": 3.494, | |
| "eval_steps_per_second": 0.438, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.13525390625, | |
| "grad_norm": 2.8930952548980713, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4579, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.1474609375, | |
| "grad_norm": 2.8736538887023926, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5127, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.15966796875, | |
| "grad_norm": 4.384296894073486, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5988, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.171875, | |
| "grad_norm": 2.728992223739624, | |
| "learning_rate": 2e-05, | |
| "loss": 1.51, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.171875, | |
| "eval_loss": 1.6226541996002197, | |
| "eval_runtime": 586.345, | |
| "eval_samples_per_second": 3.495, | |
| "eval_steps_per_second": 0.438, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.18408203125, | |
| "grad_norm": 2.651820421218872, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5226, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.1962890625, | |
| "grad_norm": 2.717193126678467, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4966, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.20849609375, | |
| "grad_norm": 2.9759628772735596, | |
| "learning_rate": 2e-05, | |
| "loss": 1.526, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.220703125, | |
| "grad_norm": 2.8832080364227295, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5452, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.220703125, | |
| "eval_loss": 1.6226392984390259, | |
| "eval_runtime": 586.3744, | |
| "eval_samples_per_second": 3.494, | |
| "eval_steps_per_second": 0.438, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 4096, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 6, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.924062136972083e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |