{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999966663333,
  "global_step": 7499,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013,
      "learning_rate": 1.7006980333014778e-05,
      "loss": 0.6595,
      "r_loss": 0.0,
      "step": 100,
      "steps": 100
    },
    {
      "epoch": 0.027,
      "learning_rate": 1.9565280790988883e-05,
      "loss": 0.6458,
      "r_loss": 0.0,
      "step": 200,
      "steps": 200
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9799385482540556e-05,
      "loss": 0.6391,
      "r_loss": 0.0,
      "step": 300,
      "steps": 300
    },
    {
      "epoch": 0.053,
      "learning_rate": 1.9524571075061864e-05,
      "loss": 0.635,
      "r_loss": 0.0,
      "step": 400,
      "steps": 400
    },
    {
      "epoch": 0.067,
      "learning_rate": 1.925250481165796e-05,
      "loss": 0.6268,
      "r_loss": 0.0,
      "step": 500,
      "steps": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.897769040417927e-05,
      "loss": 0.6208,
      "r_loss": 0.0,
      "step": 600,
      "steps": 600
    },
    {
      "epoch": 0.093,
      "learning_rate": 1.8702875996700577e-05,
      "loss": 0.6161,
      "r_loss": 0.0,
      "step": 700,
      "steps": 700
    },
    {
      "epoch": 0.107,
      "learning_rate": 1.842806158922189e-05,
      "loss": 0.6145,
      "r_loss": 0.0,
      "step": 800,
      "steps": 800
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.8153247181743197e-05,
      "loss": 0.6041,
      "r_loss": 0.0,
      "step": 900,
      "steps": 900
    },
    {
      "epoch": 0.133,
      "learning_rate": 1.7878432774264505e-05,
      "loss": 0.6049,
      "r_loss": 0.0,
      "step": 1000,
      "steps": 1000
    },
    {
      "epoch": 0.147,
      "learning_rate": 1.7603618366785813e-05,
      "loss": 0.6019,
      "r_loss": 0.0,
      "step": 1100,
      "steps": 1100
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.732880395930712e-05,
      "loss": 0.5995,
      "r_loss": 0.0,
      "step": 1200,
      "steps": 1200
    },
    {
      "epoch": 0.173,
      "learning_rate": 1.7056737695903218e-05,
      "loss": 0.5949,
      "r_loss": 0.0,
      "step": 1300,
      "steps": 1300
    },
    {
      "epoch": 0.187,
      "learning_rate": 1.6781923288424526e-05,
      "loss": 0.5914,
      "r_loss": 0.0,
      "step": 1400,
      "steps": 1400
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.6507108880945834e-05,
      "loss": 0.5884,
      "r_loss": 0.0,
      "step": 1500,
      "steps": 1500
    },
    {
      "epoch": 0.213,
      "learning_rate": 1.6232294473467146e-05,
      "loss": 0.5839,
      "r_loss": 0.0,
      "step": 1600,
      "steps": 1600
    },
    {
      "epoch": 0.227,
      "learning_rate": 1.5957480065988454e-05,
      "loss": 0.5816,
      "r_loss": 0.0,
      "step": 1700,
      "steps": 1700
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.5682665658509762e-05,
      "loss": 0.5807,
      "r_loss": 0.0,
      "step": 1800,
      "steps": 1800
    },
    {
      "epoch": 0.253,
      "learning_rate": 1.540785125103107e-05,
      "loss": 0.5821,
      "r_loss": 0.0,
      "step": 1900,
      "steps": 1900
    },
    {
      "epoch": 0.267,
      "learning_rate": 1.5133036843552378e-05,
      "loss": 0.5767,
      "r_loss": 0.0,
      "step": 2000,
      "steps": 2000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.4858222436073688e-05,
      "loss": 0.5704,
      "r_loss": 0.0,
      "step": 2100,
      "steps": 2100
    },
    {
      "epoch": 0.293,
      "learning_rate": 1.4583408028594996e-05,
      "loss": 0.5702,
      "r_loss": 0.0,
      "step": 2200,
      "steps": 2200
    },
    {
      "epoch": 0.307,
      "learning_rate": 1.4308593621116305e-05,
      "loss": 0.573,
      "r_loss": 0.0,
      "step": 2300,
      "steps": 2300
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.4033779213637613e-05,
      "loss": 0.5683,
      "r_loss": 0.0,
      "step": 2400,
      "steps": 2400
    },
    {
      "epoch": 0.333,
      "learning_rate": 1.3758964806158922e-05,
      "loss": 0.5654,
      "r_loss": 0.0,
      "step": 2500,
      "steps": 2500
    },
    {
      "epoch": 0.347,
      "learning_rate": 1.3484150398680231e-05,
      "loss": 0.5613,
      "r_loss": 0.0,
      "step": 2600,
      "steps": 2600
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.320933599120154e-05,
      "loss": 0.5604,
      "r_loss": 0.0,
      "step": 2700,
      "steps": 2700
    },
    {
      "epoch": 0.373,
      "learning_rate": 1.2934521583722849e-05,
      "loss": 0.5551,
      "r_loss": 0.0,
      "step": 2800,
      "steps": 2800
    },
    {
      "epoch": 0.387,
      "learning_rate": 1.2659707176244157e-05,
      "loss": 0.5551,
      "r_loss": 0.0,
      "step": 2900,
      "steps": 2900
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.2384892768765467e-05,
      "loss": 0.5574,
      "r_loss": 0.0,
      "step": 3000,
      "steps": 3000
    },
    {
      "epoch": 0.413,
      "learning_rate": 1.2110078361286775e-05,
      "loss": 0.5539,
      "r_loss": 0.0,
      "step": 3100,
      "steps": 3100
    },
    {
      "epoch": 0.427,
      "learning_rate": 1.1835263953808083e-05,
      "loss": 0.5525,
      "r_loss": 0.0,
      "step": 3200,
      "steps": 3200
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.1560449546329393e-05,
      "loss": 0.5503,
      "r_loss": 0.0,
      "step": 3300,
      "steps": 3300
    },
    {
      "epoch": 0.453,
      "learning_rate": 1.12856351388507e-05,
      "loss": 0.5506,
      "r_loss": 0.0,
      "step": 3400,
      "steps": 3400
    },
    {
      "epoch": 0.467,
      "learning_rate": 1.101082073137201e-05,
      "loss": 0.5422,
      "r_loss": 0.0,
      "step": 3500,
      "steps": 3500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.0736006323893319e-05,
      "loss": 0.5443,
      "r_loss": 0.0,
      "step": 3600,
      "steps": 3600
    },
    {
      "epoch": 0.493,
      "learning_rate": 1.0461191916414627e-05,
      "loss": 0.5422,
      "r_loss": 0.0,
      "step": 3700,
      "steps": 3700
    },
    {
      "epoch": 0.507,
      "learning_rate": 1.0189125653010724e-05,
      "loss": 0.5378,
      "r_loss": 0.0,
      "step": 3800,
      "steps": 3800
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.914311245532032e-06,
      "loss": 0.5401,
      "r_loss": 0.0,
      "step": 3900,
      "steps": 3900
    },
    {
      "epoch": 0.533,
      "learning_rate": 9.63949683805334e-06,
      "loss": 0.538,
      "r_loss": 0.0,
      "step": 4000,
      "steps": 4000
    },
    {
      "epoch": 0.547,
      "learning_rate": 9.36468243057465e-06,
      "loss": 0.5384,
      "r_loss": 0.0,
      "step": 4100,
      "steps": 4100
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.08986802309596e-06,
      "loss": 0.5306,
      "r_loss": 0.0,
      "step": 4200,
      "steps": 4200
    },
    {
      "epoch": 0.573,
      "learning_rate": 8.815053615617266e-06,
      "loss": 0.5317,
      "r_loss": 0.0,
      "step": 4300,
      "steps": 4300
    },
    {
      "epoch": 0.587,
      "learning_rate": 8.540239208138575e-06,
      "loss": 0.5305,
      "r_loss": 0.0,
      "step": 4400,
      "steps": 4400
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.265424800659885e-06,
      "loss": 0.5313,
      "r_loss": 0.0,
      "step": 4500,
      "steps": 4500
    },
    {
      "epoch": 0.613,
      "learning_rate": 7.990610393181193e-06,
      "loss": 0.531,
      "r_loss": 0.0,
      "step": 4600,
      "steps": 4600
    },
    {
      "epoch": 0.627,
      "learning_rate": 7.715795985702503e-06,
      "loss": 0.5285,
      "r_loss": 0.0,
      "step": 4700,
      "steps": 4700
    },
    {
      "epoch": 0.64,
      "learning_rate": 7.440981578223811e-06,
      "loss": 0.5261,
      "r_loss": 0.0,
      "step": 4800,
      "steps": 4800
    },
    {
      "epoch": 0.653,
      "learning_rate": 7.166167170745119e-06,
      "loss": 0.5178,
      "r_loss": 0.0,
      "step": 4900,
      "steps": 4900
    },
    {
      "epoch": 0.667,
      "learning_rate": 6.891352763266428e-06,
      "loss": 0.5199,
      "r_loss": 0.0,
      "step": 5000,
      "steps": 5000
    },
    {
      "epoch": 0.68,
      "learning_rate": 6.616538355787737e-06,
      "loss": 0.5191,
      "r_loss": 0.0,
      "step": 5100,
      "steps": 5100
    },
    {
      "epoch": 0.693,
      "learning_rate": 6.341723948309046e-06,
      "loss": 0.5197,
      "r_loss": 0.0,
      "step": 5200,
      "steps": 5200
    },
    {
      "epoch": 0.707,
      "learning_rate": 6.066909540830355e-06,
      "loss": 0.5172,
      "r_loss": 0.0,
      "step": 5300,
      "steps": 5300
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.792095133351663e-06,
      "loss": 0.5196,
      "r_loss": 0.0,
      "step": 5400,
      "steps": 5400
    },
    {
      "epoch": 0.733,
      "learning_rate": 5.517280725872972e-06,
      "loss": 0.5113,
      "r_loss": 0.0,
      "step": 5500,
      "steps": 5500
    },
    {
      "epoch": 0.747,
      "learning_rate": 5.245214462469068e-06,
      "loss": 0.5123,
      "r_loss": 0.0,
      "step": 5600,
      "steps": 5600
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.973148199065163e-06,
      "loss": 0.513,
      "r_loss": 0.0,
      "step": 5700,
      "steps": 5700
    },
    {
      "epoch": 0.773,
      "learning_rate": 4.698333791586473e-06,
      "loss": 0.5123,
      "r_loss": 0.0,
      "step": 5800,
      "steps": 5800
    },
    {
      "epoch": 0.787,
      "learning_rate": 4.423519384107781e-06,
      "loss": 0.5103,
      "r_loss": 0.0,
      "step": 5900,
      "steps": 5900
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.14870497662909e-06,
      "loss": 0.5086,
      "r_loss": 0.0,
      "step": 6000,
      "steps": 6000
    },
    {
      "epoch": 0.813,
      "learning_rate": 3.876638713225185e-06,
      "loss": 0.5042,
      "r_loss": 0.0,
      "step": 6100,
      "steps": 6100
    },
    {
      "epoch": 0.827,
      "learning_rate": 3.6018243057464943e-06,
      "loss": 0.5042,
      "r_loss": 0.0,
      "step": 6200,
      "steps": 6200
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.3270098982678032e-06,
      "loss": 0.507,
      "r_loss": 0.0,
      "step": 6300,
      "steps": 6300
    },
    {
      "epoch": 0.853,
      "learning_rate": 3.0521954907891117e-06,
      "loss": 0.5014,
      "r_loss": 0.0,
      "step": 6400,
      "steps": 6400
    },
    {
      "epoch": 0.867,
      "learning_rate": 2.7773810833104206e-06,
      "loss": 0.5033,
      "r_loss": 0.0,
      "step": 6500,
      "steps": 6500
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.5025666758317295e-06,
      "loss": 0.4996,
      "r_loss": 0.0,
      "step": 6600,
      "steps": 6600
    },
    {
      "epoch": 0.893,
      "learning_rate": 2.227752268353038e-06,
      "loss": 0.5003,
      "r_loss": 0.0,
      "step": 6700,
      "steps": 6700
    },
    {
      "epoch": 0.907,
      "learning_rate": 1.952937860874347e-06,
      "loss": 0.5008,
      "r_loss": 0.0,
      "step": 6800,
      "steps": 6800
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6781234533956558e-06,
      "loss": 0.5023,
      "r_loss": 0.0,
      "step": 6900,
      "steps": 6900
    },
    {
      "epoch": 0.933,
      "learning_rate": 1.4033090459169645e-06,
      "loss": 0.497,
      "r_loss": 0.0,
      "step": 7000,
      "steps": 7000
    },
    {
      "epoch": 0.947,
      "learning_rate": 1.1284946384382732e-06,
      "loss": 0.5002,
      "r_loss": 0.0,
      "step": 7100,
      "steps": 7100
    },
    {
      "epoch": 0.96,
      "learning_rate": 8.536802309595821e-07,
      "loss": 0.4972,
      "r_loss": 0.0,
      "step": 7200,
      "steps": 7200
    },
    {
      "epoch": 0.973,
      "learning_rate": 5.788658234808909e-07,
      "loss": 0.5017,
      "r_loss": 0.0,
      "step": 7300,
      "steps": 7300
    },
    {
      "epoch": 0.987,
      "learning_rate": 3.040514160021996e-07,
      "loss": 0.4991,
      "r_loss": 0.0,
      "step": 7400,
      "steps": 7400
    },
    {
      "epoch": 1.0,
      "step": 7499,
      "steps": 7499,
      "total_flos": 1222855706542080.0,
      "train_loss": 0.5490713825319938,
      "train_runtime": 153878.2498,
      "train_samples_per_second": 6.238,
      "train_steps_per_second": 0.049
    }
  ],
  "max_steps": 7499,
  "num_train_epochs": 1,
  "total_flos": 1222855706542080.0,
  "trial_name": null,
  "trial_params": null
}