{
  "best_metric": 0.39055171608924866,
  "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-11-07_18-29-28_experiment/checkpoint-6895",
  "epoch": 30.996954314720814,
  "eval_steps": 500,
  "global_step": 7633,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.41,
      "learning_rate": 4.065040650406504e-05,
      "loss": 6.0993,
      "step": 100
    },
    {
      "epoch": 0.81,
      "learning_rate": 8.130081300813008e-05,
      "loss": 5.0338,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.2072072072072072,
      "eval_loss": 3.800293445587158,
      "eval_runtime": 1.7789,
      "eval_samples_per_second": 1357.054,
      "eval_steps_per_second": 84.886,
      "step": 246
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.00012195121951219512,
      "loss": 3.8428,
      "step": 300
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.00016260162601626016,
      "loss": 2.8076,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.25823462236055444,
      "eval_loss": 1.976719856262207,
      "eval_runtime": 1.774,
      "eval_samples_per_second": 1360.786,
      "eval_steps_per_second": 85.12,
      "step": 492
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.0002032520325203252,
      "loss": 2.2493,
      "step": 500
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00024390243902439024,
      "loss": 1.9599,
      "step": 600
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002845528455284553,
      "loss": 1.7151,
      "step": 700
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.2894563992327041,
      "eval_loss": 1.4048570394515991,
      "eval_runtime": 1.775,
      "eval_samples_per_second": 1360.028,
      "eval_steps_per_second": 85.072,
      "step": 738
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.0003252032520325203,
      "loss": 1.5375,
      "step": 800
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00036585365853658537,
      "loss": 1.3954,
      "step": 900
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.312210304753801,
      "eval_loss": 1.0894988775253296,
      "eval_runtime": 1.8067,
      "eval_samples_per_second": 1336.115,
      "eval_steps_per_second": 83.576,
      "step": 985
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.0004065040650406504,
      "loss": 1.2718,
      "step": 1000
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.00044715447154471545,
      "loss": 1.1709,
      "step": 1100
    },
    {
      "epoch": 4.87,
      "learning_rate": 0.0004878048780487805,
      "loss": 1.0895,
      "step": 1200
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.3280339162692104,
      "eval_loss": 0.8805840015411377,
      "eval_runtime": 1.7957,
      "eval_samples_per_second": 1344.309,
      "eval_steps_per_second": 84.089,
      "step": 1231
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.0004999506716812021,
      "loss": 0.9914,
      "step": 1300
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.0004997091104496882,
      "loss": 0.9375,
      "step": 1400
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.3402263075005411,
      "eval_loss": 0.7313582301139832,
      "eval_runtime": 1.7974,
      "eval_samples_per_second": 1343.047,
      "eval_steps_per_second": 84.01,
      "step": 1477
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.0004992664502959351,
      "loss": 0.8598,
      "step": 1500
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.0004986230477086575,
      "loss": 0.8097,
      "step": 1600
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.0004977794208410241,
      "loss": 0.7668,
      "step": 1700
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.3481269173067019,
      "eval_loss": 0.6367093324661255,
      "eval_runtime": 1.8149,
      "eval_samples_per_second": 1330.094,
      "eval_steps_per_second": 83.2,
      "step": 1723
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.0004967362490933723,
      "loss": 0.716,
      "step": 1800
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.0004954943725660643,
      "loss": 0.6978,
      "step": 1900
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.35578121617889635,
      "eval_loss": 0.5603917241096497,
      "eval_runtime": 1.8199,
      "eval_samples_per_second": 1326.454,
      "eval_steps_per_second": 82.972,
      "step": 1970
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.0004940547913829275,
      "loss": 0.6556,
      "step": 2000
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.0004924186648858207,
      "loss": 0.627,
      "step": 2100
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.0004905873107009799,
      "loss": 0.6133,
      "step": 2200
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.3603864842472962,
      "eval_loss": 0.5122300386428833,
      "eval_runtime": 1.8088,
      "eval_samples_per_second": 1334.561,
      "eval_steps_per_second": 83.479,
      "step": 2216
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.0004885622036778897,
      "loss": 0.5846,
      "step": 2300
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.0004863449747015384,
      "loss": 0.5681,
      "step": 2400
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.36240548750905005,
      "eval_loss": 0.48733416199684143,
      "eval_runtime": 1.781,
      "eval_samples_per_second": 1355.443,
      "eval_steps_per_second": 84.785,
      "step": 2462
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.0004839374093790139,
      "loss": 0.5537,
      "step": 2500
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.00048134144660149535,
      "loss": 0.5314,
      "step": 2600
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.0004785591769828005,
      "loss": 0.536,
      "step": 2700
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.36355120655037804,
      "eval_loss": 0.47042036056518555,
      "eval_runtime": 1.8486,
      "eval_samples_per_second": 1305.878,
      "eval_steps_per_second": 81.685,
      "step": 2708
    },
    {
      "epoch": 11.37,
      "learning_rate": 0.00047559284117574613,
      "loss": 0.5126,
      "step": 2800
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.0004724448280676768,
      "loss": 0.511,
      "step": 2900
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.3647827612202094,
      "eval_loss": 0.4570145606994629,
      "eval_runtime": 1.8132,
      "eval_samples_per_second": 1331.315,
      "eval_steps_per_second": 83.276,
      "step": 2955
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.00046911767285661587,
      "loss": 0.4918,
      "step": 3000
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.0004656140550095876,
      "loss": 0.4883,
      "step": 3100
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.00046193679610475414,
      "loss": 0.4929,
      "step": 3200
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.3655067660867164,
      "eval_loss": 0.4465464651584625,
      "eval_runtime": 1.8353,
      "eval_samples_per_second": 1315.328,
      "eval_steps_per_second": 82.276,
      "step": 3201
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.0004580888575591068,
      "loss": 0.4634,
      "step": 3300
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.00045407333824353966,
      "loss": 0.4757,
      "step": 3400
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.36605909969621653,
      "eval_loss": 0.43762096762657166,
      "eval_runtime": 1.8426,
      "eval_samples_per_second": 1310.127,
      "eval_steps_per_second": 81.951,
      "step": 3447
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.00044989347198722777,
      "loss": 0.4605,
      "step": 3500
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.00044555262497331783,
      "loss": 0.4507,
      "step": 3600
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.3666450211603484,
      "eval_loss": 0.42967188358306885,
      "eval_runtime": 1.7693,
      "eval_samples_per_second": 1364.379,
      "eval_steps_per_second": 85.344,
      "step": 3693
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.0004410542930280316,
      "loss": 0.4591,
      "step": 3700
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.0004364020988053623,
      "loss": 0.4366,
      "step": 3800
    },
    {
      "epoch": 15.84,
      "learning_rate": 0.00043159978886963223,
      "loss": 0.4449,
      "step": 3900
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.3675183053807743,
      "eval_loss": 0.4223393499851227,
      "eval_runtime": 1.82,
      "eval_samples_per_second": 1326.352,
      "eval_steps_per_second": 82.966,
      "step": 3940
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.0004266512306782628,
      "loss": 0.4323,
      "step": 4000
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.00042156040946718344,
      "loss": 0.4312,
      "step": 4100
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.36820125842495355,
      "eval_loss": 0.4195675849914551,
      "eval_runtime": 1.8599,
      "eval_samples_per_second": 1297.945,
      "eval_steps_per_second": 81.189,
      "step": 4186
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.00041633142504139133,
      "loss": 0.4315,
      "step": 4200
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.00041096848847324417,
      "loss": 0.4158,
      "step": 4300
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.0004054759187111451,
      "loss": 0.4252,
      "step": 4400
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.36844383737507186,
      "eval_loss": 0.41086554527282715,
      "eval_runtime": 1.8281,
      "eval_samples_per_second": 1320.494,
      "eval_steps_per_second": 82.599,
      "step": 4432
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.00039985813910135305,
      "loss": 0.4129,
      "step": 4500
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.00039411967382571643,
      "loss": 0.4102,
      "step": 4600
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.3685520649066631,
      "eval_loss": 0.40959808230400085,
      "eval_runtime": 1.835,
      "eval_samples_per_second": 1315.545,
      "eval_steps_per_second": 82.29,
      "step": 4678
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.0003882651442582019,
      "loss": 0.4104,
      "step": 4700
    },
    {
      "epoch": 19.49,
      "learning_rate": 0.00038229926524315015,
      "loss": 0.3982,
      "step": 4800
    },
    {
      "epoch": 19.9,
      "learning_rate": 0.0003762268412982577,
      "loss": 0.4092,
      "step": 4900
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.3690036349522679,
      "eval_loss": 0.4085357189178467,
      "eval_runtime": 1.7636,
      "eval_samples_per_second": 1368.753,
      "eval_steps_per_second": 85.618,
      "step": 4925
    },
    {
      "epoch": 20.3,
      "learning_rate": 0.00037005276274534144,
      "loss": 0.3863,
      "step": 5000
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.0003637820017720022,
      "loss": 0.3941,
      "step": 5100
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.36924248191853826,
      "eval_loss": 0.4053109884262085,
      "eval_runtime": 1.7869,
      "eval_samples_per_second": 1350.961,
      "eval_steps_per_second": 84.505,
      "step": 5171
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.00035741960842735953,
      "loss": 0.3992,
      "step": 5200
    },
    {
      "epoch": 21.52,
      "learning_rate": 0.0003509707065550817,
      "loss": 0.3846,
      "step": 5300
    },
    {
      "epoch": 21.93,
      "learning_rate": 0.00034444048966698643,
      "loss": 0.3882,
      "step": 5400
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.3694365450786329,
      "eval_loss": 0.40214401483535767,
      "eval_runtime": 1.8091,
      "eval_samples_per_second": 1334.335,
      "eval_steps_per_second": 83.465,
      "step": 5417
    },
    {
      "epoch": 22.34,
      "learning_rate": 0.0003378342167605362,
      "loss": 0.3787,
      "step": 5500
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.00033115720808359495,
      "loss": 0.3821,
      "step": 5600
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.3694477410301768,
      "eval_loss": 0.4013039767742157,
      "eval_runtime": 1.8106,
      "eval_samples_per_second": 1333.273,
      "eval_steps_per_second": 83.399,
      "step": 5663
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.0003244148408498587,
      "loss": 0.3724,
      "step": 5700
    },
    {
      "epoch": 23.55,
      "learning_rate": 0.000317612544908409,
      "loss": 0.372,
      "step": 5800
    },
    {
      "epoch": 23.96,
      "learning_rate": 0.000310755798370878,
      "loss": 0.3769,
      "step": 5900
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.36991797099502155,
      "eval_loss": 0.399305135011673,
      "eval_runtime": 1.7971,
      "eval_samples_per_second": 1343.28,
      "eval_steps_per_second": 84.025,
      "step": 5910
    },
    {
      "epoch": 24.37,
      "learning_rate": 0.00030385012319974537,
      "loss": 0.3671,
      "step": 6000
    },
    {
      "epoch": 24.77,
      "learning_rate": 0.00029690108076132154,
      "loss": 0.3696,
      "step": 6100
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.3692051620800585,
      "eval_loss": 0.3980158865451813,
      "eval_runtime": 1.8534,
      "eval_samples_per_second": 1302.488,
      "eval_steps_per_second": 81.473,
      "step": 6156
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.0002899142673469971,
      "loss": 0.3577,
      "step": 6200
    },
    {
      "epoch": 25.58,
      "learning_rate": 0.00028289530966636625,
      "loss": 0.3604,
      "step": 6300
    },
    {
      "epoch": 25.99,
      "learning_rate": 0.000275849860315853,
      "loss": 0.3628,
      "step": 6400
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.36994782686580535,
      "eval_loss": 0.39616090059280396,
      "eval_runtime": 1.7695,
      "eval_samples_per_second": 1364.248,
      "eval_steps_per_second": 85.336,
      "step": 6402
    },
    {
      "epoch": 26.4,
      "learning_rate": 0.0002687835932264908,
      "loss": 0.3498,
      "step": 6500
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.0002617021990945197,
      "loss": 0.3587,
      "step": 6600
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.37061958395844063,
      "eval_loss": 0.3926030397415161,
      "eval_runtime": 1.8394,
      "eval_samples_per_second": 1312.376,
      "eval_steps_per_second": 82.091,
      "step": 6648
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.0002546113807984821,
      "loss": 0.3534,
      "step": 6700
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.00024751684880650884,
      "loss": 0.3492,
      "step": 6800
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.37057106816841695,
      "eval_loss": 0.39055171608924866,
      "eval_runtime": 1.8305,
      "eval_samples_per_second": 1318.747,
      "eval_steps_per_second": 82.49,
      "step": 6895
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.00024042431657749118,
      "loss": 0.3534,
      "step": 6900
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.0002333394959598461,
      "loss": 0.3418,
      "step": 7000
    },
    {
      "epoch": 28.83,
      "learning_rate": 0.00022626809259157726,
      "loss": 0.3461,
      "step": 7100
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.3706457078453764,
      "eval_loss": 0.3932913541793823,
      "eval_runtime": 1.7675,
      "eval_samples_per_second": 1365.756,
      "eval_steps_per_second": 85.43,
      "step": 7141
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.00021921580130533828,
      "loss": 0.3412,
      "step": 7200
    },
    {
      "epoch": 29.64,
      "learning_rate": 0.0002121883015421973,
      "loss": 0.3363,
      "step": 7300
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.3706942236354001,
      "eval_loss": 0.39353010058403015,
      "eval_runtime": 1.8169,
      "eval_samples_per_second": 1328.629,
      "eval_steps_per_second": 83.108,
      "step": 7387
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.00020519125277779733,
      "loss": 0.3422,
      "step": 7400
    },
    {
      "epoch": 30.46,
      "learning_rate": 0.00019823028996459485,
      "loss": 0.3356,
      "step": 7500
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.00019131101899384867,
      "loss": 0.3337,
      "step": 7600
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.3702277256544034,
      "eval_loss": 0.3950214684009552,
      "eval_runtime": 1.7977,
      "eval_samples_per_second": 1342.849,
      "eval_steps_per_second": 83.998,
      "step": 7633
    },
    {
      "epoch": 31.0,
      "step": 7633,
      "total_flos": 564540063409152.0,
      "train_loss": 0.7912082670869731,
      "train_runtime": 664.7096,
      "train_samples_per_second": 592.59,
      "train_steps_per_second": 18.504
    }
  ],
  "logging_steps": 100,
  "max_steps": 12300,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 564540063409152.0,
  "trial_name": null,
  "trial_params": null
}