| { |
| "best_metric": 0.39055171608924866, |
| "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-11-07_18-29-28_experiment/checkpoint-6895", |
| "epoch": 30.996954314720814, |
| "eval_steps": 500, |
| "global_step": 7633, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.065040650406504e-05, |
| "loss": 6.0993, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 8.130081300813008e-05, |
| "loss": 5.0338, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.2072072072072072, |
| "eval_loss": 3.800293445587158, |
| "eval_runtime": 1.7789, |
| "eval_samples_per_second": 1357.054, |
| "eval_steps_per_second": 84.886, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00012195121951219512, |
| "loss": 3.8428, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00016260162601626016, |
| "loss": 2.8076, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.25823462236055444, |
| "eval_loss": 1.976719856262207, |
| "eval_runtime": 1.774, |
| "eval_samples_per_second": 1360.786, |
| "eval_steps_per_second": 85.12, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.0002032520325203252, |
| "loss": 2.2493, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 0.00024390243902439024, |
| "loss": 1.9599, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.0002845528455284553, |
| "loss": 1.7151, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.2894563992327041, |
| "eval_loss": 1.4048570394515991, |
| "eval_runtime": 1.775, |
| "eval_samples_per_second": 1360.028, |
| "eval_steps_per_second": 85.072, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 0.0003252032520325203, |
| "loss": 1.5375, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 0.00036585365853658537, |
| "loss": 1.3954, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.312210304753801, |
| "eval_loss": 1.0894988775253296, |
| "eval_runtime": 1.8067, |
| "eval_samples_per_second": 1336.115, |
| "eval_steps_per_second": 83.576, |
| "step": 985 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 0.0004065040650406504, |
| "loss": 1.2718, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 0.00044715447154471545, |
| "loss": 1.1709, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 0.0004878048780487805, |
| "loss": 1.0895, |
| "step": 1200 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.3280339162692104, |
| "eval_loss": 0.8805840015411377, |
| "eval_runtime": 1.7957, |
| "eval_samples_per_second": 1344.309, |
| "eval_steps_per_second": 84.089, |
| "step": 1231 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 0.0004999506716812021, |
| "loss": 0.9914, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 0.0004997091104496882, |
| "loss": 0.9375, |
| "step": 1400 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.3402263075005411, |
| "eval_loss": 0.7313582301139832, |
| "eval_runtime": 1.7974, |
| "eval_samples_per_second": 1343.047, |
| "eval_steps_per_second": 84.01, |
| "step": 1477 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 0.0004992664502959351, |
| "loss": 0.8598, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 0.0004986230477086575, |
| "loss": 0.8097, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 0.0004977794208410241, |
| "loss": 0.7668, |
| "step": 1700 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.3481269173067019, |
| "eval_loss": 0.6367093324661255, |
| "eval_runtime": 1.8149, |
| "eval_samples_per_second": 1330.094, |
| "eval_steps_per_second": 83.2, |
| "step": 1723 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 0.0004967362490933723, |
| "loss": 0.716, |
| "step": 1800 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 0.0004954943725660643, |
| "loss": 0.6978, |
| "step": 1900 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.35578121617889635, |
| "eval_loss": 0.5603917241096497, |
| "eval_runtime": 1.8199, |
| "eval_samples_per_second": 1326.454, |
| "eval_steps_per_second": 82.972, |
| "step": 1970 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 0.0004940547913829275, |
| "loss": 0.6556, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 0.0004924186648858207, |
| "loss": 0.627, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 0.0004905873107009799, |
| "loss": 0.6133, |
| "step": 2200 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.3603864842472962, |
| "eval_loss": 0.5122300386428833, |
| "eval_runtime": 1.8088, |
| "eval_samples_per_second": 1334.561, |
| "eval_steps_per_second": 83.479, |
| "step": 2216 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 0.0004885622036778897, |
| "loss": 0.5846, |
| "step": 2300 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 0.0004863449747015384, |
| "loss": 0.5681, |
| "step": 2400 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.36240548750905005, |
| "eval_loss": 0.48733416199684143, |
| "eval_runtime": 1.781, |
| "eval_samples_per_second": 1355.443, |
| "eval_steps_per_second": 84.785, |
| "step": 2462 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 0.0004839374093790139, |
| "loss": 0.5537, |
| "step": 2500 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 0.00048134144660149535, |
| "loss": 0.5314, |
| "step": 2600 |
| }, |
| { |
| "epoch": 10.96, |
| "learning_rate": 0.0004785591769828005, |
| "loss": 0.536, |
| "step": 2700 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.36355120655037804, |
| "eval_loss": 0.47042036056518555, |
| "eval_runtime": 1.8486, |
| "eval_samples_per_second": 1305.878, |
| "eval_steps_per_second": 81.685, |
| "step": 2708 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 0.00047559284117574613, |
| "loss": 0.5126, |
| "step": 2800 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 0.0004724448280676768, |
| "loss": 0.511, |
| "step": 2900 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.3647827612202094, |
| "eval_loss": 0.4570145606994629, |
| "eval_runtime": 1.8132, |
| "eval_samples_per_second": 1331.315, |
| "eval_steps_per_second": 83.276, |
| "step": 2955 |
| }, |
| { |
| "epoch": 12.18, |
| "learning_rate": 0.00046911767285661587, |
| "loss": 0.4918, |
| "step": 3000 |
| }, |
| { |
| "epoch": 12.59, |
| "learning_rate": 0.0004656140550095876, |
| "loss": 0.4883, |
| "step": 3100 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.00046193679610475414, |
| "loss": 0.4929, |
| "step": 3200 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.3655067660867164, |
| "eval_loss": 0.4465464651584625, |
| "eval_runtime": 1.8353, |
| "eval_samples_per_second": 1315.328, |
| "eval_steps_per_second": 82.276, |
| "step": 3201 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 0.0004580888575591068, |
| "loss": 0.4634, |
| "step": 3300 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 0.00045407333824353966, |
| "loss": 0.4757, |
| "step": 3400 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.36605909969621653, |
| "eval_loss": 0.43762096762657166, |
| "eval_runtime": 1.8426, |
| "eval_samples_per_second": 1310.127, |
| "eval_steps_per_second": 81.951, |
| "step": 3447 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 0.00044989347198722777, |
| "loss": 0.4605, |
| "step": 3500 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 0.00044555262497331783, |
| "loss": 0.4507, |
| "step": 3600 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.3666450211603484, |
| "eval_loss": 0.42967188358306885, |
| "eval_runtime": 1.7693, |
| "eval_samples_per_second": 1364.379, |
| "eval_steps_per_second": 85.344, |
| "step": 3693 |
| }, |
| { |
| "epoch": 15.03, |
| "learning_rate": 0.0004410542930280316, |
| "loss": 0.4591, |
| "step": 3700 |
| }, |
| { |
| "epoch": 15.43, |
| "learning_rate": 0.0004364020988053623, |
| "loss": 0.4366, |
| "step": 3800 |
| }, |
| { |
| "epoch": 15.84, |
| "learning_rate": 0.00043159978886963223, |
| "loss": 0.4449, |
| "step": 3900 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.3675183053807743, |
| "eval_loss": 0.4223393499851227, |
| "eval_runtime": 1.82, |
| "eval_samples_per_second": 1326.352, |
| "eval_steps_per_second": 82.966, |
| "step": 3940 |
| }, |
| { |
| "epoch": 16.24, |
| "learning_rate": 0.0004266512306782628, |
| "loss": 0.4323, |
| "step": 4000 |
| }, |
| { |
| "epoch": 16.65, |
| "learning_rate": 0.00042156040946718344, |
| "loss": 0.4312, |
| "step": 4100 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.36820125842495355, |
| "eval_loss": 0.4195675849914551, |
| "eval_runtime": 1.8599, |
| "eval_samples_per_second": 1297.945, |
| "eval_steps_per_second": 81.189, |
| "step": 4186 |
| }, |
| { |
| "epoch": 17.06, |
| "learning_rate": 0.00041633142504139133, |
| "loss": 0.4315, |
| "step": 4200 |
| }, |
| { |
| "epoch": 17.46, |
| "learning_rate": 0.00041096848847324417, |
| "loss": 0.4158, |
| "step": 4300 |
| }, |
| { |
| "epoch": 17.87, |
| "learning_rate": 0.0004054759187111451, |
| "loss": 0.4252, |
| "step": 4400 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.36844383737507186, |
| "eval_loss": 0.41086554527282715, |
| "eval_runtime": 1.8281, |
| "eval_samples_per_second": 1320.494, |
| "eval_steps_per_second": 82.599, |
| "step": 4432 |
| }, |
| { |
| "epoch": 18.27, |
| "learning_rate": 0.00039985813910135305, |
| "loss": 0.4129, |
| "step": 4500 |
| }, |
| { |
| "epoch": 18.68, |
| "learning_rate": 0.00039411967382571643, |
| "loss": 0.4102, |
| "step": 4600 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.3685520649066631, |
| "eval_loss": 0.40959808230400085, |
| "eval_runtime": 1.835, |
| "eval_samples_per_second": 1315.545, |
| "eval_steps_per_second": 82.29, |
| "step": 4678 |
| }, |
| { |
| "epoch": 19.09, |
| "learning_rate": 0.0003882651442582019, |
| "loss": 0.4104, |
| "step": 4700 |
| }, |
| { |
| "epoch": 19.49, |
| "learning_rate": 0.00038229926524315015, |
| "loss": 0.3982, |
| "step": 4800 |
| }, |
| { |
| "epoch": 19.9, |
| "learning_rate": 0.0003762268412982577, |
| "loss": 0.4092, |
| "step": 4900 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.3690036349522679, |
| "eval_loss": 0.4085357189178467, |
| "eval_runtime": 1.7636, |
| "eval_samples_per_second": 1368.753, |
| "eval_steps_per_second": 85.618, |
| "step": 4925 |
| }, |
| { |
| "epoch": 20.3, |
| "learning_rate": 0.00037005276274534144, |
| "loss": 0.3863, |
| "step": 5000 |
| }, |
| { |
| "epoch": 20.71, |
| "learning_rate": 0.0003637820017720022, |
| "loss": 0.3941, |
| "step": 5100 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.36924248191853826, |
| "eval_loss": 0.4053109884262085, |
| "eval_runtime": 1.7869, |
| "eval_samples_per_second": 1350.961, |
| "eval_steps_per_second": 84.505, |
| "step": 5171 |
| }, |
| { |
| "epoch": 21.12, |
| "learning_rate": 0.00035741960842735953, |
| "loss": 0.3992, |
| "step": 5200 |
| }, |
| { |
| "epoch": 21.52, |
| "learning_rate": 0.0003509707065550817, |
| "loss": 0.3846, |
| "step": 5300 |
| }, |
| { |
| "epoch": 21.93, |
| "learning_rate": 0.00034444048966698643, |
| "loss": 0.3882, |
| "step": 5400 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.3694365450786329, |
| "eval_loss": 0.40214401483535767, |
| "eval_runtime": 1.8091, |
| "eval_samples_per_second": 1334.335, |
| "eval_steps_per_second": 83.465, |
| "step": 5417 |
| }, |
| { |
| "epoch": 22.34, |
| "learning_rate": 0.0003378342167605362, |
| "loss": 0.3787, |
| "step": 5500 |
| }, |
| { |
| "epoch": 22.74, |
| "learning_rate": 0.00033115720808359495, |
| "loss": 0.3821, |
| "step": 5600 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.3694477410301768, |
| "eval_loss": 0.4013039767742157, |
| "eval_runtime": 1.8106, |
| "eval_samples_per_second": 1333.273, |
| "eval_steps_per_second": 83.399, |
| "step": 5663 |
| }, |
| { |
| "epoch": 23.15, |
| "learning_rate": 0.0003244148408498587, |
| "loss": 0.3724, |
| "step": 5700 |
| }, |
| { |
| "epoch": 23.55, |
| "learning_rate": 0.000317612544908409, |
| "loss": 0.372, |
| "step": 5800 |
| }, |
| { |
| "epoch": 23.96, |
| "learning_rate": 0.000310755798370878, |
| "loss": 0.3769, |
| "step": 5900 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.36991797099502155, |
| "eval_loss": 0.399305135011673, |
| "eval_runtime": 1.7971, |
| "eval_samples_per_second": 1343.28, |
| "eval_steps_per_second": 84.025, |
| "step": 5910 |
| }, |
| { |
| "epoch": 24.37, |
| "learning_rate": 0.00030385012319974537, |
| "loss": 0.3671, |
| "step": 6000 |
| }, |
| { |
| "epoch": 24.77, |
| "learning_rate": 0.00029690108076132154, |
| "loss": 0.3696, |
| "step": 6100 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.3692051620800585, |
| "eval_loss": 0.3980158865451813, |
| "eval_runtime": 1.8534, |
| "eval_samples_per_second": 1302.488, |
| "eval_steps_per_second": 81.473, |
| "step": 6156 |
| }, |
| { |
| "epoch": 25.18, |
| "learning_rate": 0.0002899142673469971, |
| "loss": 0.3577, |
| "step": 6200 |
| }, |
| { |
| "epoch": 25.58, |
| "learning_rate": 0.00028289530966636625, |
| "loss": 0.3604, |
| "step": 6300 |
| }, |
| { |
| "epoch": 25.99, |
| "learning_rate": 0.000275849860315853, |
| "loss": 0.3628, |
| "step": 6400 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.36994782686580535, |
| "eval_loss": 0.39616090059280396, |
| "eval_runtime": 1.7695, |
| "eval_samples_per_second": 1364.248, |
| "eval_steps_per_second": 85.336, |
| "step": 6402 |
| }, |
| { |
| "epoch": 26.4, |
| "learning_rate": 0.0002687835932264908, |
| "loss": 0.3498, |
| "step": 6500 |
| }, |
| { |
| "epoch": 26.8, |
| "learning_rate": 0.0002617021990945197, |
| "loss": 0.3587, |
| "step": 6600 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.37061958395844063, |
| "eval_loss": 0.3926030397415161, |
| "eval_runtime": 1.8394, |
| "eval_samples_per_second": 1312.376, |
| "eval_steps_per_second": 82.091, |
| "step": 6648 |
| }, |
| { |
| "epoch": 27.21, |
| "learning_rate": 0.0002546113807984821, |
| "loss": 0.3534, |
| "step": 6700 |
| }, |
| { |
| "epoch": 27.61, |
| "learning_rate": 0.00024751684880650884, |
| "loss": 0.3492, |
| "step": 6800 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.37057106816841695, |
| "eval_loss": 0.39055171608924866, |
| "eval_runtime": 1.8305, |
| "eval_samples_per_second": 1318.747, |
| "eval_steps_per_second": 82.49, |
| "step": 6895 |
| }, |
| { |
| "epoch": 28.02, |
| "learning_rate": 0.00024042431657749118, |
| "loss": 0.3534, |
| "step": 6900 |
| }, |
| { |
| "epoch": 28.43, |
| "learning_rate": 0.0002333394959598461, |
| "loss": 0.3418, |
| "step": 7000 |
| }, |
| { |
| "epoch": 28.83, |
| "learning_rate": 0.00022626809259157726, |
| "loss": 0.3461, |
| "step": 7100 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.3706457078453764, |
| "eval_loss": 0.3932913541793823, |
| "eval_runtime": 1.7675, |
| "eval_samples_per_second": 1365.756, |
| "eval_steps_per_second": 85.43, |
| "step": 7141 |
| }, |
| { |
| "epoch": 29.24, |
| "learning_rate": 0.00021921580130533828, |
| "loss": 0.3412, |
| "step": 7200 |
| }, |
| { |
| "epoch": 29.64, |
| "learning_rate": 0.0002121883015421973, |
| "loss": 0.3363, |
| "step": 7300 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.3706942236354001, |
| "eval_loss": 0.39353010058403015, |
| "eval_runtime": 1.8169, |
| "eval_samples_per_second": 1328.629, |
| "eval_steps_per_second": 83.108, |
| "step": 7387 |
| }, |
| { |
| "epoch": 30.05, |
| "learning_rate": 0.00020519125277779733, |
| "loss": 0.3422, |
| "step": 7400 |
| }, |
| { |
| "epoch": 30.46, |
| "learning_rate": 0.00019823028996459485, |
| "loss": 0.3356, |
| "step": 7500 |
| }, |
| { |
| "epoch": 30.86, |
| "learning_rate": 0.00019131101899384867, |
| "loss": 0.3337, |
| "step": 7600 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.3702277256544034, |
| "eval_loss": 0.3950214684009552, |
| "eval_runtime": 1.7977, |
| "eval_samples_per_second": 1342.849, |
| "eval_steps_per_second": 83.998, |
| "step": 7633 |
| }, |
| { |
| "epoch": 31.0, |
| "step": 7633, |
| "total_flos": 564540063409152.0, |
| "train_loss": 0.7912082670869731, |
| "train_runtime": 664.7096, |
| "train_samples_per_second": 592.59, |
| "train_steps_per_second": 18.504 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 12300, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "total_flos": 564540063409152.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|