| { | |
| "best_metric": 0.25979954936421096, | |
| "best_model_checkpoint": "./logo-matching-base/checkpoint-510", | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 680, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 72.06623077392578, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.6833, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_adjusted_mutual_info_score": 0.0959962302587181, | |
| "eval_adjusted_rand_score": 0.06912863690017566, | |
| "eval_completeness_score": 0.5109803955924982, | |
| "eval_fowlkes_mallows_score": 0.44395093511191686, | |
| "eval_homogeneity_score": 0.12370192092796624, | |
| "eval_loss": 0.06912863690017566, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 16212, | |
| 31438 | |
| ], | |
| [ | |
| 2986, | |
| 10620 | |
| ] | |
| ], | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9e-06, | |
| "loss": 0.5711, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_adjusted_mutual_info_score": 0.16050612062870448, | |
| "eval_adjusted_rand_score": 0.04788887456487486, | |
| "eval_completeness_score": 0.4801879169228485, | |
| "eval_fowlkes_mallows_score": 0.326241320471177, | |
| "eval_homogeneity_score": 0.24777791806632807, | |
| "eval_loss": 0.04788887456487486, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 30178, | |
| 17472 | |
| ], | |
| [ | |
| 7800, | |
| 5806 | |
| ] | |
| ], | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.5e-06, | |
| "loss": 0.5048, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_adjusted_mutual_info_score": 0.14453579947468986, | |
| "eval_adjusted_rand_score": 0.06941336059571244, | |
| "eval_completeness_score": 0.48534870125531976, | |
| "eval_fowlkes_mallows_score": 0.35623205660821267, | |
| "eval_homogeneity_score": 0.20687927372388243, | |
| "eval_loss": 0.06941336059571244, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 28728, | |
| 18922 | |
| ], | |
| [ | |
| 6962, | |
| 6644 | |
| ] | |
| ], | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.4474, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_adjusted_mutual_info_score": 0.15197952267992879, | |
| "eval_adjusted_rand_score": 0.06395362023662775, | |
| "eval_completeness_score": 0.5053597877227305, | |
| "eval_fowlkes_mallows_score": 0.36009183925771726, | |
| "eval_homogeneity_score": 0.201135083549786, | |
| "eval_loss": 0.06395362023662775, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 27632, | |
| 20018 | |
| ], | |
| [ | |
| 6716, | |
| 6890 | |
| ] | |
| ], | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 53.12451934814453, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.4433, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_adjusted_mutual_info_score": 0.09225927782155577, | |
| "eval_adjusted_rand_score": 0.04497696614477031, | |
| "eval_completeness_score": 0.4856685890606832, | |
| "eval_fowlkes_mallows_score": 0.41794001145778076, | |
| "eval_homogeneity_score": 0.12954647762487131, | |
| "eval_loss": 0.04497696614477031, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 16938, | |
| 30712 | |
| ], | |
| [ | |
| 3792, | |
| 9814 | |
| ] | |
| ], | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7e-06, | |
| "loss": 0.4582, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_adjusted_mutual_info_score": 0.1550123095541971, | |
| "eval_adjusted_rand_score": 0.1235971988422464, | |
| "eval_completeness_score": 0.7625668447266793, | |
| "eval_fowlkes_mallows_score": 0.5091255095660694, | |
| "eval_homogeneity_score": 0.13490749128374505, | |
| "eval_loss": 0.1235971988422464, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 13668, | |
| 33982 | |
| ], | |
| [ | |
| 754, | |
| 12852 | |
| ] | |
| ], | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 24.434818267822266, | |
| "learning_rate": 6.5000000000000004e-06, | |
| "loss": 0.4384, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_adjusted_mutual_info_score": 0.17177361413411174, | |
| "eval_adjusted_rand_score": 0.08374811610033726, | |
| "eval_completeness_score": 0.5222737901409524, | |
| "eval_fowlkes_mallows_score": 0.3647858508374196, | |
| "eval_homogeneity_score": 0.2242118568770657, | |
| "eval_loss": 0.08374811610033726, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 29196, | |
| 18454 | |
| ], | |
| [ | |
| 6850, | |
| 6756 | |
| ] | |
| ], | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6e-06, | |
| "loss": 0.4388, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_adjusted_mutual_info_score": 0.20449439835224686, | |
| "eval_adjusted_rand_score": 0.20212814342460073, | |
| "eval_completeness_score": 0.5850787856442333, | |
| "eval_fowlkes_mallows_score": 0.4533371718722817, | |
| "eval_homogeneity_score": 0.22520931487447624, | |
| "eval_loss": 0.20212814342460073, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 30796, | |
| 16854 | |
| ], | |
| [ | |
| 5202, | |
| 8404 | |
| ] | |
| ], | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.500000000000001e-06, | |
| "loss": 0.4136, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_adjusted_mutual_info_score": 0.11177382718405922, | |
| "eval_adjusted_rand_score": 0.09640002189335507, | |
| "eval_completeness_score": 0.6506809526372479, | |
| "eval_fowlkes_mallows_score": 0.4899932693994939, | |
| "eval_homogeneity_score": 0.10817341204672741, | |
| "eval_loss": 0.09640002189335507, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 12770, | |
| 34880 | |
| ], | |
| [ | |
| 1174, | |
| 12432 | |
| ] | |
| ], | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4148, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_adjusted_mutual_info_score": 0.13291287868141516, | |
| "eval_adjusted_rand_score": 0.0173768729388201, | |
| "eval_completeness_score": 0.46335582053405955, | |
| "eval_fowlkes_mallows_score": 0.3147845140860169, | |
| "eval_homogeneity_score": 0.20737389396161876, | |
| "eval_loss": 0.0173768729388201, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 28346, | |
| 19304 | |
| ], | |
| [ | |
| 7786, | |
| 5820 | |
| ] | |
| ], | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.4146, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_adjusted_mutual_info_score": 0.15463237739434937, | |
| "eval_adjusted_rand_score": 0.05775589302463435, | |
| "eval_completeness_score": 0.5013447616164763, | |
| "eval_fowlkes_mallows_score": 0.34999174740596783, | |
| "eval_homogeneity_score": 0.21319532171325858, | |
| "eval_loss": 0.05775589302463435, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 28252, | |
| 19398 | |
| ], | |
| [ | |
| 7026, | |
| 6580 | |
| ] | |
| ], | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.4096, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_adjusted_mutual_info_score": 0.15954128854018185, | |
| "eval_adjusted_rand_score": 0.08353262118433151, | |
| "eval_completeness_score": 0.48657807669089076, | |
| "eval_fowlkes_mallows_score": 0.3488784926098972, | |
| "eval_homogeneity_score": 0.23781381627812734, | |
| "eval_loss": 0.08353262118433151, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 31128, | |
| 16522 | |
| ], | |
| [ | |
| 7482, | |
| 6124 | |
| ] | |
| ], | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5e-06, | |
| "loss": 0.3973, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_adjusted_mutual_info_score": 0.19041946370338364, | |
| "eval_adjusted_rand_score": 0.12553878006056823, | |
| "eval_completeness_score": 0.5046617692993569, | |
| "eval_fowlkes_mallows_score": 0.36189400936638344, | |
| "eval_homogeneity_score": 0.27375639307416655, | |
| "eval_loss": 0.12553878006056823, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 33812, | |
| 13838 | |
| ], | |
| [ | |
| 7670, | |
| 5936 | |
| ] | |
| ], | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4051, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_adjusted_mutual_info_score": 0.18691214822396793, | |
| "eval_adjusted_rand_score": 0.18158266255584393, | |
| "eval_completeness_score": 0.5245970271191535, | |
| "eval_fowlkes_mallows_score": 0.41690193788215973, | |
| "eval_homogeneity_score": 0.24324643119902978, | |
| "eval_loss": 0.18158266255584393, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 33010, | |
| 14640 | |
| ], | |
| [ | |
| 6422, | |
| 7184 | |
| ] | |
| ], | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 20.364652633666992, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.4062, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_adjusted_mutual_info_score": 0.21823634710165685, | |
| "eval_adjusted_rand_score": 0.25979954936421096, | |
| "eval_completeness_score": 0.5641057686568595, | |
| "eval_fowlkes_mallows_score": 0.47949730143044716, | |
| "eval_homogeneity_score": 0.2648407343665406, | |
| "eval_loss": 0.25979954936421096, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 33538, | |
| 14112 | |
| ], | |
| [ | |
| 5216, | |
| 8390 | |
| ] | |
| ], | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.4025, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_adjusted_mutual_info_score": 0.10056939420043334, | |
| "eval_adjusted_rand_score": 0.0907629141131465, | |
| "eval_completeness_score": 0.5696556835173626, | |
| "eval_fowlkes_mallows_score": 0.4759933991482618, | |
| "eval_homogeneity_score": 0.11380033927304208, | |
| "eval_loss": 0.0907629141131465, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 14202, | |
| 33448 | |
| ], | |
| [ | |
| 1794, | |
| 11812 | |
| ] | |
| ], | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.4043, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_adjusted_mutual_info_score": 0.11845707888523377, | |
| "eval_adjusted_rand_score": 0.06151994460143414, | |
| "eval_completeness_score": 0.5431310507543884, | |
| "eval_fowlkes_mallows_score": 0.4323980307250435, | |
| "eval_homogeneity_score": 0.14340786222528185, | |
| "eval_loss": 0.06151994460143414, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 16966, | |
| 30684 | |
| ], | |
| [ | |
| 3408, | |
| 10198 | |
| ] | |
| ], | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.4013, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_adjusted_mutual_info_score": 0.20098591140511965, | |
| "eval_adjusted_rand_score": 0.24115050477114428, | |
| "eval_completeness_score": 0.5343711342342489, | |
| "eval_fowlkes_mallows_score": 0.45788735752549203, | |
| "eval_homogeneity_score": 0.2597787469313228, | |
| "eval_loss": 0.24115050477114428, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 34176, | |
| 13474 | |
| ], | |
| [ | |
| 5818, | |
| 7788 | |
| ] | |
| ], | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 0.4006, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_adjusted_mutual_info_score": 0.252153502376998, | |
| "eval_adjusted_rand_score": 0.24023922903374736, | |
| "eval_completeness_score": 0.5460417717065094, | |
| "eval_fowlkes_mallows_score": 0.4233209236936553, | |
| "eval_homogeneity_score": 0.34453853702602516, | |
| "eval_loss": 0.24023922903374736, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 37986, | |
| 9664 | |
| ], | |
| [ | |
| 7382, | |
| 6224 | |
| ] | |
| ], | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.4044, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_adjusted_mutual_info_score": 0.08179538296319702, | |
| "eval_adjusted_rand_score": 0.040227652358330604, | |
| "eval_completeness_score": 0.48262386100058413, | |
| "eval_fowlkes_mallows_score": 0.423484876527761, | |
| "eval_homogeneity_score": 0.11562820805581293, | |
| "eval_loss": 0.040227652358330604, | |
| "eval_pair_confusion_matrix": [ | |
| [ | |
| 15374, | |
| 32276 | |
| ], | |
| [ | |
| 3428, | |
| 10178 | |
| ] | |
| ], | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 680, | |
| "total_flos": 0.0, | |
| "train_loss": 0.44298483904670266, | |
| "train_runtime": 986.129, | |
| "train_samples_per_second": 21.843, | |
| "train_steps_per_second": 0.69 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 680, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |