| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 776, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 0.651349663734436, | |
| "learning_rate": 0.0006666666666666666, | |
| "loss": 4.8949, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "grad_norm": 0.1445673555135727, | |
| "learning_rate": 0.0009999301905929286, | |
| "loss": 4.8663, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "grad_norm": 0.1684003323316574, | |
| "learning_rate": 0.0009982557393033759, | |
| "loss": 4.9032, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 0.1190505176782608, | |
| "learning_rate": 0.0009943559569286732, | |
| "loss": 4.8437, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.127385675907135, | |
| "learning_rate": 0.0009882482608435923, | |
| "loss": 4.8599, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "grad_norm": 0.1187027171254158, | |
| "learning_rate": 0.0009799599295015153, | |
| "loss": 4.9368, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "grad_norm": 0.14755752682685852, | |
| "learning_rate": 0.000969527980602239, | |
| "loss": 4.8873, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 0.12241631001234055, | |
| "learning_rate": 0.0009569990057619413, | |
| "loss": 4.8978, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "grad_norm": 0.24350175261497498, | |
| "learning_rate": 0.0009424289624237143, | |
| "loss": 4.9195, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.26752641797065735, | |
| "learning_rate": 0.0009258829239380381, | |
| "loss": 4.8751, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "grad_norm": 0.1479790061712265, | |
| "learning_rate": 0.0009074347889294017, | |
| "loss": 4.8514, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 0.2047603875398636, | |
| "learning_rate": 0.0008871669512471068, | |
| "loss": 4.857, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5360824742268041, | |
| "grad_norm": 0.16178925335407257, | |
| "learning_rate": 0.0008651699319743347, | |
| "loss": 4.8986, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5773195876288659, | |
| "grad_norm": 0.14003640413284302, | |
| "learning_rate": 0.0008415419751390154, | |
| "loss": 4.8395, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 0.14736343920230865, | |
| "learning_rate": 0.0008163886089321493, | |
| "loss": 4.8982, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6597938144329897, | |
| "grad_norm": 0.1361091136932373, | |
| "learning_rate": 0.0007898221743932888, | |
| "loss": 4.9129, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7010309278350515, | |
| "grad_norm": 0.1403878629207611, | |
| "learning_rate": 0.0007619613236681844, | |
| "loss": 4.8745, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7422680412371134, | |
| "grad_norm": 0.1756931096315384, | |
| "learning_rate": 0.000732930490079499, | |
| "loss": 4.9091, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7835051546391752, | |
| "grad_norm": 0.1545019894838333, | |
| "learning_rate": 0.0007028593323773818, | |
| "loss": 4.8666, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.13788259029388428, | |
| "learning_rate": 0.0006718821556520151, | |
| "loss": 4.8873, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.865979381443299, | |
| "grad_norm": 0.1487036943435669, | |
| "learning_rate": 0.000640137311494478, | |
| "loss": 4.8503, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9072164948453608, | |
| "grad_norm": 0.11844358593225479, | |
| "learning_rate": 0.0006077665800849568, | |
| "loss": 4.8911, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9484536082474226, | |
| "grad_norm": 0.14698012173175812, | |
| "learning_rate": 0.0005749145369680407, | |
| "loss": 4.8839, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9896907216494846, | |
| "grad_norm": 0.15790720283985138, | |
| "learning_rate": 0.0005417279073432449, | |
| "loss": 4.8374, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 0.08333557407549637, | |
| "eval_cap_loss": 1.7252829168567951, | |
| "eval_con_loss": 1.9740812087181918, | |
| "eval_loss": 3.699364125728607, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 0.08333557407549637, | |
| "eval_cap_loss": 1.7252829168567951, | |
| "eval_con_loss": 1.9740812087181918, | |
| "eval_loss": 3.699364125728607, | |
| "eval_runtime": 146.0914, | |
| "eval_samples_per_second": 21.226, | |
| "eval_steps_per_second": 2.656, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 0.2379327416419983, | |
| "learning_rate": 0.0005083549107546504, | |
| "loss": 4.8517, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0721649484536082, | |
| "grad_norm": 0.19742196798324585, | |
| "learning_rate": 0.00047494459910644044, | |
| "loss": 4.8684, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1134020618556701, | |
| "grad_norm": 0.16078642010688782, | |
| "learning_rate": 0.0004416461909609119, | |
| "loss": 4.8392, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.1546391752577319, | |
| "grad_norm": 0.08572334796190262, | |
| "learning_rate": 0.00040860840509215494, | |
| "loss": 4.8405, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1958762886597938, | |
| "grad_norm": 0.1408192217350006, | |
| "learning_rate": 0.00037597879627190335, | |
| "loss": 4.9271, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2371134020618557, | |
| "grad_norm": 0.1493539661169052, | |
| "learning_rate": 0.00034390309625410685, | |
| "loss": 4.8443, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2783505154639174, | |
| "grad_norm": 0.1724126935005188, | |
| "learning_rate": 0.0003125245629015395, | |
| "loss": 4.8752, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.3195876288659794, | |
| "grad_norm": 0.14615976810455322, | |
| "learning_rate": 0.00028198334036140874, | |
| "loss": 4.8231, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.3608247422680413, | |
| "grad_norm": 0.13363386690616608, | |
| "learning_rate": 0.00025241583314757326, | |
| "loss": 4.8644, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.402061855670103, | |
| "grad_norm": 0.17828340828418732, | |
| "learning_rate": 0.00022395409692487172, | |
| "loss": 4.8331, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.443298969072165, | |
| "grad_norm": 0.1240629181265831, | |
| "learning_rate": 0.0001967252487164663, | |
| "loss": 4.8677, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.4845360824742269, | |
| "grad_norm": 0.16822531819343567, | |
| "learning_rate": 0.00017085089916835921, | |
| "loss": 4.856, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5257731958762886, | |
| "grad_norm": 0.1542797088623047, | |
| "learning_rate": 0.00014644660940672628, | |
| "loss": 4.8461, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.5670103092783505, | |
| "grad_norm": 0.12938471138477325, | |
| "learning_rate": 0.0001236213749138743, | |
| "loss": 4.8883, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.6082474226804124, | |
| "grad_norm": 0.21958403289318085, | |
| "learning_rate": 0.0001024771387279585, | |
| "loss": 4.8829, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.6494845360824741, | |
| "grad_norm": 0.13377727568149567, | |
| "learning_rate": 8.310833614062651e-05, | |
| "loss": 4.8772, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.690721649484536, | |
| "grad_norm": 0.124494768679142, | |
| "learning_rate": 6.560147292608176e-05, | |
| "loss": 4.8758, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.731958762886598, | |
| "grad_norm": 0.1438254714012146, | |
| "learning_rate": 5.003473898529609e-05, | |
| "loss": 4.8465, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.7731958762886597, | |
| "grad_norm": 0.12780630588531494, | |
| "learning_rate": 3.6477659130931316e-05, | |
| "loss": 4.8857, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8144329896907216, | |
| "grad_norm": 0.12371128052473068, | |
| "learning_rate": 2.4990782572647973e-05, | |
| "loss": 4.9022, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.8556701030927836, | |
| "grad_norm": 0.14497648179531097, | |
| "learning_rate": 1.5625412489637337e-05, | |
| "loss": 4.8414, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.8969072164948453, | |
| "grad_norm": 0.13651418685913086, | |
| "learning_rate": 8.423376898168244e-06, | |
| "loss": 4.8314, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.9381443298969072, | |
| "grad_norm": 0.1279928982257843, | |
| "learning_rate": 3.416841837512952e-06, | |
| "loss": 4.9007, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.9793814432989691, | |
| "grad_norm": 0.13989858329296112, | |
| "learning_rate": 6.281677086071303e-07, | |
| "loss": 4.8226, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 0.08337060838622823, | |
| "eval_cap_loss": 1.716913080246178, | |
| "eval_con_loss": 1.969243642288385, | |
| "eval_loss": 3.6861567269895494, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 0.08337060838622823, | |
| "eval_cap_loss": 1.716913080246178, | |
| "eval_con_loss": 1.969243642288385, | |
| "eval_loss": 3.6861567269895494, | |
| "eval_runtime": 142.9819, | |
| "eval_samples_per_second": 21.688, | |
| "eval_steps_per_second": 2.714, | |
| "step": 776 | |
| } | |
| ], | |
| "logging_steps": 16, | |
| "max_steps": 776, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |