| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.09191659306478246, |
| "eval_steps": 187, |
| "global_step": 1496, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 9.0625, |
| "learning_rate": 1e-05, |
| "loss": 1.8816, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.953125, |
| "learning_rate": 1e-05, |
| "loss": 1.7744, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_privacy_sources_loss": 1.0537431240081787, |
| "eval_validation_privacy_sources_runtime": 211.2726, |
| "eval_validation_privacy_sources_samples_per_second": 1.813, |
| "eval_validation_privacy_sources_steps_per_second": 0.909, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_agenda_digitale_loss": 1.5260587930679321, |
| "eval_validation_agenda_digitale_runtime": 187.3877, |
| "eval_validation_agenda_digitale_samples_per_second": 1.814, |
| "eval_validation_agenda_digitale_steps_per_second": 0.907, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_leggepertutti_loss": 1.3466496467590332, |
| "eval_validation_leggepertutti_runtime": 35.8176, |
| "eval_validation_leggepertutti_samples_per_second": 1.815, |
| "eval_validation_leggepertutti_steps_per_second": 0.921, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_altalexprivacy_loss": 1.3750169277191162, |
| "eval_validation_altalexprivacy_runtime": 58.3803, |
| "eval_validation_altalexprivacy_samples_per_second": 1.816, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_mc4_it_loss": 1.9072209596633911, |
| "eval_validation_mc4_it_runtime": 78.2378, |
| "eval_validation_mc4_it_samples_per_second": 1.815, |
| "eval_validation_mc4_it_steps_per_second": 0.907, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_validation_dolma_loss": 1.9709844589233398, |
| "eval_validation_dolma_runtime": 54.0008, |
| "eval_validation_dolma_samples_per_second": 1.815, |
| "eval_validation_dolma_steps_per_second": 0.907, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 4.09375, |
| "learning_rate": 1e-05, |
| "loss": 1.7218, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_privacy_sources_loss": 1.0035160779953003, |
| "eval_validation_privacy_sources_runtime": 210.9306, |
| "eval_validation_privacy_sources_samples_per_second": 1.816, |
| "eval_validation_privacy_sources_steps_per_second": 0.91, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_agenda_digitale_loss": 1.4919817447662354, |
| "eval_validation_agenda_digitale_runtime": 187.2244, |
| "eval_validation_agenda_digitale_samples_per_second": 1.816, |
| "eval_validation_agenda_digitale_steps_per_second": 0.908, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_leggepertutti_loss": 1.3234634399414062, |
| "eval_validation_leggepertutti_runtime": 35.8104, |
| "eval_validation_leggepertutti_samples_per_second": 1.815, |
| "eval_validation_leggepertutti_steps_per_second": 0.922, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_altalexprivacy_loss": 1.3424029350280762, |
| "eval_validation_altalexprivacy_runtime": 58.367, |
| "eval_validation_altalexprivacy_samples_per_second": 1.816, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_mc4_it_loss": 1.8769702911376953, |
| "eval_validation_mc4_it_runtime": 78.1952, |
| "eval_validation_mc4_it_samples_per_second": 1.816, |
| "eval_validation_mc4_it_steps_per_second": 0.908, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_dolma_loss": 1.9725910425186157, |
| "eval_validation_dolma_runtime": 53.957, |
| "eval_validation_dolma_samples_per_second": 1.816, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 3.53125, |
| "learning_rate": 1e-05, |
| "loss": 1.7007, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_privacy_sources_loss": 0.9692163467407227, |
| "eval_validation_privacy_sources_runtime": 210.8774, |
| "eval_validation_privacy_sources_samples_per_second": 1.816, |
| "eval_validation_privacy_sources_steps_per_second": 0.91, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_agenda_digitale_loss": 1.4738318920135498, |
| "eval_validation_agenda_digitale_runtime": 187.1789, |
| "eval_validation_agenda_digitale_samples_per_second": 1.816, |
| "eval_validation_agenda_digitale_steps_per_second": 0.908, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_leggepertutti_loss": 1.3062564134597778, |
| "eval_validation_leggepertutti_runtime": 35.7887, |
| "eval_validation_leggepertutti_samples_per_second": 1.816, |
| "eval_validation_leggepertutti_steps_per_second": 0.922, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_altalexprivacy_loss": 1.3234490156173706, |
| "eval_validation_altalexprivacy_runtime": 58.3587, |
| "eval_validation_altalexprivacy_samples_per_second": 1.816, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_mc4_it_loss": 1.8544881343841553, |
| "eval_validation_mc4_it_runtime": 78.1604, |
| "eval_validation_mc4_it_samples_per_second": 1.817, |
| "eval_validation_mc4_it_steps_per_second": 0.908, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_dolma_loss": 1.969956874847412, |
| "eval_validation_dolma_runtime": 53.9662, |
| "eval_validation_dolma_samples_per_second": 1.816, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 3.796875, |
| "learning_rate": 1e-05, |
| "loss": 1.6866, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_privacy_sources_loss": 0.9504958391189575, |
| "eval_validation_privacy_sources_runtime": 210.7635, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_agenda_digitale_loss": 1.4634190797805786, |
| "eval_validation_agenda_digitale_runtime": 187.1135, |
| "eval_validation_agenda_digitale_samples_per_second": 1.817, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_leggepertutti_loss": 1.2935285568237305, |
| "eval_validation_leggepertutti_runtime": 35.7883, |
| "eval_validation_leggepertutti_samples_per_second": 1.816, |
| "eval_validation_leggepertutti_steps_per_second": 0.922, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_altalexprivacy_loss": 1.3104432821273804, |
| "eval_validation_altalexprivacy_runtime": 58.3511, |
| "eval_validation_altalexprivacy_samples_per_second": 1.817, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_mc4_it_loss": 1.8366450071334839, |
| "eval_validation_mc4_it_runtime": 78.1461, |
| "eval_validation_mc4_it_samples_per_second": 1.817, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_dolma_loss": 1.9702107906341553, |
| "eval_validation_dolma_runtime": 53.9537, |
| "eval_validation_dolma_samples_per_second": 1.816, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.59375, |
| "learning_rate": 1e-05, |
| "loss": 1.6602, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_privacy_sources_loss": 0.9363130927085876, |
| "eval_validation_privacy_sources_runtime": 210.7426, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_agenda_digitale_loss": 1.4509363174438477, |
| "eval_validation_agenda_digitale_runtime": 187.1896, |
| "eval_validation_agenda_digitale_samples_per_second": 1.816, |
| "eval_validation_agenda_digitale_steps_per_second": 0.908, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_leggepertutti_loss": 1.2871097326278687, |
| "eval_validation_leggepertutti_runtime": 35.8163, |
| "eval_validation_leggepertutti_samples_per_second": 1.815, |
| "eval_validation_leggepertutti_steps_per_second": 0.921, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_altalexprivacy_loss": 1.2997937202453613, |
| "eval_validation_altalexprivacy_runtime": 58.363, |
| "eval_validation_altalexprivacy_samples_per_second": 1.816, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_mc4_it_loss": 1.8237556219100952, |
| "eval_validation_mc4_it_runtime": 78.2373, |
| "eval_validation_mc4_it_samples_per_second": 1.815, |
| "eval_validation_mc4_it_steps_per_second": 0.907, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_dolma_loss": 1.9671412706375122, |
| "eval_validation_dolma_runtime": 53.9727, |
| "eval_validation_dolma_samples_per_second": 1.816, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 3.203125, |
| "learning_rate": 1e-05, |
| "loss": 1.6552, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_privacy_sources_loss": 0.9231541752815247, |
| "eval_validation_privacy_sources_runtime": 210.8141, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_agenda_digitale_loss": 1.4408273696899414, |
| "eval_validation_agenda_digitale_runtime": 187.3646, |
| "eval_validation_agenda_digitale_samples_per_second": 1.815, |
| "eval_validation_agenda_digitale_steps_per_second": 0.907, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_leggepertutti_loss": 1.277867317199707, |
| "eval_validation_leggepertutti_runtime": 35.8389, |
| "eval_validation_leggepertutti_samples_per_second": 1.814, |
| "eval_validation_leggepertutti_steps_per_second": 0.921, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_altalexprivacy_loss": 1.2885111570358276, |
| "eval_validation_altalexprivacy_runtime": 58.4214, |
| "eval_validation_altalexprivacy_samples_per_second": 1.814, |
| "eval_validation_altalexprivacy_steps_per_second": 0.907, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_mc4_it_loss": 1.81148362159729, |
| "eval_validation_mc4_it_runtime": 78.2293, |
| "eval_validation_mc4_it_samples_per_second": 1.815, |
| "eval_validation_mc4_it_steps_per_second": 0.908, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_validation_dolma_loss": 1.9675589799880981, |
| "eval_validation_dolma_runtime": 53.9854, |
| "eval_validation_dolma_samples_per_second": 1.815, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.625, |
| "learning_rate": 1e-05, |
| "loss": 1.6499, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_privacy_sources_loss": 0.9118366241455078, |
| "eval_validation_privacy_sources_runtime": 210.7511, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_agenda_digitale_loss": 1.4312654733657837, |
| "eval_validation_agenda_digitale_runtime": 187.1317, |
| "eval_validation_agenda_digitale_samples_per_second": 1.817, |
| "eval_validation_agenda_digitale_steps_per_second": 0.908, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_leggepertutti_loss": 1.270119547843933, |
| "eval_validation_leggepertutti_runtime": 35.7811, |
| "eval_validation_leggepertutti_samples_per_second": 1.817, |
| "eval_validation_leggepertutti_steps_per_second": 0.922, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_altalexprivacy_loss": 1.2772430181503296, |
| "eval_validation_altalexprivacy_runtime": 58.3202, |
| "eval_validation_altalexprivacy_samples_per_second": 1.818, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_mc4_it_loss": 1.8016338348388672, |
| "eval_validation_mc4_it_runtime": 78.1274, |
| "eval_validation_mc4_it_samples_per_second": 1.818, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_dolma_loss": 1.967103123664856, |
| "eval_validation_dolma_runtime": 53.9496, |
| "eval_validation_dolma_samples_per_second": 1.817, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 3.40625, |
| "learning_rate": 1e-05, |
| "loss": 1.6352, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_privacy_sources_loss": 0.904914140701294, |
| "eval_validation_privacy_sources_runtime": 210.817, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_agenda_digitale_loss": 1.4257241487503052, |
| "eval_validation_agenda_digitale_runtime": 187.1239, |
| "eval_validation_agenda_digitale_samples_per_second": 1.817, |
| "eval_validation_agenda_digitale_steps_per_second": 0.908, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_leggepertutti_loss": 1.2642766237258911, |
| "eval_validation_leggepertutti_runtime": 35.7971, |
| "eval_validation_leggepertutti_samples_per_second": 1.816, |
| "eval_validation_leggepertutti_steps_per_second": 0.922, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_altalexprivacy_loss": 1.2678091526031494, |
| "eval_validation_altalexprivacy_runtime": 58.3563, |
| "eval_validation_altalexprivacy_samples_per_second": 1.816, |
| "eval_validation_altalexprivacy_steps_per_second": 0.908, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_mc4_it_loss": 1.793602705001831, |
| "eval_validation_mc4_it_runtime": 78.1834, |
| "eval_validation_mc4_it_samples_per_second": 1.816, |
| "eval_validation_mc4_it_steps_per_second": 0.908, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_dolma_loss": 1.9663574695587158, |
| "eval_validation_dolma_runtime": 53.9397, |
| "eval_validation_dolma_samples_per_second": 1.817, |
| "eval_validation_dolma_steps_per_second": 0.908, |
| "step": 1496 |
| } |
| ], |
| "logging_steps": 187, |
| "max_steps": 32550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 187, |
| "total_flos": 6.274280818772804e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|