| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25295109612141653, |
| "eval_steps": 750, |
| "global_step": 12000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 13.5, |
| "learning_rate": 1e-05, |
| "loss": 1.8758, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 7.3125, |
| "learning_rate": 1e-05, |
| "loss": 1.7401, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_privacy_sources_loss": 1.0377193689346313, |
| "eval_validation_privacy_sources_runtime": 210.7816, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_agenda_digitale_loss": 1.533129334449768, |
| "eval_validation_agenda_digitale_runtime": 186.9056, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_leggepertutti_loss": 1.3645591735839844, |
| "eval_validation_leggepertutti_runtime": 35.7426, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_altalexprivacy_loss": 1.3818964958190918, |
| "eval_validation_altalexprivacy_runtime": 58.2767, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_mc4_it_loss": 1.948660135269165, |
| "eval_validation_mc4_it_runtime": 78.0751, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_dolma_loss": 1.9987527132034302, |
| "eval_validation_dolma_runtime": 53.8799, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.0625, |
| "learning_rate": 1e-05, |
| "loss": 1.681, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_privacy_sources_loss": 0.9826437830924988, |
| "eval_validation_privacy_sources_runtime": 210.5002, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_agenda_digitale_loss": 1.4970210790634155, |
| "eval_validation_agenda_digitale_runtime": 186.8812, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_leggepertutti_loss": 1.3328404426574707, |
| "eval_validation_leggepertutti_runtime": 35.7381, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_altalexprivacy_loss": 1.3369431495666504, |
| "eval_validation_altalexprivacy_runtime": 58.2702, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_mc4_it_loss": 1.9100874662399292, |
| "eval_validation_mc4_it_runtime": 78.0367, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_dolma_loss": 1.9980002641677856, |
| "eval_validation_dolma_runtime": 53.8563, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.9375, |
| "learning_rate": 1e-05, |
| "loss": 1.6532, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_privacy_sources_loss": 0.9487292170524597, |
| "eval_validation_privacy_sources_runtime": 210.5366, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_agenda_digitale_loss": 1.4758782386779785, |
| "eval_validation_agenda_digitale_runtime": 186.8673, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_leggepertutti_loss": 1.3132394552230835, |
| "eval_validation_leggepertutti_runtime": 35.7296, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_altalexprivacy_loss": 1.3129243850708008, |
| "eval_validation_altalexprivacy_runtime": 58.2737, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_mc4_it_loss": 1.8821855783462524, |
| "eval_validation_mc4_it_runtime": 78.0531, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_dolma_loss": 1.9904078245162964, |
| "eval_validation_dolma_runtime": 53.8818, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 7.1875, |
| "learning_rate": 1e-05, |
| "loss": 1.6405, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_privacy_sources_loss": 0.9250594973564148, |
| "eval_validation_privacy_sources_runtime": 210.437, |
| "eval_validation_privacy_sources_samples_per_second": 1.82, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_agenda_digitale_loss": 1.4562047719955444, |
| "eval_validation_agenda_digitale_runtime": 186.7992, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_leggepertutti_loss": 1.2950125932693481, |
| "eval_validation_leggepertutti_runtime": 35.7393, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_altalexprivacy_loss": 1.291795015335083, |
| "eval_validation_altalexprivacy_runtime": 58.2417, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_mc4_it_loss": 1.8612309694290161, |
| "eval_validation_mc4_it_runtime": 78.0155, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_dolma_loss": 1.9900999069213867, |
| "eval_validation_dolma_runtime": 53.8387, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.21875, |
| "learning_rate": 1e-05, |
| "loss": 1.6146, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_privacy_sources_loss": 0.9070245623588562, |
| "eval_validation_privacy_sources_runtime": 210.504, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_agenda_digitale_loss": 1.4407680034637451, |
| "eval_validation_agenda_digitale_runtime": 186.8241, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_leggepertutti_loss": 1.2863850593566895, |
| "eval_validation_leggepertutti_runtime": 35.7293, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_altalexprivacy_loss": 1.270463228225708, |
| "eval_validation_altalexprivacy_runtime": 58.2503, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_mc4_it_loss": 1.8474267721176147, |
| "eval_validation_mc4_it_runtime": 78.069, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_dolma_loss": 1.9887909889221191, |
| "eval_validation_dolma_runtime": 53.8833, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 12.9375, |
| "learning_rate": 1e-05, |
| "loss": 1.6013, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_privacy_sources_loss": 0.8938004374504089, |
| "eval_validation_privacy_sources_runtime": 210.5349, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_agenda_digitale_loss": 1.430904746055603, |
| "eval_validation_agenda_digitale_runtime": 186.8995, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_leggepertutti_loss": 1.2800484895706177, |
| "eval_validation_leggepertutti_runtime": 35.7362, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_altalexprivacy_loss": 1.2601969242095947, |
| "eval_validation_altalexprivacy_runtime": 58.267, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_mc4_it_loss": 1.8354917764663696, |
| "eval_validation_mc4_it_runtime": 78.0368, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_validation_dolma_loss": 1.9825700521469116, |
| "eval_validation_dolma_runtime": 53.8422, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 6.5, |
| "learning_rate": 1e-05, |
| "loss": 1.6194, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_privacy_sources_loss": 0.8817969560623169, |
| "eval_validation_privacy_sources_runtime": 210.5387, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_agenda_digitale_loss": 1.4225614070892334, |
| "eval_validation_agenda_digitale_runtime": 186.8844, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_leggepertutti_loss": 1.2730649709701538, |
| "eval_validation_leggepertutti_runtime": 35.7438, |
| "eval_validation_leggepertutti_samples_per_second": 1.818, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_altalexprivacy_loss": 1.2494661808013916, |
| "eval_validation_altalexprivacy_runtime": 58.261, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_mc4_it_loss": 1.8241732120513916, |
| "eval_validation_mc4_it_runtime": 78.0351, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_validation_dolma_loss": 1.9821105003356934, |
| "eval_validation_dolma_runtime": 53.8348, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 10.4375, |
| "learning_rate": 1e-05, |
| "loss": 1.5952, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_privacy_sources_loss": 0.8718611001968384, |
| "eval_validation_privacy_sources_runtime": 210.5572, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_agenda_digitale_loss": 1.4152109622955322, |
| "eval_validation_agenda_digitale_runtime": 186.9175, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_leggepertutti_loss": 1.272850513458252, |
| "eval_validation_leggepertutti_runtime": 35.7513, |
| "eval_validation_leggepertutti_samples_per_second": 1.818, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_altalexprivacy_loss": 1.2443125247955322, |
| "eval_validation_altalexprivacy_runtime": 58.2787, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_mc4_it_loss": 1.8142365217208862, |
| "eval_validation_mc4_it_runtime": 78.0695, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_validation_dolma_loss": 1.974460482597351, |
| "eval_validation_dolma_runtime": 53.8685, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 6.71875, |
| "learning_rate": 1e-05, |
| "loss": 1.5684, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_privacy_sources_loss": 0.8657892942428589, |
| "eval_validation_privacy_sources_runtime": 210.5467, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_agenda_digitale_loss": 1.4074455499649048, |
| "eval_validation_agenda_digitale_runtime": 186.8991, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_leggepertutti_loss": 1.2634377479553223, |
| "eval_validation_leggepertutti_runtime": 35.7287, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_altalexprivacy_loss": 1.237537145614624, |
| "eval_validation_altalexprivacy_runtime": 58.2781, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_mc4_it_loss": 1.806999921798706, |
| "eval_validation_mc4_it_runtime": 78.0496, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_validation_dolma_loss": 1.9717859029769897, |
| "eval_validation_dolma_runtime": 53.877, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 6.375, |
| "learning_rate": 1e-05, |
| "loss": 1.5669, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_privacy_sources_loss": 0.8547385334968567, |
| "eval_validation_privacy_sources_runtime": 210.6656, |
| "eval_validation_privacy_sources_samples_per_second": 1.818, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_agenda_digitale_loss": 1.4008606672286987, |
| "eval_validation_agenda_digitale_runtime": 186.9912, |
| "eval_validation_agenda_digitale_samples_per_second": 1.818, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_leggepertutti_loss": 1.2604777812957764, |
| "eval_validation_leggepertutti_runtime": 35.7381, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_altalexprivacy_loss": 1.230379343032837, |
| "eval_validation_altalexprivacy_runtime": 58.2725, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_mc4_it_loss": 1.799519658088684, |
| "eval_validation_mc4_it_runtime": 78.0653, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_validation_dolma_loss": 1.9693360328674316, |
| "eval_validation_dolma_runtime": 53.9102, |
| "eval_validation_dolma_samples_per_second": 1.818, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.90625, |
| "learning_rate": 1e-05, |
| "loss": 1.5739, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_privacy_sources_loss": 0.8504645228385925, |
| "eval_validation_privacy_sources_runtime": 210.717, |
| "eval_validation_privacy_sources_samples_per_second": 1.818, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_agenda_digitale_loss": 1.3965485095977783, |
| "eval_validation_agenda_digitale_runtime": 187.041, |
| "eval_validation_agenda_digitale_samples_per_second": 1.818, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_leggepertutti_loss": 1.253336787223816, |
| "eval_validation_leggepertutti_runtime": 35.7643, |
| "eval_validation_leggepertutti_samples_per_second": 1.817, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_altalexprivacy_loss": 1.2244532108306885, |
| "eval_validation_altalexprivacy_runtime": 58.2913, |
| "eval_validation_altalexprivacy_samples_per_second": 1.818, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_mc4_it_loss": 1.7938719987869263, |
| "eval_validation_mc4_it_runtime": 78.0965, |
| "eval_validation_mc4_it_samples_per_second": 1.818, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_validation_dolma_loss": 1.9664684534072876, |
| "eval_validation_dolma_runtime": 53.9155, |
| "eval_validation_dolma_samples_per_second": 1.818, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 6.46875, |
| "learning_rate": 1e-05, |
| "loss": 1.562, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_privacy_sources_loss": 0.8448366522789001, |
| "eval_validation_privacy_sources_runtime": 210.5401, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_agenda_digitale_loss": 1.391615867614746, |
| "eval_validation_agenda_digitale_runtime": 186.8437, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_leggepertutti_loss": 1.2515813112258911, |
| "eval_validation_leggepertutti_runtime": 35.7517, |
| "eval_validation_leggepertutti_samples_per_second": 1.818, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_altalexprivacy_loss": 1.2215769290924072, |
| "eval_validation_altalexprivacy_runtime": 58.2739, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_mc4_it_loss": 1.7882682085037231, |
| "eval_validation_mc4_it_runtime": 78.0639, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_validation_dolma_loss": 1.965390682220459, |
| "eval_validation_dolma_runtime": 53.8824, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 9.8125, |
| "learning_rate": 1e-05, |
| "loss": 1.5531, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_privacy_sources_loss": 0.8414725065231323, |
| "eval_validation_privacy_sources_runtime": 210.4886, |
| "eval_validation_privacy_sources_samples_per_second": 1.82, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_agenda_digitale_loss": 1.3873320817947388, |
| "eval_validation_agenda_digitale_runtime": 186.8465, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_leggepertutti_loss": 1.2437468767166138, |
| "eval_validation_leggepertutti_runtime": 35.7324, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_altalexprivacy_loss": 1.2164758443832397, |
| "eval_validation_altalexprivacy_runtime": 58.2539, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_mc4_it_loss": 1.783717155456543, |
| "eval_validation_mc4_it_runtime": 78.0228, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_validation_dolma_loss": 1.9636856317520142, |
| "eval_validation_dolma_runtime": 53.8632, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.125, |
| "learning_rate": 1e-05, |
| "loss": 1.5595, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_privacy_sources_loss": 0.8364436030387878, |
| "eval_validation_privacy_sources_runtime": 210.449, |
| "eval_validation_privacy_sources_samples_per_second": 1.82, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_agenda_digitale_loss": 1.3836082220077515, |
| "eval_validation_agenda_digitale_runtime": 186.8368, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_leggepertutti_loss": 1.2467608451843262, |
| "eval_validation_leggepertutti_runtime": 35.73, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_altalexprivacy_loss": 1.2111377716064453, |
| "eval_validation_altalexprivacy_runtime": 58.2474, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_mc4_it_loss": 1.7798333168029785, |
| "eval_validation_mc4_it_runtime": 78.0289, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_validation_dolma_loss": 1.9624452590942383, |
| "eval_validation_dolma_runtime": 53.8564, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.1875, |
| "learning_rate": 1e-05, |
| "loss": 1.5657, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_privacy_sources_loss": 0.8329368829727173, |
| "eval_validation_privacy_sources_runtime": 210.663, |
| "eval_validation_privacy_sources_samples_per_second": 1.818, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_agenda_digitale_loss": 1.3799829483032227, |
| "eval_validation_agenda_digitale_runtime": 186.9433, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_leggepertutti_loss": 1.2420663833618164, |
| "eval_validation_leggepertutti_runtime": 35.7651, |
| "eval_validation_leggepertutti_samples_per_second": 1.817, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_altalexprivacy_loss": 1.206700325012207, |
| "eval_validation_altalexprivacy_runtime": 58.327, |
| "eval_validation_altalexprivacy_samples_per_second": 1.817, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_mc4_it_loss": 1.7747920751571655, |
| "eval_validation_mc4_it_runtime": 78.1384, |
| "eval_validation_mc4_it_samples_per_second": 1.817, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_validation_dolma_loss": 1.957626461982727, |
| "eval_validation_dolma_runtime": 53.9238, |
| "eval_validation_dolma_samples_per_second": 1.817, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.90625, |
| "learning_rate": 1e-05, |
| "loss": 1.564, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_privacy_sources_loss": 0.8280984163284302, |
| "eval_validation_privacy_sources_runtime": 210.585, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_agenda_digitale_loss": 1.3771744966506958, |
| "eval_validation_agenda_digitale_runtime": 186.9251, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.909, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_leggepertutti_loss": 1.2391917705535889, |
| "eval_validation_leggepertutti_runtime": 35.7521, |
| "eval_validation_leggepertutti_samples_per_second": 1.818, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_altalexprivacy_loss": 1.2027921676635742, |
| "eval_validation_altalexprivacy_runtime": 58.2673, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_mc4_it_loss": 1.772462248802185, |
| "eval_validation_mc4_it_runtime": 78.1049, |
| "eval_validation_mc4_it_samples_per_second": 1.818, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_validation_dolma_loss": 1.957063913345337, |
| "eval_validation_dolma_runtime": 53.8823, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 12000 |
| } |
| ], |
| "logging_steps": 750, |
| "max_steps": 94880, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 750, |
| "total_flos": 1.2582113941389312e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|