| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.07904721753794267, |
| "eval_steps": 750, |
| "global_step": 3750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 13.5, |
| "learning_rate": 1e-05, |
| "loss": 1.8758, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 7.3125, |
| "learning_rate": 1e-05, |
| "loss": 1.7401, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_privacy_sources_loss": 1.0377193689346313, |
| "eval_validation_privacy_sources_runtime": 210.7816, |
| "eval_validation_privacy_sources_samples_per_second": 1.817, |
| "eval_validation_privacy_sources_steps_per_second": 0.911, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_agenda_digitale_loss": 1.533129334449768, |
| "eval_validation_agenda_digitale_runtime": 186.9056, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_leggepertutti_loss": 1.3645591735839844, |
| "eval_validation_leggepertutti_runtime": 35.7426, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_altalexprivacy_loss": 1.3818964958190918, |
| "eval_validation_altalexprivacy_runtime": 58.2767, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_mc4_it_loss": 1.948660135269165, |
| "eval_validation_mc4_it_runtime": 78.0751, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_validation_dolma_loss": 1.9987527132034302, |
| "eval_validation_dolma_runtime": 53.8799, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.0625, |
| "learning_rate": 1e-05, |
| "loss": 1.681, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_privacy_sources_loss": 0.9826437830924988, |
| "eval_validation_privacy_sources_runtime": 210.5002, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_agenda_digitale_loss": 1.4970210790634155, |
| "eval_validation_agenda_digitale_runtime": 186.8812, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_leggepertutti_loss": 1.3328404426574707, |
| "eval_validation_leggepertutti_runtime": 35.7381, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_altalexprivacy_loss": 1.3369431495666504, |
| "eval_validation_altalexprivacy_runtime": 58.2702, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_mc4_it_loss": 1.9100874662399292, |
| "eval_validation_mc4_it_runtime": 78.0367, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_validation_dolma_loss": 1.9980002641677856, |
| "eval_validation_dolma_runtime": 53.8563, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.9375, |
| "learning_rate": 1e-05, |
| "loss": 1.6532, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_privacy_sources_loss": 0.9487292170524597, |
| "eval_validation_privacy_sources_runtime": 210.5366, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_agenda_digitale_loss": 1.4758782386779785, |
| "eval_validation_agenda_digitale_runtime": 186.8673, |
| "eval_validation_agenda_digitale_samples_per_second": 1.819, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_leggepertutti_loss": 1.3132394552230835, |
| "eval_validation_leggepertutti_runtime": 35.7296, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_altalexprivacy_loss": 1.3129243850708008, |
| "eval_validation_altalexprivacy_runtime": 58.2737, |
| "eval_validation_altalexprivacy_samples_per_second": 1.819, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_mc4_it_loss": 1.8821855783462524, |
| "eval_validation_mc4_it_runtime": 78.0531, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_validation_dolma_loss": 1.9904078245162964, |
| "eval_validation_dolma_runtime": 53.8818, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 7.1875, |
| "learning_rate": 1e-05, |
| "loss": 1.6405, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_privacy_sources_loss": 0.9250594973564148, |
| "eval_validation_privacy_sources_runtime": 210.437, |
| "eval_validation_privacy_sources_samples_per_second": 1.82, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_agenda_digitale_loss": 1.4562047719955444, |
| "eval_validation_agenda_digitale_runtime": 186.7992, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_leggepertutti_loss": 1.2950125932693481, |
| "eval_validation_leggepertutti_runtime": 35.7393, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.923, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_altalexprivacy_loss": 1.291795015335083, |
| "eval_validation_altalexprivacy_runtime": 58.2417, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_mc4_it_loss": 1.8612309694290161, |
| "eval_validation_mc4_it_runtime": 78.0155, |
| "eval_validation_mc4_it_samples_per_second": 1.82, |
| "eval_validation_mc4_it_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_validation_dolma_loss": 1.9900999069213867, |
| "eval_validation_dolma_runtime": 53.8387, |
| "eval_validation_dolma_samples_per_second": 1.82, |
| "eval_validation_dolma_steps_per_second": 0.91, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.21875, |
| "learning_rate": 1e-05, |
| "loss": 1.6146, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_privacy_sources_loss": 0.9070245623588562, |
| "eval_validation_privacy_sources_runtime": 210.504, |
| "eval_validation_privacy_sources_samples_per_second": 1.819, |
| "eval_validation_privacy_sources_steps_per_second": 0.912, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_agenda_digitale_loss": 1.4407680034637451, |
| "eval_validation_agenda_digitale_runtime": 186.8241, |
| "eval_validation_agenda_digitale_samples_per_second": 1.82, |
| "eval_validation_agenda_digitale_steps_per_second": 0.91, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_leggepertutti_loss": 1.2863850593566895, |
| "eval_validation_leggepertutti_runtime": 35.7293, |
| "eval_validation_leggepertutti_samples_per_second": 1.819, |
| "eval_validation_leggepertutti_steps_per_second": 0.924, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_altalexprivacy_loss": 1.270463228225708, |
| "eval_validation_altalexprivacy_runtime": 58.2503, |
| "eval_validation_altalexprivacy_samples_per_second": 1.82, |
| "eval_validation_altalexprivacy_steps_per_second": 0.91, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_mc4_it_loss": 1.8474267721176147, |
| "eval_validation_mc4_it_runtime": 78.069, |
| "eval_validation_mc4_it_samples_per_second": 1.819, |
| "eval_validation_mc4_it_steps_per_second": 0.909, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_validation_dolma_loss": 1.9887909889221191, |
| "eval_validation_dolma_runtime": 53.8833, |
| "eval_validation_dolma_samples_per_second": 1.819, |
| "eval_validation_dolma_steps_per_second": 0.909, |
| "step": 3750 |
| } |
| ], |
| "logging_steps": 750, |
| "max_steps": 94880, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 750, |
| "total_flos": 3.93191060668416e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|