{ "best_global_step": 1800, "best_metric": 0.8268568617806198, "best_model_checkpoint": "haryos_model_loras/xlm-roberta-base_massive_lora_ru-RU/checkpoint-1800", "epoch": 5.0, "eval_steps": 500, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1388888888888889, "grad_norm": 12.341004371643066, "learning_rate": 0.00019455555555555556, "loss": 3.8297, "step": 50 }, { "epoch": 0.2777777777777778, "grad_norm": 4.518715858459473, "learning_rate": 0.00018899999999999999, "loss": 3.4891, "step": 100 }, { "epoch": 0.4166666666666667, "grad_norm": 4.883383750915527, "learning_rate": 0.00018344444444444446, "loss": 2.5645, "step": 150 }, { "epoch": 0.5555555555555556, "grad_norm": 5.5965256690979, "learning_rate": 0.00017788888888888892, "loss": 2.0708, "step": 200 }, { "epoch": 0.6944444444444444, "grad_norm": 5.290555000305176, "learning_rate": 0.00017233333333333334, "loss": 1.8224, "step": 250 }, { "epoch": 0.8333333333333334, "grad_norm": 4.0180182456970215, "learning_rate": 0.0001667777777777778, "loss": 1.6021, "step": 300 }, { "epoch": 0.9722222222222222, "grad_norm": 5.648788928985596, "learning_rate": 0.00016122222222222224, "loss": 1.4299, "step": 350 }, { "epoch": 1.0, "eval_accuracy": 0.7137235612395475, "eval_f1": 0.693439702877471, "eval_loss": 1.0370498895645142, "eval_runtime": 0.7125, "eval_samples_per_second": 2853.251, "eval_steps_per_second": 44.911, "step": 360 }, { "epoch": 1.1111111111111112, "grad_norm": 5.4577531814575195, "learning_rate": 0.00015566666666666666, "loss": 1.2951, "step": 400 }, { "epoch": 1.25, "grad_norm": 5.014864921569824, "learning_rate": 0.00015011111111111112, "loss": 1.1408, "step": 450 }, { "epoch": 1.3888888888888888, "grad_norm": 4.7342424392700195, "learning_rate": 0.00014455555555555557, "loss": 1.0931, "step": 500 }, { "epoch": 1.5277777777777777, "grad_norm": 5.168295860290527, "learning_rate": 0.000139, "loss": 1.1019, "step": 550 }, { "epoch": 1.6666666666666665, "grad_norm": 4.365182876586914, "learning_rate": 0.00013344444444444447, "loss": 1.0144, "step": 600 }, { "epoch": 1.8055555555555556, "grad_norm": 6.45994234085083, "learning_rate": 0.0001278888888888889, "loss": 1.0265, "step": 650 }, { "epoch": 1.9444444444444444, "grad_norm": 7.900120735168457, "learning_rate": 0.00012233333333333334, "loss": 0.8592, "step": 700 }, { "epoch": 2.0, "eval_accuracy": 0.7870142646335465, "eval_f1": 0.7781407151270732, "eval_loss": 0.7638030052185059, "eval_runtime": 0.7135, "eval_samples_per_second": 2849.212, "eval_steps_per_second": 44.847, "step": 720 }, { "epoch": 2.0833333333333335, "grad_norm": 26.352907180786133, "learning_rate": 0.00011677777777777778, "loss": 0.9371, "step": 750 }, { "epoch": 2.2222222222222223, "grad_norm": 5.762099742889404, "learning_rate": 0.00011122222222222223, "loss": 0.8033, "step": 800 }, { "epoch": 2.361111111111111, "grad_norm": 4.249943733215332, "learning_rate": 0.00010566666666666667, "loss": 0.8347, "step": 850 }, { "epoch": 2.5, "grad_norm": 6.318827152252197, "learning_rate": 0.0001001111111111111, "loss": 0.7922, "step": 900 }, { "epoch": 2.638888888888889, "grad_norm": 6.6529998779296875, "learning_rate": 9.455555555555556e-05, "loss": 0.8138, "step": 950 }, { "epoch": 2.7777777777777777, "grad_norm": 4.681656837463379, "learning_rate": 8.900000000000001e-05, "loss": 0.8096, "step": 1000 }, { "epoch": 2.9166666666666665, "grad_norm": 5.7172417640686035, "learning_rate": 8.344444444444445e-05, "loss": 0.7567, "step": 1050 }, { "epoch": 3.0, "eval_accuracy": 0.8135759960649287, "eval_f1": 0.8073713514844725, "eval_loss": 0.675218939781189, "eval_runtime": 0.7101, "eval_samples_per_second": 2862.995, "eval_steps_per_second": 45.064, "step": 1080 }, { "epoch": 3.0555555555555554, "grad_norm": 4.0479207038879395, "learning_rate": 7.788888888888888e-05, "loss": 0.7424, "step": 1100 }, { "epoch": 3.1944444444444446, "grad_norm": 7.587343692779541, "learning_rate": 7.233333333333335e-05, "loss": 0.6938, "step": 1150 }, { "epoch": 3.3333333333333335, "grad_norm": 4.655948638916016, "learning_rate": 6.677777777777779e-05, "loss": 0.6952, "step": 1200 }, { "epoch": 3.4722222222222223, "grad_norm": 3.5373153686523438, "learning_rate": 6.122222222222222e-05, "loss": 0.6695, "step": 1250 }, { "epoch": 3.611111111111111, "grad_norm": 5.440669536590576, "learning_rate": 5.566666666666667e-05, "loss": 0.7073, "step": 1300 }, { "epoch": 3.75, "grad_norm": 4.433872699737549, "learning_rate": 5.011111111111111e-05, "loss": 0.6782, "step": 1350 }, { "epoch": 3.888888888888889, "grad_norm": 10.24996280670166, "learning_rate": 4.4555555555555555e-05, "loss": 0.6768, "step": 1400 }, { "epoch": 4.0, "eval_accuracy": 0.823905558288244, "eval_f1": 0.8182727867907158, "eval_loss": 0.644848108291626, "eval_runtime": 0.7112, "eval_samples_per_second": 2858.72, "eval_steps_per_second": 44.997, "step": 1440 }, { "epoch": 4.027777777777778, "grad_norm": 4.958348751068115, "learning_rate": 3.9000000000000006e-05, "loss": 0.646, "step": 1450 }, { "epoch": 4.166666666666667, "grad_norm": 6.34914493560791, "learning_rate": 3.3444444444444443e-05, "loss": 0.6338, "step": 1500 }, { "epoch": 4.305555555555555, "grad_norm": 2.541375160217285, "learning_rate": 2.788888888888889e-05, "loss": 0.636, "step": 1550 }, { "epoch": 4.444444444444445, "grad_norm": 3.2549381256103516, "learning_rate": 2.2333333333333335e-05, "loss": 0.6564, "step": 1600 }, { "epoch": 4.583333333333333, "grad_norm": 6.394855976104736, "learning_rate": 1.677777777777778e-05, "loss": 0.6363, "step": 1650 }, { "epoch": 4.722222222222222, "grad_norm": 6.167675971984863, "learning_rate": 1.1222222222222224e-05, "loss": 0.6488, "step": 1700 }, { "epoch": 4.861111111111111, "grad_norm": 6.480133533477783, "learning_rate": 5.666666666666667e-06, "loss": 0.6291, "step": 1750 }, { "epoch": 5.0, "grad_norm": 5.8079705238342285, "learning_rate": 1.1111111111111112e-07, "loss": 0.6268, "step": 1800 }, { "epoch": 5.0, "eval_accuracy": 0.8268568617806198, "eval_f1": 0.8222405831607275, "eval_loss": 0.6318742036819458, "eval_runtime": 0.7163, "eval_samples_per_second": 2838.328, "eval_steps_per_second": 44.676, "step": 1800 }, { "epoch": 5.0, "step": 1800, "total_flos": 3843028539002880.0, "train_loss": 1.1128748491075304, "train_runtime": 67.4066, "train_samples_per_second": 854.071, "train_steps_per_second": 26.704 } ], "logging_steps": 50, "max_steps": 1800, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3843028539002880.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }