| { |
| "best_global_step": 1800, |
| "best_metric": 0.8268568617806198, |
| "best_model_checkpoint": "haryos_model_loras/xlm-roberta-base_massive_lora_ru-RU/checkpoint-1800", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 12.341004371643066, |
| "learning_rate": 0.00019455555555555556, |
| "loss": 3.8297, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 4.518715858459473, |
| "learning_rate": 0.00018899999999999999, |
| "loss": 3.4891, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 4.883383750915527, |
| "learning_rate": 0.00018344444444444446, |
| "loss": 2.5645, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 5.5965256690979, |
| "learning_rate": 0.00017788888888888892, |
| "loss": 2.0708, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 5.290555000305176, |
| "learning_rate": 0.00017233333333333334, |
| "loss": 1.8224, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 4.0180182456970215, |
| "learning_rate": 0.0001667777777777778, |
| "loss": 1.6021, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 5.648788928985596, |
| "learning_rate": 0.00016122222222222224, |
| "loss": 1.4299, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7137235612395475, |
| "eval_f1": 0.693439702877471, |
| "eval_loss": 1.0370498895645142, |
| "eval_runtime": 0.7125, |
| "eval_samples_per_second": 2853.251, |
| "eval_steps_per_second": 44.911, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 5.4577531814575195, |
| "learning_rate": 0.00015566666666666666, |
| "loss": 1.2951, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 5.014864921569824, |
| "learning_rate": 0.00015011111111111112, |
| "loss": 1.1408, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 4.7342424392700195, |
| "learning_rate": 0.00014455555555555557, |
| "loss": 1.0931, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5277777777777777, |
| "grad_norm": 5.168295860290527, |
| "learning_rate": 0.000139, |
| "loss": 1.1019, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 4.365182876586914, |
| "learning_rate": 0.00013344444444444447, |
| "loss": 1.0144, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8055555555555556, |
| "grad_norm": 6.45994234085083, |
| "learning_rate": 0.0001278888888888889, |
| "loss": 1.0265, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 7.900120735168457, |
| "learning_rate": 0.00012233333333333334, |
| "loss": 0.8592, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7870142646335465, |
| "eval_f1": 0.7781407151270732, |
| "eval_loss": 0.7638030052185059, |
| "eval_runtime": 0.7135, |
| "eval_samples_per_second": 2849.212, |
| "eval_steps_per_second": 44.847, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 26.352907180786133, |
| "learning_rate": 0.00011677777777777778, |
| "loss": 0.9371, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 5.762099742889404, |
| "learning_rate": 0.00011122222222222223, |
| "loss": 0.8033, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.361111111111111, |
| "grad_norm": 4.249943733215332, |
| "learning_rate": 0.00010566666666666667, |
| "loss": 0.8347, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 6.318827152252197, |
| "learning_rate": 0.0001001111111111111, |
| "loss": 0.7922, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.638888888888889, |
| "grad_norm": 6.6529998779296875, |
| "learning_rate": 9.455555555555556e-05, |
| "loss": 0.8138, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 4.681656837463379, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 0.8096, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 5.7172417640686035, |
| "learning_rate": 8.344444444444445e-05, |
| "loss": 0.7567, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8135759960649287, |
| "eval_f1": 0.8073713514844725, |
| "eval_loss": 0.675218939781189, |
| "eval_runtime": 0.7101, |
| "eval_samples_per_second": 2862.995, |
| "eval_steps_per_second": 45.064, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.0555555555555554, |
| "grad_norm": 4.0479207038879395, |
| "learning_rate": 7.788888888888888e-05, |
| "loss": 0.7424, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.1944444444444446, |
| "grad_norm": 7.587343692779541, |
| "learning_rate": 7.233333333333335e-05, |
| "loss": 0.6938, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 4.655948638916016, |
| "learning_rate": 6.677777777777779e-05, |
| "loss": 0.6952, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "grad_norm": 3.5373153686523438, |
| "learning_rate": 6.122222222222222e-05, |
| "loss": 0.6695, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.611111111111111, |
| "grad_norm": 5.440669536590576, |
| "learning_rate": 5.566666666666667e-05, |
| "loss": 0.7073, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 4.433872699737549, |
| "learning_rate": 5.011111111111111e-05, |
| "loss": 0.6782, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 10.24996280670166, |
| "learning_rate": 4.4555555555555555e-05, |
| "loss": 0.6768, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.823905558288244, |
| "eval_f1": 0.8182727867907158, |
| "eval_loss": 0.644848108291626, |
| "eval_runtime": 0.7112, |
| "eval_samples_per_second": 2858.72, |
| "eval_steps_per_second": 44.997, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.027777777777778, |
| "grad_norm": 4.958348751068115, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.646, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 6.34914493560791, |
| "learning_rate": 3.3444444444444443e-05, |
| "loss": 0.6338, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.305555555555555, |
| "grad_norm": 2.541375160217285, |
| "learning_rate": 2.788888888888889e-05, |
| "loss": 0.636, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 3.2549381256103516, |
| "learning_rate": 2.2333333333333335e-05, |
| "loss": 0.6564, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 6.394855976104736, |
| "learning_rate": 1.677777777777778e-05, |
| "loss": 0.6363, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.722222222222222, |
| "grad_norm": 6.167675971984863, |
| "learning_rate": 1.1222222222222224e-05, |
| "loss": 0.6488, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "grad_norm": 6.480133533477783, |
| "learning_rate": 5.666666666666667e-06, |
| "loss": 0.6291, |
| "step": 1750 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 5.8079705238342285, |
| "learning_rate": 1.1111111111111112e-07, |
| "loss": 0.6268, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8268568617806198, |
| "eval_f1": 0.8222405831607275, |
| "eval_loss": 0.6318742036819458, |
| "eval_runtime": 0.7163, |
| "eval_samples_per_second": 2838.328, |
| "eval_steps_per_second": 44.676, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 1800, |
| "total_flos": 3843028539002880.0, |
| "train_loss": 1.1128748491075304, |
| "train_runtime": 67.4066, |
| "train_samples_per_second": 854.071, |
| "train_steps_per_second": 26.704 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1800, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3843028539002880.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|