{ "best_global_step": 3498, "best_metric": 0.9396295674217445, "best_model_checkpoint": "./roberta_urdu_multilabel/checkpoint-3498", "epoch": 6.0, "eval_steps": 500, "global_step": 3498, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17152658662092624, "grad_norm": 0.4993675649166107, "learning_rate": 4.858490566037736e-05, "loss": 0.2695, "step": 100 }, { "epoch": 0.34305317324185247, "grad_norm": 0.32874006032943726, "learning_rate": 4.7155517438536305e-05, "loss": 0.116, "step": 200 }, { "epoch": 0.5145797598627787, "grad_norm": 0.22341817617416382, "learning_rate": 4.5726129216695255e-05, "loss": 0.0827, "step": 300 }, { "epoch": 0.6861063464837049, "grad_norm": 0.6156010031700134, "learning_rate": 4.4296740994854205e-05, "loss": 0.0596, "step": 400 }, { "epoch": 0.8576329331046312, "grad_norm": 0.1617479920387268, "learning_rate": 4.2867352773013155e-05, "loss": 0.0518, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.9099485420240138, "eval_f1_macro": 0.8772930595964492, "eval_loss": 0.03939065709710121, "eval_runtime": 4.0802, "eval_samples_per_second": 285.774, "eval_steps_per_second": 35.783, "step": 583 }, { "epoch": 1.0291595197255574, "grad_norm": 0.4286077916622162, "learning_rate": 4.14379645511721e-05, "loss": 0.0414, "step": 600 }, { "epoch": 1.2006861063464838, "grad_norm": 0.45919153094291687, "learning_rate": 4.000857632933105e-05, "loss": 0.0347, "step": 700 }, { "epoch": 1.3722126929674099, "grad_norm": 0.5688769221305847, "learning_rate": 3.857918810748999e-05, "loss": 0.0283, "step": 800 }, { "epoch": 1.5437392795883362, "grad_norm": 0.3409585654735565, "learning_rate": 3.714979988564895e-05, "loss": 0.0259, "step": 900 }, { "epoch": 1.7152658662092626, "grad_norm": 0.303937166929245, "learning_rate": 3.572041166380789e-05, "loss": 0.0244, "step": 1000 }, { "epoch": 1.8867924528301887, "grad_norm": 0.04824039712548256, "learning_rate": 3.429102344196684e-05, "loss": 0.0217, "step": 1100 }, { "epoch": 2.0, "eval_accuracy": 0.9716981132075472, "eval_f1_macro": 0.9048159496552155, "eval_loss": 0.01790458895266056, "eval_runtime": 4.0828, "eval_samples_per_second": 285.59, "eval_steps_per_second": 35.76, "step": 1166 }, { "epoch": 2.058319039451115, "grad_norm": 0.07180823385715485, "learning_rate": 3.2861635220125784e-05, "loss": 0.0179, "step": 1200 }, { "epoch": 2.2298456260720414, "grad_norm": 0.3384993374347687, "learning_rate": 3.1432246998284733e-05, "loss": 0.0163, "step": 1300 }, { "epoch": 2.4013722126929675, "grad_norm": 0.0720255896449089, "learning_rate": 3.0002858776443683e-05, "loss": 0.0148, "step": 1400 }, { "epoch": 2.5728987993138936, "grad_norm": 0.07835888862609863, "learning_rate": 2.8573470554602633e-05, "loss": 0.0137, "step": 1500 }, { "epoch": 2.7444253859348198, "grad_norm": 0.03401586785912514, "learning_rate": 2.7144082332761576e-05, "loss": 0.0129, "step": 1600 }, { "epoch": 2.915951972555746, "grad_norm": 0.04987897351384163, "learning_rate": 2.571469411092053e-05, "loss": 0.0119, "step": 1700 }, { "epoch": 3.0, "eval_accuracy": 0.9785591766723842, "eval_f1_macro": 0.9069474297862891, "eval_loss": 0.011610370129346848, "eval_runtime": 4.0899, "eval_samples_per_second": 285.091, "eval_steps_per_second": 35.698, "step": 1749 }, { "epoch": 3.0874785591766725, "grad_norm": 0.038767341524362564, "learning_rate": 2.4285305889079473e-05, "loss": 0.0111, "step": 1800 }, { "epoch": 3.2590051457975986, "grad_norm": 0.048064444214105606, "learning_rate": 2.2855917667238423e-05, "loss": 0.0107, "step": 1900 }, { "epoch": 3.4305317324185247, "grad_norm": 0.08297387510538101, "learning_rate": 2.142652944539737e-05, "loss": 0.0103, "step": 2000 }, { "epoch": 3.6020583190394513, "grad_norm": 0.027825674042105675, "learning_rate": 1.999714122355632e-05, "loss": 0.0087, "step": 2100 }, { "epoch": 3.7735849056603774, "grad_norm": 0.024125738069415092, "learning_rate": 1.8567753001715266e-05, "loss": 0.0077, "step": 2200 }, { "epoch": 3.9451114922813035, "grad_norm": 0.04014933481812477, "learning_rate": 1.7138364779874212e-05, "loss": 0.0088, "step": 2300 }, { "epoch": 4.0, "eval_accuracy": 0.9854202401372213, "eval_f1_macro": 0.9392697973755549, "eval_loss": 0.009160671383142471, "eval_runtime": 4.1026, "eval_samples_per_second": 284.212, "eval_steps_per_second": 35.587, "step": 2332 }, { "epoch": 4.11663807890223, "grad_norm": 0.09714564681053162, "learning_rate": 1.5708976558033162e-05, "loss": 0.0088, "step": 2400 }, { "epoch": 4.288164665523156, "grad_norm": 0.02462666854262352, "learning_rate": 1.427958833619211e-05, "loss": 0.007, "step": 2500 }, { "epoch": 4.459691252144083, "grad_norm": 0.03808571770787239, "learning_rate": 1.2850200114351058e-05, "loss": 0.0075, "step": 2600 }, { "epoch": 4.631217838765009, "grad_norm": 0.0727507695555687, "learning_rate": 1.1420811892510007e-05, "loss": 0.0069, "step": 2700 }, { "epoch": 4.802744425385935, "grad_norm": 0.02290419489145279, "learning_rate": 9.991423670668955e-06, "loss": 0.0075, "step": 2800 }, { "epoch": 4.974271012006861, "grad_norm": 0.032202959060668945, "learning_rate": 8.562035448827901e-06, "loss": 0.0073, "step": 2900 }, { "epoch": 5.0, "eval_accuracy": 0.983704974271012, "eval_f1_macro": 0.9105312017109829, "eval_loss": 0.007727212272584438, "eval_runtime": 4.1477, "eval_samples_per_second": 281.123, "eval_steps_per_second": 35.201, "step": 2915 }, { "epoch": 5.145797598627787, "grad_norm": 0.0157458633184433, "learning_rate": 7.132647226986849e-06, "loss": 0.0068, "step": 3000 }, { "epoch": 5.317324185248713, "grad_norm": 0.02146329917013645, "learning_rate": 5.7032590051457984e-06, "loss": 0.0061, "step": 3100 }, { "epoch": 5.4888507718696395, "grad_norm": 0.017605546861886978, "learning_rate": 4.273870783304746e-06, "loss": 0.0066, "step": 3200 }, { "epoch": 5.660377358490566, "grad_norm": 0.017790155485272408, "learning_rate": 2.8444825614636936e-06, "loss": 0.0067, "step": 3300 }, { "epoch": 5.831903945111492, "grad_norm": 0.033828429877758026, "learning_rate": 1.4150943396226415e-06, "loss": 0.0059, "step": 3400 }, { "epoch": 6.0, "eval_accuracy": 0.9854202401372213, "eval_f1_macro": 0.9396295674217445, "eval_loss": 0.0071120294742286205, "eval_runtime": 4.1615, "eval_samples_per_second": 280.185, "eval_steps_per_second": 35.083, "step": 3498 } ], "logging_steps": 100, "max_steps": 3498, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3680565061920768.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }