{
  "best_global_step": 3498,
  "best_metric": 0.9396295674217445,
  "best_model_checkpoint": "./roberta_urdu_multilabel/checkpoint-3498",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 3498,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17152658662092624,
      "grad_norm": 0.4993675649166107,
      "learning_rate": 4.858490566037736e-05,
      "loss": 0.2695,
      "step": 100
    },
    {
      "epoch": 0.34305317324185247,
      "grad_norm": 0.32874006032943726,
      "learning_rate": 4.7155517438536305e-05,
      "loss": 0.116,
      "step": 200
    },
    {
      "epoch": 0.5145797598627787,
      "grad_norm": 0.22341817617416382,
      "learning_rate": 4.5726129216695255e-05,
      "loss": 0.0827,
      "step": 300
    },
    {
      "epoch": 0.6861063464837049,
      "grad_norm": 0.6156010031700134,
      "learning_rate": 4.4296740994854205e-05,
      "loss": 0.0596,
      "step": 400
    },
    {
      "epoch": 0.8576329331046312,
      "grad_norm": 0.1617479920387268,
      "learning_rate": 4.2867352773013155e-05,
      "loss": 0.0518,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9099485420240138,
      "eval_f1_macro": 0.8772930595964492,
      "eval_loss": 0.03939065709710121,
      "eval_runtime": 4.0802,
      "eval_samples_per_second": 285.774,
      "eval_steps_per_second": 35.783,
      "step": 583
    },
    {
      "epoch": 1.0291595197255574,
      "grad_norm": 0.4286077916622162,
      "learning_rate": 4.14379645511721e-05,
      "loss": 0.0414,
      "step": 600
    },
    {
      "epoch": 1.2006861063464838,
      "grad_norm": 0.45919153094291687,
      "learning_rate": 4.000857632933105e-05,
      "loss": 0.0347,
      "step": 700
    },
    {
      "epoch": 1.3722126929674099,
      "grad_norm": 0.5688769221305847,
      "learning_rate": 3.857918810748999e-05,
      "loss": 0.0283,
      "step": 800
    },
    {
      "epoch": 1.5437392795883362,
      "grad_norm": 0.3409585654735565,
      "learning_rate": 3.714979988564895e-05,
      "loss": 0.0259,
      "step": 900
    },
    {
      "epoch": 1.7152658662092626,
      "grad_norm": 0.303937166929245,
      "learning_rate": 3.572041166380789e-05,
      "loss": 0.0244,
      "step": 1000
    },
    {
      "epoch": 1.8867924528301887,
      "grad_norm": 0.04824039712548256,
      "learning_rate": 3.429102344196684e-05,
      "loss": 0.0217,
      "step": 1100
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9716981132075472,
      "eval_f1_macro": 0.9048159496552155,
      "eval_loss": 0.01790458895266056,
      "eval_runtime": 4.0828,
      "eval_samples_per_second": 285.59,
      "eval_steps_per_second": 35.76,
      "step": 1166
    },
    {
      "epoch": 2.058319039451115,
      "grad_norm": 0.07180823385715485,
      "learning_rate": 3.2861635220125784e-05,
      "loss": 0.0179,
      "step": 1200
    },
    {
      "epoch": 2.2298456260720414,
      "grad_norm": 0.3384993374347687,
      "learning_rate": 3.1432246998284733e-05,
      "loss": 0.0163,
      "step": 1300
    },
    {
      "epoch": 2.4013722126929675,
      "grad_norm": 0.0720255896449089,
      "learning_rate": 3.0002858776443683e-05,
      "loss": 0.0148,
      "step": 1400
    },
    {
      "epoch": 2.5728987993138936,
      "grad_norm": 0.07835888862609863,
      "learning_rate": 2.8573470554602633e-05,
      "loss": 0.0137,
      "step": 1500
    },
    {
      "epoch": 2.7444253859348198,
      "grad_norm": 0.03401586785912514,
      "learning_rate": 2.7144082332761576e-05,
      "loss": 0.0129,
      "step": 1600
    },
    {
      "epoch": 2.915951972555746,
      "grad_norm": 0.04987897351384163,
      "learning_rate": 2.571469411092053e-05,
      "loss": 0.0119,
      "step": 1700
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9785591766723842,
      "eval_f1_macro": 0.9069474297862891,
      "eval_loss": 0.011610370129346848,
      "eval_runtime": 4.0899,
      "eval_samples_per_second": 285.091,
      "eval_steps_per_second": 35.698,
      "step": 1749
    },
    {
      "epoch": 3.0874785591766725,
      "grad_norm": 0.038767341524362564,
      "learning_rate": 2.4285305889079473e-05,
      "loss": 0.0111,
      "step": 1800
    },
    {
      "epoch": 3.2590051457975986,
      "grad_norm": 0.048064444214105606,
      "learning_rate": 2.2855917667238423e-05,
      "loss": 0.0107,
      "step": 1900
    },
    {
      "epoch": 3.4305317324185247,
      "grad_norm": 0.08297387510538101,
      "learning_rate": 2.142652944539737e-05,
      "loss": 0.0103,
      "step": 2000
    },
    {
      "epoch": 3.6020583190394513,
      "grad_norm": 0.027825674042105675,
      "learning_rate": 1.999714122355632e-05,
      "loss": 0.0087,
      "step": 2100
    },
    {
      "epoch": 3.7735849056603774,
      "grad_norm": 0.024125738069415092,
      "learning_rate": 1.8567753001715266e-05,
      "loss": 0.0077,
      "step": 2200
    },
    {
      "epoch": 3.9451114922813035,
      "grad_norm": 0.04014933481812477,
      "learning_rate": 1.7138364779874212e-05,
      "loss": 0.0088,
      "step": 2300
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9854202401372213,
      "eval_f1_macro": 0.9392697973755549,
      "eval_loss": 0.009160671383142471,
      "eval_runtime": 4.1026,
      "eval_samples_per_second": 284.212,
      "eval_steps_per_second": 35.587,
      "step": 2332
    },
    {
      "epoch": 4.11663807890223,
      "grad_norm": 0.09714564681053162,
      "learning_rate": 1.5708976558033162e-05,
      "loss": 0.0088,
      "step": 2400
    },
    {
      "epoch": 4.288164665523156,
      "grad_norm": 0.02462666854262352,
      "learning_rate": 1.427958833619211e-05,
      "loss": 0.007,
      "step": 2500
    },
    {
      "epoch": 4.459691252144083,
      "grad_norm": 0.03808571770787239,
      "learning_rate": 1.2850200114351058e-05,
      "loss": 0.0075,
      "step": 2600
    },
    {
      "epoch": 4.631217838765009,
      "grad_norm": 0.0727507695555687,
      "learning_rate": 1.1420811892510007e-05,
      "loss": 0.0069,
      "step": 2700
    },
    {
      "epoch": 4.802744425385935,
      "grad_norm": 0.02290419489145279,
      "learning_rate": 9.991423670668955e-06,
      "loss": 0.0075,
      "step": 2800
    },
    {
      "epoch": 4.974271012006861,
      "grad_norm": 0.032202959060668945,
      "learning_rate": 8.562035448827901e-06,
      "loss": 0.0073,
      "step": 2900
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.983704974271012,
      "eval_f1_macro": 0.9105312017109829,
      "eval_loss": 0.007727212272584438,
      "eval_runtime": 4.1477,
      "eval_samples_per_second": 281.123,
      "eval_steps_per_second": 35.201,
      "step": 2915
    },
    {
      "epoch": 5.145797598627787,
      "grad_norm": 0.0157458633184433,
      "learning_rate": 7.132647226986849e-06,
      "loss": 0.0068,
      "step": 3000
    },
    {
      "epoch": 5.317324185248713,
      "grad_norm": 0.02146329917013645,
      "learning_rate": 5.7032590051457984e-06,
      "loss": 0.0061,
      "step": 3100
    },
    {
      "epoch": 5.4888507718696395,
      "grad_norm": 0.017605546861886978,
      "learning_rate": 4.273870783304746e-06,
      "loss": 0.0066,
      "step": 3200
    },
    {
      "epoch": 5.660377358490566,
      "grad_norm": 0.017790155485272408,
      "learning_rate": 2.8444825614636936e-06,
      "loss": 0.0067,
      "step": 3300
    },
    {
      "epoch": 5.831903945111492,
      "grad_norm": 0.033828429877758026,
      "learning_rate": 1.4150943396226415e-06,
      "loss": 0.0059,
      "step": 3400
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9854202401372213,
      "eval_f1_macro": 0.9396295674217445,
      "eval_loss": 0.0071120294742286205,
      "eval_runtime": 4.1615,
      "eval_samples_per_second": 280.185,
      "eval_steps_per_second": 35.083,
      "step": 3498
    }
  ],
  "logging_steps": 100,
  "max_steps": 3498,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3680565061920768.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}