{
  "best_global_step": 1000,
  "best_metric": 0.4237593412399292,
  "best_model_checkpoint": "./finetuned_entity_extraction_v2_telugu\\checkpoint-1000",
  "epoch": 1.0,
  "eval_steps": 200,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 1.7661869525909424,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 2.2122,
      "step": 50
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6054468154907227,
      "learning_rate": 1.9869002134404235e-05,
      "loss": 1.0335,
      "step": 100
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5563281774520874,
      "learning_rate": 1.946885829634935e-05,
      "loss": 0.8195,
      "step": 150
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.888305425643921,
      "learning_rate": 1.8810428687441415e-05,
      "loss": 0.7121,
      "step": 200
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.6703252196311951,
      "eval_runtime": 28.9212,
      "eval_samples_per_second": 69.153,
      "eval_steps_per_second": 17.288,
      "step": 200
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5473520755767822,
      "learning_rate": 1.7911673551013553e-05,
      "loss": 0.6368,
      "step": 250
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4704129695892334,
      "learning_rate": 1.6797108584307732e-05,
      "loss": 0.5804,
      "step": 300
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.578823208808899,
      "learning_rate": 1.5497136214161662e-05,
      "loss": 0.549,
      "step": 350
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5565356016159058,
      "learning_rate": 1.4047216298057872e-05,
      "loss": 0.525,
      "step": 400
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.5127490758895874,
      "eval_runtime": 28.1888,
      "eval_samples_per_second": 70.95,
      "eval_steps_per_second": 17.738,
      "step": 400
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5739060640335083,
      "learning_rate": 1.2486898871648552e-05,
      "loss": 0.4904,
      "step": 450
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7133370637893677,
      "learning_rate": 1.0858745326882172e-05,
      "loss": 0.4795,
      "step": 500
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4752830266952515,
      "learning_rate": 9.20716744818044e-06,
      "loss": 0.4592,
      "step": 550
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4320343732833862,
      "learning_rate": 7.577215974732139e-06,
      "loss": 0.4531,
      "step": 600
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.45128172636032104,
      "eval_runtime": 30.6969,
      "eval_samples_per_second": 65.153,
      "eval_steps_per_second": 16.288,
      "step": 600
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5366995334625244,
      "learning_rate": 6.0133517337665504e-06,
      "loss": 0.4392,
      "step": 650
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3664205074310303,
      "learning_rate": 4.5582328650874095e-06,
      "loss": 0.4371,
      "step": 700
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2980847358703613,
      "learning_rate": 3.25155121822048e-06,
      "loss": 0.4341,
      "step": 750
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4085419178009033,
      "learning_rate": 2.1289496622274754e-06,
      "loss": 0.4254,
      "step": 800
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.42791882157325745,
      "eval_runtime": 31.0012,
      "eval_samples_per_second": 64.514,
      "eval_steps_per_second": 16.128,
      "step": 800
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3011988401412964,
      "learning_rate": 1.2210498411520256e-06,
      "loss": 0.4215,
      "step": 850
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3734039068222046,
      "learning_rate": 5.526168953948752e-07,
      "loss": 0.4205,
      "step": 900
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1741344928741455,
      "learning_rate": 1.4188393324163663e-07,
      "loss": 0.4243,
      "step": 950
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3026286363601685,
      "learning_rate": 5.467919892865326e-11,
      "loss": 0.4166,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.4237593412399292,
      "eval_runtime": 29.6043,
      "eval_samples_per_second": 67.558,
      "eval_steps_per_second": 16.889,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 907581238726656.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}