| { |
| "best_metric": 53.324968632371395, |
| "best_model_checkpoint": "/scratch/camembertv2/runs/results/fquad/camembertv2-base-bf16-p2-17000/max_seq_length-896-doc_stride-128-max_answer_length-30-gradient_accumulation_steps-4-precision-fp32-learning_rate-5e-06-epochs-6-lr_scheduler-cosine-warmup_steps-0/SEED-25/checkpoint-3888", |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 3888, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15432098765432098, |
| "grad_norm": 4.547507286071777, |
| "learning_rate": 4.99184317884152e-06, |
| "loss": 5.1604, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.30864197530864196, |
| "grad_norm": 12.684767723083496, |
| "learning_rate": 4.967425942351207e-06, |
| "loss": 4.0839, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.46296296296296297, |
| "grad_norm": 14.742673873901367, |
| "learning_rate": 4.926907624154051e-06, |
| "loss": 3.3159, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6172839506172839, |
| "grad_norm": 12.705907821655273, |
| "learning_rate": 4.870552624790192e-06, |
| "loss": 2.9494, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7716049382716049, |
| "grad_norm": 14.801329612731934, |
| "learning_rate": 4.798728686380588e-06, |
| "loss": 2.7635, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9259259259259259, |
| "grad_norm": 17.233285903930664, |
| "learning_rate": 4.711904492941644e-06, |
| "loss": 2.6393, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_exact_match": 38.86449184441656, |
| "eval_f1": 60.0036086905889, |
| "eval_runtime": 6.9307, |
| "eval_samples_per_second": 459.985, |
| "eval_steps_per_second": 7.214, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.0802469135802468, |
| "grad_norm": 14.701664924621582, |
| "learning_rate": 4.610646612007849e-06, |
| "loss": 2.4089, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2345679012345678, |
| "grad_norm": 17.278104782104492, |
| "learning_rate": 4.495615797519732e-06, |
| "loss": 2.3405, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 11.50146770477295, |
| "learning_rate": 4.367562678102491e-06, |
| "loss": 2.2084, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5432098765432098, |
| "grad_norm": 13.203764915466309, |
| "learning_rate": 4.22732285887122e-06, |
| "loss": 2.1694, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6975308641975309, |
| "grad_norm": 21.71219825744629, |
| "learning_rate": 4.075811468725734e-06, |
| "loss": 2.0862, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 12.909610748291016, |
| "learning_rate": 3.914017188716347e-06, |
| "loss": 2.0016, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_exact_match": 48.745294855708906, |
| "eval_f1": 70.05708349304844, |
| "eval_runtime": 6.5382, |
| "eval_samples_per_second": 487.593, |
| "eval_steps_per_second": 7.647, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.006172839506173, |
| "grad_norm": 16.666929244995117, |
| "learning_rate": 3.7429958004482575e-06, |
| "loss": 1.9412, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.1604938271604937, |
| "grad_norm": 10.462796211242676, |
| "learning_rate": 3.5638632966241686e-06, |
| "loss": 1.8009, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.314814814814815, |
| "grad_norm": 13.769060134887695, |
| "learning_rate": 3.3777885986819725e-06, |
| "loss": 1.7928, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.4691358024691357, |
| "grad_norm": 15.287083625793457, |
| "learning_rate": 3.1859859290482544e-06, |
| "loss": 1.7865, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.623456790123457, |
| "grad_norm": 11.451101303100586, |
| "learning_rate": 2.989706887782151e-06, |
| "loss": 1.7489, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 17.512975692749023, |
| "learning_rate": 2.7902322853130758e-06, |
| "loss": 1.6978, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.932098765432099, |
| "grad_norm": 17.15248680114746, |
| "learning_rate": 2.5888637845674276e-06, |
| "loss": 1.6566, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_exact_match": 50.47051442910916, |
| "eval_f1": 72.25048266378954, |
| "eval_runtime": 6.5447, |
| "eval_samples_per_second": 487.112, |
| "eval_steps_per_second": 7.64, |
| "step": 1944 |
| }, |
| { |
| "epoch": 3.0864197530864197, |
| "grad_norm": 11.384383201599121, |
| "learning_rate": 2.3869154070232346e-06, |
| "loss": 1.6309, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.240740740740741, |
| "grad_norm": 14.492201805114746, |
| "learning_rate": 2.185704958119594e-06, |
| "loss": 1.5353, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.3950617283950617, |
| "grad_norm": 15.613585472106934, |
| "learning_rate": 1.9865454279740452e-06, |
| "loss": 1.5249, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.549382716049383, |
| "grad_norm": 12.233988761901855, |
| "learning_rate": 1.7907364235221128e-06, |
| "loss": 1.5499, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.7037037037037037, |
| "grad_norm": 11.811338424682617, |
| "learning_rate": 1.5995556879882246e-06, |
| "loss": 1.5159, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.8580246913580245, |
| "grad_norm": 18.379695892333984, |
| "learning_rate": 1.414250763027336e-06, |
| "loss": 1.5072, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_exact_match": 53.01129234629862, |
| "eval_f1": 74.3205610049545, |
| "eval_runtime": 6.5679, |
| "eval_samples_per_second": 485.39, |
| "eval_steps_per_second": 7.613, |
| "step": 2592 |
| }, |
| { |
| "epoch": 4.012345679012346, |
| "grad_norm": 12.669611930847168, |
| "learning_rate": 1.2360308479456027e-06, |
| "loss": 1.5257, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 13.753548622131348, |
| "learning_rate": 1.0660589091223854e-06, |
| "loss": 1.4296, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.320987654320987, |
| "grad_norm": 10.774425506591797, |
| "learning_rate": 9.054440911232348e-07, |
| "loss": 1.4796, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.4753086419753085, |
| "grad_norm": 16.21649742126465, |
| "learning_rate": 7.552344790248104e-07, |
| "loss": 1.426, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.62962962962963, |
| "grad_norm": 11.01417064666748, |
| "learning_rate": 6.164102591808482e-07, |
| "loss": 1.4245, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.783950617283951, |
| "grad_norm": 10.40230941772461, |
| "learning_rate": 4.898773230583353e-07, |
| "loss": 1.4493, |
| "step": 3100 |
| }, |
| { |
| "epoch": 4.938271604938271, |
| "grad_norm": 10.953381538391113, |
| "learning_rate": 3.7646135588175676e-07, |
| "loss": 1.404, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_exact_match": 53.168130489335006, |
| "eval_f1": 74.39491719320372, |
| "eval_runtime": 6.6406, |
| "eval_samples_per_second": 480.08, |
| "eval_steps_per_second": 7.529, |
| "step": 3240 |
| }, |
| { |
| "epoch": 5.092592592592593, |
| "grad_norm": 13.173111915588379, |
| "learning_rate": 2.7690244865973494e-07, |
| "loss": 1.43, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.246913580246914, |
| "grad_norm": 13.998867988586426, |
| "learning_rate": 1.918502687530241e-07, |
| "loss": 1.3968, |
| "step": 3400 |
| }, |
| { |
| "epoch": 5.401234567901234, |
| "grad_norm": 12.186470985412598, |
| "learning_rate": 1.2185982049813472e-07, |
| "loss": 1.378, |
| "step": 3500 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 16.22747039794922, |
| "learning_rate": 6.738782355044048e-08, |
| "loss": 1.4347, |
| "step": 3600 |
| }, |
| { |
| "epoch": 5.709876543209877, |
| "grad_norm": 17.75710105895996, |
| "learning_rate": 2.878973257973955e-08, |
| "loss": 1.422, |
| "step": 3700 |
| }, |
| { |
| "epoch": 5.864197530864198, |
| "grad_norm": 17.476356506347656, |
| "learning_rate": 6.317417766116829e-09, |
| "loss": 1.3868, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_exact_match": 53.324968632371395, |
| "eval_f1": 74.54839090269344, |
| "eval_runtime": 6.5292, |
| "eval_samples_per_second": 488.268, |
| "eval_steps_per_second": 7.658, |
| "step": 3888 |
| }, |
| { |
| "epoch": 6.0, |
| "step": 3888, |
| "total_flos": 2.0387348740618656e+16, |
| "train_loss": 1.9457146935011624, |
| "train_runtime": 824.1497, |
| "train_samples_per_second": 150.926, |
| "train_steps_per_second": 4.718 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3888, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0387348740618656e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|