| { | |
| "best_metric": 1.6572628021240234, | |
| "best_model_checkpoint": "./modele-socratique-sft\\checkpoint-171", | |
| "epoch": 6.95906432748538, | |
| "eval_steps": 500, | |
| "global_step": 595, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11695906432748537, | |
| "grad_norm": 4.373384475708008, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 5.2688, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.23391812865497075, | |
| "grad_norm": 7.579962253570557, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 4.2978, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3508771929824561, | |
| "grad_norm": 3.2114369869232178, | |
| "learning_rate": 0.0001, | |
| "loss": 2.864, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.4678362573099415, | |
| "grad_norm": 2.231703758239746, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 2.2745, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5847953216374269, | |
| "grad_norm": 1.8353674411773682, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 2.0056, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7017543859649122, | |
| "grad_norm": 3.654597282409668, | |
| "learning_rate": 0.0002, | |
| "loss": 1.9087, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8187134502923976, | |
| "grad_norm": 1.4905436038970947, | |
| "learning_rate": 0.00019626168224299065, | |
| "loss": 1.751, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.935672514619883, | |
| "grad_norm": 1.4564584493637085, | |
| "learning_rate": 0.00019252336448598133, | |
| "loss": 1.7465, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9941520467836257, | |
| "eval_loss": 1.743960976600647, | |
| "eval_runtime": 1.1773, | |
| "eval_samples_per_second": 64.554, | |
| "eval_steps_per_second": 8.494, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 1.3438069820404053, | |
| "learning_rate": 0.00018878504672897197, | |
| "loss": 1.6509, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.1695906432748537, | |
| "grad_norm": 1.4461097717285156, | |
| "learning_rate": 0.00018504672897196262, | |
| "loss": 1.5477, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.286549707602339, | |
| "grad_norm": 1.7722139358520508, | |
| "learning_rate": 0.0001813084112149533, | |
| "loss": 1.6173, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.4035087719298245, | |
| "grad_norm": 1.3352079391479492, | |
| "learning_rate": 0.00017757009345794393, | |
| "loss": 1.6119, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.52046783625731, | |
| "grad_norm": 1.5055865049362183, | |
| "learning_rate": 0.00017383177570093458, | |
| "loss": 1.5867, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.6374269005847952, | |
| "grad_norm": 1.6256341934204102, | |
| "learning_rate": 0.00017009345794392523, | |
| "loss": 1.5639, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.7543859649122808, | |
| "grad_norm": 1.4370089769363403, | |
| "learning_rate": 0.0001663551401869159, | |
| "loss": 1.526, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.871345029239766, | |
| "grad_norm": 1.7688274383544922, | |
| "learning_rate": 0.00016261682242990654, | |
| "loss": 1.5438, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.9883040935672516, | |
| "grad_norm": 1.3985792398452759, | |
| "learning_rate": 0.0001588785046728972, | |
| "loss": 1.5231, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.6572628021240234, | |
| "eval_runtime": 1.1323, | |
| "eval_samples_per_second": 67.121, | |
| "eval_steps_per_second": 8.832, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 1.5881729125976562, | |
| "learning_rate": 0.00015514018691588786, | |
| "loss": 1.3743, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.75332510471344, | |
| "learning_rate": 0.0001514018691588785, | |
| "loss": 1.3126, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.3391812865497075, | |
| "grad_norm": 1.8260974884033203, | |
| "learning_rate": 0.00014766355140186915, | |
| "loss": 1.3309, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.456140350877193, | |
| "grad_norm": 1.6168378591537476, | |
| "learning_rate": 0.00014392523364485982, | |
| "loss": 1.3181, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.573099415204678, | |
| "grad_norm": 1.8509645462036133, | |
| "learning_rate": 0.00014018691588785047, | |
| "loss": 1.3092, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.690058479532164, | |
| "grad_norm": 1.8677273988723755, | |
| "learning_rate": 0.0001364485981308411, | |
| "loss": 1.3226, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.807017543859649, | |
| "grad_norm": 1.8391717672348022, | |
| "learning_rate": 0.00013271028037383179, | |
| "loss": 1.3218, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.9239766081871346, | |
| "grad_norm": 1.6857693195343018, | |
| "learning_rate": 0.00012897196261682243, | |
| "loss": 1.2798, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.9941520467836256, | |
| "eval_loss": 1.668265461921692, | |
| "eval_runtime": 1.1808, | |
| "eval_samples_per_second": 64.361, | |
| "eval_steps_per_second": 8.469, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 3.0409356725146197, | |
| "grad_norm": 1.7038238048553467, | |
| "learning_rate": 0.00012523364485981308, | |
| "loss": 1.2419, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 2.1333186626434326, | |
| "learning_rate": 0.00012149532710280373, | |
| "loss": 1.1094, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.2748538011695905, | |
| "grad_norm": 2.1007728576660156, | |
| "learning_rate": 0.00011775700934579439, | |
| "loss": 1.0783, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.391812865497076, | |
| "grad_norm": 2.2706727981567383, | |
| "learning_rate": 0.00011401869158878504, | |
| "loss": 1.0856, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.5087719298245617, | |
| "grad_norm": 2.1818716526031494, | |
| "learning_rate": 0.0001102803738317757, | |
| "loss": 1.1019, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.625730994152047, | |
| "grad_norm": 2.2601561546325684, | |
| "learning_rate": 0.00010654205607476636, | |
| "loss": 1.1007, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.742690058479532, | |
| "grad_norm": 2.215036153793335, | |
| "learning_rate": 0.000102803738317757, | |
| "loss": 1.0918, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.8596491228070176, | |
| "grad_norm": 2.1949431896209717, | |
| "learning_rate": 9.906542056074767e-05, | |
| "loss": 1.0702, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.976608187134503, | |
| "grad_norm": 2.5513603687286377, | |
| "learning_rate": 9.532710280373832e-05, | |
| "loss": 1.0959, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.7493380308151245, | |
| "eval_runtime": 1.2179, | |
| "eval_samples_per_second": 62.404, | |
| "eval_steps_per_second": 8.211, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 4.093567251461988, | |
| "grad_norm": 2.321434497833252, | |
| "learning_rate": 9.158878504672898e-05, | |
| "loss": 0.9406, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 4.2105263157894735, | |
| "grad_norm": 2.5859365463256836, | |
| "learning_rate": 8.785046728971964e-05, | |
| "loss": 0.8669, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.3274853801169595, | |
| "grad_norm": 2.6224827766418457, | |
| "learning_rate": 8.411214953271028e-05, | |
| "loss": 0.871, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 2.5626139640808105, | |
| "learning_rate": 8.037383177570094e-05, | |
| "loss": 0.9072, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.56140350877193, | |
| "grad_norm": 2.6079370975494385, | |
| "learning_rate": 7.663551401869158e-05, | |
| "loss": 0.9142, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.678362573099415, | |
| "grad_norm": 2.648815631866455, | |
| "learning_rate": 7.289719626168224e-05, | |
| "loss": 0.8995, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.7953216374269, | |
| "grad_norm": 2.6691548824310303, | |
| "learning_rate": 6.91588785046729e-05, | |
| "loss": 0.884, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.912280701754386, | |
| "grad_norm": 2.8678014278411865, | |
| "learning_rate": 6.542056074766355e-05, | |
| "loss": 0.8708, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.994152046783626, | |
| "eval_loss": 1.87999427318573, | |
| "eval_runtime": 1.2454, | |
| "eval_samples_per_second": 61.025, | |
| "eval_steps_per_second": 8.03, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 5.029239766081871, | |
| "grad_norm": 2.3341574668884277, | |
| "learning_rate": 6.16822429906542e-05, | |
| "loss": 0.8429, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 5.146198830409356, | |
| "grad_norm": 2.833981513977051, | |
| "learning_rate": 5.794392523364486e-05, | |
| "loss": 0.717, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 5.2631578947368425, | |
| "grad_norm": 2.6604554653167725, | |
| "learning_rate": 5.420560747663551e-05, | |
| "loss": 0.7133, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 5.380116959064328, | |
| "grad_norm": 2.6542723178863525, | |
| "learning_rate": 5.046728971962617e-05, | |
| "loss": 0.7169, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.497076023391813, | |
| "grad_norm": 2.9539148807525635, | |
| "learning_rate": 4.672897196261683e-05, | |
| "loss": 0.7134, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 5.614035087719298, | |
| "grad_norm": 3.140651226043701, | |
| "learning_rate": 4.299065420560748e-05, | |
| "loss": 0.7021, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.730994152046784, | |
| "grad_norm": 3.199179172515869, | |
| "learning_rate": 3.925233644859813e-05, | |
| "loss": 0.7289, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 5.847953216374269, | |
| "grad_norm": 2.914994478225708, | |
| "learning_rate": 3.551401869158878e-05, | |
| "loss": 0.7574, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.964912280701754, | |
| "grad_norm": 2.9280600547790527, | |
| "learning_rate": 3.177570093457944e-05, | |
| "loss": 0.727, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.0164217948913574, | |
| "eval_runtime": 1.1308, | |
| "eval_samples_per_second": 67.209, | |
| "eval_steps_per_second": 8.843, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 6.081871345029239, | |
| "grad_norm": 2.5345840454101562, | |
| "learning_rate": 2.8037383177570094e-05, | |
| "loss": 0.6034, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 6.1988304093567255, | |
| "grad_norm": 2.790266990661621, | |
| "learning_rate": 2.429906542056075e-05, | |
| "loss": 0.5992, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 6.315789473684211, | |
| "grad_norm": 2.85329532623291, | |
| "learning_rate": 2.05607476635514e-05, | |
| "loss": 0.605, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 6.432748538011696, | |
| "grad_norm": 2.943007230758667, | |
| "learning_rate": 1.6822429906542056e-05, | |
| "loss": 0.6168, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 6.549707602339181, | |
| "grad_norm": 2.8062615394592285, | |
| "learning_rate": 1.308411214953271e-05, | |
| "loss": 0.5825, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 2.97013258934021, | |
| "learning_rate": 9.345794392523365e-06, | |
| "loss": 0.6344, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 6.783625730994152, | |
| "grad_norm": 2.7482662200927734, | |
| "learning_rate": 5.607476635514019e-06, | |
| "loss": 0.6138, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 6.900584795321637, | |
| "grad_norm": 3.1583168506622314, | |
| "learning_rate": 1.8691588785046728e-06, | |
| "loss": 0.6043, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 6.95906432748538, | |
| "eval_loss": 2.1435160636901855, | |
| "eval_runtime": 1.2421, | |
| "eval_samples_per_second": 61.187, | |
| "eval_steps_per_second": 8.051, | |
| "step": 595 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 595, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3262279482728448.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |