Buckets:
| { | |
| "epoch": 3.0, | |
| "global_step": 675, | |
| "max_steps": 675, | |
| "logging_steps": 10, | |
| "eval_steps": 500, | |
| "save_steps": 500, | |
| "train_batch_size": 8, | |
| "num_train_epochs": 3, | |
| "num_input_tokens_seen": 0, | |
| "total_flos": 62223875481600.0, | |
| "log_history": [ | |
| { | |
| "loss": 2.466257667541504, | |
| "grad_norm": 2.1648311614990234, | |
| "learning_rate": 4.5e-06, | |
| "epoch": 0.044444444444444446, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 2.397753143310547, | |
| "grad_norm": 4.898372650146484, | |
| "learning_rate": 9.5e-06, | |
| "epoch": 0.08888888888888889, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 2.3467193603515626, | |
| "grad_norm": 2.4088022708892822, | |
| "learning_rate": 1.45e-05, | |
| "epoch": 0.13333333333333333, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 2.3866750717163088, | |
| "grad_norm": 2.3660852909088135, | |
| "learning_rate": 1.9500000000000003e-05, | |
| "epoch": 0.17777777777777778, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 2.4110111236572265, | |
| "grad_norm": 1.8886386156082153, | |
| "learning_rate": 2.45e-05, | |
| "epoch": 0.2222222222222222, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 2.3524829864501955, | |
| "grad_norm": 2.3049066066741943, | |
| "learning_rate": 2.95e-05, | |
| "epoch": 0.26666666666666666, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 2.45643310546875, | |
| "grad_norm": 2.200075387954712, | |
| "learning_rate": 3.45e-05, | |
| "epoch": 0.3111111111111111, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 2.4559621810913086, | |
| "grad_norm": 2.166034460067749, | |
| "learning_rate": 3.9500000000000005e-05, | |
| "epoch": 0.35555555555555557, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 2.372382926940918, | |
| "grad_norm": 1.8101855516433716, | |
| "learning_rate": 4.4500000000000004e-05, | |
| "epoch": 0.4, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 2.4228847503662108, | |
| "grad_norm": 2.296342134475708, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "epoch": 0.4444444444444444, | |
| "step": 100 | |
| }, | |
| { | |
| "loss": 2.4426870346069336, | |
| "grad_norm": 2.5206782817840576, | |
| "learning_rate": 4.9217391304347824e-05, | |
| "epoch": 0.4888888888888889, | |
| "step": 110 | |
| }, | |
| { | |
| "loss": 2.379886817932129, | |
| "grad_norm": 2.3830366134643555, | |
| "learning_rate": 4.834782608695652e-05, | |
| "epoch": 0.5333333333333333, | |
| "step": 120 | |
| }, | |
| { | |
| "loss": 2.3728614807128907, | |
| "grad_norm": 2.637955665588379, | |
| "learning_rate": 4.747826086956522e-05, | |
| "epoch": 0.5777777777777777, | |
| "step": 130 | |
| }, | |
| { | |
| "loss": 2.462764358520508, | |
| "grad_norm": 2.458829879760742, | |
| "learning_rate": 4.660869565217392e-05, | |
| "epoch": 0.6222222222222222, | |
| "step": 140 | |
| }, | |
| { | |
| "loss": 2.323098373413086, | |
| "grad_norm": 2.68799090385437, | |
| "learning_rate": 4.5739130434782614e-05, | |
| "epoch": 0.6666666666666666, | |
| "step": 150 | |
| }, | |
| { | |
| "loss": 2.330042839050293, | |
| "grad_norm": 2.776823043823242, | |
| "learning_rate": 4.486956521739131e-05, | |
| "epoch": 0.7111111111111111, | |
| "step": 160 | |
| }, | |
| { | |
| "loss": 2.3432085037231447, | |
| "grad_norm": 2.658755302429199, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "epoch": 0.7555555555555555, | |
| "step": 170 | |
| }, | |
| { | |
| "loss": 2.353105163574219, | |
| "grad_norm": 2.509666681289673, | |
| "learning_rate": 4.3130434782608695e-05, | |
| "epoch": 0.8, | |
| "step": 180 | |
| }, | |
| { | |
| "loss": 2.4502593994140627, | |
| "grad_norm": 2.1432905197143555, | |
| "learning_rate": 4.226086956521739e-05, | |
| "epoch": 0.8444444444444444, | |
| "step": 190 | |
| }, | |
| { | |
| "loss": 2.399493408203125, | |
| "grad_norm": 2.2782559394836426, | |
| "learning_rate": 4.1391304347826086e-05, | |
| "epoch": 0.8888888888888888, | |
| "step": 200 | |
| }, | |
| { | |
| "loss": 2.4100601196289064, | |
| "grad_norm": 2.4559714794158936, | |
| "learning_rate": 4.052173913043478e-05, | |
| "epoch": 0.9333333333333333, | |
| "step": 210 | |
| }, | |
| { | |
| "loss": 2.3846929550170897, | |
| "grad_norm": 2.3984427452087402, | |
| "learning_rate": 3.965217391304348e-05, | |
| "epoch": 0.9777777777777777, | |
| "step": 220 | |
| }, | |
| { | |
| "eval_loss": 2.3704192638397217, | |
| "eval_runtime": 2.7149, | |
| "eval_samples_per_second": 73.667, | |
| "eval_steps_per_second": 9.208, | |
| "epoch": 1.0, | |
| "step": 225 | |
| }, | |
| { | |
| "loss": 2.3897687911987306, | |
| "grad_norm": 2.1680757999420166, | |
| "learning_rate": 3.878260869565218e-05, | |
| "epoch": 1.0222222222222221, | |
| "step": 230 | |
| }, | |
| { | |
| "loss": 2.432059097290039, | |
| "grad_norm": 1.4551690816879272, | |
| "learning_rate": 3.7913043478260876e-05, | |
| "epoch": 1.0666666666666667, | |
| "step": 240 | |
| }, | |
| { | |
| "loss": 2.447095489501953, | |
| "grad_norm": 1.8473337888717651, | |
| "learning_rate": 3.704347826086957e-05, | |
| "epoch": 1.1111111111111112, | |
| "step": 250 | |
| }, | |
| { | |
| "loss": 2.43582820892334, | |
| "grad_norm": 2.2014007568359375, | |
| "learning_rate": 3.617391304347826e-05, | |
| "epoch": 1.1555555555555554, | |
| "step": 260 | |
| }, | |
| { | |
| "loss": 2.372956466674805, | |
| "grad_norm": 2.310044050216675, | |
| "learning_rate": 3.5304347826086956e-05, | |
| "epoch": 1.2, | |
| "step": 270 | |
| }, | |
| { | |
| "loss": 2.3458200454711915, | |
| "grad_norm": 2.2953226566314697, | |
| "learning_rate": 3.443478260869565e-05, | |
| "epoch": 1.2444444444444445, | |
| "step": 280 | |
| }, | |
| { | |
| "loss": 2.3527500152587892, | |
| "grad_norm": 2.1383249759674072, | |
| "learning_rate": 3.356521739130435e-05, | |
| "epoch": 1.2888888888888888, | |
| "step": 290 | |
| }, | |
| { | |
| "loss": 2.378863525390625, | |
| "grad_norm": 3.1357085704803467, | |
| "learning_rate": 3.269565217391304e-05, | |
| "epoch": 1.3333333333333333, | |
| "step": 300 | |
| }, | |
| { | |
| "loss": 2.3464269638061523, | |
| "grad_norm": 1.6716424226760864, | |
| "learning_rate": 3.182608695652174e-05, | |
| "epoch": 1.3777777777777778, | |
| "step": 310 | |
| }, | |
| { | |
| "loss": 2.392436218261719, | |
| "grad_norm": 2.721541166305542, | |
| "learning_rate": 3.0956521739130435e-05, | |
| "epoch": 1.4222222222222223, | |
| "step": 320 | |
| }, | |
| { | |
| "loss": 2.3591419219970704, | |
| "grad_norm": 1.6220859289169312, | |
| "learning_rate": 3.008695652173913e-05, | |
| "epoch": 1.4666666666666668, | |
| "step": 330 | |
| }, | |
| { | |
| "loss": 2.4155120849609375, | |
| "grad_norm": 1.9437788724899292, | |
| "learning_rate": 2.921739130434783e-05, | |
| "epoch": 1.511111111111111, | |
| "step": 340 | |
| }, | |
| { | |
| "loss": 2.358566474914551, | |
| "grad_norm": 2.0959434509277344, | |
| "learning_rate": 2.8347826086956525e-05, | |
| "epoch": 1.5555555555555556, | |
| "step": 350 | |
| }, | |
| { | |
| "loss": 2.387654495239258, | |
| "grad_norm": 1.9890892505645752, | |
| "learning_rate": 2.747826086956522e-05, | |
| "epoch": 1.6, | |
| "step": 360 | |
| }, | |
| { | |
| "loss": 2.3926748275756835, | |
| "grad_norm": 2.2642343044281006, | |
| "learning_rate": 2.6608695652173913e-05, | |
| "epoch": 1.6444444444444444, | |
| "step": 370 | |
| }, | |
| { | |
| "loss": 2.3806028366088867, | |
| "grad_norm": 1.9827393293380737, | |
| "learning_rate": 2.573913043478261e-05, | |
| "epoch": 1.6888888888888889, | |
| "step": 380 | |
| }, | |
| { | |
| "loss": 2.3464998245239257, | |
| "grad_norm": 2.2234737873077393, | |
| "learning_rate": 2.4869565217391305e-05, | |
| "epoch": 1.7333333333333334, | |
| "step": 390 | |
| }, | |
| { | |
| "loss": 2.378097915649414, | |
| "grad_norm": 2.2928340435028076, | |
| "learning_rate": 2.4e-05, | |
| "epoch": 1.7777777777777777, | |
| "step": 400 | |
| }, | |
| { | |
| "loss": 2.36694393157959, | |
| "grad_norm": 3.076613426208496, | |
| "learning_rate": 2.31304347826087e-05, | |
| "epoch": 1.8222222222222222, | |
| "step": 410 | |
| }, | |
| { | |
| "loss": 2.4177019119262697, | |
| "grad_norm": 2.2511839866638184, | |
| "learning_rate": 2.2260869565217392e-05, | |
| "epoch": 1.8666666666666667, | |
| "step": 420 | |
| }, | |
| { | |
| "loss": 2.339081954956055, | |
| "grad_norm": 2.397934675216675, | |
| "learning_rate": 2.1391304347826088e-05, | |
| "epoch": 1.911111111111111, | |
| "step": 430 | |
| }, | |
| { | |
| "loss": 2.26760311126709, | |
| "grad_norm": 2.236941337585449, | |
| "learning_rate": 2.0521739130434784e-05, | |
| "epoch": 1.9555555555555557, | |
| "step": 440 | |
| }, | |
| { | |
| "loss": 2.368147277832031, | |
| "grad_norm": 1.9281189441680908, | |
| "learning_rate": 1.965217391304348e-05, | |
| "epoch": 2.0, | |
| "step": 450 | |
| }, | |
| { | |
| "eval_loss": 2.3753504753112793, | |
| "eval_runtime": 2.3939, | |
| "eval_samples_per_second": 83.545, | |
| "eval_steps_per_second": 10.443, | |
| "epoch": 2.0, | |
| "step": 450 | |
| }, | |
| { | |
| "loss": 2.367934989929199, | |
| "grad_norm": 3.061547040939331, | |
| "learning_rate": 1.8782608695652175e-05, | |
| "epoch": 2.0444444444444443, | |
| "step": 460 | |
| }, | |
| { | |
| "loss": 2.341065216064453, | |
| "grad_norm": 2.0394673347473145, | |
| "learning_rate": 1.791304347826087e-05, | |
| "epoch": 2.088888888888889, | |
| "step": 470 | |
| }, | |
| { | |
| "loss": 2.4043365478515626, | |
| "grad_norm": 1.4524158239364624, | |
| "learning_rate": 1.7043478260869566e-05, | |
| "epoch": 2.1333333333333333, | |
| "step": 480 | |
| }, | |
| { | |
| "loss": 2.358335494995117, | |
| "grad_norm": 2.0704870223999023, | |
| "learning_rate": 1.6173913043478262e-05, | |
| "epoch": 2.1777777777777776, | |
| "step": 490 | |
| }, | |
| { | |
| "loss": 2.421707534790039, | |
| "grad_norm": 2.6253604888916016, | |
| "learning_rate": 1.5304347826086958e-05, | |
| "epoch": 2.2222222222222223, | |
| "step": 500 | |
| }, | |
| { | |
| "loss": 2.365186882019043, | |
| "grad_norm": 1.7911664247512817, | |
| "learning_rate": 1.4434782608695652e-05, | |
| "epoch": 2.2666666666666666, | |
| "step": 510 | |
| }, | |
| { | |
| "loss": 2.359132766723633, | |
| "grad_norm": 2.027219295501709, | |
| "learning_rate": 1.356521739130435e-05, | |
| "epoch": 2.311111111111111, | |
| "step": 520 | |
| }, | |
| { | |
| "loss": 2.3417600631713866, | |
| "grad_norm": 2.138606309890747, | |
| "learning_rate": 1.2695652173913045e-05, | |
| "epoch": 2.3555555555555556, | |
| "step": 530 | |
| }, | |
| { | |
| "loss": 2.3759010314941404, | |
| "grad_norm": 2.2981395721435547, | |
| "learning_rate": 1.1826086956521739e-05, | |
| "epoch": 2.4, | |
| "step": 540 | |
| }, | |
| { | |
| "loss": 2.3546443939208985, | |
| "grad_norm": 1.8775476217269897, | |
| "learning_rate": 1.0956521739130435e-05, | |
| "epoch": 2.4444444444444446, | |
| "step": 550 | |
| }, | |
| { | |
| "loss": 2.362605857849121, | |
| "grad_norm": 2.8558459281921387, | |
| "learning_rate": 1.008695652173913e-05, | |
| "epoch": 2.488888888888889, | |
| "step": 560 | |
| }, | |
| { | |
| "loss": 2.386676788330078, | |
| "grad_norm": 3.7472620010375977, | |
| "learning_rate": 9.217391304347826e-06, | |
| "epoch": 2.533333333333333, | |
| "step": 570 | |
| }, | |
| { | |
| "loss": 2.3616586685180665, | |
| "grad_norm": 2.004561424255371, | |
| "learning_rate": 8.347826086956522e-06, | |
| "epoch": 2.5777777777777775, | |
| "step": 580 | |
| }, | |
| { | |
| "loss": 2.381579780578613, | |
| "grad_norm": 2.163069248199463, | |
| "learning_rate": 7.478260869565218e-06, | |
| "epoch": 2.6222222222222222, | |
| "step": 590 | |
| }, | |
| { | |
| "loss": 2.3825847625732424, | |
| "grad_norm": 2.028392791748047, | |
| "learning_rate": 6.608695652173913e-06, | |
| "epoch": 2.6666666666666665, | |
| "step": 600 | |
| }, | |
| { | |
| "loss": 2.433236312866211, | |
| "grad_norm": 2.136918306350708, | |
| "learning_rate": 5.739130434782608e-06, | |
| "epoch": 2.7111111111111112, | |
| "step": 610 | |
| }, | |
| { | |
| "loss": 2.3539539337158204, | |
| "grad_norm": 1.6569041013717651, | |
| "learning_rate": 4.869565217391304e-06, | |
| "epoch": 2.7555555555555555, | |
| "step": 620 | |
| }, | |
| { | |
| "loss": 2.364187812805176, | |
| "grad_norm": 2.2416770458221436, | |
| "learning_rate": 4.000000000000001e-06, | |
| "epoch": 2.8, | |
| "step": 630 | |
| }, | |
| { | |
| "loss": 2.329428863525391, | |
| "grad_norm": 3.4299488067626953, | |
| "learning_rate": 3.130434782608696e-06, | |
| "epoch": 2.8444444444444446, | |
| "step": 640 | |
| }, | |
| { | |
| "loss": 2.3712581634521483, | |
| "grad_norm": 2.076444149017334, | |
| "learning_rate": 2.2608695652173913e-06, | |
| "epoch": 2.888888888888889, | |
| "step": 650 | |
| }, | |
| { | |
| "loss": 2.325495719909668, | |
| "grad_norm": 3.042565107345581, | |
| "learning_rate": 1.391304347826087e-06, | |
| "epoch": 2.9333333333333336, | |
| "step": 660 | |
| }, | |
| { | |
| "loss": 2.392244338989258, | |
| "grad_norm": 1.986987590789795, | |
| "learning_rate": 5.217391304347826e-07, | |
| "epoch": 2.977777777777778, | |
| "step": 670 | |
| }, | |
| { | |
| "eval_loss": 2.3485498428344727, | |
| "eval_runtime": 2.3724, | |
| "eval_samples_per_second": 84.302, | |
| "eval_steps_per_second": 10.538, | |
| "epoch": 3.0, | |
| "step": 675 | |
| }, | |
| { | |
| "train_runtime": 206.0328, | |
| "train_samples_per_second": 26.209, | |
| "train_steps_per_second": 3.276, | |
| "total_flos": 62223875481600.0, | |
| "train_loss": 2.380847351639359, | |
| "epoch": 3.0, | |
| "step": 675 | |
| } | |
| ], | |
| "best_metric": 2.3485498428344727, | |
| "best_global_step": 675, | |
| "best_model_checkpoint": "./output/checkpoint-675", | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "is_hyper_param_search": false, | |
| "trial_name": null, | |
| "trial_params": null, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_training_stop": true, | |
| "should_epoch_stop": false, | |
| "should_save": true, | |
| "should_evaluate": false, | |
| "should_log": false | |
| }, | |
| "attributes": {} | |
| } | |
| } | |
| } |
Xet Storage Details
- Size:
- 13.8 kB
- Xet hash:
- f2b669a935801aadba5921a71888e74e493a4fc048be4fb0f5a9c6b253c91616
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.