| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998166819431714, | |
| "eval_steps": 500, | |
| "global_step": 2727, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4652014652014653e-05, | |
| "loss": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.9304029304029305e-05, | |
| "loss": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.3956043956043955e-05, | |
| "loss": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.860805860805861e-05, | |
| "loss": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.326007326007326e-05, | |
| "loss": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.791208791208791e-05, | |
| "loss": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010256410256410256, | |
| "loss": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00011721611721611722, | |
| "loss": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00013186813186813188, | |
| "loss": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00014652014652014652, | |
| "loss": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016117216117216118, | |
| "loss": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017582417582417582, | |
| "loss": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019047619047619048, | |
| "loss": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019942950285248574, | |
| "loss": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000197799511002445, | |
| "loss": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019616951915240425, | |
| "loss": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019453952730236348, | |
| "loss": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019290953545232276, | |
| "loss": 0.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019127954360228199, | |
| "loss": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00018964955175224124, | |
| "loss": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001880195599022005, | |
| "loss": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00018638956805215975, | |
| "loss": 0.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000184759576202119, | |
| "loss": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00018312958435207826, | |
| "loss": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001814995925020375, | |
| "loss": 0.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017986960065199674, | |
| "loss": 0.0, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000178239608801956, | |
| "loss": 0.0, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017660961695191524, | |
| "loss": 0.0, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001749796251018745, | |
| "loss": 0.0, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017334963325183375, | |
| "loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000171719641401793, | |
| "loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017008964955175223, | |
| "loss": 0.0, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016845965770171151, | |
| "loss": 0.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016682966585167074, | |
| "loss": 0.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016519967400163, | |
| "loss": 0.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016356968215158925, | |
| "loss": 0.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001619396903015485, | |
| "loss": 0.0, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016030969845150773, | |
| "loss": 0.0, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000158679706601467, | |
| "loss": 0.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00015704971475142624, | |
| "loss": 0.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00015541972290138552, | |
| "loss": 0.0, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00015378973105134475, | |
| "loss": 0.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000152159739201304, | |
| "loss": 0.0, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00015052974735126325, | |
| "loss": 0.0, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001488997555012225, | |
| "loss": 0.0, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00014726976365118173, | |
| "loss": 0.0, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00014563977180114102, | |
| "loss": 0.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00014400977995110024, | |
| "loss": 0.0, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001423797881010595, | |
| "loss": 0.0, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00014074979625101875, | |
| "loss": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000139119804400978, | |
| "loss": 0.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00013748981255093726, | |
| "loss": 0.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001358598207008965, | |
| "loss": 0.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00013422982885085577, | |
| "loss": 0.0, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000132599837000815, | |
| "loss": 0.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00013096984515077427, | |
| "loss": 0.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001293398533007335, | |
| "loss": 0.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00012770986145069276, | |
| "loss": 0.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000126079869600652, | |
| "loss": 0.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00012444987775061126, | |
| "loss": 0.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001228198859005705, | |
| "loss": 0.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00012118989405052976, | |
| "loss": 0.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000119559902200489, | |
| "loss": 0.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00011792991035044825, | |
| "loss": 0.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001162999185004075, | |
| "loss": 0.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00011466992665036676, | |
| "loss": 0.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000113039934800326, | |
| "loss": 0.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00011140994295028527, | |
| "loss": 0.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010977995110024451, | |
| "loss": 0.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010814995925020375, | |
| "loss": 0.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010651996740016302, | |
| "loss": 0.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010488997555012226, | |
| "loss": 0.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001032599837000815, | |
| "loss": 0.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010162999185004076, | |
| "loss": 0.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.837000814995926e-05, | |
| "loss": 0.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.67400162999185e-05, | |
| "loss": 0.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.511002444987775e-05, | |
| "loss": 0.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.348003259983701e-05, | |
| "loss": 0.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.185004074979625e-05, | |
| "loss": 0.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.02200488997555e-05, | |
| "loss": 0.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.859005704971476e-05, | |
| "loss": 0.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.6960065199674e-05, | |
| "loss": 0.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.533007334963325e-05, | |
| "loss": 0.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.37000814995925e-05, | |
| "loss": 0.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.207008964955176e-05, | |
| "loss": 0.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.044009779951101e-05, | |
| "loss": 0.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.881010594947025e-05, | |
| "loss": 0.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.71801140994295e-05, | |
| "loss": 0.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.555012224938876e-05, | |
| "loss": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.392013039934801e-05, | |
| "loss": 0.0, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.229013854930725e-05, | |
| "loss": 0.0, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.066014669926651e-05, | |
| "loss": 0.0, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.903015484922576e-05, | |
| "loss": 0.0, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.740016299918502e-05, | |
| "loss": 0.0, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.577017114914426e-05, | |
| "loss": 0.0, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.414017929910351e-05, | |
| "loss": 0.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.251018744906276e-05, | |
| "loss": 0.0, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.0880195599022005e-05, | |
| "loss": 0.0, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.925020374898126e-05, | |
| "loss": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.762021189894051e-05, | |
| "loss": 0.0, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.5990220048899754e-05, | |
| "loss": 0.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.436022819885901e-05, | |
| "loss": 0.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.273023634881826e-05, | |
| "loss": 0.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.110024449877751e-05, | |
| "loss": 0.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9470252648736756e-05, | |
| "loss": 0.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.784026079869601e-05, | |
| "loss": 0.0, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.6210268948655264e-05, | |
| "loss": 0.0, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.458027709861451e-05, | |
| "loss": 0.0, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.295028524857376e-05, | |
| "loss": 0.0, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.132029339853301e-05, | |
| "loss": 0.0, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.969030154849226e-05, | |
| "loss": 0.0, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.8060309698451507e-05, | |
| "loss": 0.0, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.643031784841076e-05, | |
| "loss": 0.0, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.480032599837001e-05, | |
| "loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3170334148329255e-05, | |
| "loss": 0.0, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.154034229828851e-05, | |
| "loss": 0.0, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.991035044824776e-05, | |
| "loss": 0.0, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.8280358598207013e-05, | |
| "loss": 0.0, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.665036674816626e-05, | |
| "loss": 0.0, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.5020374898125508e-05, | |
| "loss": 0.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.3390383048084762e-05, | |
| "loss": 0.0, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.1760391198044012e-05, | |
| "loss": 0.0, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0130399348003263e-05, | |
| "loss": 0.0, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.850040749796251e-05, | |
| "loss": 0.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.687041564792176e-05, | |
| "loss": 0.0, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5240423797881013e-05, | |
| "loss": 0.0, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.361043194784026e-05, | |
| "loss": 0.0, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.198044009779951e-05, | |
| "loss": 0.0, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0350448247758763e-05, | |
| "loss": 0.0, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.720456397718012e-06, | |
| "loss": 0.0, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.090464547677262e-06, | |
| "loss": 0.0, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.460472697636512e-06, | |
| "loss": 0.0, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.830480847595763e-06, | |
| "loss": 0.0, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.2004889975550126e-06, | |
| "loss": 0.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.704971475142625e-07, | |
| "loss": 0.0, | |
| "step": 2720 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 2727, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 67854207762432.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |