{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.758733462167385,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 5e-06,
      "loss": 5.2948,
      "step": 50
    },
    {
      "epoch": 0.06,
      "learning_rate": 1e-05,
      "loss": 4.9959,
      "step": 100
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.5e-05,
      "loss": 4.6899,
      "step": 150
    },
    {
      "epoch": 0.12,
      "learning_rate": 2e-05,
      "loss": 4.5063,
      "step": 200
    },
    {
      "epoch": 0.15,
      "learning_rate": 2.5e-05,
      "loss": 4.4335,
      "step": 250
    },
    {
      "epoch": 0.18,
      "learning_rate": 3e-05,
      "loss": 4.3943,
      "step": 300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.5e-05,
      "loss": 4.35,
      "step": 350
    },
    {
      "epoch": 0.24,
      "learning_rate": 4e-05,
      "loss": 4.2928,
      "step": 400
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.5e-05,
      "loss": 4.2246,
      "step": 450
    },
    {
      "epoch": 0.3,
      "learning_rate": 5e-05,
      "loss": 4.1448,
      "step": 500
    },
    {
      "epoch": 0.33,
      "learning_rate": 5.500000000000001e-05,
      "loss": 4.0853,
      "step": 550
    },
    {
      "epoch": 0.36,
      "learning_rate": 6e-05,
      "loss": 4.0358,
      "step": 600
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.500000000000001e-05,
      "loss": 3.9763,
      "step": 650
    },
    {
      "epoch": 0.42,
      "learning_rate": 7e-05,
      "loss": 3.8816,
      "step": 700
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.7246,
      "step": 750
    },
    {
      "epoch": 0.48,
      "learning_rate": 8e-05,
      "loss": 3.5085,
      "step": 800
    },
    {
      "epoch": 0.51,
      "learning_rate": 8.5e-05,
      "loss": 3.2879,
      "step": 850
    },
    {
      "epoch": 0.54,
      "learning_rate": 9e-05,
      "loss": 3.0898,
      "step": 900
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.5e-05,
      "loss": 2.8867,
      "step": 950
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0001,
      "loss": 2.6323,
      "step": 1000
    },
    {
      "epoch": 0.62,
      "learning_rate": 9.932478055367996e-05,
      "loss": 2.3103,
      "step": 1050
    },
    {
      "epoch": 0.65,
      "learning_rate": 9.86495611073599e-05,
      "loss": 2.0185,
      "step": 1100
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.797434166103985e-05,
      "loss": 1.7382,
      "step": 1150
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.729912221471978e-05,
      "loss": 1.5627,
      "step": 1200
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.662390276839974e-05,
      "loss": 1.4679,
      "step": 1250
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.594868332207968e-05,
      "loss": 1.4046,
      "step": 1300
    },
    {
      "epoch": 0.8,
      "learning_rate": 9.527346387575963e-05,
      "loss": 1.3544,
      "step": 1350
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.459824442943957e-05,
      "loss": 1.325,
      "step": 1400
    },
    {
      "epoch": 0.86,
      "learning_rate": 9.392302498311952e-05,
      "loss": 1.2984,
      "step": 1450
    },
    {
      "epoch": 0.89,
      "learning_rate": 9.324780553679947e-05,
      "loss": 1.2735,
      "step": 1500
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.25725860904794e-05,
      "loss": 1.2617,
      "step": 1550
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.189736664415936e-05,
      "loss": 1.2475,
      "step": 1600
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.12221471978393e-05,
      "loss": 1.2348,
      "step": 1650
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.054692775151925e-05,
      "loss": 1.2387,
      "step": 1700
    },
    {
      "epoch": 1.04,
      "learning_rate": 8.98717083051992e-05,
      "loss": 1.2149,
      "step": 1750
    },
    {
      "epoch": 1.07,
      "learning_rate": 8.919648885887914e-05,
      "loss": 1.2004,
      "step": 1800
    },
    {
      "epoch": 1.1,
      "learning_rate": 8.852126941255908e-05,
      "loss": 1.1918,
      "step": 1850
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.784604996623903e-05,
      "loss": 1.1825,
      "step": 1900
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.717083051991897e-05,
      "loss": 1.1794,
      "step": 1950
    },
    {
      "epoch": 1.19,
      "learning_rate": 8.649561107359893e-05,
      "loss": 1.175,
      "step": 2000
    },
    {
      "epoch": 1.22,
      "learning_rate": 8.582039162727888e-05,
      "loss": 1.1657,
      "step": 2050
    },
    {
      "epoch": 1.25,
      "learning_rate": 8.514517218095882e-05,
      "loss": 1.1621,
      "step": 2100
    },
    {
      "epoch": 1.28,
      "learning_rate": 8.446995273463876e-05,
      "loss": 1.1564,
      "step": 2150
    },
    {
      "epoch": 1.31,
      "learning_rate": 8.37947332883187e-05,
      "loss": 1.149,
      "step": 2200
    },
    {
      "epoch": 1.34,
      "learning_rate": 8.311951384199866e-05,
      "loss": 1.1478,
      "step": 2250
    },
    {
      "epoch": 1.37,
      "learning_rate": 8.24442943956786e-05,
      "loss": 1.1449,
      "step": 2300
    },
    {
      "epoch": 1.4,
      "learning_rate": 8.176907494935855e-05,
      "loss": 1.1393,
      "step": 2350
    },
    {
      "epoch": 1.43,
      "learning_rate": 8.109385550303849e-05,
      "loss": 1.135,
      "step": 2400
    },
    {
      "epoch": 1.46,
      "learning_rate": 8.041863605671843e-05,
      "loss": 1.1308,
      "step": 2450
    },
    {
      "epoch": 1.49,
      "learning_rate": 7.974341661039839e-05,
      "loss": 1.1282,
      "step": 2500
    },
    {
      "epoch": 1.52,
      "learning_rate": 7.906819716407833e-05,
      "loss": 1.1197,
      "step": 2550
    },
    {
      "epoch": 1.55,
      "learning_rate": 7.839297771775828e-05,
      "loss": 1.1227,
      "step": 2600
    },
    {
      "epoch": 1.58,
      "learning_rate": 7.771775827143822e-05,
      "loss": 1.1192,
      "step": 2650
    },
    {
      "epoch": 1.61,
      "learning_rate": 7.704253882511818e-05,
      "loss": 1.1109,
      "step": 2700
    },
    {
      "epoch": 1.64,
      "learning_rate": 7.63673193787981e-05,
      "loss": 1.1166,
      "step": 2750
    },
    {
      "epoch": 1.67,
      "learning_rate": 7.569209993247806e-05,
      "loss": 1.1132,
      "step": 2800
    },
    {
      "epoch": 1.7,
      "learning_rate": 7.5016880486158e-05,
      "loss": 1.1043,
      "step": 2850
    },
    {
      "epoch": 1.72,
      "learning_rate": 7.434166103983795e-05,
      "loss": 1.1085,
      "step": 2900
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.366644159351789e-05,
      "loss": 1.1018,
      "step": 2950
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.299122214719785e-05,
      "loss": 1.1034,
      "step": 3000
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.23160027008778e-05,
      "loss": 1.0954,
      "step": 3050
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.164078325455773e-05,
      "loss": 1.0973,
      "step": 3100
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.096556380823768e-05,
      "loss": 1.0971,
      "step": 3150
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.029034436191762e-05,
      "loss": 1.0893,
      "step": 3200
    },
    {
      "epoch": 1.93,
      "learning_rate": 6.961512491559758e-05,
      "loss": 1.0891,
      "step": 3250
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.893990546927752e-05,
      "loss": 1.0866,
      "step": 3300
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.826468602295747e-05,
      "loss": 1.0938,
      "step": 3350
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.758946657663741e-05,
      "loss": 1.1025,
      "step": 3400
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.691424713031735e-05,
      "loss": 1.0858,
      "step": 3450
    },
    {
      "epoch": 2.08,
      "learning_rate": 6.623902768399731e-05,
      "loss": 1.0835,
      "step": 3500
    },
    {
      "epoch": 2.11,
      "learning_rate": 6.556380823767725e-05,
      "loss": 1.0793,
      "step": 3550
    },
    {
      "epoch": 2.14,
      "learning_rate": 6.48885887913572e-05,
      "loss": 1.0754,
      "step": 3600
    },
    {
      "epoch": 2.17,
      "learning_rate": 6.421336934503714e-05,
      "loss": 1.073,
      "step": 3650
    },
    {
      "epoch": 2.2,
      "learning_rate": 6.353814989871708e-05,
      "loss": 1.0774,
      "step": 3700
    },
    {
      "epoch": 2.23,
      "learning_rate": 6.286293045239702e-05,
      "loss": 1.0763,
      "step": 3750
    },
    {
      "epoch": 2.26,
      "learning_rate": 6.218771100607698e-05,
      "loss": 1.0693,
      "step": 3800
    },
    {
      "epoch": 2.29,
      "learning_rate": 6.151249155975692e-05,
      "loss": 1.0736,
      "step": 3850
    },
    {
      "epoch": 2.32,
      "learning_rate": 6.083727211343687e-05,
      "loss": 1.0762,
      "step": 3900
    },
    {
      "epoch": 2.35,
      "learning_rate": 6.016205266711682e-05,
      "loss": 1.0691,
      "step": 3950
    },
    {
      "epoch": 2.38,
      "learning_rate": 5.948683322079677e-05,
      "loss": 1.0668,
      "step": 4000
    },
    {
      "epoch": 2.41,
      "learning_rate": 5.88116137744767e-05,
      "loss": 1.0638,
      "step": 4050
    },
    {
      "epoch": 2.44,
      "learning_rate": 5.813639432815665e-05,
      "loss": 1.0671,
      "step": 4100
    },
    {
      "epoch": 2.47,
      "learning_rate": 5.7461174881836596e-05,
      "loss": 1.0595,
      "step": 4150
    },
    {
      "epoch": 2.5,
      "learning_rate": 5.6785955435516544e-05,
      "loss": 1.0606,
      "step": 4200
    },
    {
      "epoch": 2.53,
      "learning_rate": 5.61107359891965e-05,
      "loss": 1.0621,
      "step": 4250
    },
    {
      "epoch": 2.56,
      "learning_rate": 5.5435516542876445e-05,
      "loss": 1.0633,
      "step": 4300
    },
    {
      "epoch": 2.59,
      "learning_rate": 5.476029709655638e-05,
      "loss": 1.056,
      "step": 4350
    },
    {
      "epoch": 2.62,
      "learning_rate": 5.4085077650236326e-05,
      "loss": 1.0598,
      "step": 4400
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.3409858203916274e-05,
      "loss": 1.0532,
      "step": 4450
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.273463875759622e-05,
      "loss": 1.0536,
      "step": 4500
    },
    {
      "epoch": 2.71,
      "learning_rate": 5.205941931127617e-05,
      "loss": 1.0522,
      "step": 4550
    },
    {
      "epoch": 2.74,
      "learning_rate": 5.1384199864956116e-05,
      "loss": 1.0478,
      "step": 4600
    },
    {
      "epoch": 2.77,
      "learning_rate": 5.0708980418636057e-05,
      "loss": 1.0514,
      "step": 4650
    },
    {
      "epoch": 2.8,
      "learning_rate": 5.0033760972316004e-05,
      "loss": 1.0526,
      "step": 4700
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.935854152599595e-05,
      "loss": 1.0476,
      "step": 4750
    },
    {
      "epoch": 2.86,
      "learning_rate": 4.86833220796759e-05,
      "loss": 1.0459,
      "step": 4800
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.8008102633355846e-05,
      "loss": 1.0463,
      "step": 4850
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.733288318703579e-05,
      "loss": 1.0469,
      "step": 4900
    },
    {
      "epoch": 2.94,
      "learning_rate": 4.6657663740715734e-05,
      "loss": 1.0444,
      "step": 4950
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.598244429439568e-05,
      "loss": 1.0461,
      "step": 5000
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.530722484807562e-05,
      "loss": 1.0613,
      "step": 5050
    },
    {
      "epoch": 3.03,
      "learning_rate": 4.463200540175557e-05,
      "loss": 1.0418,
      "step": 5100
    },
    {
      "epoch": 3.06,
      "learning_rate": 4.395678595543552e-05,
      "loss": 1.0415,
      "step": 5150
    },
    {
      "epoch": 3.09,
      "learning_rate": 4.3281566509115464e-05,
      "loss": 1.0401,
      "step": 5200
    },
    {
      "epoch": 3.12,
      "learning_rate": 4.260634706279541e-05,
      "loss": 1.0404,
      "step": 5250
    },
    {
      "epoch": 3.15,
      "learning_rate": 4.193112761647536e-05,
      "loss": 1.0372,
      "step": 5300
    },
    {
      "epoch": 3.18,
      "learning_rate": 4.125590817015531e-05,
      "loss": 1.0423,
      "step": 5350
    },
    {
      "epoch": 3.21,
      "learning_rate": 4.058068872383525e-05,
      "loss": 1.0343,
      "step": 5400
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.9905469277515195e-05,
      "loss": 1.0374,
      "step": 5450
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.923024983119514e-05,
      "loss": 1.0355,
      "step": 5500
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.855503038487508e-05,
      "loss": 1.0312,
      "step": 5550
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.787981093855503e-05,
      "loss": 1.0374,
      "step": 5600
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.720459149223498e-05,
      "loss": 1.034,
      "step": 5650
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.6529372045914925e-05,
      "loss": 1.0327,
      "step": 5700
    },
    {
      "epoch": 3.42,
      "learning_rate": 3.585415259959487e-05,
      "loss": 1.0317,
      "step": 5750
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.517893315327482e-05,
      "loss": 1.0317,
      "step": 5800
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.450371370695476e-05,
      "loss": 1.028,
      "step": 5850
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.382849426063471e-05,
      "loss": 1.027,
      "step": 5900
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.3153274814314655e-05,
      "loss": 1.0253,
      "step": 5950
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.2478055367994596e-05,
      "loss": 1.0291,
      "step": 6000
    },
    {
      "epoch": 3.6,
      "learning_rate": 3.180283592167454e-05,
      "loss": 1.0266,
      "step": 6050
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.112761647535449e-05,
      "loss": 1.0311,
      "step": 6100
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.0452397029034435e-05,
      "loss": 1.0281,
      "step": 6150
    },
    {
      "epoch": 3.69,
      "learning_rate": 2.9777177582714382e-05,
      "loss": 1.0238,
      "step": 6200
    },
    {
      "epoch": 3.72,
      "learning_rate": 2.910195813639433e-05,
      "loss": 1.0248,
      "step": 6250
    },
    {
      "epoch": 3.75,
      "learning_rate": 2.8426738690074277e-05,
      "loss": 1.0235,
      "step": 6300
    },
    {
      "epoch": 3.78,
      "learning_rate": 2.775151924375422e-05,
      "loss": 1.0252,
      "step": 6350
    },
    {
      "epoch": 3.81,
      "learning_rate": 2.7076299797434168e-05,
      "loss": 1.0221,
      "step": 6400
    },
    {
      "epoch": 3.84,
      "learning_rate": 2.6401080351114116e-05,
      "loss": 1.0204,
      "step": 6450
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.572586090479406e-05,
      "loss": 1.0205,
      "step": 6500
    },
    {
      "epoch": 3.9,
      "learning_rate": 2.5050641458474007e-05,
      "loss": 1.0207,
      "step": 6550
    },
    {
      "epoch": 3.93,
      "learning_rate": 2.437542201215395e-05,
      "loss": 1.0165,
      "step": 6600
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.37002025658339e-05,
      "loss": 1.0182,
      "step": 6650
    },
    {
      "epoch": 3.99,
      "learning_rate": 2.3024983119513842e-05,
      "loss": 1.0165,
      "step": 6700
    },
    {
      "epoch": 4.02,
      "learning_rate": 2.234976367319379e-05,
      "loss": 1.0316,
      "step": 6750
    },
    {
      "epoch": 4.05,
      "learning_rate": 2.1674544226873737e-05,
      "loss": 1.0146,
      "step": 6800
    },
    {
      "epoch": 4.07,
      "learning_rate": 2.099932478055368e-05,
      "loss": 1.015,
      "step": 6850
    },
    {
      "epoch": 4.1,
      "learning_rate": 2.0324105334233625e-05,
      "loss": 1.0156,
      "step": 6900
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.9648885887913573e-05,
      "loss": 1.016,
      "step": 6950
    },
    {
      "epoch": 4.16,
      "learning_rate": 1.8973666441593517e-05,
      "loss": 1.0149,
      "step": 7000
    },
    {
      "epoch": 4.19,
      "learning_rate": 1.8298446995273467e-05,
      "loss": 1.0127,
      "step": 7050
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.762322754895341e-05,
      "loss": 1.0085,
      "step": 7100
    },
    {
      "epoch": 4.25,
      "learning_rate": 1.6948008102633355e-05,
      "loss": 1.0151,
      "step": 7150
    },
    {
      "epoch": 4.28,
      "learning_rate": 1.6272788656313303e-05,
      "loss": 1.0136,
      "step": 7200
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.5597569209993247e-05,
      "loss": 1.0077,
      "step": 7250
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.4922349763673194e-05,
      "loss": 1.0103,
      "step": 7300
    },
    {
      "epoch": 4.37,
      "learning_rate": 1.4247130317353142e-05,
      "loss": 1.0155,
      "step": 7350
    },
    {
      "epoch": 4.4,
      "learning_rate": 1.3571910871033086e-05,
      "loss": 1.0098,
      "step": 7400
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.2896691424713031e-05,
      "loss": 1.0093,
      "step": 7450
    },
    {
      "epoch": 4.46,
      "learning_rate": 1.2221471978392979e-05,
      "loss": 1.0112,
      "step": 7500
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.1546252532072925e-05,
      "loss": 1.0075,
      "step": 7550
    },
    {
      "epoch": 4.52,
      "learning_rate": 1.087103308575287e-05,
      "loss": 1.0086,
      "step": 7600
    },
    {
      "epoch": 4.55,
      "learning_rate": 1.0195813639432816e-05,
      "loss": 1.0072,
      "step": 7650
    },
    {
      "epoch": 4.58,
      "learning_rate": 9.520594193112763e-06,
      "loss": 1.0057,
      "step": 7700
    },
    {
      "epoch": 4.61,
      "learning_rate": 8.845374746792707e-06,
      "loss": 1.0101,
      "step": 7750
    },
    {
      "epoch": 4.64,
      "learning_rate": 8.170155300472653e-06,
      "loss": 1.006,
      "step": 7800
    },
    {
      "epoch": 4.67,
      "learning_rate": 7.4949358541526005e-06,
      "loss": 1.0079,
      "step": 7850
    },
    {
      "epoch": 4.7,
      "learning_rate": 6.819716407832546e-06,
      "loss": 1.0066,
      "step": 7900
    },
    {
      "epoch": 4.73,
      "learning_rate": 6.144496961512492e-06,
      "loss": 1.0113,
      "step": 7950
    },
    {
      "epoch": 4.76,
      "learning_rate": 5.4692775151924376e-06,
      "loss": 1.0065,
      "step": 8000
    }
  ],
  "max_steps": 8405,
  "num_train_epochs": 5,
  "total_flos": 5.140843815167534e+17,
  "trial_name": null,
  "trial_params": null
}