| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.758733462167385, |
| "global_step": 8000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 5e-06, |
| "loss": 5.2948, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1e-05, |
| "loss": 4.9959, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.5e-05, |
| "loss": 4.6899, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2e-05, |
| "loss": 4.5063, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2.5e-05, |
| "loss": 4.4335, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 3e-05, |
| "loss": 4.3943, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 3.5e-05, |
| "loss": 4.35, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4e-05, |
| "loss": 4.2928, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.5e-05, |
| "loss": 4.2246, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5e-05, |
| "loss": 4.1448, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 4.0853, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 6e-05, |
| "loss": 4.0358, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 3.9763, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7e-05, |
| "loss": 3.8816, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 3.7246, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 8e-05, |
| "loss": 3.5085, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 8.5e-05, |
| "loss": 3.2879, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9e-05, |
| "loss": 3.0898, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.5e-05, |
| "loss": 2.8867, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0001, |
| "loss": 2.6323, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.932478055367996e-05, |
| "loss": 2.3103, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.86495611073599e-05, |
| "loss": 2.0185, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.797434166103985e-05, |
| "loss": 1.7382, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.729912221471978e-05, |
| "loss": 1.5627, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.662390276839974e-05, |
| "loss": 1.4679, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.594868332207968e-05, |
| "loss": 1.4046, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.527346387575963e-05, |
| "loss": 1.3544, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.459824442943957e-05, |
| "loss": 1.325, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.392302498311952e-05, |
| "loss": 1.2984, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.324780553679947e-05, |
| "loss": 1.2735, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.25725860904794e-05, |
| "loss": 1.2617, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.189736664415936e-05, |
| "loss": 1.2475, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.12221471978393e-05, |
| "loss": 1.2348, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 9.054692775151925e-05, |
| "loss": 1.2387, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 8.98717083051992e-05, |
| "loss": 1.2149, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 8.919648885887914e-05, |
| "loss": 1.2004, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 8.852126941255908e-05, |
| "loss": 1.1918, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 8.784604996623903e-05, |
| "loss": 1.1825, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 8.717083051991897e-05, |
| "loss": 1.1794, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 8.649561107359893e-05, |
| "loss": 1.175, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 8.582039162727888e-05, |
| "loss": 1.1657, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 8.514517218095882e-05, |
| "loss": 1.1621, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 8.446995273463876e-05, |
| "loss": 1.1564, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 8.37947332883187e-05, |
| "loss": 1.149, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 8.311951384199866e-05, |
| "loss": 1.1478, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 8.24442943956786e-05, |
| "loss": 1.1449, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 8.176907494935855e-05, |
| "loss": 1.1393, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 8.109385550303849e-05, |
| "loss": 1.135, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 8.041863605671843e-05, |
| "loss": 1.1308, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 7.974341661039839e-05, |
| "loss": 1.1282, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.906819716407833e-05, |
| "loss": 1.1197, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 7.839297771775828e-05, |
| "loss": 1.1227, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 7.771775827143822e-05, |
| "loss": 1.1192, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 7.704253882511818e-05, |
| "loss": 1.1109, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 7.63673193787981e-05, |
| "loss": 1.1166, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 7.569209993247806e-05, |
| "loss": 1.1132, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 7.5016880486158e-05, |
| "loss": 1.1043, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 7.434166103983795e-05, |
| "loss": 1.1085, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.366644159351789e-05, |
| "loss": 1.1018, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.299122214719785e-05, |
| "loss": 1.1034, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.23160027008778e-05, |
| "loss": 1.0954, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.164078325455773e-05, |
| "loss": 1.0973, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.096556380823768e-05, |
| "loss": 1.0971, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.029034436191762e-05, |
| "loss": 1.0893, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 6.961512491559758e-05, |
| "loss": 1.0891, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.893990546927752e-05, |
| "loss": 1.0866, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.826468602295747e-05, |
| "loss": 1.0938, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.758946657663741e-05, |
| "loss": 1.1025, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.691424713031735e-05, |
| "loss": 1.0858, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.623902768399731e-05, |
| "loss": 1.0835, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 6.556380823767725e-05, |
| "loss": 1.0793, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 6.48885887913572e-05, |
| "loss": 1.0754, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 6.421336934503714e-05, |
| "loss": 1.073, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 6.353814989871708e-05, |
| "loss": 1.0774, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 6.286293045239702e-05, |
| "loss": 1.0763, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 6.218771100607698e-05, |
| "loss": 1.0693, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 6.151249155975692e-05, |
| "loss": 1.0736, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 6.083727211343687e-05, |
| "loss": 1.0762, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 6.016205266711682e-05, |
| "loss": 1.0691, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 5.948683322079677e-05, |
| "loss": 1.0668, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 5.88116137744767e-05, |
| "loss": 1.0638, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 5.813639432815665e-05, |
| "loss": 1.0671, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 5.7461174881836596e-05, |
| "loss": 1.0595, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 5.6785955435516544e-05, |
| "loss": 1.0606, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 5.61107359891965e-05, |
| "loss": 1.0621, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 5.5435516542876445e-05, |
| "loss": 1.0633, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 5.476029709655638e-05, |
| "loss": 1.056, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 5.4085077650236326e-05, |
| "loss": 1.0598, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.3409858203916274e-05, |
| "loss": 1.0532, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.273463875759622e-05, |
| "loss": 1.0536, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 5.205941931127617e-05, |
| "loss": 1.0522, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 5.1384199864956116e-05, |
| "loss": 1.0478, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 5.0708980418636057e-05, |
| "loss": 1.0514, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 5.0033760972316004e-05, |
| "loss": 1.0526, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 4.935854152599595e-05, |
| "loss": 1.0476, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 4.86833220796759e-05, |
| "loss": 1.0459, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 4.8008102633355846e-05, |
| "loss": 1.0463, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.733288318703579e-05, |
| "loss": 1.0469, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 4.6657663740715734e-05, |
| "loss": 1.0444, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.598244429439568e-05, |
| "loss": 1.0461, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 4.530722484807562e-05, |
| "loss": 1.0613, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 4.463200540175557e-05, |
| "loss": 1.0418, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 4.395678595543552e-05, |
| "loss": 1.0415, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 4.3281566509115464e-05, |
| "loss": 1.0401, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 4.260634706279541e-05, |
| "loss": 1.0404, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 4.193112761647536e-05, |
| "loss": 1.0372, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 4.125590817015531e-05, |
| "loss": 1.0423, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 4.058068872383525e-05, |
| "loss": 1.0343, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 3.9905469277515195e-05, |
| "loss": 1.0374, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 3.923024983119514e-05, |
| "loss": 1.0355, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 3.855503038487508e-05, |
| "loss": 1.0312, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 3.787981093855503e-05, |
| "loss": 1.0374, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 3.720459149223498e-05, |
| "loss": 1.034, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 3.6529372045914925e-05, |
| "loss": 1.0327, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 3.585415259959487e-05, |
| "loss": 1.0317, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 3.517893315327482e-05, |
| "loss": 1.0317, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 3.450371370695476e-05, |
| "loss": 1.028, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 3.382849426063471e-05, |
| "loss": 1.027, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 3.3153274814314655e-05, |
| "loss": 1.0253, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 3.2478055367994596e-05, |
| "loss": 1.0291, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 3.180283592167454e-05, |
| "loss": 1.0266, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 3.112761647535449e-05, |
| "loss": 1.0311, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 3.0452397029034435e-05, |
| "loss": 1.0281, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 2.9777177582714382e-05, |
| "loss": 1.0238, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 2.910195813639433e-05, |
| "loss": 1.0248, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 2.8426738690074277e-05, |
| "loss": 1.0235, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 2.775151924375422e-05, |
| "loss": 1.0252, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 2.7076299797434168e-05, |
| "loss": 1.0221, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 2.6401080351114116e-05, |
| "loss": 1.0204, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 2.572586090479406e-05, |
| "loss": 1.0205, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 2.5050641458474007e-05, |
| "loss": 1.0207, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 2.437542201215395e-05, |
| "loss": 1.0165, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 2.37002025658339e-05, |
| "loss": 1.0182, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 2.3024983119513842e-05, |
| "loss": 1.0165, |
| "step": 6700 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 2.234976367319379e-05, |
| "loss": 1.0316, |
| "step": 6750 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 2.1674544226873737e-05, |
| "loss": 1.0146, |
| "step": 6800 |
| }, |
| { |
| "epoch": 4.07, |
| "learning_rate": 2.099932478055368e-05, |
| "loss": 1.015, |
| "step": 6850 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 2.0324105334233625e-05, |
| "loss": 1.0156, |
| "step": 6900 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 1.9648885887913573e-05, |
| "loss": 1.016, |
| "step": 6950 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 1.8973666441593517e-05, |
| "loss": 1.0149, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 1.8298446995273467e-05, |
| "loss": 1.0127, |
| "step": 7050 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 1.762322754895341e-05, |
| "loss": 1.0085, |
| "step": 7100 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 1.6948008102633355e-05, |
| "loss": 1.0151, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 1.6272788656313303e-05, |
| "loss": 1.0136, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 1.5597569209993247e-05, |
| "loss": 1.0077, |
| "step": 7250 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 1.4922349763673194e-05, |
| "loss": 1.0103, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 1.4247130317353142e-05, |
| "loss": 1.0155, |
| "step": 7350 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 1.3571910871033086e-05, |
| "loss": 1.0098, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1.2896691424713031e-05, |
| "loss": 1.0093, |
| "step": 7450 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 1.2221471978392979e-05, |
| "loss": 1.0112, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 1.1546252532072925e-05, |
| "loss": 1.0075, |
| "step": 7550 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 1.087103308575287e-05, |
| "loss": 1.0086, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.0195813639432816e-05, |
| "loss": 1.0072, |
| "step": 7650 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 9.520594193112763e-06, |
| "loss": 1.0057, |
| "step": 7700 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 8.845374746792707e-06, |
| "loss": 1.0101, |
| "step": 7750 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 8.170155300472653e-06, |
| "loss": 1.006, |
| "step": 7800 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 7.4949358541526005e-06, |
| "loss": 1.0079, |
| "step": 7850 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 6.819716407832546e-06, |
| "loss": 1.0066, |
| "step": 7900 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 6.144496961512492e-06, |
| "loss": 1.0113, |
| "step": 7950 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 5.4692775151924376e-06, |
| "loss": 1.0065, |
| "step": 8000 |
| } |
| ], |
| "max_steps": 8405, |
| "num_train_epochs": 5, |
| "total_flos": 5.140843815167534e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|