{
  "best_global_step": 4500,
  "best_metric": 1.2012678384780884,
  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
  "epoch": 24.882242990654206,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24922118380062305,
      "grad_norm": 100.0,
      "learning_rate": 1.218905472636816e-06,
      "loss": 9.7783,
      "step": 50
    },
    {
      "epoch": 0.4984423676012461,
      "grad_norm": 74.5,
      "learning_rate": 2.4626865671641794e-06,
      "loss": 9.5155,
      "step": 100
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 74.0,
      "learning_rate": 3.706467661691542e-06,
      "loss": 9.0351,
      "step": 150
    },
    {
      "epoch": 0.9968847352024922,
      "grad_norm": 80.5,
      "learning_rate": 4.950248756218906e-06,
      "loss": 8.4023,
      "step": 200
    },
    {
      "epoch": 1.2442367601246107,
      "grad_norm": 80.0,
      "learning_rate": 6.194029850746269e-06,
      "loss": 7.3784,
      "step": 250
    },
    {
      "epoch": 1.4934579439252336,
      "grad_norm": 134.0,
      "learning_rate": 7.437810945273633e-06,
      "loss": 5.7507,
      "step": 300
    },
    {
      "epoch": 1.7426791277258566,
      "grad_norm": 65.0,
      "learning_rate": 8.681592039800995e-06,
      "loss": 3.8008,
      "step": 350
    },
    {
      "epoch": 1.9919003115264797,
      "grad_norm": 17.0,
      "learning_rate": 9.925373134328359e-06,
      "loss": 2.103,
      "step": 400
    },
    {
      "epoch": 2.2392523364485983,
      "grad_norm": 1.65625,
      "learning_rate": 9.995836696556696e-06,
      "loss": 1.4184,
      "step": 450
    },
    {
      "epoch": 2.4884735202492214,
      "grad_norm": 0.84765625,
      "learning_rate": 9.982274873915892e-06,
      "loss": 1.2978,
      "step": 500
    },
    {
      "epoch": 2.4884735202492214,
      "eval_loss": 1.3274219036102295,
      "eval_runtime": 15.5223,
      "eval_samples_per_second": 11.532,
      "eval_steps_per_second": 1.482,
      "step": 500
    },
    {
      "epoch": 2.7376947040498445,
      "grad_norm": 0.87109375,
      "learning_rate": 9.95932312693483e-06,
      "loss": 1.2695,
      "step": 550
    },
    {
      "epoch": 2.986915887850467,
      "grad_norm": 1.3984375,
      "learning_rate": 9.927024711991988e-06,
      "loss": 1.2801,
      "step": 600
    },
    {
      "epoch": 3.2342679127725855,
      "grad_norm": 0.91015625,
      "learning_rate": 9.885440500813695e-06,
      "loss": 1.2495,
      "step": 650
    },
    {
      "epoch": 3.4834890965732086,
      "grad_norm": 0.9375,
      "learning_rate": 9.834648865751254e-06,
      "loss": 1.2456,
      "step": 700
    },
    {
      "epoch": 3.7327102803738317,
      "grad_norm": 1.078125,
      "learning_rate": 9.774745532075235e-06,
      "loss": 1.2472,
      "step": 750
    },
    {
      "epoch": 3.9819314641744548,
      "grad_norm": 0.89453125,
      "learning_rate": 9.705843397565304e-06,
      "loss": 1.2243,
      "step": 800
    },
    {
      "epoch": 4.229283489096574,
      "grad_norm": 1.1484375,
      "learning_rate": 9.628072319735607e-06,
      "loss": 1.2273,
      "step": 850
    },
    {
      "epoch": 4.478504672897197,
      "grad_norm": 1.0,
      "learning_rate": 9.541578871096728e-06,
      "loss": 1.2192,
      "step": 900
    },
    {
      "epoch": 4.72772585669782,
      "grad_norm": 0.85546875,
      "learning_rate": 9.446526062915449e-06,
      "loss": 1.2295,
      "step": 950
    },
    {
      "epoch": 4.976947040498443,
      "grad_norm": 1.2265625,
      "learning_rate": 9.343093037992946e-06,
      "loss": 1.2226,
      "step": 1000
    },
    {
      "epoch": 4.976947040498443,
      "eval_loss": 1.2828963994979858,
      "eval_runtime": 15.5322,
      "eval_samples_per_second": 11.524,
      "eval_steps_per_second": 1.481,
      "step": 1000
    },
    {
      "epoch": 5.224299065420561,
      "grad_norm": 0.92578125,
      "learning_rate": 9.231474733040436e-06,
      "loss": 1.2206,
      "step": 1050
    },
    {
      "epoch": 5.473520249221184,
      "grad_norm": 0.9609375,
      "learning_rate": 9.111881511288579e-06,
      "loss": 1.2081,
      "step": 1100
    },
    {
      "epoch": 5.722741433021807,
      "grad_norm": 1.0703125,
      "learning_rate": 8.984538766023024e-06,
      "loss": 1.2224,
      "step": 1150
    },
    {
      "epoch": 5.97196261682243,
      "grad_norm": 1.0625,
      "learning_rate": 8.849686495793349e-06,
      "loss": 1.2105,
      "step": 1200
    },
    {
      "epoch": 6.219314641744548,
      "grad_norm": 1.1015625,
      "learning_rate": 8.707578852095928e-06,
      "loss": 1.2096,
      "step": 1250
    },
    {
      "epoch": 6.468535825545171,
      "grad_norm": 1.3203125,
      "learning_rate": 8.558483660383245e-06,
      "loss": 1.2097,
      "step": 1300
    },
    {
      "epoch": 6.717757009345794,
      "grad_norm": 1.1875,
      "learning_rate": 8.402681915302344e-06,
      "loss": 1.2227,
      "step": 1350
    },
    {
      "epoch": 6.966978193146417,
      "grad_norm": 1.2265625,
      "learning_rate": 8.240467251113762e-06,
      "loss": 1.2102,
      "step": 1400
    },
    {
      "epoch": 7.214330218068536,
      "grad_norm": 1.1015625,
      "learning_rate": 8.072145388289002e-06,
      "loss": 1.185,
      "step": 1450
    },
    {
      "epoch": 7.463551401869159,
      "grad_norm": 1.1953125,
      "learning_rate": 7.898033557329536e-06,
      "loss": 1.2051,
      "step": 1500
    },
    {
      "epoch": 7.463551401869159,
      "eval_loss": 1.2759937047958374,
      "eval_runtime": 15.5082,
      "eval_samples_per_second": 11.542,
      "eval_steps_per_second": 1.483,
      "step": 1500
    },
    {
      "epoch": 7.712772585669782,
      "grad_norm": 0.9921875,
      "learning_rate": 7.718459900893254e-06,
      "loss": 1.2148,
      "step": 1550
    },
    {
      "epoch": 7.961993769470405,
      "grad_norm": 1.46875,
      "learning_rate": 7.533762855355126e-06,
      "loss": 1.2196,
      "step": 1600
    },
    {
      "epoch": 8.209345794392524,
      "grad_norm": 1.6328125,
      "learning_rate": 7.344290512967664e-06,
      "loss": 1.1935,
      "step": 1650
    },
    {
      "epoch": 8.458566978193147,
      "grad_norm": 1.3203125,
      "learning_rate": 7.150399965823252e-06,
      "loss": 1.208,
      "step": 1700
    },
    {
      "epoch": 8.70778816199377,
      "grad_norm": 1.484375,
      "learning_rate": 6.952456632854821e-06,
      "loss": 1.1997,
      "step": 1750
    },
    {
      "epoch": 8.957009345794393,
      "grad_norm": 1.171875,
      "learning_rate": 6.750833571143174e-06,
      "loss": 1.1962,
      "step": 1800
    },
    {
      "epoch": 9.20436137071651,
      "grad_norm": 1.015625,
      "learning_rate": 6.5459107728289784e-06,
      "loss": 1.216,
      "step": 1850
    },
    {
      "epoch": 9.453582554517133,
      "grad_norm": 1.2109375,
      "learning_rate": 6.338074448954472e-06,
      "loss": 1.1888,
      "step": 1900
    },
    {
      "epoch": 9.702803738317757,
      "grad_norm": 1.140625,
      "learning_rate": 6.127716301584618e-06,
      "loss": 1.1998,
      "step": 1950
    },
    {
      "epoch": 9.95202492211838,
      "grad_norm": 1.25,
      "learning_rate": 5.915232785579527e-06,
      "loss": 1.2089,
      "step": 2000
    },
    {
      "epoch": 9.95202492211838,
      "eval_loss": 1.2739386558532715,
      "eval_runtime": 15.5272,
      "eval_samples_per_second": 11.528,
      "eval_steps_per_second": 1.481,
      "step": 2000
    },
    {
      "epoch": 10.20436137071651,
      "grad_norm": 0.87109375,
      "learning_rate": 5.701024361409431e-06,
      "loss": 1.2265,
      "step": 2050
    },
    {
      "epoch": 10.453582554517133,
      "grad_norm": 0.95703125,
      "learning_rate": 5.485494740420431e-06,
      "loss": 1.198,
      "step": 2100
    },
    {
      "epoch": 10.702803738317757,
      "grad_norm": 1.7421875,
      "learning_rate": 5.26905012397343e-06,
      "loss": 1.2148,
      "step": 2150
    },
    {
      "epoch": 10.95202492211838,
      "grad_norm": 1.5234375,
      "learning_rate": 5.052098437890215e-06,
      "loss": 1.1983,
      "step": 2200
    },
    {
      "epoch": 11.199376947040498,
      "grad_norm": 1.15625,
      "learning_rate": 4.835048563649499e-06,
      "loss": 1.2025,
      "step": 2250
    },
    {
      "epoch": 11.448598130841122,
      "grad_norm": 1.0078125,
      "learning_rate": 4.6183095677818825e-06,
      "loss": 1.1926,
      "step": 2300
    },
    {
      "epoch": 11.697819314641745,
      "grad_norm": 1.4453125,
      "learning_rate": 4.402289930916053e-06,
      "loss": 1.2022,
      "step": 2350
    },
    {
      "epoch": 11.947040498442368,
      "grad_norm": 1.1328125,
      "learning_rate": 4.187396777929205e-06,
      "loss": 1.2099,
      "step": 2400
    },
    {
      "epoch": 12.194392523364487,
      "grad_norm": 1.625,
      "learning_rate": 3.974035110652596e-06,
      "loss": 1.2093,
      "step": 2450
    },
    {
      "epoch": 12.44361370716511,
      "grad_norm": 1.0625,
      "learning_rate": 3.762607044578357e-06,
      "loss": 1.1972,
      "step": 2500
    },
    {
      "epoch": 12.44361370716511,
      "eval_loss": 1.2259057760238647,
      "eval_runtime": 15.4359,
      "eval_samples_per_second": 11.596,
      "eval_steps_per_second": 1.49,
      "step": 2500
    },
    {
      "epoch": 12.692834890965733,
      "grad_norm": 1.40625,
      "learning_rate": 3.55351105100606e-06,
      "loss": 1.1879,
      "step": 2550
    },
    {
      "epoch": 12.942056074766356,
      "grad_norm": 0.89453125,
      "learning_rate": 3.3471412060573944e-06,
      "loss": 1.215,
      "step": 2600
    },
    {
      "epoch": 13.189408099688473,
      "grad_norm": 1.15625,
      "learning_rate": 3.1438864479742693e-06,
      "loss": 1.2105,
      "step": 2650
    },
    {
      "epoch": 13.438629283489096,
      "grad_norm": 1.2109375,
      "learning_rate": 2.9441298441001165e-06,
      "loss": 1.1956,
      "step": 2700
    },
    {
      "epoch": 13.687850467289719,
      "grad_norm": 1.15625,
      "learning_rate": 2.7482478689258733e-06,
      "loss": 1.2139,
      "step": 2750
    },
    {
      "epoch": 13.937071651090342,
      "grad_norm": 1.640625,
      "learning_rate": 2.556609694561273e-06,
      "loss": 1.1846,
      "step": 2800
    },
    {
      "epoch": 14.184423676012461,
      "grad_norm": 0.98828125,
      "learning_rate": 2.3695764949687234e-06,
      "loss": 1.1978,
      "step": 2850
    },
    {
      "epoch": 14.433644859813084,
      "grad_norm": 1.2890625,
      "learning_rate": 2.1875007652709768e-06,
      "loss": 1.199,
      "step": 2900
    },
    {
      "epoch": 14.682866043613707,
      "grad_norm": 0.9140625,
      "learning_rate": 2.0107256574155564e-06,
      "loss": 1.2097,
      "step": 2950
    },
    {
      "epoch": 14.93208722741433,
      "grad_norm": 1.0625,
      "learning_rate": 1.8395843334479125e-06,
      "loss": 1.2051,
      "step": 3000
    },
    {
      "epoch": 14.93208722741433,
      "eval_loss": 1.2260087728500366,
      "eval_runtime": 15.4261,
      "eval_samples_per_second": 11.604,
      "eval_steps_per_second": 1.491,
      "step": 3000
    },
    {
      "epoch": 15.179439252336449,
      "grad_norm": 1.4296875,
      "learning_rate": 4.01645826177586e-06,
      "loss": 1.2379,
      "step": 3050
    },
    {
      "epoch": 15.428660436137072,
      "grad_norm": 0.96875,
      "learning_rate": 3.846795459902898e-06,
      "loss": 1.2348,
      "step": 3100
    },
    {
      "epoch": 15.677881619937695,
      "grad_norm": 1.234375,
      "learning_rate": 3.6785240237887355e-06,
      "loss": 1.2098,
      "step": 3150
    },
    {
      "epoch": 15.927102803738318,
      "grad_norm": 1.7265625,
      "learning_rate": 3.511846976493248e-06,
      "loss": 1.209,
      "step": 3200
    },
    {
      "epoch": 16.174454828660437,
      "grad_norm": 1.2109375,
      "learning_rate": 3.3469654174123565e-06,
      "loss": 1.2129,
      "step": 3250
    },
    {
      "epoch": 16.42367601246106,
      "grad_norm": 1.15625,
      "learning_rate": 3.184078279647331e-06,
      "loss": 1.2187,
      "step": 3300
    },
    {
      "epoch": 16.672897196261683,
      "grad_norm": 1.2578125,
      "learning_rate": 3.0233820899877898e-06,
      "loss": 1.2018,
      "step": 3350
    },
    {
      "epoch": 16.922118380062305,
      "grad_norm": 1.1484375,
      "learning_rate": 2.8650707317979437e-06,
      "loss": 1.2255,
      "step": 3400
    },
    {
      "epoch": 17.169470404984423,
      "grad_norm": 1.1796875,
      "learning_rate": 2.709335211092214e-06,
      "loss": 1.1997,
      "step": 3450
    },
    {
      "epoch": 17.418691588785048,
      "grad_norm": 1.1875,
      "learning_rate": 2.556363426082418e-06,
      "loss": 1.211,
      "step": 3500
    },
    {
      "epoch": 17.418691588785048,
      "eval_loss": 1.2014065980911255,
      "eval_runtime": 15.5998,
      "eval_samples_per_second": 11.474,
      "eval_steps_per_second": 1.474,
      "step": 3500
    },
    {
      "epoch": 17.66791277258567,
      "grad_norm": 1.046875,
      "learning_rate": 2.4063399404745724e-06,
      "loss": 1.2199,
      "step": 3550
    },
    {
      "epoch": 17.917133956386294,
      "grad_norm": 1.625,
      "learning_rate": 2.2594457607888917e-06,
      "loss": 1.217,
      "step": 3600
    },
    {
      "epoch": 18.16448598130841,
      "grad_norm": 1.3515625,
      "learning_rate": 2.115858117971553e-06,
      "loss": 1.2021,
      "step": 3650
    },
    {
      "epoch": 18.413707165109034,
      "grad_norm": 1.484375,
      "learning_rate": 1.9757502535618137e-06,
      "loss": 1.2159,
      "step": 3700
    },
    {
      "epoch": 18.662928348909656,
      "grad_norm": 1.015625,
      "learning_rate": 1.839291210672407e-06,
      "loss": 1.2117,
      "step": 3750
    },
    {
      "epoch": 18.91214953271028,
      "grad_norm": 1.234375,
      "learning_rate": 1.7066456300354462e-06,
      "loss": 1.225,
      "step": 3800
    },
    {
      "epoch": 19.1595015576324,
      "grad_norm": 1.390625,
      "learning_rate": 1.577973551359877e-06,
      "loss": 1.2239,
      "step": 3850
    },
    {
      "epoch": 19.40872274143302,
      "grad_norm": 1.6015625,
      "learning_rate": 1.453430220240178e-06,
      "loss": 1.209,
      "step": 3900
    },
    {
      "epoch": 19.657943925233646,
      "grad_norm": 1.203125,
      "learning_rate": 1.333165900849255e-06,
      "loss": 1.2148,
      "step": 3950
    },
    {
      "epoch": 19.907165109034267,
      "grad_norm": 1.1484375,
      "learning_rate": 1.2173256946415214e-06,
      "loss": 1.2024,
      "step": 4000
    },
    {
      "epoch": 19.907165109034267,
      "eval_loss": 1.2013256549835205,
      "eval_runtime": 15.5958,
      "eval_samples_per_second": 11.477,
      "eval_steps_per_second": 1.475,
      "step": 4000
    },
    {
      "epoch": 20.154517133956386,
      "grad_norm": 1.1328125,
      "learning_rate": 1.106049365284918e-06,
      "loss": 1.2362,
      "step": 4050
    },
    {
      "epoch": 20.40373831775701,
      "grad_norm": 1.0078125,
      "learning_rate": 9.994711700330779e-07,
      "loss": 1.2212,
      "step": 4100
    },
    {
      "epoch": 20.652959501557632,
      "grad_norm": 1.5078125,
      "learning_rate": 8.97719697741104e-07,
      "loss": 1.1908,
      "step": 4150
    },
    {
      "epoch": 20.902180685358257,
      "grad_norm": 1.0390625,
      "learning_rate": 8.009177137203794e-07,
      "loss": 1.2261,
      "step": 4200
    },
    {
      "epoch": 21.149532710280372,
      "grad_norm": 1.046875,
      "learning_rate": 7.091820116196152e-07,
      "loss": 1.1987,
      "step": 4250
    },
    {
      "epoch": 21.398753894080997,
      "grad_norm": 1.46875,
      "learning_rate": 6.2262327251084e-07,
      "loss": 1.2089,
      "step": 4300
    },
    {
      "epoch": 21.64797507788162,
      "grad_norm": 1.1796875,
      "learning_rate": 5.413459313503272e-07,
      "loss": 1.2162,
      "step": 4350
    },
    {
      "epoch": 21.897196261682243,
      "grad_norm": 1.0859375,
      "learning_rate": 4.654480509756082e-07,
      "loss": 1.216,
      "step": 4400
    },
    {
      "epoch": 22.144548286604362,
      "grad_norm": 1.484375,
      "learning_rate": 3.9502120379057764e-07,
      "loss": 1.2089,
      "step": 4450
    },
    {
      "epoch": 22.393769470404983,
      "grad_norm": 1.171875,
      "learning_rate": 3.301503612814444e-07,
      "loss": 1.2161,
      "step": 4500
    },
    {
      "epoch": 22.393769470404983,
      "eval_loss": 1.2012678384780884,
      "eval_runtime": 15.6175,
      "eval_samples_per_second": 11.462,
      "eval_steps_per_second": 1.473,
      "step": 4500
    },
    {
      "epoch": 22.642990654205608,
      "grad_norm": 1.0625,
      "learning_rate": 2.7091379149682683e-07,
      "loss": 1.2239,
      "step": 4550
    },
    {
      "epoch": 22.89221183800623,
      "grad_norm": 1.3515625,
      "learning_rate": 2.1738296461569164e-07,
      "loss": 1.2121,
      "step": 4600
    },
    {
      "epoch": 23.139563862928348,
      "grad_norm": 1.2421875,
      "learning_rate": 1.6962246671706872e-07,
      "loss": 1.1973,
      "step": 4650
    },
    {
      "epoch": 23.388785046728973,
      "grad_norm": 1.2578125,
      "learning_rate": 1.2768992185557104e-07,
      "loss": 1.2183,
      "step": 4700
    },
    {
      "epoch": 23.638006230529594,
      "grad_norm": 1.3125,
      "learning_rate": 9.163592253675247e-08,
      "loss": 1.2195,
      "step": 4750
    },
    {
      "epoch": 23.88722741433022,
      "grad_norm": 1.2109375,
      "learning_rate": 6.15039686761748e-08,
      "loss": 1.21,
      "step": 4800
    },
    {
      "epoch": 24.134579439252338,
      "grad_norm": 1.2265625,
      "learning_rate": 3.733041511583768e-08,
      "loss": 1.2056,
      "step": 4850
    },
    {
      "epoch": 24.38380062305296,
      "grad_norm": 1.46875,
      "learning_rate": 1.914442776128622e-08,
      "loss": 1.1913,
      "step": 4900
    },
    {
      "epoch": 24.633021806853584,
      "grad_norm": 1.1796875,
      "learning_rate": 6.9679483923318356e-09,
      "loss": 1.2346,
      "step": 4950
    },
    {
      "epoch": 24.882242990654206,
      "grad_norm": 1.171875,
      "learning_rate": 8.156681898252583e-10,
      "loss": 1.2149,
      "step": 5000
    },
    {
      "epoch": 24.882242990654206,
      "eval_loss": 1.201310396194458,
      "eval_runtime": 15.6169,
      "eval_samples_per_second": 11.462,
      "eval_steps_per_second": 1.473,
      "step": 5000
    }
  ],
  "logging_steps": 50,
  "max_steps": 5025,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3835310591104778e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}