{ "best_global_step": 4500, "best_metric": 1.2012678384780884, "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500", "epoch": 24.882242990654206, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24922118380062305, "grad_norm": 100.0, "learning_rate": 1.218905472636816e-06, "loss": 9.7783, "step": 50 }, { "epoch": 0.4984423676012461, "grad_norm": 74.5, "learning_rate": 2.4626865671641794e-06, "loss": 9.5155, "step": 100 }, { "epoch": 0.7476635514018691, "grad_norm": 74.0, "learning_rate": 3.706467661691542e-06, "loss": 9.0351, "step": 150 }, { "epoch": 0.9968847352024922, "grad_norm": 80.5, "learning_rate": 4.950248756218906e-06, "loss": 8.4023, "step": 200 }, { "epoch": 1.2442367601246107, "grad_norm": 80.0, "learning_rate": 6.194029850746269e-06, "loss": 7.3784, "step": 250 }, { "epoch": 1.4934579439252336, "grad_norm": 134.0, "learning_rate": 7.437810945273633e-06, "loss": 5.7507, "step": 300 }, { "epoch": 1.7426791277258566, "grad_norm": 65.0, "learning_rate": 8.681592039800995e-06, "loss": 3.8008, "step": 350 }, { "epoch": 1.9919003115264797, "grad_norm": 17.0, "learning_rate": 9.925373134328359e-06, "loss": 2.103, "step": 400 }, { "epoch": 2.2392523364485983, "grad_norm": 1.65625, "learning_rate": 9.995836696556696e-06, "loss": 1.4184, "step": 450 }, { "epoch": 2.4884735202492214, "grad_norm": 0.84765625, "learning_rate": 9.982274873915892e-06, "loss": 1.2978, "step": 500 }, { "epoch": 2.4884735202492214, "eval_loss": 1.3274219036102295, "eval_runtime": 15.5223, "eval_samples_per_second": 11.532, "eval_steps_per_second": 1.482, "step": 500 }, { "epoch": 2.7376947040498445, "grad_norm": 0.87109375, "learning_rate": 9.95932312693483e-06, "loss": 1.2695, "step": 550 }, { "epoch": 2.986915887850467, "grad_norm": 1.3984375, "learning_rate": 9.927024711991988e-06, "loss": 1.2801, "step": 600 }, { "epoch": 3.2342679127725855, "grad_norm": 0.91015625, "learning_rate": 9.885440500813695e-06, "loss": 1.2495, "step": 650 }, { "epoch": 3.4834890965732086, "grad_norm": 0.9375, "learning_rate": 9.834648865751254e-06, "loss": 1.2456, "step": 700 }, { "epoch": 3.7327102803738317, "grad_norm": 1.078125, "learning_rate": 9.774745532075235e-06, "loss": 1.2472, "step": 750 }, { "epoch": 3.9819314641744548, "grad_norm": 0.89453125, "learning_rate": 9.705843397565304e-06, "loss": 1.2243, "step": 800 }, { "epoch": 4.229283489096574, "grad_norm": 1.1484375, "learning_rate": 9.628072319735607e-06, "loss": 1.2273, "step": 850 }, { "epoch": 4.478504672897197, "grad_norm": 1.0, "learning_rate": 9.541578871096728e-06, "loss": 1.2192, "step": 900 }, { "epoch": 4.72772585669782, "grad_norm": 0.85546875, "learning_rate": 9.446526062915449e-06, "loss": 1.2295, "step": 950 }, { "epoch": 4.976947040498443, "grad_norm": 1.2265625, "learning_rate": 9.343093037992946e-06, "loss": 1.2226, "step": 1000 }, { "epoch": 4.976947040498443, "eval_loss": 1.2828963994979858, "eval_runtime": 15.5322, "eval_samples_per_second": 11.524, "eval_steps_per_second": 1.481, "step": 1000 }, { "epoch": 5.224299065420561, "grad_norm": 0.92578125, "learning_rate": 9.231474733040436e-06, "loss": 1.2206, "step": 1050 }, { "epoch": 5.473520249221184, "grad_norm": 0.9609375, "learning_rate": 9.111881511288579e-06, "loss": 1.2081, "step": 1100 }, { "epoch": 5.722741433021807, "grad_norm": 1.0703125, "learning_rate": 8.984538766023024e-06, "loss": 1.2224, "step": 1150 }, { "epoch": 5.97196261682243, "grad_norm": 1.0625, "learning_rate": 8.849686495793349e-06, "loss": 1.2105, "step": 1200 }, { "epoch": 6.219314641744548, "grad_norm": 1.1015625, "learning_rate": 8.707578852095928e-06, "loss": 1.2096, "step": 1250 }, { "epoch": 6.468535825545171, "grad_norm": 1.3203125, "learning_rate": 8.558483660383245e-06, "loss": 1.2097, "step": 1300 }, { "epoch": 6.717757009345794, "grad_norm": 1.1875, "learning_rate": 8.402681915302344e-06, "loss": 1.2227, "step": 1350 }, { "epoch": 6.966978193146417, "grad_norm": 1.2265625, "learning_rate": 8.240467251113762e-06, "loss": 1.2102, "step": 1400 }, { "epoch": 7.214330218068536, "grad_norm": 1.1015625, "learning_rate": 8.072145388289002e-06, "loss": 1.185, "step": 1450 }, { "epoch": 7.463551401869159, "grad_norm": 1.1953125, "learning_rate": 7.898033557329536e-06, "loss": 1.2051, "step": 1500 }, { "epoch": 7.463551401869159, "eval_loss": 1.2759937047958374, "eval_runtime": 15.5082, "eval_samples_per_second": 11.542, "eval_steps_per_second": 1.483, "step": 1500 }, { "epoch": 7.712772585669782, "grad_norm": 0.9921875, "learning_rate": 7.718459900893254e-06, "loss": 1.2148, "step": 1550 }, { "epoch": 7.961993769470405, "grad_norm": 1.46875, "learning_rate": 7.533762855355126e-06, "loss": 1.2196, "step": 1600 }, { "epoch": 8.209345794392524, "grad_norm": 1.6328125, "learning_rate": 7.344290512967664e-06, "loss": 1.1935, "step": 1650 }, { "epoch": 8.458566978193147, "grad_norm": 1.3203125, "learning_rate": 7.150399965823252e-06, "loss": 1.208, "step": 1700 }, { "epoch": 8.70778816199377, "grad_norm": 1.484375, "learning_rate": 6.952456632854821e-06, "loss": 1.1997, "step": 1750 }, { "epoch": 8.957009345794393, "grad_norm": 1.171875, "learning_rate": 6.750833571143174e-06, "loss": 1.1962, "step": 1800 }, { "epoch": 9.20436137071651, "grad_norm": 1.015625, "learning_rate": 6.5459107728289784e-06, "loss": 1.216, "step": 1850 }, { "epoch": 9.453582554517133, "grad_norm": 1.2109375, "learning_rate": 6.338074448954472e-06, "loss": 1.1888, "step": 1900 }, { "epoch": 9.702803738317757, "grad_norm": 1.140625, "learning_rate": 6.127716301584618e-06, "loss": 1.1998, "step": 1950 }, { "epoch": 9.95202492211838, "grad_norm": 1.25, "learning_rate": 5.915232785579527e-06, "loss": 1.2089, "step": 2000 }, { "epoch": 9.95202492211838, "eval_loss": 1.2739386558532715, "eval_runtime": 15.5272, "eval_samples_per_second": 11.528, "eval_steps_per_second": 1.481, "step": 2000 }, { "epoch": 10.20436137071651, "grad_norm": 0.87109375, "learning_rate": 5.701024361409431e-06, "loss": 1.2265, "step": 2050 }, { "epoch": 10.453582554517133, "grad_norm": 0.95703125, "learning_rate": 5.485494740420431e-06, "loss": 1.198, "step": 2100 }, { "epoch": 10.702803738317757, "grad_norm": 1.7421875, "learning_rate": 5.26905012397343e-06, "loss": 1.2148, "step": 2150 }, { "epoch": 10.95202492211838, "grad_norm": 1.5234375, "learning_rate": 5.052098437890215e-06, "loss": 1.1983, "step": 2200 }, { "epoch": 11.199376947040498, "grad_norm": 1.15625, "learning_rate": 4.835048563649499e-06, "loss": 1.2025, "step": 2250 }, { "epoch": 11.448598130841122, "grad_norm": 1.0078125, "learning_rate": 4.6183095677818825e-06, "loss": 1.1926, "step": 2300 }, { "epoch": 11.697819314641745, "grad_norm": 1.4453125, "learning_rate": 4.402289930916053e-06, "loss": 1.2022, "step": 2350 }, { "epoch": 11.947040498442368, "grad_norm": 1.1328125, "learning_rate": 4.187396777929205e-06, "loss": 1.2099, "step": 2400 }, { "epoch": 12.194392523364487, "grad_norm": 1.625, "learning_rate": 3.974035110652596e-06, "loss": 1.2093, "step": 2450 }, { "epoch": 12.44361370716511, "grad_norm": 1.0625, "learning_rate": 3.762607044578357e-06, "loss": 1.1972, "step": 2500 }, { "epoch": 12.44361370716511, "eval_loss": 1.2259057760238647, "eval_runtime": 15.4359, "eval_samples_per_second": 11.596, "eval_steps_per_second": 1.49, "step": 2500 }, { "epoch": 12.692834890965733, "grad_norm": 1.40625, "learning_rate": 3.55351105100606e-06, "loss": 1.1879, "step": 2550 }, { "epoch": 12.942056074766356, "grad_norm": 0.89453125, "learning_rate": 3.3471412060573944e-06, "loss": 1.215, "step": 2600 }, { "epoch": 13.189408099688473, "grad_norm": 1.15625, "learning_rate": 3.1438864479742693e-06, "loss": 1.2105, "step": 2650 }, { "epoch": 13.438629283489096, "grad_norm": 1.2109375, "learning_rate": 2.9441298441001165e-06, "loss": 1.1956, "step": 2700 }, { "epoch": 13.687850467289719, "grad_norm": 1.15625, "learning_rate": 2.7482478689258733e-06, "loss": 1.2139, "step": 2750 }, { "epoch": 13.937071651090342, "grad_norm": 1.640625, "learning_rate": 2.556609694561273e-06, "loss": 1.1846, "step": 2800 }, { "epoch": 14.184423676012461, "grad_norm": 0.98828125, "learning_rate": 2.3695764949687234e-06, "loss": 1.1978, "step": 2850 }, { "epoch": 14.433644859813084, "grad_norm": 1.2890625, "learning_rate": 2.1875007652709768e-06, "loss": 1.199, "step": 2900 }, { "epoch": 14.682866043613707, "grad_norm": 0.9140625, "learning_rate": 2.0107256574155564e-06, "loss": 1.2097, "step": 2950 }, { "epoch": 14.93208722741433, "grad_norm": 1.0625, "learning_rate": 1.8395843334479125e-06, "loss": 1.2051, "step": 3000 }, { "epoch": 14.93208722741433, "eval_loss": 1.2260087728500366, "eval_runtime": 15.4261, "eval_samples_per_second": 11.604, "eval_steps_per_second": 1.491, "step": 3000 }, { "epoch": 15.179439252336449, "grad_norm": 1.4296875, "learning_rate": 4.01645826177586e-06, "loss": 1.2379, "step": 3050 }, { "epoch": 15.428660436137072, "grad_norm": 0.96875, "learning_rate": 3.846795459902898e-06, "loss": 1.2348, "step": 3100 }, { "epoch": 15.677881619937695, "grad_norm": 1.234375, "learning_rate": 3.6785240237887355e-06, "loss": 1.2098, "step": 3150 }, { "epoch": 15.927102803738318, "grad_norm": 1.7265625, "learning_rate": 3.511846976493248e-06, "loss": 1.209, "step": 3200 }, { "epoch": 16.174454828660437, "grad_norm": 1.2109375, "learning_rate": 3.3469654174123565e-06, "loss": 1.2129, "step": 3250 }, { "epoch": 16.42367601246106, "grad_norm": 1.15625, "learning_rate": 3.184078279647331e-06, "loss": 1.2187, "step": 3300 }, { "epoch": 16.672897196261683, "grad_norm": 1.2578125, "learning_rate": 3.0233820899877898e-06, "loss": 1.2018, "step": 3350 }, { "epoch": 16.922118380062305, "grad_norm": 1.1484375, "learning_rate": 2.8650707317979437e-06, "loss": 1.2255, "step": 3400 }, { "epoch": 17.169470404984423, "grad_norm": 1.1796875, "learning_rate": 2.709335211092214e-06, "loss": 1.1997, "step": 3450 }, { "epoch": 17.418691588785048, "grad_norm": 1.1875, "learning_rate": 2.556363426082418e-06, "loss": 1.211, "step": 3500 }, { "epoch": 17.418691588785048, "eval_loss": 1.2014065980911255, "eval_runtime": 15.5998, "eval_samples_per_second": 11.474, "eval_steps_per_second": 1.474, "step": 3500 }, { "epoch": 17.66791277258567, "grad_norm": 1.046875, "learning_rate": 2.4063399404745724e-06, "loss": 1.2199, "step": 3550 }, { "epoch": 17.917133956386294, "grad_norm": 1.625, "learning_rate": 2.2594457607888917e-06, "loss": 1.217, "step": 3600 }, { "epoch": 18.16448598130841, "grad_norm": 1.3515625, "learning_rate": 2.115858117971553e-06, "loss": 1.2021, "step": 3650 }, { "epoch": 18.413707165109034, "grad_norm": 1.484375, "learning_rate": 1.9757502535618137e-06, "loss": 1.2159, "step": 3700 }, { "epoch": 18.662928348909656, "grad_norm": 1.015625, "learning_rate": 1.839291210672407e-06, "loss": 1.2117, "step": 3750 }, { "epoch": 18.91214953271028, "grad_norm": 1.234375, "learning_rate": 1.7066456300354462e-06, "loss": 1.225, "step": 3800 }, { "epoch": 19.1595015576324, "grad_norm": 1.390625, "learning_rate": 1.577973551359877e-06, "loss": 1.2239, "step": 3850 }, { "epoch": 19.40872274143302, "grad_norm": 1.6015625, "learning_rate": 1.453430220240178e-06, "loss": 1.209, "step": 3900 }, { "epoch": 19.657943925233646, "grad_norm": 1.203125, "learning_rate": 1.333165900849255e-06, "loss": 1.2148, "step": 3950 }, { "epoch": 19.907165109034267, "grad_norm": 1.1484375, "learning_rate": 1.2173256946415214e-06, "loss": 1.2024, "step": 4000 }, { "epoch": 19.907165109034267, "eval_loss": 1.2013256549835205, "eval_runtime": 15.5958, "eval_samples_per_second": 11.477, "eval_steps_per_second": 1.475, "step": 4000 }, { "epoch": 20.154517133956386, "grad_norm": 1.1328125, "learning_rate": 1.106049365284918e-06, "loss": 1.2362, "step": 4050 }, { "epoch": 20.40373831775701, "grad_norm": 1.0078125, "learning_rate": 9.994711700330779e-07, "loss": 1.2212, "step": 4100 }, { "epoch": 20.652959501557632, "grad_norm": 1.5078125, "learning_rate": 8.97719697741104e-07, "loss": 1.1908, "step": 4150 }, { "epoch": 20.902180685358257, "grad_norm": 1.0390625, "learning_rate": 8.009177137203794e-07, "loss": 1.2261, "step": 4200 }, { "epoch": 21.149532710280372, "grad_norm": 1.046875, "learning_rate": 7.091820116196152e-07, "loss": 1.1987, "step": 4250 }, { "epoch": 21.398753894080997, "grad_norm": 1.46875, "learning_rate": 6.2262327251084e-07, "loss": 1.2089, "step": 4300 }, { "epoch": 21.64797507788162, "grad_norm": 1.1796875, "learning_rate": 5.413459313503272e-07, "loss": 1.2162, "step": 4350 }, { "epoch": 21.897196261682243, "grad_norm": 1.0859375, "learning_rate": 4.654480509756082e-07, "loss": 1.216, "step": 4400 }, { "epoch": 22.144548286604362, "grad_norm": 1.484375, "learning_rate": 3.9502120379057764e-07, "loss": 1.2089, "step": 4450 }, { "epoch": 22.393769470404983, "grad_norm": 1.171875, "learning_rate": 3.301503612814444e-07, "loss": 1.2161, "step": 4500 }, { "epoch": 22.393769470404983, "eval_loss": 1.2012678384780884, "eval_runtime": 15.6175, "eval_samples_per_second": 11.462, "eval_steps_per_second": 1.473, "step": 4500 }, { "epoch": 22.642990654205608, "grad_norm": 1.0625, "learning_rate": 2.7091379149682683e-07, "loss": 1.2239, "step": 4550 }, { "epoch": 22.89221183800623, "grad_norm": 1.3515625, "learning_rate": 2.1738296461569164e-07, "loss": 1.2121, "step": 4600 }, { "epoch": 23.139563862928348, "grad_norm": 1.2421875, "learning_rate": 1.6962246671706872e-07, "loss": 1.1973, "step": 4650 }, { "epoch": 23.388785046728973, "grad_norm": 1.2578125, "learning_rate": 1.2768992185557104e-07, "loss": 1.2183, "step": 4700 }, { "epoch": 23.638006230529594, "grad_norm": 1.3125, "learning_rate": 9.163592253675247e-08, "loss": 1.2195, "step": 4750 }, { "epoch": 23.88722741433022, "grad_norm": 1.2109375, "learning_rate": 6.15039686761748e-08, "loss": 1.21, "step": 4800 }, { "epoch": 24.134579439252338, "grad_norm": 1.2265625, "learning_rate": 3.733041511583768e-08, "loss": 1.2056, "step": 4850 }, { "epoch": 24.38380062305296, "grad_norm": 1.46875, "learning_rate": 1.914442776128622e-08, "loss": 1.1913, "step": 4900 }, { "epoch": 24.633021806853584, "grad_norm": 1.1796875, "learning_rate": 6.9679483923318356e-09, "loss": 1.2346, "step": 4950 }, { "epoch": 24.882242990654206, "grad_norm": 1.171875, "learning_rate": 8.156681898252583e-10, "loss": 1.2149, "step": 5000 }, { "epoch": 24.882242990654206, "eval_loss": 1.201310396194458, "eval_runtime": 15.6169, "eval_samples_per_second": 11.462, "eval_steps_per_second": 1.473, "step": 5000 } ], "logging_steps": 50, "max_steps": 5025, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3835310591104778e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }