{
"best_global_step": 4500,
"best_metric": 1.2012678384780884,
"best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
"epoch": 24.882242990654206,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24922118380062305,
"grad_norm": 100.0,
"learning_rate": 1.218905472636816e-06,
"loss": 9.7783,
"step": 50
},
{
"epoch": 0.4984423676012461,
"grad_norm": 74.5,
"learning_rate": 2.4626865671641794e-06,
"loss": 9.5155,
"step": 100
},
{
"epoch": 0.7476635514018691,
"grad_norm": 74.0,
"learning_rate": 3.706467661691542e-06,
"loss": 9.0351,
"step": 150
},
{
"epoch": 0.9968847352024922,
"grad_norm": 80.5,
"learning_rate": 4.950248756218906e-06,
"loss": 8.4023,
"step": 200
},
{
"epoch": 1.2442367601246107,
"grad_norm": 80.0,
"learning_rate": 6.194029850746269e-06,
"loss": 7.3784,
"step": 250
},
{
"epoch": 1.4934579439252336,
"grad_norm": 134.0,
"learning_rate": 7.437810945273633e-06,
"loss": 5.7507,
"step": 300
},
{
"epoch": 1.7426791277258566,
"grad_norm": 65.0,
"learning_rate": 8.681592039800995e-06,
"loss": 3.8008,
"step": 350
},
{
"epoch": 1.9919003115264797,
"grad_norm": 17.0,
"learning_rate": 9.925373134328359e-06,
"loss": 2.103,
"step": 400
},
{
"epoch": 2.2392523364485983,
"grad_norm": 1.65625,
"learning_rate": 9.995836696556696e-06,
"loss": 1.4184,
"step": 450
},
{
"epoch": 2.4884735202492214,
"grad_norm": 0.84765625,
"learning_rate": 9.982274873915892e-06,
"loss": 1.2978,
"step": 500
},
{
"epoch": 2.4884735202492214,
"eval_loss": 1.3274219036102295,
"eval_runtime": 15.5223,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.482,
"step": 500
},
{
"epoch": 2.7376947040498445,
"grad_norm": 0.87109375,
"learning_rate": 9.95932312693483e-06,
"loss": 1.2695,
"step": 550
},
{
"epoch": 2.986915887850467,
"grad_norm": 1.3984375,
"learning_rate": 9.927024711991988e-06,
"loss": 1.2801,
"step": 600
},
{
"epoch": 3.2342679127725855,
"grad_norm": 0.91015625,
"learning_rate": 9.885440500813695e-06,
"loss": 1.2495,
"step": 650
},
{
"epoch": 3.4834890965732086,
"grad_norm": 0.9375,
"learning_rate": 9.834648865751254e-06,
"loss": 1.2456,
"step": 700
},
{
"epoch": 3.7327102803738317,
"grad_norm": 1.078125,
"learning_rate": 9.774745532075235e-06,
"loss": 1.2472,
"step": 750
},
{
"epoch": 3.9819314641744548,
"grad_norm": 0.89453125,
"learning_rate": 9.705843397565304e-06,
"loss": 1.2243,
"step": 800
},
{
"epoch": 4.229283489096574,
"grad_norm": 1.1484375,
"learning_rate": 9.628072319735607e-06,
"loss": 1.2273,
"step": 850
},
{
"epoch": 4.478504672897197,
"grad_norm": 1.0,
"learning_rate": 9.541578871096728e-06,
"loss": 1.2192,
"step": 900
},
{
"epoch": 4.72772585669782,
"grad_norm": 0.85546875,
"learning_rate": 9.446526062915449e-06,
"loss": 1.2295,
"step": 950
},
{
"epoch": 4.976947040498443,
"grad_norm": 1.2265625,
"learning_rate": 9.343093037992946e-06,
"loss": 1.2226,
"step": 1000
},
{
"epoch": 4.976947040498443,
"eval_loss": 1.2828963994979858,
"eval_runtime": 15.5322,
"eval_samples_per_second": 11.524,
"eval_steps_per_second": 1.481,
"step": 1000
},
{
"epoch": 5.224299065420561,
"grad_norm": 0.92578125,
"learning_rate": 9.231474733040436e-06,
"loss": 1.2206,
"step": 1050
},
{
"epoch": 5.473520249221184,
"grad_norm": 0.9609375,
"learning_rate": 9.111881511288579e-06,
"loss": 1.2081,
"step": 1100
},
{
"epoch": 5.722741433021807,
"grad_norm": 1.0703125,
"learning_rate": 8.984538766023024e-06,
"loss": 1.2224,
"step": 1150
},
{
"epoch": 5.97196261682243,
"grad_norm": 1.0625,
"learning_rate": 8.849686495793349e-06,
"loss": 1.2105,
"step": 1200
},
{
"epoch": 6.219314641744548,
"grad_norm": 1.1015625,
"learning_rate": 8.707578852095928e-06,
"loss": 1.2096,
"step": 1250
},
{
"epoch": 6.468535825545171,
"grad_norm": 1.3203125,
"learning_rate": 8.558483660383245e-06,
"loss": 1.2097,
"step": 1300
},
{
"epoch": 6.717757009345794,
"grad_norm": 1.1875,
"learning_rate": 8.402681915302344e-06,
"loss": 1.2227,
"step": 1350
},
{
"epoch": 6.966978193146417,
"grad_norm": 1.2265625,
"learning_rate": 8.240467251113762e-06,
"loss": 1.2102,
"step": 1400
},
{
"epoch": 7.214330218068536,
"grad_norm": 1.1015625,
"learning_rate": 8.072145388289002e-06,
"loss": 1.185,
"step": 1450
},
{
"epoch": 7.463551401869159,
"grad_norm": 1.1953125,
"learning_rate": 7.898033557329536e-06,
"loss": 1.2051,
"step": 1500
},
{
"epoch": 7.463551401869159,
"eval_loss": 1.2759937047958374,
"eval_runtime": 15.5082,
"eval_samples_per_second": 11.542,
"eval_steps_per_second": 1.483,
"step": 1500
},
{
"epoch": 7.712772585669782,
"grad_norm": 0.9921875,
"learning_rate": 7.718459900893254e-06,
"loss": 1.2148,
"step": 1550
},
{
"epoch": 7.961993769470405,
"grad_norm": 1.46875,
"learning_rate": 7.533762855355126e-06,
"loss": 1.2196,
"step": 1600
},
{
"epoch": 8.209345794392524,
"grad_norm": 1.6328125,
"learning_rate": 7.344290512967664e-06,
"loss": 1.1935,
"step": 1650
},
{
"epoch": 8.458566978193147,
"grad_norm": 1.3203125,
"learning_rate": 7.150399965823252e-06,
"loss": 1.208,
"step": 1700
},
{
"epoch": 8.70778816199377,
"grad_norm": 1.484375,
"learning_rate": 6.952456632854821e-06,
"loss": 1.1997,
"step": 1750
},
{
"epoch": 8.957009345794393,
"grad_norm": 1.171875,
"learning_rate": 6.750833571143174e-06,
"loss": 1.1962,
"step": 1800
},
{
"epoch": 9.20436137071651,
"grad_norm": 1.015625,
"learning_rate": 6.5459107728289784e-06,
"loss": 1.216,
"step": 1850
},
{
"epoch": 9.453582554517133,
"grad_norm": 1.2109375,
"learning_rate": 6.338074448954472e-06,
"loss": 1.1888,
"step": 1900
},
{
"epoch": 9.702803738317757,
"grad_norm": 1.140625,
"learning_rate": 6.127716301584618e-06,
"loss": 1.1998,
"step": 1950
},
{
"epoch": 9.95202492211838,
"grad_norm": 1.25,
"learning_rate": 5.915232785579527e-06,
"loss": 1.2089,
"step": 2000
},
{
"epoch": 9.95202492211838,
"eval_loss": 1.2739386558532715,
"eval_runtime": 15.5272,
"eval_samples_per_second": 11.528,
"eval_steps_per_second": 1.481,
"step": 2000
},
{
"epoch": 10.20436137071651,
"grad_norm": 0.87109375,
"learning_rate": 5.701024361409431e-06,
"loss": 1.2265,
"step": 2050
},
{
"epoch": 10.453582554517133,
"grad_norm": 0.95703125,
"learning_rate": 5.485494740420431e-06,
"loss": 1.198,
"step": 2100
},
{
"epoch": 10.702803738317757,
"grad_norm": 1.7421875,
"learning_rate": 5.26905012397343e-06,
"loss": 1.2148,
"step": 2150
},
{
"epoch": 10.95202492211838,
"grad_norm": 1.5234375,
"learning_rate": 5.052098437890215e-06,
"loss": 1.1983,
"step": 2200
},
{
"epoch": 11.199376947040498,
"grad_norm": 1.15625,
"learning_rate": 4.835048563649499e-06,
"loss": 1.2025,
"step": 2250
},
{
"epoch": 11.448598130841122,
"grad_norm": 1.0078125,
"learning_rate": 4.6183095677818825e-06,
"loss": 1.1926,
"step": 2300
},
{
"epoch": 11.697819314641745,
"grad_norm": 1.4453125,
"learning_rate": 4.402289930916053e-06,
"loss": 1.2022,
"step": 2350
},
{
"epoch": 11.947040498442368,
"grad_norm": 1.1328125,
"learning_rate": 4.187396777929205e-06,
"loss": 1.2099,
"step": 2400
},
{
"epoch": 12.194392523364487,
"grad_norm": 1.625,
"learning_rate": 3.974035110652596e-06,
"loss": 1.2093,
"step": 2450
},
{
"epoch": 12.44361370716511,
"grad_norm": 1.0625,
"learning_rate": 3.762607044578357e-06,
"loss": 1.1972,
"step": 2500
},
{
"epoch": 12.44361370716511,
"eval_loss": 1.2259057760238647,
"eval_runtime": 15.4359,
"eval_samples_per_second": 11.596,
"eval_steps_per_second": 1.49,
"step": 2500
},
{
"epoch": 12.692834890965733,
"grad_norm": 1.40625,
"learning_rate": 3.55351105100606e-06,
"loss": 1.1879,
"step": 2550
},
{
"epoch": 12.942056074766356,
"grad_norm": 0.89453125,
"learning_rate": 3.3471412060573944e-06,
"loss": 1.215,
"step": 2600
},
{
"epoch": 13.189408099688473,
"grad_norm": 1.15625,
"learning_rate": 3.1438864479742693e-06,
"loss": 1.2105,
"step": 2650
},
{
"epoch": 13.438629283489096,
"grad_norm": 1.2109375,
"learning_rate": 2.9441298441001165e-06,
"loss": 1.1956,
"step": 2700
},
{
"epoch": 13.687850467289719,
"grad_norm": 1.15625,
"learning_rate": 2.7482478689258733e-06,
"loss": 1.2139,
"step": 2750
},
{
"epoch": 13.937071651090342,
"grad_norm": 1.640625,
"learning_rate": 2.556609694561273e-06,
"loss": 1.1846,
"step": 2800
},
{
"epoch": 14.184423676012461,
"grad_norm": 0.98828125,
"learning_rate": 2.3695764949687234e-06,
"loss": 1.1978,
"step": 2850
},
{
"epoch": 14.433644859813084,
"grad_norm": 1.2890625,
"learning_rate": 2.1875007652709768e-06,
"loss": 1.199,
"step": 2900
},
{
"epoch": 14.682866043613707,
"grad_norm": 0.9140625,
"learning_rate": 2.0107256574155564e-06,
"loss": 1.2097,
"step": 2950
},
{
"epoch": 14.93208722741433,
"grad_norm": 1.0625,
"learning_rate": 1.8395843334479125e-06,
"loss": 1.2051,
"step": 3000
},
{
"epoch": 14.93208722741433,
"eval_loss": 1.2260087728500366,
"eval_runtime": 15.4261,
"eval_samples_per_second": 11.604,
"eval_steps_per_second": 1.491,
"step": 3000
},
{
"epoch": 15.179439252336449,
"grad_norm": 1.4296875,
"learning_rate": 4.01645826177586e-06,
"loss": 1.2379,
"step": 3050
},
{
"epoch": 15.428660436137072,
"grad_norm": 0.96875,
"learning_rate": 3.846795459902898e-06,
"loss": 1.2348,
"step": 3100
},
{
"epoch": 15.677881619937695,
"grad_norm": 1.234375,
"learning_rate": 3.6785240237887355e-06,
"loss": 1.2098,
"step": 3150
},
{
"epoch": 15.927102803738318,
"grad_norm": 1.7265625,
"learning_rate": 3.511846976493248e-06,
"loss": 1.209,
"step": 3200
},
{
"epoch": 16.174454828660437,
"grad_norm": 1.2109375,
"learning_rate": 3.3469654174123565e-06,
"loss": 1.2129,
"step": 3250
},
{
"epoch": 16.42367601246106,
"grad_norm": 1.15625,
"learning_rate": 3.184078279647331e-06,
"loss": 1.2187,
"step": 3300
},
{
"epoch": 16.672897196261683,
"grad_norm": 1.2578125,
"learning_rate": 3.0233820899877898e-06,
"loss": 1.2018,
"step": 3350
},
{
"epoch": 16.922118380062305,
"grad_norm": 1.1484375,
"learning_rate": 2.8650707317979437e-06,
"loss": 1.2255,
"step": 3400
},
{
"epoch": 17.169470404984423,
"grad_norm": 1.1796875,
"learning_rate": 2.709335211092214e-06,
"loss": 1.1997,
"step": 3450
},
{
"epoch": 17.418691588785048,
"grad_norm": 1.1875,
"learning_rate": 2.556363426082418e-06,
"loss": 1.211,
"step": 3500
},
{
"epoch": 17.418691588785048,
"eval_loss": 1.2014065980911255,
"eval_runtime": 15.5998,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.474,
"step": 3500
},
{
"epoch": 17.66791277258567,
"grad_norm": 1.046875,
"learning_rate": 2.4063399404745724e-06,
"loss": 1.2199,
"step": 3550
},
{
"epoch": 17.917133956386294,
"grad_norm": 1.625,
"learning_rate": 2.2594457607888917e-06,
"loss": 1.217,
"step": 3600
},
{
"epoch": 18.16448598130841,
"grad_norm": 1.3515625,
"learning_rate": 2.115858117971553e-06,
"loss": 1.2021,
"step": 3650
},
{
"epoch": 18.413707165109034,
"grad_norm": 1.484375,
"learning_rate": 1.9757502535618137e-06,
"loss": 1.2159,
"step": 3700
},
{
"epoch": 18.662928348909656,
"grad_norm": 1.015625,
"learning_rate": 1.839291210672407e-06,
"loss": 1.2117,
"step": 3750
},
{
"epoch": 18.91214953271028,
"grad_norm": 1.234375,
"learning_rate": 1.7066456300354462e-06,
"loss": 1.225,
"step": 3800
},
{
"epoch": 19.1595015576324,
"grad_norm": 1.390625,
"learning_rate": 1.577973551359877e-06,
"loss": 1.2239,
"step": 3850
},
{
"epoch": 19.40872274143302,
"grad_norm": 1.6015625,
"learning_rate": 1.453430220240178e-06,
"loss": 1.209,
"step": 3900
},
{
"epoch": 19.657943925233646,
"grad_norm": 1.203125,
"learning_rate": 1.333165900849255e-06,
"loss": 1.2148,
"step": 3950
},
{
"epoch": 19.907165109034267,
"grad_norm": 1.1484375,
"learning_rate": 1.2173256946415214e-06,
"loss": 1.2024,
"step": 4000
},
{
"epoch": 19.907165109034267,
"eval_loss": 1.2013256549835205,
"eval_runtime": 15.5958,
"eval_samples_per_second": 11.477,
"eval_steps_per_second": 1.475,
"step": 4000
},
{
"epoch": 20.154517133956386,
"grad_norm": 1.1328125,
"learning_rate": 1.106049365284918e-06,
"loss": 1.2362,
"step": 4050
},
{
"epoch": 20.40373831775701,
"grad_norm": 1.0078125,
"learning_rate": 9.994711700330779e-07,
"loss": 1.2212,
"step": 4100
},
{
"epoch": 20.652959501557632,
"grad_norm": 1.5078125,
"learning_rate": 8.97719697741104e-07,
"loss": 1.1908,
"step": 4150
},
{
"epoch": 20.902180685358257,
"grad_norm": 1.0390625,
"learning_rate": 8.009177137203794e-07,
"loss": 1.2261,
"step": 4200
},
{
"epoch": 21.149532710280372,
"grad_norm": 1.046875,
"learning_rate": 7.091820116196152e-07,
"loss": 1.1987,
"step": 4250
},
{
"epoch": 21.398753894080997,
"grad_norm": 1.46875,
"learning_rate": 6.2262327251084e-07,
"loss": 1.2089,
"step": 4300
},
{
"epoch": 21.64797507788162,
"grad_norm": 1.1796875,
"learning_rate": 5.413459313503272e-07,
"loss": 1.2162,
"step": 4350
},
{
"epoch": 21.897196261682243,
"grad_norm": 1.0859375,
"learning_rate": 4.654480509756082e-07,
"loss": 1.216,
"step": 4400
},
{
"epoch": 22.144548286604362,
"grad_norm": 1.484375,
"learning_rate": 3.9502120379057764e-07,
"loss": 1.2089,
"step": 4450
},
{
"epoch": 22.393769470404983,
"grad_norm": 1.171875,
"learning_rate": 3.301503612814444e-07,
"loss": 1.2161,
"step": 4500
},
{
"epoch": 22.393769470404983,
"eval_loss": 1.2012678384780884,
"eval_runtime": 15.6175,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 1.473,
"step": 4500
},
{
"epoch": 22.642990654205608,
"grad_norm": 1.0625,
"learning_rate": 2.7091379149682683e-07,
"loss": 1.2239,
"step": 4550
},
{
"epoch": 22.89221183800623,
"grad_norm": 1.3515625,
"learning_rate": 2.1738296461569164e-07,
"loss": 1.2121,
"step": 4600
},
{
"epoch": 23.139563862928348,
"grad_norm": 1.2421875,
"learning_rate": 1.6962246671706872e-07,
"loss": 1.1973,
"step": 4650
},
{
"epoch": 23.388785046728973,
"grad_norm": 1.2578125,
"learning_rate": 1.2768992185557104e-07,
"loss": 1.2183,
"step": 4700
},
{
"epoch": 23.638006230529594,
"grad_norm": 1.3125,
"learning_rate": 9.163592253675247e-08,
"loss": 1.2195,
"step": 4750
},
{
"epoch": 23.88722741433022,
"grad_norm": 1.2109375,
"learning_rate": 6.15039686761748e-08,
"loss": 1.21,
"step": 4800
},
{
"epoch": 24.134579439252338,
"grad_norm": 1.2265625,
"learning_rate": 3.733041511583768e-08,
"loss": 1.2056,
"step": 4850
},
{
"epoch": 24.38380062305296,
"grad_norm": 1.46875,
"learning_rate": 1.914442776128622e-08,
"loss": 1.1913,
"step": 4900
},
{
"epoch": 24.633021806853584,
"grad_norm": 1.1796875,
"learning_rate": 6.9679483923318356e-09,
"loss": 1.2346,
"step": 4950
},
{
"epoch": 24.882242990654206,
"grad_norm": 1.171875,
"learning_rate": 8.156681898252583e-10,
"loss": 1.2149,
"step": 5000
},
{
"epoch": 24.882242990654206,
"eval_loss": 1.201310396194458,
"eval_runtime": 15.6169,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 1.473,
"step": 5000
}
],
"logging_steps": 50,
"max_steps": 5025,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3835310591104778e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}