PEFT
Safetensors
Agentic-V4 / trainer_state.json
k4rth33k's picture
Upload Agentic-V4
b8fafec verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.060836501901140684,
"eval_steps": 50,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003041825095057034,
"grad_norm": 0.2263541966676712,
"learning_rate": 9.090909090909091e-06,
"log_odds_chosen": 0.0022868067026138306,
"log_odds_ratio": -0.6926867961883545,
"logits/chosen": -2.233539581298828,
"logits/rejected": -2.2098608016967773,
"logps/chosen": -0.7675836682319641,
"logps/rejected": -0.7697547078132629,
"loss": 0.85230005,
"memory(GiB)": 243.33,
"nll_loss": 0.7675836682319641,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.07675836980342865,
"rewards/margins": 0.00021709827706217766,
"rewards/rejected": -0.07697547227144241,
"step": 1,
"train_speed(iter/s)": 0.010532
},
{
"epoch": 0.006083650190114068,
"grad_norm": 0.21064028143882751,
"learning_rate": 1.8181818181818182e-05,
"log_odds_chosen": 0.411478191614151,
"log_odds_ratio": -0.5463609099388123,
"logits/chosen": -2.2685160636901855,
"logits/rejected": -2.260843515396118,
"logps/chosen": -0.9355340003967285,
"logps/rejected": -1.236234188079834,
"loss": 0.87318134,
"memory(GiB)": 243.33,
"nll_loss": 0.9355340003967285,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09355340152978897,
"rewards/margins": 0.030070019885897636,
"rewards/rejected": -0.12362341582775116,
"step": 2,
"train_speed(iter/s)": 0.009734
},
{
"epoch": 0.009125475285171103,
"grad_norm": 0.19250068068504333,
"learning_rate": 2.727272727272727e-05,
"log_odds_chosen": 0.3938485383987427,
"log_odds_ratio": -0.5777455568313599,
"logits/chosen": -1.993510365486145,
"logits/rejected": -1.9485821723937988,
"logps/chosen": -0.6979267001152039,
"logps/rejected": -0.9728450775146484,
"loss": 0.75437951,
"memory(GiB)": 243.33,
"nll_loss": 0.6979267597198486,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.06979266554117203,
"rewards/margins": 0.027491841465234756,
"rewards/rejected": -0.09728450328111649,
"step": 3,
"train_speed(iter/s)": 0.01152
},
{
"epoch": 0.012167300380228136,
"grad_norm": 0.20081621408462524,
"learning_rate": 3.6363636363636364e-05,
"log_odds_chosen": 0.0849333107471466,
"log_odds_ratio": -0.6586424112319946,
"logits/chosen": -2.3017446994781494,
"logits/rejected": -2.300307512283325,
"logps/chosen": -0.7535954713821411,
"logps/rejected": -0.8145345449447632,
"loss": 0.85012603,
"memory(GiB)": 315.51,
"nll_loss": 0.7535954713821411,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0753595381975174,
"rewards/margins": 0.006093906704336405,
"rewards/rejected": -0.08145345002412796,
"step": 4,
"train_speed(iter/s)": 0.011285
},
{
"epoch": 0.015209125475285171,
"grad_norm": 0.20968054234981537,
"learning_rate": 4.545454545454545e-05,
"log_odds_chosen": 0.2265796959400177,
"log_odds_ratio": -0.605457067489624,
"logits/chosen": -2.132286548614502,
"logits/rejected": -2.0954604148864746,
"logps/chosen": -0.7475967407226562,
"logps/rejected": -0.9037913680076599,
"loss": 0.83076566,
"memory(GiB)": 349.51,
"nll_loss": 0.747596800327301,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.07475967705249786,
"rewards/margins": 0.015619462355971336,
"rewards/rejected": -0.09037913382053375,
"step": 5,
"train_speed(iter/s)": 0.01019
},
{
"epoch": 0.018250950570342206,
"grad_norm": 0.28389012813568115,
"learning_rate": 5.454545454545454e-05,
"log_odds_chosen": 0.07245179265737534,
"log_odds_ratio": -0.6642654538154602,
"logits/chosen": -2.207275867462158,
"logits/rejected": -2.194154977798462,
"logps/chosen": -0.7644542455673218,
"logps/rejected": -0.811606228351593,
"loss": 0.80996728,
"memory(GiB)": 304.24,
"nll_loss": 0.7644542455673218,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.07644543051719666,
"rewards/margins": 0.004715194460004568,
"rewards/rejected": -0.08116061985492706,
"step": 6,
"train_speed(iter/s)": 0.010519
},
{
"epoch": 0.02129277566539924,
"grad_norm": 0.2204827517271042,
"learning_rate": 6.363636363636363e-05,
"log_odds_chosen": 0.0014549940824508667,
"log_odds_ratio": -0.6934571266174316,
"logits/chosen": -2.2422585487365723,
"logits/rejected": -2.2558586597442627,
"logps/chosen": -0.7055240869522095,
"logps/rejected": -0.7079155445098877,
"loss": 0.80136919,
"memory(GiB)": 376.14,
"nll_loss": 0.7055240869522095,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.07055240869522095,
"rewards/margins": 0.0002391412854194641,
"rewards/rejected": -0.07079154998064041,
"step": 7,
"train_speed(iter/s)": 0.010453
},
{
"epoch": 0.024334600760456272,
"grad_norm": 0.18603529036045074,
"learning_rate": 7.272727272727273e-05,
"log_odds_chosen": 0.2205890268087387,
"log_odds_ratio": -0.6130101084709167,
"logits/chosen": -2.1775014400482178,
"logits/rejected": -2.1533212661743164,
"logps/chosen": -0.6938110589981079,
"logps/rejected": -0.8382289409637451,
"loss": 0.73867655,
"memory(GiB)": 337.96,
"nll_loss": 0.6938109993934631,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.06938110291957855,
"rewards/margins": 0.01444179005920887,
"rewards/rejected": -0.08382289111614227,
"step": 8,
"train_speed(iter/s)": 0.010026
},
{
"epoch": 0.02737642585551331,
"grad_norm": 0.2093774527311325,
"learning_rate": 8.18181818181818e-05,
"log_odds_chosen": 0.28880590200424194,
"log_odds_ratio": -0.5970734357833862,
"logits/chosen": -2.231719970703125,
"logits/rejected": -2.1949002742767334,
"logps/chosen": -0.6275652647018433,
"logps/rejected": -0.8133708834648132,
"loss": 0.73195064,
"memory(GiB)": 337.96,
"nll_loss": 0.627565324306488,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.06275652348995209,
"rewards/margins": 0.018580568954348564,
"rewards/rejected": -0.0813370943069458,
"step": 9,
"train_speed(iter/s)": 0.010471
},
{
"epoch": 0.030418250950570342,
"grad_norm": 0.17292404174804688,
"learning_rate": 9.09090909090909e-05,
"log_odds_chosen": 0.1367553025484085,
"log_odds_ratio": -0.6314333081245422,
"logits/chosen": -2.193378448486328,
"logits/rejected": -2.2005226612091064,
"logps/chosen": -0.712138831615448,
"logps/rejected": -0.7979795932769775,
"loss": 0.72619164,
"memory(GiB)": 337.96,
"nll_loss": 0.7121387720108032,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.07121387869119644,
"rewards/margins": 0.00858408585190773,
"rewards/rejected": -0.07979796826839447,
"step": 10,
"train_speed(iter/s)": 0.010603
},
{
"epoch": 0.033460076045627375,
"grad_norm": 0.18145819008350372,
"learning_rate": 9.999999999999999e-05,
"log_odds_chosen": 0.39647987484931946,
"log_odds_ratio": -0.5509629249572754,
"logits/chosen": -1.9035614728927612,
"logits/rejected": -1.8070275783538818,
"logps/chosen": -0.6602268218994141,
"logps/rejected": -0.9264889359474182,
"loss": 0.63924813,
"memory(GiB)": 337.96,
"nll_loss": 0.6602268218994141,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.06602267920970917,
"rewards/margins": 0.026626210659742355,
"rewards/rejected": -0.09264889359474182,
"step": 11,
"train_speed(iter/s)": 0.011075
},
{
"epoch": 0.03650190114068441,
"grad_norm": 0.1996176391839981,
"learning_rate": 0.00010909090909090908,
"log_odds_chosen": -0.008832626044750214,
"log_odds_ratio": -0.6989511251449585,
"logits/chosen": -2.1484761238098145,
"logits/rejected": -2.1583542823791504,
"logps/chosen": -0.5783950686454773,
"logps/rejected": -0.5806748867034912,
"loss": 0.66453189,
"memory(GiB)": 337.96,
"nll_loss": 0.5783950686454773,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.05783950537443161,
"rewards/margins": 0.0002279849722981453,
"rewards/rejected": -0.05806749314069748,
"step": 12,
"train_speed(iter/s)": 0.011198
},
{
"epoch": 0.03954372623574144,
"grad_norm": 0.18517741560935974,
"learning_rate": 0.00011818181818181817,
"log_odds_chosen": 0.1502607762813568,
"log_odds_ratio": -0.6237398386001587,
"logits/chosen": -2.078707695007324,
"logits/rejected": -2.0867443084716797,
"logps/chosen": -0.6652951240539551,
"logps/rejected": -0.7532068490982056,
"loss": 0.71045852,
"memory(GiB)": 337.96,
"nll_loss": 0.6652951240539551,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0665295198559761,
"rewards/margins": 0.008791167289018631,
"rewards/rejected": -0.07532068341970444,
"step": 13,
"train_speed(iter/s)": 0.011144
},
{
"epoch": 0.04258555133079848,
"grad_norm": 0.460842102766037,
"learning_rate": 0.00012727272727272725,
"log_odds_chosen": 0.08848065137863159,
"log_odds_ratio": -0.6523964405059814,
"logits/chosen": -2.0903429985046387,
"logits/rejected": -2.071244716644287,
"logps/chosen": -0.5602254867553711,
"logps/rejected": -0.6098269820213318,
"loss": 0.62673962,
"memory(GiB)": 337.96,
"nll_loss": 0.5602255463600159,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.05602255091071129,
"rewards/margins": 0.004960143007338047,
"rewards/rejected": -0.06098269298672676,
"step": 14,
"train_speed(iter/s)": 0.011223
},
{
"epoch": 0.045627376425855515,
"grad_norm": 0.201682448387146,
"learning_rate": 0.00013636363636363634,
"log_odds_chosen": 0.0586659274995327,
"log_odds_ratio": -0.6654743552207947,
"logits/chosen": -2.460188388824463,
"logits/rejected": -2.4563236236572266,
"logps/chosen": -0.3543914556503296,
"logps/rejected": -0.3674345910549164,
"loss": 0.49967062,
"memory(GiB)": 337.96,
"nll_loss": 0.3543914258480072,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0354391448199749,
"rewards/margins": 0.0013043158687651157,
"rewards/rejected": -0.03674346208572388,
"step": 15,
"train_speed(iter/s)": 0.011535
},
{
"epoch": 0.048669201520912544,
"grad_norm": 0.20425710082054138,
"learning_rate": 0.00014545454545454546,
"log_odds_chosen": 0.06445352733135223,
"log_odds_ratio": -0.6619336009025574,
"logits/chosen": -1.9317834377288818,
"logits/rejected": -1.9417508840560913,
"logps/chosen": -0.5264174938201904,
"logps/rejected": -0.5525864958763123,
"loss": 0.62982225,
"memory(GiB)": 337.96,
"nll_loss": 0.5264174938201904,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.05264175310730934,
"rewards/margins": 0.0026168967597186565,
"rewards/rejected": -0.055258650332689285,
"step": 16,
"train_speed(iter/s)": 0.011487
},
{
"epoch": 0.05171102661596958,
"grad_norm": 0.19479066133499146,
"learning_rate": 0.00015454545454545452,
"log_odds_chosen": 0.05428645759820938,
"log_odds_ratio": -0.6680891513824463,
"logits/chosen": -2.0513648986816406,
"logits/rejected": -2.0547256469726562,
"logps/chosen": -0.5574195384979248,
"logps/rejected": -0.5769228935241699,
"loss": 0.65339994,
"memory(GiB)": 337.96,
"nll_loss": 0.5574195384979248,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.05574195832014084,
"rewards/margins": 0.001950339414179325,
"rewards/rejected": -0.05769229307770729,
"step": 17,
"train_speed(iter/s)": 0.011193
},
{
"epoch": 0.05475285171102662,
"grad_norm": 0.20096692442893982,
"learning_rate": 0.0001636363636363636,
"log_odds_chosen": 0.11649945378303528,
"log_odds_ratio": -0.6375795006752014,
"logits/chosen": -1.979109764099121,
"logits/rejected": -1.9783812761306763,
"logps/chosen": -0.6062220335006714,
"logps/rejected": -0.6605215072631836,
"loss": 0.61762023,
"memory(GiB)": 337.96,
"nll_loss": 0.6062220931053162,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0606222040951252,
"rewards/margins": 0.005429944489151239,
"rewards/rejected": -0.06605214625597,
"step": 18,
"train_speed(iter/s)": 0.010973
},
{
"epoch": 0.05779467680608365,
"grad_norm": 0.22299402952194214,
"learning_rate": 0.00017272727272727272,
"log_odds_chosen": 0.275879442691803,
"log_odds_ratio": -0.5872041583061218,
"logits/chosen": -1.7563273906707764,
"logits/rejected": -1.7962664365768433,
"logps/chosen": -0.5380970239639282,
"logps/rejected": -0.7168144583702087,
"loss": 0.62556648,
"memory(GiB)": 337.96,
"nll_loss": 0.5380970239639282,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.05380970239639282,
"rewards/margins": 0.017871743068099022,
"rewards/rejected": -0.071681447327137,
"step": 19,
"train_speed(iter/s)": 0.010995
},
{
"epoch": 0.060836501901140684,
"grad_norm": 0.19399508833885193,
"learning_rate": 0.0001818181818181818,
"log_odds_chosen": -0.016166899353265762,
"log_odds_ratio": -0.702415943145752,
"logits/chosen": -2.1521270275115967,
"logits/rejected": -2.151902914047241,
"logps/chosen": -0.41145703196525574,
"logps/rejected": -0.4066007733345032,
"loss": 0.49458012,
"memory(GiB)": 337.96,
"nll_loss": 0.41145703196525574,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.041145700961351395,
"rewards/margins": -0.000485624885186553,
"rewards/rejected": -0.040660079568624496,
"step": 20,
"train_speed(iter/s)": 0.011065
}
],
"logging_steps": 1,
"max_steps": 328,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.570806552906957e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}