FormlessAI's picture
Training in progress, step 40, checkpoint
8e7cc5c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.018685040289618125,
"eval_steps": 500,
"global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00046712600724045314,
"grad_norm": 29.624664306640625,
"learning_rate": 0.0,
"logits/chosen": -3.153887987136841,
"logits/rejected": -3.3905792236328125,
"logps/chosen": -164.62596130371094,
"logps/rejected": -154.77557373046875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.0009342520144809063,
"grad_norm": 29.32240104675293,
"learning_rate": 1.5e-06,
"logits/chosen": -3.1478431224823,
"logits/rejected": -3.0448203086853027,
"logps/chosen": -156.60809326171875,
"logps/rejected": -134.02630615234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.0014013780217213593,
"grad_norm": 39.618228912353516,
"learning_rate": 3e-06,
"logits/chosen": -3.099796772003174,
"logits/rejected": -3.1112475395202637,
"logps/chosen": -139.82913208007812,
"logps/rejected": -142.367919921875,
"loss": 0.6916,
"rewards/accuracies": 0.46875,
"rewards/chosen": 0.0017557624960318208,
"rewards/margins": 0.0038681034930050373,
"rewards/rejected": -0.0021123411133885384,
"step": 3
},
{
"epoch": 0.0018685040289618125,
"grad_norm": 48.11921691894531,
"learning_rate": 4.5e-06,
"logits/chosen": -2.7930030822753906,
"logits/rejected": -3.279337167739868,
"logps/chosen": -147.18673706054688,
"logps/rejected": -148.54122924804688,
"loss": 0.6996,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.033105019479990005,
"rewards/margins": -0.008252889849245548,
"rewards/rejected": -0.024852126836776733,
"step": 4
},
{
"epoch": 0.0023356300362022656,
"grad_norm": 33.73259353637695,
"learning_rate": 6e-06,
"logits/chosen": -3.058988094329834,
"logits/rejected": -2.9058432579040527,
"logps/chosen": -157.9241180419922,
"logps/rejected": -181.765380859375,
"loss": 0.728,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08487213402986526,
"rewards/margins": -0.055237509310245514,
"rewards/rejected": -0.029634615406394005,
"step": 5
},
{
"epoch": 0.0028027560434427186,
"grad_norm": 33.588905334472656,
"learning_rate": 7.5e-06,
"logits/chosen": -3.073216199874878,
"logits/rejected": -2.8430886268615723,
"logps/chosen": -158.05972290039062,
"logps/rejected": -150.7171630859375,
"loss": 0.759,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.1916729062795639,
"rewards/margins": -0.05884008854627609,
"rewards/rejected": -0.1328328400850296,
"step": 6
},
{
"epoch": 0.0032698820506831716,
"grad_norm": 34.93351745605469,
"learning_rate": 9e-06,
"logits/chosen": -3.4525327682495117,
"logits/rejected": -3.336601495742798,
"logps/chosen": -164.04249572753906,
"logps/rejected": -165.49948120117188,
"loss": 0.769,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.322049081325531,
"rewards/margins": -0.07949253916740417,
"rewards/rejected": -0.24255654215812683,
"step": 7
},
{
"epoch": 0.003737008057923625,
"grad_norm": 32.109195709228516,
"learning_rate": 1.05e-05,
"logits/chosen": -3.057377338409424,
"logits/rejected": -2.9476959705352783,
"logps/chosen": -179.3075408935547,
"logps/rejected": -163.44024658203125,
"loss": 0.6912,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.4484987258911133,
"rewards/margins": 0.13945631682872772,
"rewards/rejected": -0.5879549980163574,
"step": 8
},
{
"epoch": 0.004204134065164078,
"grad_norm": 38.499698638916016,
"learning_rate": 1.2e-05,
"logits/chosen": -3.4018871784210205,
"logits/rejected": -2.770911455154419,
"logps/chosen": -138.96697998046875,
"logps/rejected": -155.197509765625,
"loss": 0.7849,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.5717523097991943,
"rewards/margins": 0.018239814788103104,
"rewards/rejected": -0.589992105960846,
"step": 9
},
{
"epoch": 0.004671260072404531,
"grad_norm": 29.717857360839844,
"learning_rate": 1.3500000000000001e-05,
"logits/chosen": -3.2118136882781982,
"logits/rejected": -2.8460254669189453,
"logps/chosen": -158.110107421875,
"logps/rejected": -147.25413513183594,
"loss": 0.8467,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.4080456793308258,
"rewards/margins": -0.05621982365846634,
"rewards/rejected": -0.3518258333206177,
"step": 10
},
{
"epoch": 0.005138386079644984,
"grad_norm": 35.5257682800293,
"learning_rate": 1.5e-05,
"logits/chosen": -2.7199769020080566,
"logits/rejected": -3.1992592811584473,
"logps/chosen": -182.67335510253906,
"logps/rejected": -205.45220947265625,
"loss": 0.7824,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.34646672010421753,
"rewards/margins": 0.4741722643375397,
"rewards/rejected": -0.8206390142440796,
"step": 11
},
{
"epoch": 0.005605512086885437,
"grad_norm": 33.356773376464844,
"learning_rate": 1.65e-05,
"logits/chosen": -3.128831386566162,
"logits/rejected": -3.167382001876831,
"logps/chosen": -157.21823120117188,
"logps/rejected": -169.51663208007812,
"loss": 0.6211,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.16993041336536407,
"rewards/margins": 0.4490576684474945,
"rewards/rejected": -0.6189880967140198,
"step": 12
},
{
"epoch": 0.00607263809412589,
"grad_norm": 43.2087516784668,
"learning_rate": 1.8e-05,
"logits/chosen": -3.032745838165283,
"logits/rejected": -3.1566920280456543,
"logps/chosen": -160.5401153564453,
"logps/rejected": -156.610107421875,
"loss": 0.8161,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.329096257686615,
"rewards/margins": 0.2510414123535156,
"rewards/rejected": -0.5801376700401306,
"step": 13
},
{
"epoch": 0.006539764101366343,
"grad_norm": 44.841331481933594,
"learning_rate": 1.95e-05,
"logits/chosen": -2.9390594959259033,
"logits/rejected": -2.639657974243164,
"logps/chosen": -158.00621032714844,
"logps/rejected": -205.90988159179688,
"loss": 0.6193,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.4224599599838257,
"rewards/margins": 0.8774706721305847,
"rewards/rejected": -1.2999305725097656,
"step": 14
},
{
"epoch": 0.007006890108606796,
"grad_norm": 60.16761779785156,
"learning_rate": 2.1e-05,
"logits/chosen": -2.844076156616211,
"logits/rejected": -3.0058369636535645,
"logps/chosen": -174.17816162109375,
"logps/rejected": -162.8614959716797,
"loss": 0.9068,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.4890695810317993,
"rewards/margins": 0.31313809752464294,
"rewards/rejected": -0.8022076487541199,
"step": 15
},
{
"epoch": 0.00747401611584725,
"grad_norm": 42.41041946411133,
"learning_rate": 2.25e-05,
"logits/chosen": -2.8459668159484863,
"logits/rejected": -2.870767593383789,
"logps/chosen": -159.3683624267578,
"logps/rejected": -137.30758666992188,
"loss": 1.0076,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.8330074548721313,
"rewards/margins": -0.10208512842655182,
"rewards/rejected": -0.7309223413467407,
"step": 16
},
{
"epoch": 0.007941142123087703,
"grad_norm": 46.14881134033203,
"learning_rate": 2.4e-05,
"logits/chosen": -3.2765953540802,
"logits/rejected": -3.2238590717315674,
"logps/chosen": -165.30923461914062,
"logps/rejected": -130.40892028808594,
"loss": 0.8999,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.6034781336784363,
"rewards/margins": 0.04197956249117851,
"rewards/rejected": -0.6454576849937439,
"step": 17
},
{
"epoch": 0.008408268130328156,
"grad_norm": 36.91789245605469,
"learning_rate": 2.55e-05,
"logits/chosen": -2.8234622478485107,
"logits/rejected": -3.0411720275878906,
"logps/chosen": -175.682373046875,
"logps/rejected": -149.590576171875,
"loss": 1.2818,
"rewards/accuracies": 0.53125,
"rewards/chosen": -1.4478638172149658,
"rewards/margins": -0.04692135751247406,
"rewards/rejected": -1.4009425640106201,
"step": 18
},
{
"epoch": 0.00887539413756861,
"grad_norm": 53.27765655517578,
"learning_rate": 2.7000000000000002e-05,
"logits/chosen": -3.185443639755249,
"logits/rejected": -3.126272439956665,
"logps/chosen": -187.12286376953125,
"logps/rejected": -188.28640747070312,
"loss": 0.7968,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.0597200393676758,
"rewards/margins": 0.5409759283065796,
"rewards/rejected": -1.6006958484649658,
"step": 19
},
{
"epoch": 0.009342520144809062,
"grad_norm": 40.505897521972656,
"learning_rate": 2.8499999999999998e-05,
"logits/chosen": -2.845191478729248,
"logits/rejected": -2.9458093643188477,
"logps/chosen": -177.2282257080078,
"logps/rejected": -210.2263641357422,
"loss": 1.3512,
"rewards/accuracies": 0.4375,
"rewards/chosen": -2.458928108215332,
"rewards/margins": -0.08396562933921814,
"rewards/rejected": -2.374962329864502,
"step": 20
},
{
"epoch": 0.009809646152049515,
"grad_norm": 68.49879455566406,
"learning_rate": 3e-05,
"logits/chosen": -3.1144859790802,
"logits/rejected": -2.9034385681152344,
"logps/chosen": -216.12496948242188,
"logps/rejected": -190.55833435058594,
"loss": 1.5832,
"rewards/accuracies": 0.46875,
"rewards/chosen": -3.5558087825775146,
"rewards/margins": -0.48250633478164673,
"rewards/rejected": -3.0733022689819336,
"step": 21
},
{
"epoch": 0.010276772159289968,
"grad_norm": 61.08313751220703,
"learning_rate": 2.9999922925895862e-05,
"logits/chosen": -3.1215250492095947,
"logits/rejected": -2.597733974456787,
"logps/chosen": -219.85137939453125,
"logps/rejected": -196.4637908935547,
"loss": 1.4738,
"rewards/accuracies": 0.40625,
"rewards/chosen": -3.155580997467041,
"rewards/margins": -0.5727983117103577,
"rewards/rejected": -2.582782745361328,
"step": 22
},
{
"epoch": 0.010743898166530421,
"grad_norm": 41.851783752441406,
"learning_rate": 2.999969170437549e-05,
"logits/chosen": -3.0340187549591064,
"logits/rejected": -2.84521484375,
"logps/chosen": -177.37728881835938,
"logps/rejected": -183.36668395996094,
"loss": 1.0908,
"rewards/accuracies": 0.46875,
"rewards/chosen": -1.8575204610824585,
"rewards/margins": 0.3737794756889343,
"rewards/rejected": -2.231299877166748,
"step": 23
},
{
"epoch": 0.011211024173770874,
"grad_norm": 39.802371978759766,
"learning_rate": 2.9999306337815055e-05,
"logits/chosen": -2.8932125568389893,
"logits/rejected": -3.1646995544433594,
"logps/chosen": -176.2078399658203,
"logps/rejected": -199.5233612060547,
"loss": 1.0934,
"rewards/accuracies": 0.53125,
"rewards/chosen": -1.9304168224334717,
"rewards/margins": 0.2455454170703888,
"rewards/rejected": -2.175962448120117,
"step": 24
},
{
"epoch": 0.011678150181011327,
"grad_norm": 29.43556785583496,
"learning_rate": 2.999876683017479e-05,
"logits/chosen": -2.940061092376709,
"logits/rejected": -2.888075590133667,
"logps/chosen": -193.4315948486328,
"logps/rejected": -197.52081298828125,
"loss": 1.1539,
"rewards/accuracies": 0.5,
"rewards/chosen": -1.5905036926269531,
"rewards/margins": 0.9262561202049255,
"rewards/rejected": -2.5167598724365234,
"step": 25
},
{
"epoch": 0.01214527618825178,
"grad_norm": 54.44322204589844,
"learning_rate": 2.999807318699897e-05,
"logits/chosen": -2.6563143730163574,
"logits/rejected": -2.6990714073181152,
"logps/chosen": -156.08848571777344,
"logps/rejected": -182.99139404296875,
"loss": 1.2892,
"rewards/accuracies": 0.4375,
"rewards/chosen": -1.465782642364502,
"rewards/margins": 0.17339667677879333,
"rewards/rejected": -1.6391793489456177,
"step": 26
},
{
"epoch": 0.012612402195492234,
"grad_norm": 33.6212272644043,
"learning_rate": 2.999722541541585e-05,
"logits/chosen": -2.61735200881958,
"logits/rejected": -2.4723963737487793,
"logps/chosen": -155.68017578125,
"logps/rejected": -158.19589233398438,
"loss": 1.0627,
"rewards/accuracies": 0.34375,
"rewards/chosen": -1.1587157249450684,
"rewards/margins": 0.1697230488061905,
"rewards/rejected": -1.3284387588500977,
"step": 27
},
{
"epoch": 0.013079528202732687,
"grad_norm": 19.592470169067383,
"learning_rate": 2.99962235241376e-05,
"logits/chosen": -2.5871520042419434,
"logits/rejected": -2.7644474506378174,
"logps/chosen": -184.37257385253906,
"logps/rejected": -169.0127716064453,
"loss": 0.8552,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.5363420248031616,
"rewards/margins": 0.4760582447052002,
"rewards/rejected": -1.0124002695083618,
"step": 28
},
{
"epoch": 0.01354665420997314,
"grad_norm": 20.270267486572266,
"learning_rate": 2.9995067523460198e-05,
"logits/chosen": -2.8748791217803955,
"logits/rejected": -3.1334304809570312,
"logps/chosen": -157.93455505371094,
"logps/rejected": -176.13912963867188,
"loss": 0.9041,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.36636897921562195,
"rewards/margins": 0.27877098321914673,
"rewards/rejected": -0.6451399922370911,
"step": 29
},
{
"epoch": 0.014013780217213593,
"grad_norm": 43.62104415893555,
"learning_rate": 2.9993757425263343e-05,
"logits/chosen": -2.7570629119873047,
"logits/rejected": -2.771437644958496,
"logps/chosen": -157.29891967773438,
"logps/rejected": -166.71913146972656,
"loss": 1.5401,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.970974862575531,
"rewards/margins": -0.46308204531669617,
"rewards/rejected": -0.5078927874565125,
"step": 30
},
{
"epoch": 0.014480906224454046,
"grad_norm": 21.04092025756836,
"learning_rate": 2.999229324301032e-05,
"logits/chosen": -2.814603328704834,
"logits/rejected": -2.561202049255371,
"logps/chosen": -126.19819641113281,
"logps/rejected": -131.07566833496094,
"loss": 0.7815,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.11197692900896072,
"rewards/margins": 0.5568960309028625,
"rewards/rejected": -0.4449191689491272,
"step": 31
},
{
"epoch": 0.0149480322316945,
"grad_norm": 23.462085723876953,
"learning_rate": 2.9990674991747865e-05,
"logits/chosen": -2.608139753341675,
"logits/rejected": -3.061997175216675,
"logps/chosen": -158.59423828125,
"logps/rejected": -130.51840209960938,
"loss": 1.2301,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.5943432450294495,
"rewards/margins": -0.38749587535858154,
"rewards/rejected": -0.2068473994731903,
"step": 32
},
{
"epoch": 0.015415158238934953,
"grad_norm": 27.918336868286133,
"learning_rate": 2.9988902688106014e-05,
"logits/chosen": -2.9852523803710938,
"logits/rejected": -2.5511088371276855,
"logps/chosen": -166.7327880859375,
"logps/rejected": -155.55520629882812,
"loss": 0.8347,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.009264327585697174,
"rewards/margins": 0.6062629222869873,
"rewards/rejected": -0.615527331829071,
"step": 33
},
{
"epoch": 0.015882284246175406,
"grad_norm": 25.436614990234375,
"learning_rate": 2.9986976350297933e-05,
"logits/chosen": -2.850193500518799,
"logits/rejected": -2.510417938232422,
"logps/chosen": -149.20751953125,
"logps/rejected": -155.28610229492188,
"loss": 0.9801,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.5431436896324158,
"rewards/margins": 0.01562432199716568,
"rewards/rejected": -0.5587680339813232,
"step": 34
},
{
"epoch": 0.01634941025341586,
"grad_norm": 38.4265022277832,
"learning_rate": 2.9984895998119723e-05,
"logits/chosen": -2.4000887870788574,
"logits/rejected": -2.29819393157959,
"logps/chosen": -171.50152587890625,
"logps/rejected": -191.80938720703125,
"loss": 1.2524,
"rewards/accuracies": 0.5625,
"rewards/chosen": -1.1593430042266846,
"rewards/margins": 0.3696390390396118,
"rewards/rejected": -1.5289819240570068,
"step": 35
},
{
"epoch": 0.016816536260656312,
"grad_norm": 31.203420639038086,
"learning_rate": 2.998266165295021e-05,
"logits/chosen": -2.8619065284729004,
"logits/rejected": -2.9606268405914307,
"logps/chosen": -144.86415100097656,
"logps/rejected": -180.39205932617188,
"loss": 0.7866,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.18791238963603973,
"rewards/margins": 0.6746108531951904,
"rewards/rejected": -0.8625231981277466,
"step": 36
},
{
"epoch": 0.017283662267896766,
"grad_norm": 24.40894889831543,
"learning_rate": 2.9980273337750767e-05,
"logits/chosen": -2.757246255874634,
"logits/rejected": -2.466960906982422,
"logps/chosen": -192.2352294921875,
"logps/rejected": -181.03878784179688,
"loss": 0.8876,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.6490573287010193,
"rewards/margins": 0.9436599016189575,
"rewards/rejected": -1.592717170715332,
"step": 37
},
{
"epoch": 0.01775078827513722,
"grad_norm": 27.00850486755371,
"learning_rate": 2.9977731077065013e-05,
"logits/chosen": -2.9453818798065186,
"logits/rejected": -2.8008580207824707,
"logps/chosen": -152.46038818359375,
"logps/rejected": -173.5645751953125,
"loss": 0.6833,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.2769155204296112,
"rewards/margins": 1.4065779447555542,
"rewards/rejected": -1.6834933757781982,
"step": 38
},
{
"epoch": 0.01821791428237767,
"grad_norm": 30.273073196411133,
"learning_rate": 2.9975034897018614e-05,
"logits/chosen": -2.7667531967163086,
"logits/rejected": -2.948810338973999,
"logps/chosen": -123.97679901123047,
"logps/rejected": -174.9097900390625,
"loss": 1.0514,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.6435041427612305,
"rewards/margins": 0.23302727937698364,
"rewards/rejected": -0.8765315413475037,
"step": 39
},
{
"epoch": 0.018685040289618125,
"grad_norm": 33.39718246459961,
"learning_rate": 2.9972184825318994e-05,
"logits/chosen": -2.9786558151245117,
"logits/rejected": -2.691629409790039,
"logps/chosen": -186.82821655273438,
"logps/rejected": -167.65603637695312,
"loss": 1.2466,
"rewards/accuracies": 0.59375,
"rewards/chosen": -1.0377531051635742,
"rewards/margins": 0.2545713186264038,
"rewards/rejected": -1.292324423789978,
"step": 40
}
],
"logging_steps": 1,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}