CT-Apertus-Step50 / trainer_state.json
Delta-Vector's picture
Upload folder using huggingface_hub
99441b7 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.06666666666666667,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"advantage/absmean": 0.12451171875,
"entropy": 1.3988752365112305,
"epoch": 0.0013333333333333333,
"grad_norm": 0.01450820083840917,
"importance_ratio": 0.9983458518981934,
"learning_rate": 0.0,
"loss": -0.0028,
"mismatch_kl": 0.004329901188611984,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 1,
"timing/generation_ms": 12196.653502061963,
"timing/scoring_ms": 0.0,
"timing/total_ms": 12196.653502061963,
"tokens/completion": 562.04296875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 163.39810061454773
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.0297880172729492,
"epoch": 0.0026666666666666666,
"grad_norm": 0.006125098422428371,
"importance_ratio": 0.9977808594703674,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0118,
"mismatch_kl": 0.0036596579011529684,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 2,
"timing/generation_ms": 10855.522208847106,
"timing/scoring_ms": 0.0,
"timing/total_ms": 10855.522208847106,
"tokens/completion": 652.203125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 98.15957498550415
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.2343848943710327,
"epoch": 0.004,
"grad_norm": 0.0093110934895908,
"importance_ratio": 0.9983258843421936,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0068,
"mismatch_kl": 0.00391958886757493,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 3,
"timing/generation_ms": 14581.869984045625,
"timing/scoring_ms": 0.0,
"timing/total_ms": 14581.869984045625,
"tokens/completion": 722.37109375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 171.60404181480408
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.581649661064148,
"epoch": 0.005333333333333333,
"grad_norm": 0.007696628408420481,
"importance_ratio": 0.9986447095870972,
"learning_rate": 3e-06,
"loss": -0.0043,
"mismatch_kl": 0.0024762798566371202,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 4,
"timing/generation_ms": 11191.347393207252,
"timing/scoring_ms": 0.0,
"timing/total_ms": 11191.347393207252,
"tokens/completion": 595.73046875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 186.33580946922302
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8588891625404358,
"epoch": 0.006666666666666667,
"grad_norm": 0.0055080213738955075,
"importance_ratio": 0.9988943934440613,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0033,
"mismatch_kl": 0.0031517043244093657,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 5,
"timing/generation_ms": 10668.582463636994,
"timing/scoring_ms": 0.0,
"timing/total_ms": 10668.582463636994,
"tokens/completion": 636.53125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 82.27488708496094
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.071407675743103,
"epoch": 0.008,
"grad_norm": 0.02271832942623967,
"importance_ratio": 0.998067319393158,
"learning_rate": 5e-06,
"loss": 0.0019,
"mismatch_kl": 0.003643231000751257,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 6,
"timing/generation_ms": 3378.591795451939,
"timing/scoring_ms": 0.0,
"timing/total_ms": 3378.591795451939,
"tokens/completion": 178.73828125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 105.60203862190247
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8531922698020935,
"epoch": 0.009333333333333334,
"grad_norm": 0.018354067998903482,
"importance_ratio": 0.9980432391166687,
"learning_rate": 5e-06,
"loss": -0.0002,
"mismatch_kl": 0.003655636915937066,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 7,
"timing/generation_ms": 12279.695899225771,
"timing/scoring_ms": 0.0,
"timing/total_ms": 12279.695899225771,
"tokens/completion": 631.35546875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 104.63563537597656
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7072162628173828,
"epoch": 0.010666666666666666,
"grad_norm": 0.005552532749027135,
"importance_ratio": 0.9982293844223022,
"learning_rate": 5e-06,
"loss": -0.0014,
"mismatch_kl": 0.0029395928140729666,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 8,
"timing/generation_ms": 6614.743183366954,
"timing/scoring_ms": 0.0,
"timing/total_ms": 6614.743183366954,
"tokens/completion": 339.1640625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 132.164165019989
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8156145215034485,
"epoch": 0.012,
"grad_norm": 0.008176505226750404,
"importance_ratio": 0.9981797933578491,
"learning_rate": 5e-06,
"loss": 0.0027,
"mismatch_kl": 0.0031279518734663725,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 9,
"timing/generation_ms": 8826.908372342587,
"timing/scoring_ms": 0.0,
"timing/total_ms": 8826.908372342587,
"tokens/completion": 444.53515625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 144.61542773246765
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8708666563034058,
"epoch": 0.013333333333333334,
"grad_norm": 0.009382372847258274,
"importance_ratio": 0.9981642961502075,
"learning_rate": 5e-06,
"loss": 0.0126,
"mismatch_kl": 0.0030885515734553337,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 10,
"timing/generation_ms": 7367.4805322662,
"timing/scoring_ms": 0.0,
"timing/total_ms": 7367.4805322662,
"tokens/completion": 400.74609375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 176.57727003097534
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.6906348466873169,
"epoch": 0.014666666666666666,
"grad_norm": 0.007616251351947248,
"importance_ratio": 1.0045424699783325,
"learning_rate": 5e-06,
"loss": 0.0542,
"mismatch_kl": 0.03194786608219147,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 11,
"timing/generation_ms": 26879.562875255942,
"timing/scoring_ms": 0.0,
"timing/total_ms": 26879.562875255942,
"tokens/completion": 1682.0546875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 190.3386266231537
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.6506091356277466,
"epoch": 0.016,
"grad_norm": 0.004382353798954015,
"importance_ratio": 0.9982648491859436,
"learning_rate": 5e-06,
"loss": 0.043,
"mismatch_kl": 0.02482638508081436,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 12,
"timing/generation_ms": 22301.60311050713,
"timing/scoring_ms": 0.0,
"timing/total_ms": 22301.60311050713,
"tokens/completion": 1387.734375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 177.6784646511078
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.143129825592041,
"epoch": 0.017333333333333333,
"grad_norm": 0.009321138085104996,
"importance_ratio": 1.001217007637024,
"learning_rate": 5e-06,
"loss": -0.0139,
"mismatch_kl": 0.0036374337505549192,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 13,
"timing/generation_ms": 6277.724616229534,
"timing/scoring_ms": 0.0,
"timing/total_ms": 6277.724616229534,
"tokens/completion": 432.66796875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 129.69259929656982
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.650863766670227,
"epoch": 0.018666666666666668,
"grad_norm": 0.0076614251264825245,
"importance_ratio": 0.9983827471733093,
"learning_rate": 5e-06,
"loss": 0.0049,
"mismatch_kl": 0.0027237425092607737,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 14,
"timing/generation_ms": 7103.812717832625,
"timing/scoring_ms": 0.0,
"timing/total_ms": 7103.812717832625,
"tokens/completion": 404.96484375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 57.65379452705383
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7439635992050171,
"epoch": 0.02,
"grad_norm": 0.009401568464987338,
"importance_ratio": 0.9981654286384583,
"learning_rate": 5e-06,
"loss": 0.0109,
"mismatch_kl": 0.002909082220867276,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 15,
"timing/generation_ms": 8292.532542720437,
"timing/scoring_ms": 0.0,
"timing/total_ms": 8292.532542720437,
"tokens/completion": 459.85546875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 92.21157336235046
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7638830542564392,
"epoch": 0.021333333333333333,
"grad_norm": 0.010374572910211358,
"importance_ratio": 0.9969711899757385,
"learning_rate": 5e-06,
"loss": -0.005,
"mismatch_kl": 0.0034673516638576984,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 16,
"timing/generation_ms": 5712.000676430762,
"timing/scoring_ms": 0.0,
"timing/total_ms": 5712.000676430762,
"tokens/completion": 308.4296875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 52.6866238117218
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.18189160525798798,
"epoch": 0.02266666666666667,
"grad_norm": 0.00257455457059234,
"importance_ratio": 0.9984971880912781,
"learning_rate": 5e-06,
"loss": 0.0681,
"mismatch_kl": 0.018514186143875122,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 17,
"timing/generation_ms": 14281.423358246684,
"timing/scoring_ms": 0.0,
"timing/total_ms": 14281.423358246684,
"tokens/completion": 1101.546875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 129.24327325820923
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.5917271375656128,
"epoch": 0.024,
"grad_norm": 0.005668903472483887,
"importance_ratio": 0.999828577041626,
"learning_rate": 5e-06,
"loss": 0.027,
"mismatch_kl": 0.002100760815665126,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 18,
"timing/generation_ms": 24175.399120897055,
"timing/scoring_ms": 0.0,
"timing/total_ms": 24175.399120897055,
"tokens/completion": 1504.0390625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 158.57504653930664
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.3282008171081543,
"epoch": 0.025333333333333333,
"grad_norm": 0.006636047431888786,
"importance_ratio": 1.0022344589233398,
"learning_rate": 5e-06,
"loss": -0.0015,
"mismatch_kl": 0.004634195473045111,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 19,
"timing/generation_ms": 15713.139976374805,
"timing/scoring_ms": 0.0,
"timing/total_ms": 15713.139976374805,
"tokens/completion": 764.3203125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 78.56244468688965
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.02470862865448,
"epoch": 0.02666666666666667,
"grad_norm": 0.00833481021786943,
"importance_ratio": 1.0026451349258423,
"learning_rate": 5e-06,
"loss": -0.0052,
"mismatch_kl": 0.004158638883382082,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 20,
"timing/generation_ms": 6632.851202040911,
"timing/scoring_ms": 0.0,
"timing/total_ms": 6632.851202040911,
"tokens/completion": 382.6171875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 199.43552422523499
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.07275530695915222,
"epoch": 0.028,
"grad_norm": 0.005944388738403685,
"importance_ratio": 0.9988561868667603,
"learning_rate": 5e-06,
"loss": 0.0452,
"mismatch_kl": 0.00023643655003979802,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 21,
"timing/generation_ms": 119174.6030151844,
"timing/scoring_ms": 0.0,
"timing/total_ms": 119174.6030151844,
"tokens/completion": 4008.38671875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 685.4423098564148
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.83598792552948,
"epoch": 0.029333333333333333,
"grad_norm": 0.00977617477475085,
"importance_ratio": 0.995696485042572,
"learning_rate": 5e-06,
"loss": 0.0066,
"mismatch_kl": 0.003957619424909353,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 22,
"timing/generation_ms": 12322.44247943163,
"timing/scoring_ms": 0.0,
"timing/total_ms": 12322.44247943163,
"tokens/completion": 442.85546875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 94.37256598472595
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8219886422157288,
"epoch": 0.030666666666666665,
"grad_norm": 0.0057449599218849946,
"importance_ratio": 0.9990558624267578,
"learning_rate": 5e-06,
"loss": 0.0388,
"mismatch_kl": 0.031180420890450478,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 23,
"timing/generation_ms": 29090.628595091403,
"timing/scoring_ms": 0.0,
"timing/total_ms": 29090.628595091403,
"tokens/completion": 1716.3515625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 169.45334482192993
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.0089167356491089,
"epoch": 0.032,
"grad_norm": 0.009762837519367,
"importance_ratio": 0.9979202151298523,
"learning_rate": 5e-06,
"loss": 0.0012,
"mismatch_kl": 0.00405939482152462,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 24,
"timing/generation_ms": 17154.327374882996,
"timing/scoring_ms": 0.0,
"timing/total_ms": 17154.327374882996,
"tokens/completion": 883.390625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 130.7891206741333
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.5053093433380127,
"epoch": 0.03333333333333333,
"grad_norm": 0.007416974683241316,
"importance_ratio": 0.9982149600982666,
"learning_rate": 5e-06,
"loss": -0.0068,
"mismatch_kl": 0.0024536694400012493,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 25,
"timing/generation_ms": 28463.361867703497,
"timing/scoring_ms": 0.0,
"timing/total_ms": 28463.361867703497,
"tokens/completion": 1409.54296875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 178.60342526435852
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.4973055422306061,
"epoch": 0.034666666666666665,
"grad_norm": 0.004048717808220336,
"importance_ratio": 1.0012173652648926,
"learning_rate": 5e-06,
"loss": 0.0547,
"mismatch_kl": 0.03473234549164772,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 26,
"timing/generation_ms": 18848.746892996132,
"timing/scoring_ms": 0.0,
"timing/total_ms": 18848.746892996132,
"tokens/completion": 1286.6875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 181.75563287734985
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.5914682149887085,
"epoch": 0.036,
"grad_norm": 0.010568088931367656,
"importance_ratio": 0.9986244440078735,
"learning_rate": 5e-06,
"loss": -0.0214,
"mismatch_kl": 0.002536088228225708,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 27,
"timing/generation_ms": 11602.461927570403,
"timing/scoring_ms": 0.0,
"timing/total_ms": 11602.461927570403,
"tokens/completion": 734.80078125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 188.88015818595886
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.4526905119419098,
"epoch": 0.037333333333333336,
"grad_norm": 0.0035728175606856527,
"importance_ratio": 0.9999799728393555,
"learning_rate": 5e-06,
"loss": 0.0026,
"mismatch_kl": 0.0024842985440045595,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 28,
"timing/generation_ms": 30549.59301650524,
"timing/scoring_ms": 0.0,
"timing/total_ms": 30549.59301650524,
"tokens/completion": 1536.96875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 328.0478210449219
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.32794511318206787,
"epoch": 0.03866666666666667,
"grad_norm": 0.003333518820406266,
"importance_ratio": 0.9995192885398865,
"learning_rate": 5e-06,
"loss": 0.056,
"mismatch_kl": 0.028769802302122116,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 29,
"timing/generation_ms": 18838.333567604423,
"timing/scoring_ms": 0.0,
"timing/total_ms": 18838.333567604423,
"tokens/completion": 1263.640625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 290.5948350429535
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.9063822031021118,
"epoch": 0.04,
"grad_norm": 0.007342388496075293,
"importance_ratio": 0.9953157901763916,
"learning_rate": 5e-06,
"loss": 0.0025,
"mismatch_kl": 0.004266439005732536,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 30,
"timing/generation_ms": 9477.213966660202,
"timing/scoring_ms": 0.0,
"timing/total_ms": 9477.213966660202,
"tokens/completion": 473.26953125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 62.30127143859863
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7977282404899597,
"epoch": 0.04133333333333333,
"grad_norm": 0.00884043332375607,
"importance_ratio": 0.9971498847007751,
"learning_rate": 5e-06,
"loss": -0.0029,
"mismatch_kl": 0.004033135715872049,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 31,
"timing/generation_ms": 18995.201839134097,
"timing/scoring_ms": 0.0,
"timing/total_ms": 18995.201839134097,
"tokens/completion": 958.625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 88.6347918510437
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8451470732688904,
"epoch": 0.042666666666666665,
"grad_norm": 0.018842389370386323,
"importance_ratio": 0.9982671141624451,
"learning_rate": 5e-06,
"loss": 0.0369,
"mismatch_kl": 0.003600390162318945,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 32,
"timing/generation_ms": 5587.277088314295,
"timing/scoring_ms": 0.0,
"timing/total_ms": 5587.277088314295,
"tokens/completion": 407.12109375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 140.4788475036621
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.1521912813186646,
"epoch": 0.044,
"grad_norm": 0.006379742039913797,
"importance_ratio": 0.997858464717865,
"learning_rate": 5e-06,
"loss": -0.0065,
"mismatch_kl": 0.005035887472331524,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 33,
"timing/generation_ms": 18916.152058169246,
"timing/scoring_ms": 0.0,
"timing/total_ms": 18916.152058169246,
"tokens/completion": 966.55859375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 90.41954302787781
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.1553761959075928,
"epoch": 0.04533333333333334,
"grad_norm": 0.010733713274389883,
"importance_ratio": 1.0111567974090576,
"learning_rate": 5e-06,
"loss": 0.0014,
"mismatch_kl": 0.006704343948513269,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 34,
"timing/generation_ms": 17302.85968258977,
"timing/scoring_ms": 0.0,
"timing/total_ms": 17302.85968258977,
"tokens/completion": 864.44140625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 143.8659963607788
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.3105199635028839,
"epoch": 0.04666666666666667,
"grad_norm": 0.003940130100379767,
"importance_ratio": 1.0006911754608154,
"learning_rate": 5e-06,
"loss": 0.0315,
"mismatch_kl": 0.022524980828166008,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 35,
"timing/generation_ms": 29806.298807263374,
"timing/scoring_ms": 0.0,
"timing/total_ms": 29806.298807263374,
"tokens/completion": 1672.48828125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 164.04821372032166
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7972971200942993,
"epoch": 0.048,
"grad_norm": 0.008409173142054645,
"importance_ratio": 0.9948906898498535,
"learning_rate": 5e-06,
"loss": 0.004,
"mismatch_kl": 0.004282351583242416,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 36,
"timing/generation_ms": 14936.399303376675,
"timing/scoring_ms": 0.0,
"timing/total_ms": 14936.399303376675,
"tokens/completion": 787.78125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 196.79586815834045
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.5769950747489929,
"epoch": 0.04933333333333333,
"grad_norm": 0.009636377939254703,
"importance_ratio": 0.9972301721572876,
"learning_rate": 5e-06,
"loss": -0.0011,
"mismatch_kl": 0.003603809280321002,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 37,
"timing/generation_ms": 13729.571803472936,
"timing/scoring_ms": 0.0,
"timing/total_ms": 13729.571803472936,
"tokens/completion": 697.64453125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 76.77378511428833
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.715777575969696,
"epoch": 0.050666666666666665,
"grad_norm": 0.005305945077729364,
"importance_ratio": 0.9969701766967773,
"learning_rate": 5e-06,
"loss": 0.0093,
"mismatch_kl": 0.004232620354741812,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 38,
"timing/generation_ms": 26689.202761277556,
"timing/scoring_ms": 0.0,
"timing/total_ms": 26689.202761277556,
"tokens/completion": 1302.75390625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 119.53459739685059
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.108477234840393,
"epoch": 0.052,
"grad_norm": 0.01158876392732835,
"importance_ratio": 0.9918505549430847,
"learning_rate": 5e-06,
"loss": 0.0069,
"mismatch_kl": 0.0055715711787343025,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 39,
"timing/generation_ms": 9316.26115180552,
"timing/scoring_ms": 0.0,
"timing/total_ms": 9316.26115180552,
"tokens/completion": 510.88671875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 134.0537760257721
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.0468562841415405,
"epoch": 0.05333333333333334,
"grad_norm": 0.006250915142780056,
"importance_ratio": 0.993874192237854,
"learning_rate": 5e-06,
"loss": -0.004,
"mismatch_kl": 0.00569565873593092,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 40,
"timing/generation_ms": 22657.30178449303,
"timing/scoring_ms": 0.0,
"timing/total_ms": 22657.30178449303,
"tokens/completion": 1117.0390625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 104.31990480422974
},
{
"advantage/absmean": 0.12451171875,
"entropy": 1.0242066383361816,
"epoch": 0.05466666666666667,
"grad_norm": 0.009730238448609988,
"importance_ratio": 1.0014866590499878,
"learning_rate": 5e-06,
"loss": 0.0029,
"mismatch_kl": 0.006813807878643274,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 41,
"timing/generation_ms": 15266.15516282618,
"timing/scoring_ms": 0.0,
"timing/total_ms": 15266.15516282618,
"tokens/completion": 789.296875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 137.6475269794464
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.9917812943458557,
"epoch": 0.056,
"grad_norm": 0.015130940878153589,
"importance_ratio": 0.9915910959243774,
"learning_rate": 5e-06,
"loss": -0.0016,
"mismatch_kl": 0.006494010798633099,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 42,
"timing/generation_ms": 8552.51188017428,
"timing/scoring_ms": 0.0,
"timing/total_ms": 8552.51188017428,
"tokens/completion": 427.00390625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 58.246270418167114
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7529230117797852,
"epoch": 0.05733333333333333,
"grad_norm": 0.017225340266775354,
"importance_ratio": 0.9983583092689514,
"learning_rate": 5e-06,
"loss": 0.0017,
"mismatch_kl": 0.005849814508110285,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 43,
"timing/generation_ms": 5776.03021170944,
"timing/scoring_ms": 0.0,
"timing/total_ms": 5776.03021170944,
"tokens/completion": 292.6484375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 138.62879586219788
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.9116057753562927,
"epoch": 0.058666666666666666,
"grad_norm": 0.013240792131345649,
"importance_ratio": 0.993713915348053,
"learning_rate": 5e-06,
"loss": -0.006,
"mismatch_kl": 0.00599726801738143,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 44,
"timing/generation_ms": 4909.729053266346,
"timing/scoring_ms": 0.0,
"timing/total_ms": 4909.729053266346,
"tokens/completion": 252.2890625,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 53.461458683013916
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.6952740550041199,
"epoch": 0.06,
"grad_norm": 0.007271643900369788,
"importance_ratio": 0.9978048205375671,
"learning_rate": 5e-06,
"loss": -0.0094,
"mismatch_kl": 0.004028764553368092,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 45,
"timing/generation_ms": 12042.251928709447,
"timing/scoring_ms": 0.0,
"timing/total_ms": 12042.251928709447,
"tokens/completion": 668.03125,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 77.72424340248108
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8735002279281616,
"epoch": 0.06133333333333333,
"grad_norm": 0.00817327643152143,
"importance_ratio": 1.0016076564788818,
"learning_rate": 5e-06,
"loss": 0.0002,
"mismatch_kl": 0.004535754211246967,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 46,
"timing/generation_ms": 8553.523855283856,
"timing/scoring_ms": 0.0,
"timing/total_ms": 8553.523855283856,
"tokens/completion": 459.87109375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 104.68091750144958
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7288662195205688,
"epoch": 0.06266666666666666,
"grad_norm": 0.016180435920793518,
"importance_ratio": 1.0001567602157593,
"learning_rate": 5e-06,
"loss": 0.0002,
"mismatch_kl": 0.006666674744337797,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 47,
"timing/generation_ms": 7466.575676575303,
"timing/scoring_ms": 0.0,
"timing/total_ms": 7466.575676575303,
"tokens/completion": 361.484375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 59.11225175857544
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.6449630856513977,
"epoch": 0.064,
"grad_norm": 0.004581873635760183,
"importance_ratio": 1.0026441812515259,
"learning_rate": 5e-06,
"loss": 0.0588,
"mismatch_kl": 0.059744831174612045,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 48,
"timing/generation_ms": 14945.35976741463,
"timing/scoring_ms": 0.0,
"timing/total_ms": 14945.35976741463,
"tokens/completion": 1044.0,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 182.3247947692871
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.8048098683357239,
"epoch": 0.06533333333333333,
"grad_norm": 0.0052364810032066635,
"importance_ratio": 0.9973055720329285,
"learning_rate": 5e-06,
"loss": 0.0347,
"mismatch_kl": 0.0451083704829216,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 49,
"timing/generation_ms": 28440.53523708135,
"timing/scoring_ms": 0.0,
"timing/total_ms": 28440.53523708135,
"tokens/completion": 1630.4296875,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 221.0147523880005
},
{
"advantage/absmean": 0.12451171875,
"entropy": 0.7735000252723694,
"epoch": 0.06666666666666667,
"grad_norm": 0.015103596816141955,
"importance_ratio": 0.9899436831474304,
"learning_rate": 5e-06,
"loss": -0.0022,
"mismatch_kl": 0.008240272291004658,
"reward": 0.12451171875,
"reward/std": 0.1738164722919464,
"step": 50,
"timing/generation_ms": 6061.147706583142,
"timing/scoring_ms": 0.0,
"timing/total_ms": 6061.147706583142,
"tokens/completion": 331.859375,
"tokens/masked_fraction": 0.0,
"wall_clock/generate_s": 108.05560183525085
}
],
"logging_steps": 1,
"max_steps": 750,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}