DexVLA / trainer_state.json
kuromivv's picture
Upload 42 files
8867dbf verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.1715696358773915,
"eval_steps": 200,
"global_step": 60000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"action_loss": 0.0698,
"epoch": 0.0051429746965644925,
"learning_rate": 2e-05,
"llm_loss": 0.4221,
"loss": 0.4918,
"step": 50
},
{
"action_loss": 0.0388,
"epoch": 0.010285949393128985,
"learning_rate": 2e-05,
"llm_loss": 0.0524,
"loss": 0.0912,
"step": 100
},
{
"action_loss": 0.0486,
"epoch": 0.015428924089693478,
"learning_rate": 2e-05,
"llm_loss": 0.0371,
"loss": 0.0857,
"step": 150
},
{
"action_loss": 0.0387,
"epoch": 0.02057189878625797,
"learning_rate": 2e-05,
"llm_loss": 0.0326,
"loss": 0.0713,
"step": 200
},
{
"action_loss": 0.0383,
"epoch": 0.025714873482822465,
"learning_rate": 2e-05,
"llm_loss": 0.0298,
"loss": 0.0681,
"step": 250
},
{
"action_loss": 0.0348,
"epoch": 0.030857848179386957,
"learning_rate": 2e-05,
"llm_loss": 0.0263,
"loss": 0.061,
"step": 300
},
{
"action_loss": 0.0392,
"epoch": 0.03600082287595145,
"learning_rate": 2e-05,
"llm_loss": 0.0228,
"loss": 0.0619,
"step": 350
},
{
"action_loss": 0.0396,
"epoch": 0.04114379757251594,
"learning_rate": 2e-05,
"llm_loss": 0.0239,
"loss": 0.0636,
"step": 400
},
{
"action_loss": 0.0338,
"epoch": 0.046286772269080435,
"learning_rate": 2e-05,
"llm_loss": 0.0243,
"loss": 0.0581,
"step": 450
},
{
"action_loss": 0.0331,
"epoch": 0.05142974696564493,
"learning_rate": 2e-05,
"llm_loss": 0.0232,
"loss": 0.0564,
"step": 500
},
{
"action_loss": 0.042,
"epoch": 0.05657272166220942,
"learning_rate": 2e-05,
"llm_loss": 0.0205,
"loss": 0.0625,
"step": 550
},
{
"action_loss": 0.0343,
"epoch": 0.061715696358773914,
"learning_rate": 2e-05,
"llm_loss": 0.018,
"loss": 0.0523,
"step": 600
},
{
"action_loss": 0.0285,
"epoch": 0.06685867105533841,
"learning_rate": 2e-05,
"llm_loss": 0.0231,
"loss": 0.0516,
"step": 650
},
{
"action_loss": 0.0344,
"epoch": 0.0720016457519029,
"learning_rate": 2e-05,
"llm_loss": 0.0186,
"loss": 0.053,
"step": 700
},
{
"action_loss": 0.0294,
"epoch": 0.0771446204484674,
"learning_rate": 2e-05,
"llm_loss": 0.0187,
"loss": 0.0481,
"step": 750
},
{
"action_loss": 0.0391,
"epoch": 0.08228759514503188,
"learning_rate": 2e-05,
"llm_loss": 0.0189,
"loss": 0.058,
"step": 800
},
{
"action_loss": 0.0321,
"epoch": 0.08743056984159638,
"learning_rate": 2e-05,
"llm_loss": 0.0159,
"loss": 0.048,
"step": 850
},
{
"action_loss": 0.0442,
"epoch": 0.09257354453816087,
"learning_rate": 2e-05,
"llm_loss": 0.0176,
"loss": 0.0617,
"step": 900
},
{
"action_loss": 0.0435,
"epoch": 0.09771651923472537,
"learning_rate": 2e-05,
"llm_loss": 0.0164,
"loss": 0.0598,
"step": 950
},
{
"action_loss": 0.0353,
"epoch": 0.10285949393128986,
"learning_rate": 2e-05,
"llm_loss": 0.0176,
"loss": 0.0529,
"step": 1000
},
{
"action_loss": 0.0333,
"epoch": 0.10800246862785436,
"learning_rate": 2e-05,
"llm_loss": 0.0219,
"loss": 0.0552,
"step": 1050
},
{
"action_loss": 0.0445,
"epoch": 0.11314544332441884,
"learning_rate": 2e-05,
"llm_loss": 0.0164,
"loss": 0.0609,
"step": 1100
},
{
"action_loss": 0.0288,
"epoch": 0.11828841802098333,
"learning_rate": 2e-05,
"llm_loss": 0.0139,
"loss": 0.0427,
"step": 1150
},
{
"action_loss": 0.0291,
"epoch": 0.12343139271754783,
"learning_rate": 2e-05,
"llm_loss": 0.0138,
"loss": 0.0429,
"step": 1200
},
{
"action_loss": 0.0444,
"epoch": 0.12857436741411232,
"learning_rate": 2e-05,
"llm_loss": 0.0145,
"loss": 0.0589,
"step": 1250
},
{
"action_loss": 0.0327,
"epoch": 0.13371734211067682,
"learning_rate": 2e-05,
"llm_loss": 0.015,
"loss": 0.0477,
"step": 1300
},
{
"action_loss": 0.0265,
"epoch": 0.1388603168072413,
"learning_rate": 2e-05,
"llm_loss": 0.014,
"loss": 0.0405,
"step": 1350
},
{
"action_loss": 0.0353,
"epoch": 0.1440032915038058,
"learning_rate": 2e-05,
"llm_loss": 0.0144,
"loss": 0.0497,
"step": 1400
},
{
"action_loss": 0.0288,
"epoch": 0.1491462662003703,
"learning_rate": 2e-05,
"llm_loss": 0.016,
"loss": 0.0448,
"step": 1450
},
{
"action_loss": 0.0345,
"epoch": 0.1542892408969348,
"learning_rate": 2e-05,
"llm_loss": 0.0179,
"loss": 0.0524,
"step": 1500
},
{
"action_loss": 0.0284,
"epoch": 0.1594322155934993,
"learning_rate": 2e-05,
"llm_loss": 0.014,
"loss": 0.0424,
"step": 1550
},
{
"action_loss": 0.0324,
"epoch": 0.16457519029006376,
"learning_rate": 2e-05,
"llm_loss": 0.0135,
"loss": 0.046,
"step": 1600
},
{
"action_loss": 0.035,
"epoch": 0.16971816498662826,
"learning_rate": 2e-05,
"llm_loss": 0.0133,
"loss": 0.0483,
"step": 1650
},
{
"action_loss": 0.0278,
"epoch": 0.17486113968319275,
"learning_rate": 2e-05,
"llm_loss": 0.0141,
"loss": 0.0419,
"step": 1700
},
{
"action_loss": 0.0312,
"epoch": 0.18000411437975725,
"learning_rate": 2e-05,
"llm_loss": 0.0139,
"loss": 0.0451,
"step": 1750
},
{
"action_loss": 0.0385,
"epoch": 0.18514708907632174,
"learning_rate": 2e-05,
"llm_loss": 0.0139,
"loss": 0.0525,
"step": 1800
},
{
"action_loss": 0.0425,
"epoch": 0.19029006377288624,
"learning_rate": 2e-05,
"llm_loss": 0.0145,
"loss": 0.057,
"step": 1850
},
{
"action_loss": 0.0349,
"epoch": 0.19543303846945073,
"learning_rate": 2e-05,
"llm_loss": 0.0147,
"loss": 0.0497,
"step": 1900
},
{
"action_loss": 0.0367,
"epoch": 0.20057601316601523,
"learning_rate": 2e-05,
"llm_loss": 0.0138,
"loss": 0.0505,
"step": 1950
},
{
"action_loss": 0.0317,
"epoch": 0.20571898786257972,
"learning_rate": 2e-05,
"llm_loss": 0.014,
"loss": 0.0457,
"step": 2000
},
{
"action_loss": 0.0357,
"epoch": 0.21086196255914422,
"learning_rate": 2e-05,
"llm_loss": 0.0127,
"loss": 0.0484,
"step": 2050
},
{
"action_loss": 0.0354,
"epoch": 0.2160049372557087,
"learning_rate": 2e-05,
"llm_loss": 0.0137,
"loss": 0.0491,
"step": 2100
},
{
"action_loss": 0.0332,
"epoch": 0.2211479119522732,
"learning_rate": 2e-05,
"llm_loss": 0.0143,
"loss": 0.0475,
"step": 2150
},
{
"action_loss": 0.0328,
"epoch": 0.22629088664883767,
"learning_rate": 2e-05,
"llm_loss": 0.0133,
"loss": 0.0461,
"step": 2200
},
{
"action_loss": 0.0268,
"epoch": 0.23143386134540217,
"learning_rate": 2e-05,
"llm_loss": 0.0131,
"loss": 0.0399,
"step": 2250
},
{
"action_loss": 0.0321,
"epoch": 0.23657683604196666,
"learning_rate": 2e-05,
"llm_loss": 0.0122,
"loss": 0.0443,
"step": 2300
},
{
"action_loss": 0.0304,
"epoch": 0.24171981073853116,
"learning_rate": 2e-05,
"llm_loss": 0.0129,
"loss": 0.0433,
"step": 2350
},
{
"action_loss": 0.0391,
"epoch": 0.24686278543509566,
"learning_rate": 2e-05,
"llm_loss": 0.0131,
"loss": 0.0521,
"step": 2400
},
{
"action_loss": 0.0304,
"epoch": 0.25200576013166015,
"learning_rate": 2e-05,
"llm_loss": 0.0125,
"loss": 0.0429,
"step": 2450
},
{
"action_loss": 0.0262,
"epoch": 0.25714873482822465,
"learning_rate": 2e-05,
"llm_loss": 0.0125,
"loss": 0.0386,
"step": 2500
},
{
"action_loss": 0.0301,
"epoch": 0.26229170952478914,
"learning_rate": 2e-05,
"llm_loss": 0.0113,
"loss": 0.0414,
"step": 2550
},
{
"action_loss": 0.0395,
"epoch": 0.26743468422135364,
"learning_rate": 2e-05,
"llm_loss": 0.0133,
"loss": 0.0529,
"step": 2600
},
{
"action_loss": 0.0281,
"epoch": 0.27257765891791813,
"learning_rate": 2e-05,
"llm_loss": 0.0121,
"loss": 0.0403,
"step": 2650
},
{
"action_loss": 0.0406,
"epoch": 0.2777206336144826,
"learning_rate": 2e-05,
"llm_loss": 0.0128,
"loss": 0.0535,
"step": 2700
},
{
"action_loss": 0.0369,
"epoch": 0.2828636083110471,
"learning_rate": 2e-05,
"llm_loss": 0.0124,
"loss": 0.0493,
"step": 2750
},
{
"action_loss": 0.0302,
"epoch": 0.2880065830076116,
"learning_rate": 2e-05,
"llm_loss": 0.0125,
"loss": 0.0427,
"step": 2800
},
{
"action_loss": 0.0272,
"epoch": 0.2931495577041761,
"learning_rate": 2e-05,
"llm_loss": 0.0114,
"loss": 0.0385,
"step": 2850
},
{
"action_loss": 0.03,
"epoch": 0.2982925324007406,
"learning_rate": 2e-05,
"llm_loss": 0.0123,
"loss": 0.0423,
"step": 2900
},
{
"action_loss": 0.0283,
"epoch": 0.3034355070973051,
"learning_rate": 2e-05,
"llm_loss": 0.0123,
"loss": 0.0405,
"step": 2950
},
{
"action_loss": 0.032,
"epoch": 0.3085784817938696,
"learning_rate": 2e-05,
"llm_loss": 0.0115,
"loss": 0.0435,
"step": 3000
},
{
"action_loss": 0.0343,
"epoch": 0.3137214564904341,
"learning_rate": 2e-05,
"llm_loss": 0.0123,
"loss": 0.0466,
"step": 3050
},
{
"action_loss": 0.0205,
"epoch": 0.3188644311869986,
"learning_rate": 2e-05,
"llm_loss": 0.0119,
"loss": 0.0324,
"step": 3100
},
{
"action_loss": 0.0295,
"epoch": 0.324007405883563,
"learning_rate": 2e-05,
"llm_loss": 0.0107,
"loss": 0.0402,
"step": 3150
},
{
"action_loss": 0.0319,
"epoch": 0.3291503805801275,
"learning_rate": 2e-05,
"llm_loss": 0.0109,
"loss": 0.0428,
"step": 3200
},
{
"action_loss": 0.0255,
"epoch": 0.334293355276692,
"learning_rate": 2e-05,
"llm_loss": 0.0107,
"loss": 0.0363,
"step": 3250
},
{
"action_loss": 0.0292,
"epoch": 0.3394363299732565,
"learning_rate": 2e-05,
"llm_loss": 0.0107,
"loss": 0.0398,
"step": 3300
},
{
"action_loss": 0.0289,
"epoch": 0.344579304669821,
"learning_rate": 2e-05,
"llm_loss": 0.0123,
"loss": 0.0412,
"step": 3350
},
{
"action_loss": 0.024,
"epoch": 0.3497222793663855,
"learning_rate": 2e-05,
"llm_loss": 0.0109,
"loss": 0.0349,
"step": 3400
},
{
"action_loss": 0.0328,
"epoch": 0.35486525406295,
"learning_rate": 2e-05,
"llm_loss": 0.0098,
"loss": 0.0426,
"step": 3450
},
{
"action_loss": 0.0341,
"epoch": 0.3600082287595145,
"learning_rate": 2e-05,
"llm_loss": 0.0113,
"loss": 0.0454,
"step": 3500
},
{
"action_loss": 0.0275,
"epoch": 0.365151203456079,
"learning_rate": 2e-05,
"llm_loss": 0.0118,
"loss": 0.0393,
"step": 3550
},
{
"action_loss": 0.025,
"epoch": 0.3702941781526435,
"learning_rate": 2e-05,
"llm_loss": 0.0113,
"loss": 0.0363,
"step": 3600
},
{
"action_loss": 0.0297,
"epoch": 0.375437152849208,
"learning_rate": 2e-05,
"llm_loss": 0.0115,
"loss": 0.0412,
"step": 3650
},
{
"action_loss": 0.0322,
"epoch": 0.3805801275457725,
"learning_rate": 2e-05,
"llm_loss": 0.0101,
"loss": 0.0423,
"step": 3700
},
{
"action_loss": 0.0413,
"epoch": 0.38572310224233697,
"learning_rate": 2e-05,
"llm_loss": 0.0119,
"loss": 0.0532,
"step": 3750
},
{
"action_loss": 0.0387,
"epoch": 0.39086607693890146,
"learning_rate": 2e-05,
"llm_loss": 0.0112,
"loss": 0.0499,
"step": 3800
},
{
"action_loss": 0.038,
"epoch": 0.39600905163546596,
"learning_rate": 2e-05,
"llm_loss": 0.0108,
"loss": 0.0488,
"step": 3850
},
{
"action_loss": 0.0249,
"epoch": 0.40115202633203045,
"learning_rate": 2e-05,
"llm_loss": 0.0111,
"loss": 0.036,
"step": 3900
},
{
"action_loss": 0.0392,
"epoch": 0.40629500102859495,
"learning_rate": 2e-05,
"llm_loss": 0.0098,
"loss": 0.049,
"step": 3950
},
{
"action_loss": 0.0283,
"epoch": 0.41143797572515944,
"learning_rate": 2e-05,
"llm_loss": 0.0112,
"loss": 0.0395,
"step": 4000
},
{
"action_loss": 0.0254,
"epoch": 0.41658095042172394,
"learning_rate": 2e-05,
"llm_loss": 0.011,
"loss": 0.0365,
"step": 4050
},
{
"action_loss": 0.0282,
"epoch": 0.42172392511828843,
"learning_rate": 2e-05,
"llm_loss": 0.0109,
"loss": 0.0391,
"step": 4100
},
{
"action_loss": 0.0253,
"epoch": 0.42686689981485293,
"learning_rate": 2e-05,
"llm_loss": 0.0102,
"loss": 0.0355,
"step": 4150
},
{
"action_loss": 0.0311,
"epoch": 0.4320098745114174,
"learning_rate": 2e-05,
"llm_loss": 0.0094,
"loss": 0.0404,
"step": 4200
},
{
"action_loss": 0.0378,
"epoch": 0.4371528492079819,
"learning_rate": 2e-05,
"llm_loss": 0.0099,
"loss": 0.0477,
"step": 4250
},
{
"action_loss": 0.0324,
"epoch": 0.4422958239045464,
"learning_rate": 2e-05,
"llm_loss": 0.011,
"loss": 0.0434,
"step": 4300
},
{
"action_loss": 0.0344,
"epoch": 0.4474387986011109,
"learning_rate": 2e-05,
"llm_loss": 0.0102,
"loss": 0.0446,
"step": 4350
},
{
"action_loss": 0.0272,
"epoch": 0.45258177329767535,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.0367,
"step": 4400
},
{
"action_loss": 0.0405,
"epoch": 0.45772474799423984,
"learning_rate": 2e-05,
"llm_loss": 0.011,
"loss": 0.0515,
"step": 4450
},
{
"action_loss": 0.0309,
"epoch": 0.46286772269080434,
"learning_rate": 2e-05,
"llm_loss": 0.0107,
"loss": 0.0416,
"step": 4500
},
{
"action_loss": 0.0348,
"epoch": 0.46801069738736883,
"learning_rate": 2e-05,
"llm_loss": 0.0109,
"loss": 0.0457,
"step": 4550
},
{
"action_loss": 0.0329,
"epoch": 0.47315367208393333,
"learning_rate": 2e-05,
"llm_loss": 0.0087,
"loss": 0.0416,
"step": 4600
},
{
"action_loss": 0.0332,
"epoch": 0.4782966467804978,
"learning_rate": 2e-05,
"llm_loss": 0.0088,
"loss": 0.042,
"step": 4650
},
{
"action_loss": 0.0264,
"epoch": 0.4834396214770623,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0353,
"step": 4700
},
{
"action_loss": 0.0299,
"epoch": 0.4885825961736268,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.0394,
"step": 4750
},
{
"action_loss": 0.0314,
"epoch": 0.4937255708701913,
"learning_rate": 2e-05,
"llm_loss": 0.0091,
"loss": 0.0406,
"step": 4800
},
{
"action_loss": 0.0191,
"epoch": 0.4988685455667558,
"learning_rate": 2e-05,
"llm_loss": 0.0093,
"loss": 0.0284,
"step": 4850
},
{
"action_loss": 0.0277,
"epoch": 0.5040115202633203,
"learning_rate": 2e-05,
"llm_loss": 0.0099,
"loss": 0.0375,
"step": 4900
},
{
"action_loss": 0.025,
"epoch": 0.5091544949598849,
"learning_rate": 2e-05,
"llm_loss": 0.009,
"loss": 0.034,
"step": 4950
},
{
"action_loss": 0.0373,
"epoch": 0.5142974696564493,
"learning_rate": 2e-05,
"llm_loss": 0.01,
"loss": 0.0473,
"step": 5000
},
{
"action_loss": 0.0326,
"epoch": 0.5194404443530137,
"learning_rate": 2e-05,
"llm_loss": 0.0101,
"loss": 0.0427,
"step": 5050
},
{
"action_loss": 0.0252,
"epoch": 0.5245834190495783,
"learning_rate": 2e-05,
"llm_loss": 0.01,
"loss": 0.0352,
"step": 5100
},
{
"action_loss": 0.025,
"epoch": 0.5297263937461427,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0339,
"step": 5150
},
{
"action_loss": 0.0313,
"epoch": 0.5348693684427073,
"learning_rate": 2e-05,
"llm_loss": 0.0097,
"loss": 0.041,
"step": 5200
},
{
"action_loss": 0.0293,
"epoch": 0.5400123431392717,
"learning_rate": 2e-05,
"llm_loss": 0.0094,
"loss": 0.0387,
"step": 5250
},
{
"action_loss": 0.0258,
"epoch": 0.5451553178358363,
"learning_rate": 2e-05,
"llm_loss": 0.0097,
"loss": 0.0355,
"step": 5300
},
{
"action_loss": 0.0238,
"epoch": 0.5502982925324007,
"learning_rate": 2e-05,
"llm_loss": 0.0096,
"loss": 0.0333,
"step": 5350
},
{
"action_loss": 0.0287,
"epoch": 0.5554412672289653,
"learning_rate": 2e-05,
"llm_loss": 0.0101,
"loss": 0.0389,
"step": 5400
},
{
"action_loss": 0.0294,
"epoch": 0.5605842419255297,
"learning_rate": 2e-05,
"llm_loss": 0.0102,
"loss": 0.0396,
"step": 5450
},
{
"action_loss": 0.034,
"epoch": 0.5657272166220942,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.0435,
"step": 5500
},
{
"action_loss": 0.0287,
"epoch": 0.5708701913186587,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.0382,
"step": 5550
},
{
"action_loss": 0.0233,
"epoch": 0.5760131660152232,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.0328,
"step": 5600
},
{
"action_loss": 0.0333,
"epoch": 0.5811561407117877,
"learning_rate": 2e-05,
"llm_loss": 0.0084,
"loss": 0.0417,
"step": 5650
},
{
"action_loss": 0.035,
"epoch": 0.5862991154083522,
"learning_rate": 2e-05,
"llm_loss": 0.0117,
"loss": 0.0468,
"step": 5700
},
{
"action_loss": 0.0353,
"epoch": 0.5914420901049167,
"learning_rate": 2e-05,
"llm_loss": 0.0081,
"loss": 0.0434,
"step": 5750
},
{
"action_loss": 0.0344,
"epoch": 0.5965850648014812,
"learning_rate": 2e-05,
"llm_loss": 0.0078,
"loss": 0.0422,
"step": 5800
},
{
"action_loss": 0.0297,
"epoch": 0.6017280394980457,
"learning_rate": 2e-05,
"llm_loss": 0.0083,
"loss": 0.0381,
"step": 5850
},
{
"action_loss": 0.0306,
"epoch": 0.6068710141946102,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0395,
"step": 5900
},
{
"action_loss": 0.034,
"epoch": 0.6120139888911746,
"learning_rate": 2e-05,
"llm_loss": 0.0091,
"loss": 0.0431,
"step": 5950
},
{
"action_loss": 0.0337,
"epoch": 0.6171569635877392,
"learning_rate": 2e-05,
"llm_loss": 0.0085,
"loss": 0.0421,
"step": 6000
},
{
"action_loss": 0.0251,
"epoch": 0.6222999382843036,
"learning_rate": 2e-05,
"llm_loss": 0.0087,
"loss": 0.0337,
"step": 6050
},
{
"action_loss": 0.0223,
"epoch": 0.6274429129808682,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0312,
"step": 6100
},
{
"action_loss": 0.028,
"epoch": 0.6325858876774326,
"learning_rate": 2e-05,
"llm_loss": 0.0085,
"loss": 0.0365,
"step": 6150
},
{
"action_loss": 0.0284,
"epoch": 0.6377288623739972,
"learning_rate": 2e-05,
"llm_loss": 0.008,
"loss": 0.0364,
"step": 6200
},
{
"action_loss": 0.0345,
"epoch": 0.6428718370705616,
"learning_rate": 2e-05,
"llm_loss": 0.0086,
"loss": 0.0431,
"step": 6250
},
{
"action_loss": 0.0291,
"epoch": 0.648014811767126,
"learning_rate": 2e-05,
"llm_loss": 0.0088,
"loss": 0.0379,
"step": 6300
},
{
"action_loss": 0.0355,
"epoch": 0.6531577864636906,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0444,
"step": 6350
},
{
"action_loss": 0.0392,
"epoch": 0.658300761160255,
"learning_rate": 2e-05,
"llm_loss": 0.0082,
"loss": 0.0473,
"step": 6400
},
{
"action_loss": 0.0263,
"epoch": 0.6634437358568196,
"learning_rate": 2e-05,
"llm_loss": 0.0083,
"loss": 0.0346,
"step": 6450
},
{
"action_loss": 0.0284,
"epoch": 0.668586710553384,
"learning_rate": 2e-05,
"llm_loss": 0.0087,
"loss": 0.0372,
"step": 6500
},
{
"action_loss": 0.0237,
"epoch": 0.6737296852499486,
"learning_rate": 2e-05,
"llm_loss": 0.0093,
"loss": 0.033,
"step": 6550
},
{
"action_loss": 0.0292,
"epoch": 0.678872659946513,
"learning_rate": 2e-05,
"llm_loss": 0.0093,
"loss": 0.0384,
"step": 6600
},
{
"action_loss": 0.0188,
"epoch": 0.6840156346430776,
"learning_rate": 2e-05,
"llm_loss": 0.0091,
"loss": 0.0279,
"step": 6650
},
{
"action_loss": 0.0221,
"epoch": 0.689158609339642,
"learning_rate": 2e-05,
"llm_loss": 0.0079,
"loss": 0.03,
"step": 6700
},
{
"action_loss": 0.031,
"epoch": 0.6943015840362066,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0398,
"step": 6750
},
{
"action_loss": 0.0268,
"epoch": 0.699444558732771,
"learning_rate": 2e-05,
"llm_loss": 0.0078,
"loss": 0.0346,
"step": 6800
},
{
"action_loss": 0.0239,
"epoch": 0.7045875334293356,
"learning_rate": 2e-05,
"llm_loss": 0.0083,
"loss": 0.0322,
"step": 6850
},
{
"action_loss": 0.0312,
"epoch": 0.7097305081259,
"learning_rate": 2e-05,
"llm_loss": 0.0085,
"loss": 0.0396,
"step": 6900
},
{
"action_loss": 0.0227,
"epoch": 0.7148734828224645,
"learning_rate": 2e-05,
"llm_loss": 0.0082,
"loss": 0.031,
"step": 6950
},
{
"action_loss": 0.0304,
"epoch": 0.720016457519029,
"learning_rate": 2e-05,
"llm_loss": 0.0085,
"loss": 0.0389,
"step": 7000
},
{
"action_loss": 0.0346,
"epoch": 0.7251594322155935,
"learning_rate": 2e-05,
"llm_loss": 0.0078,
"loss": 0.0424,
"step": 7050
},
{
"action_loss": 0.0319,
"epoch": 0.730302406912158,
"learning_rate": 2e-05,
"llm_loss": 0.0083,
"loss": 0.0402,
"step": 7100
},
{
"action_loss": 0.0288,
"epoch": 0.7354453816087225,
"learning_rate": 2e-05,
"llm_loss": 0.0079,
"loss": 0.0368,
"step": 7150
},
{
"action_loss": 0.0289,
"epoch": 0.740588356305287,
"learning_rate": 2e-05,
"llm_loss": 0.0086,
"loss": 0.0375,
"step": 7200
},
{
"action_loss": 0.029,
"epoch": 0.7457313310018515,
"learning_rate": 2e-05,
"llm_loss": 0.0087,
"loss": 0.0378,
"step": 7250
},
{
"action_loss": 0.0276,
"epoch": 0.750874305698416,
"learning_rate": 2e-05,
"llm_loss": 0.0076,
"loss": 0.0352,
"step": 7300
},
{
"action_loss": 0.0329,
"epoch": 0.7560172803949805,
"learning_rate": 2e-05,
"llm_loss": 0.0084,
"loss": 0.0414,
"step": 7350
},
{
"action_loss": 0.0313,
"epoch": 0.761160255091545,
"learning_rate": 2e-05,
"llm_loss": 0.0086,
"loss": 0.0399,
"step": 7400
},
{
"action_loss": 0.0325,
"epoch": 0.7663032297881095,
"learning_rate": 2e-05,
"llm_loss": 0.0089,
"loss": 0.0414,
"step": 7450
},
{
"action_loss": 0.0373,
"epoch": 0.7714462044846739,
"learning_rate": 2e-05,
"llm_loss": 0.0078,
"loss": 0.0451,
"step": 7500
},
{
"action_loss": 0.0197,
"epoch": 0.7765891791812384,
"learning_rate": 2e-05,
"llm_loss": 0.0085,
"loss": 0.0282,
"step": 7550
},
{
"action_loss": 0.0211,
"epoch": 0.7817321538778029,
"learning_rate": 2e-05,
"llm_loss": 0.0077,
"loss": 0.0288,
"step": 7600
},
{
"action_loss": 0.0174,
"epoch": 0.7868751285743674,
"learning_rate": 2e-05,
"llm_loss": 0.0082,
"loss": 0.0256,
"step": 7650
},
{
"action_loss": 0.0264,
"epoch": 0.7920181032709319,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0335,
"step": 7700
},
{
"action_loss": 0.0295,
"epoch": 0.7971610779674964,
"learning_rate": 2e-05,
"llm_loss": 0.0082,
"loss": 0.0378,
"step": 7750
},
{
"action_loss": 0.0279,
"epoch": 0.8023040526640609,
"learning_rate": 2e-05,
"llm_loss": 0.007,
"loss": 0.0349,
"step": 7800
},
{
"action_loss": 0.0323,
"epoch": 0.8074470273606253,
"learning_rate": 2e-05,
"llm_loss": 0.0082,
"loss": 0.0405,
"step": 7850
},
{
"action_loss": 0.034,
"epoch": 0.8125900020571899,
"learning_rate": 2e-05,
"llm_loss": 0.0078,
"loss": 0.0419,
"step": 7900
},
{
"action_loss": 0.0271,
"epoch": 0.8177329767537543,
"learning_rate": 2e-05,
"llm_loss": 0.0084,
"loss": 0.0355,
"step": 7950
},
{
"action_loss": 0.0391,
"epoch": 0.8228759514503189,
"learning_rate": 2e-05,
"llm_loss": 0.0074,
"loss": 0.0465,
"step": 8000
},
{
"action_loss": 0.0313,
"epoch": 0.8280189261468833,
"learning_rate": 2e-05,
"llm_loss": 0.008,
"loss": 0.0393,
"step": 8050
},
{
"action_loss": 0.0229,
"epoch": 0.8331619008434479,
"learning_rate": 2e-05,
"llm_loss": 0.0075,
"loss": 0.0304,
"step": 8100
},
{
"action_loss": 0.0325,
"epoch": 0.8383048755400123,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0398,
"step": 8150
},
{
"action_loss": 0.0261,
"epoch": 0.8434478502365769,
"learning_rate": 2e-05,
"llm_loss": 0.0083,
"loss": 0.0344,
"step": 8200
},
{
"action_loss": 0.0211,
"epoch": 0.8485908249331413,
"learning_rate": 2e-05,
"llm_loss": 0.0077,
"loss": 0.0288,
"step": 8250
},
{
"action_loss": 0.0306,
"epoch": 0.8537337996297059,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0378,
"step": 8300
},
{
"action_loss": 0.0291,
"epoch": 0.8588767743262703,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0361,
"step": 8350
},
{
"action_loss": 0.029,
"epoch": 0.8640197490228348,
"learning_rate": 2e-05,
"llm_loss": 0.008,
"loss": 0.037,
"step": 8400
},
{
"action_loss": 0.0384,
"epoch": 0.8691627237193993,
"learning_rate": 2e-05,
"llm_loss": 0.0077,
"loss": 0.0461,
"step": 8450
},
{
"action_loss": 0.0216,
"epoch": 0.8743056984159638,
"learning_rate": 2e-05,
"llm_loss": 0.0079,
"loss": 0.0294,
"step": 8500
},
{
"action_loss": 0.0281,
"epoch": 0.8794486731125283,
"learning_rate": 2e-05,
"llm_loss": 0.0076,
"loss": 0.0357,
"step": 8550
},
{
"action_loss": 0.0273,
"epoch": 0.8845916478090928,
"learning_rate": 2e-05,
"llm_loss": 0.0072,
"loss": 0.0345,
"step": 8600
},
{
"action_loss": 0.0265,
"epoch": 0.8897346225056573,
"learning_rate": 2e-05,
"llm_loss": 0.0074,
"loss": 0.0339,
"step": 8650
},
{
"action_loss": 0.0393,
"epoch": 0.8948775972022218,
"learning_rate": 2e-05,
"llm_loss": 0.0076,
"loss": 0.0469,
"step": 8700
},
{
"action_loss": 0.0347,
"epoch": 0.9000205718987863,
"learning_rate": 2e-05,
"llm_loss": 0.0072,
"loss": 0.0419,
"step": 8750
},
{
"action_loss": 0.0299,
"epoch": 0.9051635465953507,
"learning_rate": 2e-05,
"llm_loss": 0.0081,
"loss": 0.038,
"step": 8800
},
{
"action_loss": 0.0213,
"epoch": 0.9103065212919152,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0283,
"step": 8850
},
{
"action_loss": 0.0295,
"epoch": 0.9154494959884797,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0366,
"step": 8900
},
{
"action_loss": 0.0288,
"epoch": 0.9205924706850442,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0359,
"step": 8950
},
{
"action_loss": 0.0299,
"epoch": 0.9257354453816087,
"learning_rate": 2e-05,
"llm_loss": 0.0069,
"loss": 0.0368,
"step": 9000
},
{
"action_loss": 0.0302,
"epoch": 0.9308784200781732,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0375,
"step": 9050
},
{
"action_loss": 0.028,
"epoch": 0.9360213947747377,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0352,
"step": 9100
},
{
"action_loss": 0.0411,
"epoch": 0.9411643694713022,
"learning_rate": 2e-05,
"llm_loss": 0.0072,
"loss": 0.0483,
"step": 9150
},
{
"action_loss": 0.0338,
"epoch": 0.9463073441678667,
"learning_rate": 2e-05,
"llm_loss": 0.0077,
"loss": 0.0415,
"step": 9200
},
{
"action_loss": 0.022,
"epoch": 0.9514503188644312,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0291,
"step": 9250
},
{
"action_loss": 0.026,
"epoch": 0.9565932935609957,
"learning_rate": 2e-05,
"llm_loss": 0.0081,
"loss": 0.0341,
"step": 9300
},
{
"action_loss": 0.0341,
"epoch": 0.9617362682575602,
"learning_rate": 2e-05,
"llm_loss": 0.0068,
"loss": 0.0408,
"step": 9350
},
{
"action_loss": 0.024,
"epoch": 0.9668792429541246,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0304,
"step": 9400
},
{
"action_loss": 0.0207,
"epoch": 0.9720222176506892,
"learning_rate": 2e-05,
"llm_loss": 0.0074,
"loss": 0.028,
"step": 9450
},
{
"action_loss": 0.0214,
"epoch": 0.9771651923472536,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0287,
"step": 9500
},
{
"action_loss": 0.0224,
"epoch": 0.9823081670438182,
"learning_rate": 2e-05,
"llm_loss": 0.0077,
"loss": 0.0301,
"step": 9550
},
{
"action_loss": 0.0273,
"epoch": 0.9874511417403826,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0334,
"step": 9600
},
{
"action_loss": 0.0268,
"epoch": 0.9925941164369472,
"learning_rate": 2e-05,
"llm_loss": 0.007,
"loss": 0.0337,
"step": 9650
},
{
"action_loss": 0.0247,
"epoch": 0.9977370911335116,
"learning_rate": 2e-05,
"llm_loss": 0.0075,
"loss": 0.0321,
"step": 9700
},
{
"action_loss": 0.0347,
"epoch": 1.002880065830076,
"learning_rate": 2e-05,
"llm_loss": 0.0069,
"loss": 0.0416,
"step": 9750
},
{
"action_loss": 0.0347,
"epoch": 1.0080230405266406,
"learning_rate": 2e-05,
"llm_loss": 0.0065,
"loss": 0.0413,
"step": 9800
},
{
"action_loss": 0.0226,
"epoch": 1.0131660152232052,
"learning_rate": 2e-05,
"llm_loss": 0.0072,
"loss": 0.0298,
"step": 9850
},
{
"action_loss": 0.0193,
"epoch": 1.0183089899197697,
"learning_rate": 2e-05,
"llm_loss": 0.0073,
"loss": 0.0266,
"step": 9900
},
{
"action_loss": 0.0296,
"epoch": 1.023451964616334,
"learning_rate": 2e-05,
"llm_loss": 0.0065,
"loss": 0.0361,
"step": 9950
},
{
"action_loss": 0.0362,
"epoch": 1.0285949393128986,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0425,
"step": 10000
},
{
"action_loss": 0.026,
"epoch": 1.0337379140094631,
"learning_rate": 2e-05,
"llm_loss": 0.0071,
"loss": 0.0331,
"step": 10050
},
{
"action_loss": 0.0259,
"epoch": 1.0388808887060275,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0316,
"step": 10100
},
{
"action_loss": 0.0235,
"epoch": 1.044023863402592,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0297,
"step": 10150
},
{
"action_loss": 0.0309,
"epoch": 1.0491668380991566,
"learning_rate": 2e-05,
"llm_loss": 0.007,
"loss": 0.0379,
"step": 10200
},
{
"action_loss": 0.0177,
"epoch": 1.0543098127957211,
"learning_rate": 2e-05,
"llm_loss": 0.0061,
"loss": 0.0238,
"step": 10250
},
{
"action_loss": 0.0288,
"epoch": 1.0594527874922854,
"learning_rate": 2e-05,
"llm_loss": 0.0061,
"loss": 0.0349,
"step": 10300
},
{
"action_loss": 0.0251,
"epoch": 1.06459576218885,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0308,
"step": 10350
},
{
"action_loss": 0.0277,
"epoch": 1.0697387368854145,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0343,
"step": 10400
},
{
"action_loss": 0.0312,
"epoch": 1.074881711581979,
"learning_rate": 2e-05,
"llm_loss": 0.0072,
"loss": 0.0384,
"step": 10450
},
{
"action_loss": 0.0254,
"epoch": 1.0800246862785434,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0313,
"step": 10500
},
{
"action_loss": 0.0282,
"epoch": 1.085167660975108,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0339,
"step": 10550
},
{
"action_loss": 0.0357,
"epoch": 1.0903106356716725,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.042,
"step": 10600
},
{
"action_loss": 0.028,
"epoch": 1.095453610368237,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0347,
"step": 10650
},
{
"action_loss": 0.0252,
"epoch": 1.1005965850648014,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0314,
"step": 10700
},
{
"action_loss": 0.0263,
"epoch": 1.105739559761366,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0325,
"step": 10750
},
{
"action_loss": 0.023,
"epoch": 1.1108825344579305,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0296,
"step": 10800
},
{
"action_loss": 0.0303,
"epoch": 1.116025509154495,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0365,
"step": 10850
},
{
"action_loss": 0.0296,
"epoch": 1.1211684838510594,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0358,
"step": 10900
},
{
"action_loss": 0.0281,
"epoch": 1.126311458547624,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0338,
"step": 10950
},
{
"action_loss": 0.0305,
"epoch": 1.1314544332441885,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.036,
"step": 11000
},
{
"action_loss": 0.0304,
"epoch": 1.1365974079407528,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.036,
"step": 11050
},
{
"action_loss": 0.0383,
"epoch": 1.1417403826373174,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0446,
"step": 11100
},
{
"action_loss": 0.0248,
"epoch": 1.146883357333882,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0312,
"step": 11150
},
{
"action_loss": 0.0274,
"epoch": 1.1520263320304465,
"learning_rate": 2e-05,
"llm_loss": 0.0065,
"loss": 0.0339,
"step": 11200
},
{
"action_loss": 0.0234,
"epoch": 1.157169306727011,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0301,
"step": 11250
},
{
"action_loss": 0.0244,
"epoch": 1.1623122814235753,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.03,
"step": 11300
},
{
"action_loss": 0.025,
"epoch": 1.16745525612014,
"learning_rate": 2e-05,
"llm_loss": 0.0065,
"loss": 0.0315,
"step": 11350
},
{
"action_loss": 0.0268,
"epoch": 1.1725982308167044,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0322,
"step": 11400
},
{
"action_loss": 0.0209,
"epoch": 1.1777412055132688,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0272,
"step": 11450
},
{
"action_loss": 0.0294,
"epoch": 1.1828841802098333,
"learning_rate": 2e-05,
"llm_loss": 0.0061,
"loss": 0.0355,
"step": 11500
},
{
"action_loss": 0.0234,
"epoch": 1.1880271549063979,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0298,
"step": 11550
},
{
"action_loss": 0.0306,
"epoch": 1.1931701296029624,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0367,
"step": 11600
},
{
"action_loss": 0.027,
"epoch": 1.1983131042995268,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0333,
"step": 11650
},
{
"action_loss": 0.0186,
"epoch": 1.2034560789960913,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0249,
"step": 11700
},
{
"action_loss": 0.0397,
"epoch": 1.2085990536926559,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0461,
"step": 11750
},
{
"action_loss": 0.0337,
"epoch": 1.2137420283892204,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0391,
"step": 11800
},
{
"action_loss": 0.0202,
"epoch": 1.2188850030857847,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0262,
"step": 11850
},
{
"action_loss": 0.0277,
"epoch": 1.2240279777823493,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0336,
"step": 11900
},
{
"action_loss": 0.0242,
"epoch": 1.2291709524789138,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0303,
"step": 11950
},
{
"action_loss": 0.0324,
"epoch": 1.2343139271754784,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0381,
"step": 12000
},
{
"action_loss": 0.0206,
"epoch": 1.2394569018720427,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0257,
"step": 12050
},
{
"action_loss": 0.0252,
"epoch": 1.2445998765686073,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0315,
"step": 12100
},
{
"action_loss": 0.0256,
"epoch": 1.2497428512651718,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0314,
"step": 12150
},
{
"action_loss": 0.0322,
"epoch": 1.2548858259617361,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0386,
"step": 12200
},
{
"action_loss": 0.0276,
"epoch": 1.2600288006583007,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0331,
"step": 12250
},
{
"action_loss": 0.0267,
"epoch": 1.2651717753548652,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0333,
"step": 12300
},
{
"action_loss": 0.0276,
"epoch": 1.2703147500514298,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0333,
"step": 12350
},
{
"action_loss": 0.0336,
"epoch": 1.2754577247479943,
"learning_rate": 2e-05,
"llm_loss": 0.0067,
"loss": 0.0403,
"step": 12400
},
{
"action_loss": 0.0257,
"epoch": 1.2806006994445587,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0321,
"step": 12450
},
{
"action_loss": 0.0227,
"epoch": 1.2857436741411232,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0286,
"step": 12500
},
{
"action_loss": 0.026,
"epoch": 1.2908866488376878,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0319,
"step": 12550
},
{
"action_loss": 0.0255,
"epoch": 1.296029623534252,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0317,
"step": 12600
},
{
"action_loss": 0.0175,
"epoch": 1.3011725982308167,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0229,
"step": 12650
},
{
"action_loss": 0.0267,
"epoch": 1.3063155729273812,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0324,
"step": 12700
},
{
"action_loss": 0.0271,
"epoch": 1.3114585476239458,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0324,
"step": 12750
},
{
"action_loss": 0.0255,
"epoch": 1.3166015223205103,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0316,
"step": 12800
},
{
"action_loss": 0.0327,
"epoch": 1.3217444970170746,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0384,
"step": 12850
},
{
"action_loss": 0.0288,
"epoch": 1.3268874717136392,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0353,
"step": 12900
},
{
"action_loss": 0.035,
"epoch": 1.3320304464102037,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0408,
"step": 12950
},
{
"action_loss": 0.0178,
"epoch": 1.337173421106768,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0238,
"step": 13000
},
{
"action_loss": 0.0332,
"epoch": 1.3423163958033326,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0389,
"step": 13050
},
{
"action_loss": 0.0379,
"epoch": 1.3474593704998972,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.043,
"step": 13100
},
{
"action_loss": 0.0306,
"epoch": 1.3526023451964617,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0364,
"step": 13150
},
{
"action_loss": 0.0181,
"epoch": 1.357745319893026,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0237,
"step": 13200
},
{
"action_loss": 0.033,
"epoch": 1.3628882945895906,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.038,
"step": 13250
},
{
"action_loss": 0.0263,
"epoch": 1.3680312692861551,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.032,
"step": 13300
},
{
"action_loss": 0.0351,
"epoch": 1.3731742439827195,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0409,
"step": 13350
},
{
"action_loss": 0.0183,
"epoch": 1.378317218679284,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0236,
"step": 13400
},
{
"action_loss": 0.0322,
"epoch": 1.3834601933758486,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0377,
"step": 13450
},
{
"action_loss": 0.0276,
"epoch": 1.3886031680724131,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0333,
"step": 13500
},
{
"action_loss": 0.0344,
"epoch": 1.3937461427689777,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0401,
"step": 13550
},
{
"action_loss": 0.0307,
"epoch": 1.398889117465542,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0369,
"step": 13600
},
{
"action_loss": 0.0383,
"epoch": 1.4040320921621066,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0443,
"step": 13650
},
{
"action_loss": 0.0325,
"epoch": 1.409175066858671,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0385,
"step": 13700
},
{
"action_loss": 0.0256,
"epoch": 1.4143180415552354,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0317,
"step": 13750
},
{
"action_loss": 0.0261,
"epoch": 1.4194610162518,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0316,
"step": 13800
},
{
"action_loss": 0.0296,
"epoch": 1.4246039909483645,
"learning_rate": 2e-05,
"llm_loss": 0.0066,
"loss": 0.0361,
"step": 13850
},
{
"action_loss": 0.03,
"epoch": 1.429746965644929,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0347,
"step": 13900
},
{
"action_loss": 0.0285,
"epoch": 1.4348899403414936,
"learning_rate": 2e-05,
"llm_loss": 0.0095,
"loss": 0.038,
"step": 13950
},
{
"action_loss": 0.0241,
"epoch": 1.440032915038058,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0296,
"step": 14000
},
{
"action_loss": 0.0291,
"epoch": 1.4451758897346225,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0341,
"step": 14050
},
{
"action_loss": 0.0167,
"epoch": 1.450318864431187,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.022,
"step": 14100
},
{
"action_loss": 0.023,
"epoch": 1.4554618391277514,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.028,
"step": 14150
},
{
"action_loss": 0.0237,
"epoch": 1.460604813824316,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0297,
"step": 14200
},
{
"action_loss": 0.0241,
"epoch": 1.4657477885208805,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0288,
"step": 14250
},
{
"action_loss": 0.0247,
"epoch": 1.470890763217445,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0305,
"step": 14300
},
{
"action_loss": 0.0244,
"epoch": 1.4760337379140094,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0296,
"step": 14350
},
{
"action_loss": 0.03,
"epoch": 1.481176712610574,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0358,
"step": 14400
},
{
"action_loss": 0.0312,
"epoch": 1.4863196873071385,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0366,
"step": 14450
},
{
"action_loss": 0.0338,
"epoch": 1.4914626620037028,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0392,
"step": 14500
},
{
"action_loss": 0.0195,
"epoch": 1.4966056367002674,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0248,
"step": 14550
},
{
"action_loss": 0.023,
"epoch": 1.501748611396832,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0282,
"step": 14600
},
{
"action_loss": 0.0266,
"epoch": 1.5068915860933965,
"learning_rate": 2e-05,
"llm_loss": 0.0062,
"loss": 0.0328,
"step": 14650
},
{
"action_loss": 0.0306,
"epoch": 1.512034560789961,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0361,
"step": 14700
},
{
"action_loss": 0.0184,
"epoch": 1.5171775354865256,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0235,
"step": 14750
},
{
"action_loss": 0.0294,
"epoch": 1.52232051018309,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0344,
"step": 14800
},
{
"action_loss": 0.0254,
"epoch": 1.5274634848796544,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0312,
"step": 14850
},
{
"action_loss": 0.0299,
"epoch": 1.5326064595762188,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0352,
"step": 14900
},
{
"action_loss": 0.0283,
"epoch": 1.5377494342727833,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0332,
"step": 14950
},
{
"action_loss": 0.0279,
"epoch": 1.5428924089693479,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0339,
"step": 15000
},
{
"action_loss": 0.0244,
"epoch": 1.5480353836659124,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0299,
"step": 15050
},
{
"action_loss": 0.0272,
"epoch": 1.553178358362477,
"learning_rate": 2e-05,
"llm_loss": 0.0064,
"loss": 0.0335,
"step": 15100
},
{
"action_loss": 0.0339,
"epoch": 1.5583213330590413,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0394,
"step": 15150
},
{
"action_loss": 0.0192,
"epoch": 1.5634643077556059,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.024,
"step": 15200
},
{
"action_loss": 0.0277,
"epoch": 1.5686072824521702,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0328,
"step": 15250
},
{
"action_loss": 0.0165,
"epoch": 1.5737502571487347,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0224,
"step": 15300
},
{
"action_loss": 0.023,
"epoch": 1.5788932318452993,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0293,
"step": 15350
},
{
"action_loss": 0.0282,
"epoch": 1.5840362065418638,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0338,
"step": 15400
},
{
"action_loss": 0.0161,
"epoch": 1.5891791812384284,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.022,
"step": 15450
},
{
"action_loss": 0.0333,
"epoch": 1.594322155934993,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0378,
"step": 15500
},
{
"action_loss": 0.0245,
"epoch": 1.5994651306315573,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0298,
"step": 15550
},
{
"action_loss": 0.0264,
"epoch": 1.6046081053281218,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0315,
"step": 15600
},
{
"action_loss": 0.0253,
"epoch": 1.6097510800246861,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.031,
"step": 15650
},
{
"action_loss": 0.021,
"epoch": 1.6148940547212507,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0266,
"step": 15700
},
{
"action_loss": 0.0276,
"epoch": 1.6200370294178152,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0327,
"step": 15750
},
{
"action_loss": 0.0385,
"epoch": 1.6251800041143798,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0436,
"step": 15800
},
{
"action_loss": 0.031,
"epoch": 1.6303229788109443,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0366,
"step": 15850
},
{
"action_loss": 0.0293,
"epoch": 1.635465953507509,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0346,
"step": 15900
},
{
"action_loss": 0.022,
"epoch": 1.6406089282040732,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0277,
"step": 15950
},
{
"action_loss": 0.0242,
"epoch": 1.6457519029006378,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0293,
"step": 16000
},
{
"action_loss": 0.0229,
"epoch": 1.650894877597202,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0275,
"step": 16050
},
{
"action_loss": 0.0208,
"epoch": 1.6560378522937667,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0265,
"step": 16100
},
{
"action_loss": 0.0277,
"epoch": 1.6611808269903312,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0332,
"step": 16150
},
{
"action_loss": 0.0318,
"epoch": 1.6663238016868958,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0372,
"step": 16200
},
{
"action_loss": 0.0203,
"epoch": 1.6714667763834603,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0256,
"step": 16250
},
{
"action_loss": 0.0312,
"epoch": 1.6766097510800246,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0361,
"step": 16300
},
{
"action_loss": 0.0299,
"epoch": 1.6817527257765892,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0355,
"step": 16350
},
{
"action_loss": 0.0382,
"epoch": 1.6868957004731535,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0432,
"step": 16400
},
{
"action_loss": 0.0252,
"epoch": 1.692038675169718,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0309,
"step": 16450
},
{
"action_loss": 0.0303,
"epoch": 1.6971816498662826,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0354,
"step": 16500
},
{
"action_loss": 0.0346,
"epoch": 1.7023246245628472,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0392,
"step": 16550
},
{
"action_loss": 0.0225,
"epoch": 1.7074675992594117,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0273,
"step": 16600
},
{
"action_loss": 0.0243,
"epoch": 1.7126105739559763,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0297,
"step": 16650
},
{
"action_loss": 0.0231,
"epoch": 1.7177535486525406,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0277,
"step": 16700
},
{
"action_loss": 0.0267,
"epoch": 1.7228965233491051,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0316,
"step": 16750
},
{
"action_loss": 0.04,
"epoch": 1.7280394980456695,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0455,
"step": 16800
},
{
"action_loss": 0.0408,
"epoch": 1.733182472742234,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0457,
"step": 16850
},
{
"action_loss": 0.028,
"epoch": 1.7383254474387986,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.034,
"step": 16900
},
{
"action_loss": 0.0328,
"epoch": 1.7434684221353631,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0382,
"step": 16950
},
{
"action_loss": 0.0221,
"epoch": 1.7486113968319277,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0274,
"step": 17000
},
{
"action_loss": 0.0203,
"epoch": 1.7537543715284922,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0252,
"step": 17050
},
{
"action_loss": 0.0303,
"epoch": 1.7588973462250566,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0353,
"step": 17100
},
{
"action_loss": 0.0194,
"epoch": 1.764040320921621,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0242,
"step": 17150
},
{
"action_loss": 0.0276,
"epoch": 1.7691832956181854,
"learning_rate": 2e-05,
"llm_loss": 0.0063,
"loss": 0.0339,
"step": 17200
},
{
"action_loss": 0.0276,
"epoch": 1.77432627031475,
"learning_rate": 2e-05,
"llm_loss": 0.0111,
"loss": 0.0387,
"step": 17250
},
{
"action_loss": 0.0201,
"epoch": 1.7794692450113145,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.025,
"step": 17300
},
{
"action_loss": 0.0224,
"epoch": 1.784612219707879,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.027,
"step": 17350
},
{
"action_loss": 0.0249,
"epoch": 1.7897551944044436,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0299,
"step": 17400
},
{
"action_loss": 0.0217,
"epoch": 1.794898169101008,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0265,
"step": 17450
},
{
"action_loss": 0.0313,
"epoch": 1.8000411437975725,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0367,
"step": 17500
},
{
"action_loss": 0.0215,
"epoch": 1.8051841184941368,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0263,
"step": 17550
},
{
"action_loss": 0.0292,
"epoch": 1.8103270931907014,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.034,
"step": 17600
},
{
"action_loss": 0.0273,
"epoch": 1.815470067887266,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0314,
"step": 17650
},
{
"action_loss": 0.0331,
"epoch": 1.8206130425838305,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0385,
"step": 17700
},
{
"action_loss": 0.0261,
"epoch": 1.825756017280395,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0311,
"step": 17750
},
{
"action_loss": 0.0251,
"epoch": 1.8308989919769596,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0306,
"step": 17800
},
{
"action_loss": 0.0285,
"epoch": 1.836041966673524,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0336,
"step": 17850
},
{
"action_loss": 0.0328,
"epoch": 1.8411849413700885,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0373,
"step": 17900
},
{
"action_loss": 0.0333,
"epoch": 1.8463279160666528,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.038,
"step": 17950
},
{
"action_loss": 0.0375,
"epoch": 1.8514708907632174,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0423,
"step": 18000
},
{
"action_loss": 0.0207,
"epoch": 1.856613865459782,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0254,
"step": 18050
},
{
"action_loss": 0.0232,
"epoch": 1.8617568401563465,
"learning_rate": 2e-05,
"llm_loss": 0.0055,
"loss": 0.0287,
"step": 18100
},
{
"action_loss": 0.0193,
"epoch": 1.866899814852911,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0234,
"step": 18150
},
{
"action_loss": 0.0217,
"epoch": 1.8720427895494756,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0265,
"step": 18200
},
{
"action_loss": 0.0297,
"epoch": 1.87718576424604,
"learning_rate": 2e-05,
"llm_loss": 0.006,
"loss": 0.0356,
"step": 18250
},
{
"action_loss": 0.025,
"epoch": 1.8823287389426044,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0302,
"step": 18300
},
{
"action_loss": 0.0275,
"epoch": 1.8874717136391688,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0322,
"step": 18350
},
{
"action_loss": 0.0236,
"epoch": 1.8926146883357333,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0293,
"step": 18400
},
{
"action_loss": 0.0347,
"epoch": 1.8977576630322979,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0391,
"step": 18450
},
{
"action_loss": 0.0294,
"epoch": 1.9029006377288624,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.0348,
"step": 18500
},
{
"action_loss": 0.03,
"epoch": 1.908043612425427,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0347,
"step": 18550
},
{
"action_loss": 0.025,
"epoch": 1.9131865871219915,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0296,
"step": 18600
},
{
"action_loss": 0.0282,
"epoch": 1.9183295618185559,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0325,
"step": 18650
},
{
"action_loss": 0.0301,
"epoch": 1.9234725365151202,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0347,
"step": 18700
},
{
"action_loss": 0.0309,
"epoch": 1.9286155112116847,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0358,
"step": 18750
},
{
"action_loss": 0.028,
"epoch": 1.9337584859082493,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0336,
"step": 18800
},
{
"action_loss": 0.0352,
"epoch": 1.9389014606048138,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.04,
"step": 18850
},
{
"action_loss": 0.027,
"epoch": 1.9440444353013784,
"learning_rate": 2e-05,
"llm_loss": 0.0057,
"loss": 0.0327,
"step": 18900
},
{
"action_loss": 0.0214,
"epoch": 1.949187409997943,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0266,
"step": 18950
},
{
"action_loss": 0.0301,
"epoch": 1.9543303846945073,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0346,
"step": 19000
},
{
"action_loss": 0.0284,
"epoch": 1.9594733593910718,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0331,
"step": 19050
},
{
"action_loss": 0.0274,
"epoch": 1.9646163340876361,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0329,
"step": 19100
},
{
"action_loss": 0.018,
"epoch": 1.9697593087842007,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0229,
"step": 19150
},
{
"action_loss": 0.0213,
"epoch": 1.9749022834807652,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0261,
"step": 19200
},
{
"action_loss": 0.027,
"epoch": 1.9800452581773298,
"learning_rate": 2e-05,
"llm_loss": 0.0056,
"loss": 0.0325,
"step": 19250
},
{
"action_loss": 0.0204,
"epoch": 1.9851882328738943,
"learning_rate": 2e-05,
"llm_loss": 0.0054,
"loss": 0.0258,
"step": 19300
},
{
"action_loss": 0.0371,
"epoch": 1.990331207570459,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0422,
"step": 19350
},
{
"action_loss": 0.0285,
"epoch": 1.9954741822670232,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0333,
"step": 19400
},
{
"action_loss": 0.0237,
"epoch": 2.0006171569635876,
"learning_rate": 2e-05,
"llm_loss": 0.0053,
"loss": 0.029,
"step": 19450
},
{
"action_loss": 0.0297,
"epoch": 2.005760131660152,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0346,
"step": 19500
},
{
"action_loss": 0.0239,
"epoch": 2.0109031063567167,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0281,
"step": 19550
},
{
"action_loss": 0.0337,
"epoch": 2.016046081053281,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0378,
"step": 19600
},
{
"action_loss": 0.0235,
"epoch": 2.0211890557498458,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0277,
"step": 19650
},
{
"action_loss": 0.0365,
"epoch": 2.0263320304464103,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.041,
"step": 19700
},
{
"action_loss": 0.0303,
"epoch": 2.031475005142975,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0353,
"step": 19750
},
{
"action_loss": 0.0287,
"epoch": 2.0366179798395394,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0332,
"step": 19800
},
{
"action_loss": 0.0375,
"epoch": 2.0417609545361035,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0417,
"step": 19850
},
{
"action_loss": 0.0304,
"epoch": 2.046903929232668,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0349,
"step": 19900
},
{
"action_loss": 0.0246,
"epoch": 2.0520469039292326,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0297,
"step": 19950
},
{
"action_loss": 0.0256,
"epoch": 2.057189878625797,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0292,
"step": 20000
},
{
"action_loss": 0.0207,
"epoch": 2.0623328533223617,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0245,
"step": 20050
},
{
"action_loss": 0.0175,
"epoch": 2.0674758280189263,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.022,
"step": 20100
},
{
"action_loss": 0.0272,
"epoch": 2.072618802715491,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0317,
"step": 20150
},
{
"action_loss": 0.0309,
"epoch": 2.077761777412055,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0354,
"step": 20200
},
{
"action_loss": 0.0307,
"epoch": 2.0829047521086195,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0347,
"step": 20250
},
{
"action_loss": 0.025,
"epoch": 2.088047726805184,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0288,
"step": 20300
},
{
"action_loss": 0.0327,
"epoch": 2.0931907015017486,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0378,
"step": 20350
},
{
"action_loss": 0.0371,
"epoch": 2.098333676198313,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0414,
"step": 20400
},
{
"action_loss": 0.019,
"epoch": 2.1034766508948777,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0232,
"step": 20450
},
{
"action_loss": 0.022,
"epoch": 2.1086196255914422,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0267,
"step": 20500
},
{
"action_loss": 0.0214,
"epoch": 2.1137626002880068,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.026,
"step": 20550
},
{
"action_loss": 0.0207,
"epoch": 2.118905574984571,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0251,
"step": 20600
},
{
"action_loss": 0.0358,
"epoch": 2.1240485496811354,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0396,
"step": 20650
},
{
"action_loss": 0.0233,
"epoch": 2.1291915243777,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0277,
"step": 20700
},
{
"action_loss": 0.0243,
"epoch": 2.1343344990742645,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0289,
"step": 20750
},
{
"action_loss": 0.02,
"epoch": 2.139477473770829,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0244,
"step": 20800
},
{
"action_loss": 0.0209,
"epoch": 2.1446204484673936,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0251,
"step": 20850
},
{
"action_loss": 0.0207,
"epoch": 2.149763423163958,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0253,
"step": 20900
},
{
"action_loss": 0.0271,
"epoch": 2.1549063978605227,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0311,
"step": 20950
},
{
"action_loss": 0.0241,
"epoch": 2.160049372557087,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0285,
"step": 21000
},
{
"action_loss": 0.0265,
"epoch": 2.1651923472536514,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0301,
"step": 21050
},
{
"action_loss": 0.0241,
"epoch": 2.170335321950216,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0285,
"step": 21100
},
{
"action_loss": 0.0343,
"epoch": 2.1754782966467805,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0386,
"step": 21150
},
{
"action_loss": 0.0229,
"epoch": 2.180621271343345,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0268,
"step": 21200
},
{
"action_loss": 0.0207,
"epoch": 2.1857642460399096,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0249,
"step": 21250
},
{
"action_loss": 0.0282,
"epoch": 2.190907220736474,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0323,
"step": 21300
},
{
"action_loss": 0.0174,
"epoch": 2.1960501954330383,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0217,
"step": 21350
},
{
"action_loss": 0.0249,
"epoch": 2.201193170129603,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0292,
"step": 21400
},
{
"action_loss": 0.0206,
"epoch": 2.2063361448261674,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0247,
"step": 21450
},
{
"action_loss": 0.0269,
"epoch": 2.211479119522732,
"learning_rate": 2e-05,
"llm_loss": 0.0052,
"loss": 0.0321,
"step": 21500
},
{
"action_loss": 0.0238,
"epoch": 2.2166220942192965,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0279,
"step": 21550
},
{
"action_loss": 0.0194,
"epoch": 2.221765068915861,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0238,
"step": 21600
},
{
"action_loss": 0.0324,
"epoch": 2.2269080436124256,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0361,
"step": 21650
},
{
"action_loss": 0.0299,
"epoch": 2.23205101830899,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0342,
"step": 21700
},
{
"action_loss": 0.0232,
"epoch": 2.237193993005554,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0281,
"step": 21750
},
{
"action_loss": 0.0241,
"epoch": 2.2423369677021188,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0284,
"step": 21800
},
{
"action_loss": 0.037,
"epoch": 2.2474799423986833,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0417,
"step": 21850
},
{
"action_loss": 0.0283,
"epoch": 2.252622917095248,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0323,
"step": 21900
},
{
"action_loss": 0.0243,
"epoch": 2.2577658917918124,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.029,
"step": 21950
},
{
"action_loss": 0.0285,
"epoch": 2.262908866488377,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0332,
"step": 22000
},
{
"action_loss": 0.0272,
"epoch": 2.2680518411849415,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0321,
"step": 22050
},
{
"action_loss": 0.0327,
"epoch": 2.2731948158815056,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0366,
"step": 22100
},
{
"action_loss": 0.0305,
"epoch": 2.27833779057807,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0354,
"step": 22150
},
{
"action_loss": 0.023,
"epoch": 2.2834807652746347,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0273,
"step": 22200
},
{
"action_loss": 0.0318,
"epoch": 2.2886237399711993,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0366,
"step": 22250
},
{
"action_loss": 0.0236,
"epoch": 2.293766714667764,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0278,
"step": 22300
},
{
"action_loss": 0.0142,
"epoch": 2.2989096893643284,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0184,
"step": 22350
},
{
"action_loss": 0.0239,
"epoch": 2.304052664060893,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0283,
"step": 22400
},
{
"action_loss": 0.0125,
"epoch": 2.3091956387574575,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0175,
"step": 22450
},
{
"action_loss": 0.0285,
"epoch": 2.314338613454022,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0326,
"step": 22500
},
{
"action_loss": 0.0265,
"epoch": 2.319481588150586,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0309,
"step": 22550
},
{
"action_loss": 0.019,
"epoch": 2.3246245628471507,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0236,
"step": 22600
},
{
"action_loss": 0.0312,
"epoch": 2.3297675375437152,
"learning_rate": 2e-05,
"llm_loss": 0.0059,
"loss": 0.0371,
"step": 22650
},
{
"action_loss": 0.0196,
"epoch": 2.33491051224028,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0233,
"step": 22700
},
{
"action_loss": 0.0276,
"epoch": 2.3400534869368443,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0323,
"step": 22750
},
{
"action_loss": 0.034,
"epoch": 2.345196461633409,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0379,
"step": 22800
},
{
"action_loss": 0.031,
"epoch": 2.3503394363299734,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0348,
"step": 22850
},
{
"action_loss": 0.0271,
"epoch": 2.3554824110265375,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0312,
"step": 22900
},
{
"action_loss": 0.0302,
"epoch": 2.360625385723102,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0342,
"step": 22950
},
{
"action_loss": 0.0182,
"epoch": 2.3657683604196666,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.022,
"step": 23000
},
{
"action_loss": 0.0284,
"epoch": 2.370911335116231,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0325,
"step": 23050
},
{
"action_loss": 0.0282,
"epoch": 2.3760543098127958,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0325,
"step": 23100
},
{
"action_loss": 0.0213,
"epoch": 2.3811972845093603,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0249,
"step": 23150
},
{
"action_loss": 0.0248,
"epoch": 2.386340259205925,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0291,
"step": 23200
},
{
"action_loss": 0.0242,
"epoch": 2.3914832339024894,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.029,
"step": 23250
},
{
"action_loss": 0.0244,
"epoch": 2.3966262085990535,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0289,
"step": 23300
},
{
"action_loss": 0.0261,
"epoch": 2.401769183295618,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.031,
"step": 23350
},
{
"action_loss": 0.0205,
"epoch": 2.4069121579921826,
"learning_rate": 2e-05,
"llm_loss": 0.0058,
"loss": 0.0263,
"step": 23400
},
{
"action_loss": 0.026,
"epoch": 2.412055132688747,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0301,
"step": 23450
},
{
"action_loss": 0.0317,
"epoch": 2.4171981073853117,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0364,
"step": 23500
},
{
"action_loss": 0.0256,
"epoch": 2.4223410820818763,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0304,
"step": 23550
},
{
"action_loss": 0.0208,
"epoch": 2.427484056778441,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.025,
"step": 23600
},
{
"action_loss": 0.0259,
"epoch": 2.432627031475005,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0302,
"step": 23650
},
{
"action_loss": 0.033,
"epoch": 2.4377700061715695,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0378,
"step": 23700
},
{
"action_loss": 0.0287,
"epoch": 2.442912980868134,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.033,
"step": 23750
},
{
"action_loss": 0.0298,
"epoch": 2.4480559555646986,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0341,
"step": 23800
},
{
"action_loss": 0.0235,
"epoch": 2.453198930261263,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0278,
"step": 23850
},
{
"action_loss": 0.0262,
"epoch": 2.4583419049578277,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0309,
"step": 23900
},
{
"action_loss": 0.015,
"epoch": 2.4634848796543922,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0195,
"step": 23950
},
{
"action_loss": 0.0306,
"epoch": 2.4686278543509568,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0351,
"step": 24000
},
{
"action_loss": 0.0297,
"epoch": 2.4737708290475213,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0334,
"step": 24050
},
{
"action_loss": 0.0189,
"epoch": 2.4789138037440854,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0233,
"step": 24100
},
{
"action_loss": 0.0237,
"epoch": 2.48405677844065,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.028,
"step": 24150
},
{
"action_loss": 0.017,
"epoch": 2.4891997531372145,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0217,
"step": 24200
},
{
"action_loss": 0.0235,
"epoch": 2.494342727833779,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0283,
"step": 24250
},
{
"action_loss": 0.0258,
"epoch": 2.4994857025303436,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0306,
"step": 24300
},
{
"action_loss": 0.0309,
"epoch": 2.504628677226908,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0349,
"step": 24350
},
{
"action_loss": 0.0322,
"epoch": 2.5097716519234723,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0364,
"step": 24400
},
{
"action_loss": 0.028,
"epoch": 2.514914626620037,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.032,
"step": 24450
},
{
"action_loss": 0.0285,
"epoch": 2.5200576013166014,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0329,
"step": 24500
},
{
"action_loss": 0.0272,
"epoch": 2.525200576013166,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0316,
"step": 24550
},
{
"action_loss": 0.0304,
"epoch": 2.5303435507097305,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0338,
"step": 24600
},
{
"action_loss": 0.0302,
"epoch": 2.535486525406295,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0346,
"step": 24650
},
{
"action_loss": 0.0248,
"epoch": 2.5406295001028596,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0291,
"step": 24700
},
{
"action_loss": 0.0268,
"epoch": 2.545772474799424,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0308,
"step": 24750
},
{
"action_loss": 0.0387,
"epoch": 2.5509154494959887,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0427,
"step": 24800
},
{
"action_loss": 0.017,
"epoch": 2.5560584241925532,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0217,
"step": 24850
},
{
"action_loss": 0.0298,
"epoch": 2.5612013988891174,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0344,
"step": 24900
},
{
"action_loss": 0.0308,
"epoch": 2.566344373585682,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0355,
"step": 24950
},
{
"action_loss": 0.0221,
"epoch": 2.5714873482822465,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0261,
"step": 25000
},
{
"action_loss": 0.024,
"epoch": 2.576630322978811,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0284,
"step": 25050
},
{
"action_loss": 0.0131,
"epoch": 2.5817732976753756,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0175,
"step": 25100
},
{
"action_loss": 0.0251,
"epoch": 2.5869162723719397,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0285,
"step": 25150
},
{
"action_loss": 0.0301,
"epoch": 2.592059247068504,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0346,
"step": 25200
},
{
"action_loss": 0.022,
"epoch": 2.5972022217650688,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.026,
"step": 25250
},
{
"action_loss": 0.0351,
"epoch": 2.6023451964616333,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0396,
"step": 25300
},
{
"action_loss": 0.0274,
"epoch": 2.607488171158198,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0311,
"step": 25350
},
{
"action_loss": 0.0242,
"epoch": 2.6126311458547624,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.028,
"step": 25400
},
{
"action_loss": 0.0293,
"epoch": 2.617774120551327,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0334,
"step": 25450
},
{
"action_loss": 0.0289,
"epoch": 2.6229170952478915,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0334,
"step": 25500
},
{
"action_loss": 0.0293,
"epoch": 2.628060069944456,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0335,
"step": 25550
},
{
"action_loss": 0.02,
"epoch": 2.6332030446410206,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0246,
"step": 25600
},
{
"action_loss": 0.0261,
"epoch": 2.6383460193375847,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0302,
"step": 25650
},
{
"action_loss": 0.0225,
"epoch": 2.6434889940341493,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0262,
"step": 25700
},
{
"action_loss": 0.0293,
"epoch": 2.648631968730714,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0336,
"step": 25750
},
{
"action_loss": 0.0185,
"epoch": 2.6537749434272784,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0229,
"step": 25800
},
{
"action_loss": 0.0335,
"epoch": 2.658917918123843,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0375,
"step": 25850
},
{
"action_loss": 0.032,
"epoch": 2.6640608928204075,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0359,
"step": 25900
},
{
"action_loss": 0.02,
"epoch": 2.6692038675169716,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0244,
"step": 25950
},
{
"action_loss": 0.0226,
"epoch": 2.674346842213536,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0271,
"step": 26000
},
{
"action_loss": 0.0209,
"epoch": 2.6794898169101007,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.025,
"step": 26050
},
{
"action_loss": 0.0287,
"epoch": 2.6846327916066652,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0329,
"step": 26100
},
{
"action_loss": 0.0272,
"epoch": 2.68977576630323,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0319,
"step": 26150
},
{
"action_loss": 0.0324,
"epoch": 2.6949187409997943,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0372,
"step": 26200
},
{
"action_loss": 0.0295,
"epoch": 2.700061715696359,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0339,
"step": 26250
},
{
"action_loss": 0.0261,
"epoch": 2.7052046903929234,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0309,
"step": 26300
},
{
"action_loss": 0.0345,
"epoch": 2.710347665089488,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0387,
"step": 26350
},
{
"action_loss": 0.0242,
"epoch": 2.715490639786052,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0287,
"step": 26400
},
{
"action_loss": 0.0262,
"epoch": 2.7206336144826166,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0309,
"step": 26450
},
{
"action_loss": 0.0192,
"epoch": 2.725776589179181,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0236,
"step": 26500
},
{
"action_loss": 0.0173,
"epoch": 2.7309195638757457,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0213,
"step": 26550
},
{
"action_loss": 0.0256,
"epoch": 2.7360625385723103,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.0304,
"step": 26600
},
{
"action_loss": 0.0268,
"epoch": 2.741205513268875,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0309,
"step": 26650
},
{
"action_loss": 0.02,
"epoch": 2.746348487965439,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0238,
"step": 26700
},
{
"action_loss": 0.03,
"epoch": 2.7514914626620035,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0345,
"step": 26750
},
{
"action_loss": 0.0189,
"epoch": 2.756634437358568,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.023,
"step": 26800
},
{
"action_loss": 0.0351,
"epoch": 2.7617774120551326,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0394,
"step": 26850
},
{
"action_loss": 0.0347,
"epoch": 2.766920386751697,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.039,
"step": 26900
},
{
"action_loss": 0.0203,
"epoch": 2.7720633614482617,
"learning_rate": 2e-05,
"llm_loss": 0.0049,
"loss": 0.0252,
"step": 26950
},
{
"action_loss": 0.0238,
"epoch": 2.7772063361448263,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0289,
"step": 27000
},
{
"action_loss": 0.0204,
"epoch": 2.782349310841391,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0238,
"step": 27050
},
{
"action_loss": 0.0245,
"epoch": 2.7874922855379554,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0288,
"step": 27100
},
{
"action_loss": 0.0247,
"epoch": 2.79263526023452,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0288,
"step": 27150
},
{
"action_loss": 0.0202,
"epoch": 2.797778234931084,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0239,
"step": 27200
},
{
"action_loss": 0.0223,
"epoch": 2.8029212096276486,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0262,
"step": 27250
},
{
"action_loss": 0.0364,
"epoch": 2.808064184324213,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0411,
"step": 27300
},
{
"action_loss": 0.0256,
"epoch": 2.8132071590207777,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.029,
"step": 27350
},
{
"action_loss": 0.0214,
"epoch": 2.818350133717342,
"learning_rate": 2e-05,
"llm_loss": 0.005,
"loss": 0.0264,
"step": 27400
},
{
"action_loss": 0.0209,
"epoch": 2.8234931084139063,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0253,
"step": 27450
},
{
"action_loss": 0.0316,
"epoch": 2.828636083110471,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0352,
"step": 27500
},
{
"action_loss": 0.0256,
"epoch": 2.8337790578070354,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0303,
"step": 27550
},
{
"action_loss": 0.0195,
"epoch": 2.8389220325036,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0236,
"step": 27600
},
{
"action_loss": 0.0244,
"epoch": 2.8440650072001645,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0285,
"step": 27650
},
{
"action_loss": 0.0199,
"epoch": 2.849207981896729,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0245,
"step": 27700
},
{
"action_loss": 0.0263,
"epoch": 2.8543509565932936,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0306,
"step": 27750
},
{
"action_loss": 0.0322,
"epoch": 2.859493931289858,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0366,
"step": 27800
},
{
"action_loss": 0.029,
"epoch": 2.8646369059864227,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.033,
"step": 27850
},
{
"action_loss": 0.0335,
"epoch": 2.8697798806829873,
"learning_rate": 2e-05,
"llm_loss": 0.0051,
"loss": 0.0386,
"step": 27900
},
{
"action_loss": 0.0277,
"epoch": 2.8749228553795514,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0313,
"step": 27950
},
{
"action_loss": 0.0247,
"epoch": 2.880065830076116,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0286,
"step": 28000
},
{
"action_loss": 0.0235,
"epoch": 2.8852088047726805,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0278,
"step": 28050
},
{
"action_loss": 0.0246,
"epoch": 2.890351779469245,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0281,
"step": 28100
},
{
"action_loss": 0.0262,
"epoch": 2.8954947541658096,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0306,
"step": 28150
},
{
"action_loss": 0.0242,
"epoch": 2.900637728862374,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.028,
"step": 28200
},
{
"action_loss": 0.029,
"epoch": 2.9057807035589382,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0332,
"step": 28250
},
{
"action_loss": 0.0261,
"epoch": 2.910923678255503,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0298,
"step": 28300
},
{
"action_loss": 0.0325,
"epoch": 2.9160666529520674,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.037,
"step": 28350
},
{
"action_loss": 0.0328,
"epoch": 2.921209627648632,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0372,
"step": 28400
},
{
"action_loss": 0.0186,
"epoch": 2.9263526023451965,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0229,
"step": 28450
},
{
"action_loss": 0.0191,
"epoch": 2.931495577041761,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.0238,
"step": 28500
},
{
"action_loss": 0.021,
"epoch": 2.9366385517383256,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0249,
"step": 28550
},
{
"action_loss": 0.0272,
"epoch": 2.94178152643489,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0315,
"step": 28600
},
{
"action_loss": 0.0212,
"epoch": 2.9469245011314547,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0255,
"step": 28650
},
{
"action_loss": 0.0294,
"epoch": 2.9520674758280188,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0331,
"step": 28700
},
{
"action_loss": 0.026,
"epoch": 2.9572104505245833,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.03,
"step": 28750
},
{
"action_loss": 0.0275,
"epoch": 2.962353425221148,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0319,
"step": 28800
},
{
"action_loss": 0.0266,
"epoch": 2.9674963999177124,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0309,
"step": 28850
},
{
"action_loss": 0.022,
"epoch": 2.972639374614277,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0265,
"step": 28900
},
{
"action_loss": 0.0312,
"epoch": 2.9777823493108415,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0353,
"step": 28950
},
{
"action_loss": 0.0341,
"epoch": 2.9829253240074056,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0381,
"step": 29000
},
{
"action_loss": 0.0316,
"epoch": 2.98806829870397,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0345,
"step": 29050
},
{
"action_loss": 0.0251,
"epoch": 2.9932112734005347,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0291,
"step": 29100
},
{
"action_loss": 0.031,
"epoch": 2.9983542480970993,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0352,
"step": 29150
},
{
"action_loss": 0.0266,
"epoch": 3.003497222793664,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0304,
"step": 29200
},
{
"action_loss": 0.0257,
"epoch": 3.0086401974902284,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0295,
"step": 29250
},
{
"action_loss": 0.0273,
"epoch": 3.013783172186793,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0307,
"step": 29300
},
{
"action_loss": 0.0244,
"epoch": 3.0189261468833575,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0282,
"step": 29350
},
{
"action_loss": 0.0289,
"epoch": 3.024069121579922,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.033,
"step": 29400
},
{
"action_loss": 0.0303,
"epoch": 3.029212096276486,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0339,
"step": 29450
},
{
"action_loss": 0.0259,
"epoch": 3.0343550709730507,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0291,
"step": 29500
},
{
"action_loss": 0.0266,
"epoch": 3.0394980456696152,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0305,
"step": 29550
},
{
"action_loss": 0.0118,
"epoch": 3.04464102036618,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0159,
"step": 29600
},
{
"action_loss": 0.0216,
"epoch": 3.0497839950627443,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0254,
"step": 29650
},
{
"action_loss": 0.0323,
"epoch": 3.054926969759309,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0362,
"step": 29700
},
{
"action_loss": 0.0308,
"epoch": 3.0600699444558734,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0346,
"step": 29750
},
{
"action_loss": 0.0312,
"epoch": 3.0652129191524375,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0344,
"step": 29800
},
{
"action_loss": 0.0345,
"epoch": 3.070355893849002,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0376,
"step": 29850
},
{
"action_loss": 0.0166,
"epoch": 3.0754988685455666,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0204,
"step": 29900
},
{
"action_loss": 0.0312,
"epoch": 3.080641843242131,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0347,
"step": 29950
},
{
"action_loss": 0.0268,
"epoch": 3.0857848179386957,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0301,
"step": 30000
},
{
"action_loss": 0.0237,
"epoch": 3.0909277926352603,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.027,
"step": 30050
},
{
"action_loss": 0.0285,
"epoch": 3.096070767331825,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.032,
"step": 30100
},
{
"action_loss": 0.0285,
"epoch": 3.1012137420283894,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0322,
"step": 30150
},
{
"action_loss": 0.0239,
"epoch": 3.1063567167249535,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0272,
"step": 30200
},
{
"action_loss": 0.0221,
"epoch": 3.111499691421518,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0255,
"step": 30250
},
{
"action_loss": 0.023,
"epoch": 3.1166426661180826,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.0275,
"step": 30300
},
{
"action_loss": 0.0236,
"epoch": 3.121785640814647,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0272,
"step": 30350
},
{
"action_loss": 0.0259,
"epoch": 3.1269286155112117,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0296,
"step": 30400
},
{
"action_loss": 0.0227,
"epoch": 3.1320715902077763,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0263,
"step": 30450
},
{
"action_loss": 0.0203,
"epoch": 3.137214564904341,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0238,
"step": 30500
},
{
"action_loss": 0.0245,
"epoch": 3.1423575396009054,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0282,
"step": 30550
},
{
"action_loss": 0.0306,
"epoch": 3.1475005142974695,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0339,
"step": 30600
},
{
"action_loss": 0.0182,
"epoch": 3.152643488994034,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.022,
"step": 30650
},
{
"action_loss": 0.0328,
"epoch": 3.1577864636905986,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0363,
"step": 30700
},
{
"action_loss": 0.0322,
"epoch": 3.162929438387163,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0361,
"step": 30750
},
{
"action_loss": 0.0197,
"epoch": 3.1680724130837277,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0237,
"step": 30800
},
{
"action_loss": 0.0288,
"epoch": 3.173215387780292,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0321,
"step": 30850
},
{
"action_loss": 0.0307,
"epoch": 3.1783583624768568,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0345,
"step": 30900
},
{
"action_loss": 0.0379,
"epoch": 3.1835013371734213,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0416,
"step": 30950
},
{
"action_loss": 0.019,
"epoch": 3.1886443118699854,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0231,
"step": 31000
},
{
"action_loss": 0.0254,
"epoch": 3.19378728656655,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0287,
"step": 31050
},
{
"action_loss": 0.0238,
"epoch": 3.1989302612631145,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0276,
"step": 31100
},
{
"action_loss": 0.0233,
"epoch": 3.204073235959679,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0275,
"step": 31150
},
{
"action_loss": 0.0268,
"epoch": 3.2092162106562436,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0311,
"step": 31200
},
{
"action_loss": 0.0185,
"epoch": 3.214359185352808,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0227,
"step": 31250
},
{
"action_loss": 0.0332,
"epoch": 3.2195021600493727,
"learning_rate": 2e-05,
"llm_loss": 0.0048,
"loss": 0.038,
"step": 31300
},
{
"action_loss": 0.0227,
"epoch": 3.224645134745937,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0262,
"step": 31350
},
{
"action_loss": 0.0211,
"epoch": 3.2297881094425014,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0246,
"step": 31400
},
{
"action_loss": 0.0327,
"epoch": 3.234931084139066,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0363,
"step": 31450
},
{
"action_loss": 0.0148,
"epoch": 3.2400740588356305,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.018,
"step": 31500
},
{
"action_loss": 0.0303,
"epoch": 3.245217033532195,
"learning_rate": 2e-05,
"llm_loss": 0.0047,
"loss": 0.035,
"step": 31550
},
{
"action_loss": 0.0262,
"epoch": 3.2503600082287596,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0296,
"step": 31600
},
{
"action_loss": 0.0324,
"epoch": 3.255502982925324,
"learning_rate": 2e-05,
"llm_loss": 0.0045,
"loss": 0.037,
"step": 31650
},
{
"action_loss": 0.024,
"epoch": 3.2606459576218887,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0279,
"step": 31700
},
{
"action_loss": 0.0183,
"epoch": 3.2657889323184532,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0219,
"step": 31750
},
{
"action_loss": 0.0219,
"epoch": 3.2709319070150173,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0254,
"step": 31800
},
{
"action_loss": 0.0177,
"epoch": 3.276074881711582,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0218,
"step": 31850
},
{
"action_loss": 0.0168,
"epoch": 3.2812178564081464,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0212,
"step": 31900
},
{
"action_loss": 0.0345,
"epoch": 3.286360831104711,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0379,
"step": 31950
},
{
"action_loss": 0.016,
"epoch": 3.2915038058012756,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0198,
"step": 32000
},
{
"action_loss": 0.0333,
"epoch": 3.29664678049784,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.037,
"step": 32050
},
{
"action_loss": 0.0228,
"epoch": 3.301789755194404,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0263,
"step": 32100
},
{
"action_loss": 0.0175,
"epoch": 3.3069327298909688,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0213,
"step": 32150
},
{
"action_loss": 0.0335,
"epoch": 3.3120757045875333,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.037,
"step": 32200
},
{
"action_loss": 0.0223,
"epoch": 3.317218679284098,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0256,
"step": 32250
},
{
"action_loss": 0.022,
"epoch": 3.3223616539806624,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.026,
"step": 32300
},
{
"action_loss": 0.0269,
"epoch": 3.327504628677227,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0307,
"step": 32350
},
{
"action_loss": 0.0172,
"epoch": 3.3326476033737915,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.021,
"step": 32400
},
{
"action_loss": 0.018,
"epoch": 3.337790578070356,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0218,
"step": 32450
},
{
"action_loss": 0.0236,
"epoch": 3.3429335527669206,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0272,
"step": 32500
},
{
"action_loss": 0.0252,
"epoch": 3.3480765274634847,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0286,
"step": 32550
},
{
"action_loss": 0.0245,
"epoch": 3.3532195021600493,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0282,
"step": 32600
},
{
"action_loss": 0.0271,
"epoch": 3.358362476856614,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0306,
"step": 32650
},
{
"action_loss": 0.0176,
"epoch": 3.3635054515531784,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0214,
"step": 32700
},
{
"action_loss": 0.0252,
"epoch": 3.368648426249743,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0289,
"step": 32750
},
{
"action_loss": 0.0189,
"epoch": 3.3737914009463075,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0233,
"step": 32800
},
{
"action_loss": 0.0215,
"epoch": 3.378934375642872,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0251,
"step": 32850
},
{
"action_loss": 0.0271,
"epoch": 3.384077350339436,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.031,
"step": 32900
},
{
"action_loss": 0.0275,
"epoch": 3.3892203250360007,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0317,
"step": 32950
},
{
"action_loss": 0.0198,
"epoch": 3.3943632997325652,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0236,
"step": 33000
},
{
"action_loss": 0.0256,
"epoch": 3.39950627442913,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0292,
"step": 33050
},
{
"action_loss": 0.03,
"epoch": 3.4046492491256943,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.034,
"step": 33100
},
{
"action_loss": 0.0318,
"epoch": 3.409792223822259,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0347,
"step": 33150
},
{
"action_loss": 0.0285,
"epoch": 3.4149351985188234,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0316,
"step": 33200
},
{
"action_loss": 0.0278,
"epoch": 3.420078173215388,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0319,
"step": 33250
},
{
"action_loss": 0.0304,
"epoch": 3.425221147911952,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0337,
"step": 33300
},
{
"action_loss": 0.0176,
"epoch": 3.4303641226085166,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0208,
"step": 33350
},
{
"action_loss": 0.0307,
"epoch": 3.435507097305081,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0341,
"step": 33400
},
{
"action_loss": 0.0171,
"epoch": 3.4406500720016457,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0204,
"step": 33450
},
{
"action_loss": 0.0256,
"epoch": 3.4457930466982103,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0293,
"step": 33500
},
{
"action_loss": 0.0285,
"epoch": 3.450936021394775,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0326,
"step": 33550
},
{
"action_loss": 0.0193,
"epoch": 3.4560789960913394,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0226,
"step": 33600
},
{
"action_loss": 0.0214,
"epoch": 3.4612219707879035,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0251,
"step": 33650
},
{
"action_loss": 0.0231,
"epoch": 3.466364945484468,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.027,
"step": 33700
},
{
"action_loss": 0.0147,
"epoch": 3.4715079201810326,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0182,
"step": 33750
},
{
"action_loss": 0.0238,
"epoch": 3.476650894877597,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0271,
"step": 33800
},
{
"action_loss": 0.0167,
"epoch": 3.4817938695741617,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0204,
"step": 33850
},
{
"action_loss": 0.0288,
"epoch": 3.4869368442707263,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0324,
"step": 33900
},
{
"action_loss": 0.0308,
"epoch": 3.492079818967291,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0342,
"step": 33950
},
{
"action_loss": 0.0361,
"epoch": 3.4972227936638554,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0393,
"step": 34000
},
{
"action_loss": 0.0253,
"epoch": 3.50236576836042,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0289,
"step": 34050
},
{
"action_loss": 0.0276,
"epoch": 3.507508743056984,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0311,
"step": 34100
},
{
"action_loss": 0.0277,
"epoch": 3.5126517177535486,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.031,
"step": 34150
},
{
"action_loss": 0.0232,
"epoch": 3.517794692450113,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0268,
"step": 34200
},
{
"action_loss": 0.0153,
"epoch": 3.5229376671466777,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0186,
"step": 34250
},
{
"action_loss": 0.0209,
"epoch": 3.528080641843242,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0246,
"step": 34300
},
{
"action_loss": 0.0139,
"epoch": 3.5332236165398068,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0172,
"step": 34350
},
{
"action_loss": 0.015,
"epoch": 3.538366591236371,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.019,
"step": 34400
},
{
"action_loss": 0.0266,
"epoch": 3.5435095659329354,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0303,
"step": 34450
},
{
"action_loss": 0.0237,
"epoch": 3.5486525406295,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0271,
"step": 34500
},
{
"action_loss": 0.0205,
"epoch": 3.5537955153260645,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0252,
"step": 34550
},
{
"action_loss": 0.0247,
"epoch": 3.558938490022629,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0291,
"step": 34600
},
{
"action_loss": 0.0269,
"epoch": 3.5640814647191936,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0302,
"step": 34650
},
{
"action_loss": 0.0217,
"epoch": 3.569224439415758,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0254,
"step": 34700
},
{
"action_loss": 0.0187,
"epoch": 3.5743674141123227,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0223,
"step": 34750
},
{
"action_loss": 0.0258,
"epoch": 3.5795103888088873,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0297,
"step": 34800
},
{
"action_loss": 0.0277,
"epoch": 3.584653363505452,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0316,
"step": 34850
},
{
"action_loss": 0.028,
"epoch": 3.589796338202016,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0317,
"step": 34900
},
{
"action_loss": 0.027,
"epoch": 3.5949393128985805,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0307,
"step": 34950
},
{
"action_loss": 0.0209,
"epoch": 3.600082287595145,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0244,
"step": 35000
},
{
"action_loss": 0.0165,
"epoch": 3.6052252622917096,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0203,
"step": 35050
},
{
"action_loss": 0.0198,
"epoch": 3.610368236988274,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0238,
"step": 35100
},
{
"action_loss": 0.0207,
"epoch": 3.6155112116848382,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0248,
"step": 35150
},
{
"action_loss": 0.0261,
"epoch": 3.620654186381403,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0295,
"step": 35200
},
{
"action_loss": 0.03,
"epoch": 3.6257971610779673,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0336,
"step": 35250
},
{
"action_loss": 0.0282,
"epoch": 3.630940135774532,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0313,
"step": 35300
},
{
"action_loss": 0.0295,
"epoch": 3.6360831104710964,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0326,
"step": 35350
},
{
"action_loss": 0.0206,
"epoch": 3.641226085167661,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0239,
"step": 35400
},
{
"action_loss": 0.0226,
"epoch": 3.6463690598642255,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0261,
"step": 35450
},
{
"action_loss": 0.0218,
"epoch": 3.65151203456079,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0257,
"step": 35500
},
{
"action_loss": 0.017,
"epoch": 3.6566550092573546,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0209,
"step": 35550
},
{
"action_loss": 0.0208,
"epoch": 3.661797983953919,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0247,
"step": 35600
},
{
"action_loss": 0.0303,
"epoch": 3.6669409586504833,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0334,
"step": 35650
},
{
"action_loss": 0.022,
"epoch": 3.672083933347048,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0258,
"step": 35700
},
{
"action_loss": 0.0239,
"epoch": 3.6772269080436124,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0273,
"step": 35750
},
{
"action_loss": 0.025,
"epoch": 3.682369882740177,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0283,
"step": 35800
},
{
"action_loss": 0.0283,
"epoch": 3.6875128574367415,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0323,
"step": 35850
},
{
"action_loss": 0.021,
"epoch": 3.692655832133306,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0244,
"step": 35900
},
{
"action_loss": 0.0262,
"epoch": 3.69779880682987,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.03,
"step": 35950
},
{
"action_loss": 0.0361,
"epoch": 3.7029417815264347,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0396,
"step": 36000
},
{
"action_loss": 0.0286,
"epoch": 3.7080847562229993,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.032,
"step": 36050
},
{
"action_loss": 0.0125,
"epoch": 3.713227730919564,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0167,
"step": 36100
},
{
"action_loss": 0.0284,
"epoch": 3.7183707056161284,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0318,
"step": 36150
},
{
"action_loss": 0.0262,
"epoch": 3.723513680312693,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0295,
"step": 36200
},
{
"action_loss": 0.0234,
"epoch": 3.7286566550092575,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0265,
"step": 36250
},
{
"action_loss": 0.0292,
"epoch": 3.733799629705822,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0338,
"step": 36300
},
{
"action_loss": 0.0242,
"epoch": 3.7389426044023866,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0281,
"step": 36350
},
{
"action_loss": 0.0141,
"epoch": 3.7440855790989507,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0174,
"step": 36400
},
{
"action_loss": 0.0201,
"epoch": 3.7492285537955152,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0237,
"step": 36450
},
{
"action_loss": 0.0254,
"epoch": 3.75437152849208,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0289,
"step": 36500
},
{
"action_loss": 0.0184,
"epoch": 3.7595145031886443,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0222,
"step": 36550
},
{
"action_loss": 0.0229,
"epoch": 3.764657477885209,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.027,
"step": 36600
},
{
"action_loss": 0.024,
"epoch": 3.7698004525817734,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0279,
"step": 36650
},
{
"action_loss": 0.0215,
"epoch": 3.7749434272783375,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0251,
"step": 36700
},
{
"action_loss": 0.026,
"epoch": 3.780086401974902,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0298,
"step": 36750
},
{
"action_loss": 0.034,
"epoch": 3.7852293766714666,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0375,
"step": 36800
},
{
"action_loss": 0.0289,
"epoch": 3.790372351368031,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0328,
"step": 36850
},
{
"action_loss": 0.0232,
"epoch": 3.7955153260645957,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0262,
"step": 36900
},
{
"action_loss": 0.0278,
"epoch": 3.8006583007611603,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0316,
"step": 36950
},
{
"action_loss": 0.0267,
"epoch": 3.805801275457725,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0301,
"step": 37000
},
{
"action_loss": 0.0323,
"epoch": 3.8109442501542894,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0359,
"step": 37050
},
{
"action_loss": 0.0258,
"epoch": 3.816087224850854,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0295,
"step": 37100
},
{
"action_loss": 0.016,
"epoch": 3.8212301995474185,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0205,
"step": 37150
},
{
"action_loss": 0.0302,
"epoch": 3.8263731742439826,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0334,
"step": 37200
},
{
"action_loss": 0.028,
"epoch": 3.831516148940547,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.031,
"step": 37250
},
{
"action_loss": 0.0228,
"epoch": 3.8366591236371117,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0266,
"step": 37300
},
{
"action_loss": 0.0205,
"epoch": 3.8418020983336763,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0238,
"step": 37350
},
{
"action_loss": 0.0248,
"epoch": 3.846945073030241,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0279,
"step": 37400
},
{
"action_loss": 0.0265,
"epoch": 3.852088047726805,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.03,
"step": 37450
},
{
"action_loss": 0.0319,
"epoch": 3.8572310224233695,
"learning_rate": 2e-05,
"llm_loss": 0.0041,
"loss": 0.0361,
"step": 37500
},
{
"action_loss": 0.0313,
"epoch": 3.862373997119934,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0344,
"step": 37550
},
{
"action_loss": 0.0243,
"epoch": 3.8675169718164986,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0278,
"step": 37600
},
{
"action_loss": 0.0189,
"epoch": 3.872659946513063,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0225,
"step": 37650
},
{
"action_loss": 0.0204,
"epoch": 3.8778029212096277,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0241,
"step": 37700
},
{
"action_loss": 0.0298,
"epoch": 3.882945895906192,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0332,
"step": 37750
},
{
"action_loss": 0.035,
"epoch": 3.8880888706027568,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0386,
"step": 37800
},
{
"action_loss": 0.0278,
"epoch": 3.8932318452993213,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0314,
"step": 37850
},
{
"action_loss": 0.0222,
"epoch": 3.898374819995886,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0268,
"step": 37900
},
{
"action_loss": 0.0233,
"epoch": 3.90351779469245,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0278,
"step": 37950
},
{
"action_loss": 0.0196,
"epoch": 3.9086607693890145,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0239,
"step": 38000
},
{
"action_loss": 0.0228,
"epoch": 3.913803744085579,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0263,
"step": 38050
},
{
"action_loss": 0.021,
"epoch": 3.9189467187821436,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0246,
"step": 38100
},
{
"action_loss": 0.0211,
"epoch": 3.924089693478708,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0245,
"step": 38150
},
{
"action_loss": 0.0324,
"epoch": 3.9292326681752727,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0356,
"step": 38200
},
{
"action_loss": 0.0231,
"epoch": 3.934375642871837,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0267,
"step": 38250
},
{
"action_loss": 0.0204,
"epoch": 3.9395186175684014,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.024,
"step": 38300
},
{
"action_loss": 0.0261,
"epoch": 3.944661592264966,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0292,
"step": 38350
},
{
"action_loss": 0.0196,
"epoch": 3.9498045669615305,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0233,
"step": 38400
},
{
"action_loss": 0.0341,
"epoch": 3.954947541658095,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0377,
"step": 38450
},
{
"action_loss": 0.0225,
"epoch": 3.9600905163546596,
"learning_rate": 2e-05,
"llm_loss": 0.0044,
"loss": 0.0268,
"step": 38500
},
{
"action_loss": 0.0241,
"epoch": 3.965233491051224,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0273,
"step": 38550
},
{
"action_loss": 0.0246,
"epoch": 3.9703764657477887,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0278,
"step": 38600
},
{
"action_loss": 0.0216,
"epoch": 3.9755194404443532,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0256,
"step": 38650
},
{
"action_loss": 0.0315,
"epoch": 3.9806624151409173,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0345,
"step": 38700
},
{
"action_loss": 0.0218,
"epoch": 3.985805389837482,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0249,
"step": 38750
},
{
"action_loss": 0.0245,
"epoch": 3.9909483645340464,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0282,
"step": 38800
},
{
"action_loss": 0.0206,
"epoch": 3.996091339230611,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0244,
"step": 38850
},
{
"action_loss": 0.0231,
"epoch": 4.001234313927175,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0267,
"step": 38900
},
{
"action_loss": 0.0189,
"epoch": 4.00637728862374,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0222,
"step": 38950
},
{
"action_loss": 0.0143,
"epoch": 4.011520263320304,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0179,
"step": 39000
},
{
"action_loss": 0.0197,
"epoch": 4.016663238016869,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0227,
"step": 39050
},
{
"action_loss": 0.0262,
"epoch": 4.021806212713433,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0297,
"step": 39100
},
{
"action_loss": 0.0347,
"epoch": 4.026949187409998,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0377,
"step": 39150
},
{
"action_loss": 0.0248,
"epoch": 4.032092162106562,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0283,
"step": 39200
},
{
"action_loss": 0.0122,
"epoch": 4.037235136803127,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0151,
"step": 39250
},
{
"action_loss": 0.0288,
"epoch": 4.0423781114996915,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0319,
"step": 39300
},
{
"action_loss": 0.0262,
"epoch": 4.047521086196256,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0289,
"step": 39350
},
{
"action_loss": 0.0317,
"epoch": 4.052664060892821,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0356,
"step": 39400
},
{
"action_loss": 0.0236,
"epoch": 4.057807035589385,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0268,
"step": 39450
},
{
"action_loss": 0.039,
"epoch": 4.06295001028595,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.042,
"step": 39500
},
{
"action_loss": 0.0253,
"epoch": 4.068092984982514,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0287,
"step": 39550
},
{
"action_loss": 0.0355,
"epoch": 4.073235959679079,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.039,
"step": 39600
},
{
"action_loss": 0.0288,
"epoch": 4.0783789343756425,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.032,
"step": 39650
},
{
"action_loss": 0.0185,
"epoch": 4.083521909072207,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0215,
"step": 39700
},
{
"action_loss": 0.0246,
"epoch": 4.088664883768772,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0285,
"step": 39750
},
{
"action_loss": 0.0314,
"epoch": 4.093807858465336,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0347,
"step": 39800
},
{
"action_loss": 0.0235,
"epoch": 4.098950833161901,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0263,
"step": 39850
},
{
"action_loss": 0.0275,
"epoch": 4.104093807858465,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0307,
"step": 39900
},
{
"action_loss": 0.0346,
"epoch": 4.10923678255503,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0379,
"step": 39950
},
{
"action_loss": 0.0255,
"epoch": 4.114379757251594,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0282,
"step": 40000
},
{
"action_loss": 0.0146,
"epoch": 4.119522731948159,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0177,
"step": 40050
},
{
"action_loss": 0.0309,
"epoch": 4.124665706644723,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0339,
"step": 40100
},
{
"action_loss": 0.0247,
"epoch": 4.129808681341288,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0277,
"step": 40150
},
{
"action_loss": 0.0278,
"epoch": 4.1349516560378525,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0311,
"step": 40200
},
{
"action_loss": 0.0215,
"epoch": 4.140094630734417,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.025,
"step": 40250
},
{
"action_loss": 0.0213,
"epoch": 4.145237605430982,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0245,
"step": 40300
},
{
"action_loss": 0.0315,
"epoch": 4.150380580127546,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0348,
"step": 40350
},
{
"action_loss": 0.0225,
"epoch": 4.15552355482411,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0259,
"step": 40400
},
{
"action_loss": 0.0287,
"epoch": 4.160666529520674,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0315,
"step": 40450
},
{
"action_loss": 0.0236,
"epoch": 4.165809504217239,
"learning_rate": 2e-05,
"llm_loss": 0.004,
"loss": 0.0277,
"step": 40500
},
{
"action_loss": 0.0191,
"epoch": 4.1709524789138035,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0223,
"step": 40550
},
{
"action_loss": 0.031,
"epoch": 4.176095453610368,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0347,
"step": 40600
},
{
"action_loss": 0.0377,
"epoch": 4.181238428306933,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0409,
"step": 40650
},
{
"action_loss": 0.035,
"epoch": 4.186381403003497,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0385,
"step": 40700
},
{
"action_loss": 0.0252,
"epoch": 4.191524377700062,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0287,
"step": 40750
},
{
"action_loss": 0.0343,
"epoch": 4.196667352396626,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0376,
"step": 40800
},
{
"action_loss": 0.0318,
"epoch": 4.201810327093191,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0352,
"step": 40850
},
{
"action_loss": 0.0237,
"epoch": 4.206953301789755,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0271,
"step": 40900
},
{
"action_loss": 0.0319,
"epoch": 4.21209627648632,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0348,
"step": 40950
},
{
"action_loss": 0.0288,
"epoch": 4.2172392511828845,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0319,
"step": 41000
},
{
"action_loss": 0.0224,
"epoch": 4.222382225879449,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0254,
"step": 41050
},
{
"action_loss": 0.0235,
"epoch": 4.2275252005760136,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0261,
"step": 41100
},
{
"action_loss": 0.0296,
"epoch": 4.232668175272577,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0324,
"step": 41150
},
{
"action_loss": 0.0356,
"epoch": 4.237811149969142,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0386,
"step": 41200
},
{
"action_loss": 0.0245,
"epoch": 4.242954124665706,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0277,
"step": 41250
},
{
"action_loss": 0.0194,
"epoch": 4.248097099362271,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0227,
"step": 41300
},
{
"action_loss": 0.0262,
"epoch": 4.253240074058835,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.03,
"step": 41350
},
{
"action_loss": 0.0256,
"epoch": 4.2583830487554,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0292,
"step": 41400
},
{
"action_loss": 0.03,
"epoch": 4.2635260234519645,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0335,
"step": 41450
},
{
"action_loss": 0.0293,
"epoch": 4.268668998148529,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0325,
"step": 41500
},
{
"action_loss": 0.0315,
"epoch": 4.273811972845094,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0345,
"step": 41550
},
{
"action_loss": 0.0249,
"epoch": 4.278954947541658,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0278,
"step": 41600
},
{
"action_loss": 0.025,
"epoch": 4.284097922238223,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0285,
"step": 41650
},
{
"action_loss": 0.034,
"epoch": 4.289240896934787,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0374,
"step": 41700
},
{
"action_loss": 0.0359,
"epoch": 4.294383871631352,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0391,
"step": 41750
},
{
"action_loss": 0.019,
"epoch": 4.299526846327916,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0221,
"step": 41800
},
{
"action_loss": 0.0249,
"epoch": 4.304669821024481,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0285,
"step": 41850
},
{
"action_loss": 0.0213,
"epoch": 4.3098127957210455,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0244,
"step": 41900
},
{
"action_loss": 0.0213,
"epoch": 4.314955770417609,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0246,
"step": 41950
},
{
"action_loss": 0.03,
"epoch": 4.320098745114174,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.033,
"step": 42000
},
{
"action_loss": 0.0249,
"epoch": 4.325241719810738,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0283,
"step": 42050
},
{
"action_loss": 0.0218,
"epoch": 4.330384694507303,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0255,
"step": 42100
},
{
"action_loss": 0.0202,
"epoch": 4.335527669203867,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0239,
"step": 42150
},
{
"action_loss": 0.0312,
"epoch": 4.340670643900432,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0346,
"step": 42200
},
{
"action_loss": 0.0212,
"epoch": 4.345813618596996,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0244,
"step": 42250
},
{
"action_loss": 0.0233,
"epoch": 4.350956593293561,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0268,
"step": 42300
},
{
"action_loss": 0.0125,
"epoch": 4.3560995679901255,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0152,
"step": 42350
},
{
"action_loss": 0.0223,
"epoch": 4.36124254268669,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0255,
"step": 42400
},
{
"action_loss": 0.022,
"epoch": 4.366385517383255,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0251,
"step": 42450
},
{
"action_loss": 0.0276,
"epoch": 4.371528492079819,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0311,
"step": 42500
},
{
"action_loss": 0.0232,
"epoch": 4.376671466776384,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0268,
"step": 42550
},
{
"action_loss": 0.0334,
"epoch": 4.381814441472948,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0369,
"step": 42600
},
{
"action_loss": 0.0186,
"epoch": 4.386957416169513,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0221,
"step": 42650
},
{
"action_loss": 0.0224,
"epoch": 4.3921003908660765,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0253,
"step": 42700
},
{
"action_loss": 0.0245,
"epoch": 4.397243365562641,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0279,
"step": 42750
},
{
"action_loss": 0.0266,
"epoch": 4.402386340259206,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0295,
"step": 42800
},
{
"action_loss": 0.0383,
"epoch": 4.40752931495577,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0417,
"step": 42850
},
{
"action_loss": 0.0242,
"epoch": 4.412672289652335,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0274,
"step": 42900
},
{
"action_loss": 0.0231,
"epoch": 4.417815264348899,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0258,
"step": 42950
},
{
"action_loss": 0.0219,
"epoch": 4.422958239045464,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.025,
"step": 43000
},
{
"action_loss": 0.0353,
"epoch": 4.428101213742028,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0384,
"step": 43050
},
{
"action_loss": 0.0255,
"epoch": 4.433244188438593,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0292,
"step": 43100
},
{
"action_loss": 0.0262,
"epoch": 4.4383871631351575,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0298,
"step": 43150
},
{
"action_loss": 0.0243,
"epoch": 4.443530137831722,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0274,
"step": 43200
},
{
"action_loss": 0.0216,
"epoch": 4.448673112528287,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.025,
"step": 43250
},
{
"action_loss": 0.0327,
"epoch": 4.453816087224851,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0358,
"step": 43300
},
{
"action_loss": 0.0244,
"epoch": 4.458959061921416,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0272,
"step": 43350
},
{
"action_loss": 0.035,
"epoch": 4.46410203661798,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0381,
"step": 43400
},
{
"action_loss": 0.0238,
"epoch": 4.469245011314545,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.027,
"step": 43450
},
{
"action_loss": 0.0245,
"epoch": 4.474387986011108,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0275,
"step": 43500
},
{
"action_loss": 0.0213,
"epoch": 4.479530960707673,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0245,
"step": 43550
},
{
"action_loss": 0.0294,
"epoch": 4.4846739354042375,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0327,
"step": 43600
},
{
"action_loss": 0.0287,
"epoch": 4.489816910100802,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0324,
"step": 43650
},
{
"action_loss": 0.0229,
"epoch": 4.494959884797367,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0258,
"step": 43700
},
{
"action_loss": 0.0347,
"epoch": 4.500102859493931,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0377,
"step": 43750
},
{
"action_loss": 0.0249,
"epoch": 4.505245834190496,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0281,
"step": 43800
},
{
"action_loss": 0.0217,
"epoch": 4.51038880888706,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0247,
"step": 43850
},
{
"action_loss": 0.0249,
"epoch": 4.515531783583625,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0279,
"step": 43900
},
{
"action_loss": 0.0276,
"epoch": 4.520674758280189,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0312,
"step": 43950
},
{
"action_loss": 0.0225,
"epoch": 4.525817732976754,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.026,
"step": 44000
},
{
"action_loss": 0.0261,
"epoch": 4.5309607076733185,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0295,
"step": 44050
},
{
"action_loss": 0.0207,
"epoch": 4.536103682369883,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0241,
"step": 44100
},
{
"action_loss": 0.0325,
"epoch": 4.541246657066448,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0361,
"step": 44150
},
{
"action_loss": 0.0185,
"epoch": 4.546389631763011,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0216,
"step": 44200
},
{
"action_loss": 0.0239,
"epoch": 4.551532606459576,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0271,
"step": 44250
},
{
"action_loss": 0.0218,
"epoch": 4.55667558115614,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0254,
"step": 44300
},
{
"action_loss": 0.0212,
"epoch": 4.561818555852705,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0248,
"step": 44350
},
{
"action_loss": 0.0354,
"epoch": 4.5669615305492695,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0389,
"step": 44400
},
{
"action_loss": 0.0183,
"epoch": 4.572104505245834,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0215,
"step": 44450
},
{
"action_loss": 0.0264,
"epoch": 4.5772474799423986,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0294,
"step": 44500
},
{
"action_loss": 0.0186,
"epoch": 4.582390454638963,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0211,
"step": 44550
},
{
"action_loss": 0.0232,
"epoch": 4.587533429335528,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0264,
"step": 44600
},
{
"action_loss": 0.0203,
"epoch": 4.592676404032092,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0239,
"step": 44650
},
{
"action_loss": 0.0229,
"epoch": 4.597819378728657,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0262,
"step": 44700
},
{
"action_loss": 0.0172,
"epoch": 4.602962353425221,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0203,
"step": 44750
},
{
"action_loss": 0.0289,
"epoch": 4.608105328121786,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0318,
"step": 44800
},
{
"action_loss": 0.0217,
"epoch": 4.61324830281835,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0249,
"step": 44850
},
{
"action_loss": 0.0199,
"epoch": 4.618391277514915,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.023,
"step": 44900
},
{
"action_loss": 0.0294,
"epoch": 4.6235342522114795,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0328,
"step": 44950
},
{
"action_loss": 0.0276,
"epoch": 4.628677226908044,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0309,
"step": 45000
},
{
"action_loss": 0.0188,
"epoch": 4.633820201604608,
"learning_rate": 2e-05,
"llm_loss": 0.0046,
"loss": 0.0233,
"step": 45050
},
{
"action_loss": 0.0283,
"epoch": 4.638963176301172,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0318,
"step": 45100
},
{
"action_loss": 0.0326,
"epoch": 4.644106150997737,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0363,
"step": 45150
},
{
"action_loss": 0.0231,
"epoch": 4.649249125694301,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0262,
"step": 45200
},
{
"action_loss": 0.0266,
"epoch": 4.654392100390866,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0301,
"step": 45250
},
{
"action_loss": 0.0264,
"epoch": 4.6595350750874305,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0294,
"step": 45300
},
{
"action_loss": 0.0328,
"epoch": 4.664678049783995,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.036,
"step": 45350
},
{
"action_loss": 0.0203,
"epoch": 4.66982102448056,
"learning_rate": 2e-05,
"llm_loss": 0.0043,
"loss": 0.0246,
"step": 45400
},
{
"action_loss": 0.0312,
"epoch": 4.674963999177124,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0347,
"step": 45450
},
{
"action_loss": 0.0348,
"epoch": 4.680106973873689,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0379,
"step": 45500
},
{
"action_loss": 0.0286,
"epoch": 4.685249948570253,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0318,
"step": 45550
},
{
"action_loss": 0.0273,
"epoch": 4.690392923266818,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0306,
"step": 45600
},
{
"action_loss": 0.0293,
"epoch": 4.695535897963382,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0323,
"step": 45650
},
{
"action_loss": 0.0188,
"epoch": 4.700678872659947,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0223,
"step": 45700
},
{
"action_loss": 0.0248,
"epoch": 4.7058218473565105,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0279,
"step": 45750
},
{
"action_loss": 0.0289,
"epoch": 4.710964822053075,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0321,
"step": 45800
},
{
"action_loss": 0.0283,
"epoch": 4.71610779674964,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0316,
"step": 45850
},
{
"action_loss": 0.0267,
"epoch": 4.721250771446204,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.03,
"step": 45900
},
{
"action_loss": 0.0246,
"epoch": 4.726393746142769,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0279,
"step": 45950
},
{
"action_loss": 0.0376,
"epoch": 4.731536720839333,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0409,
"step": 46000
},
{
"action_loss": 0.019,
"epoch": 4.736679695535898,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.022,
"step": 46050
},
{
"action_loss": 0.0168,
"epoch": 4.741822670232462,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0202,
"step": 46100
},
{
"action_loss": 0.027,
"epoch": 4.746965644929027,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0303,
"step": 46150
},
{
"action_loss": 0.0297,
"epoch": 4.7521086196255915,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0325,
"step": 46200
},
{
"action_loss": 0.0245,
"epoch": 4.757251594322156,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0284,
"step": 46250
},
{
"action_loss": 0.0198,
"epoch": 4.762394569018721,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.0235,
"step": 46300
},
{
"action_loss": 0.0252,
"epoch": 4.767537543715285,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0286,
"step": 46350
},
{
"action_loss": 0.025,
"epoch": 4.77268051841185,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0279,
"step": 46400
},
{
"action_loss": 0.0301,
"epoch": 4.777823493108414,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0331,
"step": 46450
},
{
"action_loss": 0.0193,
"epoch": 4.782966467804979,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0224,
"step": 46500
},
{
"action_loss": 0.0207,
"epoch": 4.788109442501543,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0237,
"step": 46550
},
{
"action_loss": 0.034,
"epoch": 4.793252417198107,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0375,
"step": 46600
},
{
"action_loss": 0.0241,
"epoch": 4.798395391894672,
"learning_rate": 2e-05,
"llm_loss": 0.0039,
"loss": 0.0281,
"step": 46650
},
{
"action_loss": 0.0217,
"epoch": 4.803538366591236,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0252,
"step": 46700
},
{
"action_loss": 0.0275,
"epoch": 4.808681341287801,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0312,
"step": 46750
},
{
"action_loss": 0.0311,
"epoch": 4.813824315984365,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0343,
"step": 46800
},
{
"action_loss": 0.0297,
"epoch": 4.81896729068093,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0332,
"step": 46850
},
{
"action_loss": 0.0254,
"epoch": 4.824110265377494,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0287,
"step": 46900
},
{
"action_loss": 0.0242,
"epoch": 4.829253240074059,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0271,
"step": 46950
},
{
"action_loss": 0.0272,
"epoch": 4.834396214770623,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0301,
"step": 47000
},
{
"action_loss": 0.0162,
"epoch": 4.839539189467188,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0195,
"step": 47050
},
{
"action_loss": 0.032,
"epoch": 4.8446821641637525,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0354,
"step": 47100
},
{
"action_loss": 0.0264,
"epoch": 4.849825138860317,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0298,
"step": 47150
},
{
"action_loss": 0.0237,
"epoch": 4.854968113556882,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0265,
"step": 47200
},
{
"action_loss": 0.0303,
"epoch": 4.860111088253446,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0335,
"step": 47250
},
{
"action_loss": 0.0208,
"epoch": 4.86525406295001,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0237,
"step": 47300
},
{
"action_loss": 0.0223,
"epoch": 4.870397037646574,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0258,
"step": 47350
},
{
"action_loss": 0.0241,
"epoch": 4.875540012343139,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0275,
"step": 47400
},
{
"action_loss": 0.0312,
"epoch": 4.8806829870397035,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0355,
"step": 47450
},
{
"action_loss": 0.0302,
"epoch": 4.885825961736268,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0335,
"step": 47500
},
{
"action_loss": 0.0184,
"epoch": 4.890968936432833,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0219,
"step": 47550
},
{
"action_loss": 0.0281,
"epoch": 4.896111911129397,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0315,
"step": 47600
},
{
"action_loss": 0.0268,
"epoch": 4.901254885825962,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0295,
"step": 47650
},
{
"action_loss": 0.0356,
"epoch": 4.906397860522526,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0387,
"step": 47700
},
{
"action_loss": 0.0321,
"epoch": 4.911540835219091,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0352,
"step": 47750
},
{
"action_loss": 0.0217,
"epoch": 4.916683809915655,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0251,
"step": 47800
},
{
"action_loss": 0.0327,
"epoch": 4.92182678461222,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0354,
"step": 47850
},
{
"action_loss": 0.0214,
"epoch": 4.9269697593087844,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0247,
"step": 47900
},
{
"action_loss": 0.0286,
"epoch": 4.932112734005349,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0319,
"step": 47950
},
{
"action_loss": 0.027,
"epoch": 4.9372557087019135,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0298,
"step": 48000
},
{
"action_loss": 0.0247,
"epoch": 4.942398683398478,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0279,
"step": 48050
},
{
"action_loss": 0.0151,
"epoch": 4.947541658095043,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0181,
"step": 48100
},
{
"action_loss": 0.0369,
"epoch": 4.952684632791606,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0403,
"step": 48150
},
{
"action_loss": 0.0102,
"epoch": 4.957827607488171,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0135,
"step": 48200
},
{
"action_loss": 0.0243,
"epoch": 4.962970582184735,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0273,
"step": 48250
},
{
"action_loss": 0.0165,
"epoch": 4.9681135568813,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0196,
"step": 48300
},
{
"action_loss": 0.023,
"epoch": 4.9732565315778645,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0261,
"step": 48350
},
{
"action_loss": 0.0224,
"epoch": 4.978399506274429,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0261,
"step": 48400
},
{
"action_loss": 0.0238,
"epoch": 4.983542480970994,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0269,
"step": 48450
},
{
"action_loss": 0.0254,
"epoch": 4.988685455667558,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0292,
"step": 48500
},
{
"action_loss": 0.0132,
"epoch": 4.993828430364123,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.0175,
"step": 48550
},
{
"action_loss": 0.0212,
"epoch": 4.998971405060687,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0243,
"step": 48600
},
{
"action_loss": 0.0217,
"epoch": 5.004114379757252,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0243,
"step": 48650
},
{
"action_loss": 0.0297,
"epoch": 5.009257354453816,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0329,
"step": 48700
},
{
"action_loss": 0.0179,
"epoch": 5.014400329150381,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.021,
"step": 48750
},
{
"action_loss": 0.0218,
"epoch": 5.0195433038469455,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0247,
"step": 48800
},
{
"action_loss": 0.0195,
"epoch": 5.024686278543509,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0224,
"step": 48850
},
{
"action_loss": 0.0199,
"epoch": 5.029829253240074,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0231,
"step": 48900
},
{
"action_loss": 0.032,
"epoch": 5.034972227936638,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0349,
"step": 48950
},
{
"action_loss": 0.0343,
"epoch": 5.040115202633203,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0368,
"step": 49000
},
{
"action_loss": 0.0339,
"epoch": 5.045258177329767,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0369,
"step": 49050
},
{
"action_loss": 0.02,
"epoch": 5.050401152026332,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.023,
"step": 49100
},
{
"action_loss": 0.0312,
"epoch": 5.055544126722896,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0344,
"step": 49150
},
{
"action_loss": 0.0237,
"epoch": 5.060687101419461,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0265,
"step": 49200
},
{
"action_loss": 0.0233,
"epoch": 5.0658300761160255,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.026,
"step": 49250
},
{
"action_loss": 0.021,
"epoch": 5.07097305081259,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0237,
"step": 49300
},
{
"action_loss": 0.0273,
"epoch": 5.076116025509155,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0304,
"step": 49350
},
{
"action_loss": 0.0157,
"epoch": 5.081259000205719,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0188,
"step": 49400
},
{
"action_loss": 0.0189,
"epoch": 5.086401974902284,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0217,
"step": 49450
},
{
"action_loss": 0.0291,
"epoch": 5.091544949598848,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0318,
"step": 49500
},
{
"action_loss": 0.0288,
"epoch": 5.096687924295413,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0316,
"step": 49550
},
{
"action_loss": 0.0239,
"epoch": 5.101830898991977,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0266,
"step": 49600
},
{
"action_loss": 0.0224,
"epoch": 5.106973873688541,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0248,
"step": 49650
},
{
"action_loss": 0.0373,
"epoch": 5.112116848385106,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0399,
"step": 49700
},
{
"action_loss": 0.0307,
"epoch": 5.11725982308167,
"learning_rate": 2e-05,
"llm_loss": 0.0023,
"loss": 0.033,
"step": 49750
},
{
"action_loss": 0.0223,
"epoch": 5.122402797778235,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0253,
"step": 49800
},
{
"action_loss": 0.021,
"epoch": 5.127545772474799,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0237,
"step": 49850
},
{
"action_loss": 0.0266,
"epoch": 5.132688747171364,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0293,
"step": 49900
},
{
"action_loss": 0.0285,
"epoch": 5.137831721867928,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0315,
"step": 49950
},
{
"action_loss": 0.0233,
"epoch": 5.142974696564493,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0265,
"step": 50000
},
{
"action_loss": 0.025,
"epoch": 5.1481176712610575,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0282,
"step": 50050
},
{
"action_loss": 0.0256,
"epoch": 5.153260645957622,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0285,
"step": 50100
},
{
"action_loss": 0.0247,
"epoch": 5.158403620654187,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0271,
"step": 50150
},
{
"action_loss": 0.0242,
"epoch": 5.163546595350751,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0266,
"step": 50200
},
{
"action_loss": 0.0277,
"epoch": 5.168689570047316,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0305,
"step": 50250
},
{
"action_loss": 0.0203,
"epoch": 5.17383254474388,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0233,
"step": 50300
},
{
"action_loss": 0.0234,
"epoch": 5.178975519440445,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0267,
"step": 50350
},
{
"action_loss": 0.0213,
"epoch": 5.184118494137008,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0239,
"step": 50400
},
{
"action_loss": 0.0195,
"epoch": 5.189261468833573,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0226,
"step": 50450
},
{
"action_loss": 0.0246,
"epoch": 5.1944044435301375,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0277,
"step": 50500
},
{
"action_loss": 0.0317,
"epoch": 5.199547418226702,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0348,
"step": 50550
},
{
"action_loss": 0.0211,
"epoch": 5.204690392923267,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0245,
"step": 50600
},
{
"action_loss": 0.0299,
"epoch": 5.209833367619831,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0327,
"step": 50650
},
{
"action_loss": 0.0219,
"epoch": 5.214976342316396,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0245,
"step": 50700
},
{
"action_loss": 0.0347,
"epoch": 5.22011931701296,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.038,
"step": 50750
},
{
"action_loss": 0.0342,
"epoch": 5.225262291709525,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.037,
"step": 50800
},
{
"action_loss": 0.0251,
"epoch": 5.230405266406089,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0277,
"step": 50850
},
{
"action_loss": 0.0287,
"epoch": 5.235548241102654,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0315,
"step": 50900
},
{
"action_loss": 0.0234,
"epoch": 5.2406912157992185,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0269,
"step": 50950
},
{
"action_loss": 0.0158,
"epoch": 5.245834190495783,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0184,
"step": 51000
},
{
"action_loss": 0.0269,
"epoch": 5.250977165192348,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0294,
"step": 51050
},
{
"action_loss": 0.0145,
"epoch": 5.256120139888912,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0175,
"step": 51100
},
{
"action_loss": 0.0245,
"epoch": 5.261263114585477,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0273,
"step": 51150
},
{
"action_loss": 0.0251,
"epoch": 5.26640608928204,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0278,
"step": 51200
},
{
"action_loss": 0.0278,
"epoch": 5.271549063978605,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0311,
"step": 51250
},
{
"action_loss": 0.0251,
"epoch": 5.2766920386751694,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0287,
"step": 51300
},
{
"action_loss": 0.021,
"epoch": 5.281835013371734,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0245,
"step": 51350
},
{
"action_loss": 0.0282,
"epoch": 5.2869779880682986,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0316,
"step": 51400
},
{
"action_loss": 0.028,
"epoch": 5.292120962764863,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0308,
"step": 51450
},
{
"action_loss": 0.021,
"epoch": 5.297263937461428,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0243,
"step": 51500
},
{
"action_loss": 0.0332,
"epoch": 5.302406912157992,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.036,
"step": 51550
},
{
"action_loss": 0.02,
"epoch": 5.307549886854557,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0232,
"step": 51600
},
{
"action_loss": 0.0309,
"epoch": 5.312692861551121,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0338,
"step": 51650
},
{
"action_loss": 0.0229,
"epoch": 5.317835836247686,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0259,
"step": 51700
},
{
"action_loss": 0.022,
"epoch": 5.32297881094425,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.025,
"step": 51750
},
{
"action_loss": 0.0318,
"epoch": 5.328121785640815,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0346,
"step": 51800
},
{
"action_loss": 0.0326,
"epoch": 5.3332647603373795,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0353,
"step": 51850
},
{
"action_loss": 0.03,
"epoch": 5.338407735033943,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.033,
"step": 51900
},
{
"action_loss": 0.0213,
"epoch": 5.343550709730508,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0244,
"step": 51950
},
{
"action_loss": 0.0227,
"epoch": 5.348693684427072,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0253,
"step": 52000
},
{
"action_loss": 0.0233,
"epoch": 5.353836659123637,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.026,
"step": 52050
},
{
"action_loss": 0.0305,
"epoch": 5.358979633820201,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0332,
"step": 52100
},
{
"action_loss": 0.0212,
"epoch": 5.364122608516766,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0239,
"step": 52150
},
{
"action_loss": 0.0284,
"epoch": 5.3692655832133305,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0315,
"step": 52200
},
{
"action_loss": 0.0252,
"epoch": 5.374408557909895,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0279,
"step": 52250
},
{
"action_loss": 0.0204,
"epoch": 5.37955153260646,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0234,
"step": 52300
},
{
"action_loss": 0.0216,
"epoch": 5.384694507303024,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0242,
"step": 52350
},
{
"action_loss": 0.0221,
"epoch": 5.389837481999589,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0253,
"step": 52400
},
{
"action_loss": 0.0275,
"epoch": 5.394980456696153,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0308,
"step": 52450
},
{
"action_loss": 0.0321,
"epoch": 5.400123431392718,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0348,
"step": 52500
},
{
"action_loss": 0.0182,
"epoch": 5.405266406089282,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0216,
"step": 52550
},
{
"action_loss": 0.0235,
"epoch": 5.410409380785847,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0264,
"step": 52600
},
{
"action_loss": 0.0287,
"epoch": 5.415552355482411,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0323,
"step": 52650
},
{
"action_loss": 0.0164,
"epoch": 5.420695330178976,
"learning_rate": 2e-05,
"llm_loss": 0.0038,
"loss": 0.0202,
"step": 52700
},
{
"action_loss": 0.0188,
"epoch": 5.42583830487554,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0215,
"step": 52750
},
{
"action_loss": 0.0217,
"epoch": 5.430981279572104,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0247,
"step": 52800
},
{
"action_loss": 0.0263,
"epoch": 5.436124254268669,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0295,
"step": 52850
},
{
"action_loss": 0.0314,
"epoch": 5.441267228965233,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0347,
"step": 52900
},
{
"action_loss": 0.0228,
"epoch": 5.446410203661798,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0258,
"step": 52950
},
{
"action_loss": 0.0149,
"epoch": 5.451553178358362,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0183,
"step": 53000
},
{
"action_loss": 0.0186,
"epoch": 5.456696153054927,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0217,
"step": 53050
},
{
"action_loss": 0.0232,
"epoch": 5.4618391277514915,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0263,
"step": 53100
},
{
"action_loss": 0.0246,
"epoch": 5.466982102448056,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0271,
"step": 53150
},
{
"action_loss": 0.0223,
"epoch": 5.472125077144621,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0255,
"step": 53200
},
{
"action_loss": 0.0308,
"epoch": 5.477268051841185,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.034,
"step": 53250
},
{
"action_loss": 0.0316,
"epoch": 5.48241102653775,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0349,
"step": 53300
},
{
"action_loss": 0.0183,
"epoch": 5.487554001234314,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0211,
"step": 53350
},
{
"action_loss": 0.0167,
"epoch": 5.492696975930879,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0202,
"step": 53400
},
{
"action_loss": 0.0363,
"epoch": 5.4978399506274425,
"learning_rate": 2e-05,
"llm_loss": 0.0037,
"loss": 0.04,
"step": 53450
},
{
"action_loss": 0.0178,
"epoch": 5.502982925324007,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0207,
"step": 53500
},
{
"action_loss": 0.0269,
"epoch": 5.508125900020572,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0303,
"step": 53550
},
{
"action_loss": 0.022,
"epoch": 5.513268874717136,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0246,
"step": 53600
},
{
"action_loss": 0.0316,
"epoch": 5.518411849413701,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0345,
"step": 53650
},
{
"action_loss": 0.0236,
"epoch": 5.523554824110265,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0268,
"step": 53700
},
{
"action_loss": 0.0253,
"epoch": 5.52869779880683,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0287,
"step": 53750
},
{
"action_loss": 0.0202,
"epoch": 5.533840773503394,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0232,
"step": 53800
},
{
"action_loss": 0.028,
"epoch": 5.538983748199959,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0309,
"step": 53850
},
{
"action_loss": 0.0232,
"epoch": 5.544126722896523,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0266,
"step": 53900
},
{
"action_loss": 0.0321,
"epoch": 5.549269697593088,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0348,
"step": 53950
},
{
"action_loss": 0.026,
"epoch": 5.5544126722896525,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.029,
"step": 54000
},
{
"action_loss": 0.0224,
"epoch": 5.559555646986217,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0251,
"step": 54050
},
{
"action_loss": 0.0151,
"epoch": 5.564698621682782,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0179,
"step": 54100
},
{
"action_loss": 0.0217,
"epoch": 5.569841596379346,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0247,
"step": 54150
},
{
"action_loss": 0.0264,
"epoch": 5.574984571075911,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0299,
"step": 54200
},
{
"action_loss": 0.023,
"epoch": 5.580127545772475,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.026,
"step": 54250
},
{
"action_loss": 0.0229,
"epoch": 5.585270520469039,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0258,
"step": 54300
},
{
"action_loss": 0.0216,
"epoch": 5.5904134951656035,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0246,
"step": 54350
},
{
"action_loss": 0.0227,
"epoch": 5.595556469862168,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0255,
"step": 54400
},
{
"action_loss": 0.0226,
"epoch": 5.600699444558733,
"learning_rate": 2e-05,
"llm_loss": 0.0021,
"loss": 0.0247,
"step": 54450
},
{
"action_loss": 0.0249,
"epoch": 5.605842419255297,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0279,
"step": 54500
},
{
"action_loss": 0.0265,
"epoch": 5.610985393951862,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0298,
"step": 54550
},
{
"action_loss": 0.0238,
"epoch": 5.616128368648426,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0265,
"step": 54600
},
{
"action_loss": 0.016,
"epoch": 5.621271343344991,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0194,
"step": 54650
},
{
"action_loss": 0.029,
"epoch": 5.626414318041555,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.032,
"step": 54700
},
{
"action_loss": 0.023,
"epoch": 5.63155729273812,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0261,
"step": 54750
},
{
"action_loss": 0.0225,
"epoch": 5.636700267434684,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0256,
"step": 54800
},
{
"action_loss": 0.0311,
"epoch": 5.641843242131249,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0341,
"step": 54850
},
{
"action_loss": 0.0327,
"epoch": 5.6469862168278135,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0359,
"step": 54900
},
{
"action_loss": 0.0249,
"epoch": 5.652129191524377,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0283,
"step": 54950
},
{
"action_loss": 0.0229,
"epoch": 5.657272166220942,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0258,
"step": 55000
},
{
"action_loss": 0.0259,
"epoch": 5.662415140917506,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0289,
"step": 55050
},
{
"action_loss": 0.0199,
"epoch": 5.667558115614071,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0231,
"step": 55100
},
{
"action_loss": 0.0149,
"epoch": 5.672701090310635,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0179,
"step": 55150
},
{
"action_loss": 0.0194,
"epoch": 5.6778440650072,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0223,
"step": 55200
},
{
"action_loss": 0.0299,
"epoch": 5.6829870397037645,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0332,
"step": 55250
},
{
"action_loss": 0.0298,
"epoch": 5.688130014400329,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0331,
"step": 55300
},
{
"action_loss": 0.0137,
"epoch": 5.693272989096894,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.017,
"step": 55350
},
{
"action_loss": 0.0112,
"epoch": 5.698415963793458,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0143,
"step": 55400
},
{
"action_loss": 0.0247,
"epoch": 5.703558938490023,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0271,
"step": 55450
},
{
"action_loss": 0.0256,
"epoch": 5.708701913186587,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0287,
"step": 55500
},
{
"action_loss": 0.0261,
"epoch": 5.713844887883152,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0291,
"step": 55550
},
{
"action_loss": 0.023,
"epoch": 5.718987862579716,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0261,
"step": 55600
},
{
"action_loss": 0.0206,
"epoch": 5.724130837276281,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0234,
"step": 55650
},
{
"action_loss": 0.0256,
"epoch": 5.7292738119728455,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0289,
"step": 55700
},
{
"action_loss": 0.016,
"epoch": 5.73441678666941,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0188,
"step": 55750
},
{
"action_loss": 0.0263,
"epoch": 5.739559761365975,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0294,
"step": 55800
},
{
"action_loss": 0.0152,
"epoch": 5.744702736062538,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0182,
"step": 55850
},
{
"action_loss": 0.0207,
"epoch": 5.749845710759103,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0242,
"step": 55900
},
{
"action_loss": 0.0201,
"epoch": 5.754988685455667,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0229,
"step": 55950
},
{
"action_loss": 0.0217,
"epoch": 5.760131660152232,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0246,
"step": 56000
},
{
"action_loss": 0.0333,
"epoch": 5.765274634848796,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0367,
"step": 56050
},
{
"action_loss": 0.0263,
"epoch": 5.770417609545361,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0294,
"step": 56100
},
{
"action_loss": 0.0242,
"epoch": 5.7755605842419255,
"learning_rate": 2e-05,
"llm_loss": 0.0023,
"loss": 0.0266,
"step": 56150
},
{
"action_loss": 0.0183,
"epoch": 5.78070355893849,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0209,
"step": 56200
},
{
"action_loss": 0.0324,
"epoch": 5.785846533635055,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0353,
"step": 56250
},
{
"action_loss": 0.0155,
"epoch": 5.790989508331619,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0185,
"step": 56300
},
{
"action_loss": 0.018,
"epoch": 5.796132483028184,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0207,
"step": 56350
},
{
"action_loss": 0.0326,
"epoch": 5.801275457724748,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0358,
"step": 56400
},
{
"action_loss": 0.022,
"epoch": 5.806418432421313,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0249,
"step": 56450
},
{
"action_loss": 0.0165,
"epoch": 5.8115614071178765,
"learning_rate": 2e-05,
"llm_loss": 0.0036,
"loss": 0.0201,
"step": 56500
},
{
"action_loss": 0.0136,
"epoch": 5.816704381814441,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.017,
"step": 56550
},
{
"action_loss": 0.0286,
"epoch": 5.821847356511006,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0314,
"step": 56600
},
{
"action_loss": 0.0304,
"epoch": 5.82699033120757,
"learning_rate": 2e-05,
"llm_loss": 0.0034,
"loss": 0.0337,
"step": 56650
},
{
"action_loss": 0.0311,
"epoch": 5.832133305904135,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0342,
"step": 56700
},
{
"action_loss": 0.0284,
"epoch": 5.837276280600699,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0315,
"step": 56750
},
{
"action_loss": 0.0273,
"epoch": 5.842419255297264,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0295,
"step": 56800
},
{
"action_loss": 0.0292,
"epoch": 5.847562229993828,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0327,
"step": 56850
},
{
"action_loss": 0.0234,
"epoch": 5.852705204690393,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0263,
"step": 56900
},
{
"action_loss": 0.029,
"epoch": 5.8578481793869575,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0325,
"step": 56950
},
{
"action_loss": 0.0265,
"epoch": 5.862991154083522,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0297,
"step": 57000
},
{
"action_loss": 0.0163,
"epoch": 5.8681341287800866,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.019,
"step": 57050
},
{
"action_loss": 0.0206,
"epoch": 5.873277103476651,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.023,
"step": 57100
},
{
"action_loss": 0.0229,
"epoch": 5.878420078173216,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0254,
"step": 57150
},
{
"action_loss": 0.0309,
"epoch": 5.88356305286978,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0341,
"step": 57200
},
{
"action_loss": 0.0166,
"epoch": 5.888706027566345,
"learning_rate": 2e-05,
"llm_loss": 0.0035,
"loss": 0.0201,
"step": 57250
},
{
"action_loss": 0.0173,
"epoch": 5.893849002262909,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0206,
"step": 57300
},
{
"action_loss": 0.0298,
"epoch": 5.898991976959473,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0326,
"step": 57350
},
{
"action_loss": 0.0159,
"epoch": 5.9041349516560375,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0186,
"step": 57400
},
{
"action_loss": 0.0269,
"epoch": 5.909277926352602,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0298,
"step": 57450
},
{
"action_loss": 0.0244,
"epoch": 5.914420901049167,
"learning_rate": 2e-05,
"llm_loss": 0.0033,
"loss": 0.0277,
"step": 57500
},
{
"action_loss": 0.0186,
"epoch": 5.919563875745731,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0216,
"step": 57550
},
{
"action_loss": 0.0227,
"epoch": 5.924706850442296,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0257,
"step": 57600
},
{
"action_loss": 0.0179,
"epoch": 5.92984982513886,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0207,
"step": 57650
},
{
"action_loss": 0.0289,
"epoch": 5.934992799835425,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0316,
"step": 57700
},
{
"action_loss": 0.02,
"epoch": 5.940135774531989,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.023,
"step": 57750
},
{
"action_loss": 0.026,
"epoch": 5.945278749228554,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0289,
"step": 57800
},
{
"action_loss": 0.034,
"epoch": 5.9504217239251185,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0371,
"step": 57850
},
{
"action_loss": 0.0203,
"epoch": 5.955564698621683,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0231,
"step": 57900
},
{
"action_loss": 0.0237,
"epoch": 5.960707673318248,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0266,
"step": 57950
},
{
"action_loss": 0.0208,
"epoch": 5.965850648014812,
"learning_rate": 2e-05,
"llm_loss": 0.0042,
"loss": 0.025,
"step": 58000
},
{
"action_loss": 0.0235,
"epoch": 5.970993622711376,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0267,
"step": 58050
},
{
"action_loss": 0.0181,
"epoch": 5.97613659740794,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0207,
"step": 58100
},
{
"action_loss": 0.031,
"epoch": 5.981279572104505,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.034,
"step": 58150
},
{
"action_loss": 0.0264,
"epoch": 5.9864225468010694,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0294,
"step": 58200
},
{
"action_loss": 0.0284,
"epoch": 5.991565521497634,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0311,
"step": 58250
},
{
"action_loss": 0.0223,
"epoch": 5.9967084961941985,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0253,
"step": 58300
},
{
"action_loss": 0.0232,
"epoch": 6.001851470890763,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0258,
"step": 58350
},
{
"action_loss": 0.0293,
"epoch": 6.006994445587328,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.032,
"step": 58400
},
{
"action_loss": 0.0259,
"epoch": 6.012137420283892,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0281,
"step": 58450
},
{
"action_loss": 0.026,
"epoch": 6.017280394980457,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0282,
"step": 58500
},
{
"action_loss": 0.0226,
"epoch": 6.022423369677021,
"learning_rate": 2e-05,
"llm_loss": 0.0023,
"loss": 0.0249,
"step": 58550
},
{
"action_loss": 0.0188,
"epoch": 6.027566344373586,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0214,
"step": 58600
},
{
"action_loss": 0.0262,
"epoch": 6.03270931907015,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0285,
"step": 58650
},
{
"action_loss": 0.0173,
"epoch": 6.037852293766715,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.02,
"step": 58700
},
{
"action_loss": 0.0245,
"epoch": 6.0429952684632795,
"learning_rate": 2e-05,
"llm_loss": 0.0023,
"loss": 0.0268,
"step": 58750
},
{
"action_loss": 0.0209,
"epoch": 6.048138243159844,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0234,
"step": 58800
},
{
"action_loss": 0.0309,
"epoch": 6.053281217856408,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.0338,
"step": 58850
},
{
"action_loss": 0.0226,
"epoch": 6.058424192552972,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0253,
"step": 58900
},
{
"action_loss": 0.0277,
"epoch": 6.063567167249537,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0299,
"step": 58950
},
{
"action_loss": 0.0345,
"epoch": 6.068710141946101,
"learning_rate": 2e-05,
"llm_loss": 0.0031,
"loss": 0.0376,
"step": 59000
},
{
"action_loss": 0.0318,
"epoch": 6.073853116642666,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0345,
"step": 59050
},
{
"action_loss": 0.0294,
"epoch": 6.0789960913392305,
"learning_rate": 2e-05,
"llm_loss": 0.003,
"loss": 0.0323,
"step": 59100
},
{
"action_loss": 0.0277,
"epoch": 6.084139066035795,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0304,
"step": 59150
},
{
"action_loss": 0.0227,
"epoch": 6.08928204073236,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0254,
"step": 59200
},
{
"action_loss": 0.0231,
"epoch": 6.094425015428924,
"learning_rate": 2e-05,
"llm_loss": 0.0021,
"loss": 0.0252,
"step": 59250
},
{
"action_loss": 0.0282,
"epoch": 6.099567990125489,
"learning_rate": 2e-05,
"llm_loss": 0.0024,
"loss": 0.0306,
"step": 59300
},
{
"action_loss": 0.0311,
"epoch": 6.104710964822053,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0337,
"step": 59350
},
{
"action_loss": 0.0283,
"epoch": 6.109853939518618,
"learning_rate": 2e-05,
"llm_loss": 0.0023,
"loss": 0.0306,
"step": 59400
},
{
"action_loss": 0.0316,
"epoch": 6.114996914215182,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0338,
"step": 59450
},
{
"action_loss": 0.0302,
"epoch": 6.120139888911747,
"learning_rate": 2e-05,
"llm_loss": 0.0029,
"loss": 0.033,
"step": 59500
},
{
"action_loss": 0.0264,
"epoch": 6.125282863608311,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.029,
"step": 59550
},
{
"action_loss": 0.0301,
"epoch": 6.130425838304875,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0324,
"step": 59600
},
{
"action_loss": 0.0243,
"epoch": 6.13556881300144,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0269,
"step": 59650
},
{
"action_loss": 0.031,
"epoch": 6.140711787698004,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0335,
"step": 59700
},
{
"action_loss": 0.0266,
"epoch": 6.145854762394569,
"learning_rate": 2e-05,
"llm_loss": 0.0028,
"loss": 0.0294,
"step": 59750
},
{
"action_loss": 0.0197,
"epoch": 6.150997737091133,
"learning_rate": 2e-05,
"llm_loss": 0.0027,
"loss": 0.0224,
"step": 59800
},
{
"action_loss": 0.0287,
"epoch": 6.156140711787698,
"learning_rate": 2e-05,
"llm_loss": 0.0022,
"loss": 0.0309,
"step": 59850
},
{
"action_loss": 0.0272,
"epoch": 6.161283686484262,
"learning_rate": 2e-05,
"llm_loss": 0.0032,
"loss": 0.0305,
"step": 59900
},
{
"action_loss": 0.0193,
"epoch": 6.166426661180827,
"learning_rate": 2e-05,
"llm_loss": 0.0026,
"loss": 0.0219,
"step": 59950
},
{
"action_loss": 0.0178,
"epoch": 6.1715696358773915,
"learning_rate": 2e-05,
"llm_loss": 0.0025,
"loss": 0.0203,
"step": 60000
},
{
"epoch": 6.1715696358773915,
"step": 60000,
"total_flos": 3.62488162750038e+19,
"train_loss": 0.0319730894813935,
"train_runtime": 64003.3532,
"train_samples_per_second": 89.995,
"train_steps_per_second": 0.937
}
],
"logging_steps": 50,
"max_steps": 60000,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.62488162750038e+19,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}