{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 200,
  "global_step": 3369,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00029682398337785694,
      "grad_norm": 16.833446502685547,
      "learning_rate": 1.483679525222552e-08,
      "loss": 0.1938,
      "step": 1,
      "video_reward_cumulative_accuracy": 0.5
    },
    {
      "epoch": 0.0005936479667557139,
      "grad_norm": 24.454694747924805,
      "learning_rate": 2.967359050445104e-08,
      "loss": 0.3887,
      "step": 2,
      "video_reward_cumulative_accuracy": 0.5
    },
    {
      "epoch": 0.0008904719501335708,
      "grad_norm": 21.62911605834961,
      "learning_rate": 4.451038575667656e-08,
      "loss": 0.2016,
      "step": 3,
      "video_reward_cumulative_accuracy": 0.5
    },
    {
      "epoch": 0.0011872959335114278,
      "grad_norm": 18.787561416625977,
      "learning_rate": 5.934718100890208e-08,
      "loss": 0.1834,
      "step": 4,
      "video_reward_cumulative_accuracy": 0.625
    },
    {
      "epoch": 0.0014841199168892847,
      "grad_norm": 25.317777633666992,
      "learning_rate": 7.418397626112761e-08,
      "loss": 0.3673,
      "step": 5,
      "video_reward_cumulative_accuracy": 0.7
    },
    {
      "epoch": 0.0017809439002671415,
      "grad_norm": 12.732484817504883,
      "learning_rate": 8.902077151335312e-08,
      "loss": 0.1034,
      "step": 6,
      "video_reward_cumulative_accuracy": 0.5833333333333334
    },
    {
      "epoch": 0.0020777678836449986,
      "grad_norm": 20.14723777770996,
      "learning_rate": 1.0385756676557864e-07,
      "loss": 0.2094,
      "step": 7,
      "video_reward_cumulative_accuracy": 0.5714285714285714
    },
    {
      "epoch": 0.0023745918670228555,
      "grad_norm": 33.73411178588867,
      "learning_rate": 1.1869436201780416e-07,
      "loss": 0.3328,
      "step": 8,
      "video_reward_cumulative_accuracy": 0.5625
    },
    {
      "epoch": 0.0026714158504007124,
      "grad_norm": 21.074481964111328,
      "learning_rate": 1.3353115727002968e-07,
      "loss": 0.1414,
      "step": 9,
      "video_reward_cumulative_accuracy": 0.5555555555555556
    },
    {
      "epoch": 0.0029682398337785693,
      "grad_norm": 23.00200080871582,
      "learning_rate": 1.4836795252225522e-07,
      "loss": 0.4463,
      "step": 10,
      "video_reward_cumulative_accuracy": 0.55
    },
    {
      "epoch": 0.003265063817156426,
      "grad_norm": 23.951406478881836,
      "learning_rate": 1.6320474777448073e-07,
      "loss": 0.3231,
      "step": 11,
      "video_reward_cumulative_accuracy": 0.5909090909090909
    },
    {
      "epoch": 0.003561887800534283,
      "grad_norm": 25.632526397705078,
      "learning_rate": 1.7804154302670624e-07,
      "loss": 0.2746,
      "step": 12,
      "video_reward_cumulative_accuracy": 0.625
    },
    {
      "epoch": 0.00385871178391214,
      "grad_norm": 17.966325759887695,
      "learning_rate": 1.9287833827893176e-07,
      "loss": 0.224,
      "step": 13,
      "video_reward_cumulative_accuracy": 0.6538461538461539
    },
    {
      "epoch": 0.004155535767289997,
      "grad_norm": 27.712692260742188,
      "learning_rate": 2.0771513353115727e-07,
      "loss": 0.1764,
      "step": 14,
      "video_reward_cumulative_accuracy": 0.6071428571428571
    },
    {
      "epoch": 0.004452359750667854,
      "grad_norm": 24.923372268676758,
      "learning_rate": 2.225519287833828e-07,
      "loss": 0.2788,
      "step": 15,
      "video_reward_cumulative_accuracy": 0.6333333333333333
    },
    {
      "epoch": 0.004749183734045711,
      "grad_norm": 13.765485763549805,
      "learning_rate": 2.3738872403560833e-07,
      "loss": 0.1745,
      "step": 16,
      "video_reward_cumulative_accuracy": 0.65625
    },
    {
      "epoch": 0.0050460077174235675,
      "grad_norm": 17.97304344177246,
      "learning_rate": 2.5222551928783384e-07,
      "loss": 0.2362,
      "step": 17,
      "video_reward_cumulative_accuracy": 0.6470588235294118
    },
    {
      "epoch": 0.005342831700801425,
      "grad_norm": 10.485559463500977,
      "learning_rate": 2.6706231454005935e-07,
      "loss": 0.1295,
      "step": 18,
      "video_reward_cumulative_accuracy": 0.6666666666666666
    },
    {
      "epoch": 0.005639655684179281,
      "grad_norm": 15.357043266296387,
      "learning_rate": 2.8189910979228487e-07,
      "loss": 0.2338,
      "step": 19,
      "video_reward_cumulative_accuracy": 0.6842105263157895
    },
    {
      "epoch": 0.005936479667557139,
      "grad_norm": 15.526144027709961,
      "learning_rate": 2.9673590504451043e-07,
      "loss": 0.2443,
      "step": 20,
      "video_reward_cumulative_accuracy": 0.7
    },
    {
      "epoch": 0.006233303650934996,
      "grad_norm": 19.776208877563477,
      "learning_rate": 3.1157270029673595e-07,
      "loss": 0.1632,
      "step": 21,
      "video_reward_cumulative_accuracy": 0.6904761904761905
    },
    {
      "epoch": 0.006530127634312852,
      "grad_norm": 31.176301956176758,
      "learning_rate": 3.2640949554896146e-07,
      "loss": 0.3122,
      "step": 22,
      "video_reward_cumulative_accuracy": 0.6590909090909091
    },
    {
      "epoch": 0.00682695161769071,
      "grad_norm": 12.820406913757324,
      "learning_rate": 3.41246290801187e-07,
      "loss": 0.1422,
      "step": 23,
      "video_reward_cumulative_accuracy": 0.6521739130434783
    },
    {
      "epoch": 0.007123775601068566,
      "grad_norm": 12.706981658935547,
      "learning_rate": 3.560830860534125e-07,
      "loss": 0.169,
      "step": 24,
      "video_reward_cumulative_accuracy": 0.6458333333333334
    },
    {
      "epoch": 0.0074205995844464235,
      "grad_norm": 16.90671157836914,
      "learning_rate": 3.70919881305638e-07,
      "loss": 0.1716,
      "step": 25,
      "video_reward_cumulative_accuracy": 0.64
    },
    {
      "epoch": 0.00771742356782428,
      "grad_norm": 12.562002182006836,
      "learning_rate": 3.857566765578635e-07,
      "loss": 0.1565,
      "step": 26,
      "video_reward_cumulative_accuracy": 0.6538461538461539
    },
    {
      "epoch": 0.008014247551202136,
      "grad_norm": 10.192179679870605,
      "learning_rate": 4.005934718100891e-07,
      "loss": 0.1055,
      "step": 27,
      "video_reward_cumulative_accuracy": 0.6481481481481481
    },
    {
      "epoch": 0.008311071534579995,
      "grad_norm": 15.415340423583984,
      "learning_rate": 4.1543026706231454e-07,
      "loss": 0.2231,
      "step": 28,
      "video_reward_cumulative_accuracy": 0.6428571428571429
    },
    {
      "epoch": 0.008607895517957851,
      "grad_norm": 21.16411781311035,
      "learning_rate": 4.302670623145401e-07,
      "loss": 0.3278,
      "step": 29,
      "video_reward_cumulative_accuracy": 0.6379310344827587
    },
    {
      "epoch": 0.008904719501335707,
      "grad_norm": 21.195640563964844,
      "learning_rate": 4.451038575667656e-07,
      "loss": 0.2429,
      "step": 30,
      "video_reward_cumulative_accuracy": 0.6333333333333333
    },
    {
      "epoch": 0.009201543484713566,
      "grad_norm": 10.36475658416748,
      "learning_rate": 4.5994065281899114e-07,
      "loss": 0.125,
      "step": 31,
      "video_reward_cumulative_accuracy": 0.6290322580645161
    },
    {
      "epoch": 0.009498367468091422,
      "grad_norm": 14.530949592590332,
      "learning_rate": 4.7477744807121665e-07,
      "loss": 0.2392,
      "step": 32,
      "video_reward_cumulative_accuracy": 0.640625
    },
    {
      "epoch": 0.009795191451469279,
      "grad_norm": 17.089160919189453,
      "learning_rate": 4.896142433234421e-07,
      "loss": 0.2253,
      "step": 33,
      "video_reward_cumulative_accuracy": 0.6363636363636364
    },
    {
      "epoch": 0.010092015434847135,
      "grad_norm": 14.45820426940918,
      "learning_rate": 5.044510385756677e-07,
      "loss": 0.2298,
      "step": 34,
      "video_reward_cumulative_accuracy": 0.6323529411764706
    },
    {
      "epoch": 0.010388839418224993,
      "grad_norm": 12.284329414367676,
      "learning_rate": 5.192878338278932e-07,
      "loss": 0.0988,
      "step": 35,
      "video_reward_cumulative_accuracy": 0.6428571428571429
    },
    {
      "epoch": 0.01068566340160285,
      "grad_norm": 14.295129776000977,
      "learning_rate": 5.341246290801187e-07,
      "loss": 0.2106,
      "step": 36,
      "video_reward_cumulative_accuracy": 0.6388888888888888
    },
    {
      "epoch": 0.010982487384980706,
      "grad_norm": 7.684972286224365,
      "learning_rate": 5.489614243323443e-07,
      "loss": 0.0939,
      "step": 37,
      "video_reward_cumulative_accuracy": 0.6486486486486487
    },
    {
      "epoch": 0.011279311368358563,
      "grad_norm": 9.849855422973633,
      "learning_rate": 5.637982195845697e-07,
      "loss": 0.1303,
      "step": 38,
      "video_reward_cumulative_accuracy": 0.6578947368421053
    },
    {
      "epoch": 0.01157613535173642,
      "grad_norm": 10.725906372070312,
      "learning_rate": 5.786350148367953e-07,
      "loss": 0.1924,
      "step": 39,
      "video_reward_cumulative_accuracy": 0.6538461538461539
    },
    {
      "epoch": 0.011872959335114277,
      "grad_norm": 9.24576473236084,
      "learning_rate": 5.934718100890209e-07,
      "loss": 0.108,
      "step": 40,
      "video_reward_cumulative_accuracy": 0.6375
    },
    {
      "epoch": 0.012169783318492134,
      "grad_norm": 6.677659511566162,
      "learning_rate": 6.083086053412463e-07,
      "loss": 0.0879,
      "step": 41,
      "video_reward_cumulative_accuracy": 0.6463414634146342
    },
    {
      "epoch": 0.012466607301869992,
      "grad_norm": 7.723426342010498,
      "learning_rate": 6.231454005934719e-07,
      "loss": 0.073,
      "step": 42,
      "video_reward_cumulative_accuracy": 0.6428571428571429
    },
    {
      "epoch": 0.012763431285247848,
      "grad_norm": 10.541435241699219,
      "learning_rate": 6.379821958456974e-07,
      "loss": 0.1862,
      "step": 43,
      "video_reward_cumulative_accuracy": 0.6395348837209303
    },
    {
      "epoch": 0.013060255268625705,
      "grad_norm": 7.638758659362793,
      "learning_rate": 6.528189910979229e-07,
      "loss": 0.1184,
      "step": 44,
      "video_reward_cumulative_accuracy": 0.6477272727272727
    },
    {
      "epoch": 0.013357079252003561,
      "grad_norm": 10.385604858398438,
      "learning_rate": 6.676557863501485e-07,
      "loss": 0.1647,
      "step": 45,
      "video_reward_cumulative_accuracy": 0.6555555555555556
    },
    {
      "epoch": 0.01365390323538142,
      "grad_norm": 6.365200996398926,
      "learning_rate": 6.82492581602374e-07,
      "loss": 0.0985,
      "step": 46,
      "video_reward_cumulative_accuracy": 0.6413043478260869
    },
    {
      "epoch": 0.013950727218759276,
      "grad_norm": 7.856001853942871,
      "learning_rate": 6.973293768545995e-07,
      "loss": 0.1468,
      "step": 47,
      "video_reward_cumulative_accuracy": 0.6382978723404256
    },
    {
      "epoch": 0.014247551202137132,
      "grad_norm": 6.851215839385986,
      "learning_rate": 7.12166172106825e-07,
      "loss": 0.0869,
      "step": 48,
      "video_reward_cumulative_accuracy": 0.6354166666666666
    },
    {
      "epoch": 0.014544375185514989,
      "grad_norm": 7.089118003845215,
      "learning_rate": 7.270029673590504e-07,
      "loss": 0.1313,
      "step": 49,
      "video_reward_cumulative_accuracy": 0.6326530612244898
    },
    {
      "epoch": 0.014841199168892847,
      "grad_norm": 3.806837558746338,
      "learning_rate": 7.41839762611276e-07,
      "loss": 0.0955,
      "step": 50,
      "video_reward_cumulative_accuracy": 0.63
    },
    {
      "epoch": 0.015138023152270703,
      "grad_norm": 3.002065420150757,
      "learning_rate": 7.566765578635016e-07,
      "loss": 0.0829,
      "step": 51,
      "video_reward_cumulative_accuracy": 0.6372549019607843
    },
    {
      "epoch": 0.01543484713564856,
      "grad_norm": 8.006941795349121,
      "learning_rate": 7.71513353115727e-07,
      "loss": 0.1208,
      "step": 52,
      "video_reward_cumulative_accuracy": 0.6442307692307693
    },
    {
      "epoch": 0.015731671119026416,
      "grad_norm": 4.04028844833374,
      "learning_rate": 7.863501483679525e-07,
      "loss": 0.0905,
      "step": 53,
      "video_reward_cumulative_accuracy": 0.6415094339622641
    },
    {
      "epoch": 0.016028495102404273,
      "grad_norm": 11.572624206542969,
      "learning_rate": 8.011869436201782e-07,
      "loss": 0.1112,
      "step": 54,
      "video_reward_cumulative_accuracy": 0.6296296296296297
    },
    {
      "epoch": 0.016325319085782133,
      "grad_norm": 2.590923309326172,
      "learning_rate": 8.160237388724036e-07,
      "loss": 0.0886,
      "step": 55,
      "video_reward_cumulative_accuracy": 0.6363636363636364
    },
    {
      "epoch": 0.01662214306915999,
      "grad_norm": 6.2428717613220215,
      "learning_rate": 8.308605341246291e-07,
      "loss": 0.1035,
      "step": 56,
      "video_reward_cumulative_accuracy": 0.6339285714285714
    },
    {
      "epoch": 0.016918967052537846,
      "grad_norm": 5.258223056793213,
      "learning_rate": 8.456973293768548e-07,
      "loss": 0.0816,
      "step": 57,
      "video_reward_cumulative_accuracy": 0.6403508771929824
    },
    {
      "epoch": 0.017215791035915702,
      "grad_norm": 2.0548617839813232,
      "learning_rate": 8.605341246290802e-07,
      "loss": 0.083,
      "step": 58,
      "video_reward_cumulative_accuracy": 0.6379310344827587
    },
    {
      "epoch": 0.01751261501929356,
      "grad_norm": 6.927907466888428,
      "learning_rate": 8.753709198813057e-07,
      "loss": 0.1002,
      "step": 59,
      "video_reward_cumulative_accuracy": 0.635593220338983
    },
    {
      "epoch": 0.017809439002671415,
      "grad_norm": 6.766244411468506,
      "learning_rate": 8.902077151335312e-07,
      "loss": 0.096,
      "step": 60,
      "video_reward_cumulative_accuracy": 0.6416666666666667
    },
    {
      "epoch": 0.01810626298604927,
      "grad_norm": 3.500997304916382,
      "learning_rate": 9.050445103857568e-07,
      "loss": 0.0837,
      "step": 61,
      "video_reward_cumulative_accuracy": 0.6475409836065574
    },
    {
      "epoch": 0.01840308696942713,
      "grad_norm": 2.852778196334839,
      "learning_rate": 9.198813056379823e-07,
      "loss": 0.0913,
      "step": 62,
      "video_reward_cumulative_accuracy": 0.6451612903225806
    },
    {
      "epoch": 0.018699910952804988,
      "grad_norm": 10.269067764282227,
      "learning_rate": 9.347181008902077e-07,
      "loss": 0.0754,
      "step": 63,
      "video_reward_cumulative_accuracy": 0.6507936507936508
    },
    {
      "epoch": 0.018996734936182844,
      "grad_norm": 3.8665616512298584,
      "learning_rate": 9.495548961424333e-07,
      "loss": 0.0886,
      "step": 64,
      "video_reward_cumulative_accuracy": 0.65625
    },
    {
      "epoch": 0.0192935589195607,
      "grad_norm": 2.6777122020721436,
      "learning_rate": 9.643916913946588e-07,
      "loss": 0.0877,
      "step": 65,
      "video_reward_cumulative_accuracy": 0.6615384615384615
    },
    {
      "epoch": 0.019590382902938557,
      "grad_norm": 1.6913548707962036,
      "learning_rate": 9.792284866468842e-07,
      "loss": 0.0684,
      "step": 66,
      "video_reward_cumulative_accuracy": 0.6666666666666666
    },
    {
      "epoch": 0.019887206886316414,
      "grad_norm": 2.17547345161438,
      "learning_rate": 9.9406528189911e-07,
      "loss": 0.0903,
      "step": 67,
      "video_reward_cumulative_accuracy": 0.6716417910447762
    },
    {
      "epoch": 0.02018403086969427,
      "grad_norm": 9.660552978515625,
      "learning_rate": 1.0089020771513354e-06,
      "loss": 0.1137,
      "step": 68,
      "video_reward_cumulative_accuracy": 0.6617647058823529
    },
    {
      "epoch": 0.020480854853072127,
      "grad_norm": 6.081414222717285,
      "learning_rate": 1.0237388724035608e-06,
      "loss": 0.0836,
      "step": 69,
      "video_reward_cumulative_accuracy": 0.6594202898550725
    },
    {
      "epoch": 0.020777678836449986,
      "grad_norm": 5.362737655639648,
      "learning_rate": 1.0385756676557865e-06,
      "loss": 0.0828,
      "step": 70,
      "video_reward_cumulative_accuracy": 0.6642857142857143
    },
    {
      "epoch": 0.021074502819827843,
      "grad_norm": 3.4765052795410156,
      "learning_rate": 1.053412462908012e-06,
      "loss": 0.0678,
      "step": 71,
      "video_reward_cumulative_accuracy": 0.6619718309859155
    },
    {
      "epoch": 0.0213713268032057,
      "grad_norm": 3.868277072906494,
      "learning_rate": 1.0682492581602374e-06,
      "loss": 0.0704,
      "step": 72,
      "video_reward_cumulative_accuracy": 0.6666666666666666
    },
    {
      "epoch": 0.021668150786583556,
      "grad_norm": 2.056610107421875,
      "learning_rate": 1.083086053412463e-06,
      "loss": 0.0699,
      "step": 73,
      "video_reward_cumulative_accuracy": 0.6712328767123288
    },
    {
      "epoch": 0.021964974769961412,
      "grad_norm": 4.672041893005371,
      "learning_rate": 1.0979228486646885e-06,
      "loss": 0.0789,
      "step": 74,
      "video_reward_cumulative_accuracy": 0.668918918918919
    },
    {
      "epoch": 0.02226179875333927,
      "grad_norm": 10.368182182312012,
      "learning_rate": 1.112759643916914e-06,
      "loss": 0.0785,
      "step": 75,
      "video_reward_cumulative_accuracy": 0.6666666666666666
    },
    {
      "epoch": 0.022558622736717125,
      "grad_norm": 5.704588890075684,
      "learning_rate": 1.1275964391691395e-06,
      "loss": 0.0781,
      "step": 76,
      "video_reward_cumulative_accuracy": 0.6710526315789473
    },
    {
      "epoch": 0.022855446720094985,
      "grad_norm": 4.419633388519287,
      "learning_rate": 1.1424332344213651e-06,
      "loss": 0.0859,
      "step": 77,
      "video_reward_cumulative_accuracy": 0.6623376623376623
    },
    {
      "epoch": 0.02315227070347284,
      "grad_norm": 8.239113807678223,
      "learning_rate": 1.1572700296735906e-06,
      "loss": 0.0967,
      "step": 78,
      "video_reward_cumulative_accuracy": 0.6538461538461539
    },
    {
      "epoch": 0.023449094686850698,
      "grad_norm": 2.648416519165039,
      "learning_rate": 1.172106824925816e-06,
      "loss": 0.0906,
      "step": 79,
      "video_reward_cumulative_accuracy": 0.6582278481012658
    },
    {
      "epoch": 0.023745918670228554,
      "grad_norm": 6.028584003448486,
      "learning_rate": 1.1869436201780417e-06,
      "loss": 0.0789,
      "step": 80,
      "video_reward_cumulative_accuracy": 0.65625
    },
    {
      "epoch": 0.02404274265360641,
      "grad_norm": 4.417842864990234,
      "learning_rate": 1.2017804154302672e-06,
      "loss": 0.0756,
      "step": 81,
      "video_reward_cumulative_accuracy": 0.654320987654321
    },
    {
      "epoch": 0.024339566636984267,
      "grad_norm": 5.482295513153076,
      "learning_rate": 1.2166172106824927e-06,
      "loss": 0.0518,
      "step": 82,
      "video_reward_cumulative_accuracy": 0.6585365853658537
    },
    {
      "epoch": 0.024636390620362124,
      "grad_norm": 3.1841440200805664,
      "learning_rate": 1.2314540059347183e-06,
      "loss": 0.0881,
      "step": 83,
      "video_reward_cumulative_accuracy": 0.6566265060240963
    },
    {
      "epoch": 0.024933214603739984,
      "grad_norm": 5.477931976318359,
      "learning_rate": 1.2462908011869438e-06,
      "loss": 0.0712,
      "step": 84,
      "video_reward_cumulative_accuracy": 0.6547619047619048
    },
    {
      "epoch": 0.02523003858711784,
      "grad_norm": 2.8534862995147705,
      "learning_rate": 1.2611275964391693e-06,
      "loss": 0.0796,
      "step": 85,
      "video_reward_cumulative_accuracy": 0.6588235294117647
    },
    {
      "epoch": 0.025526862570495697,
      "grad_norm": 6.181415557861328,
      "learning_rate": 1.2759643916913947e-06,
      "loss": 0.0667,
      "step": 86,
      "video_reward_cumulative_accuracy": 0.6569767441860465
    },
    {
      "epoch": 0.025823686553873553,
      "grad_norm": 4.147750377655029,
      "learning_rate": 1.2908011869436202e-06,
      "loss": 0.0738,
      "step": 87,
      "video_reward_cumulative_accuracy": 0.6609195402298851
    },
    {
      "epoch": 0.02612051053725141,
      "grad_norm": 4.225668907165527,
      "learning_rate": 1.3056379821958458e-06,
      "loss": 0.1034,
      "step": 88,
      "video_reward_cumulative_accuracy": 0.6647727272727273
    },
    {
      "epoch": 0.026417334520629266,
      "grad_norm": 2.278589963912964,
      "learning_rate": 1.3204747774480713e-06,
      "loss": 0.0505,
      "step": 89,
      "video_reward_cumulative_accuracy": 0.6685393258426966
    },
    {
      "epoch": 0.026714158504007122,
      "grad_norm": 6.009743690490723,
      "learning_rate": 1.335311572700297e-06,
      "loss": 0.0665,
      "step": 90,
      "video_reward_cumulative_accuracy": 0.6722222222222223
    },
    {
      "epoch": 0.027010982487384982,
      "grad_norm": 3.622457265853882,
      "learning_rate": 1.3501483679525224e-06,
      "loss": 0.0803,
      "step": 91,
      "video_reward_cumulative_accuracy": 0.6703296703296703
    },
    {
      "epoch": 0.02730780647076284,
      "grad_norm": 2.3656132221221924,
      "learning_rate": 1.364985163204748e-06,
      "loss": 0.0679,
      "step": 92,
      "video_reward_cumulative_accuracy": 0.6739130434782609
    },
    {
      "epoch": 0.027604630454140695,
      "grad_norm": 5.818204879760742,
      "learning_rate": 1.3798219584569734e-06,
      "loss": 0.0931,
      "step": 93,
      "video_reward_cumulative_accuracy": 0.6720430107526881
    },
    {
      "epoch": 0.027901454437518552,
      "grad_norm": 3.5401792526245117,
      "learning_rate": 1.394658753709199e-06,
      "loss": 0.0723,
      "step": 94,
      "video_reward_cumulative_accuracy": 0.675531914893617
    },
    {
      "epoch": 0.028198278420896408,
      "grad_norm": 6.389036178588867,
      "learning_rate": 1.4094955489614245e-06,
      "loss": 0.0554,
      "step": 95,
      "video_reward_cumulative_accuracy": 0.6789473684210526
    },
    {
      "epoch": 0.028495102404274265,
      "grad_norm": 3.825948476791382,
      "learning_rate": 1.42433234421365e-06,
      "loss": 0.0705,
      "step": 96,
      "video_reward_cumulative_accuracy": 0.6822916666666666
    },
    {
      "epoch": 0.02879192638765212,
      "grad_norm": 3.565723180770874,
      "learning_rate": 1.4391691394658754e-06,
      "loss": 0.0737,
      "step": 97,
      "video_reward_cumulative_accuracy": 0.6855670103092784
    },
    {
      "epoch": 0.029088750371029978,
      "grad_norm": 2.599555253982544,
      "learning_rate": 1.4540059347181009e-06,
      "loss": 0.0468,
      "step": 98,
      "video_reward_cumulative_accuracy": 0.6887755102040817
    },
    {
      "epoch": 0.029385574354407838,
      "grad_norm": 2.7549595832824707,
      "learning_rate": 1.4688427299703265e-06,
      "loss": 0.0644,
      "step": 99,
      "video_reward_cumulative_accuracy": 0.6919191919191919
    },
    {
      "epoch": 0.029682398337785694,
      "grad_norm": 5.881991386413574,
      "learning_rate": 1.483679525222552e-06,
      "loss": 0.0955,
      "step": 100,
      "video_reward_cumulative_accuracy": 0.685
    },
    {
      "epoch": 0.02997922232116355,
      "grad_norm": 2.9013118743896484,
      "learning_rate": 1.4985163204747777e-06,
      "loss": 0.0603,
      "step": 101,
      "video_reward_cumulative_accuracy": 0.6881188118811881
    },
    {
      "epoch": 0.030276046304541407,
      "grad_norm": 3.3732762336730957,
      "learning_rate": 1.5133531157270031e-06,
      "loss": 0.0615,
      "step": 102,
      "video_reward_cumulative_accuracy": 0.6911764705882353
    },
    {
      "epoch": 0.030572870287919263,
      "grad_norm": 4.168172359466553,
      "learning_rate": 1.5281899109792286e-06,
      "loss": 0.1154,
      "step": 103,
      "video_reward_cumulative_accuracy": 0.6893203883495146
    },
    {
      "epoch": 0.03086969427129712,
      "grad_norm": 3.426560163497925,
      "learning_rate": 1.543026706231454e-06,
      "loss": 0.0481,
      "step": 104,
      "video_reward_cumulative_accuracy": 0.6923076923076923
    },
    {
      "epoch": 0.031166518254674976,
      "grad_norm": 8.7980375289917,
      "learning_rate": 1.5578635014836795e-06,
      "loss": 0.116,
      "step": 105,
      "video_reward_cumulative_accuracy": 0.6952380952380952
    },
    {
      "epoch": 0.03146334223805283,
      "grad_norm": 3.171637773513794,
      "learning_rate": 1.572700296735905e-06,
      "loss": 0.0592,
      "step": 106,
      "video_reward_cumulative_accuracy": 0.6933962264150944
    },
    {
      "epoch": 0.03176016622143069,
      "grad_norm": 2.1238746643066406,
      "learning_rate": 1.5875370919881309e-06,
      "loss": 0.0498,
      "step": 107,
      "video_reward_cumulative_accuracy": 0.6962616822429907
    },
    {
      "epoch": 0.032056990204808546,
      "grad_norm": 3.829890727996826,
      "learning_rate": 1.6023738872403563e-06,
      "loss": 0.0897,
      "step": 108,
      "video_reward_cumulative_accuracy": 0.6990740740740741
    },
    {
      "epoch": 0.032353814188186406,
      "grad_norm": 3.1230406761169434,
      "learning_rate": 1.6172106824925818e-06,
      "loss": 0.0446,
      "step": 109,
      "video_reward_cumulative_accuracy": 0.7018348623853211
    },
    {
      "epoch": 0.032650638171564265,
      "grad_norm": 5.97599983215332,
      "learning_rate": 1.6320474777448073e-06,
      "loss": 0.1308,
      "step": 110,
      "video_reward_cumulative_accuracy": 0.7045454545454546
    },
    {
      "epoch": 0.03294746215494212,
      "grad_norm": 2.300419330596924,
      "learning_rate": 1.6468842729970327e-06,
      "loss": 0.0632,
      "step": 111,
      "video_reward_cumulative_accuracy": 0.7072072072072072
    },
    {
      "epoch": 0.03324428613831998,
      "grad_norm": 2.3834316730499268,
      "learning_rate": 1.6617210682492582e-06,
      "loss": 0.0585,
      "step": 112,
      "video_reward_cumulative_accuracy": 0.7098214285714286
    },
    {
      "epoch": 0.03354111012169783,
      "grad_norm": 2.218229293823242,
      "learning_rate": 1.6765578635014836e-06,
      "loss": 0.0574,
      "step": 113,
      "video_reward_cumulative_accuracy": 0.7079646017699115
    },
    {
      "epoch": 0.03383793410507569,
      "grad_norm": 6.411158084869385,
      "learning_rate": 1.6913946587537095e-06,
      "loss": 0.0608,
      "step": 114,
      "video_reward_cumulative_accuracy": 0.706140350877193
    },
    {
      "epoch": 0.034134758088453544,
      "grad_norm": 3.1801657676696777,
      "learning_rate": 1.706231454005935e-06,
      "loss": 0.0894,
      "step": 115,
      "video_reward_cumulative_accuracy": 0.7043478260869566
    },
    {
      "epoch": 0.034431582071831404,
      "grad_norm": 6.5272536277771,
      "learning_rate": 1.7210682492581604e-06,
      "loss": 0.0919,
      "step": 116,
      "video_reward_cumulative_accuracy": 0.7025862068965517
    },
    {
      "epoch": 0.034728406055209264,
      "grad_norm": 4.742123603820801,
      "learning_rate": 1.735905044510386e-06,
      "loss": 0.0324,
      "step": 117,
      "video_reward_cumulative_accuracy": 0.7051282051282052
    },
    {
      "epoch": 0.03502523003858712,
      "grad_norm": 3.718932628631592,
      "learning_rate": 1.7507418397626114e-06,
      "loss": 0.0611,
      "step": 118,
      "video_reward_cumulative_accuracy": 0.7033898305084746
    },
    {
      "epoch": 0.03532205402196498,
      "grad_norm": 4.320330619812012,
      "learning_rate": 1.7655786350148368e-06,
      "loss": 0.054,
      "step": 119,
      "video_reward_cumulative_accuracy": 0.7058823529411765
    },
    {
      "epoch": 0.03561887800534283,
      "grad_norm": 4.672208786010742,
      "learning_rate": 1.7804154302670625e-06,
      "loss": 0.0525,
      "step": 120,
      "video_reward_cumulative_accuracy": 0.7083333333333334
    },
    {
      "epoch": 0.03591570198872069,
      "grad_norm": 2.6272125244140625,
      "learning_rate": 1.7952522255192882e-06,
      "loss": 0.0652,
      "step": 121,
      "video_reward_cumulative_accuracy": 0.7107438016528925
    },
    {
      "epoch": 0.03621252597209854,
      "grad_norm": 1.9446464776992798,
      "learning_rate": 1.8100890207715136e-06,
      "loss": 0.043,
      "step": 122,
      "video_reward_cumulative_accuracy": 0.7131147540983607
    },
    {
      "epoch": 0.0365093499554764,
      "grad_norm": 4.1938910484313965,
      "learning_rate": 1.824925816023739e-06,
      "loss": 0.1023,
      "step": 123,
      "video_reward_cumulative_accuracy": 0.7154471544715447
    },
    {
      "epoch": 0.03680617393885426,
      "grad_norm": 3.999626874923706,
      "learning_rate": 1.8397626112759646e-06,
      "loss": 0.062,
      "step": 124,
      "video_reward_cumulative_accuracy": 0.7137096774193549
    },
    {
      "epoch": 0.037102997922232116,
      "grad_norm": 2.076876640319824,
      "learning_rate": 1.85459940652819e-06,
      "loss": 0.0479,
      "step": 125,
      "video_reward_cumulative_accuracy": 0.716
    },
    {
      "epoch": 0.037399821905609976,
      "grad_norm": 6.920149326324463,
      "learning_rate": 1.8694362017804155e-06,
      "loss": 0.0538,
      "step": 126,
      "video_reward_cumulative_accuracy": 0.7182539682539683
    },
    {
      "epoch": 0.03769664588898783,
      "grad_norm": 2.615006923675537,
      "learning_rate": 1.8842729970326411e-06,
      "loss": 0.0352,
      "step": 127,
      "video_reward_cumulative_accuracy": 0.7204724409448819
    },
    {
      "epoch": 0.03799346987236569,
      "grad_norm": 2.167612075805664,
      "learning_rate": 1.8991097922848666e-06,
      "loss": 0.0397,
      "step": 128,
      "video_reward_cumulative_accuracy": 0.72265625
    },
    {
      "epoch": 0.03829029385574354,
      "grad_norm": 3.5347766876220703,
      "learning_rate": 1.9139465875370923e-06,
      "loss": 0.0699,
      "step": 129,
      "video_reward_cumulative_accuracy": 0.7209302325581395
    },
    {
      "epoch": 0.0385871178391214,
      "grad_norm": 6.354689121246338,
      "learning_rate": 1.9287833827893175e-06,
      "loss": 0.0554,
      "step": 130,
      "video_reward_cumulative_accuracy": 0.7192307692307692
    },
    {
      "epoch": 0.03888394182249926,
      "grad_norm": 2.4313793182373047,
      "learning_rate": 1.943620178041543e-06,
      "loss": 0.0529,
      "step": 131,
      "video_reward_cumulative_accuracy": 0.7175572519083969
    },
    {
      "epoch": 0.039180765805877114,
      "grad_norm": 5.276021480560303,
      "learning_rate": 1.9584569732937684e-06,
      "loss": 0.0958,
      "step": 132,
      "video_reward_cumulative_accuracy": 0.7159090909090909
    },
    {
      "epoch": 0.039477589789254974,
      "grad_norm": 6.521552562713623,
      "learning_rate": 1.9732937685459945e-06,
      "loss": 0.0698,
      "step": 133,
      "video_reward_cumulative_accuracy": 0.7142857142857143
    },
    {
      "epoch": 0.03977441377263283,
      "grad_norm": 4.296199798583984,
      "learning_rate": 1.98813056379822e-06,
      "loss": 0.0632,
      "step": 134,
      "video_reward_cumulative_accuracy": 0.7089552238805971
    },
    {
      "epoch": 0.04007123775601069,
      "grad_norm": 5.775026321411133,
      "learning_rate": 2.0029673590504455e-06,
      "loss": 0.1013,
      "step": 135,
      "video_reward_cumulative_accuracy": 0.7111111111111111
    },
    {
      "epoch": 0.04036806173938854,
      "grad_norm": 2.675187349319458,
      "learning_rate": 2.0178041543026707e-06,
      "loss": 0.0515,
      "step": 136,
      "video_reward_cumulative_accuracy": 0.7132352941176471
    },
    {
      "epoch": 0.0406648857227664,
      "grad_norm": 6.260149955749512,
      "learning_rate": 2.0326409495548964e-06,
      "loss": 0.0883,
      "step": 137,
      "video_reward_cumulative_accuracy": 0.7153284671532847
    },
    {
      "epoch": 0.04096170970614425,
      "grad_norm": 11.186830520629883,
      "learning_rate": 2.0474777448071216e-06,
      "loss": 0.0918,
      "step": 138,
      "video_reward_cumulative_accuracy": 0.717391304347826
    },
    {
      "epoch": 0.04125853368952211,
      "grad_norm": 6.043707847595215,
      "learning_rate": 2.0623145400593473e-06,
      "loss": 0.085,
      "step": 139,
      "video_reward_cumulative_accuracy": 0.7158273381294964
    },
    {
      "epoch": 0.04155535767289997,
      "grad_norm": 9.900096893310547,
      "learning_rate": 2.077151335311573e-06,
      "loss": 0.0977,
      "step": 140,
      "video_reward_cumulative_accuracy": 0.7178571428571429
    },
    {
      "epoch": 0.041852181656277826,
      "grad_norm": 2.8422248363494873,
      "learning_rate": 2.0919881305637987e-06,
      "loss": 0.0399,
      "step": 141,
      "video_reward_cumulative_accuracy": 0.7163120567375887
    },
    {
      "epoch": 0.042149005639655686,
      "grad_norm": 2.6769790649414062,
      "learning_rate": 2.106824925816024e-06,
      "loss": 0.0355,
      "step": 142,
      "video_reward_cumulative_accuracy": 0.7183098591549296
    },
    {
      "epoch": 0.04244582962303354,
      "grad_norm": 3.1609225273132324,
      "learning_rate": 2.1216617210682496e-06,
      "loss": 0.0677,
      "step": 143,
      "video_reward_cumulative_accuracy": 0.7132867132867133
    },
    {
      "epoch": 0.0427426536064114,
      "grad_norm": 2.4717013835906982,
      "learning_rate": 2.136498516320475e-06,
      "loss": 0.0365,
      "step": 144,
      "video_reward_cumulative_accuracy": 0.7118055555555556
    },
    {
      "epoch": 0.04303947758978925,
      "grad_norm": 8.373668670654297,
      "learning_rate": 2.1513353115727005e-06,
      "loss": 0.1185,
      "step": 145,
      "video_reward_cumulative_accuracy": 0.7103448275862069
    },
    {
      "epoch": 0.04333630157316711,
      "grad_norm": 2.4484148025512695,
      "learning_rate": 2.166172106824926e-06,
      "loss": 0.0479,
      "step": 146,
      "video_reward_cumulative_accuracy": 0.7123287671232876
    },
    {
      "epoch": 0.04363312555654497,
      "grad_norm": 4.690200328826904,
      "learning_rate": 2.1810089020771514e-06,
      "loss": 0.0706,
      "step": 147,
      "video_reward_cumulative_accuracy": 0.7142857142857143
    },
    {
      "epoch": 0.043929949539922825,
      "grad_norm": 5.8376898765563965,
      "learning_rate": 2.195845697329377e-06,
      "loss": 0.0849,
      "step": 148,
      "video_reward_cumulative_accuracy": 0.7162162162162162
    },
    {
      "epoch": 0.044226773523300685,
      "grad_norm": 2.741074562072754,
      "learning_rate": 2.2106824925816028e-06,
      "loss": 0.0741,
      "step": 149,
      "video_reward_cumulative_accuracy": 0.7114093959731543
    },
    {
      "epoch": 0.04452359750667854,
      "grad_norm": 5.629610061645508,
      "learning_rate": 2.225519287833828e-06,
      "loss": 0.0525,
      "step": 150,
      "video_reward_cumulative_accuracy": 0.71
    },
    {
      "epoch": 0.0448204214900564,
      "grad_norm": 6.277879238128662,
      "learning_rate": 2.2403560830860537e-06,
      "loss": 0.0803,
      "step": 151,
      "video_reward_cumulative_accuracy": 0.7086092715231788
    },
    {
      "epoch": 0.04511724547343425,
      "grad_norm": 2.9074411392211914,
      "learning_rate": 2.255192878338279e-06,
      "loss": 0.0607,
      "step": 152,
      "video_reward_cumulative_accuracy": 0.7105263157894737
    },
    {
      "epoch": 0.04541406945681211,
      "grad_norm": 8.067234992980957,
      "learning_rate": 2.2700296735905046e-06,
      "loss": 0.0987,
      "step": 153,
      "video_reward_cumulative_accuracy": 0.7091503267973857
    },
    {
      "epoch": 0.04571089344018997,
      "grad_norm": 3.610557794570923,
      "learning_rate": 2.2848664688427303e-06,
      "loss": 0.0549,
      "step": 154,
      "video_reward_cumulative_accuracy": 0.7077922077922078
    },
    {
      "epoch": 0.04600771742356782,
      "grad_norm": 3.5607757568359375,
      "learning_rate": 2.2997032640949555e-06,
      "loss": 0.0864,
      "step": 155,
      "video_reward_cumulative_accuracy": 0.7064516129032258
    },
    {
      "epoch": 0.04630454140694568,
      "grad_norm": 4.017952919006348,
      "learning_rate": 2.314540059347181e-06,
      "loss": 0.0522,
      "step": 156,
      "video_reward_cumulative_accuracy": 0.7083333333333334
    },
    {
      "epoch": 0.046601365390323536,
      "grad_norm": 4.041268825531006,
      "learning_rate": 2.3293768545994065e-06,
      "loss": 0.0456,
      "step": 157,
      "video_reward_cumulative_accuracy": 0.7101910828025477
    },
    {
      "epoch": 0.046898189373701396,
      "grad_norm": 2.6721463203430176,
      "learning_rate": 2.344213649851632e-06,
      "loss": 0.0379,
      "step": 158,
      "video_reward_cumulative_accuracy": 0.7120253164556962
    },
    {
      "epoch": 0.04719501335707925,
      "grad_norm": 5.769506931304932,
      "learning_rate": 2.359050445103858e-06,
      "loss": 0.0505,
      "step": 159,
      "video_reward_cumulative_accuracy": 0.710691823899371
    },
    {
      "epoch": 0.04749183734045711,
      "grad_norm": 2.384072780609131,
      "learning_rate": 2.3738872403560835e-06,
      "loss": 0.0713,
      "step": 160,
      "video_reward_cumulative_accuracy": 0.70625
    },
    {
      "epoch": 0.04778866132383497,
      "grad_norm": 4.968862533569336,
      "learning_rate": 2.3887240356083087e-06,
      "loss": 0.0545,
      "step": 161,
      "video_reward_cumulative_accuracy": 0.7080745341614907
    },
    {
      "epoch": 0.04808548530721282,
      "grad_norm": 2.6680426597595215,
      "learning_rate": 2.4035608308605344e-06,
      "loss": 0.0545,
      "step": 162,
      "video_reward_cumulative_accuracy": 0.7098765432098766
    },
    {
      "epoch": 0.04838230929059068,
      "grad_norm": 5.463686943054199,
      "learning_rate": 2.4183976261127596e-06,
      "loss": 0.0798,
      "step": 163,
      "video_reward_cumulative_accuracy": 0.7085889570552147
    },
    {
      "epoch": 0.048679133273968535,
      "grad_norm": 4.160032749176025,
      "learning_rate": 2.4332344213649853e-06,
      "loss": 0.0885,
      "step": 164,
      "video_reward_cumulative_accuracy": 0.7042682926829268
    },
    {
      "epoch": 0.048975957257346395,
      "grad_norm": 8.451370239257812,
      "learning_rate": 2.4480712166172106e-06,
      "loss": 0.0836,
      "step": 165,
      "video_reward_cumulative_accuracy": 0.706060606060606
    },
    {
      "epoch": 0.04927278124072425,
      "grad_norm": 2.2728710174560547,
      "learning_rate": 2.4629080118694367e-06,
      "loss": 0.039,
      "step": 166,
      "video_reward_cumulative_accuracy": 0.7078313253012049
    },
    {
      "epoch": 0.04956960522410211,
      "grad_norm": 6.519056797027588,
      "learning_rate": 2.477744807121662e-06,
      "loss": 0.0594,
      "step": 167,
      "video_reward_cumulative_accuracy": 0.7065868263473054
    },
    {
      "epoch": 0.04986642920747997,
      "grad_norm": 11.774324417114258,
      "learning_rate": 2.4925816023738876e-06,
      "loss": 0.1125,
      "step": 168,
      "video_reward_cumulative_accuracy": 0.7083333333333334
    },
    {
      "epoch": 0.05016325319085782,
      "grad_norm": 1.901655673980713,
      "learning_rate": 2.507418397626113e-06,
      "loss": 0.0589,
      "step": 169,
      "video_reward_cumulative_accuracy": 0.7100591715976331
    },
    {
      "epoch": 0.05046007717423568,
      "grad_norm": 1.9185417890548706,
      "learning_rate": 2.5222551928783385e-06,
      "loss": 0.041,
      "step": 170,
      "video_reward_cumulative_accuracy": 0.711764705882353
    },
    {
      "epoch": 0.05075690115761353,
      "grad_norm": 1.7406995296478271,
      "learning_rate": 2.5370919881305638e-06,
      "loss": 0.0625,
      "step": 171,
      "video_reward_cumulative_accuracy": 0.7105263157894737
    },
    {
      "epoch": 0.05105372514099139,
      "grad_norm": 2.75722074508667,
      "learning_rate": 2.5519287833827894e-06,
      "loss": 0.036,
      "step": 172,
      "video_reward_cumulative_accuracy": 0.7122093023255814
    },
    {
      "epoch": 0.051350549124369246,
      "grad_norm": 4.977455139160156,
      "learning_rate": 2.5667655786350147e-06,
      "loss": 0.0586,
      "step": 173,
      "video_reward_cumulative_accuracy": 0.7109826589595376
    },
    {
      "epoch": 0.051647373107747106,
      "grad_norm": 3.5028724670410156,
      "learning_rate": 2.5816023738872403e-06,
      "loss": 0.0368,
      "step": 174,
      "video_reward_cumulative_accuracy": 0.7126436781609196
    },
    {
      "epoch": 0.051944197091124966,
      "grad_norm": 4.553066730499268,
      "learning_rate": 2.5964391691394664e-06,
      "loss": 0.042,
      "step": 175,
      "video_reward_cumulative_accuracy": 0.7142857142857143
    },
    {
      "epoch": 0.05224102107450282,
      "grad_norm": 5.41193962097168,
      "learning_rate": 2.6112759643916917e-06,
      "loss": 0.0806,
      "step": 176,
      "video_reward_cumulative_accuracy": 0.7159090909090909
    },
    {
      "epoch": 0.05253784505788068,
      "grad_norm": 2.606978178024292,
      "learning_rate": 2.6261127596439174e-06,
      "loss": 0.0336,
      "step": 177,
      "video_reward_cumulative_accuracy": 0.7175141242937854
    },
    {
      "epoch": 0.05283466904125853,
      "grad_norm": 3.2309417724609375,
      "learning_rate": 2.6409495548961426e-06,
      "loss": 0.0476,
      "step": 178,
      "video_reward_cumulative_accuracy": 0.7191011235955056
    },
    {
      "epoch": 0.05313149302463639,
      "grad_norm": 4.6940178871154785,
      "learning_rate": 2.6557863501483683e-06,
      "loss": 0.0644,
      "step": 179,
      "video_reward_cumulative_accuracy": 0.7178770949720671
    },
    {
      "epoch": 0.053428317008014245,
      "grad_norm": 4.964528560638428,
      "learning_rate": 2.670623145400594e-06,
      "loss": 0.0668,
      "step": 180,
      "video_reward_cumulative_accuracy": 0.7166666666666667
    },
    {
      "epoch": 0.053725140991392105,
      "grad_norm": 1.1753082275390625,
      "learning_rate": 2.685459940652819e-06,
      "loss": 0.0107,
      "step": 181,
      "video_reward_cumulative_accuracy": 0.7182320441988951
    },
    {
      "epoch": 0.054021964974769965,
      "grad_norm": 3.13619065284729,
      "learning_rate": 2.700296735905045e-06,
      "loss": 0.0513,
      "step": 182,
      "video_reward_cumulative_accuracy": 0.717032967032967
    },
    {
      "epoch": 0.05431878895814782,
      "grad_norm": 8.615299224853516,
      "learning_rate": 2.71513353115727e-06,
      "loss": 0.1667,
      "step": 183,
      "video_reward_cumulative_accuracy": 0.7158469945355191
    },
    {
      "epoch": 0.05461561294152568,
      "grad_norm": 8.474091529846191,
      "learning_rate": 2.729970326409496e-06,
      "loss": 0.0565,
      "step": 184,
      "video_reward_cumulative_accuracy": 0.717391304347826
    },
    {
      "epoch": 0.05491243692490353,
      "grad_norm": 3.5511362552642822,
      "learning_rate": 2.744807121661721e-06,
      "loss": 0.0296,
      "step": 185,
      "video_reward_cumulative_accuracy": 0.7189189189189189
    },
    {
      "epoch": 0.05520926090828139,
      "grad_norm": 3.176490306854248,
      "learning_rate": 2.7596439169139467e-06,
      "loss": 0.0326,
      "step": 186,
      "video_reward_cumulative_accuracy": 0.7204301075268817
    },
    {
      "epoch": 0.055506084891659244,
      "grad_norm": 4.008922100067139,
      "learning_rate": 2.774480712166172e-06,
      "loss": 0.0519,
      "step": 187,
      "video_reward_cumulative_accuracy": 0.7192513368983957
    },
    {
      "epoch": 0.055802908875037104,
      "grad_norm": 6.8299560546875,
      "learning_rate": 2.789317507418398e-06,
      "loss": 0.1015,
      "step": 188,
      "video_reward_cumulative_accuracy": 0.7154255319148937
    },
    {
      "epoch": 0.05609973285841496,
      "grad_norm": 2.7319183349609375,
      "learning_rate": 2.8041543026706237e-06,
      "loss": 0.0582,
      "step": 189,
      "video_reward_cumulative_accuracy": 0.716931216931217
    },
    {
      "epoch": 0.056396556841792816,
      "grad_norm": 7.440029621124268,
      "learning_rate": 2.818991097922849e-06,
      "loss": 0.0758,
      "step": 190,
      "video_reward_cumulative_accuracy": 0.7157894736842105
    },
    {
      "epoch": 0.056693380825170676,
      "grad_norm": 8.07271957397461,
      "learning_rate": 2.8338278931750747e-06,
      "loss": 0.0519,
      "step": 191,
      "video_reward_cumulative_accuracy": 0.7172774869109948
    },
    {
      "epoch": 0.05699020480854853,
      "grad_norm": 4.536227703094482,
      "learning_rate": 2.8486646884273e-06,
      "loss": 0.0574,
      "step": 192,
      "video_reward_cumulative_accuracy": 0.71875
    },
    {
      "epoch": 0.05728702879192639,
      "grad_norm": 2.7957005500793457,
      "learning_rate": 2.8635014836795256e-06,
      "loss": 0.0303,
      "step": 193,
      "video_reward_cumulative_accuracy": 0.7176165803108808
    },
    {
      "epoch": 0.05758385277530424,
      "grad_norm": 2.4321742057800293,
      "learning_rate": 2.878338278931751e-06,
      "loss": 0.0494,
      "step": 194,
      "video_reward_cumulative_accuracy": 0.7190721649484536
    },
    {
      "epoch": 0.0578806767586821,
      "grad_norm": 3.885902166366577,
      "learning_rate": 2.8931750741839765e-06,
      "loss": 0.0772,
      "step": 195,
      "video_reward_cumulative_accuracy": 0.7205128205128205
    },
    {
      "epoch": 0.058177500742059955,
      "grad_norm": 6.06294584274292,
      "learning_rate": 2.9080118694362018e-06,
      "loss": 0.0446,
      "step": 196,
      "video_reward_cumulative_accuracy": 0.7219387755102041
    },
    {
      "epoch": 0.058474324725437815,
      "grad_norm": 2.9600000381469727,
      "learning_rate": 2.9228486646884274e-06,
      "loss": 0.0723,
      "step": 197,
      "video_reward_cumulative_accuracy": 0.7233502538071066
    },
    {
      "epoch": 0.058771148708815675,
      "grad_norm": 5.309525012969971,
      "learning_rate": 2.937685459940653e-06,
      "loss": 0.0678,
      "step": 198,
      "video_reward_cumulative_accuracy": 0.7247474747474747
    },
    {
      "epoch": 0.05906797269219353,
      "grad_norm": 6.021256446838379,
      "learning_rate": 2.9525222551928783e-06,
      "loss": 0.1086,
      "step": 199,
      "video_reward_cumulative_accuracy": 0.7236180904522613
    },
    {
      "epoch": 0.05936479667557139,
      "grad_norm": 4.920889377593994,
      "learning_rate": 2.967359050445104e-06,
      "loss": 0.0766,
      "step": 200,
      "video_reward_cumulative_accuracy": 0.7225
    },
    {
      "epoch": 0.05936479667557139,
      "eval_runtime": 129.5769,
      "eval_samples_per_second": 6.089,
      "eval_steps_per_second": 0.764,
      "eval_test_set_accuracy": 0.696969696969697,
      "step": 200
    },
    {
      "epoch": 0.05966162065894924,
      "grad_norm": 8.54822063446045,
      "learning_rate": 2.9821958456973297e-06,
      "loss": 0.0915,
      "step": 201,
      "video_reward_cumulative_accuracy": 0.7213930348258707
    },
    {
      "epoch": 0.0599584446423271,
      "grad_norm": 3.513885974884033,
      "learning_rate": 2.9970326409495554e-06,
      "loss": 0.0686,
      "step": 202,
      "video_reward_cumulative_accuracy": 0.7227722772277227
    },
    {
      "epoch": 0.060255268625704954,
      "grad_norm": 4.311375141143799,
      "learning_rate": 3.011869436201781e-06,
      "loss": 0.0586,
      "step": 203,
      "video_reward_cumulative_accuracy": 0.7192118226600985
    },
    {
      "epoch": 0.060552092609082814,
      "grad_norm": 4.355630397796631,
      "learning_rate": 3.0267062314540063e-06,
      "loss": 0.0683,
      "step": 204,
      "video_reward_cumulative_accuracy": 0.7181372549019608
    },
    {
      "epoch": 0.060848916592460674,
      "grad_norm": 5.103359699249268,
      "learning_rate": 3.041543026706232e-06,
      "loss": 0.0869,
      "step": 205,
      "video_reward_cumulative_accuracy": 0.7195121951219512
    },
    {
      "epoch": 0.06114574057583853,
      "grad_norm": 4.328181266784668,
      "learning_rate": 3.056379821958457e-06,
      "loss": 0.0628,
      "step": 206,
      "video_reward_cumulative_accuracy": 0.720873786407767
    },
    {
      "epoch": 0.06144256455921639,
      "grad_norm": 2.0452539920806885,
      "learning_rate": 3.071216617210683e-06,
      "loss": 0.0691,
      "step": 207,
      "video_reward_cumulative_accuracy": 0.7222222222222222
    },
    {
      "epoch": 0.06173938854259424,
      "grad_norm": 2.371507167816162,
      "learning_rate": 3.086053412462908e-06,
      "loss": 0.0485,
      "step": 208,
      "video_reward_cumulative_accuracy": 0.7235576923076923
    },
    {
      "epoch": 0.0620362125259721,
      "grad_norm": 4.104339122772217,
      "learning_rate": 3.100890207715134e-06,
      "loss": 0.0686,
      "step": 209,
      "video_reward_cumulative_accuracy": 0.7248803827751196
    },
    {
      "epoch": 0.06233303650934995,
      "grad_norm": 8.020886421203613,
      "learning_rate": 3.115727002967359e-06,
      "loss": 0.0653,
      "step": 210,
      "video_reward_cumulative_accuracy": 0.7238095238095238
    },
    {
      "epoch": 0.06262986049272781,
      "grad_norm": 2.7191717624664307,
      "learning_rate": 3.1305637982195847e-06,
      "loss": 0.0474,
      "step": 211,
      "video_reward_cumulative_accuracy": 0.7251184834123223
    },
    {
      "epoch": 0.06292668447610567,
      "grad_norm": 2.388265609741211,
      "learning_rate": 3.14540059347181e-06,
      "loss": 0.0625,
      "step": 212,
      "video_reward_cumulative_accuracy": 0.7264150943396226
    },
    {
      "epoch": 0.06322350845948353,
      "grad_norm": 2.529482841491699,
      "learning_rate": 3.1602373887240356e-06,
      "loss": 0.054,
      "step": 213,
      "video_reward_cumulative_accuracy": 0.7253521126760564
    },
    {
      "epoch": 0.06352033244286139,
      "grad_norm": 5.697103977203369,
      "learning_rate": 3.1750741839762617e-06,
      "loss": 0.0667,
      "step": 214,
      "video_reward_cumulative_accuracy": 0.7242990654205608
    },
    {
      "epoch": 0.06381715642623924,
      "grad_norm": 2.470099449157715,
      "learning_rate": 3.189910979228487e-06,
      "loss": 0.0767,
      "step": 215,
      "video_reward_cumulative_accuracy": 0.7255813953488373
    },
    {
      "epoch": 0.06411398040961709,
      "grad_norm": 2.413121223449707,
      "learning_rate": 3.2047477744807127e-06,
      "loss": 0.0707,
      "step": 216,
      "video_reward_cumulative_accuracy": 0.7222222222222222
    },
    {
      "epoch": 0.06441080439299496,
      "grad_norm": 4.319202899932861,
      "learning_rate": 3.219584569732938e-06,
      "loss": 0.0661,
      "step": 217,
      "video_reward_cumulative_accuracy": 0.7235023041474654
    },
    {
      "epoch": 0.06470762837637281,
      "grad_norm": 10.807517051696777,
      "learning_rate": 3.2344213649851636e-06,
      "loss": 0.1928,
      "step": 218,
      "video_reward_cumulative_accuracy": 0.7201834862385321
    },
    {
      "epoch": 0.06500445235975066,
      "grad_norm": 3.2382359504699707,
      "learning_rate": 3.2492581602373893e-06,
      "loss": 0.0677,
      "step": 219,
      "video_reward_cumulative_accuracy": 0.7214611872146118
    },
    {
      "epoch": 0.06530127634312853,
      "grad_norm": 3.427091121673584,
      "learning_rate": 3.2640949554896145e-06,
      "loss": 0.0519,
      "step": 220,
      "video_reward_cumulative_accuracy": 0.7204545454545455
    },
    {
      "epoch": 0.06559810032650638,
      "grad_norm": 5.226832389831543,
      "learning_rate": 3.27893175074184e-06,
      "loss": 0.0667,
      "step": 221,
      "video_reward_cumulative_accuracy": 0.7194570135746606
    },
    {
      "epoch": 0.06589492430988424,
      "grad_norm": 4.402886390686035,
      "learning_rate": 3.2937685459940654e-06,
      "loss": 0.0532,
      "step": 222,
      "video_reward_cumulative_accuracy": 0.7207207207207207
    },
    {
      "epoch": 0.06619174829326209,
      "grad_norm": 5.255795001983643,
      "learning_rate": 3.308605341246291e-06,
      "loss": 0.0755,
      "step": 223,
      "video_reward_cumulative_accuracy": 0.7219730941704036
    },
    {
      "epoch": 0.06648857227663996,
      "grad_norm": 8.409960746765137,
      "learning_rate": 3.3234421364985163e-06,
      "loss": 0.1155,
      "step": 224,
      "video_reward_cumulative_accuracy": 0.7232142857142857
    },
    {
      "epoch": 0.06678539626001781,
      "grad_norm": 8.938908576965332,
      "learning_rate": 3.338278931750742e-06,
      "loss": 0.1178,
      "step": 225,
      "video_reward_cumulative_accuracy": 0.7222222222222222
    },
    {
      "epoch": 0.06708222024339566,
      "grad_norm": 8.283513069152832,
      "learning_rate": 3.3531157270029673e-06,
      "loss": 0.1187,
      "step": 226,
      "video_reward_cumulative_accuracy": 0.7234513274336283
    },
    {
      "epoch": 0.06737904422677353,
      "grad_norm": 6.216405391693115,
      "learning_rate": 3.3679525222551934e-06,
      "loss": 0.0735,
      "step": 227,
      "video_reward_cumulative_accuracy": 0.7224669603524229
    },
    {
      "epoch": 0.06767586821015138,
      "grad_norm": 4.862206935882568,
      "learning_rate": 3.382789317507419e-06,
      "loss": 0.0792,
      "step": 228,
      "video_reward_cumulative_accuracy": 0.7236842105263158
    },
    {
      "epoch": 0.06797269219352924,
      "grad_norm": 2.139225959777832,
      "learning_rate": 3.3976261127596443e-06,
      "loss": 0.0682,
      "step": 229,
      "video_reward_cumulative_accuracy": 0.7248908296943232
    },
    {
      "epoch": 0.06826951617690709,
      "grad_norm": 3.5411124229431152,
      "learning_rate": 3.41246290801187e-06,
      "loss": 0.0719,
      "step": 230,
      "video_reward_cumulative_accuracy": 0.7260869565217392
    },
    {
      "epoch": 0.06856634016028496,
      "grad_norm": 3.569051742553711,
      "learning_rate": 3.427299703264095e-06,
      "loss": 0.0443,
      "step": 231,
      "video_reward_cumulative_accuracy": 0.7272727272727273
    },
    {
      "epoch": 0.06886316414366281,
      "grad_norm": 5.477877616882324,
      "learning_rate": 3.442136498516321e-06,
      "loss": 0.0875,
      "step": 232,
      "video_reward_cumulative_accuracy": 0.7262931034482759
    },
    {
      "epoch": 0.06915998812704066,
      "grad_norm": 4.547797203063965,
      "learning_rate": 3.456973293768546e-06,
      "loss": 0.0537,
      "step": 233,
      "video_reward_cumulative_accuracy": 0.7274678111587983
    },
    {
      "epoch": 0.06945681211041853,
      "grad_norm": 3.6796634197235107,
      "learning_rate": 3.471810089020772e-06,
      "loss": 0.0563,
      "step": 234,
      "video_reward_cumulative_accuracy": 0.7264957264957265
    },
    {
      "epoch": 0.06975363609379638,
      "grad_norm": 1.6680197715759277,
      "learning_rate": 3.486646884272997e-06,
      "loss": 0.0447,
      "step": 235,
      "video_reward_cumulative_accuracy": 0.7276595744680852
    },
    {
      "epoch": 0.07005046007717423,
      "grad_norm": 3.814924478530884,
      "learning_rate": 3.5014836795252227e-06,
      "loss": 0.0532,
      "step": 236,
      "video_reward_cumulative_accuracy": 0.7266949152542372
    },
    {
      "epoch": 0.07034728406055209,
      "grad_norm": 2.366469383239746,
      "learning_rate": 3.5163204747774484e-06,
      "loss": 0.0393,
      "step": 237,
      "video_reward_cumulative_accuracy": 0.7278481012658228
    },
    {
      "epoch": 0.07064410804392995,
      "grad_norm": 5.3031206130981445,
      "learning_rate": 3.5311572700296736e-06,
      "loss": 0.0637,
      "step": 238,
      "video_reward_cumulative_accuracy": 0.7289915966386554
    },
    {
      "epoch": 0.07094093202730781,
      "grad_norm": 2.591233491897583,
      "learning_rate": 3.5459940652818993e-06,
      "loss": 0.0779,
      "step": 239,
      "video_reward_cumulative_accuracy": 0.7301255230125523
    },
    {
      "epoch": 0.07123775601068566,
      "grad_norm": 5.400726795196533,
      "learning_rate": 3.560830860534125e-06,
      "loss": 0.0881,
      "step": 240,
      "video_reward_cumulative_accuracy": 0.73125
    },
    {
      "epoch": 0.07153457999406353,
      "grad_norm": 6.161452293395996,
      "learning_rate": 3.5756676557863507e-06,
      "loss": 0.1075,
      "step": 241,
      "video_reward_cumulative_accuracy": 0.7323651452282157
    },
    {
      "epoch": 0.07183140397744138,
      "grad_norm": 4.78709077835083,
      "learning_rate": 3.5905044510385763e-06,
      "loss": 0.0733,
      "step": 242,
      "video_reward_cumulative_accuracy": 0.7334710743801653
    },
    {
      "epoch": 0.07212822796081923,
      "grad_norm": 2.570233106613159,
      "learning_rate": 3.6053412462908016e-06,
      "loss": 0.0624,
      "step": 243,
      "video_reward_cumulative_accuracy": 0.7345679012345679
    },
    {
      "epoch": 0.07242505194419709,
      "grad_norm": 2.175311803817749,
      "learning_rate": 3.6201780415430273e-06,
      "loss": 0.0712,
      "step": 244,
      "video_reward_cumulative_accuracy": 0.735655737704918
    },
    {
      "epoch": 0.07272187592757495,
      "grad_norm": 2.6615543365478516,
      "learning_rate": 3.6350148367952525e-06,
      "loss": 0.0627,
      "step": 245,
      "video_reward_cumulative_accuracy": 0.736734693877551
    },
    {
      "epoch": 0.0730186999109528,
      "grad_norm": 3.832801580429077,
      "learning_rate": 3.649851632047478e-06,
      "loss": 0.0511,
      "step": 246,
      "video_reward_cumulative_accuracy": 0.7378048780487805
    },
    {
      "epoch": 0.07331552389433066,
      "grad_norm": 2.9405055046081543,
      "learning_rate": 3.6646884272997034e-06,
      "loss": 0.0773,
      "step": 247,
      "video_reward_cumulative_accuracy": 0.7388663967611336
    },
    {
      "epoch": 0.07361234787770853,
      "grad_norm": 1.5752394199371338,
      "learning_rate": 3.679525222551929e-06,
      "loss": 0.0362,
      "step": 248,
      "video_reward_cumulative_accuracy": 0.7399193548387096
    },
    {
      "epoch": 0.07390917186108638,
      "grad_norm": 2.3977644443511963,
      "learning_rate": 3.6943620178041544e-06,
      "loss": 0.0669,
      "step": 249,
      "video_reward_cumulative_accuracy": 0.7409638554216867
    },
    {
      "epoch": 0.07420599584446423,
      "grad_norm": 2.2234885692596436,
      "learning_rate": 3.70919881305638e-06,
      "loss": 0.0717,
      "step": 250,
      "video_reward_cumulative_accuracy": 0.74
    },
    {
      "epoch": 0.07450281982784208,
      "grad_norm": 2.77751088142395,
      "learning_rate": 3.7240356083086053e-06,
      "loss": 0.0726,
      "step": 251,
      "video_reward_cumulative_accuracy": 0.7410358565737052
    },
    {
      "epoch": 0.07479964381121995,
      "grad_norm": 2.137138843536377,
      "learning_rate": 3.738872403560831e-06,
      "loss": 0.046,
      "step": 252,
      "video_reward_cumulative_accuracy": 0.7420634920634921
    },
    {
      "epoch": 0.0750964677945978,
      "grad_norm": 3.0533056259155273,
      "learning_rate": 3.753709198813057e-06,
      "loss": 0.038,
      "step": 253,
      "video_reward_cumulative_accuracy": 0.7430830039525692
    },
    {
      "epoch": 0.07539329177797566,
      "grad_norm": 1.6132036447525024,
      "learning_rate": 3.7685459940652823e-06,
      "loss": 0.0406,
      "step": 254,
      "video_reward_cumulative_accuracy": 0.7440944881889764
    },
    {
      "epoch": 0.07569011576135352,
      "grad_norm": 1.4988843202590942,
      "learning_rate": 3.783382789317508e-06,
      "loss": 0.039,
      "step": 255,
      "video_reward_cumulative_accuracy": 0.7450980392156863
    },
    {
      "epoch": 0.07598693974473138,
      "grad_norm": 3.1757826805114746,
      "learning_rate": 3.7982195845697332e-06,
      "loss": 0.0607,
      "step": 256,
      "video_reward_cumulative_accuracy": 0.74609375
    },
    {
      "epoch": 0.07628376372810923,
      "grad_norm": 5.67103385925293,
      "learning_rate": 3.813056379821959e-06,
      "loss": 0.0528,
      "step": 257,
      "video_reward_cumulative_accuracy": 0.7470817120622568
    },
    {
      "epoch": 0.07658058771148708,
      "grad_norm": 2.0683236122131348,
      "learning_rate": 3.8278931750741846e-06,
      "loss": 0.0275,
      "step": 258,
      "video_reward_cumulative_accuracy": 0.748062015503876
    },
    {
      "epoch": 0.07687741169486495,
      "grad_norm": 4.611932277679443,
      "learning_rate": 3.84272997032641e-06,
      "loss": 0.0939,
      "step": 259,
      "video_reward_cumulative_accuracy": 0.747104247104247
    },
    {
      "epoch": 0.0771742356782428,
      "grad_norm": 4.704992294311523,
      "learning_rate": 3.857566765578635e-06,
      "loss": 0.1056,
      "step": 260,
      "video_reward_cumulative_accuracy": 0.7480769230769231
    },
    {
      "epoch": 0.07747105966162066,
      "grad_norm": 6.33126974105835,
      "learning_rate": 3.872403560830861e-06,
      "loss": 0.1326,
      "step": 261,
      "video_reward_cumulative_accuracy": 0.7490421455938697
    },
    {
      "epoch": 0.07776788364499852,
      "grad_norm": 2.4778928756713867,
      "learning_rate": 3.887240356083086e-06,
      "loss": 0.0467,
      "step": 262,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.07806470762837638,
      "grad_norm": 11.440229415893555,
      "learning_rate": 3.902077151335312e-06,
      "loss": 0.1549,
      "step": 263,
      "video_reward_cumulative_accuracy": 0.7490494296577946
    },
    {
      "epoch": 0.07836153161175423,
      "grad_norm": 4.551571369171143,
      "learning_rate": 3.916913946587537e-06,
      "loss": 0.0389,
      "step": 264,
      "video_reward_cumulative_accuracy": 0.7481060606060606
    },
    {
      "epoch": 0.07865835559513208,
      "grad_norm": 4.240347385406494,
      "learning_rate": 3.931750741839763e-06,
      "loss": 0.0743,
      "step": 265,
      "video_reward_cumulative_accuracy": 0.7490566037735849
    },
    {
      "epoch": 0.07895517957850995,
      "grad_norm": 7.178493976593018,
      "learning_rate": 3.946587537091989e-06,
      "loss": 0.083,
      "step": 266,
      "video_reward_cumulative_accuracy": 0.7481203007518797
    },
    {
      "epoch": 0.0792520035618878,
      "grad_norm": 8.066522598266602,
      "learning_rate": 3.961424332344214e-06,
      "loss": 0.0835,
      "step": 267,
      "video_reward_cumulative_accuracy": 0.7471910112359551
    },
    {
      "epoch": 0.07954882754526565,
      "grad_norm": 6.96315860748291,
      "learning_rate": 3.97626112759644e-06,
      "loss": 0.0599,
      "step": 268,
      "video_reward_cumulative_accuracy": 0.7481343283582089
    },
    {
      "epoch": 0.07984565152864351,
      "grad_norm": 2.7894434928894043,
      "learning_rate": 3.991097922848665e-06,
      "loss": 0.0418,
      "step": 269,
      "video_reward_cumulative_accuracy": 0.7490706319702602
    },
    {
      "epoch": 0.08014247551202137,
      "grad_norm": 2.1485440731048584,
      "learning_rate": 4.005934718100891e-06,
      "loss": 0.058,
      "step": 270,
      "video_reward_cumulative_accuracy": 0.7481481481481481
    },
    {
      "epoch": 0.08043929949539923,
      "grad_norm": 3.671163320541382,
      "learning_rate": 4.020771513353116e-06,
      "loss": 0.0447,
      "step": 271,
      "video_reward_cumulative_accuracy": 0.7490774907749077
    },
    {
      "epoch": 0.08073612347877708,
      "grad_norm": 2.1427624225616455,
      "learning_rate": 4.0356083086053414e-06,
      "loss": 0.0365,
      "step": 272,
      "video_reward_cumulative_accuracy": 0.7481617647058824
    },
    {
      "epoch": 0.08103294746215495,
      "grad_norm": 2.3186142444610596,
      "learning_rate": 4.050445103857567e-06,
      "loss": 0.0578,
      "step": 273,
      "video_reward_cumulative_accuracy": 0.7472527472527473
    },
    {
      "epoch": 0.0813297714455328,
      "grad_norm": 7.067409515380859,
      "learning_rate": 4.065281899109793e-06,
      "loss": 0.1045,
      "step": 274,
      "video_reward_cumulative_accuracy": 0.7463503649635036
    },
    {
      "epoch": 0.08162659542891065,
      "grad_norm": 0.9794759154319763,
      "learning_rate": 4.080118694362018e-06,
      "loss": 0.0146,
      "step": 275,
      "video_reward_cumulative_accuracy": 0.7472727272727273
    },
    {
      "epoch": 0.0819234194122885,
      "grad_norm": 5.27300500869751,
      "learning_rate": 4.094955489614243e-06,
      "loss": 0.0394,
      "step": 276,
      "video_reward_cumulative_accuracy": 0.7481884057971014
    },
    {
      "epoch": 0.08222024339566637,
      "grad_norm": 4.100281238555908,
      "learning_rate": 4.109792284866469e-06,
      "loss": 0.0616,
      "step": 277,
      "video_reward_cumulative_accuracy": 0.7490974729241877
    },
    {
      "epoch": 0.08251706737904423,
      "grad_norm": 4.657914638519287,
      "learning_rate": 4.124629080118695e-06,
      "loss": 0.0861,
      "step": 278,
      "video_reward_cumulative_accuracy": 0.7482014388489209
    },
    {
      "epoch": 0.08281389136242208,
      "grad_norm": 4.093924045562744,
      "learning_rate": 4.139465875370921e-06,
      "loss": 0.048,
      "step": 279,
      "video_reward_cumulative_accuracy": 0.7491039426523297
    },
    {
      "epoch": 0.08311071534579995,
      "grad_norm": 7.382143020629883,
      "learning_rate": 4.154302670623146e-06,
      "loss": 0.0895,
      "step": 280,
      "video_reward_cumulative_accuracy": 0.7464285714285714
    },
    {
      "epoch": 0.0834075393291778,
      "grad_norm": 11.922904968261719,
      "learning_rate": 4.169139465875371e-06,
      "loss": 0.1006,
      "step": 281,
      "video_reward_cumulative_accuracy": 0.7473309608540926
    },
    {
      "epoch": 0.08370436331255565,
      "grad_norm": 3.6098527908325195,
      "learning_rate": 4.183976261127597e-06,
      "loss": 0.0253,
      "step": 282,
      "video_reward_cumulative_accuracy": 0.74822695035461
    },
    {
      "epoch": 0.0840011872959335,
      "grad_norm": 4.491762161254883,
      "learning_rate": 4.1988130563798226e-06,
      "loss": 0.0394,
      "step": 283,
      "video_reward_cumulative_accuracy": 0.7473498233215548
    },
    {
      "epoch": 0.08429801127931137,
      "grad_norm": 9.078802108764648,
      "learning_rate": 4.213649851632048e-06,
      "loss": 0.1382,
      "step": 284,
      "video_reward_cumulative_accuracy": 0.7464788732394366
    },
    {
      "epoch": 0.08459483526268922,
      "grad_norm": 4.624719619750977,
      "learning_rate": 4.228486646884273e-06,
      "loss": 0.0321,
      "step": 285,
      "video_reward_cumulative_accuracy": 0.7473684210526316
    },
    {
      "epoch": 0.08489165924606708,
      "grad_norm": 4.962191104888916,
      "learning_rate": 4.243323442136499e-06,
      "loss": 0.1042,
      "step": 286,
      "video_reward_cumulative_accuracy": 0.7482517482517482
    },
    {
      "epoch": 0.08518848322944494,
      "grad_norm": 3.402569055557251,
      "learning_rate": 4.258160237388724e-06,
      "loss": 0.0473,
      "step": 287,
      "video_reward_cumulative_accuracy": 0.7491289198606271
    },
    {
      "epoch": 0.0854853072128228,
      "grad_norm": 5.989389419555664,
      "learning_rate": 4.27299703264095e-06,
      "loss": 0.1256,
      "step": 288,
      "video_reward_cumulative_accuracy": 0.7482638888888888
    },
    {
      "epoch": 0.08578213119620065,
      "grad_norm": 7.128279685974121,
      "learning_rate": 4.287833827893175e-06,
      "loss": 0.0807,
      "step": 289,
      "video_reward_cumulative_accuracy": 0.7491349480968859
    },
    {
      "epoch": 0.0860789551795785,
      "grad_norm": 3.677250623703003,
      "learning_rate": 4.302670623145401e-06,
      "loss": 0.0731,
      "step": 290,
      "video_reward_cumulative_accuracy": 0.7482758620689656
    },
    {
      "epoch": 0.08637577916295637,
      "grad_norm": 3.578486680984497,
      "learning_rate": 4.317507418397626e-06,
      "loss": 0.0695,
      "step": 291,
      "video_reward_cumulative_accuracy": 0.7491408934707904
    },
    {
      "epoch": 0.08667260314633422,
      "grad_norm": 2.7287142276763916,
      "learning_rate": 4.332344213649852e-06,
      "loss": 0.0301,
      "step": 292,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.08696942712971208,
      "grad_norm": 2.3789162635803223,
      "learning_rate": 4.347181008902078e-06,
      "loss": 0.0378,
      "step": 293,
      "video_reward_cumulative_accuracy": 0.7491467576791809
    },
    {
      "epoch": 0.08726625111308994,
      "grad_norm": 1.2172014713287354,
      "learning_rate": 4.362017804154303e-06,
      "loss": 0.0288,
      "step": 294,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.0875630750964678,
      "grad_norm": 4.625653266906738,
      "learning_rate": 4.376854599406529e-06,
      "loss": 0.096,
      "step": 295,
      "video_reward_cumulative_accuracy": 0.7491525423728813
    },
    {
      "epoch": 0.08785989907984565,
      "grad_norm": 3.8820786476135254,
      "learning_rate": 4.391691394658754e-06,
      "loss": 0.0471,
      "step": 296,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.0881567230632235,
      "grad_norm": 5.01461935043335,
      "learning_rate": 4.4065281899109794e-06,
      "loss": 0.0686,
      "step": 297,
      "video_reward_cumulative_accuracy": 0.7508417508417509
    },
    {
      "epoch": 0.08845354704660137,
      "grad_norm": 8.346698760986328,
      "learning_rate": 4.4213649851632055e-06,
      "loss": 0.1304,
      "step": 298,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.08875037102997922,
      "grad_norm": 2.789457082748413,
      "learning_rate": 4.436201780415431e-06,
      "loss": 0.0618,
      "step": 299,
      "video_reward_cumulative_accuracy": 0.7508361204013378
    },
    {
      "epoch": 0.08904719501335707,
      "grad_norm": 4.110484600067139,
      "learning_rate": 4.451038575667656e-06,
      "loss": 0.091,
      "step": 300,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.08934401899673494,
      "grad_norm": 1.834205150604248,
      "learning_rate": 4.465875370919881e-06,
      "loss": 0.0305,
      "step": 301,
      "video_reward_cumulative_accuracy": 0.7508305647840532
    },
    {
      "epoch": 0.0896408429801128,
      "grad_norm": 3.296640157699585,
      "learning_rate": 4.480712166172107e-06,
      "loss": 0.0954,
      "step": 302,
      "video_reward_cumulative_accuracy": 0.75
    },
    {
      "epoch": 0.08993766696349065,
      "grad_norm": 1.8095667362213135,
      "learning_rate": 4.495548961424333e-06,
      "loss": 0.0251,
      "step": 303,
      "video_reward_cumulative_accuracy": 0.7508250825082509
    },
    {
      "epoch": 0.0902344909468685,
      "grad_norm": 4.808000564575195,
      "learning_rate": 4.510385756676558e-06,
      "loss": 0.0786,
      "step": 304,
      "video_reward_cumulative_accuracy": 0.7516447368421053
    },
    {
      "epoch": 0.09053131493024637,
      "grad_norm": 4.050952911376953,
      "learning_rate": 4.525222551928784e-06,
      "loss": 0.0847,
      "step": 305,
      "video_reward_cumulative_accuracy": 0.7524590163934426
    },
    {
      "epoch": 0.09082813891362422,
      "grad_norm": 6.732780933380127,
      "learning_rate": 4.540059347181009e-06,
      "loss": 0.1121,
      "step": 306,
      "video_reward_cumulative_accuracy": 0.7532679738562091
    },
    {
      "epoch": 0.09112496289700207,
      "grad_norm": 5.494693279266357,
      "learning_rate": 4.554896142433235e-06,
      "loss": 0.0543,
      "step": 307,
      "video_reward_cumulative_accuracy": 0.754071661237785
    },
    {
      "epoch": 0.09142178688037994,
      "grad_norm": 2.1636104583740234,
      "learning_rate": 4.5697329376854606e-06,
      "loss": 0.0511,
      "step": 308,
      "video_reward_cumulative_accuracy": 0.7532467532467533
    },
    {
      "epoch": 0.0917186108637578,
      "grad_norm": 6.281263828277588,
      "learning_rate": 4.584569732937686e-06,
      "loss": 0.0951,
      "step": 309,
      "video_reward_cumulative_accuracy": 0.7540453074433657
    },
    {
      "epoch": 0.09201543484713565,
      "grad_norm": 3.543318033218384,
      "learning_rate": 4.599406528189911e-06,
      "loss": 0.0731,
      "step": 310,
      "video_reward_cumulative_accuracy": 0.7532258064516129
    },
    {
      "epoch": 0.0923122588305135,
      "grad_norm": 2.5978658199310303,
      "learning_rate": 4.614243323442137e-06,
      "loss": 0.0714,
      "step": 311,
      "video_reward_cumulative_accuracy": 0.7540192926045016
    },
    {
      "epoch": 0.09260908281389137,
      "grad_norm": 2.553865909576416,
      "learning_rate": 4.629080118694362e-06,
      "loss": 0.0742,
      "step": 312,
      "video_reward_cumulative_accuracy": 0.7548076923076923
    },
    {
      "epoch": 0.09290590679726922,
      "grad_norm": 1.7062216997146606,
      "learning_rate": 4.643916913946588e-06,
      "loss": 0.0572,
      "step": 313,
      "video_reward_cumulative_accuracy": 0.7539936102236422
    },
    {
      "epoch": 0.09320273078064707,
      "grad_norm": 1.2176084518432617,
      "learning_rate": 4.658753709198813e-06,
      "loss": 0.049,
      "step": 314,
      "video_reward_cumulative_accuracy": 0.7547770700636943
    },
    {
      "epoch": 0.09349955476402494,
      "grad_norm": 1.2772020101547241,
      "learning_rate": 4.673590504451039e-06,
      "loss": 0.0449,
      "step": 315,
      "video_reward_cumulative_accuracy": 0.753968253968254
    },
    {
      "epoch": 0.09379637874740279,
      "grad_norm": 1.6296868324279785,
      "learning_rate": 4.688427299703264e-06,
      "loss": 0.043,
      "step": 316,
      "video_reward_cumulative_accuracy": 0.754746835443038
    },
    {
      "epoch": 0.09409320273078065,
      "grad_norm": 2.408234119415283,
      "learning_rate": 4.7032640949554895e-06,
      "loss": 0.0572,
      "step": 317,
      "video_reward_cumulative_accuracy": 0.7555205047318612
    },
    {
      "epoch": 0.0943900267141585,
      "grad_norm": 1.397177815437317,
      "learning_rate": 4.718100890207716e-06,
      "loss": 0.0276,
      "step": 318,
      "video_reward_cumulative_accuracy": 0.7562893081761006
    },
    {
      "epoch": 0.09468685069753636,
      "grad_norm": 2.097848415374756,
      "learning_rate": 4.732937685459941e-06,
      "loss": 0.0501,
      "step": 319,
      "video_reward_cumulative_accuracy": 0.7570532915360502
    },
    {
      "epoch": 0.09498367468091422,
      "grad_norm": 2.369933605194092,
      "learning_rate": 4.747774480712167e-06,
      "loss": 0.0377,
      "step": 320,
      "video_reward_cumulative_accuracy": 0.7578125
    },
    {
      "epoch": 0.09528049866429207,
      "grad_norm": 2.974731922149658,
      "learning_rate": 4.762611275964392e-06,
      "loss": 0.0609,
      "step": 321,
      "video_reward_cumulative_accuracy": 0.7570093457943925
    },
    {
      "epoch": 0.09557732264766994,
      "grad_norm": 2.996340036392212,
      "learning_rate": 4.7774480712166174e-06,
      "loss": 0.0795,
      "step": 322,
      "video_reward_cumulative_accuracy": 0.7577639751552795
    },
    {
      "epoch": 0.09587414663104779,
      "grad_norm": 2.6763088703155518,
      "learning_rate": 4.7922848664688435e-06,
      "loss": 0.067,
      "step": 323,
      "video_reward_cumulative_accuracy": 0.7585139318885449
    },
    {
      "epoch": 0.09617097061442564,
      "grad_norm": 3.8970932960510254,
      "learning_rate": 4.807121661721069e-06,
      "loss": 0.0637,
      "step": 324,
      "video_reward_cumulative_accuracy": 0.7592592592592593
    },
    {
      "epoch": 0.0964677945978035,
      "grad_norm": 3.0511136054992676,
      "learning_rate": 4.821958456973294e-06,
      "loss": 0.0545,
      "step": 325,
      "video_reward_cumulative_accuracy": 0.76
    },
    {
      "epoch": 0.09676461858118136,
      "grad_norm": 2.455991744995117,
      "learning_rate": 4.836795252225519e-06,
      "loss": 0.052,
      "step": 326,
      "video_reward_cumulative_accuracy": 0.75920245398773
    },
    {
      "epoch": 0.09706144256455922,
      "grad_norm": 5.562345504760742,
      "learning_rate": 4.851632047477745e-06,
      "loss": 0.0677,
      "step": 327,
      "video_reward_cumulative_accuracy": 0.7584097859327217
    },
    {
      "epoch": 0.09735826654793707,
      "grad_norm": 3.8642868995666504,
      "learning_rate": 4.866468842729971e-06,
      "loss": 0.0758,
      "step": 328,
      "video_reward_cumulative_accuracy": 0.7576219512195121
    },
    {
      "epoch": 0.09765509053131494,
      "grad_norm": 5.224849224090576,
      "learning_rate": 4.881305637982196e-06,
      "loss": 0.0496,
      "step": 329,
      "video_reward_cumulative_accuracy": 0.7583586626139818
    },
    {
      "epoch": 0.09795191451469279,
      "grad_norm": 2.613288402557373,
      "learning_rate": 4.896142433234421e-06,
      "loss": 0.0323,
      "step": 330,
      "video_reward_cumulative_accuracy": 0.759090909090909
    },
    {
      "epoch": 0.09824873849807064,
      "grad_norm": 1.3308014869689941,
      "learning_rate": 4.910979228486647e-06,
      "loss": 0.0467,
      "step": 331,
      "video_reward_cumulative_accuracy": 0.7598187311178247
    },
    {
      "epoch": 0.0985455624814485,
      "grad_norm": 2.2056570053100586,
      "learning_rate": 4.925816023738873e-06,
      "loss": 0.0739,
      "step": 332,
      "video_reward_cumulative_accuracy": 0.7605421686746988
    },
    {
      "epoch": 0.09884238646482636,
      "grad_norm": 1.1088576316833496,
      "learning_rate": 4.9406528189910986e-06,
      "loss": 0.0498,
      "step": 333,
      "video_reward_cumulative_accuracy": 0.7597597597597597
    },
    {
      "epoch": 0.09913921044820422,
      "grad_norm": 1.563938856124878,
      "learning_rate": 4.955489614243324e-06,
      "loss": 0.0737,
      "step": 334,
      "video_reward_cumulative_accuracy": 0.7604790419161677
    },
    {
      "epoch": 0.09943603443158207,
      "grad_norm": 1.9494915008544922,
      "learning_rate": 4.970326409495549e-06,
      "loss": 0.0488,
      "step": 335,
      "video_reward_cumulative_accuracy": 0.7611940298507462
    },
    {
      "epoch": 0.09973285841495994,
      "grad_norm": 4.285027980804443,
      "learning_rate": 4.985163204747775e-06,
      "loss": 0.0958,
      "step": 336,
      "video_reward_cumulative_accuracy": 0.7619047619047619
    },
    {
      "epoch": 0.10002968239833779,
      "grad_norm": 1.7529289722442627,
      "learning_rate": 5e-06,
      "loss": 0.0616,
      "step": 337,
      "video_reward_cumulative_accuracy": 0.7611275964391692
    },
    {
      "epoch": 0.10032650638171564,
      "grad_norm": 2.010082960128784,
      "learning_rate": 4.999998658003678e-06,
      "loss": 0.0854,
      "step": 338,
      "video_reward_cumulative_accuracy": 0.7618343195266272
    },
    {
      "epoch": 0.1006233303650935,
      "grad_norm": 2.320046901702881,
      "learning_rate": 4.9999946320161525e-06,
      "loss": 0.0489,
      "step": 339,
      "video_reward_cumulative_accuracy": 0.7625368731563422
    },
    {
      "epoch": 0.10092015434847136,
      "grad_norm": 3.510748863220215,
      "learning_rate": 4.999987922041746e-06,
      "loss": 0.1,
      "step": 340,
      "video_reward_cumulative_accuracy": 0.7602941176470588
    },
    {
      "epoch": 0.10121697833184921,
      "grad_norm": 4.800668239593506,
      "learning_rate": 4.999978528087661e-06,
      "loss": 0.0532,
      "step": 341,
      "video_reward_cumulative_accuracy": 0.7609970674486803
    },
    {
      "epoch": 0.10151380231522707,
      "grad_norm": 3.035673141479492,
      "learning_rate": 4.999966450163984e-06,
      "loss": 0.0732,
      "step": 342,
      "video_reward_cumulative_accuracy": 0.7616959064327485
    },
    {
      "epoch": 0.10181062629860493,
      "grad_norm": 1.4098176956176758,
      "learning_rate": 4.999951688283682e-06,
      "loss": 0.0352,
      "step": 343,
      "video_reward_cumulative_accuracy": 0.7623906705539358
    },
    {
      "epoch": 0.10210745028198279,
      "grad_norm": 2.0432138442993164,
      "learning_rate": 4.999934242462603e-06,
      "loss": 0.0546,
      "step": 344,
      "video_reward_cumulative_accuracy": 0.7630813953488372
    },
    {
      "epoch": 0.10240427426536064,
      "grad_norm": 3.4673523902893066,
      "learning_rate": 4.999914112719477e-06,
      "loss": 0.0746,
      "step": 345,
      "video_reward_cumulative_accuracy": 0.763768115942029
    },
    {
      "epoch": 0.10270109824873849,
      "grad_norm": 1.0772874355316162,
      "learning_rate": 4.9998912990759146e-06,
      "loss": 0.049,
      "step": 346,
      "video_reward_cumulative_accuracy": 0.7644508670520231
    },
    {
      "epoch": 0.10299792223211636,
      "grad_norm": 2.790788412094116,
      "learning_rate": 4.999865801556409e-06,
      "loss": 0.0442,
      "step": 347,
      "video_reward_cumulative_accuracy": 0.7651296829971181
    },
    {
      "epoch": 0.10329474621549421,
      "grad_norm": 3.4424827098846436,
      "learning_rate": 4.999837620188334e-06,
      "loss": 0.0384,
      "step": 348,
      "video_reward_cumulative_accuracy": 0.7658045977011494
    },
    {
      "epoch": 0.10359157019887207,
      "grad_norm": 2.485424280166626,
      "learning_rate": 4.999806755001946e-06,
      "loss": 0.0333,
      "step": 349,
      "video_reward_cumulative_accuracy": 0.7664756446991404
    },
    {
      "epoch": 0.10388839418224993,
      "grad_norm": 1.6122627258300781,
      "learning_rate": 4.999773206030379e-06,
      "loss": 0.0689,
      "step": 350,
      "video_reward_cumulative_accuracy": 0.7671428571428571
    },
    {
      "epoch": 0.10418521816562779,
      "grad_norm": 3.075976848602295,
      "learning_rate": 4.999736973309655e-06,
      "loss": 0.0376,
      "step": 351,
      "video_reward_cumulative_accuracy": 0.7663817663817664
    },
    {
      "epoch": 0.10448204214900564,
      "grad_norm": 3.550297260284424,
      "learning_rate": 4.99969805687867e-06,
      "loss": 0.048,
      "step": 352,
      "video_reward_cumulative_accuracy": 0.7670454545454546
    },
    {
      "epoch": 0.10477886613238349,
      "grad_norm": 6.603730201721191,
      "learning_rate": 4.999656456779207e-06,
      "loss": 0.0667,
      "step": 353,
      "video_reward_cumulative_accuracy": 0.7662889518413598
    },
    {
      "epoch": 0.10507569011576136,
      "grad_norm": 1.7593069076538086,
      "learning_rate": 4.999612173055926e-06,
      "loss": 0.0376,
      "step": 354,
      "video_reward_cumulative_accuracy": 0.7669491525423728
    },
    {
      "epoch": 0.10537251409913921,
      "grad_norm": 1.126291275024414,
      "learning_rate": 4.99956520575637e-06,
      "loss": 0.0315,
      "step": 355,
      "video_reward_cumulative_accuracy": 0.7676056338028169
    },
    {
      "epoch": 0.10566933808251706,
      "grad_norm": 5.531168460845947,
      "learning_rate": 4.999515554930965e-06,
      "loss": 0.0701,
      "step": 356,
      "video_reward_cumulative_accuracy": 0.7682584269662921
    },
    {
      "epoch": 0.10596616206589493,
      "grad_norm": 1.5562381744384766,
      "learning_rate": 4.999463220633013e-06,
      "loss": 0.0385,
      "step": 357,
      "video_reward_cumulative_accuracy": 0.7675070028011205
    },
    {
      "epoch": 0.10626298604927278,
      "grad_norm": 11.684016227722168,
      "learning_rate": 4.999408202918702e-06,
      "loss": 0.1559,
      "step": 358,
      "video_reward_cumulative_accuracy": 0.7667597765363129
    },
    {
      "epoch": 0.10655981003265064,
      "grad_norm": 3.8814454078674316,
      "learning_rate": 4.999350501847098e-06,
      "loss": 0.0488,
      "step": 359,
      "video_reward_cumulative_accuracy": 0.7674094707520891
    },
    {
      "epoch": 0.10685663401602849,
      "grad_norm": 8.800138473510742,
      "learning_rate": 4.999290117480149e-06,
      "loss": 0.1598,
      "step": 360,
      "video_reward_cumulative_accuracy": 0.7680555555555556
    },
    {
      "epoch": 0.10715345799940636,
      "grad_norm": 4.154999256134033,
      "learning_rate": 4.999227049882684e-06,
      "loss": 0.0748,
      "step": 361,
      "video_reward_cumulative_accuracy": 0.7686980609418282
    },
    {
      "epoch": 0.10745028198278421,
      "grad_norm": 4.171562671661377,
      "learning_rate": 4.999161299122411e-06,
      "loss": 0.0392,
      "step": 362,
      "video_reward_cumulative_accuracy": 0.7693370165745856
    },
    {
      "epoch": 0.10774710596616206,
      "grad_norm": 3.6571145057678223,
      "learning_rate": 4.9990928652699214e-06,
      "loss": 0.0314,
      "step": 363,
      "video_reward_cumulative_accuracy": 0.7699724517906336
    },
    {
      "epoch": 0.10804392994953993,
      "grad_norm": 12.863832473754883,
      "learning_rate": 4.999021748398684e-06,
      "loss": 0.1173,
      "step": 364,
      "video_reward_cumulative_accuracy": 0.7678571428571429
    },
    {
      "epoch": 0.10834075393291778,
      "grad_norm": 4.573801517486572,
      "learning_rate": 4.99894794858505e-06,
      "loss": 0.0748,
      "step": 365,
      "video_reward_cumulative_accuracy": 0.7684931506849315
    },
    {
      "epoch": 0.10863757791629564,
      "grad_norm": 7.55157470703125,
      "learning_rate": 4.998871465908251e-06,
      "loss": 0.1099,
      "step": 366,
      "video_reward_cumulative_accuracy": 0.76775956284153
    },
    {
      "epoch": 0.10893440189967349,
      "grad_norm": 2.933627128601074,
      "learning_rate": 4.998792300450399e-06,
      "loss": 0.0819,
      "step": 367,
      "video_reward_cumulative_accuracy": 0.7683923705722071
    },
    {
      "epoch": 0.10923122588305136,
      "grad_norm": 6.3735575675964355,
      "learning_rate": 4.998710452296485e-06,
      "loss": 0.0566,
      "step": 368,
      "video_reward_cumulative_accuracy": 0.7690217391304348
    },
    {
      "epoch": 0.10952804986642921,
      "grad_norm": 1.9880731105804443,
      "learning_rate": 4.9986259215343814e-06,
      "loss": 0.0467,
      "step": 369,
      "video_reward_cumulative_accuracy": 0.7696476964769647
    },
    {
      "epoch": 0.10982487384980706,
      "grad_norm": 5.522644996643066,
      "learning_rate": 4.99853870825484e-06,
      "loss": 0.0861,
      "step": 370,
      "video_reward_cumulative_accuracy": 0.768918918918919
    },
    {
      "epoch": 0.11012169783318491,
      "grad_norm": 4.956112861633301,
      "learning_rate": 4.998448812551493e-06,
      "loss": 0.0501,
      "step": 371,
      "video_reward_cumulative_accuracy": 0.7695417789757413
    },
    {
      "epoch": 0.11041852181656278,
      "grad_norm": 4.254426956176758,
      "learning_rate": 4.998356234520851e-06,
      "loss": 0.0427,
      "step": 372,
      "video_reward_cumulative_accuracy": 0.7701612903225806
    },
    {
      "epoch": 0.11071534579994063,
      "grad_norm": 3.666555166244507,
      "learning_rate": 4.998260974262308e-06,
      "loss": 0.079,
      "step": 373,
      "video_reward_cumulative_accuracy": 0.7694369973190348
    },
    {
      "epoch": 0.11101216978331849,
      "grad_norm": 3.9171857833862305,
      "learning_rate": 4.998163031878133e-06,
      "loss": 0.0739,
      "step": 374,
      "video_reward_cumulative_accuracy": 0.7687165775401069
    },
    {
      "epoch": 0.11130899376669635,
      "grad_norm": 4.026345252990723,
      "learning_rate": 4.998062407473477e-06,
      "loss": 0.0506,
      "step": 375,
      "video_reward_cumulative_accuracy": 0.7693333333333333
    },
    {
      "epoch": 0.11160581775007421,
      "grad_norm": 1.3979490995407104,
      "learning_rate": 4.99795910115637e-06,
      "loss": 0.0479,
      "step": 376,
      "video_reward_cumulative_accuracy": 0.7686170212765957
    },
    {
      "epoch": 0.11190264173345206,
      "grad_norm": 1.5546613931655884,
      "learning_rate": 4.997853113037722e-06,
      "loss": 0.0323,
      "step": 377,
      "video_reward_cumulative_accuracy": 0.7692307692307693
    },
    {
      "epoch": 0.11219946571682991,
      "grad_norm": 2.0145232677459717,
      "learning_rate": 4.997744443231321e-06,
      "loss": 0.0471,
      "step": 378,
      "video_reward_cumulative_accuracy": 0.7698412698412699
    },
    {
      "epoch": 0.11249628970020778,
      "grad_norm": 3.335103750228882,
      "learning_rate": 4.9976330918538356e-06,
      "loss": 0.0416,
      "step": 379,
      "video_reward_cumulative_accuracy": 0.7704485488126649
    },
    {
      "epoch": 0.11279311368358563,
      "grad_norm": 3.545008897781372,
      "learning_rate": 4.99751905902481e-06,
      "loss": 0.039,
      "step": 380,
      "video_reward_cumulative_accuracy": 0.7710526315789473
    },
    {
      "epoch": 0.11308993766696349,
      "grad_norm": 6.1864190101623535,
      "learning_rate": 4.997402344866672e-06,
      "loss": 0.0831,
      "step": 381,
      "video_reward_cumulative_accuracy": 0.7703412073490814
    },
    {
      "epoch": 0.11338676165034135,
      "grad_norm": 1.5058016777038574,
      "learning_rate": 4.997282949504725e-06,
      "loss": 0.0472,
      "step": 382,
      "video_reward_cumulative_accuracy": 0.7709424083769634
    },
    {
      "epoch": 0.1136835856337192,
      "grad_norm": 5.477189064025879,
      "learning_rate": 4.99716087306715e-06,
      "loss": 0.0518,
      "step": 383,
      "video_reward_cumulative_accuracy": 0.7702349869451697
    },
    {
      "epoch": 0.11398040961709706,
      "grad_norm": 2.9288504123687744,
      "learning_rate": 4.99703611568501e-06,
      "loss": 0.0564,
      "step": 384,
      "video_reward_cumulative_accuracy": 0.76953125
    },
    {
      "epoch": 0.11427723360047491,
      "grad_norm": 3.441972017288208,
      "learning_rate": 4.996908677492243e-06,
      "loss": 0.019,
      "step": 385,
      "video_reward_cumulative_accuracy": 0.7701298701298701
    },
    {
      "epoch": 0.11457405758385278,
      "grad_norm": 2.967977523803711,
      "learning_rate": 4.996778558625666e-06,
      "loss": 0.058,
      "step": 386,
      "video_reward_cumulative_accuracy": 0.7707253886010362
    },
    {
      "epoch": 0.11487088156723063,
      "grad_norm": 3.720794200897217,
      "learning_rate": 4.996645759224974e-06,
      "loss": 0.0383,
      "step": 387,
      "video_reward_cumulative_accuracy": 0.7713178294573644
    },
    {
      "epoch": 0.11516770555060848,
      "grad_norm": 4.410665035247803,
      "learning_rate": 4.9965102794327416e-06,
      "loss": 0.0816,
      "step": 388,
      "video_reward_cumulative_accuracy": 0.7719072164948454
    },
    {
      "epoch": 0.11546452953398635,
      "grad_norm": 4.823568344116211,
      "learning_rate": 4.996372119394418e-06,
      "loss": 0.0597,
      "step": 389,
      "video_reward_cumulative_accuracy": 0.7699228791773779
    },
    {
      "epoch": 0.1157613535173642,
      "grad_norm": 6.129620552062988,
      "learning_rate": 4.9962312792583325e-06,
      "loss": 0.1142,
      "step": 390,
      "video_reward_cumulative_accuracy": 0.7705128205128206
    },
    {
      "epoch": 0.11605817750074206,
      "grad_norm": 4.6021037101745605,
      "learning_rate": 4.9960877591756895e-06,
      "loss": 0.0491,
      "step": 391,
      "video_reward_cumulative_accuracy": 0.7710997442455243
    },
    {
      "epoch": 0.11635500148411991,
      "grad_norm": 6.3145341873168945,
      "learning_rate": 4.9959415593005734e-06,
      "loss": 0.1095,
      "step": 392,
      "video_reward_cumulative_accuracy": 0.7704081632653061
    },
    {
      "epoch": 0.11665182546749778,
      "grad_norm": 2.623091697692871,
      "learning_rate": 4.995792679789943e-06,
      "loss": 0.0661,
      "step": 393,
      "video_reward_cumulative_accuracy": 0.7709923664122137
    },
    {
      "epoch": 0.11694864945087563,
      "grad_norm": 4.27155065536499,
      "learning_rate": 4.995641120803634e-06,
      "loss": 0.0581,
      "step": 394,
      "video_reward_cumulative_accuracy": 0.7715736040609137
    },
    {
      "epoch": 0.11724547343425348,
      "grad_norm": 4.873302936553955,
      "learning_rate": 4.99548688250436e-06,
      "loss": 0.0637,
      "step": 395,
      "video_reward_cumulative_accuracy": 0.7708860759493671
    },
    {
      "epoch": 0.11754229741763135,
      "grad_norm": 1.3667075634002686,
      "learning_rate": 4.995329965057712e-06,
      "loss": 0.0398,
      "step": 396,
      "video_reward_cumulative_accuracy": 0.7714646464646465
    },
    {
      "epoch": 0.1178391214010092,
      "grad_norm": 2.0029561519622803,
      "learning_rate": 4.995170368632156e-06,
      "loss": 0.0376,
      "step": 397,
      "video_reward_cumulative_accuracy": 0.7720403022670025
    },
    {
      "epoch": 0.11813594538438706,
      "grad_norm": 2.7206361293792725,
      "learning_rate": 4.995008093399034e-06,
      "loss": 0.0364,
      "step": 398,
      "video_reward_cumulative_accuracy": 0.7726130653266332
    },
    {
      "epoch": 0.11843276936776491,
      "grad_norm": 3.3953866958618164,
      "learning_rate": 4.9948431395325626e-06,
      "loss": 0.0533,
      "step": 399,
      "video_reward_cumulative_accuracy": 0.7731829573934837
    },
    {
      "epoch": 0.11872959335114278,
      "grad_norm": 4.966310024261475,
      "learning_rate": 4.994675507209837e-06,
      "loss": 0.0953,
      "step": 400,
      "video_reward_cumulative_accuracy": 0.77375
    },
    {
      "epoch": 0.11872959335114278,
      "eval_runtime": 133.1422,
      "eval_samples_per_second": 5.926,
      "eval_steps_per_second": 0.744,
      "eval_test_set_accuracy": 0.7323232323232324,
      "step": 400
    },
    {
      "epoch": 0.11902641733452063,
      "grad_norm": 1.743256688117981,
      "learning_rate": 4.9945051966108285e-06,
      "loss": 0.0466,
      "step": 401,
      "video_reward_cumulative_accuracy": 0.7743142144638404
    },
    {
      "epoch": 0.11932324131789848,
      "grad_norm": 1.3353296518325806,
      "learning_rate": 4.99433220791838e-06,
      "loss": 0.0713,
      "step": 402,
      "video_reward_cumulative_accuracy": 0.7723880597014925
    },
    {
      "epoch": 0.11962006530127635,
      "grad_norm": 3.405177593231201,
      "learning_rate": 4.994156541318211e-06,
      "loss": 0.049,
      "step": 403,
      "video_reward_cumulative_accuracy": 0.771712158808933
    },
    {
      "epoch": 0.1199168892846542,
      "grad_norm": 1.6333727836608887,
      "learning_rate": 4.993978196998918e-06,
      "loss": 0.0639,
      "step": 404,
      "video_reward_cumulative_accuracy": 0.7722772277227723
    },
    {
      "epoch": 0.12021371326803205,
      "grad_norm": 2.6365771293640137,
      "learning_rate": 4.993797175151971e-06,
      "loss": 0.0343,
      "step": 405,
      "video_reward_cumulative_accuracy": 0.7728395061728395
    },
    {
      "epoch": 0.12051053725140991,
      "grad_norm": 2.222435712814331,
      "learning_rate": 4.9936134759717134e-06,
      "loss": 0.0686,
      "step": 406,
      "video_reward_cumulative_accuracy": 0.7733990147783252
    },
    {
      "epoch": 0.12080736123478777,
      "grad_norm": 7.1145405769348145,
      "learning_rate": 4.993427099655366e-06,
      "loss": 0.0705,
      "step": 407,
      "video_reward_cumulative_accuracy": 0.7727272727272727
    },
    {
      "epoch": 0.12110418521816563,
      "grad_norm": 4.418581008911133,
      "learning_rate": 4.993238046403021e-06,
      "loss": 0.0407,
      "step": 408,
      "video_reward_cumulative_accuracy": 0.7720588235294118
    },
    {
      "epoch": 0.12140100920154348,
      "grad_norm": 1.4404574632644653,
      "learning_rate": 4.993046316417643e-06,
      "loss": 0.0407,
      "step": 409,
      "video_reward_cumulative_accuracy": 0.7713936430317848
    },
    {
      "epoch": 0.12169783318492135,
      "grad_norm": 1.7548354864120483,
      "learning_rate": 4.992851909905077e-06,
      "loss": 0.0422,
      "step": 410,
      "video_reward_cumulative_accuracy": 0.7719512195121951
    },
    {
      "epoch": 0.1219946571682992,
      "grad_norm": 2.8836653232574463,
      "learning_rate": 4.992654827074034e-06,
      "loss": 0.0519,
      "step": 411,
      "video_reward_cumulative_accuracy": 0.7725060827250608
    },
    {
      "epoch": 0.12229148115167705,
      "grad_norm": 6.0396504402160645,
      "learning_rate": 4.992455068136104e-06,
      "loss": 0.0834,
      "step": 412,
      "video_reward_cumulative_accuracy": 0.7730582524271845
    },
    {
      "epoch": 0.1225883051350549,
      "grad_norm": 1.5045963525772095,
      "learning_rate": 4.992252633305745e-06,
      "loss": 0.0404,
      "step": 413,
      "video_reward_cumulative_accuracy": 0.7736077481840193
    },
    {
      "epoch": 0.12288512911843277,
      "grad_norm": 1.5633553266525269,
      "learning_rate": 4.992047522800292e-06,
      "loss": 0.0249,
      "step": 414,
      "video_reward_cumulative_accuracy": 0.7741545893719807
    },
    {
      "epoch": 0.12318195310181063,
      "grad_norm": 2.4161694049835205,
      "learning_rate": 4.991839736839951e-06,
      "loss": 0.047,
      "step": 415,
      "video_reward_cumulative_accuracy": 0.7746987951807229
    },
    {
      "epoch": 0.12347877708518848,
      "grad_norm": 4.570451259613037,
      "learning_rate": 4.9916292756478e-06,
      "loss": 0.0645,
      "step": 416,
      "video_reward_cumulative_accuracy": 0.7752403846153846
    },
    {
      "epoch": 0.12377560106856635,
      "grad_norm": 2.0078325271606445,
      "learning_rate": 4.991416139449791e-06,
      "loss": 0.053,
      "step": 417,
      "video_reward_cumulative_accuracy": 0.7757793764988009
    },
    {
      "epoch": 0.1240724250519442,
      "grad_norm": 2.8492980003356934,
      "learning_rate": 4.991200328474743e-06,
      "loss": 0.041,
      "step": 418,
      "video_reward_cumulative_accuracy": 0.7763157894736842
    },
    {
      "epoch": 0.12436924903532205,
      "grad_norm": 1.4428461790084839,
      "learning_rate": 4.990981842954353e-06,
      "loss": 0.0213,
      "step": 419,
      "video_reward_cumulative_accuracy": 0.7768496420047732
    },
    {
      "epoch": 0.1246660730186999,
      "grad_norm": 1.667288899421692,
      "learning_rate": 4.990760683123186e-06,
      "loss": 0.0402,
      "step": 420,
      "video_reward_cumulative_accuracy": 0.7773809523809524
    },
    {
      "epoch": 0.12496289700207777,
      "grad_norm": 5.119730472564697,
      "learning_rate": 4.990536849218678e-06,
      "loss": 0.0759,
      "step": 421,
      "video_reward_cumulative_accuracy": 0.7779097387173397
    },
    {
      "epoch": 0.12525972098545562,
      "grad_norm": 7.024728298187256,
      "learning_rate": 4.990310341481136e-06,
      "loss": 0.1402,
      "step": 422,
      "video_reward_cumulative_accuracy": 0.7784360189573459
    },
    {
      "epoch": 0.12555654496883348,
      "grad_norm": 3.2602407932281494,
      "learning_rate": 4.990081160153738e-06,
      "loss": 0.0567,
      "step": 423,
      "video_reward_cumulative_accuracy": 0.7789598108747045
    },
    {
      "epoch": 0.12585336895221133,
      "grad_norm": 3.0349199771881104,
      "learning_rate": 4.989849305482534e-06,
      "loss": 0.1118,
      "step": 424,
      "video_reward_cumulative_accuracy": 0.7783018867924528
    },
    {
      "epoch": 0.12615019293558918,
      "grad_norm": 4.9538774490356445,
      "learning_rate": 4.989614777716442e-06,
      "loss": 0.0844,
      "step": 425,
      "video_reward_cumulative_accuracy": 0.7776470588235294
    },
    {
      "epoch": 0.12644701691896706,
      "grad_norm": 4.11725378036499,
      "learning_rate": 4.989377577107248e-06,
      "loss": 0.0852,
      "step": 426,
      "video_reward_cumulative_accuracy": 0.7769953051643192
    },
    {
      "epoch": 0.12674384090234492,
      "grad_norm": 2.492112874984741,
      "learning_rate": 4.989137703909612e-06,
      "loss": 0.0574,
      "step": 427,
      "video_reward_cumulative_accuracy": 0.7775175644028103
    },
    {
      "epoch": 0.12704066488572277,
      "grad_norm": 3.3286123275756836,
      "learning_rate": 4.988895158381062e-06,
      "loss": 0.0402,
      "step": 428,
      "video_reward_cumulative_accuracy": 0.7780373831775701
    },
    {
      "epoch": 0.12733748886910062,
      "grad_norm": 5.785702228546143,
      "learning_rate": 4.988649940781992e-06,
      "loss": 0.0592,
      "step": 429,
      "video_reward_cumulative_accuracy": 0.7773892773892774
    },
    {
      "epoch": 0.12763431285247848,
      "grad_norm": 1.343465805053711,
      "learning_rate": 4.988402051375668e-06,
      "loss": 0.0254,
      "step": 430,
      "video_reward_cumulative_accuracy": 0.7779069767441861
    },
    {
      "epoch": 0.12793113683585633,
      "grad_norm": 2.176379680633545,
      "learning_rate": 4.988151490428223e-06,
      "loss": 0.1113,
      "step": 431,
      "video_reward_cumulative_accuracy": 0.7761020881670534
    },
    {
      "epoch": 0.12822796081923418,
      "grad_norm": 4.0742340087890625,
      "learning_rate": 4.987898258208659e-06,
      "loss": 0.0802,
      "step": 432,
      "video_reward_cumulative_accuracy": 0.7766203703703703
    },
    {
      "epoch": 0.12852478480261206,
      "grad_norm": 1.4879510402679443,
      "learning_rate": 4.987642354988845e-06,
      "loss": 0.0415,
      "step": 433,
      "video_reward_cumulative_accuracy": 0.7771362586605081
    },
    {
      "epoch": 0.12882160878598992,
      "grad_norm": 2.5588343143463135,
      "learning_rate": 4.987383781043517e-06,
      "loss": 0.0591,
      "step": 434,
      "video_reward_cumulative_accuracy": 0.7764976958525346
    },
    {
      "epoch": 0.12911843276936777,
      "grad_norm": 1.5112923383712769,
      "learning_rate": 4.987122536650282e-06,
      "loss": 0.0583,
      "step": 435,
      "video_reward_cumulative_accuracy": 0.7770114942528735
    },
    {
      "epoch": 0.12941525675274562,
      "grad_norm": 3.004580020904541,
      "learning_rate": 4.986858622089609e-06,
      "loss": 0.1139,
      "step": 436,
      "video_reward_cumulative_accuracy": 0.7775229357798165
    },
    {
      "epoch": 0.12971208073612348,
      "grad_norm": 4.673270225524902,
      "learning_rate": 4.986592037644836e-06,
      "loss": 0.1071,
      "step": 437,
      "video_reward_cumulative_accuracy": 0.7768878718535469
    },
    {
      "epoch": 0.13000890471950133,
      "grad_norm": 1.8337359428405762,
      "learning_rate": 4.986322783602167e-06,
      "loss": 0.0351,
      "step": 438,
      "video_reward_cumulative_accuracy": 0.7773972602739726
    },
    {
      "epoch": 0.13030572870287918,
      "grad_norm": 4.02969217300415,
      "learning_rate": 4.986050860250674e-06,
      "loss": 0.0719,
      "step": 439,
      "video_reward_cumulative_accuracy": 0.7767653758542141
    },
    {
      "epoch": 0.13060255268625706,
      "grad_norm": 9.999349594116211,
      "learning_rate": 4.985776267882291e-06,
      "loss": 0.1391,
      "step": 440,
      "video_reward_cumulative_accuracy": 0.7761363636363636
    },
    {
      "epoch": 0.13089937666963491,
      "grad_norm": 1.213397741317749,
      "learning_rate": 4.985499006791822e-06,
      "loss": 0.0421,
      "step": 441,
      "video_reward_cumulative_accuracy": 0.7766439909297053
    },
    {
      "epoch": 0.13119620065301277,
      "grad_norm": 1.5377864837646484,
      "learning_rate": 4.9852190772769304e-06,
      "loss": 0.0629,
      "step": 442,
      "video_reward_cumulative_accuracy": 0.7748868778280543
    },
    {
      "epoch": 0.13149302463639062,
      "grad_norm": 2.056608200073242,
      "learning_rate": 4.984936479638151e-06,
      "loss": 0.0557,
      "step": 443,
      "video_reward_cumulative_accuracy": 0.7731376975169301
    },
    {
      "epoch": 0.13178984861976847,
      "grad_norm": 2.519721746444702,
      "learning_rate": 4.9846512141788774e-06,
      "loss": 0.0692,
      "step": 444,
      "video_reward_cumulative_accuracy": 0.7725225225225225
    },
    {
      "epoch": 0.13208667260314633,
      "grad_norm": 1.5284984111785889,
      "learning_rate": 4.984363281205372e-06,
      "loss": 0.0448,
      "step": 445,
      "video_reward_cumulative_accuracy": 0.7730337078651686
    },
    {
      "epoch": 0.13238349658652418,
      "grad_norm": 1.4093446731567383,
      "learning_rate": 4.984072681026757e-06,
      "loss": 0.0707,
      "step": 446,
      "video_reward_cumulative_accuracy": 0.773542600896861
    },
    {
      "epoch": 0.13268032056990206,
      "grad_norm": 4.844381332397461,
      "learning_rate": 4.98377941395502e-06,
      "loss": 0.0652,
      "step": 447,
      "video_reward_cumulative_accuracy": 0.772930648769575
    },
    {
      "epoch": 0.1329771445532799,
      "grad_norm": 3.3423585891723633,
      "learning_rate": 4.983483480305012e-06,
      "loss": 0.0597,
      "step": 448,
      "video_reward_cumulative_accuracy": 0.7723214285714286
    },
    {
      "epoch": 0.13327396853665777,
      "grad_norm": 1.8888392448425293,
      "learning_rate": 4.983184880394447e-06,
      "loss": 0.0142,
      "step": 449,
      "video_reward_cumulative_accuracy": 0.7728285077951003
    },
    {
      "epoch": 0.13357079252003562,
      "grad_norm": 4.848865032196045,
      "learning_rate": 4.982883614543901e-06,
      "loss": 0.0525,
      "step": 450,
      "video_reward_cumulative_accuracy": 0.7733333333333333
    },
    {
      "epoch": 0.13386761650341347,
      "grad_norm": 3.70519757270813,
      "learning_rate": 4.982579683076811e-06,
      "loss": 0.0716,
      "step": 451,
      "video_reward_cumulative_accuracy": 0.7727272727272727
    },
    {
      "epoch": 0.13416444048679133,
      "grad_norm": 1.4521280527114868,
      "learning_rate": 4.982273086319479e-06,
      "loss": 0.0166,
      "step": 452,
      "video_reward_cumulative_accuracy": 0.7732300884955752
    },
    {
      "epoch": 0.13446126447016918,
      "grad_norm": 1.7118197679519653,
      "learning_rate": 4.981963824601064e-06,
      "loss": 0.0634,
      "step": 453,
      "video_reward_cumulative_accuracy": 0.7737306843267108
    },
    {
      "epoch": 0.13475808845354706,
      "grad_norm": 3.1727960109710693,
      "learning_rate": 4.98165189825359e-06,
      "loss": 0.0269,
      "step": 454,
      "video_reward_cumulative_accuracy": 0.7742290748898678
    },
    {
      "epoch": 0.1350549124369249,
      "grad_norm": 2.006000280380249,
      "learning_rate": 4.981337307611939e-06,
      "loss": 0.0942,
      "step": 455,
      "video_reward_cumulative_accuracy": 0.7747252747252747
    },
    {
      "epoch": 0.13535173642030277,
      "grad_norm": 2.203691244125366,
      "learning_rate": 4.981020053013855e-06,
      "loss": 0.0439,
      "step": 456,
      "video_reward_cumulative_accuracy": 0.7741228070175439
    },
    {
      "epoch": 0.13564856040368062,
      "grad_norm": 3.0224599838256836,
      "learning_rate": 4.9807001347999424e-06,
      "loss": 0.066,
      "step": 457,
      "video_reward_cumulative_accuracy": 0.774617067833698
    },
    {
      "epoch": 0.13594538438705847,
      "grad_norm": 1.738229513168335,
      "learning_rate": 4.980377553313665e-06,
      "loss": 0.0393,
      "step": 458,
      "video_reward_cumulative_accuracy": 0.7751091703056768
    },
    {
      "epoch": 0.13624220837043632,
      "grad_norm": 3.5705530643463135,
      "learning_rate": 4.980052308901343e-06,
      "loss": 0.0345,
      "step": 459,
      "video_reward_cumulative_accuracy": 0.7745098039215687
    },
    {
      "epoch": 0.13653903235381418,
      "grad_norm": 2.2670326232910156,
      "learning_rate": 4.9797244019121595e-06,
      "loss": 0.0286,
      "step": 460,
      "video_reward_cumulative_accuracy": 0.775
    },
    {
      "epoch": 0.13683585633719206,
      "grad_norm": 2.399627923965454,
      "learning_rate": 4.979393832698154e-06,
      "loss": 0.0963,
      "step": 461,
      "video_reward_cumulative_accuracy": 0.7754880694143167
    },
    {
      "epoch": 0.1371326803205699,
      "grad_norm": 4.614706039428711,
      "learning_rate": 4.979060601614225e-06,
      "loss": 0.1159,
      "step": 462,
      "video_reward_cumulative_accuracy": 0.7748917748917749
    },
    {
      "epoch": 0.13742950430394776,
      "grad_norm": 3.261317729949951,
      "learning_rate": 4.978724709018128e-06,
      "loss": 0.0661,
      "step": 463,
      "video_reward_cumulative_accuracy": 0.775377969762419
    },
    {
      "epoch": 0.13772632828732562,
      "grad_norm": 3.9476890563964844,
      "learning_rate": 4.978386155270477e-06,
      "loss": 0.0487,
      "step": 464,
      "video_reward_cumulative_accuracy": 0.7747844827586207
    },
    {
      "epoch": 0.13802315227070347,
      "grad_norm": 2.917313575744629,
      "learning_rate": 4.9780449407347405e-06,
      "loss": 0.0759,
      "step": 465,
      "video_reward_cumulative_accuracy": 0.7731182795698924
    },
    {
      "epoch": 0.13831997625408132,
      "grad_norm": 1.9969302415847778,
      "learning_rate": 4.977701065777247e-06,
      "loss": 0.0262,
      "step": 466,
      "video_reward_cumulative_accuracy": 0.7736051502145923
    },
    {
      "epoch": 0.13861680023745918,
      "grad_norm": 2.4382143020629883,
      "learning_rate": 4.97735453076718e-06,
      "loss": 0.0622,
      "step": 467,
      "video_reward_cumulative_accuracy": 0.7740899357601713
    },
    {
      "epoch": 0.13891362422083706,
      "grad_norm": 1.4531607627868652,
      "learning_rate": 4.977005336076578e-06,
      "loss": 0.0309,
      "step": 468,
      "video_reward_cumulative_accuracy": 0.7745726495726496
    },
    {
      "epoch": 0.1392104482042149,
      "grad_norm": 4.770167350769043,
      "learning_rate": 4.976653482080335e-06,
      "loss": 0.0523,
      "step": 469,
      "video_reward_cumulative_accuracy": 0.7750533049040512
    },
    {
      "epoch": 0.13950727218759276,
      "grad_norm": 1.4525412321090698,
      "learning_rate": 4.9762989691562006e-06,
      "loss": 0.0469,
      "step": 470,
      "video_reward_cumulative_accuracy": 0.774468085106383
    },
    {
      "epoch": 0.13980409617097062,
      "grad_norm": 7.346729755401611,
      "learning_rate": 4.975941797684778e-06,
      "loss": 0.104,
      "step": 471,
      "video_reward_cumulative_accuracy": 0.772823779193206
    },
    {
      "epoch": 0.14010092015434847,
      "grad_norm": 2.2593302726745605,
      "learning_rate": 4.975581968049527e-06,
      "loss": 0.0912,
      "step": 472,
      "video_reward_cumulative_accuracy": 0.7733050847457628
    },
    {
      "epoch": 0.14039774413772632,
      "grad_norm": 2.1568541526794434,
      "learning_rate": 4.9752194806367585e-06,
      "loss": 0.0645,
      "step": 473,
      "video_reward_cumulative_accuracy": 0.7716701902748414
    },
    {
      "epoch": 0.14069456812110417,
      "grad_norm": 2.715193033218384,
      "learning_rate": 4.974854335835639e-06,
      "loss": 0.0426,
      "step": 474,
      "video_reward_cumulative_accuracy": 0.7710970464135021
    },
    {
      "epoch": 0.14099139210448206,
      "grad_norm": 2.6449036598205566,
      "learning_rate": 4.974486534038185e-06,
      "loss": 0.0349,
      "step": 475,
      "video_reward_cumulative_accuracy": 0.771578947368421
    },
    {
      "epoch": 0.1412882160878599,
      "grad_norm": 2.4418740272521973,
      "learning_rate": 4.9741160756392705e-06,
      "loss": 0.0411,
      "step": 476,
      "video_reward_cumulative_accuracy": 0.7720588235294118
    },
    {
      "epoch": 0.14158504007123776,
      "grad_norm": 1.6195287704467773,
      "learning_rate": 4.973742961036615e-06,
      "loss": 0.0366,
      "step": 477,
      "video_reward_cumulative_accuracy": 0.7725366876310272
    },
    {
      "epoch": 0.14188186405461561,
      "grad_norm": 1.6140589714050293,
      "learning_rate": 4.973367190630796e-06,
      "loss": 0.0646,
      "step": 478,
      "video_reward_cumulative_accuracy": 0.7719665271966527
    },
    {
      "epoch": 0.14217868803799347,
      "grad_norm": 3.116804361343384,
      "learning_rate": 4.972988764825239e-06,
      "loss": 0.0293,
      "step": 479,
      "video_reward_cumulative_accuracy": 0.7724425887265136
    },
    {
      "epoch": 0.14247551202137132,
      "grad_norm": 1.9201480150222778,
      "learning_rate": 4.972607684026218e-06,
      "loss": 0.039,
      "step": 480,
      "video_reward_cumulative_accuracy": 0.771875
    },
    {
      "epoch": 0.14277233600474917,
      "grad_norm": 1.579925298690796,
      "learning_rate": 4.972223948642865e-06,
      "loss": 0.0468,
      "step": 481,
      "video_reward_cumulative_accuracy": 0.7713097713097713
    },
    {
      "epoch": 0.14306915998812705,
      "grad_norm": 2.824054718017578,
      "learning_rate": 4.971837559087153e-06,
      "loss": 0.0691,
      "step": 482,
      "video_reward_cumulative_accuracy": 0.770746887966805
    },
    {
      "epoch": 0.1433659839715049,
      "grad_norm": 3.3171064853668213,
      "learning_rate": 4.971448515773911e-06,
      "loss": 0.0773,
      "step": 483,
      "video_reward_cumulative_accuracy": 0.7701863354037267
    },
    {
      "epoch": 0.14366280795488276,
      "grad_norm": 4.4460577964782715,
      "learning_rate": 4.971056819120814e-06,
      "loss": 0.055,
      "step": 484,
      "video_reward_cumulative_accuracy": 0.7696280991735537
    },
    {
      "epoch": 0.1439596319382606,
      "grad_norm": 2.3328418731689453,
      "learning_rate": 4.970662469548386e-06,
      "loss": 0.0411,
      "step": 485,
      "video_reward_cumulative_accuracy": 0.7701030927835052
    },
    {
      "epoch": 0.14425645592163847,
      "grad_norm": 2.608328342437744,
      "learning_rate": 4.970265467480001e-06,
      "loss": 0.0615,
      "step": 486,
      "video_reward_cumulative_accuracy": 0.7695473251028807
    },
    {
      "epoch": 0.14455327990501632,
      "grad_norm": 1.6409941911697388,
      "learning_rate": 4.969865813341878e-06,
      "loss": 0.0431,
      "step": 487,
      "video_reward_cumulative_accuracy": 0.7700205338809035
    },
    {
      "epoch": 0.14485010388839417,
      "grad_norm": 6.2619805335998535,
      "learning_rate": 4.969463507563085e-06,
      "loss": 0.0885,
      "step": 488,
      "video_reward_cumulative_accuracy": 0.7704918032786885
    },
    {
      "epoch": 0.14514692787177205,
      "grad_norm": 2.390130043029785,
      "learning_rate": 4.969058550575535e-06,
      "loss": 0.0684,
      "step": 489,
      "video_reward_cumulative_accuracy": 0.7709611451942741
    },
    {
      "epoch": 0.1454437518551499,
      "grad_norm": 1.9959198236465454,
      "learning_rate": 4.968650942813991e-06,
      "loss": 0.0635,
      "step": 490,
      "video_reward_cumulative_accuracy": 0.7704081632653061
    },
    {
      "epoch": 0.14574057583852776,
      "grad_norm": 3.4746286869049072,
      "learning_rate": 4.968240684716058e-06,
      "loss": 0.0764,
      "step": 491,
      "video_reward_cumulative_accuracy": 0.769857433808554
    },
    {
      "epoch": 0.1460373998219056,
      "grad_norm": 2.226306438446045,
      "learning_rate": 4.967827776722187e-06,
      "loss": 0.0627,
      "step": 492,
      "video_reward_cumulative_accuracy": 0.7703252032520326
    },
    {
      "epoch": 0.14633422380528346,
      "grad_norm": 1.9936774969100952,
      "learning_rate": 4.967412219275677e-06,
      "loss": 0.026,
      "step": 493,
      "video_reward_cumulative_accuracy": 0.77079107505071
    },
    {
      "epoch": 0.14663104778866132,
      "grad_norm": 2.9451053142547607,
      "learning_rate": 4.966994012822668e-06,
      "loss": 0.037,
      "step": 494,
      "video_reward_cumulative_accuracy": 0.7692307692307693
    },
    {
      "epoch": 0.14692787177203917,
      "grad_norm": 2.2165896892547607,
      "learning_rate": 4.9665731578121445e-06,
      "loss": 0.0604,
      "step": 495,
      "video_reward_cumulative_accuracy": 0.7696969696969697
    },
    {
      "epoch": 0.14722469575541705,
      "grad_norm": 4.33952522277832,
      "learning_rate": 4.966149654695937e-06,
      "loss": 0.0512,
      "step": 496,
      "video_reward_cumulative_accuracy": 0.7691532258064516
    },
    {
      "epoch": 0.1475215197387949,
      "grad_norm": 1.7252819538116455,
      "learning_rate": 4.9657235039287165e-06,
      "loss": 0.0451,
      "step": 497,
      "video_reward_cumulative_accuracy": 0.7686116700201208
    },
    {
      "epoch": 0.14781834372217276,
      "grad_norm": 1.3271393775939941,
      "learning_rate": 4.965294705967997e-06,
      "loss": 0.0548,
      "step": 498,
      "video_reward_cumulative_accuracy": 0.7680722891566265
    },
    {
      "epoch": 0.1481151677055506,
      "grad_norm": 4.343282699584961,
      "learning_rate": 4.964863261274134e-06,
      "loss": 0.0571,
      "step": 499,
      "video_reward_cumulative_accuracy": 0.7675350701402806
    },
    {
      "epoch": 0.14841199168892846,
      "grad_norm": 1.385603666305542,
      "learning_rate": 4.964429170310327e-06,
      "loss": 0.0579,
      "step": 500,
      "video_reward_cumulative_accuracy": 0.767
    },
    {
      "epoch": 0.14870881567230632,
      "grad_norm": 1.3973246812820435,
      "learning_rate": 4.963992433542612e-06,
      "loss": 0.0573,
      "step": 501,
      "video_reward_cumulative_accuracy": 0.7674650698602794
    },
    {
      "epoch": 0.14900563965568417,
      "grad_norm": 1.4018256664276123,
      "learning_rate": 4.963553051439871e-06,
      "loss": 0.0543,
      "step": 502,
      "video_reward_cumulative_accuracy": 0.7679282868525896
    },
    {
      "epoch": 0.14930246363906205,
      "grad_norm": 1.3220703601837158,
      "learning_rate": 4.963111024473823e-06,
      "loss": 0.0535,
      "step": 503,
      "video_reward_cumulative_accuracy": 0.768389662027833
    },
    {
      "epoch": 0.1495992876224399,
      "grad_norm": 5.434345722198486,
      "learning_rate": 4.962666353119025e-06,
      "loss": 0.0945,
      "step": 504,
      "video_reward_cumulative_accuracy": 0.7688492063492064
    },
    {
      "epoch": 0.14989611160581776,
      "grad_norm": 1.8976709842681885,
      "learning_rate": 4.9622190378528775e-06,
      "loss": 0.0447,
      "step": 505,
      "video_reward_cumulative_accuracy": 0.7683168316831683
    },
    {
      "epoch": 0.1501929355891956,
      "grad_norm": 0.9916190505027771,
      "learning_rate": 4.961769079155615e-06,
      "loss": 0.0367,
      "step": 506,
      "video_reward_cumulative_accuracy": 0.7687747035573123
    },
    {
      "epoch": 0.15048975957257346,
      "grad_norm": 3.0541810989379883,
      "learning_rate": 4.961316477510312e-06,
      "loss": 0.0512,
      "step": 507,
      "video_reward_cumulative_accuracy": 0.7682445759368837
    },
    {
      "epoch": 0.15078658355595131,
      "grad_norm": 1.1684255599975586,
      "learning_rate": 4.960861233402881e-06,
      "loss": 0.0324,
      "step": 508,
      "video_reward_cumulative_accuracy": 0.7687007874015748
    },
    {
      "epoch": 0.15108340753932917,
      "grad_norm": 2.5912883281707764,
      "learning_rate": 4.960403347322069e-06,
      "loss": 0.0573,
      "step": 509,
      "video_reward_cumulative_accuracy": 0.768172888015717
    },
    {
      "epoch": 0.15138023152270705,
      "grad_norm": 2.920675039291382,
      "learning_rate": 4.959942819759464e-06,
      "loss": 0.0379,
      "step": 510,
      "video_reward_cumulative_accuracy": 0.7676470588235295
    },
    {
      "epoch": 0.1516770555060849,
      "grad_norm": 7.364986419677734,
      "learning_rate": 4.959479651209485e-06,
      "loss": 0.109,
      "step": 511,
      "video_reward_cumulative_accuracy": 0.7681017612524462
    },
    {
      "epoch": 0.15197387948946275,
      "grad_norm": 2.541637420654297,
      "learning_rate": 4.959013842169389e-06,
      "loss": 0.0187,
      "step": 512,
      "video_reward_cumulative_accuracy": 0.7685546875
    },
    {
      "epoch": 0.1522707034728406,
      "grad_norm": 1.8760055303573608,
      "learning_rate": 4.9585453931392665e-06,
      "loss": 0.0403,
      "step": 513,
      "video_reward_cumulative_accuracy": 0.7680311890838206
    },
    {
      "epoch": 0.15256752745621846,
      "grad_norm": 1.9295579195022583,
      "learning_rate": 4.958074304622045e-06,
      "loss": 0.0487,
      "step": 514,
      "video_reward_cumulative_accuracy": 0.7684824902723736
    },
    {
      "epoch": 0.1528643514395963,
      "grad_norm": 4.06351900100708,
      "learning_rate": 4.957600577123482e-06,
      "loss": 0.0608,
      "step": 515,
      "video_reward_cumulative_accuracy": 0.7679611650485437
    },
    {
      "epoch": 0.15316117542297417,
      "grad_norm": 2.601158618927002,
      "learning_rate": 4.957124211152169e-06,
      "loss": 0.0148,
      "step": 516,
      "video_reward_cumulative_accuracy": 0.7684108527131783
    },
    {
      "epoch": 0.15345799940635205,
      "grad_norm": 1.6226683855056763,
      "learning_rate": 4.9566452072195335e-06,
      "loss": 0.0445,
      "step": 517,
      "video_reward_cumulative_accuracy": 0.7688588007736944
    },
    {
      "epoch": 0.1537548233897299,
      "grad_norm": 1.285947322845459,
      "learning_rate": 4.956163565839831e-06,
      "loss": 0.0194,
      "step": 518,
      "video_reward_cumulative_accuracy": 0.7693050193050193
    },
    {
      "epoch": 0.15405164737310775,
      "grad_norm": 5.118712425231934,
      "learning_rate": 4.955679287530152e-06,
      "loss": 0.1169,
      "step": 519,
      "video_reward_cumulative_accuracy": 0.7687861271676301
    },
    {
      "epoch": 0.1543484713564856,
      "grad_norm": 1.981034755706787,
      "learning_rate": 4.955192372810414e-06,
      "loss": 0.0227,
      "step": 520,
      "video_reward_cumulative_accuracy": 0.7692307692307693
    },
    {
      "epoch": 0.15464529533986346,
      "grad_norm": 3.3112709522247314,
      "learning_rate": 4.954702822203369e-06,
      "loss": 0.0877,
      "step": 521,
      "video_reward_cumulative_accuracy": 0.7687140115163148
    },
    {
      "epoch": 0.1549421193232413,
      "grad_norm": 2.729583501815796,
      "learning_rate": 4.954210636234597e-06,
      "loss": 0.0301,
      "step": 522,
      "video_reward_cumulative_accuracy": 0.7691570881226054
    },
    {
      "epoch": 0.15523894330661916,
      "grad_norm": 9.558045387268066,
      "learning_rate": 4.953715815432505e-06,
      "loss": 0.0811,
      "step": 523,
      "video_reward_cumulative_accuracy": 0.7695984703632888
    },
    {
      "epoch": 0.15553576728999705,
      "grad_norm": 6.721735000610352,
      "learning_rate": 4.9532183603283345e-06,
      "loss": 0.0445,
      "step": 524,
      "video_reward_cumulative_accuracy": 0.7690839694656488
    },
    {
      "epoch": 0.1558325912733749,
      "grad_norm": 1.9270586967468262,
      "learning_rate": 4.952718271456151e-06,
      "loss": 0.0429,
      "step": 525,
      "video_reward_cumulative_accuracy": 0.7695238095238095
    },
    {
      "epoch": 0.15612941525675275,
      "grad_norm": 2.5546162128448486,
      "learning_rate": 4.952215549352846e-06,
      "loss": 0.0786,
      "step": 526,
      "video_reward_cumulative_accuracy": 0.7690114068441065
    },
    {
      "epoch": 0.1564262392401306,
      "grad_norm": 2.585820436477661,
      "learning_rate": 4.951710194558144e-06,
      "loss": 0.0416,
      "step": 527,
      "video_reward_cumulative_accuracy": 0.7694497153700189
    },
    {
      "epoch": 0.15672306322350846,
      "grad_norm": 2.7101705074310303,
      "learning_rate": 4.9512022076145895e-06,
      "loss": 0.0562,
      "step": 528,
      "video_reward_cumulative_accuracy": 0.7698863636363636
    },
    {
      "epoch": 0.1570198872068863,
      "grad_norm": 1.0189766883850098,
      "learning_rate": 4.9506915890675566e-06,
      "loss": 0.0526,
      "step": 529,
      "video_reward_cumulative_accuracy": 0.7703213610586012
    },
    {
      "epoch": 0.15731671119026416,
      "grad_norm": 6.756640911102295,
      "learning_rate": 4.9501783394652455e-06,
      "loss": 0.0875,
      "step": 530,
      "video_reward_cumulative_accuracy": 0.7707547169811321
    },
    {
      "epoch": 0.15761353517364202,
      "grad_norm": 1.9713293313980103,
      "learning_rate": 4.9496624593586775e-06,
      "loss": 0.076,
      "step": 531,
      "video_reward_cumulative_accuracy": 0.7702448210922788
    },
    {
      "epoch": 0.1579103591570199,
      "grad_norm": 2.242279529571533,
      "learning_rate": 4.949143949301701e-06,
      "loss": 0.0646,
      "step": 532,
      "video_reward_cumulative_accuracy": 0.7706766917293233
    },
    {
      "epoch": 0.15820718314039775,
      "grad_norm": 3.1193904876708984,
      "learning_rate": 4.9486228098509865e-06,
      "loss": 0.074,
      "step": 533,
      "video_reward_cumulative_accuracy": 0.7692307692307693
    },
    {
      "epoch": 0.1585040071237756,
      "grad_norm": 1.8932733535766602,
      "learning_rate": 4.9480990415660276e-06,
      "loss": 0.0565,
      "step": 534,
      "video_reward_cumulative_accuracy": 0.7696629213483146
    },
    {
      "epoch": 0.15880083110715346,
      "grad_norm": 2.4588887691497803,
      "learning_rate": 4.947572645009141e-06,
      "loss": 0.0547,
      "step": 535,
      "video_reward_cumulative_accuracy": 0.7691588785046729
    },
    {
      "epoch": 0.1590976550905313,
      "grad_norm": 4.820741176605225,
      "learning_rate": 4.947043620745464e-06,
      "loss": 0.0805,
      "step": 536,
      "video_reward_cumulative_accuracy": 0.769589552238806
    },
    {
      "epoch": 0.15939447907390916,
      "grad_norm": 1.0390130281448364,
      "learning_rate": 4.946511969342956e-06,
      "loss": 0.0558,
      "step": 537,
      "video_reward_cumulative_accuracy": 0.7700186219739292
    },
    {
      "epoch": 0.15969130305728702,
      "grad_norm": 2.8653810024261475,
      "learning_rate": 4.945977691372396e-06,
      "loss": 0.0489,
      "step": 538,
      "video_reward_cumulative_accuracy": 0.7704460966542751
    },
    {
      "epoch": 0.1599881270406649,
      "grad_norm": 3.0216479301452637,
      "learning_rate": 4.945440787407382e-06,
      "loss": 0.0597,
      "step": 539,
      "video_reward_cumulative_accuracy": 0.7708719851576994
    },
    {
      "epoch": 0.16028495102404275,
      "grad_norm": 1.2938923835754395,
      "learning_rate": 4.944901258024335e-06,
      "loss": 0.0615,
      "step": 540,
      "video_reward_cumulative_accuracy": 0.7703703703703704
    },
    {
      "epoch": 0.1605817750074206,
      "grad_norm": 1.7534013986587524,
      "learning_rate": 4.94435910380249e-06,
      "loss": 0.0746,
      "step": 541,
      "video_reward_cumulative_accuracy": 0.7689463955637708
    },
    {
      "epoch": 0.16087859899079845,
      "grad_norm": 2.365793228149414,
      "learning_rate": 4.943814325323904e-06,
      "loss": 0.0314,
      "step": 542,
      "video_reward_cumulative_accuracy": 0.7693726937269373
    },
    {
      "epoch": 0.1611754229741763,
      "grad_norm": 5.616259574890137,
      "learning_rate": 4.943266923173449e-06,
      "loss": 0.1097,
      "step": 543,
      "video_reward_cumulative_accuracy": 0.7697974217311234
    },
    {
      "epoch": 0.16147224695755416,
      "grad_norm": 1.3462814092636108,
      "learning_rate": 4.942716897938813e-06,
      "loss": 0.0486,
      "step": 544,
      "video_reward_cumulative_accuracy": 0.7702205882352942
    },
    {
      "epoch": 0.161769070940932,
      "grad_norm": 1.065239429473877,
      "learning_rate": 4.9421642502105025e-06,
      "loss": 0.0442,
      "step": 545,
      "video_reward_cumulative_accuracy": 0.7697247706422018
    },
    {
      "epoch": 0.1620658949243099,
      "grad_norm": 2.191693067550659,
      "learning_rate": 4.941608980581839e-06,
      "loss": 0.0351,
      "step": 546,
      "video_reward_cumulative_accuracy": 0.7692307692307693
    },
    {
      "epoch": 0.16236271890768775,
      "grad_norm": 2.6450510025024414,
      "learning_rate": 4.941051089648958e-06,
      "loss": 0.0778,
      "step": 547,
      "video_reward_cumulative_accuracy": 0.7687385740402194
    },
    {
      "epoch": 0.1626595428910656,
      "grad_norm": 4.044307231903076,
      "learning_rate": 4.940490578010808e-06,
      "loss": 0.068,
      "step": 548,
      "video_reward_cumulative_accuracy": 0.7691605839416058
    },
    {
      "epoch": 0.16295636687444345,
      "grad_norm": 2.968937873840332,
      "learning_rate": 4.9399274462691555e-06,
      "loss": 0.0773,
      "step": 549,
      "video_reward_cumulative_accuracy": 0.7695810564663024
    },
    {
      "epoch": 0.1632531908578213,
      "grad_norm": 1.9103548526763916,
      "learning_rate": 4.939361695028575e-06,
      "loss": 0.0691,
      "step": 550,
      "video_reward_cumulative_accuracy": 0.769090909090909
    },
    {
      "epoch": 0.16355001484119916,
      "grad_norm": 3.47516131401062,
      "learning_rate": 4.938793324896456e-06,
      "loss": 0.0912,
      "step": 551,
      "video_reward_cumulative_accuracy": 0.7686025408348457
    },
    {
      "epoch": 0.163846838824577,
      "grad_norm": 3.028697967529297,
      "learning_rate": 4.9382223364829995e-06,
      "loss": 0.0532,
      "step": 552,
      "video_reward_cumulative_accuracy": 0.7690217391304348
    },
    {
      "epoch": 0.1641436628079549,
      "grad_norm": 3.2655930519104004,
      "learning_rate": 4.937648730401215e-06,
      "loss": 0.036,
      "step": 553,
      "video_reward_cumulative_accuracy": 0.7694394213381555
    },
    {
      "epoch": 0.16444048679133275,
      "grad_norm": 1.8100935220718384,
      "learning_rate": 4.937072507266928e-06,
      "loss": 0.0492,
      "step": 554,
      "video_reward_cumulative_accuracy": 0.76985559566787
    },
    {
      "epoch": 0.1647373107747106,
      "grad_norm": 3.864176034927368,
      "learning_rate": 4.936493667698766e-06,
      "loss": 0.0589,
      "step": 555,
      "video_reward_cumulative_accuracy": 0.7702702702702703
    },
    {
      "epoch": 0.16503413475808845,
      "grad_norm": 2.1781342029571533,
      "learning_rate": 4.935912212318171e-06,
      "loss": 0.0196,
      "step": 556,
      "video_reward_cumulative_accuracy": 0.77068345323741
    },
    {
      "epoch": 0.1653309587414663,
      "grad_norm": 1.995492696762085,
      "learning_rate": 4.935328141749393e-06,
      "loss": 0.049,
      "step": 557,
      "video_reward_cumulative_accuracy": 0.770197486535009
    },
    {
      "epoch": 0.16562778272484416,
      "grad_norm": 2.0720860958099365,
      "learning_rate": 4.934741456619488e-06,
      "loss": 0.0585,
      "step": 558,
      "video_reward_cumulative_accuracy": 0.7697132616487455
    },
    {
      "epoch": 0.165924606708222,
      "grad_norm": 1.4079474210739136,
      "learning_rate": 4.934152157558317e-06,
      "loss": 0.0322,
      "step": 559,
      "video_reward_cumulative_accuracy": 0.7701252236135957
    },
    {
      "epoch": 0.1662214306915999,
      "grad_norm": 4.133894443511963,
      "learning_rate": 4.933560245198552e-06,
      "loss": 0.0604,
      "step": 560,
      "video_reward_cumulative_accuracy": 0.7705357142857143
    },
    {
      "epoch": 0.16651825467497774,
      "grad_norm": 3.3255507946014404,
      "learning_rate": 4.932965720175669e-06,
      "loss": 0.0559,
      "step": 561,
      "video_reward_cumulative_accuracy": 0.7709447415329769
    },
    {
      "epoch": 0.1668150786583556,
      "grad_norm": 3.746882677078247,
      "learning_rate": 4.9323685831279465e-06,
      "loss": 0.0728,
      "step": 562,
      "video_reward_cumulative_accuracy": 0.7713523131672598
    },
    {
      "epoch": 0.16711190264173345,
      "grad_norm": 2.2193782329559326,
      "learning_rate": 4.93176883469647e-06,
      "loss": 0.0512,
      "step": 563,
      "video_reward_cumulative_accuracy": 0.7708703374777975
    },
    {
      "epoch": 0.1674087266251113,
      "grad_norm": 0.7712289094924927,
      "learning_rate": 4.9311664755251265e-06,
      "loss": 0.0202,
      "step": 564,
      "video_reward_cumulative_accuracy": 0.7712765957446809
    },
    {
      "epoch": 0.16770555060848916,
      "grad_norm": 1.9561268091201782,
      "learning_rate": 4.93056150626061e-06,
      "loss": 0.045,
      "step": 565,
      "video_reward_cumulative_accuracy": 0.7716814159292036
    },
    {
      "epoch": 0.168002374591867,
      "grad_norm": 2.4627063274383545,
      "learning_rate": 4.92995392755241e-06,
      "loss": 0.0474,
      "step": 566,
      "video_reward_cumulative_accuracy": 0.7720848056537103
    },
    {
      "epoch": 0.1682991985752449,
      "grad_norm": 2.0036139488220215,
      "learning_rate": 4.929343740052823e-06,
      "loss": 0.0358,
      "step": 567,
      "video_reward_cumulative_accuracy": 0.7724867724867724
    },
    {
      "epoch": 0.16859602255862274,
      "grad_norm": 3.07709002494812,
      "learning_rate": 4.928730944416945e-06,
      "loss": 0.0407,
      "step": 568,
      "video_reward_cumulative_accuracy": 0.772887323943662
    },
    {
      "epoch": 0.1688928465420006,
      "grad_norm": 1.9337095022201538,
      "learning_rate": 4.928115541302672e-06,
      "loss": 0.0386,
      "step": 569,
      "video_reward_cumulative_accuracy": 0.7724077328646749
    },
    {
      "epoch": 0.16918967052537845,
      "grad_norm": 6.987502574920654,
      "learning_rate": 4.927497531370697e-06,
      "loss": 0.1114,
      "step": 570,
      "video_reward_cumulative_accuracy": 0.7728070175438596
    },
    {
      "epoch": 0.1694864945087563,
      "grad_norm": 3.6298933029174805,
      "learning_rate": 4.9268769152845146e-06,
      "loss": 0.0853,
      "step": 571,
      "video_reward_cumulative_accuracy": 0.7723292469352014
    },
    {
      "epoch": 0.16978331849213416,
      "grad_norm": 6.841943740844727,
      "learning_rate": 4.926253693710416e-06,
      "loss": 0.0701,
      "step": 572,
      "video_reward_cumulative_accuracy": 0.7727272727272727
    },
    {
      "epoch": 0.170080142475512,
      "grad_norm": 5.937003135681152,
      "learning_rate": 4.925627867317491e-06,
      "loss": 0.1064,
      "step": 573,
      "video_reward_cumulative_accuracy": 0.7731239092495636
    },
    {
      "epoch": 0.1703769664588899,
      "grad_norm": 2.3757357597351074,
      "learning_rate": 4.924999436777624e-06,
      "loss": 0.0466,
      "step": 574,
      "video_reward_cumulative_accuracy": 0.7735191637630662
    },
    {
      "epoch": 0.17067379044226774,
      "grad_norm": 1.4566744565963745,
      "learning_rate": 4.924368402765498e-06,
      "loss": 0.0228,
      "step": 575,
      "video_reward_cumulative_accuracy": 0.7739130434782608
    },
    {
      "epoch": 0.1709706144256456,
      "grad_norm": 2.156557083129883,
      "learning_rate": 4.923734765958587e-06,
      "loss": 0.0483,
      "step": 576,
      "video_reward_cumulative_accuracy": 0.7743055555555556
    },
    {
      "epoch": 0.17126743840902345,
      "grad_norm": 3.534210681915283,
      "learning_rate": 4.9230985270371625e-06,
      "loss": 0.0695,
      "step": 577,
      "video_reward_cumulative_accuracy": 0.7738301559792028
    },
    {
      "epoch": 0.1715642623924013,
      "grad_norm": 1.7886089086532593,
      "learning_rate": 4.9224596866842895e-06,
      "loss": 0.06,
      "step": 578,
      "video_reward_cumulative_accuracy": 0.7742214532871973
    },
    {
      "epoch": 0.17186108637577915,
      "grad_norm": 1.3056138753890991,
      "learning_rate": 4.921818245585824e-06,
      "loss": 0.0428,
      "step": 579,
      "video_reward_cumulative_accuracy": 0.7746113989637305
    },
    {
      "epoch": 0.172157910359157,
      "grad_norm": 2.9909825325012207,
      "learning_rate": 4.921174204430415e-06,
      "loss": 0.0486,
      "step": 580,
      "video_reward_cumulative_accuracy": 0.775
    },
    {
      "epoch": 0.1724547343425349,
      "grad_norm": 3.4431159496307373,
      "learning_rate": 4.920527563909505e-06,
      "loss": 0.0921,
      "step": 581,
      "video_reward_cumulative_accuracy": 0.774526678141136
    },
    {
      "epoch": 0.17275155832591274,
      "grad_norm": 1.9083445072174072,
      "learning_rate": 4.919878324717323e-06,
      "loss": 0.0655,
      "step": 582,
      "video_reward_cumulative_accuracy": 0.7749140893470791
    },
    {
      "epoch": 0.1730483823092906,
      "grad_norm": 5.427271366119385,
      "learning_rate": 4.919226487550892e-06,
      "loss": 0.0755,
      "step": 583,
      "video_reward_cumulative_accuracy": 0.774442538593482
    },
    {
      "epoch": 0.17334520629266845,
      "grad_norm": 2.5748579502105713,
      "learning_rate": 4.918572053110022e-06,
      "loss": 0.0445,
      "step": 584,
      "video_reward_cumulative_accuracy": 0.7748287671232876
    },
    {
      "epoch": 0.1736420302760463,
      "grad_norm": 1.3002210855484009,
      "learning_rate": 4.917915022097313e-06,
      "loss": 0.0644,
      "step": 585,
      "video_reward_cumulative_accuracy": 0.7752136752136752
    },
    {
      "epoch": 0.17393885425942415,
      "grad_norm": 0.7442671060562134,
      "learning_rate": 4.917255395218149e-06,
      "loss": 0.0408,
      "step": 586,
      "video_reward_cumulative_accuracy": 0.7747440273037542
    },
    {
      "epoch": 0.174235678242802,
      "grad_norm": 0.8181408643722534,
      "learning_rate": 4.9165931731807045e-06,
      "loss": 0.0564,
      "step": 587,
      "video_reward_cumulative_accuracy": 0.7751277683134583
    },
    {
      "epoch": 0.1745325022261799,
      "grad_norm": 1.601649522781372,
      "learning_rate": 4.915928356695941e-06,
      "loss": 0.0286,
      "step": 588,
      "video_reward_cumulative_accuracy": 0.7755102040816326
    },
    {
      "epoch": 0.17482932620955774,
      "grad_norm": 0.9357208013534546,
      "learning_rate": 4.915260946477601e-06,
      "loss": 0.0481,
      "step": 589,
      "video_reward_cumulative_accuracy": 0.7750424448217318
    },
    {
      "epoch": 0.1751261501929356,
      "grad_norm": 3.644911766052246,
      "learning_rate": 4.914590943242216e-06,
      "loss": 0.0626,
      "step": 590,
      "video_reward_cumulative_accuracy": 0.7745762711864407
    },
    {
      "epoch": 0.17542297417631345,
      "grad_norm": 6.013518810272217,
      "learning_rate": 4.913918347709098e-06,
      "loss": 0.0961,
      "step": 591,
      "video_reward_cumulative_accuracy": 0.7749576988155669
    },
    {
      "epoch": 0.1757197981596913,
      "grad_norm": 3.016268491744995,
      "learning_rate": 4.9132431606003444e-06,
      "loss": 0.0378,
      "step": 592,
      "video_reward_cumulative_accuracy": 0.7753378378378378
    },
    {
      "epoch": 0.17601662214306915,
      "grad_norm": 1.4820626974105835,
      "learning_rate": 4.912565382640834e-06,
      "loss": 0.0345,
      "step": 593,
      "video_reward_cumulative_accuracy": 0.7757166947723441
    },
    {
      "epoch": 0.176313446126447,
      "grad_norm": 5.260765075683594,
      "learning_rate": 4.911885014558227e-06,
      "loss": 0.0478,
      "step": 594,
      "video_reward_cumulative_accuracy": 0.7760942760942761
    },
    {
      "epoch": 0.17661027010982489,
      "grad_norm": 1.4641700983047485,
      "learning_rate": 4.911202057082966e-06,
      "loss": 0.0504,
      "step": 595,
      "video_reward_cumulative_accuracy": 0.7747899159663866
    },
    {
      "epoch": 0.17690709409320274,
      "grad_norm": 2.0738446712493896,
      "learning_rate": 4.91051651094827e-06,
      "loss": 0.0336,
      "step": 596,
      "video_reward_cumulative_accuracy": 0.7743288590604027
    },
    {
      "epoch": 0.1772039180765806,
      "grad_norm": 1.8221532106399536,
      "learning_rate": 4.90982837689014e-06,
      "loss": 0.0481,
      "step": 597,
      "video_reward_cumulative_accuracy": 0.7747068676716918
    },
    {
      "epoch": 0.17750074205995844,
      "grad_norm": 3.999908685684204,
      "learning_rate": 4.909137655647354e-06,
      "loss": 0.0827,
      "step": 598,
      "video_reward_cumulative_accuracy": 0.7750836120401338
    },
    {
      "epoch": 0.1777975660433363,
      "grad_norm": 3.1265053749084473,
      "learning_rate": 4.908444347961472e-06,
      "loss": 0.0808,
      "step": 599,
      "video_reward_cumulative_accuracy": 0.7754590984974958
    },
    {
      "epoch": 0.17809439002671415,
      "grad_norm": 4.669152736663818,
      "learning_rate": 4.907748454576822e-06,
      "loss": 0.09,
      "step": 600,
      "video_reward_cumulative_accuracy": 0.7758333333333334
    },
    {
      "epoch": 0.17809439002671415,
      "eval_runtime": 143.405,
      "eval_samples_per_second": 5.502,
      "eval_steps_per_second": 0.69,
      "eval_test_set_accuracy": 0.73989898989899,
      "step": 600
    },
    {
      "epoch": 0.178391214010092,
      "grad_norm": 1.6913496255874634,
      "learning_rate": 4.907049976240516e-06,
      "loss": 0.0538,
      "step": 601,
      "video_reward_cumulative_accuracy": 0.7762063227953411
    },
    {
      "epoch": 0.17868803799346988,
      "grad_norm": 10.412848472595215,
      "learning_rate": 4.9063489137024375e-06,
      "loss": 0.1035,
      "step": 602,
      "video_reward_cumulative_accuracy": 0.7757475083056479
    },
    {
      "epoch": 0.17898486197684774,
      "grad_norm": 0.9118223786354065,
      "learning_rate": 4.905645267715246e-06,
      "loss": 0.0125,
      "step": 603,
      "video_reward_cumulative_accuracy": 0.7761194029850746
    },
    {
      "epoch": 0.1792816859602256,
      "grad_norm": 2.315340518951416,
      "learning_rate": 4.904939039034373e-06,
      "loss": 0.0634,
      "step": 604,
      "video_reward_cumulative_accuracy": 0.7756622516556292
    },
    {
      "epoch": 0.17957850994360344,
      "grad_norm": 3.120900869369507,
      "learning_rate": 4.904230228418023e-06,
      "loss": 0.1249,
      "step": 605,
      "video_reward_cumulative_accuracy": 0.775206611570248
    },
    {
      "epoch": 0.1798753339269813,
      "grad_norm": 3.1384425163269043,
      "learning_rate": 4.903518836627174e-06,
      "loss": 0.0442,
      "step": 606,
      "video_reward_cumulative_accuracy": 0.7747524752475248
    },
    {
      "epoch": 0.18017215791035915,
      "grad_norm": 4.558505535125732,
      "learning_rate": 4.9028048644255745e-06,
      "loss": 0.071,
      "step": 607,
      "video_reward_cumulative_accuracy": 0.7742998352553542
    },
    {
      "epoch": 0.180468981893737,
      "grad_norm": 2.9076156616210938,
      "learning_rate": 4.9020883125797415e-06,
      "loss": 0.0323,
      "step": 608,
      "video_reward_cumulative_accuracy": 0.7738486842105263
    },
    {
      "epoch": 0.18076580587711488,
      "grad_norm": 4.778907775878906,
      "learning_rate": 4.9013691818589635e-06,
      "loss": 0.0686,
      "step": 609,
      "video_reward_cumulative_accuracy": 0.7742200328407225
    },
    {
      "epoch": 0.18106262986049274,
      "grad_norm": 1.0505435466766357,
      "learning_rate": 4.9006474730352974e-06,
      "loss": 0.0419,
      "step": 610,
      "video_reward_cumulative_accuracy": 0.7745901639344263
    },
    {
      "epoch": 0.1813594538438706,
      "grad_norm": 3.0237913131713867,
      "learning_rate": 4.8999231868835675e-06,
      "loss": 0.0503,
      "step": 611,
      "video_reward_cumulative_accuracy": 0.7741407528641571
    },
    {
      "epoch": 0.18165627782724844,
      "grad_norm": 1.5496163368225098,
      "learning_rate": 4.899196324181365e-06,
      "loss": 0.0545,
      "step": 612,
      "video_reward_cumulative_accuracy": 0.7736928104575164
    },
    {
      "epoch": 0.1819531018106263,
      "grad_norm": 4.970526695251465,
      "learning_rate": 4.898466885709049e-06,
      "loss": 0.0746,
      "step": 613,
      "video_reward_cumulative_accuracy": 0.7732463295269169
    },
    {
      "epoch": 0.18224992579400415,
      "grad_norm": 3.2845726013183594,
      "learning_rate": 4.897734872249742e-06,
      "loss": 0.0703,
      "step": 614,
      "video_reward_cumulative_accuracy": 0.7728013029315961
    },
    {
      "epoch": 0.182546749777382,
      "grad_norm": 1.6914643049240112,
      "learning_rate": 4.89700028458933e-06,
      "loss": 0.0178,
      "step": 615,
      "video_reward_cumulative_accuracy": 0.7731707317073171
    },
    {
      "epoch": 0.18284357376075988,
      "grad_norm": 1.8486802577972412,
      "learning_rate": 4.896263123516465e-06,
      "loss": 0.0277,
      "step": 616,
      "video_reward_cumulative_accuracy": 0.773538961038961
    },
    {
      "epoch": 0.18314039774413773,
      "grad_norm": 4.048538684844971,
      "learning_rate": 4.8955233898225605e-06,
      "loss": 0.0762,
      "step": 617,
      "video_reward_cumulative_accuracy": 0.773095623987034
    },
    {
      "epoch": 0.1834372217275156,
      "grad_norm": 3.5552480220794678,
      "learning_rate": 4.894781084301793e-06,
      "loss": 0.0318,
      "step": 618,
      "video_reward_cumulative_accuracy": 0.7726537216828478
    },
    {
      "epoch": 0.18373404571089344,
      "grad_norm": 4.219141006469727,
      "learning_rate": 4.8940362077511e-06,
      "loss": 0.0499,
      "step": 619,
      "video_reward_cumulative_accuracy": 0.7730210016155089
    },
    {
      "epoch": 0.1840308696942713,
      "grad_norm": 1.9816478490829468,
      "learning_rate": 4.893288760970178e-06,
      "loss": 0.0364,
      "step": 620,
      "video_reward_cumulative_accuracy": 0.7733870967741936
    },
    {
      "epoch": 0.18432769367764915,
      "grad_norm": 2.502150535583496,
      "learning_rate": 4.892538744761484e-06,
      "loss": 0.0592,
      "step": 621,
      "video_reward_cumulative_accuracy": 0.7737520128824477
    },
    {
      "epoch": 0.184624517661027,
      "grad_norm": 2.3041718006134033,
      "learning_rate": 4.891786159930234e-06,
      "loss": 0.0372,
      "step": 622,
      "video_reward_cumulative_accuracy": 0.7741157556270096
    },
    {
      "epoch": 0.18492134164440488,
      "grad_norm": 2.347670078277588,
      "learning_rate": 4.8910310072843996e-06,
      "loss": 0.0908,
      "step": 623,
      "video_reward_cumulative_accuracy": 0.7744783306581059
    },
    {
      "epoch": 0.18521816562778273,
      "grad_norm": 1.8450899124145508,
      "learning_rate": 4.89027328763471e-06,
      "loss": 0.0378,
      "step": 624,
      "video_reward_cumulative_accuracy": 0.7748397435897436
    },
    {
      "epoch": 0.18551498961116059,
      "grad_norm": 2.6531243324279785,
      "learning_rate": 4.889513001794652e-06,
      "loss": 0.0294,
      "step": 625,
      "video_reward_cumulative_accuracy": 0.7752
    },
    {
      "epoch": 0.18581181359453844,
      "grad_norm": 1.7452423572540283,
      "learning_rate": 4.888750150580466e-06,
      "loss": 0.0357,
      "step": 626,
      "video_reward_cumulative_accuracy": 0.7747603833865815
    },
    {
      "epoch": 0.1861086375779163,
      "grad_norm": 1.9167020320892334,
      "learning_rate": 4.887984734811146e-06,
      "loss": 0.0278,
      "step": 627,
      "video_reward_cumulative_accuracy": 0.7751196172248804
    },
    {
      "epoch": 0.18640546156129414,
      "grad_norm": 2.1981749534606934,
      "learning_rate": 4.887216755308442e-06,
      "loss": 0.0457,
      "step": 628,
      "video_reward_cumulative_accuracy": 0.7746815286624203
    },
    {
      "epoch": 0.186702285544672,
      "grad_norm": 2.631011962890625,
      "learning_rate": 4.886446212896853e-06,
      "loss": 0.0836,
      "step": 629,
      "video_reward_cumulative_accuracy": 0.7742448330683624
    },
    {
      "epoch": 0.18699910952804988,
      "grad_norm": 3.423548936843872,
      "learning_rate": 4.885673108403631e-06,
      "loss": 0.0552,
      "step": 630,
      "video_reward_cumulative_accuracy": 0.7746031746031746
    },
    {
      "epoch": 0.18729593351142773,
      "grad_norm": 0.9264172911643982,
      "learning_rate": 4.884897442658779e-06,
      "loss": 0.0143,
      "step": 631,
      "video_reward_cumulative_accuracy": 0.7749603803486529
    },
    {
      "epoch": 0.18759275749480558,
      "grad_norm": 1.8935270309448242,
      "learning_rate": 4.88411921649505e-06,
      "loss": 0.0241,
      "step": 632,
      "video_reward_cumulative_accuracy": 0.7745253164556962
    },
    {
      "epoch": 0.18788958147818344,
      "grad_norm": 1.9011247158050537,
      "learning_rate": 4.883338430747944e-06,
      "loss": 0.1005,
      "step": 633,
      "video_reward_cumulative_accuracy": 0.7748815165876777
    },
    {
      "epoch": 0.1881864054615613,
      "grad_norm": 2.4055683612823486,
      "learning_rate": 4.882555086255712e-06,
      "loss": 0.0326,
      "step": 634,
      "video_reward_cumulative_accuracy": 0.7752365930599369
    },
    {
      "epoch": 0.18848322944493914,
      "grad_norm": 3.9865567684173584,
      "learning_rate": 4.88176918385935e-06,
      "loss": 0.0432,
      "step": 635,
      "video_reward_cumulative_accuracy": 0.7748031496062993
    },
    {
      "epoch": 0.188780053428317,
      "grad_norm": 1.4653565883636475,
      "learning_rate": 4.8809807244025985e-06,
      "loss": 0.0275,
      "step": 636,
      "video_reward_cumulative_accuracy": 0.7751572327044025
    },
    {
      "epoch": 0.18907687741169488,
      "grad_norm": 1.477861762046814,
      "learning_rate": 4.880189708731947e-06,
      "loss": 0.0707,
      "step": 637,
      "video_reward_cumulative_accuracy": 0.7755102040816326
    },
    {
      "epoch": 0.18937370139507273,
      "grad_norm": 4.140712261199951,
      "learning_rate": 4.879396137696628e-06,
      "loss": 0.0873,
      "step": 638,
      "video_reward_cumulative_accuracy": 0.7750783699059561
    },
    {
      "epoch": 0.18967052537845058,
      "grad_norm": 2.715289354324341,
      "learning_rate": 4.878600012148617e-06,
      "loss": 0.0496,
      "step": 639,
      "video_reward_cumulative_accuracy": 0.7754303599374022
    },
    {
      "epoch": 0.18996734936182844,
      "grad_norm": 1.2748372554779053,
      "learning_rate": 4.87780133294263e-06,
      "loss": 0.0294,
      "step": 640,
      "video_reward_cumulative_accuracy": 0.77578125
    },
    {
      "epoch": 0.1902641733452063,
      "grad_norm": 1.9033632278442383,
      "learning_rate": 4.877000100936129e-06,
      "loss": 0.0344,
      "step": 641,
      "video_reward_cumulative_accuracy": 0.7761310452418096
    },
    {
      "epoch": 0.19056099732858414,
      "grad_norm": 3.2748398780822754,
      "learning_rate": 4.876196316989313e-06,
      "loss": 0.054,
      "step": 642,
      "video_reward_cumulative_accuracy": 0.7764797507788161
    },
    {
      "epoch": 0.190857821311962,
      "grad_norm": 2.8134162425994873,
      "learning_rate": 4.875389981965123e-06,
      "loss": 0.0845,
      "step": 643,
      "video_reward_cumulative_accuracy": 0.776049766718507
    },
    {
      "epoch": 0.19115464529533988,
      "grad_norm": 1.5256245136260986,
      "learning_rate": 4.874581096729238e-06,
      "loss": 0.0205,
      "step": 644,
      "video_reward_cumulative_accuracy": 0.7763975155279503
    },
    {
      "epoch": 0.19145146927871773,
      "grad_norm": 5.077547073364258,
      "learning_rate": 4.8737696621500715e-06,
      "loss": 0.0899,
      "step": 645,
      "video_reward_cumulative_accuracy": 0.7767441860465116
    },
    {
      "epoch": 0.19174829326209558,
      "grad_norm": 4.956404209136963,
      "learning_rate": 4.872955679098782e-06,
      "loss": 0.0849,
      "step": 646,
      "video_reward_cumulative_accuracy": 0.7770897832817337
    },
    {
      "epoch": 0.19204511724547343,
      "grad_norm": 7.943280220031738,
      "learning_rate": 4.872139148449257e-06,
      "loss": 0.1075,
      "step": 647,
      "video_reward_cumulative_accuracy": 0.7774343122102009
    },
    {
      "epoch": 0.1923419412288513,
      "grad_norm": 6.340520858764648,
      "learning_rate": 4.871320071078122e-06,
      "loss": 0.0777,
      "step": 648,
      "video_reward_cumulative_accuracy": 0.7777777777777778
    },
    {
      "epoch": 0.19263876521222914,
      "grad_norm": 2.82149338722229,
      "learning_rate": 4.870498447864735e-06,
      "loss": 0.0556,
      "step": 649,
      "video_reward_cumulative_accuracy": 0.7781201848998459
    },
    {
      "epoch": 0.192935589195607,
      "grad_norm": 5.320289134979248,
      "learning_rate": 4.86967427969119e-06,
      "loss": 0.0697,
      "step": 650,
      "video_reward_cumulative_accuracy": 0.7776923076923077
    },
    {
      "epoch": 0.19323241317898487,
      "grad_norm": 1.857016682624817,
      "learning_rate": 4.86884756744231e-06,
      "loss": 0.0323,
      "step": 651,
      "video_reward_cumulative_accuracy": 0.7780337941628265
    },
    {
      "epoch": 0.19352923716236273,
      "grad_norm": 2.1225006580352783,
      "learning_rate": 4.8680183120056516e-06,
      "loss": 0.0557,
      "step": 652,
      "video_reward_cumulative_accuracy": 0.7776073619631901
    },
    {
      "epoch": 0.19382606114574058,
      "grad_norm": 1.882553219795227,
      "learning_rate": 4.8671865142715e-06,
      "loss": 0.0494,
      "step": 653,
      "video_reward_cumulative_accuracy": 0.7771822358346095
    },
    {
      "epoch": 0.19412288512911843,
      "grad_norm": 3.701078414916992,
      "learning_rate": 4.866352175132873e-06,
      "loss": 0.1412,
      "step": 654,
      "video_reward_cumulative_accuracy": 0.7759938837920489
    },
    {
      "epoch": 0.1944197091124963,
      "grad_norm": 4.4002509117126465,
      "learning_rate": 4.865515295485511e-06,
      "loss": 0.0662,
      "step": 655,
      "video_reward_cumulative_accuracy": 0.7763358778625954
    },
    {
      "epoch": 0.19471653309587414,
      "grad_norm": 5.623415470123291,
      "learning_rate": 4.864675876227889e-06,
      "loss": 0.0927,
      "step": 656,
      "video_reward_cumulative_accuracy": 0.7751524390243902
    },
    {
      "epoch": 0.195013357079252,
      "grad_norm": 1.7767045497894287,
      "learning_rate": 4.863833918261204e-06,
      "loss": 0.0468,
      "step": 657,
      "video_reward_cumulative_accuracy": 0.7754946727549468
    },
    {
      "epoch": 0.19531018106262987,
      "grad_norm": 1.3329066038131714,
      "learning_rate": 4.862989422489379e-06,
      "loss": 0.0362,
      "step": 658,
      "video_reward_cumulative_accuracy": 0.7758358662613982
    },
    {
      "epoch": 0.19560700504600773,
      "grad_norm": 3.9193496704101562,
      "learning_rate": 4.862142389819063e-06,
      "loss": 0.0751,
      "step": 659,
      "video_reward_cumulative_accuracy": 0.776176024279211
    },
    {
      "epoch": 0.19590382902938558,
      "grad_norm": 2.8584847450256348,
      "learning_rate": 4.861292821159627e-06,
      "loss": 0.0569,
      "step": 660,
      "video_reward_cumulative_accuracy": 0.7765151515151515
    },
    {
      "epoch": 0.19620065301276343,
      "grad_norm": 1.3020362854003906,
      "learning_rate": 4.860440717423166e-06,
      "loss": 0.0746,
      "step": 661,
      "video_reward_cumulative_accuracy": 0.7768532526475038
    },
    {
      "epoch": 0.19649747699614128,
      "grad_norm": 5.554771423339844,
      "learning_rate": 4.8595860795244955e-06,
      "loss": 0.0509,
      "step": 662,
      "video_reward_cumulative_accuracy": 0.7764350453172205
    },
    {
      "epoch": 0.19679430097951914,
      "grad_norm": 1.001604676246643,
      "learning_rate": 4.858728908381153e-06,
      "loss": 0.0301,
      "step": 663,
      "video_reward_cumulative_accuracy": 0.7760180995475113
    },
    {
      "epoch": 0.197091124962897,
      "grad_norm": 1.391948938369751,
      "learning_rate": 4.857869204913394e-06,
      "loss": 0.0644,
      "step": 664,
      "video_reward_cumulative_accuracy": 0.776355421686747
    },
    {
      "epoch": 0.19738794894627487,
      "grad_norm": 1.2883930206298828,
      "learning_rate": 4.857006970044194e-06,
      "loss": 0.0282,
      "step": 665,
      "video_reward_cumulative_accuracy": 0.7766917293233083
    },
    {
      "epoch": 0.19768477292965272,
      "grad_norm": 1.8838915824890137,
      "learning_rate": 4.856142204699246e-06,
      "loss": 0.037,
      "step": 666,
      "video_reward_cumulative_accuracy": 0.777027027027027
    },
    {
      "epoch": 0.19798159691303058,
      "grad_norm": 8.990363121032715,
      "learning_rate": 4.855274909806959e-06,
      "loss": 0.1334,
      "step": 667,
      "video_reward_cumulative_accuracy": 0.7766116941529235
    },
    {
      "epoch": 0.19827842089640843,
      "grad_norm": 3.5435233116149902,
      "learning_rate": 4.85440508629846e-06,
      "loss": 0.0568,
      "step": 668,
      "video_reward_cumulative_accuracy": 0.7761976047904192
    },
    {
      "epoch": 0.19857524487978628,
      "grad_norm": 2.3105525970458984,
      "learning_rate": 4.853532735107587e-06,
      "loss": 0.0383,
      "step": 669,
      "video_reward_cumulative_accuracy": 0.7765321375186846
    },
    {
      "epoch": 0.19887206886316414,
      "grad_norm": 0.8967596292495728,
      "learning_rate": 4.852657857170894e-06,
      "loss": 0.0358,
      "step": 670,
      "video_reward_cumulative_accuracy": 0.7768656716417911
    },
    {
      "epoch": 0.199168892846542,
      "grad_norm": 1.6966391801834106,
      "learning_rate": 4.851780453427648e-06,
      "loss": 0.035,
      "step": 671,
      "video_reward_cumulative_accuracy": 0.7764530551415797
    },
    {
      "epoch": 0.19946571682991987,
      "grad_norm": 5.105749607086182,
      "learning_rate": 4.8509005248198265e-06,
      "loss": 0.0905,
      "step": 672,
      "video_reward_cumulative_accuracy": 0.7760416666666666
    },
    {
      "epoch": 0.19976254081329772,
      "grad_norm": 1.4758727550506592,
      "learning_rate": 4.8500180722921184e-06,
      "loss": 0.0379,
      "step": 673,
      "video_reward_cumulative_accuracy": 0.7763744427934621
    },
    {
      "epoch": 0.20005936479667558,
      "grad_norm": 2.2049713134765625,
      "learning_rate": 4.849133096791923e-06,
      "loss": 0.0313,
      "step": 674,
      "video_reward_cumulative_accuracy": 0.776706231454006
    },
    {
      "epoch": 0.20035618878005343,
      "grad_norm": 2.7456955909729004,
      "learning_rate": 4.848245599269346e-06,
      "loss": 0.0959,
      "step": 675,
      "video_reward_cumulative_accuracy": 0.7770370370370371
    },
    {
      "epoch": 0.20065301276343128,
      "grad_norm": 5.363068103790283,
      "learning_rate": 4.847355580677203e-06,
      "loss": 0.0997,
      "step": 676,
      "video_reward_cumulative_accuracy": 0.775887573964497
    },
    {
      "epoch": 0.20094983674680914,
      "grad_norm": 1.5729711055755615,
      "learning_rate": 4.846463041971014e-06,
      "loss": 0.0311,
      "step": 677,
      "video_reward_cumulative_accuracy": 0.7754800590841949
    },
    {
      "epoch": 0.201246660730187,
      "grad_norm": 7.414484024047852,
      "learning_rate": 4.845567984109009e-06,
      "loss": 0.1189,
      "step": 678,
      "video_reward_cumulative_accuracy": 0.7743362831858407
    },
    {
      "epoch": 0.20154348471356487,
      "grad_norm": 1.798652172088623,
      "learning_rate": 4.844670408052117e-06,
      "loss": 0.0357,
      "step": 679,
      "video_reward_cumulative_accuracy": 0.7746686303387335
    },
    {
      "epoch": 0.20184030869694272,
      "grad_norm": 4.525697231292725,
      "learning_rate": 4.843770314763973e-06,
      "loss": 0.0504,
      "step": 680,
      "video_reward_cumulative_accuracy": 0.774264705882353
    },
    {
      "epoch": 0.20213713268032057,
      "grad_norm": 2.7870752811431885,
      "learning_rate": 4.842867705210915e-06,
      "loss": 0.0954,
      "step": 681,
      "video_reward_cumulative_accuracy": 0.7738619676945668
    },
    {
      "epoch": 0.20243395666369843,
      "grad_norm": 2.8513903617858887,
      "learning_rate": 4.841962580361983e-06,
      "loss": 0.0483,
      "step": 682,
      "video_reward_cumulative_accuracy": 0.7741935483870968
    },
    {
      "epoch": 0.20273078064707628,
      "grad_norm": 2.141054630279541,
      "learning_rate": 4.841054941188914e-06,
      "loss": 0.0399,
      "step": 683,
      "video_reward_cumulative_accuracy": 0.773792093704246
    },
    {
      "epoch": 0.20302760463045413,
      "grad_norm": 1.3511686325073242,
      "learning_rate": 4.840144788666149e-06,
      "loss": 0.0312,
      "step": 684,
      "video_reward_cumulative_accuracy": 0.7741228070175439
    },
    {
      "epoch": 0.203324428613832,
      "grad_norm": 1.8301844596862793,
      "learning_rate": 4.839232123770824e-06,
      "loss": 0.0654,
      "step": 685,
      "video_reward_cumulative_accuracy": 0.7744525547445256
    },
    {
      "epoch": 0.20362125259720987,
      "grad_norm": 4.309445858001709,
      "learning_rate": 4.838316947482774e-06,
      "loss": 0.0561,
      "step": 686,
      "video_reward_cumulative_accuracy": 0.7747813411078717
    },
    {
      "epoch": 0.20391807658058772,
      "grad_norm": 4.205143928527832,
      "learning_rate": 4.837399260784529e-06,
      "loss": 0.073,
      "step": 687,
      "video_reward_cumulative_accuracy": 0.7743813682678311
    },
    {
      "epoch": 0.20421490056396557,
      "grad_norm": 2.1332249641418457,
      "learning_rate": 4.836479064661314e-06,
      "loss": 0.0535,
      "step": 688,
      "video_reward_cumulative_accuracy": 0.7747093023255814
    },
    {
      "epoch": 0.20451172454734343,
      "grad_norm": 2.2309157848358154,
      "learning_rate": 4.83555636010105e-06,
      "loss": 0.0286,
      "step": 689,
      "video_reward_cumulative_accuracy": 0.7750362844702468
    },
    {
      "epoch": 0.20480854853072128,
      "grad_norm": 1.6311012506484985,
      "learning_rate": 4.8346311480943495e-06,
      "loss": 0.0292,
      "step": 690,
      "video_reward_cumulative_accuracy": 0.7753623188405797
    },
    {
      "epoch": 0.20510537251409913,
      "grad_norm": 3.9914817810058594,
      "learning_rate": 4.833703429634519e-06,
      "loss": 0.0789,
      "step": 691,
      "video_reward_cumulative_accuracy": 0.7749638205499276
    },
    {
      "epoch": 0.20540219649747699,
      "grad_norm": 0.823984682559967,
      "learning_rate": 4.832773205717551e-06,
      "loss": 0.027,
      "step": 692,
      "video_reward_cumulative_accuracy": 0.7752890173410405
    },
    {
      "epoch": 0.20569902048085487,
      "grad_norm": 0.9912533760070801,
      "learning_rate": 4.831840477342134e-06,
      "loss": 0.0309,
      "step": 693,
      "video_reward_cumulative_accuracy": 0.7756132756132756
    },
    {
      "epoch": 0.20599584446423272,
      "grad_norm": 1.6996347904205322,
      "learning_rate": 4.830905245509641e-06,
      "loss": 0.0468,
      "step": 694,
      "video_reward_cumulative_accuracy": 0.7752161383285303
    },
    {
      "epoch": 0.20629266844761057,
      "grad_norm": 1.391541600227356,
      "learning_rate": 4.829967511224135e-06,
      "loss": 0.0389,
      "step": 695,
      "video_reward_cumulative_accuracy": 0.7755395683453238
    },
    {
      "epoch": 0.20658949243098843,
      "grad_norm": 2.34708833694458,
      "learning_rate": 4.829027275492364e-06,
      "loss": 0.0395,
      "step": 696,
      "video_reward_cumulative_accuracy": 0.7758620689655172
    },
    {
      "epoch": 0.20688631641436628,
      "grad_norm": 2.918024778366089,
      "learning_rate": 4.828084539323763e-06,
      "loss": 0.0451,
      "step": 697,
      "video_reward_cumulative_accuracy": 0.7754662840746055
    },
    {
      "epoch": 0.20718314039774413,
      "grad_norm": 2.345532178878784,
      "learning_rate": 4.82713930373045e-06,
      "loss": 0.0442,
      "step": 698,
      "video_reward_cumulative_accuracy": 0.7757879656160458
    },
    {
      "epoch": 0.20747996438112198,
      "grad_norm": 1.47147536277771,
      "learning_rate": 4.826191569727228e-06,
      "loss": 0.0284,
      "step": 699,
      "video_reward_cumulative_accuracy": 0.7761087267525035
    },
    {
      "epoch": 0.20777678836449986,
      "grad_norm": 1.816048264503479,
      "learning_rate": 4.82524133833158e-06,
      "loss": 0.0437,
      "step": 700,
      "video_reward_cumulative_accuracy": 0.7764285714285715
    },
    {
      "epoch": 0.20807361234787772,
      "grad_norm": 3.5193707942962646,
      "learning_rate": 4.824288610563673e-06,
      "loss": 0.0454,
      "step": 701,
      "video_reward_cumulative_accuracy": 0.7767475035663338
    },
    {
      "epoch": 0.20837043633125557,
      "grad_norm": 1.532949447631836,
      "learning_rate": 4.8233333874463535e-06,
      "loss": 0.036,
      "step": 702,
      "video_reward_cumulative_accuracy": 0.7770655270655271
    },
    {
      "epoch": 0.20866726031463342,
      "grad_norm": 1.1091006994247437,
      "learning_rate": 4.822375670005144e-06,
      "loss": 0.0265,
      "step": 703,
      "video_reward_cumulative_accuracy": 0.7773826458036984
    },
    {
      "epoch": 0.20896408429801128,
      "grad_norm": 2.238027572631836,
      "learning_rate": 4.821415459268249e-06,
      "loss": 0.0393,
      "step": 704,
      "video_reward_cumulative_accuracy": 0.7776988636363636
    },
    {
      "epoch": 0.20926090828138913,
      "grad_norm": 4.488368988037109,
      "learning_rate": 4.820452756266546e-06,
      "loss": 0.0896,
      "step": 705,
      "video_reward_cumulative_accuracy": 0.7780141843971631
    },
    {
      "epoch": 0.20955773226476698,
      "grad_norm": 2.5125250816345215,
      "learning_rate": 4.819487562033592e-06,
      "loss": 0.0354,
      "step": 706,
      "video_reward_cumulative_accuracy": 0.7776203966005666
    },
    {
      "epoch": 0.20985455624814486,
      "grad_norm": 3.1740994453430176,
      "learning_rate": 4.818519877605616e-06,
      "loss": 0.0392,
      "step": 707,
      "video_reward_cumulative_accuracy": 0.7779349363507779
    },
    {
      "epoch": 0.21015138023152272,
      "grad_norm": 0.8142343759536743,
      "learning_rate": 4.817549704021521e-06,
      "loss": 0.0256,
      "step": 708,
      "video_reward_cumulative_accuracy": 0.7782485875706214
    },
    {
      "epoch": 0.21044820421490057,
      "grad_norm": 2.2193245887756348,
      "learning_rate": 4.816577042322883e-06,
      "loss": 0.0586,
      "step": 709,
      "video_reward_cumulative_accuracy": 0.7785613540197461
    },
    {
      "epoch": 0.21074502819827842,
      "grad_norm": 3.6588878631591797,
      "learning_rate": 4.815601893553948e-06,
      "loss": 0.061,
      "step": 710,
      "video_reward_cumulative_accuracy": 0.778169014084507
    },
    {
      "epoch": 0.21104185218165628,
      "grad_norm": 3.278996229171753,
      "learning_rate": 4.8146242587616335e-06,
      "loss": 0.0306,
      "step": 711,
      "video_reward_cumulative_accuracy": 0.7784810126582279
    },
    {
      "epoch": 0.21133867616503413,
      "grad_norm": 4.987575054168701,
      "learning_rate": 4.813644138995524e-06,
      "loss": 0.0612,
      "step": 712,
      "video_reward_cumulative_accuracy": 0.7780898876404494
    },
    {
      "epoch": 0.21163550014841198,
      "grad_norm": 3.508737087249756,
      "learning_rate": 4.812661535307876e-06,
      "loss": 0.0883,
      "step": 713,
      "video_reward_cumulative_accuracy": 0.7776998597475456
    },
    {
      "epoch": 0.21193232413178986,
      "grad_norm": 1.5868617296218872,
      "learning_rate": 4.811676448753606e-06,
      "loss": 0.0478,
      "step": 714,
      "video_reward_cumulative_accuracy": 0.7780112044817927
    },
    {
      "epoch": 0.21222914811516772,
      "grad_norm": 1.609864592552185,
      "learning_rate": 4.810688880390303e-06,
      "loss": 0.0553,
      "step": 715,
      "video_reward_cumulative_accuracy": 0.7783216783216783
    },
    {
      "epoch": 0.21252597209854557,
      "grad_norm": 3.3906569480895996,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.077,
      "step": 716,
      "video_reward_cumulative_accuracy": 0.7779329608938548
    },
    {
      "epoch": 0.21282279608192342,
      "grad_norm": 1.417561411857605,
      "learning_rate": 4.808706302480261e-06,
      "loss": 0.038,
      "step": 717,
      "video_reward_cumulative_accuracy": 0.7782426778242678
    },
    {
      "epoch": 0.21311962006530127,
      "grad_norm": 1.8394412994384766,
      "learning_rate": 4.807711295062013e-06,
      "loss": 0.0496,
      "step": 718,
      "video_reward_cumulative_accuracy": 0.7785515320334262
    },
    {
      "epoch": 0.21341644404867913,
      "grad_norm": 2.1921329498291016,
      "learning_rate": 4.8067138100917065e-06,
      "loss": 0.0483,
      "step": 719,
      "video_reward_cumulative_accuracy": 0.778164116828929
    },
    {
      "epoch": 0.21371326803205698,
      "grad_norm": 3.041285753250122,
      "learning_rate": 4.805713848640242e-06,
      "loss": 0.0777,
      "step": 720,
      "video_reward_cumulative_accuracy": 0.7784722222222222
    },
    {
      "epoch": 0.21401009201543486,
      "grad_norm": 2.195009469985962,
      "learning_rate": 4.804711411781173e-06,
      "loss": 0.0761,
      "step": 721,
      "video_reward_cumulative_accuracy": 0.7787794729542302
    },
    {
      "epoch": 0.2143069159988127,
      "grad_norm": 1.3252123594284058,
      "learning_rate": 4.803706500590714e-06,
      "loss": 0.062,
      "step": 722,
      "video_reward_cumulative_accuracy": 0.7790858725761773
    },
    {
      "epoch": 0.21460373998219057,
      "grad_norm": 3.693161964416504,
      "learning_rate": 4.802699116147732e-06,
      "loss": 0.0576,
      "step": 723,
      "video_reward_cumulative_accuracy": 0.7786998616874136
    },
    {
      "epoch": 0.21490056396556842,
      "grad_norm": 1.6738680601119995,
      "learning_rate": 4.801689259533756e-06,
      "loss": 0.0488,
      "step": 724,
      "video_reward_cumulative_accuracy": 0.7790055248618785
    },
    {
      "epoch": 0.21519738794894627,
      "grad_norm": 2.5583913326263428,
      "learning_rate": 4.800676931832963e-06,
      "loss": 0.0596,
      "step": 725,
      "video_reward_cumulative_accuracy": 0.7793103448275862
    },
    {
      "epoch": 0.21549421193232413,
      "grad_norm": 2.4868931770324707,
      "learning_rate": 4.799662134132185e-06,
      "loss": 0.0386,
      "step": 726,
      "video_reward_cumulative_accuracy": 0.7789256198347108
    },
    {
      "epoch": 0.21579103591570198,
      "grad_norm": 1.2940788269042969,
      "learning_rate": 4.798644867520905e-06,
      "loss": 0.0506,
      "step": 727,
      "video_reward_cumulative_accuracy": 0.7792297111416782
    },
    {
      "epoch": 0.21608785989907986,
      "grad_norm": 1.6243387460708618,
      "learning_rate": 4.797625133091259e-06,
      "loss": 0.0387,
      "step": 728,
      "video_reward_cumulative_accuracy": 0.779532967032967
    },
    {
      "epoch": 0.2163846838824577,
      "grad_norm": 3.6060104370117188,
      "learning_rate": 4.796602931938031e-06,
      "loss": 0.072,
      "step": 729,
      "video_reward_cumulative_accuracy": 0.7791495198902606
    },
    {
      "epoch": 0.21668150786583557,
      "grad_norm": 2.368060827255249,
      "learning_rate": 4.795578265158652e-06,
      "loss": 0.0417,
      "step": 730,
      "video_reward_cumulative_accuracy": 0.7787671232876713
    },
    {
      "epoch": 0.21697833184921342,
      "grad_norm": 2.8776209354400635,
      "learning_rate": 4.794551133853202e-06,
      "loss": 0.0693,
      "step": 731,
      "video_reward_cumulative_accuracy": 0.7790697674418605
    },
    {
      "epoch": 0.21727515583259127,
      "grad_norm": 1.1618021726608276,
      "learning_rate": 4.7935215391244065e-06,
      "loss": 0.0558,
      "step": 732,
      "video_reward_cumulative_accuracy": 0.7786885245901639
    },
    {
      "epoch": 0.21757197981596912,
      "grad_norm": 4.455048561096191,
      "learning_rate": 4.792489482077633e-06,
      "loss": 0.0619,
      "step": 733,
      "video_reward_cumulative_accuracy": 0.7789904502046384
    },
    {
      "epoch": 0.21786880379934698,
      "grad_norm": 3.7576048374176025,
      "learning_rate": 4.791454963820898e-06,
      "loss": 0.0586,
      "step": 734,
      "video_reward_cumulative_accuracy": 0.779291553133515
    },
    {
      "epoch": 0.21816562778272486,
      "grad_norm": 2.280623435974121,
      "learning_rate": 4.790417985464855e-06,
      "loss": 0.0457,
      "step": 735,
      "video_reward_cumulative_accuracy": 0.7789115646258503
    },
    {
      "epoch": 0.2184624517661027,
      "grad_norm": 3.224135398864746,
      "learning_rate": 4.789378548122803e-06,
      "loss": 0.0378,
      "step": 736,
      "video_reward_cumulative_accuracy": 0.7792119565217391
    },
    {
      "epoch": 0.21875927574948056,
      "grad_norm": 1.9654239416122437,
      "learning_rate": 4.788336652910676e-06,
      "loss": 0.068,
      "step": 737,
      "video_reward_cumulative_accuracy": 0.7788331071913162
    },
    {
      "epoch": 0.21905609973285842,
      "grad_norm": 2.6083526611328125,
      "learning_rate": 4.787292300947053e-06,
      "loss": 0.0529,
      "step": 738,
      "video_reward_cumulative_accuracy": 0.7784552845528455
    },
    {
      "epoch": 0.21935292371623627,
      "grad_norm": 2.398172378540039,
      "learning_rate": 4.786245493353145e-06,
      "loss": 0.0541,
      "step": 739,
      "video_reward_cumulative_accuracy": 0.7780784844384303
    },
    {
      "epoch": 0.21964974769961412,
      "grad_norm": 0.9663519263267517,
      "learning_rate": 4.785196231252802e-06,
      "loss": 0.031,
      "step": 740,
      "video_reward_cumulative_accuracy": 0.7783783783783784
    },
    {
      "epoch": 0.21994657168299198,
      "grad_norm": 1.1138893365859985,
      "learning_rate": 4.784144515772509e-06,
      "loss": 0.0387,
      "step": 741,
      "video_reward_cumulative_accuracy": 0.7780026990553306
    },
    {
      "epoch": 0.22024339566636983,
      "grad_norm": 1.9808402061462402,
      "learning_rate": 4.783090348041384e-06,
      "loss": 0.0348,
      "step": 742,
      "video_reward_cumulative_accuracy": 0.7776280323450134
    },
    {
      "epoch": 0.2205402196497477,
      "grad_norm": 1.5914376974105835,
      "learning_rate": 4.782033729191179e-06,
      "loss": 0.0462,
      "step": 743,
      "video_reward_cumulative_accuracy": 0.7779273216689099
    },
    {
      "epoch": 0.22083704363312556,
      "grad_norm": 3.169459819793701,
      "learning_rate": 4.780974660356276e-06,
      "loss": 0.0653,
      "step": 744,
      "video_reward_cumulative_accuracy": 0.7782258064516129
    },
    {
      "epoch": 0.22113386761650342,
      "grad_norm": 2.7912189960479736,
      "learning_rate": 4.77991314267369e-06,
      "loss": 0.0405,
      "step": 745,
      "video_reward_cumulative_accuracy": 0.7778523489932886
    },
    {
      "epoch": 0.22143069159988127,
      "grad_norm": 2.121472120285034,
      "learning_rate": 4.778849177283061e-06,
      "loss": 0.0563,
      "step": 746,
      "video_reward_cumulative_accuracy": 0.7781501340482574
    },
    {
      "epoch": 0.22172751558325912,
      "grad_norm": 3.9767544269561768,
      "learning_rate": 4.777782765326661e-06,
      "loss": 0.0501,
      "step": 747,
      "video_reward_cumulative_accuracy": 0.7784471218206158
    },
    {
      "epoch": 0.22202433956663697,
      "grad_norm": 1.9390398263931274,
      "learning_rate": 4.776713907949386e-06,
      "loss": 0.065,
      "step": 748,
      "video_reward_cumulative_accuracy": 0.7787433155080213
    },
    {
      "epoch": 0.22232116355001483,
      "grad_norm": 4.238917827606201,
      "learning_rate": 4.775642606298758e-06,
      "loss": 0.0829,
      "step": 749,
      "video_reward_cumulative_accuracy": 0.7790387182910548
    },
    {
      "epoch": 0.2226179875333927,
      "grad_norm": 2.718532085418701,
      "learning_rate": 4.774568861524923e-06,
      "loss": 0.034,
      "step": 750,
      "video_reward_cumulative_accuracy": 0.7786666666666666
    },
    {
      "epoch": 0.22291481151677056,
      "grad_norm": 4.930084228515625,
      "learning_rate": 4.773492674780651e-06,
      "loss": 0.0583,
      "step": 751,
      "video_reward_cumulative_accuracy": 0.7782956058588548
    },
    {
      "epoch": 0.22321163550014841,
      "grad_norm": 4.565423965454102,
      "learning_rate": 4.772414047221333e-06,
      "loss": 0.0486,
      "step": 752,
      "video_reward_cumulative_accuracy": 0.7785904255319149
    },
    {
      "epoch": 0.22350845948352627,
      "grad_norm": 4.179710865020752,
      "learning_rate": 4.77133298000498e-06,
      "loss": 0.0614,
      "step": 753,
      "video_reward_cumulative_accuracy": 0.7782204515272244
    },
    {
      "epoch": 0.22380528346690412,
      "grad_norm": 5.0286383628845215,
      "learning_rate": 4.7702494742922215e-06,
      "loss": 0.0705,
      "step": 754,
      "video_reward_cumulative_accuracy": 0.7771883289124668
    },
    {
      "epoch": 0.22410210745028197,
      "grad_norm": 1.3847112655639648,
      "learning_rate": 4.769163531246308e-06,
      "loss": 0.0306,
      "step": 755,
      "video_reward_cumulative_accuracy": 0.7774834437086092
    },
    {
      "epoch": 0.22439893143365983,
      "grad_norm": 3.042525053024292,
      "learning_rate": 4.7680751520331035e-06,
      "loss": 0.0426,
      "step": 756,
      "video_reward_cumulative_accuracy": 0.7777777777777778
    },
    {
      "epoch": 0.2246957554170377,
      "grad_norm": 1.5790531635284424,
      "learning_rate": 4.766984337821089e-06,
      "loss": 0.0229,
      "step": 757,
      "video_reward_cumulative_accuracy": 0.7780713342140027
    },
    {
      "epoch": 0.22499257940041556,
      "grad_norm": 3.0712270736694336,
      "learning_rate": 4.76589108978136e-06,
      "loss": 0.0316,
      "step": 758,
      "video_reward_cumulative_accuracy": 0.7783641160949868
    },
    {
      "epoch": 0.2252894033837934,
      "grad_norm": 3.963674783706665,
      "learning_rate": 4.764795409087623e-06,
      "loss": 0.053,
      "step": 759,
      "video_reward_cumulative_accuracy": 0.7786561264822134
    },
    {
      "epoch": 0.22558622736717127,
      "grad_norm": 3.206650495529175,
      "learning_rate": 4.7636972969161984e-06,
      "loss": 0.0279,
      "step": 760,
      "video_reward_cumulative_accuracy": 0.7789473684210526
    },
    {
      "epoch": 0.22588305135054912,
      "grad_norm": 4.03346061706543,
      "learning_rate": 4.762596754446017e-06,
      "loss": 0.0558,
      "step": 761,
      "video_reward_cumulative_accuracy": 0.778580814717477
    },
    {
      "epoch": 0.22617987533392697,
      "grad_norm": 4.312930107116699,
      "learning_rate": 4.7614937828586176e-06,
      "loss": 0.1088,
      "step": 762,
      "video_reward_cumulative_accuracy": 0.7782152230971129
    },
    {
      "epoch": 0.22647669931730482,
      "grad_norm": 3.7724556922912598,
      "learning_rate": 4.760388383338145e-06,
      "loss": 0.0847,
      "step": 763,
      "video_reward_cumulative_accuracy": 0.7785058977719528
    },
    {
      "epoch": 0.2267735233006827,
      "grad_norm": 1.2291319370269775,
      "learning_rate": 4.759280557071357e-06,
      "loss": 0.0156,
      "step": 764,
      "video_reward_cumulative_accuracy": 0.7787958115183246
    },
    {
      "epoch": 0.22707034728406056,
      "grad_norm": 10.076594352722168,
      "learning_rate": 4.758170305247608e-06,
      "loss": 0.0863,
      "step": 765,
      "video_reward_cumulative_accuracy": 0.7790849673202614
    },
    {
      "epoch": 0.2273671712674384,
      "grad_norm": 2.8340132236480713,
      "learning_rate": 4.757057629058865e-06,
      "loss": 0.0329,
      "step": 766,
      "video_reward_cumulative_accuracy": 0.7787206266318538
    },
    {
      "epoch": 0.22766399525081626,
      "grad_norm": 2.3575525283813477,
      "learning_rate": 4.755942529699692e-06,
      "loss": 0.05,
      "step": 767,
      "video_reward_cumulative_accuracy": 0.7783572359843546
    },
    {
      "epoch": 0.22796081923419412,
      "grad_norm": 3.471806049346924,
      "learning_rate": 4.754825008367256e-06,
      "loss": 0.0655,
      "step": 768,
      "video_reward_cumulative_accuracy": 0.7779947916666666
    },
    {
      "epoch": 0.22825764321757197,
      "grad_norm": 3.795821189880371,
      "learning_rate": 4.753705066261326e-06,
      "loss": 0.0479,
      "step": 769,
      "video_reward_cumulative_accuracy": 0.7776332899869961
    },
    {
      "epoch": 0.22855446720094982,
      "grad_norm": 2.038712978363037,
      "learning_rate": 4.752582704584267e-06,
      "loss": 0.067,
      "step": 770,
      "video_reward_cumulative_accuracy": 0.7779220779220779
    },
    {
      "epoch": 0.2288512911843277,
      "grad_norm": 2.496264696121216,
      "learning_rate": 4.751457924541045e-06,
      "loss": 0.0409,
      "step": 771,
      "video_reward_cumulative_accuracy": 0.7782101167315175
    },
    {
      "epoch": 0.22914811516770556,
      "grad_norm": 1.8071130514144897,
      "learning_rate": 4.75033072733922e-06,
      "loss": 0.0145,
      "step": 772,
      "video_reward_cumulative_accuracy": 0.7784974093264249
    },
    {
      "epoch": 0.2294449391510834,
      "grad_norm": 3.4890074729919434,
      "learning_rate": 4.749201114188946e-06,
      "loss": 0.0712,
      "step": 773,
      "video_reward_cumulative_accuracy": 0.7781371280724451
    },
    {
      "epoch": 0.22974176313446126,
      "grad_norm": 2.5620384216308594,
      "learning_rate": 4.748069086302975e-06,
      "loss": 0.0741,
      "step": 774,
      "video_reward_cumulative_accuracy": 0.7777777777777778
    },
    {
      "epoch": 0.23003858711783912,
      "grad_norm": 1.5887774229049683,
      "learning_rate": 4.7469346448966455e-06,
      "loss": 0.0347,
      "step": 775,
      "video_reward_cumulative_accuracy": 0.7774193548387097
    },
    {
      "epoch": 0.23033541110121697,
      "grad_norm": 2.032930612564087,
      "learning_rate": 4.745797791187894e-06,
      "loss": 0.0537,
      "step": 776,
      "video_reward_cumulative_accuracy": 0.7777061855670103
    },
    {
      "epoch": 0.23063223508459482,
      "grad_norm": 3.915695905685425,
      "learning_rate": 4.744658526397241e-06,
      "loss": 0.0798,
      "step": 777,
      "video_reward_cumulative_accuracy": 0.7773487773487774
    },
    {
      "epoch": 0.2309290590679727,
      "grad_norm": 1.2229045629501343,
      "learning_rate": 4.743516851747798e-06,
      "loss": 0.0364,
      "step": 778,
      "video_reward_cumulative_accuracy": 0.7776349614395887
    },
    {
      "epoch": 0.23122588305135056,
      "grad_norm": 2.093339204788208,
      "learning_rate": 4.742372768465264e-06,
      "loss": 0.0472,
      "step": 779,
      "video_reward_cumulative_accuracy": 0.7772785622593068
    },
    {
      "epoch": 0.2315227070347284,
      "grad_norm": 2.220613479614258,
      "learning_rate": 4.7412262777779235e-06,
      "loss": 0.0444,
      "step": 780,
      "video_reward_cumulative_accuracy": 0.7769230769230769
    },
    {
      "epoch": 0.23181953101810626,
      "grad_norm": 4.584027290344238,
      "learning_rate": 4.740077380916646e-06,
      "loss": 0.0535,
      "step": 781,
      "video_reward_cumulative_accuracy": 0.7772087067861716
    },
    {
      "epoch": 0.23211635500148411,
      "grad_norm": 2.5561423301696777,
      "learning_rate": 4.738926079114883e-06,
      "loss": 0.0301,
      "step": 782,
      "video_reward_cumulative_accuracy": 0.7774936061381074
    },
    {
      "epoch": 0.23241317898486197,
      "grad_norm": 2.526623249053955,
      "learning_rate": 4.737772373608669e-06,
      "loss": 0.0315,
      "step": 783,
      "video_reward_cumulative_accuracy": 0.777139208173691
    },
    {
      "epoch": 0.23271000296823982,
      "grad_norm": 1.8118770122528076,
      "learning_rate": 4.736616265636619e-06,
      "loss": 0.0408,
      "step": 784,
      "video_reward_cumulative_accuracy": 0.7774234693877551
    },
    {
      "epoch": 0.2330068269516177,
      "grad_norm": 1.151275873184204,
      "learning_rate": 4.735457756439926e-06,
      "loss": 0.0169,
      "step": 785,
      "video_reward_cumulative_accuracy": 0.7777070063694268
    },
    {
      "epoch": 0.23330365093499555,
      "grad_norm": 5.073890209197998,
      "learning_rate": 4.734296847262364e-06,
      "loss": 0.0722,
      "step": 786,
      "video_reward_cumulative_accuracy": 0.7779898218829516
    },
    {
      "epoch": 0.2336004749183734,
      "grad_norm": 2.3573646545410156,
      "learning_rate": 4.733133539350281e-06,
      "loss": 0.0311,
      "step": 787,
      "video_reward_cumulative_accuracy": 0.7776365946632783
    },
    {
      "epoch": 0.23389729890175126,
      "grad_norm": 3.158076286315918,
      "learning_rate": 4.7319678339526e-06,
      "loss": 0.0754,
      "step": 788,
      "video_reward_cumulative_accuracy": 0.7779187817258884
    },
    {
      "epoch": 0.2341941228851291,
      "grad_norm": 2.9681179523468018,
      "learning_rate": 4.730799732320819e-06,
      "loss": 0.0412,
      "step": 789,
      "video_reward_cumulative_accuracy": 0.7782002534854245
    },
    {
      "epoch": 0.23449094686850697,
      "grad_norm": 2.718312978744507,
      "learning_rate": 4.729629235709009e-06,
      "loss": 0.0404,
      "step": 790,
      "video_reward_cumulative_accuracy": 0.7784810126582279
    },
    {
      "epoch": 0.23478777085188482,
      "grad_norm": 2.505716562271118,
      "learning_rate": 4.728456345373813e-06,
      "loss": 0.0526,
      "step": 791,
      "video_reward_cumulative_accuracy": 0.7787610619469026
    },
    {
      "epoch": 0.2350845948352627,
      "grad_norm": 3.465552806854248,
      "learning_rate": 4.7272810625744405e-06,
      "loss": 0.0543,
      "step": 792,
      "video_reward_cumulative_accuracy": 0.7790404040404041
    },
    {
      "epoch": 0.23538141881864055,
      "grad_norm": 2.564662218093872,
      "learning_rate": 4.726103388572672e-06,
      "loss": 0.0536,
      "step": 793,
      "video_reward_cumulative_accuracy": 0.7786885245901639
    },
    {
      "epoch": 0.2356782428020184,
      "grad_norm": 2.045988082885742,
      "learning_rate": 4.724923324632855e-06,
      "loss": 0.0451,
      "step": 794,
      "video_reward_cumulative_accuracy": 0.7789672544080605
    },
    {
      "epoch": 0.23597506678539626,
      "grad_norm": 1.944718837738037,
      "learning_rate": 4.7237408720219045e-06,
      "loss": 0.0348,
      "step": 795,
      "video_reward_cumulative_accuracy": 0.779245283018868
    },
    {
      "epoch": 0.2362718907687741,
      "grad_norm": 2.2963719367980957,
      "learning_rate": 4.722556032009295e-06,
      "loss": 0.0597,
      "step": 796,
      "video_reward_cumulative_accuracy": 0.7795226130653267
    },
    {
      "epoch": 0.23656871475215197,
      "grad_norm": 2.749324083328247,
      "learning_rate": 4.72136880586707e-06,
      "loss": 0.0344,
      "step": 797,
      "video_reward_cumulative_accuracy": 0.7797992471769134
    },
    {
      "epoch": 0.23686553873552982,
      "grad_norm": 3.161100387573242,
      "learning_rate": 4.7201791948698315e-06,
      "loss": 0.0518,
      "step": 798,
      "video_reward_cumulative_accuracy": 0.7800751879699248
    },
    {
      "epoch": 0.2371623627189077,
      "grad_norm": 8.226521492004395,
      "learning_rate": 4.71898720029474e-06,
      "loss": 0.1091,
      "step": 799,
      "video_reward_cumulative_accuracy": 0.77909887359199
    },
    {
      "epoch": 0.23745918670228555,
      "grad_norm": 4.398781776428223,
      "learning_rate": 4.717792823421521e-06,
      "loss": 0.0586,
      "step": 800,
      "video_reward_cumulative_accuracy": 0.77875
    },
    {
      "epoch": 0.23745918670228555,
      "eval_runtime": 128.8206,
      "eval_samples_per_second": 6.125,
      "eval_steps_per_second": 0.769,
      "eval_test_set_accuracy": 0.7487373737373737,
      "step": 800
    },
    {
      "epoch": 0.2377560106856634,
      "grad_norm": 3.7454681396484375,
      "learning_rate": 4.71659606553245e-06,
      "loss": 0.0363,
      "step": 801,
      "video_reward_cumulative_accuracy": 0.7790262172284644
    },
    {
      "epoch": 0.23805283466904126,
      "grad_norm": 1.430262804031372,
      "learning_rate": 4.7153969279123665e-06,
      "loss": 0.046,
      "step": 802,
      "video_reward_cumulative_accuracy": 0.7793017456359103
    },
    {
      "epoch": 0.2383496586524191,
      "grad_norm": 5.489625453948975,
      "learning_rate": 4.7141954118486585e-06,
      "loss": 0.0652,
      "step": 803,
      "video_reward_cumulative_accuracy": 0.7789539227895392
    },
    {
      "epoch": 0.23864648263579696,
      "grad_norm": 2.220259666442871,
      "learning_rate": 4.712991518631272e-06,
      "loss": 0.0209,
      "step": 804,
      "video_reward_cumulative_accuracy": 0.779228855721393
    },
    {
      "epoch": 0.23894330661917482,
      "grad_norm": 2.3900604248046875,
      "learning_rate": 4.711785249552701e-06,
      "loss": 0.0485,
      "step": 805,
      "video_reward_cumulative_accuracy": 0.7795031055900621
    },
    {
      "epoch": 0.2392401306025527,
      "grad_norm": 2.9659066200256348,
      "learning_rate": 4.710576605907995e-06,
      "loss": 0.0493,
      "step": 806,
      "video_reward_cumulative_accuracy": 0.7797766749379652
    },
    {
      "epoch": 0.23953695458593055,
      "grad_norm": 5.7252326011657715,
      "learning_rate": 4.709365588994749e-06,
      "loss": 0.0617,
      "step": 807,
      "video_reward_cumulative_accuracy": 0.7794299876084263
    },
    {
      "epoch": 0.2398337785693084,
      "grad_norm": 3.0332016944885254,
      "learning_rate": 4.708152200113106e-06,
      "loss": 0.0504,
      "step": 808,
      "video_reward_cumulative_accuracy": 0.7797029702970297
    },
    {
      "epoch": 0.24013060255268626,
      "grad_norm": 1.474946141242981,
      "learning_rate": 4.706936440565759e-06,
      "loss": 0.0367,
      "step": 809,
      "video_reward_cumulative_accuracy": 0.7799752781211372
    },
    {
      "epoch": 0.2404274265360641,
      "grad_norm": 5.749459743499756,
      "learning_rate": 4.705718311657943e-06,
      "loss": 0.0698,
      "step": 810,
      "video_reward_cumulative_accuracy": 0.7802469135802469
    },
    {
      "epoch": 0.24072425051944196,
      "grad_norm": 1.7116312980651855,
      "learning_rate": 4.704497814697436e-06,
      "loss": 0.0396,
      "step": 811,
      "video_reward_cumulative_accuracy": 0.7805178791615289
    },
    {
      "epoch": 0.24102107450281982,
      "grad_norm": 1.7955880165100098,
      "learning_rate": 4.703274950994563e-06,
      "loss": 0.0209,
      "step": 812,
      "video_reward_cumulative_accuracy": 0.7807881773399015
    },
    {
      "epoch": 0.2413178984861977,
      "grad_norm": 4.338817596435547,
      "learning_rate": 4.702049721862184e-06,
      "loss": 0.0471,
      "step": 813,
      "video_reward_cumulative_accuracy": 0.7810578105781057
    },
    {
      "epoch": 0.24161472246957555,
      "grad_norm": 1.308546781539917,
      "learning_rate": 4.700822128615703e-06,
      "loss": 0.028,
      "step": 814,
      "video_reward_cumulative_accuracy": 0.7813267813267813
    },
    {
      "epoch": 0.2419115464529534,
      "grad_norm": 5.44117546081543,
      "learning_rate": 4.699592172573061e-06,
      "loss": 0.08,
      "step": 815,
      "video_reward_cumulative_accuracy": 0.7815950920245399
    },
    {
      "epoch": 0.24220837043633126,
      "grad_norm": 3.996955394744873,
      "learning_rate": 4.698359855054733e-06,
      "loss": 0.0551,
      "step": 816,
      "video_reward_cumulative_accuracy": 0.7818627450980392
    },
    {
      "epoch": 0.2425051944197091,
      "grad_norm": 3.2032980918884277,
      "learning_rate": 4.6971251773837335e-06,
      "loss": 0.0969,
      "step": 817,
      "video_reward_cumulative_accuracy": 0.7821297429620563
    },
    {
      "epoch": 0.24280201840308696,
      "grad_norm": 3.8643088340759277,
      "learning_rate": 4.695888140885608e-06,
      "loss": 0.0489,
      "step": 818,
      "video_reward_cumulative_accuracy": 0.78239608801956
    },
    {
      "epoch": 0.24309884238646481,
      "grad_norm": 1.5098183155059814,
      "learning_rate": 4.6946487468884346e-06,
      "loss": 0.0363,
      "step": 819,
      "video_reward_cumulative_accuracy": 0.7826617826617827
    },
    {
      "epoch": 0.2433956663698427,
      "grad_norm": 2.6578338146209717,
      "learning_rate": 4.693406996722824e-06,
      "loss": 0.0805,
      "step": 820,
      "video_reward_cumulative_accuracy": 0.7829268292682927
    },
    {
      "epoch": 0.24369249035322055,
      "grad_norm": 2.7649166584014893,
      "learning_rate": 4.692162891721917e-06,
      "loss": 0.0424,
      "step": 821,
      "video_reward_cumulative_accuracy": 0.7825822168087698
    },
    {
      "epoch": 0.2439893143365984,
      "grad_norm": 1.1864911317825317,
      "learning_rate": 4.690916433221377e-06,
      "loss": 0.0167,
      "step": 822,
      "video_reward_cumulative_accuracy": 0.7828467153284672
    },
    {
      "epoch": 0.24428613831997625,
      "grad_norm": 1.7823535203933716,
      "learning_rate": 4.6896676225594016e-06,
      "loss": 0.0778,
      "step": 823,
      "video_reward_cumulative_accuracy": 0.7831105710814095
    },
    {
      "epoch": 0.2445829623033541,
      "grad_norm": 3.1167526245117188,
      "learning_rate": 4.68841646107671e-06,
      "loss": 0.0327,
      "step": 824,
      "video_reward_cumulative_accuracy": 0.783373786407767
    },
    {
      "epoch": 0.24487978628673196,
      "grad_norm": 3.2958545684814453,
      "learning_rate": 4.6871629501165435e-06,
      "loss": 0.024,
      "step": 825,
      "video_reward_cumulative_accuracy": 0.7836363636363637
    },
    {
      "epoch": 0.2451766102701098,
      "grad_norm": 3.2736194133758545,
      "learning_rate": 4.68590709102467e-06,
      "loss": 0.0416,
      "step": 826,
      "video_reward_cumulative_accuracy": 0.7832929782082324
    },
    {
      "epoch": 0.2454734342534877,
      "grad_norm": 2.1489417552948,
      "learning_rate": 4.684648885149374e-06,
      "loss": 0.0491,
      "step": 827,
      "video_reward_cumulative_accuracy": 0.7835550181378477
    },
    {
      "epoch": 0.24577025823686555,
      "grad_norm": 1.746704339981079,
      "learning_rate": 4.6833883338414635e-06,
      "loss": 0.0513,
      "step": 828,
      "video_reward_cumulative_accuracy": 0.7832125603864735
    },
    {
      "epoch": 0.2460670822202434,
      "grad_norm": 2.047314167022705,
      "learning_rate": 4.682125438454261e-06,
      "loss": 0.0399,
      "step": 829,
      "video_reward_cumulative_accuracy": 0.7828709288299156
    },
    {
      "epoch": 0.24636390620362125,
      "grad_norm": 1.1579736471176147,
      "learning_rate": 4.680860200343609e-06,
      "loss": 0.0214,
      "step": 830,
      "video_reward_cumulative_accuracy": 0.7831325301204819
    },
    {
      "epoch": 0.2466607301869991,
      "grad_norm": 3.0398266315460205,
      "learning_rate": 4.679592620867862e-06,
      "loss": 0.0607,
      "step": 831,
      "video_reward_cumulative_accuracy": 0.782791817087846
    },
    {
      "epoch": 0.24695755417037696,
      "grad_norm": 2.732314348220825,
      "learning_rate": 4.678322701387891e-06,
      "loss": 0.0403,
      "step": 832,
      "video_reward_cumulative_accuracy": 0.7824519230769231
    },
    {
      "epoch": 0.2472543781537548,
      "grad_norm": 2.1207661628723145,
      "learning_rate": 4.677050443267076e-06,
      "loss": 0.0301,
      "step": 833,
      "video_reward_cumulative_accuracy": 0.7827130852340937
    },
    {
      "epoch": 0.2475512021371327,
      "grad_norm": 1.3241153955459595,
      "learning_rate": 4.675775847871311e-06,
      "loss": 0.0499,
      "step": 834,
      "video_reward_cumulative_accuracy": 0.7823741007194245
    },
    {
      "epoch": 0.24784802612051055,
      "grad_norm": 1.7813966274261475,
      "learning_rate": 4.6744989165689975e-06,
      "loss": 0.0392,
      "step": 835,
      "video_reward_cumulative_accuracy": 0.7826347305389222
    },
    {
      "epoch": 0.2481448501038884,
      "grad_norm": 1.4840867519378662,
      "learning_rate": 4.673219650731045e-06,
      "loss": 0.0193,
      "step": 836,
      "video_reward_cumulative_accuracy": 0.7828947368421053
    },
    {
      "epoch": 0.24844167408726625,
      "grad_norm": 0.5827304720878601,
      "learning_rate": 4.67193805173087e-06,
      "loss": 0.0077,
      "step": 837,
      "video_reward_cumulative_accuracy": 0.7831541218637993
    },
    {
      "epoch": 0.2487384980706441,
      "grad_norm": 2.449009656906128,
      "learning_rate": 4.670654120944393e-06,
      "loss": 0.0828,
      "step": 838,
      "video_reward_cumulative_accuracy": 0.7828162291169452
    },
    {
      "epoch": 0.24903532205402196,
      "grad_norm": 2.1830556392669678,
      "learning_rate": 4.669367859750038e-06,
      "loss": 0.0239,
      "step": 839,
      "video_reward_cumulative_accuracy": 0.7830750893921334
    },
    {
      "epoch": 0.2493321460373998,
      "grad_norm": 2.4376471042633057,
      "learning_rate": 4.668079269528732e-06,
      "loss": 0.0652,
      "step": 840,
      "video_reward_cumulative_accuracy": 0.7833333333333333
    },
    {
      "epoch": 0.2496289700207777,
      "grad_norm": 8.17204761505127,
      "learning_rate": 4.666788351663902e-06,
      "loss": 0.1043,
      "step": 841,
      "video_reward_cumulative_accuracy": 0.7835909631391201
    },
    {
      "epoch": 0.24992579400415554,
      "grad_norm": 4.692628860473633,
      "learning_rate": 4.6654951075414715e-06,
      "loss": 0.1819,
      "step": 842,
      "video_reward_cumulative_accuracy": 0.7838479809976246
    },
    {
      "epoch": 0.25022261798753337,
      "grad_norm": 2.7783429622650146,
      "learning_rate": 4.664199538549865e-06,
      "loss": 0.0546,
      "step": 843,
      "video_reward_cumulative_accuracy": 0.7841043890865955
    },
    {
      "epoch": 0.25051944197091125,
      "grad_norm": 2.2595643997192383,
      "learning_rate": 4.662901646080002e-06,
      "loss": 0.0521,
      "step": 844,
      "video_reward_cumulative_accuracy": 0.7837677725118484
    },
    {
      "epoch": 0.25081626595428913,
      "grad_norm": 6.65918493270874,
      "learning_rate": 4.661601431525295e-06,
      "loss": 0.106,
      "step": 845,
      "video_reward_cumulative_accuracy": 0.7840236686390533
    },
    {
      "epoch": 0.25111308993766696,
      "grad_norm": 2.6098530292510986,
      "learning_rate": 4.660298896281653e-06,
      "loss": 0.0738,
      "step": 846,
      "video_reward_cumulative_accuracy": 0.7836879432624113
    },
    {
      "epoch": 0.25140991392104484,
      "grad_norm": 1.8904296159744263,
      "learning_rate": 4.658994041747471e-06,
      "loss": 0.0658,
      "step": 847,
      "video_reward_cumulative_accuracy": 0.7839433293978748
    },
    {
      "epoch": 0.25170673790442266,
      "grad_norm": 3.186429977416992,
      "learning_rate": 4.657686869323638e-06,
      "loss": 0.0371,
      "step": 848,
      "video_reward_cumulative_accuracy": 0.7841981132075472
    },
    {
      "epoch": 0.25200356188780054,
      "grad_norm": 1.2086502313613892,
      "learning_rate": 4.6563773804135305e-06,
      "loss": 0.038,
      "step": 849,
      "video_reward_cumulative_accuracy": 0.784452296819788
    },
    {
      "epoch": 0.25230038587117837,
      "grad_norm": 1.1845347881317139,
      "learning_rate": 4.655065576423013e-06,
      "loss": 0.0215,
      "step": 850,
      "video_reward_cumulative_accuracy": 0.7847058823529411
    },
    {
      "epoch": 0.25259720985455625,
      "grad_norm": 2.5198981761932373,
      "learning_rate": 4.6537514587604316e-06,
      "loss": 0.0581,
      "step": 851,
      "video_reward_cumulative_accuracy": 0.7849588719153937
    },
    {
      "epoch": 0.25289403383793413,
      "grad_norm": 4.27205753326416,
      "learning_rate": 4.652435028836622e-06,
      "loss": 0.0617,
      "step": 852,
      "video_reward_cumulative_accuracy": 0.7846244131455399
    },
    {
      "epoch": 0.25319085782131195,
      "grad_norm": 1.900675892829895,
      "learning_rate": 4.651116288064899e-06,
      "loss": 0.0384,
      "step": 853,
      "video_reward_cumulative_accuracy": 0.7848769050410317
    },
    {
      "epoch": 0.25348768180468984,
      "grad_norm": 1.898750901222229,
      "learning_rate": 4.649795237861058e-06,
      "loss": 0.0461,
      "step": 854,
      "video_reward_cumulative_accuracy": 0.7851288056206089
    },
    {
      "epoch": 0.25378450578806766,
      "grad_norm": 1.234446406364441,
      "learning_rate": 4.648471879643374e-06,
      "loss": 0.0797,
      "step": 855,
      "video_reward_cumulative_accuracy": 0.7847953216374269
    },
    {
      "epoch": 0.25408132977144554,
      "grad_norm": 2.0378189086914062,
      "learning_rate": 4.647146214832602e-06,
      "loss": 0.0835,
      "step": 856,
      "video_reward_cumulative_accuracy": 0.7844626168224299
    },
    {
      "epoch": 0.25437815375482337,
      "grad_norm": 1.5428322553634644,
      "learning_rate": 4.645818244851971e-06,
      "loss": 0.0477,
      "step": 857,
      "video_reward_cumulative_accuracy": 0.7841306884480747
    },
    {
      "epoch": 0.25467497773820125,
      "grad_norm": 1.234578251838684,
      "learning_rate": 4.644487971127186e-06,
      "loss": 0.0556,
      "step": 858,
      "video_reward_cumulative_accuracy": 0.7843822843822844
    },
    {
      "epoch": 0.2549718017215791,
      "grad_norm": 2.008531093597412,
      "learning_rate": 4.643155395086425e-06,
      "loss": 0.0508,
      "step": 859,
      "video_reward_cumulative_accuracy": 0.7846332945285215
    },
    {
      "epoch": 0.25526862570495695,
      "grad_norm": 2.030647039413452,
      "learning_rate": 4.6418205181603385e-06,
      "loss": 0.0239,
      "step": 860,
      "video_reward_cumulative_accuracy": 0.7848837209302325
    },
    {
      "epoch": 0.25556544968833483,
      "grad_norm": 3.389904022216797,
      "learning_rate": 4.640483341782044e-06,
      "loss": 0.0565,
      "step": 861,
      "video_reward_cumulative_accuracy": 0.7851335656213705
    },
    {
      "epoch": 0.25586227367171266,
      "grad_norm": 1.0547555685043335,
      "learning_rate": 4.639143867387132e-06,
      "loss": 0.0433,
      "step": 862,
      "video_reward_cumulative_accuracy": 0.7853828306264501
    },
    {
      "epoch": 0.25615909765509054,
      "grad_norm": 2.2876555919647217,
      "learning_rate": 4.6378020964136586e-06,
      "loss": 0.0344,
      "step": 863,
      "video_reward_cumulative_accuracy": 0.7856315179606026
    },
    {
      "epoch": 0.25645592163846836,
      "grad_norm": 1.4506580829620361,
      "learning_rate": 4.636458030302144e-06,
      "loss": 0.0418,
      "step": 864,
      "video_reward_cumulative_accuracy": 0.7858796296296297
    },
    {
      "epoch": 0.25675274562184625,
      "grad_norm": 5.08413028717041,
      "learning_rate": 4.635111670495574e-06,
      "loss": 0.049,
      "step": 865,
      "video_reward_cumulative_accuracy": 0.7855491329479769
    },
    {
      "epoch": 0.2570495696052241,
      "grad_norm": 4.657954692840576,
      "learning_rate": 4.6337630184393965e-06,
      "loss": 0.0434,
      "step": 866,
      "video_reward_cumulative_accuracy": 0.785796766743649
    },
    {
      "epoch": 0.25734639358860195,
      "grad_norm": 1.428345799446106,
      "learning_rate": 4.632412075581521e-06,
      "loss": 0.0475,
      "step": 867,
      "video_reward_cumulative_accuracy": 0.7860438292964245
    },
    {
      "epoch": 0.25764321757197983,
      "grad_norm": 0.920592188835144,
      "learning_rate": 4.6310588433723145e-06,
      "loss": 0.0175,
      "step": 868,
      "video_reward_cumulative_accuracy": 0.7862903225806451
    },
    {
      "epoch": 0.25794004155535766,
      "grad_norm": 3.902883291244507,
      "learning_rate": 4.629703323264605e-06,
      "loss": 0.0379,
      "step": 869,
      "video_reward_cumulative_accuracy": 0.786536248561565
    },
    {
      "epoch": 0.25823686553873554,
      "grad_norm": 2.918687582015991,
      "learning_rate": 4.6283455167136724e-06,
      "loss": 0.0538,
      "step": 870,
      "video_reward_cumulative_accuracy": 0.7867816091954023
    },
    {
      "epoch": 0.25853368952211336,
      "grad_norm": 2.900933027267456,
      "learning_rate": 4.626985425177256e-06,
      "loss": 0.0521,
      "step": 871,
      "video_reward_cumulative_accuracy": 0.7870264064293915
    },
    {
      "epoch": 0.25883051350549124,
      "grad_norm": 3.1551060676574707,
      "learning_rate": 4.625623050115545e-06,
      "loss": 0.0863,
      "step": 872,
      "video_reward_cumulative_accuracy": 0.786697247706422
    },
    {
      "epoch": 0.2591273374888691,
      "grad_norm": 1.1339958906173706,
      "learning_rate": 4.6242583929911825e-06,
      "loss": 0.0553,
      "step": 873,
      "video_reward_cumulative_accuracy": 0.786368843069874
    },
    {
      "epoch": 0.25942416147224695,
      "grad_norm": 1.0027540922164917,
      "learning_rate": 4.6228914552692585e-06,
      "loss": 0.0178,
      "step": 874,
      "video_reward_cumulative_accuracy": 0.7866132723112128
    },
    {
      "epoch": 0.25972098545562483,
      "grad_norm": 3.1570804119110107,
      "learning_rate": 4.621522238417314e-06,
      "loss": 0.1099,
      "step": 875,
      "video_reward_cumulative_accuracy": 0.7868571428571428
    },
    {
      "epoch": 0.26001780943900266,
      "grad_norm": 2.4433491230010986,
      "learning_rate": 4.620150743905338e-06,
      "loss": 0.0314,
      "step": 876,
      "video_reward_cumulative_accuracy": 0.7865296803652968
    },
    {
      "epoch": 0.26031463342238054,
      "grad_norm": 3.163719654083252,
      "learning_rate": 4.6187769732057595e-06,
      "loss": 0.0235,
      "step": 877,
      "video_reward_cumulative_accuracy": 0.7867730900798175
    },
    {
      "epoch": 0.26061145740575836,
      "grad_norm": 4.77492094039917,
      "learning_rate": 4.617400927793457e-06,
      "loss": 0.0781,
      "step": 878,
      "video_reward_cumulative_accuracy": 0.7864464692482915
    },
    {
      "epoch": 0.26090828138913624,
      "grad_norm": 1.8638689517974854,
      "learning_rate": 4.6160226091457495e-06,
      "loss": 0.0368,
      "step": 879,
      "video_reward_cumulative_accuracy": 0.7861205915813424
    },
    {
      "epoch": 0.2612051053725141,
      "grad_norm": 2.038536548614502,
      "learning_rate": 4.6146420187423935e-06,
      "loss": 0.0344,
      "step": 880,
      "video_reward_cumulative_accuracy": 0.7863636363636364
    },
    {
      "epoch": 0.26150192935589195,
      "grad_norm": 2.544743299484253,
      "learning_rate": 4.613259158065588e-06,
      "loss": 0.0741,
      "step": 881,
      "video_reward_cumulative_accuracy": 0.7866061293984109
    },
    {
      "epoch": 0.26179875333926983,
      "grad_norm": 4.100607395172119,
      "learning_rate": 4.611874028599969e-06,
      "loss": 0.0515,
      "step": 882,
      "video_reward_cumulative_accuracy": 0.7868480725623582
    },
    {
      "epoch": 0.26209557732264765,
      "grad_norm": 0.9536772966384888,
      "learning_rate": 4.610486631832606e-06,
      "loss": 0.0332,
      "step": 883,
      "video_reward_cumulative_accuracy": 0.7865232163080408
    },
    {
      "epoch": 0.26239240130602554,
      "grad_norm": 1.7179930210113525,
      "learning_rate": 4.609096969253005e-06,
      "loss": 0.0275,
      "step": 884,
      "video_reward_cumulative_accuracy": 0.7867647058823529
    },
    {
      "epoch": 0.26268922528940336,
      "grad_norm": 2.068021059036255,
      "learning_rate": 4.607705042353104e-06,
      "loss": 0.0526,
      "step": 885,
      "video_reward_cumulative_accuracy": 0.7870056497175141
    },
    {
      "epoch": 0.26298604927278124,
      "grad_norm": 3.365269899368286,
      "learning_rate": 4.60631085262727e-06,
      "loss": 0.0301,
      "step": 886,
      "video_reward_cumulative_accuracy": 0.7872460496613995
    },
    {
      "epoch": 0.2632828732561591,
      "grad_norm": 4.123976707458496,
      "learning_rate": 4.604914401572301e-06,
      "loss": 0.0437,
      "step": 887,
      "video_reward_cumulative_accuracy": 0.7874859075535513
    },
    {
      "epoch": 0.26357969723953695,
      "grad_norm": 2.0754764080047607,
      "learning_rate": 4.603515690687425e-06,
      "loss": 0.0825,
      "step": 888,
      "video_reward_cumulative_accuracy": 0.7877252252252253
    },
    {
      "epoch": 0.26387652122291483,
      "grad_norm": 2.7115345001220703,
      "learning_rate": 4.602114721474293e-06,
      "loss": 0.0596,
      "step": 889,
      "video_reward_cumulative_accuracy": 0.7879640044994376
    },
    {
      "epoch": 0.26417334520629265,
      "grad_norm": 1.4795091152191162,
      "learning_rate": 4.60071149543698e-06,
      "loss": 0.0438,
      "step": 890,
      "video_reward_cumulative_accuracy": 0.7882022471910113
    },
    {
      "epoch": 0.26447016918967053,
      "grad_norm": 4.9827985763549805,
      "learning_rate": 4.599306014081987e-06,
      "loss": 0.0523,
      "step": 891,
      "video_reward_cumulative_accuracy": 0.7878787878787878
    },
    {
      "epoch": 0.26476699317304836,
      "grad_norm": 3.740387439727783,
      "learning_rate": 4.597898278918233e-06,
      "loss": 0.0927,
      "step": 892,
      "video_reward_cumulative_accuracy": 0.7881165919282511
    },
    {
      "epoch": 0.26506381715642624,
      "grad_norm": 2.645918130874634,
      "learning_rate": 4.596488291457061e-06,
      "loss": 0.0482,
      "step": 893,
      "video_reward_cumulative_accuracy": 0.7883538633818589
    },
    {
      "epoch": 0.2653606411398041,
      "grad_norm": 3.115306854248047,
      "learning_rate": 4.595076053212226e-06,
      "loss": 0.0522,
      "step": 894,
      "video_reward_cumulative_accuracy": 0.7885906040268457
    },
    {
      "epoch": 0.26565746512318195,
      "grad_norm": 2.4514198303222656,
      "learning_rate": 4.593661565699905e-06,
      "loss": 0.0442,
      "step": 895,
      "video_reward_cumulative_accuracy": 0.788826815642458
    },
    {
      "epoch": 0.2659542891065598,
      "grad_norm": 1.658631443977356,
      "learning_rate": 4.592244830438688e-06,
      "loss": 0.0353,
      "step": 896,
      "video_reward_cumulative_accuracy": 0.7890625
    },
    {
      "epoch": 0.26625111308993765,
      "grad_norm": 4.696023464202881,
      "learning_rate": 4.590825848949576e-06,
      "loss": 0.0655,
      "step": 897,
      "video_reward_cumulative_accuracy": 0.7892976588628763
    },
    {
      "epoch": 0.26654793707331553,
      "grad_norm": 4.951619625091553,
      "learning_rate": 4.589404622755985e-06,
      "loss": 0.0759,
      "step": 898,
      "video_reward_cumulative_accuracy": 0.7895322939866369
    },
    {
      "epoch": 0.26684476105669336,
      "grad_norm": 4.583511829376221,
      "learning_rate": 4.587981153383738e-06,
      "loss": 0.1074,
      "step": 899,
      "video_reward_cumulative_accuracy": 0.789210233592881
    },
    {
      "epoch": 0.26714158504007124,
      "grad_norm": 1.0951406955718994,
      "learning_rate": 4.586555442361068e-06,
      "loss": 0.0384,
      "step": 900,
      "video_reward_cumulative_accuracy": 0.7894444444444444
    },
    {
      "epoch": 0.2674384090234491,
      "grad_norm": 2.3430957794189453,
      "learning_rate": 4.585127491218615e-06,
      "loss": 0.0268,
      "step": 901,
      "video_reward_cumulative_accuracy": 0.7896781354051055
    },
    {
      "epoch": 0.26773523300682694,
      "grad_norm": 3.9636011123657227,
      "learning_rate": 4.5836973014894225e-06,
      "loss": 0.0735,
      "step": 902,
      "video_reward_cumulative_accuracy": 0.7899113082039911
    },
    {
      "epoch": 0.2680320569902048,
      "grad_norm": 3.968548536300659,
      "learning_rate": 4.582264874708937e-06,
      "loss": 0.0568,
      "step": 903,
      "video_reward_cumulative_accuracy": 0.7901439645625692
    },
    {
      "epoch": 0.26832888097358265,
      "grad_norm": 1.8706952333450317,
      "learning_rate": 4.58083021241501e-06,
      "loss": 0.0886,
      "step": 904,
      "video_reward_cumulative_accuracy": 0.7898230088495575
    },
    {
      "epoch": 0.26862570495696053,
      "grad_norm": 2.0862226486206055,
      "learning_rate": 4.579393316147888e-06,
      "loss": 0.057,
      "step": 905,
      "video_reward_cumulative_accuracy": 0.7900552486187845
    },
    {
      "epoch": 0.26892252894033836,
      "grad_norm": 2.1430046558380127,
      "learning_rate": 4.577954187450221e-06,
      "loss": 0.051,
      "step": 906,
      "video_reward_cumulative_accuracy": 0.7902869757174393
    },
    {
      "epoch": 0.26921935292371624,
      "grad_norm": 1.2016264200210571,
      "learning_rate": 4.576512827867051e-06,
      "loss": 0.0298,
      "step": 907,
      "video_reward_cumulative_accuracy": 0.7905181918412348
    },
    {
      "epoch": 0.2695161769070941,
      "grad_norm": 2.2290544509887695,
      "learning_rate": 4.57506923894582e-06,
      "loss": 0.059,
      "step": 908,
      "video_reward_cumulative_accuracy": 0.7907488986784141
    },
    {
      "epoch": 0.26981300089047194,
      "grad_norm": 1.2380579710006714,
      "learning_rate": 4.573623422236359e-06,
      "loss": 0.0574,
      "step": 909,
      "video_reward_cumulative_accuracy": 0.7904290429042904
    },
    {
      "epoch": 0.2701098248738498,
      "grad_norm": 1.3100279569625854,
      "learning_rate": 4.572175379290892e-06,
      "loss": 0.0188,
      "step": 910,
      "video_reward_cumulative_accuracy": 0.7906593406593406
    },
    {
      "epoch": 0.27040664885722765,
      "grad_norm": 1.4202399253845215,
      "learning_rate": 4.570725111664035e-06,
      "loss": 0.0274,
      "step": 911,
      "video_reward_cumulative_accuracy": 0.7908891328210758
    },
    {
      "epoch": 0.27070347284060553,
      "grad_norm": 1.6007649898529053,
      "learning_rate": 4.569272620912791e-06,
      "loss": 0.0389,
      "step": 912,
      "video_reward_cumulative_accuracy": 0.7911184210526315
    },
    {
      "epoch": 0.27100029682398336,
      "grad_norm": 1.8392062187194824,
      "learning_rate": 4.56781790859655e-06,
      "loss": 0.07,
      "step": 913,
      "video_reward_cumulative_accuracy": 0.7913472070098576
    },
    {
      "epoch": 0.27129712080736124,
      "grad_norm": 2.425304651260376,
      "learning_rate": 4.566360976277086e-06,
      "loss": 0.0375,
      "step": 914,
      "video_reward_cumulative_accuracy": 0.7915754923413567
    },
    {
      "epoch": 0.2715939447907391,
      "grad_norm": 3.8037993907928467,
      "learning_rate": 4.564901825518558e-06,
      "loss": 0.066,
      "step": 915,
      "video_reward_cumulative_accuracy": 0.7918032786885246
    },
    {
      "epoch": 0.27189076877411694,
      "grad_norm": 2.4875757694244385,
      "learning_rate": 4.563440457887506e-06,
      "loss": 0.0269,
      "step": 916,
      "video_reward_cumulative_accuracy": 0.7920305676855895
    },
    {
      "epoch": 0.2721875927574948,
      "grad_norm": 2.975370168685913,
      "learning_rate": 4.561976874952849e-06,
      "loss": 0.0474,
      "step": 917,
      "video_reward_cumulative_accuracy": 0.792257360959651
    },
    {
      "epoch": 0.27248441674087265,
      "grad_norm": 1.9937337636947632,
      "learning_rate": 4.560511078285885e-06,
      "loss": 0.092,
      "step": 918,
      "video_reward_cumulative_accuracy": 0.7924836601307189
    },
    {
      "epoch": 0.27278124072425053,
      "grad_norm": 2.1573855876922607,
      "learning_rate": 4.559043069460291e-06,
      "loss": 0.0465,
      "step": 919,
      "video_reward_cumulative_accuracy": 0.7921653971708379
    },
    {
      "epoch": 0.27307806470762835,
      "grad_norm": 1.528318166732788,
      "learning_rate": 4.557572850052116e-06,
      "loss": 0.0668,
      "step": 920,
      "video_reward_cumulative_accuracy": 0.7918478260869565
    },
    {
      "epoch": 0.27337488869100623,
      "grad_norm": 3.7441587448120117,
      "learning_rate": 4.556100421639783e-06,
      "loss": 0.1027,
      "step": 921,
      "video_reward_cumulative_accuracy": 0.7920738327904452
    },
    {
      "epoch": 0.2736717126743841,
      "grad_norm": 1.5246275663375854,
      "learning_rate": 4.554625785804087e-06,
      "loss": 0.0302,
      "step": 922,
      "video_reward_cumulative_accuracy": 0.7922993492407809
    },
    {
      "epoch": 0.27396853665776194,
      "grad_norm": 3.7128987312316895,
      "learning_rate": 4.553148944128192e-06,
      "loss": 0.0519,
      "step": 923,
      "video_reward_cumulative_accuracy": 0.7925243770314193
    },
    {
      "epoch": 0.2742653606411398,
      "grad_norm": 5.355534076690674,
      "learning_rate": 4.551669898197631e-06,
      "loss": 0.0838,
      "step": 924,
      "video_reward_cumulative_accuracy": 0.7927489177489178
    },
    {
      "epoch": 0.27456218462451765,
      "grad_norm": 3.1454975605010986,
      "learning_rate": 4.550188649600306e-06,
      "loss": 0.0614,
      "step": 925,
      "video_reward_cumulative_accuracy": 0.792972972972973
    },
    {
      "epoch": 0.2748590086078955,
      "grad_norm": 6.770321846008301,
      "learning_rate": 4.548705199926478e-06,
      "loss": 0.0797,
      "step": 926,
      "video_reward_cumulative_accuracy": 0.7931965442764579
    },
    {
      "epoch": 0.27515583259127335,
      "grad_norm": 4.7147064208984375,
      "learning_rate": 4.547219550768774e-06,
      "loss": 0.0511,
      "step": 927,
      "video_reward_cumulative_accuracy": 0.7928802588996764
    },
    {
      "epoch": 0.27545265657465123,
      "grad_norm": 3.4447805881500244,
      "learning_rate": 4.545731703722185e-06,
      "loss": 0.0591,
      "step": 928,
      "video_reward_cumulative_accuracy": 0.7931034482758621
    },
    {
      "epoch": 0.2757494805580291,
      "grad_norm": 1.9188413619995117,
      "learning_rate": 4.544241660384057e-06,
      "loss": 0.0486,
      "step": 929,
      "video_reward_cumulative_accuracy": 0.7927879440258342
    },
    {
      "epoch": 0.27604630454140694,
      "grad_norm": 0.8871810436248779,
      "learning_rate": 4.542749422354098e-06,
      "loss": 0.0457,
      "step": 930,
      "video_reward_cumulative_accuracy": 0.7924731182795699
    },
    {
      "epoch": 0.2763431285247848,
      "grad_norm": 1.0391457080841064,
      "learning_rate": 4.54125499123437e-06,
      "loss": 0.0463,
      "step": 931,
      "video_reward_cumulative_accuracy": 0.7926960257787325
    },
    {
      "epoch": 0.27663995250816265,
      "grad_norm": 3.6447765827178955,
      "learning_rate": 4.539758368629288e-06,
      "loss": 0.0333,
      "step": 932,
      "video_reward_cumulative_accuracy": 0.7929184549356223
    },
    {
      "epoch": 0.2769367764915405,
      "grad_norm": 2.135462760925293,
      "learning_rate": 4.538259556145623e-06,
      "loss": 0.0349,
      "step": 933,
      "video_reward_cumulative_accuracy": 0.7931404072883173
    },
    {
      "epoch": 0.27723360047491835,
      "grad_norm": 1.0831674337387085,
      "learning_rate": 4.5367585553924965e-06,
      "loss": 0.0454,
      "step": 934,
      "video_reward_cumulative_accuracy": 0.7933618843683083
    },
    {
      "epoch": 0.27753042445829623,
      "grad_norm": 2.257012128829956,
      "learning_rate": 4.5352553679813775e-06,
      "loss": 0.0664,
      "step": 935,
      "video_reward_cumulative_accuracy": 0.793048128342246
    },
    {
      "epoch": 0.2778272484416741,
      "grad_norm": 1.1687437295913696,
      "learning_rate": 4.5337499955260825e-06,
      "loss": 0.0443,
      "step": 936,
      "video_reward_cumulative_accuracy": 0.7932692307692307
    },
    {
      "epoch": 0.27812407242505194,
      "grad_norm": 1.566916823387146,
      "learning_rate": 4.532242439642778e-06,
      "loss": 0.0663,
      "step": 937,
      "video_reward_cumulative_accuracy": 0.7934898612593383
    },
    {
      "epoch": 0.2784208964084298,
      "grad_norm": 2.265585422515869,
      "learning_rate": 4.530732701949968e-06,
      "loss": 0.0347,
      "step": 938,
      "video_reward_cumulative_accuracy": 0.7937100213219617
    },
    {
      "epoch": 0.27871772039180764,
      "grad_norm": 1.542738437652588,
      "learning_rate": 4.529220784068505e-06,
      "loss": 0.0337,
      "step": 939,
      "video_reward_cumulative_accuracy": 0.7933972310969116
    },
    {
      "epoch": 0.2790145443751855,
      "grad_norm": 1.868859887123108,
      "learning_rate": 4.527706687621578e-06,
      "loss": 0.0593,
      "step": 940,
      "video_reward_cumulative_accuracy": 0.7930851063829787
    },
    {
      "epoch": 0.27931136835856335,
      "grad_norm": 1.1284223794937134,
      "learning_rate": 4.526190414234718e-06,
      "loss": 0.0271,
      "step": 941,
      "video_reward_cumulative_accuracy": 0.7933049946865037
    },
    {
      "epoch": 0.27960819234194123,
      "grad_norm": 2.6170237064361572,
      "learning_rate": 4.524671965535791e-06,
      "loss": 0.0675,
      "step": 942,
      "video_reward_cumulative_accuracy": 0.7929936305732485
    },
    {
      "epoch": 0.2799050163253191,
      "grad_norm": 1.8915306329727173,
      "learning_rate": 4.523151343154999e-06,
      "loss": 0.0355,
      "step": 943,
      "video_reward_cumulative_accuracy": 0.7932131495227995
    },
    {
      "epoch": 0.28020184030869694,
      "grad_norm": 1.1092487573623657,
      "learning_rate": 4.521628548724877e-06,
      "loss": 0.0447,
      "step": 944,
      "video_reward_cumulative_accuracy": 0.7934322033898306
    },
    {
      "epoch": 0.2804986642920748,
      "grad_norm": 2.1226096153259277,
      "learning_rate": 4.5201035838802935e-06,
      "loss": 0.076,
      "step": 945,
      "video_reward_cumulative_accuracy": 0.7936507936507936
    },
    {
      "epoch": 0.28079548827545264,
      "grad_norm": 3.9487340450286865,
      "learning_rate": 4.518576450258446e-06,
      "loss": 0.102,
      "step": 946,
      "video_reward_cumulative_accuracy": 0.7938689217758985
    },
    {
      "epoch": 0.2810923122588305,
      "grad_norm": 2.5979459285736084,
      "learning_rate": 4.517047149498861e-06,
      "loss": 0.0372,
      "step": 947,
      "video_reward_cumulative_accuracy": 0.7940865892291447
    },
    {
      "epoch": 0.28138913624220835,
      "grad_norm": 5.942515850067139,
      "learning_rate": 4.51551568324339e-06,
      "loss": 0.1805,
      "step": 948,
      "video_reward_cumulative_accuracy": 0.7943037974683544
    },
    {
      "epoch": 0.28168596022558623,
      "grad_norm": 3.59495210647583,
      "learning_rate": 4.5139820531362125e-06,
      "loss": 0.0965,
      "step": 949,
      "video_reward_cumulative_accuracy": 0.7945205479452054
    },
    {
      "epoch": 0.2819827842089641,
      "grad_norm": 1.3361320495605469,
      "learning_rate": 4.512446260823828e-06,
      "loss": 0.0318,
      "step": 950,
      "video_reward_cumulative_accuracy": 0.7947368421052632
    },
    {
      "epoch": 0.28227960819234194,
      "grad_norm": 3.481994390487671,
      "learning_rate": 4.510908307955059e-06,
      "loss": 0.0588,
      "step": 951,
      "video_reward_cumulative_accuracy": 0.7944269190325972
    },
    {
      "epoch": 0.2825764321757198,
      "grad_norm": 0.8120396137237549,
      "learning_rate": 4.509368196181048e-06,
      "loss": 0.0275,
      "step": 952,
      "video_reward_cumulative_accuracy": 0.7946428571428571
    },
    {
      "epoch": 0.28287325615909764,
      "grad_norm": 6.15502405166626,
      "learning_rate": 4.507825927155253e-06,
      "loss": 0.0756,
      "step": 953,
      "video_reward_cumulative_accuracy": 0.7948583420776495
    },
    {
      "epoch": 0.2831700801424755,
      "grad_norm": 3.1125705242156982,
      "learning_rate": 4.506281502533451e-06,
      "loss": 0.0404,
      "step": 954,
      "video_reward_cumulative_accuracy": 0.7950733752620545
    },
    {
      "epoch": 0.28346690412585335,
      "grad_norm": 0.9939224123954773,
      "learning_rate": 4.50473492397373e-06,
      "loss": 0.0321,
      "step": 955,
      "video_reward_cumulative_accuracy": 0.7952879581151833
    },
    {
      "epoch": 0.28376372810923123,
      "grad_norm": 2.3135781288146973,
      "learning_rate": 4.503186193136493e-06,
      "loss": 0.0179,
      "step": 956,
      "video_reward_cumulative_accuracy": 0.7955020920502092
    },
    {
      "epoch": 0.2840605520926091,
      "grad_norm": 1.7014567852020264,
      "learning_rate": 4.501635311684453e-06,
      "loss": 0.095,
      "step": 957,
      "video_reward_cumulative_accuracy": 0.7951933124346917
    },
    {
      "epoch": 0.28435737607598693,
      "grad_norm": 1.145207166671753,
      "learning_rate": 4.500082281282632e-06,
      "loss": 0.044,
      "step": 958,
      "video_reward_cumulative_accuracy": 0.7954070981210856
    },
    {
      "epoch": 0.2846542000593648,
      "grad_norm": 1.2436047792434692,
      "learning_rate": 4.4985271035983584e-06,
      "loss": 0.0842,
      "step": 959,
      "video_reward_cumulative_accuracy": 0.7950990615224192
    },
    {
      "epoch": 0.28495102404274264,
      "grad_norm": 2.411715030670166,
      "learning_rate": 4.496969780301267e-06,
      "loss": 0.0421,
      "step": 960,
      "video_reward_cumulative_accuracy": 0.7953125
    },
    {
      "epoch": 0.2852478480261205,
      "grad_norm": 1.0124142169952393,
      "learning_rate": 4.495410313063295e-06,
      "loss": 0.037,
      "step": 961,
      "video_reward_cumulative_accuracy": 0.795525494276795
    },
    {
      "epoch": 0.28554467200949835,
      "grad_norm": 1.802912950515747,
      "learning_rate": 4.493848703558681e-06,
      "loss": 0.0552,
      "step": 962,
      "video_reward_cumulative_accuracy": 0.7957380457380457
    },
    {
      "epoch": 0.2858414959928762,
      "grad_norm": 2.056218147277832,
      "learning_rate": 4.492284953463967e-06,
      "loss": 0.0721,
      "step": 963,
      "video_reward_cumulative_accuracy": 0.7959501557632399
    },
    {
      "epoch": 0.2861383199762541,
      "grad_norm": 2.424288511276245,
      "learning_rate": 4.490719064457987e-06,
      "loss": 0.0269,
      "step": 964,
      "video_reward_cumulative_accuracy": 0.7956431535269709
    },
    {
      "epoch": 0.28643514395963193,
      "grad_norm": 1.5298640727996826,
      "learning_rate": 4.4891510382218775e-06,
      "loss": 0.0371,
      "step": 965,
      "video_reward_cumulative_accuracy": 0.7958549222797927
    },
    {
      "epoch": 0.2867319679430098,
      "grad_norm": 2.102123737335205,
      "learning_rate": 4.487580876439066e-06,
      "loss": 0.0662,
      "step": 966,
      "video_reward_cumulative_accuracy": 0.7960662525879917
    },
    {
      "epoch": 0.28702879192638764,
      "grad_norm": 1.2193660736083984,
      "learning_rate": 4.486008580795273e-06,
      "loss": 0.0652,
      "step": 967,
      "video_reward_cumulative_accuracy": 0.795760082730093
    },
    {
      "epoch": 0.2873256159097655,
      "grad_norm": 2.06874418258667,
      "learning_rate": 4.484434152978512e-06,
      "loss": 0.0292,
      "step": 968,
      "video_reward_cumulative_accuracy": 0.7954545454545454
    },
    {
      "epoch": 0.28762243989314334,
      "grad_norm": 1.5268133878707886,
      "learning_rate": 4.482857594679082e-06,
      "loss": 0.0429,
      "step": 969,
      "video_reward_cumulative_accuracy": 0.7951496388028896
    },
    {
      "epoch": 0.2879192638765212,
      "grad_norm": 2.622420072555542,
      "learning_rate": 4.4812789075895735e-06,
      "loss": 0.1016,
      "step": 970,
      "video_reward_cumulative_accuracy": 0.795360824742268
    },
    {
      "epoch": 0.2882160878598991,
      "grad_norm": 2.2772624492645264,
      "learning_rate": 4.479698093404858e-06,
      "loss": 0.0652,
      "step": 971,
      "video_reward_cumulative_accuracy": 0.7950566426364573
    },
    {
      "epoch": 0.28851291184327693,
      "grad_norm": 0.8947013020515442,
      "learning_rate": 4.478115153822096e-06,
      "loss": 0.0285,
      "step": 972,
      "video_reward_cumulative_accuracy": 0.7952674897119342
    },
    {
      "epoch": 0.2888097358266548,
      "grad_norm": 1.8154103755950928,
      "learning_rate": 4.476530090540724e-06,
      "loss": 0.0613,
      "step": 973,
      "video_reward_cumulative_accuracy": 0.7949640287769785
    },
    {
      "epoch": 0.28910655981003264,
      "grad_norm": 1.2615007162094116,
      "learning_rate": 4.474942905262462e-06,
      "loss": 0.0474,
      "step": 974,
      "video_reward_cumulative_accuracy": 0.7951745379876797
    },
    {
      "epoch": 0.2894033837934105,
      "grad_norm": 0.8353313207626343,
      "learning_rate": 4.473353599691308e-06,
      "loss": 0.0222,
      "step": 975,
      "video_reward_cumulative_accuracy": 0.7948717948717948
    },
    {
      "epoch": 0.28970020777678834,
      "grad_norm": 2.982597827911377,
      "learning_rate": 4.471762175533535e-06,
      "loss": 0.0546,
      "step": 976,
      "video_reward_cumulative_accuracy": 0.7945696721311475
    },
    {
      "epoch": 0.2899970317601662,
      "grad_norm": 2.2625701427459717,
      "learning_rate": 4.470168634497692e-06,
      "loss": 0.0405,
      "step": 977,
      "video_reward_cumulative_accuracy": 0.7947799385875128
    },
    {
      "epoch": 0.2902938557435441,
      "grad_norm": 1.974323034286499,
      "learning_rate": 4.4685729782946005e-06,
      "loss": 0.0385,
      "step": 978,
      "video_reward_cumulative_accuracy": 0.7949897750511248
    },
    {
      "epoch": 0.29059067972692193,
      "grad_norm": 4.570712566375732,
      "learning_rate": 4.46697520863735e-06,
      "loss": 0.0505,
      "step": 979,
      "video_reward_cumulative_accuracy": 0.7951991828396323
    },
    {
      "epoch": 0.2908875037102998,
      "grad_norm": 0.7615110874176025,
      "learning_rate": 4.465375327241305e-06,
      "loss": 0.0154,
      "step": 980,
      "video_reward_cumulative_accuracy": 0.7954081632653062
    },
    {
      "epoch": 0.29118432769367764,
      "grad_norm": 3.337805986404419,
      "learning_rate": 4.46377333582409e-06,
      "loss": 0.0636,
      "step": 981,
      "video_reward_cumulative_accuracy": 0.7956167176350663
    },
    {
      "epoch": 0.2914811516770555,
      "grad_norm": 1.5002707242965698,
      "learning_rate": 4.4621692361056005e-06,
      "loss": 0.0339,
      "step": 982,
      "video_reward_cumulative_accuracy": 0.7958248472505092
    },
    {
      "epoch": 0.29177797566043334,
      "grad_norm": 3.1056461334228516,
      "learning_rate": 4.460563029807991e-06,
      "loss": 0.0719,
      "step": 983,
      "video_reward_cumulative_accuracy": 0.7960325534079349
    },
    {
      "epoch": 0.2920747996438112,
      "grad_norm": 4.184977054595947,
      "learning_rate": 4.4589547186556825e-06,
      "loss": 0.0574,
      "step": 984,
      "video_reward_cumulative_accuracy": 0.796239837398374
    },
    {
      "epoch": 0.2923716236271891,
      "grad_norm": 0.9293310046195984,
      "learning_rate": 4.45734430437535e-06,
      "loss": 0.0066,
      "step": 985,
      "video_reward_cumulative_accuracy": 0.7964467005076142
    },
    {
      "epoch": 0.29266844761056693,
      "grad_norm": 1.263472557067871,
      "learning_rate": 4.455731788695933e-06,
      "loss": 0.0232,
      "step": 986,
      "video_reward_cumulative_accuracy": 0.7966531440162272
    },
    {
      "epoch": 0.2929652715939448,
      "grad_norm": 2.434809446334839,
      "learning_rate": 4.4541171733486224e-06,
      "loss": 0.06,
      "step": 987,
      "video_reward_cumulative_accuracy": 0.7958459979736575
    },
    {
      "epoch": 0.29326209557732263,
      "grad_norm": 4.916622161865234,
      "learning_rate": 4.452500460066863e-06,
      "loss": 0.0295,
      "step": 988,
      "video_reward_cumulative_accuracy": 0.7960526315789473
    },
    {
      "epoch": 0.2935589195607005,
      "grad_norm": 1.8236298561096191,
      "learning_rate": 4.450881650586354e-06,
      "loss": 0.0219,
      "step": 989,
      "video_reward_cumulative_accuracy": 0.7957532861476239
    },
    {
      "epoch": 0.29385574354407834,
      "grad_norm": 1.2961921691894531,
      "learning_rate": 4.449260746645046e-06,
      "loss": 0.0212,
      "step": 990,
      "video_reward_cumulative_accuracy": 0.795959595959596
    },
    {
      "epoch": 0.2941525675274562,
      "grad_norm": 0.6145333051681519,
      "learning_rate": 4.447637749983135e-06,
      "loss": 0.0062,
      "step": 991,
      "video_reward_cumulative_accuracy": 0.7961654894046418
    },
    {
      "epoch": 0.2944493915108341,
      "grad_norm": 3.7940635681152344,
      "learning_rate": 4.446012662343066e-06,
      "loss": 0.0816,
      "step": 992,
      "video_reward_cumulative_accuracy": 0.7953629032258065
    },
    {
      "epoch": 0.2947462154942119,
      "grad_norm": 2.3133246898651123,
      "learning_rate": 4.444385485469529e-06,
      "loss": 0.0319,
      "step": 993,
      "video_reward_cumulative_accuracy": 0.7955689828801611
    },
    {
      "epoch": 0.2950430394775898,
      "grad_norm": 2.988262891769409,
      "learning_rate": 4.442756221109456e-06,
      "loss": 0.0406,
      "step": 994,
      "video_reward_cumulative_accuracy": 0.795774647887324
    },
    {
      "epoch": 0.29533986346096763,
      "grad_norm": 0.47877877950668335,
      "learning_rate": 4.441124871012018e-06,
      "loss": 0.0096,
      "step": 995,
      "video_reward_cumulative_accuracy": 0.7959798994974875
    },
    {
      "epoch": 0.2956366874443455,
      "grad_norm": 3.234767198562622,
      "learning_rate": 4.439491436928631e-06,
      "loss": 0.0469,
      "step": 996,
      "video_reward_cumulative_accuracy": 0.7961847389558233
    },
    {
      "epoch": 0.29593351142772334,
      "grad_norm": 2.747255325317383,
      "learning_rate": 4.437855920612945e-06,
      "loss": 0.094,
      "step": 997,
      "video_reward_cumulative_accuracy": 0.7958876629889668
    },
    {
      "epoch": 0.2962303354111012,
      "grad_norm": 1.539255142211914,
      "learning_rate": 4.436218323820843e-06,
      "loss": 0.0229,
      "step": 998,
      "video_reward_cumulative_accuracy": 0.7960921843687375
    },
    {
      "epoch": 0.2965271593944791,
      "grad_norm": 4.402137279510498,
      "learning_rate": 4.4345786483104455e-06,
      "loss": 0.083,
      "step": 999,
      "video_reward_cumulative_accuracy": 0.7957957957957958
    },
    {
      "epoch": 0.2968239833778569,
      "grad_norm": 4.107393741607666,
      "learning_rate": 4.432936895842104e-06,
      "loss": 0.0391,
      "step": 1000,
      "video_reward_cumulative_accuracy": 0.796
    },
    {
      "epoch": 0.2968239833778569,
      "eval_runtime": 135.0462,
      "eval_samples_per_second": 5.842,
      "eval_steps_per_second": 0.733,
      "eval_test_set_accuracy": 0.7790404040404041,
      "step": 1000
    },
    {
      "epoch": 0.2971208073612348,
      "grad_norm": 2.150050640106201,
      "learning_rate": 4.431293068178397e-06,
      "loss": 0.1201,
      "step": 1001,
      "video_reward_cumulative_accuracy": 0.7957042957042957
    },
    {
      "epoch": 0.29741763134461263,
      "grad_norm": 1.469811201095581,
      "learning_rate": 4.429647167084135e-06,
      "loss": 0.022,
      "step": 1002,
      "video_reward_cumulative_accuracy": 0.7959081836327345
    },
    {
      "epoch": 0.2977144553279905,
      "grad_norm": 7.703973770141602,
      "learning_rate": 4.4279991943263525e-06,
      "loss": 0.0968,
      "step": 1003,
      "video_reward_cumulative_accuracy": 0.7961116650049851
    },
    {
      "epoch": 0.29801127931136834,
      "grad_norm": 4.6636271476745605,
      "learning_rate": 4.426349151674307e-06,
      "loss": 0.0969,
      "step": 1004,
      "video_reward_cumulative_accuracy": 0.795816733067729
    },
    {
      "epoch": 0.2983081032947462,
      "grad_norm": 4.039132118225098,
      "learning_rate": 4.424697040899481e-06,
      "loss": 0.0568,
      "step": 1005,
      "video_reward_cumulative_accuracy": 0.7955223880597015
    },
    {
      "epoch": 0.2986049272781241,
      "grad_norm": 3.1670384407043457,
      "learning_rate": 4.423042863775574e-06,
      "loss": 0.1071,
      "step": 1006,
      "video_reward_cumulative_accuracy": 0.7952286282306164
    },
    {
      "epoch": 0.2989017512615019,
      "grad_norm": 6.5014967918396,
      "learning_rate": 4.421386622078507e-06,
      "loss": 0.0683,
      "step": 1007,
      "video_reward_cumulative_accuracy": 0.7954319761668321
    },
    {
      "epoch": 0.2991985752448798,
      "grad_norm": 1.935164451599121,
      "learning_rate": 4.419728317586416e-06,
      "loss": 0.0532,
      "step": 1008,
      "video_reward_cumulative_accuracy": 0.7951388888888888
    },
    {
      "epoch": 0.29949539922825763,
      "grad_norm": 0.7051401138305664,
      "learning_rate": 4.418067952079651e-06,
      "loss": 0.0142,
      "step": 1009,
      "video_reward_cumulative_accuracy": 0.7953419226957383
    },
    {
      "epoch": 0.2997922232116355,
      "grad_norm": 3.0097496509552,
      "learning_rate": 4.416405527340776e-06,
      "loss": 0.0499,
      "step": 1010,
      "video_reward_cumulative_accuracy": 0.7955445544554456
    },
    {
      "epoch": 0.30008904719501334,
      "grad_norm": 3.7447476387023926,
      "learning_rate": 4.414741045154566e-06,
      "loss": 0.0378,
      "step": 1011,
      "video_reward_cumulative_accuracy": 0.7957467853610287
    },
    {
      "epoch": 0.3003858711783912,
      "grad_norm": 3.5587432384490967,
      "learning_rate": 4.4130745073080025e-06,
      "loss": 0.0991,
      "step": 1012,
      "video_reward_cumulative_accuracy": 0.7949604743083004
    },
    {
      "epoch": 0.3006826951617691,
      "grad_norm": 0.9364858269691467,
      "learning_rate": 4.411405915590278e-06,
      "loss": 0.0261,
      "step": 1013,
      "video_reward_cumulative_accuracy": 0.7951628825271471
    },
    {
      "epoch": 0.3009795191451469,
      "grad_norm": 0.5637649297714233,
      "learning_rate": 4.409735271792786e-06,
      "loss": 0.0121,
      "step": 1014,
      "video_reward_cumulative_accuracy": 0.7953648915187377
    },
    {
      "epoch": 0.3012763431285248,
      "grad_norm": 1.5942317247390747,
      "learning_rate": 4.408062577709124e-06,
      "loss": 0.0446,
      "step": 1015,
      "video_reward_cumulative_accuracy": 0.7955665024630542
    },
    {
      "epoch": 0.30157316711190263,
      "grad_norm": 2.7422399520874023,
      "learning_rate": 4.406387835135094e-06,
      "loss": 0.0223,
      "step": 1016,
      "video_reward_cumulative_accuracy": 0.7957677165354331
    },
    {
      "epoch": 0.3018699910952805,
      "grad_norm": 2.2551071643829346,
      "learning_rate": 4.404711045868694e-06,
      "loss": 0.0434,
      "step": 1017,
      "video_reward_cumulative_accuracy": 0.795968534906588
    },
    {
      "epoch": 0.30216681507865834,
      "grad_norm": 5.363723278045654,
      "learning_rate": 4.403032211710118e-06,
      "loss": 0.0916,
      "step": 1018,
      "video_reward_cumulative_accuracy": 0.7956777996070727
    },
    {
      "epoch": 0.3024636390620362,
      "grad_norm": 2.561952829360962,
      "learning_rate": 4.401351334461759e-06,
      "loss": 0.0415,
      "step": 1019,
      "video_reward_cumulative_accuracy": 0.7958783120706575
    },
    {
      "epoch": 0.3027604630454141,
      "grad_norm": 1.5561801195144653,
      "learning_rate": 4.3996684159282014e-06,
      "loss": 0.0834,
      "step": 1020,
      "video_reward_cumulative_accuracy": 0.796078431372549
    },
    {
      "epoch": 0.3030572870287919,
      "grad_norm": 2.4101288318634033,
      "learning_rate": 4.397983457916222e-06,
      "loss": 0.0574,
      "step": 1021,
      "video_reward_cumulative_accuracy": 0.7962781586679726
    },
    {
      "epoch": 0.3033541110121698,
      "grad_norm": 2.7204737663269043,
      "learning_rate": 4.3962964622347855e-06,
      "loss": 0.0439,
      "step": 1022,
      "video_reward_cumulative_accuracy": 0.7964774951076321
    },
    {
      "epoch": 0.30365093499554763,
      "grad_norm": 1.8821197748184204,
      "learning_rate": 4.3946074306950484e-06,
      "loss": 0.0284,
      "step": 1023,
      "video_reward_cumulative_accuracy": 0.7966764418377321
    },
    {
      "epoch": 0.3039477589789255,
      "grad_norm": 1.1801754236221313,
      "learning_rate": 4.392916365110347e-06,
      "loss": 0.0358,
      "step": 1024,
      "video_reward_cumulative_accuracy": 0.796875
    },
    {
      "epoch": 0.30424458296230333,
      "grad_norm": 1.7793229818344116,
      "learning_rate": 4.391223267296206e-06,
      "loss": 0.043,
      "step": 1025,
      "video_reward_cumulative_accuracy": 0.7970731707317074
    },
    {
      "epoch": 0.3045414069456812,
      "grad_norm": 1.5398008823394775,
      "learning_rate": 4.389528139070329e-06,
      "loss": 0.028,
      "step": 1026,
      "video_reward_cumulative_accuracy": 0.797270955165692
    },
    {
      "epoch": 0.3048382309290591,
      "grad_norm": 2.413785457611084,
      "learning_rate": 4.387830982252602e-06,
      "loss": 0.0461,
      "step": 1027,
      "video_reward_cumulative_accuracy": 0.7974683544303798
    },
    {
      "epoch": 0.3051350549124369,
      "grad_norm": 5.496139049530029,
      "learning_rate": 4.3861317986650875e-06,
      "loss": 0.0584,
      "step": 1028,
      "video_reward_cumulative_accuracy": 0.7976653696498055
    },
    {
      "epoch": 0.3054318788958148,
      "grad_norm": 2.5064406394958496,
      "learning_rate": 4.384430590132023e-06,
      "loss": 0.0621,
      "step": 1029,
      "video_reward_cumulative_accuracy": 0.7978620019436345
    },
    {
      "epoch": 0.3057287028791926,
      "grad_norm": 2.9505836963653564,
      "learning_rate": 4.382727358479821e-06,
      "loss": 0.0354,
      "step": 1030,
      "video_reward_cumulative_accuracy": 0.7980582524271844
    },
    {
      "epoch": 0.3060255268625705,
      "grad_norm": 5.6248297691345215,
      "learning_rate": 4.3810221055370664e-06,
      "loss": 0.0853,
      "step": 1031,
      "video_reward_cumulative_accuracy": 0.7982541222114452
    },
    {
      "epoch": 0.30632235084594833,
      "grad_norm": 2.6210145950317383,
      "learning_rate": 4.3793148331345136e-06,
      "loss": 0.0609,
      "step": 1032,
      "video_reward_cumulative_accuracy": 0.7984496124031008
    },
    {
      "epoch": 0.3066191748293262,
      "grad_norm": 3.5333571434020996,
      "learning_rate": 4.377605543105086e-06,
      "loss": 0.0319,
      "step": 1033,
      "video_reward_cumulative_accuracy": 0.7986447241045499
    },
    {
      "epoch": 0.3069159988127041,
      "grad_norm": 1.4399138689041138,
      "learning_rate": 4.375894237283872e-06,
      "loss": 0.0569,
      "step": 1034,
      "video_reward_cumulative_accuracy": 0.7988394584139265
    },
    {
      "epoch": 0.3072128227960819,
      "grad_norm": 1.2284201383590698,
      "learning_rate": 4.374180917508124e-06,
      "loss": 0.0193,
      "step": 1035,
      "video_reward_cumulative_accuracy": 0.7990338164251207
    },
    {
      "epoch": 0.3075096467794598,
      "grad_norm": 2.532024383544922,
      "learning_rate": 4.372465585617257e-06,
      "loss": 0.0401,
      "step": 1036,
      "video_reward_cumulative_accuracy": 0.7992277992277992
    },
    {
      "epoch": 0.3078064707628376,
      "grad_norm": 1.7466938495635986,
      "learning_rate": 4.370748243452846e-06,
      "loss": 0.038,
      "step": 1037,
      "video_reward_cumulative_accuracy": 0.7994214079074252
    },
    {
      "epoch": 0.3081032947462155,
      "grad_norm": 1.943616509437561,
      "learning_rate": 4.369028892858626e-06,
      "loss": 0.0164,
      "step": 1038,
      "video_reward_cumulative_accuracy": 0.7996146435452793
    },
    {
      "epoch": 0.30840011872959333,
      "grad_norm": 0.6063635349273682,
      "learning_rate": 4.367307535680485e-06,
      "loss": 0.0151,
      "step": 1039,
      "video_reward_cumulative_accuracy": 0.7998075072184793
    },
    {
      "epoch": 0.3086969427129712,
      "grad_norm": 6.530287742614746,
      "learning_rate": 4.3655841737664685e-06,
      "loss": 0.1203,
      "step": 1040,
      "video_reward_cumulative_accuracy": 0.7995192307692308
    },
    {
      "epoch": 0.3089937666963491,
      "grad_norm": 4.490469932556152,
      "learning_rate": 4.363858808966772e-06,
      "loss": 0.0926,
      "step": 1041,
      "video_reward_cumulative_accuracy": 0.7997118155619597
    },
    {
      "epoch": 0.3092905906797269,
      "grad_norm": 1.2573050260543823,
      "learning_rate": 4.362131443133742e-06,
      "loss": 0.0368,
      "step": 1042,
      "video_reward_cumulative_accuracy": 0.7994241842610365
    },
    {
      "epoch": 0.3095874146631048,
      "grad_norm": 3.369048595428467,
      "learning_rate": 4.3604020781218736e-06,
      "loss": 0.0502,
      "step": 1043,
      "video_reward_cumulative_accuracy": 0.7996164908916586
    },
    {
      "epoch": 0.3098842386464826,
      "grad_norm": 1.5536525249481201,
      "learning_rate": 4.358670715787808e-06,
      "loss": 0.0142,
      "step": 1044,
      "video_reward_cumulative_accuracy": 0.7998084291187739
    },
    {
      "epoch": 0.3101810626298605,
      "grad_norm": 2.2478020191192627,
      "learning_rate": 4.356937357990331e-06,
      "loss": 0.0408,
      "step": 1045,
      "video_reward_cumulative_accuracy": 0.8
    },
    {
      "epoch": 0.31047788661323833,
      "grad_norm": 4.114165782928467,
      "learning_rate": 4.3552020065903685e-06,
      "loss": 0.0481,
      "step": 1046,
      "video_reward_cumulative_accuracy": 0.7992351816443595
    },
    {
      "epoch": 0.3107747105966162,
      "grad_norm": 2.3197696208953857,
      "learning_rate": 4.353464663450991e-06,
      "loss": 0.0471,
      "step": 1047,
      "video_reward_cumulative_accuracy": 0.7994269340974212
    },
    {
      "epoch": 0.3110715345799941,
      "grad_norm": 1.8591456413269043,
      "learning_rate": 4.351725330437405e-06,
      "loss": 0.0179,
      "step": 1048,
      "video_reward_cumulative_accuracy": 0.799618320610687
    },
    {
      "epoch": 0.3113683585633719,
      "grad_norm": 1.4338923692703247,
      "learning_rate": 4.349984009416952e-06,
      "loss": 0.0159,
      "step": 1049,
      "video_reward_cumulative_accuracy": 0.7993326978074357
    },
    {
      "epoch": 0.3116651825467498,
      "grad_norm": 5.681126594543457,
      "learning_rate": 4.34824070225911e-06,
      "loss": 0.0691,
      "step": 1050,
      "video_reward_cumulative_accuracy": 0.799047619047619
    },
    {
      "epoch": 0.3119620065301276,
      "grad_norm": 1.271953821182251,
      "learning_rate": 4.346495410835487e-06,
      "loss": 0.0444,
      "step": 1051,
      "video_reward_cumulative_accuracy": 0.7992388201712655
    },
    {
      "epoch": 0.3122588305135055,
      "grad_norm": 2.048283100128174,
      "learning_rate": 4.344748137019825e-06,
      "loss": 0.0131,
      "step": 1052,
      "video_reward_cumulative_accuracy": 0.7989543726235742
    },
    {
      "epoch": 0.31255565449688333,
      "grad_norm": 1.9480700492858887,
      "learning_rate": 4.34299888268799e-06,
      "loss": 0.0607,
      "step": 1053,
      "video_reward_cumulative_accuracy": 0.798670465337132
    },
    {
      "epoch": 0.3128524784802612,
      "grad_norm": 2.398563861846924,
      "learning_rate": 4.341247649717978e-06,
      "loss": 0.055,
      "step": 1054,
      "video_reward_cumulative_accuracy": 0.7988614800759013
    },
    {
      "epoch": 0.31314930246363903,
      "grad_norm": 2.373682737350464,
      "learning_rate": 4.339494439989907e-06,
      "loss": 0.0684,
      "step": 1055,
      "video_reward_cumulative_accuracy": 0.7990521327014218
    },
    {
      "epoch": 0.3134461264470169,
      "grad_norm": 3.2019567489624023,
      "learning_rate": 4.3377392553860156e-06,
      "loss": 0.0426,
      "step": 1056,
      "video_reward_cumulative_accuracy": 0.7992424242424242
    },
    {
      "epoch": 0.3137429504303948,
      "grad_norm": 5.288125038146973,
      "learning_rate": 4.335982097790668e-06,
      "loss": 0.0768,
      "step": 1057,
      "video_reward_cumulative_accuracy": 0.7994323557237465
    },
    {
      "epoch": 0.3140397744137726,
      "grad_norm": 4.724822521209717,
      "learning_rate": 4.334222969090342e-06,
      "loss": 0.0782,
      "step": 1058,
      "video_reward_cumulative_accuracy": 0.7996219281663516
    },
    {
      "epoch": 0.3143365983971505,
      "grad_norm": 1.1170587539672852,
      "learning_rate": 4.332461871173633e-06,
      "loss": 0.0591,
      "step": 1059,
      "video_reward_cumulative_accuracy": 0.7998111425873465
    },
    {
      "epoch": 0.3146334223805283,
      "grad_norm": 3.1828134059906006,
      "learning_rate": 4.330698805931251e-06,
      "loss": 0.0642,
      "step": 1060,
      "video_reward_cumulative_accuracy": 0.8
    },
    {
      "epoch": 0.3149302463639062,
      "grad_norm": 3.1132760047912598,
      "learning_rate": 4.328933775256017e-06,
      "loss": 0.0925,
      "step": 1061,
      "video_reward_cumulative_accuracy": 0.8001885014137606
    },
    {
      "epoch": 0.31522707034728403,
      "grad_norm": 2.5283889770507812,
      "learning_rate": 4.327166781042864e-06,
      "loss": 0.0319,
      "step": 1062,
      "video_reward_cumulative_accuracy": 0.7999058380414312
    },
    {
      "epoch": 0.3155238943306619,
      "grad_norm": 2.8218538761138916,
      "learning_rate": 4.325397825188829e-06,
      "loss": 0.0391,
      "step": 1063,
      "video_reward_cumulative_accuracy": 0.8000940733772343
    },
    {
      "epoch": 0.3158207183140398,
      "grad_norm": 2.607076644897461,
      "learning_rate": 4.323626909593062e-06,
      "loss": 0.0454,
      "step": 1064,
      "video_reward_cumulative_accuracy": 0.8002819548872181
    },
    {
      "epoch": 0.3161175422974176,
      "grad_norm": 1.558544397354126,
      "learning_rate": 4.321854036156809e-06,
      "loss": 0.0189,
      "step": 1065,
      "video_reward_cumulative_accuracy": 0.8004694835680751
    },
    {
      "epoch": 0.3164143662807955,
      "grad_norm": 1.2349810600280762,
      "learning_rate": 4.320079206783423e-06,
      "loss": 0.0272,
      "step": 1066,
      "video_reward_cumulative_accuracy": 0.800656660412758
    },
    {
      "epoch": 0.3167111902641733,
      "grad_norm": 1.2629001140594482,
      "learning_rate": 4.318302423378357e-06,
      "loss": 0.0318,
      "step": 1067,
      "video_reward_cumulative_accuracy": 0.8003748828491096
    },
    {
      "epoch": 0.3170080142475512,
      "grad_norm": 2.740196466445923,
      "learning_rate": 4.3165236878491575e-06,
      "loss": 0.0518,
      "step": 1068,
      "video_reward_cumulative_accuracy": 0.800561797752809
    },
    {
      "epoch": 0.31730483823092903,
      "grad_norm": 3.2238783836364746,
      "learning_rate": 4.314743002105473e-06,
      "loss": 0.0403,
      "step": 1069,
      "video_reward_cumulative_accuracy": 0.8002806361085126
    },
    {
      "epoch": 0.3176016622143069,
      "grad_norm": 1.6323812007904053,
      "learning_rate": 4.31296036805904e-06,
      "loss": 0.0721,
      "step": 1070,
      "video_reward_cumulative_accuracy": 0.8004672897196262
    },
    {
      "epoch": 0.3178984861976848,
      "grad_norm": 1.8098689317703247,
      "learning_rate": 4.3111757876236905e-06,
      "loss": 0.0256,
      "step": 1071,
      "video_reward_cumulative_accuracy": 0.8006535947712419
    },
    {
      "epoch": 0.3181953101810626,
      "grad_norm": 1.6807585954666138,
      "learning_rate": 4.309389262715344e-06,
      "loss": 0.0353,
      "step": 1072,
      "video_reward_cumulative_accuracy": 0.800839552238806
    },
    {
      "epoch": 0.3184921341644405,
      "grad_norm": 4.06589937210083,
      "learning_rate": 4.307600795252008e-06,
      "loss": 0.0547,
      "step": 1073,
      "video_reward_cumulative_accuracy": 0.8005591798695247
    },
    {
      "epoch": 0.3187889581478183,
      "grad_norm": 2.8750007152557373,
      "learning_rate": 4.305810387153778e-06,
      "loss": 0.0552,
      "step": 1074,
      "video_reward_cumulative_accuracy": 0.8007448789571695
    },
    {
      "epoch": 0.3190857821311962,
      "grad_norm": 1.4461251497268677,
      "learning_rate": 4.30401804034283e-06,
      "loss": 0.0467,
      "step": 1075,
      "video_reward_cumulative_accuracy": 0.8009302325581396
    },
    {
      "epoch": 0.31938260611457403,
      "grad_norm": 7.60485315322876,
      "learning_rate": 4.30222375674342e-06,
      "loss": 0.0826,
      "step": 1076,
      "video_reward_cumulative_accuracy": 0.8011152416356877
    },
    {
      "epoch": 0.3196794300979519,
      "grad_norm": 5.275068283081055,
      "learning_rate": 4.3004275382818884e-06,
      "loss": 0.0536,
      "step": 1077,
      "video_reward_cumulative_accuracy": 0.8012999071494893
    },
    {
      "epoch": 0.3199762540813298,
      "grad_norm": 2.498542547225952,
      "learning_rate": 4.298629386886649e-06,
      "loss": 0.0574,
      "step": 1078,
      "video_reward_cumulative_accuracy": 0.8010204081632653
    },
    {
      "epoch": 0.3202730780647076,
      "grad_norm": 4.200756072998047,
      "learning_rate": 4.296829304488191e-06,
      "loss": 0.0782,
      "step": 1079,
      "video_reward_cumulative_accuracy": 0.8012048192771084
    },
    {
      "epoch": 0.3205699020480855,
      "grad_norm": 1.4311738014221191,
      "learning_rate": 4.29502729301908e-06,
      "loss": 0.0201,
      "step": 1080,
      "video_reward_cumulative_accuracy": 0.8013888888888889
    },
    {
      "epoch": 0.3208667260314633,
      "grad_norm": 0.836157500743866,
      "learning_rate": 4.293223354413948e-06,
      "loss": 0.0322,
      "step": 1081,
      "video_reward_cumulative_accuracy": 0.8015726179463459
    },
    {
      "epoch": 0.3211635500148412,
      "grad_norm": 1.57651686668396,
      "learning_rate": 4.2914174906094985e-06,
      "loss": 0.0481,
      "step": 1082,
      "video_reward_cumulative_accuracy": 0.8017560073937153
    },
    {
      "epoch": 0.32146037399821903,
      "grad_norm": 1.0639756917953491,
      "learning_rate": 4.289609703544501e-06,
      "loss": 0.0469,
      "step": 1083,
      "video_reward_cumulative_accuracy": 0.8019390581717452
    },
    {
      "epoch": 0.3217571979815969,
      "grad_norm": 2.068735361099243,
      "learning_rate": 4.2877999951597935e-06,
      "loss": 0.0474,
      "step": 1084,
      "video_reward_cumulative_accuracy": 0.8021217712177122
    },
    {
      "epoch": 0.3220540219649748,
      "grad_norm": 2.1171066761016846,
      "learning_rate": 4.28598836739827e-06,
      "loss": 0.0429,
      "step": 1085,
      "video_reward_cumulative_accuracy": 0.8018433179723502
    },
    {
      "epoch": 0.3223508459483526,
      "grad_norm": 3.831587076187134,
      "learning_rate": 4.28417482220489e-06,
      "loss": 0.0376,
      "step": 1086,
      "video_reward_cumulative_accuracy": 0.8015653775322283
    },
    {
      "epoch": 0.3226476699317305,
      "grad_norm": 6.328731536865234,
      "learning_rate": 4.282359361526671e-06,
      "loss": 0.0615,
      "step": 1087,
      "video_reward_cumulative_accuracy": 0.8012879484820608
    },
    {
      "epoch": 0.3229444939151083,
      "grad_norm": 1.401442289352417,
      "learning_rate": 4.2805419873126855e-06,
      "loss": 0.0197,
      "step": 1088,
      "video_reward_cumulative_accuracy": 0.8014705882352942
    },
    {
      "epoch": 0.3232413178984862,
      "grad_norm": 4.52972412109375,
      "learning_rate": 4.278722701514061e-06,
      "loss": 0.0569,
      "step": 1089,
      "video_reward_cumulative_accuracy": 0.8016528925619835
    },
    {
      "epoch": 0.323538141881864,
      "grad_norm": 2.7835745811462402,
      "learning_rate": 4.276901506083978e-06,
      "loss": 0.0962,
      "step": 1090,
      "video_reward_cumulative_accuracy": 0.8018348623853211
    },
    {
      "epoch": 0.3238349658652419,
      "grad_norm": 1.7582579851150513,
      "learning_rate": 4.275078402977666e-06,
      "loss": 0.0331,
      "step": 1091,
      "video_reward_cumulative_accuracy": 0.8020164986251146
    },
    {
      "epoch": 0.3241317898486198,
      "grad_norm": 1.52336847782135,
      "learning_rate": 4.273253394152404e-06,
      "loss": 0.059,
      "step": 1092,
      "video_reward_cumulative_accuracy": 0.8021978021978022
    },
    {
      "epoch": 0.3244286138319976,
      "grad_norm": 1.373092532157898,
      "learning_rate": 4.271426481567515e-06,
      "loss": 0.0319,
      "step": 1093,
      "video_reward_cumulative_accuracy": 0.8023787740164684
    },
    {
      "epoch": 0.3247254378153755,
      "grad_norm": 4.265668869018555,
      "learning_rate": 4.269597667184366e-06,
      "loss": 0.1015,
      "step": 1094,
      "video_reward_cumulative_accuracy": 0.8025594149908593
    },
    {
      "epoch": 0.3250222617987533,
      "grad_norm": 2.7200961112976074,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.0494,
      "step": 1095,
      "video_reward_cumulative_accuracy": 0.8027397260273973
    },
    {
      "epoch": 0.3253190857821312,
      "grad_norm": 2.589541435241699,
      "learning_rate": 4.2659343408789734e-06,
      "loss": 0.038,
      "step": 1096,
      "video_reward_cumulative_accuracy": 0.8029197080291971
    },
    {
      "epoch": 0.325615909765509,
      "grad_norm": 2.7991340160369873,
      "learning_rate": 4.264099832889665e-06,
      "loss": 0.0584,
      "step": 1097,
      "video_reward_cumulative_accuracy": 0.8030993618960802
    },
    {
      "epoch": 0.3259127337488869,
      "grad_norm": 2.104408025741577,
      "learning_rate": 4.262263430967966e-06,
      "loss": 0.0505,
      "step": 1098,
      "video_reward_cumulative_accuracy": 0.8032786885245902
    },
    {
      "epoch": 0.3262095577322648,
      "grad_norm": 1.6177819967269897,
      "learning_rate": 4.2604251370854325e-06,
      "loss": 0.0405,
      "step": 1099,
      "video_reward_cumulative_accuracy": 0.8030027297543221
    },
    {
      "epoch": 0.3265063817156426,
      "grad_norm": 4.011326789855957,
      "learning_rate": 4.2585849532156505e-06,
      "loss": 0.0463,
      "step": 1100,
      "video_reward_cumulative_accuracy": 0.8027272727272727
    },
    {
      "epoch": 0.3268032056990205,
      "grad_norm": 1.1863105297088623,
      "learning_rate": 4.256742881334238e-06,
      "loss": 0.0208,
      "step": 1101,
      "video_reward_cumulative_accuracy": 0.8029064486830154
    },
    {
      "epoch": 0.3271000296823983,
      "grad_norm": 2.122631072998047,
      "learning_rate": 4.254898923418838e-06,
      "loss": 0.0622,
      "step": 1102,
      "video_reward_cumulative_accuracy": 0.8026315789473685
    },
    {
      "epoch": 0.3273968536657762,
      "grad_norm": 2.541916608810425,
      "learning_rate": 4.253053081449116e-06,
      "loss": 0.0483,
      "step": 1103,
      "video_reward_cumulative_accuracy": 0.8028105167724388
    },
    {
      "epoch": 0.327693677649154,
      "grad_norm": 3.4067628383636475,
      "learning_rate": 4.251205357406764e-06,
      "loss": 0.0289,
      "step": 1104,
      "video_reward_cumulative_accuracy": 0.802536231884058
    },
    {
      "epoch": 0.3279905016325319,
      "grad_norm": 1.9281690120697021,
      "learning_rate": 4.249355753275492e-06,
      "loss": 0.0488,
      "step": 1105,
      "video_reward_cumulative_accuracy": 0.8022624434389141
    },
    {
      "epoch": 0.3282873256159098,
      "grad_norm": 4.4767231941223145,
      "learning_rate": 4.247504271041031e-06,
      "loss": 0.0644,
      "step": 1106,
      "video_reward_cumulative_accuracy": 0.8024412296564195
    },
    {
      "epoch": 0.3285841495992876,
      "grad_norm": 4.301503658294678,
      "learning_rate": 4.245650912691127e-06,
      "loss": 0.0987,
      "step": 1107,
      "video_reward_cumulative_accuracy": 0.8026196928635954
    },
    {
      "epoch": 0.3288809735826655,
      "grad_norm": 2.4027132987976074,
      "learning_rate": 4.243795680215538e-06,
      "loss": 0.0807,
      "step": 1108,
      "video_reward_cumulative_accuracy": 0.8023465703971119
    },
    {
      "epoch": 0.3291777975660433,
      "grad_norm": 2.302290678024292,
      "learning_rate": 4.241938575606038e-06,
      "loss": 0.0403,
      "step": 1109,
      "video_reward_cumulative_accuracy": 0.8025247971145176
    },
    {
      "epoch": 0.3294746215494212,
      "grad_norm": 4.239207744598389,
      "learning_rate": 4.240079600856408e-06,
      "loss": 0.062,
      "step": 1110,
      "video_reward_cumulative_accuracy": 0.8027027027027027
    },
    {
      "epoch": 0.329771445532799,
      "grad_norm": 3.736924171447754,
      "learning_rate": 4.238218757962439e-06,
      "loss": 0.0399,
      "step": 1111,
      "video_reward_cumulative_accuracy": 0.8028802880288028
    },
    {
      "epoch": 0.3300682695161769,
      "grad_norm": 1.4400713443756104,
      "learning_rate": 4.2363560489219255e-06,
      "loss": 0.0213,
      "step": 1112,
      "video_reward_cumulative_accuracy": 0.8030575539568345
    },
    {
      "epoch": 0.3303650934995548,
      "grad_norm": 1.8887395858764648,
      "learning_rate": 4.234491475734667e-06,
      "loss": 0.0368,
      "step": 1113,
      "video_reward_cumulative_accuracy": 0.8032345013477089
    },
    {
      "epoch": 0.3306619174829326,
      "grad_norm": 1.5093616247177124,
      "learning_rate": 4.232625040402463e-06,
      "loss": 0.0272,
      "step": 1114,
      "video_reward_cumulative_accuracy": 0.803411131059246
    },
    {
      "epoch": 0.3309587414663105,
      "grad_norm": 5.804702281951904,
      "learning_rate": 4.230756744929114e-06,
      "loss": 0.0515,
      "step": 1115,
      "video_reward_cumulative_accuracy": 0.8031390134529148
    },
    {
      "epoch": 0.3312555654496883,
      "grad_norm": 0.6705430150032043,
      "learning_rate": 4.228886591320415e-06,
      "loss": 0.0149,
      "step": 1116,
      "video_reward_cumulative_accuracy": 0.8033154121863799
    },
    {
      "epoch": 0.3315523894330662,
      "grad_norm": 5.313145637512207,
      "learning_rate": 4.227014581584159e-06,
      "loss": 0.0725,
      "step": 1117,
      "video_reward_cumulative_accuracy": 0.8034914950760967
    },
    {
      "epoch": 0.331849213416444,
      "grad_norm": 2.9840526580810547,
      "learning_rate": 4.2251407177301295e-06,
      "loss": 0.0556,
      "step": 1118,
      "video_reward_cumulative_accuracy": 0.8032200357781754
    },
    {
      "epoch": 0.3321460373998219,
      "grad_norm": 1.9225348234176636,
      "learning_rate": 4.2232650017701015e-06,
      "loss": 0.0517,
      "step": 1119,
      "video_reward_cumulative_accuracy": 0.8029490616621984
    },
    {
      "epoch": 0.3324428613831998,
      "grad_norm": 6.583012580871582,
      "learning_rate": 4.221387435717838e-06,
      "loss": 0.0913,
      "step": 1120,
      "video_reward_cumulative_accuracy": 0.803125
    },
    {
      "epoch": 0.3327396853665776,
      "grad_norm": 5.673295021057129,
      "learning_rate": 4.219508021589088e-06,
      "loss": 0.0629,
      "step": 1121,
      "video_reward_cumulative_accuracy": 0.8033006244424621
    },
    {
      "epoch": 0.3330365093499555,
      "grad_norm": 2.6932709217071533,
      "learning_rate": 4.217626761401585e-06,
      "loss": 0.035,
      "step": 1122,
      "video_reward_cumulative_accuracy": 0.803475935828877
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 1.6052873134613037,
      "learning_rate": 4.215743657175046e-06,
      "loss": 0.0277,
      "step": 1123,
      "video_reward_cumulative_accuracy": 0.8036509349955476
    },
    {
      "epoch": 0.3336301573167112,
      "grad_norm": 1.1534535884857178,
      "learning_rate": 4.213858710931163e-06,
      "loss": 0.0273,
      "step": 1124,
      "video_reward_cumulative_accuracy": 0.8033807829181495
    },
    {
      "epoch": 0.333926981300089,
      "grad_norm": 1.3987401723861694,
      "learning_rate": 4.2119719246936114e-06,
      "loss": 0.034,
      "step": 1125,
      "video_reward_cumulative_accuracy": 0.8035555555555556
    },
    {
      "epoch": 0.3342238052834669,
      "grad_norm": 2.05250883102417,
      "learning_rate": 4.210083300488038e-06,
      "loss": 0.0606,
      "step": 1126,
      "video_reward_cumulative_accuracy": 0.8037300177619894
    },
    {
      "epoch": 0.3345206292668448,
      "grad_norm": 2.67376971244812,
      "learning_rate": 4.208192840342066e-06,
      "loss": 0.0622,
      "step": 1127,
      "video_reward_cumulative_accuracy": 0.8039041703637977
    },
    {
      "epoch": 0.3348174532502226,
      "grad_norm": 1.0269320011138916,
      "learning_rate": 4.206300546285286e-06,
      "loss": 0.0466,
      "step": 1128,
      "video_reward_cumulative_accuracy": 0.8036347517730497
    },
    {
      "epoch": 0.3351142772336005,
      "grad_norm": 6.182705879211426,
      "learning_rate": 4.204406420349259e-06,
      "loss": 0.035,
      "step": 1129,
      "video_reward_cumulative_accuracy": 0.8038086802480071
    },
    {
      "epoch": 0.3354111012169783,
      "grad_norm": 3.8771231174468994,
      "learning_rate": 4.2025104645675145e-06,
      "loss": 0.0472,
      "step": 1130,
      "video_reward_cumulative_accuracy": 0.8039823008849557
    },
    {
      "epoch": 0.3357079252003562,
      "grad_norm": 4.503866195678711,
      "learning_rate": 4.200612680975545e-06,
      "loss": 0.0592,
      "step": 1131,
      "video_reward_cumulative_accuracy": 0.8037135278514589
    },
    {
      "epoch": 0.336004749183734,
      "grad_norm": 1.73423433303833,
      "learning_rate": 4.1987130716108046e-06,
      "loss": 0.035,
      "step": 1132,
      "video_reward_cumulative_accuracy": 0.8034452296819788
    },
    {
      "epoch": 0.3363015731671119,
      "grad_norm": 2.8510076999664307,
      "learning_rate": 4.196811638512708e-06,
      "loss": 0.0513,
      "step": 1133,
      "video_reward_cumulative_accuracy": 0.8031774051191527
    },
    {
      "epoch": 0.3365983971504898,
      "grad_norm": 2.853792190551758,
      "learning_rate": 4.194908383722629e-06,
      "loss": 0.0676,
      "step": 1134,
      "video_reward_cumulative_accuracy": 0.8029100529100529
    },
    {
      "epoch": 0.3368952211338676,
      "grad_norm": 3.5561459064483643,
      "learning_rate": 4.193003309283896e-06,
      "loss": 0.0834,
      "step": 1135,
      "video_reward_cumulative_accuracy": 0.8030837004405287
    },
    {
      "epoch": 0.3371920451172455,
      "grad_norm": 1.2049405574798584,
      "learning_rate": 4.191096417241792e-06,
      "loss": 0.0216,
      "step": 1136,
      "video_reward_cumulative_accuracy": 0.8032570422535211
    },
    {
      "epoch": 0.3374888691006233,
      "grad_norm": 1.6044228076934814,
      "learning_rate": 4.189187709643549e-06,
      "loss": 0.0478,
      "step": 1137,
      "video_reward_cumulative_accuracy": 0.8034300791556728
    },
    {
      "epoch": 0.3377856930840012,
      "grad_norm": 1.5150611400604248,
      "learning_rate": 4.1872771885383525e-06,
      "loss": 0.0535,
      "step": 1138,
      "video_reward_cumulative_accuracy": 0.8031634446397188
    },
    {
      "epoch": 0.338082517067379,
      "grad_norm": 3.9265263080596924,
      "learning_rate": 4.18536485597733e-06,
      "loss": 0.0596,
      "step": 1139,
      "video_reward_cumulative_accuracy": 0.8033362598770851
    },
    {
      "epoch": 0.3383793410507569,
      "grad_norm": 7.542102336883545,
      "learning_rate": 4.183450714013557e-06,
      "loss": 0.0811,
      "step": 1140,
      "video_reward_cumulative_accuracy": 0.8035087719298246
    },
    {
      "epoch": 0.3386761650341348,
      "grad_norm": 4.337952613830566,
      "learning_rate": 4.181534764702051e-06,
      "loss": 0.0957,
      "step": 1141,
      "video_reward_cumulative_accuracy": 0.8032427695004382
    },
    {
      "epoch": 0.3389729890175126,
      "grad_norm": 1.57660710811615,
      "learning_rate": 4.179617010099768e-06,
      "loss": 0.0462,
      "step": 1142,
      "video_reward_cumulative_accuracy": 0.803415061295972
    },
    {
      "epoch": 0.3392698130008905,
      "grad_norm": 1.877387285232544,
      "learning_rate": 4.177697452265605e-06,
      "loss": 0.0485,
      "step": 1143,
      "video_reward_cumulative_accuracy": 0.8035870516185477
    },
    {
      "epoch": 0.3395666369842683,
      "grad_norm": 3.2722318172454834,
      "learning_rate": 4.175776093260395e-06,
      "loss": 0.0395,
      "step": 1144,
      "video_reward_cumulative_accuracy": 0.8037587412587412
    },
    {
      "epoch": 0.3398634609676462,
      "grad_norm": 2.0722169876098633,
      "learning_rate": 4.1738529351469e-06,
      "loss": 0.0416,
      "step": 1145,
      "video_reward_cumulative_accuracy": 0.8039301310043668
    },
    {
      "epoch": 0.340160284951024,
      "grad_norm": 1.58327317237854,
      "learning_rate": 4.1719279799898205e-06,
      "loss": 0.0566,
      "step": 1146,
      "video_reward_cumulative_accuracy": 0.8041012216404887
    },
    {
      "epoch": 0.3404571089344019,
      "grad_norm": 4.316904544830322,
      "learning_rate": 4.17000122985578e-06,
      "loss": 0.0597,
      "step": 1147,
      "video_reward_cumulative_accuracy": 0.8038360941586749
    },
    {
      "epoch": 0.3407539329177798,
      "grad_norm": 5.312283039093018,
      "learning_rate": 4.168072686813332e-06,
      "loss": 0.0579,
      "step": 1148,
      "video_reward_cumulative_accuracy": 0.804006968641115
    },
    {
      "epoch": 0.3410507569011576,
      "grad_norm": 1.4211534261703491,
      "learning_rate": 4.166142352932957e-06,
      "loss": 0.032,
      "step": 1149,
      "video_reward_cumulative_accuracy": 0.804177545691906
    },
    {
      "epoch": 0.3413475808845355,
      "grad_norm": 1.4748672246932983,
      "learning_rate": 4.164210230287053e-06,
      "loss": 0.0391,
      "step": 1150,
      "video_reward_cumulative_accuracy": 0.8043478260869565
    },
    {
      "epoch": 0.3416444048679133,
      "grad_norm": 1.9210374355316162,
      "learning_rate": 4.162276320949943e-06,
      "loss": 0.0787,
      "step": 1151,
      "video_reward_cumulative_accuracy": 0.8036490008688097
    },
    {
      "epoch": 0.3419412288512912,
      "grad_norm": 3.2308928966522217,
      "learning_rate": 4.160340626997865e-06,
      "loss": 0.0535,
      "step": 1152,
      "video_reward_cumulative_accuracy": 0.8038194444444444
    },
    {
      "epoch": 0.342238052834669,
      "grad_norm": 1.825577735900879,
      "learning_rate": 4.158403150508975e-06,
      "loss": 0.0535,
      "step": 1153,
      "video_reward_cumulative_accuracy": 0.8035559410234172
    },
    {
      "epoch": 0.3425348768180469,
      "grad_norm": 1.2146353721618652,
      "learning_rate": 4.156463893563342e-06,
      "loss": 0.0576,
      "step": 1154,
      "video_reward_cumulative_accuracy": 0.8032928942807626
    },
    {
      "epoch": 0.3428317008014248,
      "grad_norm": 1.1705416440963745,
      "learning_rate": 4.154522858242947e-06,
      "loss": 0.0369,
      "step": 1155,
      "video_reward_cumulative_accuracy": 0.8034632034632034
    },
    {
      "epoch": 0.3431285247848026,
      "grad_norm": 1.2783030271530151,
      "learning_rate": 4.15258004663168e-06,
      "loss": 0.0363,
      "step": 1156,
      "video_reward_cumulative_accuracy": 0.8036332179930796
    },
    {
      "epoch": 0.3434253487681805,
      "grad_norm": 1.560981273651123,
      "learning_rate": 4.150635460815336e-06,
      "loss": 0.0454,
      "step": 1157,
      "video_reward_cumulative_accuracy": 0.8033707865168539
    },
    {
      "epoch": 0.3437221727515583,
      "grad_norm": 1.9000885486602783,
      "learning_rate": 4.148689102881619e-06,
      "loss": 0.0535,
      "step": 1158,
      "video_reward_cumulative_accuracy": 0.8035405872193437
    },
    {
      "epoch": 0.3440189967349362,
      "grad_norm": 1.363930344581604,
      "learning_rate": 4.146740974920131e-06,
      "loss": 0.0444,
      "step": 1159,
      "video_reward_cumulative_accuracy": 0.8037100949094047
    },
    {
      "epoch": 0.344315820718314,
      "grad_norm": 1.4986649751663208,
      "learning_rate": 4.144791079022379e-06,
      "loss": 0.0329,
      "step": 1160,
      "video_reward_cumulative_accuracy": 0.8038793103448276
    },
    {
      "epoch": 0.3446126447016919,
      "grad_norm": 2.1276040077209473,
      "learning_rate": 4.142839417281762e-06,
      "loss": 0.0409,
      "step": 1161,
      "video_reward_cumulative_accuracy": 0.8040482342807924
    },
    {
      "epoch": 0.3449094686850698,
      "grad_norm": 2.0634193420410156,
      "learning_rate": 4.140885991793582e-06,
      "loss": 0.0363,
      "step": 1162,
      "video_reward_cumulative_accuracy": 0.8042168674698795
    },
    {
      "epoch": 0.3452062926684476,
      "grad_norm": 4.047058582305908,
      "learning_rate": 4.138930804655028e-06,
      "loss": 0.0609,
      "step": 1163,
      "video_reward_cumulative_accuracy": 0.8039552880481513
    },
    {
      "epoch": 0.3455031166518255,
      "grad_norm": 1.8624390363693237,
      "learning_rate": 4.136973857965185e-06,
      "loss": 0.0345,
      "step": 1164,
      "video_reward_cumulative_accuracy": 0.8041237113402062
    },
    {
      "epoch": 0.3457999406352033,
      "grad_norm": 3.3220489025115967,
      "learning_rate": 4.135015153825024e-06,
      "loss": 0.0393,
      "step": 1165,
      "video_reward_cumulative_accuracy": 0.8042918454935623
    },
    {
      "epoch": 0.3460967646185812,
      "grad_norm": 2.244272470474243,
      "learning_rate": 4.133054694337404e-06,
      "loss": 0.1681,
      "step": 1166,
      "video_reward_cumulative_accuracy": 0.8040308747855918
    },
    {
      "epoch": 0.346393588601959,
      "grad_norm": 1.2074984312057495,
      "learning_rate": 4.1310924816070705e-06,
      "loss": 0.0117,
      "step": 1167,
      "video_reward_cumulative_accuracy": 0.8041988003427593
    },
    {
      "epoch": 0.3466904125853369,
      "grad_norm": 4.098258018493652,
      "learning_rate": 4.129128517740647e-06,
      "loss": 0.0522,
      "step": 1168,
      "video_reward_cumulative_accuracy": 0.8039383561643836
    },
    {
      "epoch": 0.3469872365687148,
      "grad_norm": 1.4286725521087646,
      "learning_rate": 4.12716280484664e-06,
      "loss": 0.0594,
      "step": 1169,
      "video_reward_cumulative_accuracy": 0.8032506415739948
    },
    {
      "epoch": 0.3472840605520926,
      "grad_norm": 3.419475555419922,
      "learning_rate": 4.125195345035433e-06,
      "loss": 0.0582,
      "step": 1170,
      "video_reward_cumulative_accuracy": 0.8034188034188035
    },
    {
      "epoch": 0.3475808845354705,
      "grad_norm": 1.2283254861831665,
      "learning_rate": 4.1232261404192865e-06,
      "loss": 0.0162,
      "step": 1171,
      "video_reward_cumulative_accuracy": 0.8035866780529461
    },
    {
      "epoch": 0.3478777085188483,
      "grad_norm": 1.2608306407928467,
      "learning_rate": 4.12125519311233e-06,
      "loss": 0.0137,
      "step": 1172,
      "video_reward_cumulative_accuracy": 0.8037542662116041
    },
    {
      "epoch": 0.3481745325022262,
      "grad_norm": 1.785900354385376,
      "learning_rate": 4.119282505230569e-06,
      "loss": 0.0385,
      "step": 1173,
      "video_reward_cumulative_accuracy": 0.8034953111679455
    },
    {
      "epoch": 0.348471356485604,
      "grad_norm": 2.0561554431915283,
      "learning_rate": 4.117308078891876e-06,
      "loss": 0.0453,
      "step": 1174,
      "video_reward_cumulative_accuracy": 0.8036626916524702
    },
    {
      "epoch": 0.3487681804689819,
      "grad_norm": 4.3540730476379395,
      "learning_rate": 4.115331916215987e-06,
      "loss": 0.0697,
      "step": 1175,
      "video_reward_cumulative_accuracy": 0.8038297872340425
    },
    {
      "epoch": 0.3490650044523598,
      "grad_norm": 2.7298593521118164,
      "learning_rate": 4.1133540193245056e-06,
      "loss": 0.0258,
      "step": 1176,
      "video_reward_cumulative_accuracy": 0.8039965986394558
    },
    {
      "epoch": 0.3493618284357376,
      "grad_norm": 1.9880746603012085,
      "learning_rate": 4.111374390340895e-06,
      "loss": 0.0545,
      "step": 1177,
      "video_reward_cumulative_accuracy": 0.8037383177570093
    },
    {
      "epoch": 0.3496586524191155,
      "grad_norm": 2.990823745727539,
      "learning_rate": 4.109393031390482e-06,
      "loss": 0.0543,
      "step": 1178,
      "video_reward_cumulative_accuracy": 0.8034804753820034
    },
    {
      "epoch": 0.3499554764024933,
      "grad_norm": 2.0915024280548096,
      "learning_rate": 4.107409944600444e-06,
      "loss": 0.0747,
      "step": 1179,
      "video_reward_cumulative_accuracy": 0.8036471586089907
    },
    {
      "epoch": 0.3502523003858712,
      "grad_norm": 1.47837495803833,
      "learning_rate": 4.105425132099821e-06,
      "loss": 0.0526,
      "step": 1180,
      "video_reward_cumulative_accuracy": 0.8038135593220339
    },
    {
      "epoch": 0.350549124369249,
      "grad_norm": 1.934434175491333,
      "learning_rate": 4.103438596019498e-06,
      "loss": 0.0385,
      "step": 1181,
      "video_reward_cumulative_accuracy": 0.8039796782387807
    },
    {
      "epoch": 0.3508459483526269,
      "grad_norm": 1.294867753982544,
      "learning_rate": 4.1014503384922164e-06,
      "loss": 0.0314,
      "step": 1182,
      "video_reward_cumulative_accuracy": 0.8041455160744501
    },
    {
      "epoch": 0.35114277233600477,
      "grad_norm": 2.0357749462127686,
      "learning_rate": 4.099460361652563e-06,
      "loss": 0.0253,
      "step": 1183,
      "video_reward_cumulative_accuracy": 0.8043110735418427
    },
    {
      "epoch": 0.3514395963193826,
      "grad_norm": 2.3916919231414795,
      "learning_rate": 4.097468667636971e-06,
      "loss": 0.0447,
      "step": 1184,
      "video_reward_cumulative_accuracy": 0.8040540540540541
    },
    {
      "epoch": 0.3517364203027605,
      "grad_norm": 2.0069797039031982,
      "learning_rate": 4.095475258583719e-06,
      "loss": 0.0179,
      "step": 1185,
      "video_reward_cumulative_accuracy": 0.8042194092827004
    },
    {
      "epoch": 0.3520332442861383,
      "grad_norm": 3.129631996154785,
      "learning_rate": 4.093480136632922e-06,
      "loss": 0.1022,
      "step": 1186,
      "video_reward_cumulative_accuracy": 0.8039629005059022
    },
    {
      "epoch": 0.3523300682695162,
      "grad_norm": 1.985904574394226,
      "learning_rate": 4.09148330392654e-06,
      "loss": 0.0333,
      "step": 1187,
      "video_reward_cumulative_accuracy": 0.8041280539174389
    },
    {
      "epoch": 0.352626892252894,
      "grad_norm": 2.5807793140411377,
      "learning_rate": 4.089484762608365e-06,
      "loss": 0.0602,
      "step": 1188,
      "video_reward_cumulative_accuracy": 0.8042929292929293
    },
    {
      "epoch": 0.3529237162362719,
      "grad_norm": 2.0883278846740723,
      "learning_rate": 4.0874845148240265e-06,
      "loss": 0.0711,
      "step": 1189,
      "video_reward_cumulative_accuracy": 0.804457527333894
    },
    {
      "epoch": 0.35322054021964977,
      "grad_norm": 1.2518675327301025,
      "learning_rate": 4.085482562720983e-06,
      "loss": 0.0395,
      "step": 1190,
      "video_reward_cumulative_accuracy": 0.8042016806722689
    },
    {
      "epoch": 0.3535173642030276,
      "grad_norm": 1.957599401473999,
      "learning_rate": 4.083478908448525e-06,
      "loss": 0.0636,
      "step": 1191,
      "video_reward_cumulative_accuracy": 0.8043660789252729
    },
    {
      "epoch": 0.3538141881864055,
      "grad_norm": 2.872051954269409,
      "learning_rate": 4.08147355415777e-06,
      "loss": 0.0783,
      "step": 1192,
      "video_reward_cumulative_accuracy": 0.8045302013422819
    },
    {
      "epoch": 0.3541110121697833,
      "grad_norm": 5.688748836517334,
      "learning_rate": 4.07946650200166e-06,
      "loss": 0.0614,
      "step": 1193,
      "video_reward_cumulative_accuracy": 0.8046940486169321
    },
    {
      "epoch": 0.3544078361531612,
      "grad_norm": 1.7336045503616333,
      "learning_rate": 4.0774577541349605e-06,
      "loss": 0.0619,
      "step": 1194,
      "video_reward_cumulative_accuracy": 0.8044388609715243
    },
    {
      "epoch": 0.354704660136539,
      "grad_norm": 2.06296968460083,
      "learning_rate": 4.075447312714258e-06,
      "loss": 0.0263,
      "step": 1195,
      "video_reward_cumulative_accuracy": 0.80418410041841
    },
    {
      "epoch": 0.3550014841199169,
      "grad_norm": 1.0817362070083618,
      "learning_rate": 4.073435179897956e-06,
      "loss": 0.0567,
      "step": 1196,
      "video_reward_cumulative_accuracy": 0.8035117056856187
    },
    {
      "epoch": 0.35529830810329477,
      "grad_norm": 1.0421109199523926,
      "learning_rate": 4.071421357846274e-06,
      "loss": 0.0549,
      "step": 1197,
      "video_reward_cumulative_accuracy": 0.8036758563074352
    },
    {
      "epoch": 0.3555951320866726,
      "grad_norm": 2.0795469284057617,
      "learning_rate": 4.0694058487212464e-06,
      "loss": 0.0329,
      "step": 1198,
      "video_reward_cumulative_accuracy": 0.8038397328881469
    },
    {
      "epoch": 0.3558919560700505,
      "grad_norm": 2.0918068885803223,
      "learning_rate": 4.067388654686717e-06,
      "loss": 0.0445,
      "step": 1199,
      "video_reward_cumulative_accuracy": 0.8035863219349458
    },
    {
      "epoch": 0.3561887800534283,
      "grad_norm": 1.405401349067688,
      "learning_rate": 4.065369777908339e-06,
      "loss": 0.0439,
      "step": 1200,
      "video_reward_cumulative_accuracy": 0.80375
    },
    {
      "epoch": 0.3561887800534283,
      "eval_runtime": 129.9202,
      "eval_samples_per_second": 6.073,
      "eval_steps_per_second": 0.762,
      "eval_test_set_accuracy": 0.7689393939393939,
      "step": 1200
    },
    {
      "epoch": 0.3564856040368062,
      "grad_norm": 1.2888494729995728,
      "learning_rate": 4.063349220553573e-06,
      "loss": 0.0477,
      "step": 1201,
      "video_reward_cumulative_accuracy": 0.8030807660283097
    },
    {
      "epoch": 0.356782428020184,
      "grad_norm": 2.8064706325531006,
      "learning_rate": 4.0613269847916845e-06,
      "loss": 0.0431,
      "step": 1202,
      "video_reward_cumulative_accuracy": 0.8032445923460898
    },
    {
      "epoch": 0.3570792520035619,
      "grad_norm": 1.3145371675491333,
      "learning_rate": 4.059303072793739e-06,
      "loss": 0.0215,
      "step": 1203,
      "video_reward_cumulative_accuracy": 0.8034081463009144
    },
    {
      "epoch": 0.35737607598693977,
      "grad_norm": 1.8004136085510254,
      "learning_rate": 4.057277486732601e-06,
      "loss": 0.1032,
      "step": 1204,
      "video_reward_cumulative_accuracy": 0.8035714285714286
    },
    {
      "epoch": 0.3576728999703176,
      "grad_norm": 0.9853895902633667,
      "learning_rate": 4.0552502287829365e-06,
      "loss": 0.0421,
      "step": 1205,
      "video_reward_cumulative_accuracy": 0.8033195020746888
    },
    {
      "epoch": 0.3579697239536955,
      "grad_norm": 2.2384772300720215,
      "learning_rate": 4.0532213011212025e-06,
      "loss": 0.0396,
      "step": 1206,
      "video_reward_cumulative_accuracy": 0.8034825870646766
    },
    {
      "epoch": 0.3582665479370733,
      "grad_norm": 1.6136014461517334,
      "learning_rate": 4.0511907059256485e-06,
      "loss": 0.0425,
      "step": 1207,
      "video_reward_cumulative_accuracy": 0.8036454018227009
    },
    {
      "epoch": 0.3585633719204512,
      "grad_norm": 0.9855162501335144,
      "learning_rate": 4.049158445376318e-06,
      "loss": 0.0325,
      "step": 1208,
      "video_reward_cumulative_accuracy": 0.8038079470198676
    },
    {
      "epoch": 0.358860195903829,
      "grad_norm": 2.7680773735046387,
      "learning_rate": 4.047124521655037e-06,
      "loss": 0.0559,
      "step": 1209,
      "video_reward_cumulative_accuracy": 0.803556658395368
    },
    {
      "epoch": 0.3591570198872069,
      "grad_norm": 1.1192249059677124,
      "learning_rate": 4.045088936945423e-06,
      "loss": 0.0224,
      "step": 1210,
      "video_reward_cumulative_accuracy": 0.8037190082644629
    },
    {
      "epoch": 0.35945384387058477,
      "grad_norm": 1.8444184064865112,
      "learning_rate": 4.043051693432871e-06,
      "loss": 0.0565,
      "step": 1211,
      "video_reward_cumulative_accuracy": 0.8038810900082577
    },
    {
      "epoch": 0.3597506678539626,
      "grad_norm": 2.4251158237457275,
      "learning_rate": 4.041012793304563e-06,
      "loss": 0.0407,
      "step": 1212,
      "video_reward_cumulative_accuracy": 0.804042904290429
    },
    {
      "epoch": 0.36004749183734047,
      "grad_norm": 1.5506036281585693,
      "learning_rate": 4.038972238749452e-06,
      "loss": 0.0247,
      "step": 1213,
      "video_reward_cumulative_accuracy": 0.8042044517724649
    },
    {
      "epoch": 0.3603443158207183,
      "grad_norm": 4.467177391052246,
      "learning_rate": 4.036930031958275e-06,
      "loss": 0.0588,
      "step": 1214,
      "video_reward_cumulative_accuracy": 0.8043657331136738
    },
    {
      "epoch": 0.3606411398040962,
      "grad_norm": 2.9094066619873047,
      "learning_rate": 4.034886175123537e-06,
      "loss": 0.0309,
      "step": 1215,
      "video_reward_cumulative_accuracy": 0.8045267489711934
    },
    {
      "epoch": 0.360937963787474,
      "grad_norm": 2.6900370121002197,
      "learning_rate": 4.032840670439517e-06,
      "loss": 0.0351,
      "step": 1216,
      "video_reward_cumulative_accuracy": 0.8046875
    },
    {
      "epoch": 0.3612347877708519,
      "grad_norm": 3.0466442108154297,
      "learning_rate": 4.030793520102264e-06,
      "loss": 0.0434,
      "step": 1217,
      "video_reward_cumulative_accuracy": 0.804847986852917
    },
    {
      "epoch": 0.36153161175422976,
      "grad_norm": 0.9686444401741028,
      "learning_rate": 4.028744726309592e-06,
      "loss": 0.0301,
      "step": 1218,
      "video_reward_cumulative_accuracy": 0.805008210180624
    },
    {
      "epoch": 0.3618284357376076,
      "grad_norm": 1.4730597734451294,
      "learning_rate": 4.02669429126108e-06,
      "loss": 0.0612,
      "step": 1219,
      "video_reward_cumulative_accuracy": 0.8051681706316653
    },
    {
      "epoch": 0.36212525972098547,
      "grad_norm": 4.798864841461182,
      "learning_rate": 4.024642217158068e-06,
      "loss": 0.0744,
      "step": 1220,
      "video_reward_cumulative_accuracy": 0.8045081967213115
    },
    {
      "epoch": 0.3624220837043633,
      "grad_norm": 2.4471278190612793,
      "learning_rate": 4.022588506203658e-06,
      "loss": 0.035,
      "step": 1221,
      "video_reward_cumulative_accuracy": 0.8046683046683046
    },
    {
      "epoch": 0.3627189076877412,
      "grad_norm": 7.197712421417236,
      "learning_rate": 4.020533160602708e-06,
      "loss": 0.0859,
      "step": 1222,
      "video_reward_cumulative_accuracy": 0.8048281505728314
    },
    {
      "epoch": 0.363015731671119,
      "grad_norm": 2.378415107727051,
      "learning_rate": 4.018476182561829e-06,
      "loss": 0.053,
      "step": 1223,
      "video_reward_cumulative_accuracy": 0.8049877350776778
    },
    {
      "epoch": 0.3633125556544969,
      "grad_norm": 1.3243201971054077,
      "learning_rate": 4.0164175742893894e-06,
      "loss": 0.0264,
      "step": 1224,
      "video_reward_cumulative_accuracy": 0.8051470588235294
    },
    {
      "epoch": 0.36360937963787476,
      "grad_norm": 4.309061050415039,
      "learning_rate": 4.014357337995504e-06,
      "loss": 0.0634,
      "step": 1225,
      "video_reward_cumulative_accuracy": 0.8048979591836735
    },
    {
      "epoch": 0.3639062036212526,
      "grad_norm": 2.373619318008423,
      "learning_rate": 4.012295475892036e-06,
      "loss": 0.0803,
      "step": 1226,
      "video_reward_cumulative_accuracy": 0.8050570962479608
    },
    {
      "epoch": 0.36420302760463047,
      "grad_norm": 6.605100154876709,
      "learning_rate": 4.0102319901925945e-06,
      "loss": 0.076,
      "step": 1227,
      "video_reward_cumulative_accuracy": 0.8048084759576202
    },
    {
      "epoch": 0.3644998515880083,
      "grad_norm": 1.552177906036377,
      "learning_rate": 4.008166883112532e-06,
      "loss": 0.0249,
      "step": 1228,
      "video_reward_cumulative_accuracy": 0.8049674267100977
    },
    {
      "epoch": 0.3647966755713862,
      "grad_norm": 1.2064961194992065,
      "learning_rate": 4.00610015686894e-06,
      "loss": 0.0212,
      "step": 1229,
      "video_reward_cumulative_accuracy": 0.8051261187957689
    },
    {
      "epoch": 0.365093499554764,
      "grad_norm": 1.4522373676300049,
      "learning_rate": 4.004031813680652e-06,
      "loss": 0.057,
      "step": 1230,
      "video_reward_cumulative_accuracy": 0.8052845528455285
    },
    {
      "epoch": 0.3653903235381419,
      "grad_norm": 3.753844738006592,
      "learning_rate": 4.0019618557682345e-06,
      "loss": 0.0332,
      "step": 1231,
      "video_reward_cumulative_accuracy": 0.8050365556458164
    },
    {
      "epoch": 0.36568714752151976,
      "grad_norm": 0.7311299443244934,
      "learning_rate": 3.999890285353988e-06,
      "loss": 0.0161,
      "step": 1232,
      "video_reward_cumulative_accuracy": 0.8051948051948052
    },
    {
      "epoch": 0.3659839715048976,
      "grad_norm": 1.8597936630249023,
      "learning_rate": 3.997817104661943e-06,
      "loss": 0.0308,
      "step": 1233,
      "video_reward_cumulative_accuracy": 0.805352798053528
    },
    {
      "epoch": 0.36628079548827547,
      "grad_norm": 1.923897624015808,
      "learning_rate": 3.995742315917862e-06,
      "loss": 0.0338,
      "step": 1234,
      "video_reward_cumulative_accuracy": 0.8055105348460292
    },
    {
      "epoch": 0.3665776194716533,
      "grad_norm": 1.4819157123565674,
      "learning_rate": 3.993665921349232e-06,
      "loss": 0.0398,
      "step": 1235,
      "video_reward_cumulative_accuracy": 0.805668016194332
    },
    {
      "epoch": 0.3668744434550312,
      "grad_norm": 3.7512669563293457,
      "learning_rate": 3.991587923185263e-06,
      "loss": 0.0493,
      "step": 1236,
      "video_reward_cumulative_accuracy": 0.8050161812297735
    },
    {
      "epoch": 0.367171267438409,
      "grad_norm": 0.6202178597450256,
      "learning_rate": 3.989508323656888e-06,
      "loss": 0.0137,
      "step": 1237,
      "video_reward_cumulative_accuracy": 0.8051738075990299
    },
    {
      "epoch": 0.3674680914217869,
      "grad_norm": 3.8399429321289062,
      "learning_rate": 3.987427124996759e-06,
      "loss": 0.0561,
      "step": 1238,
      "video_reward_cumulative_accuracy": 0.8053311793214862
    },
    {
      "epoch": 0.36776491540516476,
      "grad_norm": 1.2864596843719482,
      "learning_rate": 3.985344329439246e-06,
      "loss": 0.0431,
      "step": 1239,
      "video_reward_cumulative_accuracy": 0.8050847457627118
    },
    {
      "epoch": 0.3680617393885426,
      "grad_norm": 2.3478002548217773,
      "learning_rate": 3.983259939220431e-06,
      "loss": 0.03,
      "step": 1240,
      "video_reward_cumulative_accuracy": 0.805241935483871
    },
    {
      "epoch": 0.36835856337192047,
      "grad_norm": 4.141085624694824,
      "learning_rate": 3.9811739565781085e-06,
      "loss": 0.0648,
      "step": 1241,
      "video_reward_cumulative_accuracy": 0.8053988718775181
    },
    {
      "epoch": 0.3686553873552983,
      "grad_norm": 3.281418800354004,
      "learning_rate": 3.979086383751786e-06,
      "loss": 0.0348,
      "step": 1242,
      "video_reward_cumulative_accuracy": 0.8055555555555556
    },
    {
      "epoch": 0.3689522113386762,
      "grad_norm": 2.6300387382507324,
      "learning_rate": 3.976997222982671e-06,
      "loss": 0.0671,
      "step": 1243,
      "video_reward_cumulative_accuracy": 0.8053097345132744
    },
    {
      "epoch": 0.369249035322054,
      "grad_norm": 2.545103073120117,
      "learning_rate": 3.974906476513686e-06,
      "loss": 0.0264,
      "step": 1244,
      "video_reward_cumulative_accuracy": 0.805064308681672
    },
    {
      "epoch": 0.3695458593054319,
      "grad_norm": 3.0602078437805176,
      "learning_rate": 3.972814146589446e-06,
      "loss": 0.051,
      "step": 1245,
      "video_reward_cumulative_accuracy": 0.8052208835341366
    },
    {
      "epoch": 0.36984268328880976,
      "grad_norm": 3.791985273361206,
      "learning_rate": 3.970720235456272e-06,
      "loss": 0.0344,
      "step": 1246,
      "video_reward_cumulative_accuracy": 0.8053772070626003
    },
    {
      "epoch": 0.3701395072721876,
      "grad_norm": 2.767526388168335,
      "learning_rate": 3.96862474536218e-06,
      "loss": 0.0703,
      "step": 1247,
      "video_reward_cumulative_accuracy": 0.8051323175621492
    },
    {
      "epoch": 0.37043633125556547,
      "grad_norm": 3.0397701263427734,
      "learning_rate": 3.9665276785568825e-06,
      "loss": 0.0825,
      "step": 1248,
      "video_reward_cumulative_accuracy": 0.8048878205128205
    },
    {
      "epoch": 0.3707331552389433,
      "grad_norm": 1.6637073755264282,
      "learning_rate": 3.964429037291785e-06,
      "loss": 0.0129,
      "step": 1249,
      "video_reward_cumulative_accuracy": 0.8050440352281826
    },
    {
      "epoch": 0.37102997922232117,
      "grad_norm": 4.110602855682373,
      "learning_rate": 3.962328823819981e-06,
      "loss": 0.0622,
      "step": 1250,
      "video_reward_cumulative_accuracy": 0.8048
    },
    {
      "epoch": 0.371326803205699,
      "grad_norm": 2.5476889610290527,
      "learning_rate": 3.960227040396255e-06,
      "loss": 0.0384,
      "step": 1251,
      "video_reward_cumulative_accuracy": 0.8049560351718625
    },
    {
      "epoch": 0.3716236271890769,
      "grad_norm": 0.3595353364944458,
      "learning_rate": 3.958123689277074e-06,
      "loss": 0.0129,
      "step": 1252,
      "video_reward_cumulative_accuracy": 0.805111821086262
    },
    {
      "epoch": 0.37192045117245476,
      "grad_norm": 0.8653481602668762,
      "learning_rate": 3.956018772720591e-06,
      "loss": 0.0145,
      "step": 1253,
      "video_reward_cumulative_accuracy": 0.8052673583399841
    },
    {
      "epoch": 0.3722172751558326,
      "grad_norm": 4.666868209838867,
      "learning_rate": 3.953912292986637e-06,
      "loss": 0.0618,
      "step": 1254,
      "video_reward_cumulative_accuracy": 0.8054226475279107
    },
    {
      "epoch": 0.37251409913921046,
      "grad_norm": 1.4767639636993408,
      "learning_rate": 3.951804252336723e-06,
      "loss": 0.027,
      "step": 1255,
      "video_reward_cumulative_accuracy": 0.8055776892430279
    },
    {
      "epoch": 0.3728109231225883,
      "grad_norm": 1.8188785314559937,
      "learning_rate": 3.949694653034036e-06,
      "loss": 0.0648,
      "step": 1256,
      "video_reward_cumulative_accuracy": 0.8057324840764332
    },
    {
      "epoch": 0.37310774710596617,
      "grad_norm": 4.584212779998779,
      "learning_rate": 3.9475834973434345e-06,
      "loss": 0.0546,
      "step": 1257,
      "video_reward_cumulative_accuracy": 0.8058870326173428
    },
    {
      "epoch": 0.373404571089344,
      "grad_norm": 2.5021183490753174,
      "learning_rate": 3.94547078753145e-06,
      "loss": 0.0283,
      "step": 1258,
      "video_reward_cumulative_accuracy": 0.8060413354531002
    },
    {
      "epoch": 0.3737013950727219,
      "grad_norm": 0.826378345489502,
      "learning_rate": 3.94335652586628e-06,
      "loss": 0.0123,
      "step": 1259,
      "video_reward_cumulative_accuracy": 0.8061953931691819
    },
    {
      "epoch": 0.37399821905609976,
      "grad_norm": 2.355395555496216,
      "learning_rate": 3.941240714617791e-06,
      "loss": 0.0221,
      "step": 1260,
      "video_reward_cumulative_accuracy": 0.8063492063492064
    },
    {
      "epoch": 0.3742950430394776,
      "grad_norm": 0.9242327213287354,
      "learning_rate": 3.9391233560575116e-06,
      "loss": 0.011,
      "step": 1261,
      "video_reward_cumulative_accuracy": 0.8065027755749405
    },
    {
      "epoch": 0.37459186702285546,
      "grad_norm": 1.269935131072998,
      "learning_rate": 3.937004452458631e-06,
      "loss": 0.0226,
      "step": 1262,
      "video_reward_cumulative_accuracy": 0.8066561014263075
    },
    {
      "epoch": 0.3748886910062333,
      "grad_norm": 1.5934422016143799,
      "learning_rate": 3.9348840060959985e-06,
      "loss": 0.0288,
      "step": 1263,
      "video_reward_cumulative_accuracy": 0.8068091844813935
    },
    {
      "epoch": 0.37518551498961117,
      "grad_norm": 1.6049624681472778,
      "learning_rate": 3.932762019246119e-06,
      "loss": 0.0327,
      "step": 1264,
      "video_reward_cumulative_accuracy": 0.8065664556962026
    },
    {
      "epoch": 0.375482338972989,
      "grad_norm": 2.5892493724823,
      "learning_rate": 3.930638494187151e-06,
      "loss": 0.0368,
      "step": 1265,
      "video_reward_cumulative_accuracy": 0.8063241106719368
    },
    {
      "epoch": 0.3757791629563669,
      "grad_norm": 4.644944667816162,
      "learning_rate": 3.928513433198905e-06,
      "loss": 0.1756,
      "step": 1266,
      "video_reward_cumulative_accuracy": 0.8060821484992101
    },
    {
      "epoch": 0.37607598693974476,
      "grad_norm": 3.284151315689087,
      "learning_rate": 3.92638683856284e-06,
      "loss": 0.0922,
      "step": 1267,
      "video_reward_cumulative_accuracy": 0.8058405682715075
    },
    {
      "epoch": 0.3763728109231226,
      "grad_norm": 1.4588419198989868,
      "learning_rate": 3.924258712562061e-06,
      "loss": 0.0219,
      "step": 1268,
      "video_reward_cumulative_accuracy": 0.805993690851735
    },
    {
      "epoch": 0.37666963490650046,
      "grad_norm": 2.8274147510528564,
      "learning_rate": 3.9221290574813205e-06,
      "loss": 0.0222,
      "step": 1269,
      "video_reward_cumulative_accuracy": 0.806146572104019
    },
    {
      "epoch": 0.3769664588898783,
      "grad_norm": 2.812047243118286,
      "learning_rate": 3.919997875607008e-06,
      "loss": 0.0383,
      "step": 1270,
      "video_reward_cumulative_accuracy": 0.8062992125984252
    },
    {
      "epoch": 0.37726328287325617,
      "grad_norm": 5.075555324554443,
      "learning_rate": 3.917865169227154e-06,
      "loss": 0.0635,
      "step": 1271,
      "video_reward_cumulative_accuracy": 0.8060582218725413
    },
    {
      "epoch": 0.377560106856634,
      "grad_norm": 1.0230021476745605,
      "learning_rate": 3.915730940631426e-06,
      "loss": 0.0303,
      "step": 1272,
      "video_reward_cumulative_accuracy": 0.8058176100628931
    },
    {
      "epoch": 0.3778569308400119,
      "grad_norm": 1.6530554294586182,
      "learning_rate": 3.913595192111124e-06,
      "loss": 0.0386,
      "step": 1273,
      "video_reward_cumulative_accuracy": 0.8059701492537313
    },
    {
      "epoch": 0.37815375482338975,
      "grad_norm": 5.165441513061523,
      "learning_rate": 3.911457925959185e-06,
      "loss": 0.062,
      "step": 1274,
      "video_reward_cumulative_accuracy": 0.8057299843014128
    },
    {
      "epoch": 0.3784505788067676,
      "grad_norm": 1.2560231685638428,
      "learning_rate": 3.909319144470169e-06,
      "loss": 0.0234,
      "step": 1275,
      "video_reward_cumulative_accuracy": 0.8058823529411765
    },
    {
      "epoch": 0.37874740279014546,
      "grad_norm": 0.828478991985321,
      "learning_rate": 3.907178849940266e-06,
      "loss": 0.0111,
      "step": 1276,
      "video_reward_cumulative_accuracy": 0.8060344827586207
    },
    {
      "epoch": 0.3790442267735233,
      "grad_norm": 3.1586334705352783,
      "learning_rate": 3.90503704466729e-06,
      "loss": 0.0643,
      "step": 1277,
      "video_reward_cumulative_accuracy": 0.8061863743148003
    },
    {
      "epoch": 0.37934105075690117,
      "grad_norm": 3.0786163806915283,
      "learning_rate": 3.902893730950676e-06,
      "loss": 0.0786,
      "step": 1278,
      "video_reward_cumulative_accuracy": 0.8063380281690141
    },
    {
      "epoch": 0.379637874740279,
      "grad_norm": 2.47560453414917,
      "learning_rate": 3.900748911091481e-06,
      "loss": 0.0522,
      "step": 1279,
      "video_reward_cumulative_accuracy": 0.8060985144644254
    },
    {
      "epoch": 0.37993469872365687,
      "grad_norm": 2.573753595352173,
      "learning_rate": 3.898602587392377e-06,
      "loss": 0.0691,
      "step": 1280,
      "video_reward_cumulative_accuracy": 0.80625
    },
    {
      "epoch": 0.38023152270703475,
      "grad_norm": 6.1154656410217285,
      "learning_rate": 3.89645476215765e-06,
      "loss": 0.1061,
      "step": 1281,
      "video_reward_cumulative_accuracy": 0.8064012490241999
    },
    {
      "epoch": 0.3805283466904126,
      "grad_norm": 2.435875654220581,
      "learning_rate": 3.894305437693198e-06,
      "loss": 0.0309,
      "step": 1282,
      "video_reward_cumulative_accuracy": 0.8061622464898596
    },
    {
      "epoch": 0.38082517067379046,
      "grad_norm": 3.2194957733154297,
      "learning_rate": 3.892154616306531e-06,
      "loss": 0.0485,
      "step": 1283,
      "video_reward_cumulative_accuracy": 0.8063133281371785
    },
    {
      "epoch": 0.3811219946571683,
      "grad_norm": 2.315264940261841,
      "learning_rate": 3.890002300306764e-06,
      "loss": 0.0662,
      "step": 1284,
      "video_reward_cumulative_accuracy": 0.8064641744548287
    },
    {
      "epoch": 0.38141881864054616,
      "grad_norm": 2.33933687210083,
      "learning_rate": 3.887848492004618e-06,
      "loss": 0.0468,
      "step": 1285,
      "video_reward_cumulative_accuracy": 0.8066147859922179
    },
    {
      "epoch": 0.381715642623924,
      "grad_norm": 2.370605707168579,
      "learning_rate": 3.885693193712413e-06,
      "loss": 0.0385,
      "step": 1286,
      "video_reward_cumulative_accuracy": 0.8063763608087092
    },
    {
      "epoch": 0.38201246660730187,
      "grad_norm": 2.9528722763061523,
      "learning_rate": 3.883536407744073e-06,
      "loss": 0.0312,
      "step": 1287,
      "video_reward_cumulative_accuracy": 0.8061383061383062
    },
    {
      "epoch": 0.38230929059067975,
      "grad_norm": 1.1673752069473267,
      "learning_rate": 3.881378136415117e-06,
      "loss": 0.0343,
      "step": 1288,
      "video_reward_cumulative_accuracy": 0.8062888198757764
    },
    {
      "epoch": 0.3826061145740576,
      "grad_norm": 1.4453524351119995,
      "learning_rate": 3.8792183820426575e-06,
      "loss": 0.0593,
      "step": 1289,
      "video_reward_cumulative_accuracy": 0.8064391000775796
    },
    {
      "epoch": 0.38290293855743546,
      "grad_norm": 2.969148874282837,
      "learning_rate": 3.877057146945401e-06,
      "loss": 0.0384,
      "step": 1290,
      "video_reward_cumulative_accuracy": 0.8065891472868217
    },
    {
      "epoch": 0.3831997625408133,
      "grad_norm": 2.2823967933654785,
      "learning_rate": 3.874894433443643e-06,
      "loss": 0.0443,
      "step": 1291,
      "video_reward_cumulative_accuracy": 0.8067389620449265
    },
    {
      "epoch": 0.38349658652419116,
      "grad_norm": 1.4445525407791138,
      "learning_rate": 3.872730243859267e-06,
      "loss": 0.0504,
      "step": 1292,
      "video_reward_cumulative_accuracy": 0.8065015479876161
    },
    {
      "epoch": 0.383793410507569,
      "grad_norm": 1.5774403810501099,
      "learning_rate": 3.87056458051574e-06,
      "loss": 0.0322,
      "step": 1293,
      "video_reward_cumulative_accuracy": 0.8066511987625676
    },
    {
      "epoch": 0.38409023449094687,
      "grad_norm": 2.641799211502075,
      "learning_rate": 3.868397445738112e-06,
      "loss": 0.0246,
      "step": 1294,
      "video_reward_cumulative_accuracy": 0.8068006182380216
    },
    {
      "epoch": 0.38438705847432475,
      "grad_norm": 2.0484931468963623,
      "learning_rate": 3.866228841853012e-06,
      "loss": 0.0937,
      "step": 1295,
      "video_reward_cumulative_accuracy": 0.806949806949807
    },
    {
      "epoch": 0.3846838824577026,
      "grad_norm": 3.79331374168396,
      "learning_rate": 3.864058771188648e-06,
      "loss": 0.0471,
      "step": 1296,
      "video_reward_cumulative_accuracy": 0.8070987654320988
    },
    {
      "epoch": 0.38498070644108046,
      "grad_norm": 1.6458531618118286,
      "learning_rate": 3.861887236074801e-06,
      "loss": 0.0402,
      "step": 1297,
      "video_reward_cumulative_accuracy": 0.8072474942174248
    },
    {
      "epoch": 0.3852775304244583,
      "grad_norm": 2.398191213607788,
      "learning_rate": 3.859714238842823e-06,
      "loss": 0.0288,
      "step": 1298,
      "video_reward_cumulative_accuracy": 0.8073959938366718
    },
    {
      "epoch": 0.38557435440783616,
      "grad_norm": 1.3121765851974487,
      "learning_rate": 3.8575397818256396e-06,
      "loss": 0.0379,
      "step": 1299,
      "video_reward_cumulative_accuracy": 0.8075442648190916
    },
    {
      "epoch": 0.385871178391214,
      "grad_norm": 2.593432903289795,
      "learning_rate": 3.855363867357741e-06,
      "loss": 0.0366,
      "step": 1300,
      "video_reward_cumulative_accuracy": 0.8076923076923077
    },
    {
      "epoch": 0.38616800237459187,
      "grad_norm": 2.9913852214813232,
      "learning_rate": 3.853186497775181e-06,
      "loss": 0.0427,
      "step": 1301,
      "video_reward_cumulative_accuracy": 0.8078401229823213
    },
    {
      "epoch": 0.38646482635796975,
      "grad_norm": 1.1634633541107178,
      "learning_rate": 3.85100767541558e-06,
      "loss": 0.0457,
      "step": 1302,
      "video_reward_cumulative_accuracy": 0.8079877112135176
    },
    {
      "epoch": 0.3867616503413476,
      "grad_norm": 2.312039852142334,
      "learning_rate": 3.8488274026181125e-06,
      "loss": 0.0337,
      "step": 1303,
      "video_reward_cumulative_accuracy": 0.8077513430544896
    },
    {
      "epoch": 0.38705847432472545,
      "grad_norm": 0.9711390733718872,
      "learning_rate": 3.846645681723514e-06,
      "loss": 0.0367,
      "step": 1304,
      "video_reward_cumulative_accuracy": 0.807898773006135
    },
    {
      "epoch": 0.3873552983081033,
      "grad_norm": 0.5557654500007629,
      "learning_rate": 3.844462515074075e-06,
      "loss": 0.0175,
      "step": 1305,
      "video_reward_cumulative_accuracy": 0.8080459770114943
    },
    {
      "epoch": 0.38765212229148116,
      "grad_norm": 1.1500357389450073,
      "learning_rate": 3.842277905013634e-06,
      "loss": 0.0308,
      "step": 1306,
      "video_reward_cumulative_accuracy": 0.8081929555895865
    },
    {
      "epoch": 0.387948946274859,
      "grad_norm": 1.2327475547790527,
      "learning_rate": 3.840091853887585e-06,
      "loss": 0.0429,
      "step": 1307,
      "video_reward_cumulative_accuracy": 0.8083397092578424
    },
    {
      "epoch": 0.38824577025823687,
      "grad_norm": 0.717802107334137,
      "learning_rate": 3.837904364042864e-06,
      "loss": 0.0256,
      "step": 1308,
      "video_reward_cumulative_accuracy": 0.8084862385321101
    },
    {
      "epoch": 0.38854259424161475,
      "grad_norm": 1.6281253099441528,
      "learning_rate": 3.835715437827954e-06,
      "loss": 0.0191,
      "step": 1309,
      "video_reward_cumulative_accuracy": 0.8086325439266616
    },
    {
      "epoch": 0.3888394182249926,
      "grad_norm": 2.4564850330352783,
      "learning_rate": 3.83352507759288e-06,
      "loss": 0.0526,
      "step": 1310,
      "video_reward_cumulative_accuracy": 0.8087786259541985
    },
    {
      "epoch": 0.38913624220837045,
      "grad_norm": 1.5885370969772339,
      "learning_rate": 3.831333285689207e-06,
      "loss": 0.0145,
      "step": 1311,
      "video_reward_cumulative_accuracy": 0.8089244851258581
    },
    {
      "epoch": 0.3894330661917483,
      "grad_norm": 3.0064384937286377,
      "learning_rate": 3.829140064470035e-06,
      "loss": 0.0724,
      "step": 1312,
      "video_reward_cumulative_accuracy": 0.8090701219512195
    },
    {
      "epoch": 0.38972989017512616,
      "grad_norm": 0.9188132882118225,
      "learning_rate": 3.826945416290001e-06,
      "loss": 0.012,
      "step": 1313,
      "video_reward_cumulative_accuracy": 0.8092155369383092
    },
    {
      "epoch": 0.390026714158504,
      "grad_norm": 2.3891713619232178,
      "learning_rate": 3.824749343505271e-06,
      "loss": 0.0717,
      "step": 1314,
      "video_reward_cumulative_accuracy": 0.8089802130898022
    },
    {
      "epoch": 0.39032353814188187,
      "grad_norm": 2.077953577041626,
      "learning_rate": 3.822551848473545e-06,
      "loss": 0.0498,
      "step": 1315,
      "video_reward_cumulative_accuracy": 0.8091254752851711
    },
    {
      "epoch": 0.39062036212525975,
      "grad_norm": 1.5445294380187988,
      "learning_rate": 3.820352933554045e-06,
      "loss": 0.0276,
      "step": 1316,
      "video_reward_cumulative_accuracy": 0.8088905775075987
    },
    {
      "epoch": 0.39091718610863757,
      "grad_norm": 3.751812219619751,
      "learning_rate": 3.81815260110752e-06,
      "loss": 0.0526,
      "step": 1317,
      "video_reward_cumulative_accuracy": 0.8090356871678056
    },
    {
      "epoch": 0.39121401009201545,
      "grad_norm": 3.9450860023498535,
      "learning_rate": 3.815950853496242e-06,
      "loss": 0.0608,
      "step": 1318,
      "video_reward_cumulative_accuracy": 0.8091805766312595
    },
    {
      "epoch": 0.3915108340753933,
      "grad_norm": 2.4571402072906494,
      "learning_rate": 3.813747693083999e-06,
      "loss": 0.0406,
      "step": 1319,
      "video_reward_cumulative_accuracy": 0.809325246398787
    },
    {
      "epoch": 0.39180765805877116,
      "grad_norm": 0.831182062625885,
      "learning_rate": 3.8115431222360984e-06,
      "loss": 0.0075,
      "step": 1320,
      "video_reward_cumulative_accuracy": 0.809469696969697
    },
    {
      "epoch": 0.392104482042149,
      "grad_norm": 5.207632541656494,
      "learning_rate": 3.80933714331936e-06,
      "loss": 0.0581,
      "step": 1321,
      "video_reward_cumulative_accuracy": 0.8096139288417865
    },
    {
      "epoch": 0.39240130602552686,
      "grad_norm": 0.5821350812911987,
      "learning_rate": 3.807129758702117e-06,
      "loss": 0.0128,
      "step": 1322,
      "video_reward_cumulative_accuracy": 0.8097579425113465
    },
    {
      "epoch": 0.39269813000890474,
      "grad_norm": 4.091737747192383,
      "learning_rate": 3.804920970754211e-06,
      "loss": 0.032,
      "step": 1323,
      "video_reward_cumulative_accuracy": 0.809901738473167
    },
    {
      "epoch": 0.39299495399228257,
      "grad_norm": 1.5021380186080933,
      "learning_rate": 3.802710781846991e-06,
      "loss": 0.017,
      "step": 1324,
      "video_reward_cumulative_accuracy": 0.8100453172205438
    },
    {
      "epoch": 0.39329177797566045,
      "grad_norm": 3.842895984649658,
      "learning_rate": 3.8004991943533077e-06,
      "loss": 0.059,
      "step": 1325,
      "video_reward_cumulative_accuracy": 0.810188679245283
    },
    {
      "epoch": 0.3935886019590383,
      "grad_norm": 3.194486141204834,
      "learning_rate": 3.798286210647516e-06,
      "loss": 0.0299,
      "step": 1326,
      "video_reward_cumulative_accuracy": 0.8103318250377074
    },
    {
      "epoch": 0.39388542594241616,
      "grad_norm": 2.559457302093506,
      "learning_rate": 3.796071833105468e-06,
      "loss": 0.0648,
      "step": 1327,
      "video_reward_cumulative_accuracy": 0.8100979653353428
    },
    {
      "epoch": 0.394182249925794,
      "grad_norm": 4.017072677612305,
      "learning_rate": 3.793856064104514e-06,
      "loss": 0.057,
      "step": 1328,
      "video_reward_cumulative_accuracy": 0.8102409638554217
    },
    {
      "epoch": 0.39447907390917186,
      "grad_norm": 0.940937876701355,
      "learning_rate": 3.7916389060234964e-06,
      "loss": 0.0176,
      "step": 1329,
      "video_reward_cumulative_accuracy": 0.8103837471783296
    },
    {
      "epoch": 0.39477589789254974,
      "grad_norm": 3.047013759613037,
      "learning_rate": 3.78942036124275e-06,
      "loss": 0.0547,
      "step": 1330,
      "video_reward_cumulative_accuracy": 0.8105263157894737
    },
    {
      "epoch": 0.39507272187592757,
      "grad_norm": 0.7967216372489929,
      "learning_rate": 3.787200432144097e-06,
      "loss": 0.0175,
      "step": 1331,
      "video_reward_cumulative_accuracy": 0.8106686701728024
    },
    {
      "epoch": 0.39536954585930545,
      "grad_norm": 3.501380205154419,
      "learning_rate": 3.784979121110848e-06,
      "loss": 0.0465,
      "step": 1332,
      "video_reward_cumulative_accuracy": 0.8108108108108109
    },
    {
      "epoch": 0.3956663698426833,
      "grad_norm": 3.338715076446533,
      "learning_rate": 3.782756430527794e-06,
      "loss": 0.0628,
      "step": 1333,
      "video_reward_cumulative_accuracy": 0.8105776444111028
    },
    {
      "epoch": 0.39596319382606116,
      "grad_norm": 4.169296741485596,
      "learning_rate": 3.7805323627812108e-06,
      "loss": 0.0453,
      "step": 1334,
      "video_reward_cumulative_accuracy": 0.81071964017991
    },
    {
      "epoch": 0.396260017809439,
      "grad_norm": 2.169301986694336,
      "learning_rate": 3.778306920258852e-06,
      "loss": 0.0294,
      "step": 1335,
      "video_reward_cumulative_accuracy": 0.8104868913857678
    },
    {
      "epoch": 0.39655684179281686,
      "grad_norm": 2.8611955642700195,
      "learning_rate": 3.7760801053499435e-06,
      "loss": 0.0706,
      "step": 1336,
      "video_reward_cumulative_accuracy": 0.8106287425149701
    },
    {
      "epoch": 0.39685366577619474,
      "grad_norm": 3.386845827102661,
      "learning_rate": 3.7738519204451883e-06,
      "loss": 0.0497,
      "step": 1337,
      "video_reward_cumulative_accuracy": 0.8107703814510098
    },
    {
      "epoch": 0.39715048975957257,
      "grad_norm": 3.034348249435425,
      "learning_rate": 3.7716223679367604e-06,
      "loss": 0.0708,
      "step": 1338,
      "video_reward_cumulative_accuracy": 0.8109118086696562
    },
    {
      "epoch": 0.39744731374295045,
      "grad_norm": 0.7038185000419617,
      "learning_rate": 3.769391450218298e-06,
      "loss": 0.0199,
      "step": 1339,
      "video_reward_cumulative_accuracy": 0.8110530246452576
    },
    {
      "epoch": 0.3977441377263283,
      "grad_norm": 2.768979072570801,
      "learning_rate": 3.767159169684911e-06,
      "loss": 0.0546,
      "step": 1340,
      "video_reward_cumulative_accuracy": 0.8111940298507463
    },
    {
      "epoch": 0.39804096170970615,
      "grad_norm": 2.743908405303955,
      "learning_rate": 3.7649255287331676e-06,
      "loss": 0.0425,
      "step": 1341,
      "video_reward_cumulative_accuracy": 0.8113348247576435
    },
    {
      "epoch": 0.398337785693084,
      "grad_norm": 2.2306787967681885,
      "learning_rate": 3.762690529761097e-06,
      "loss": 0.0258,
      "step": 1342,
      "video_reward_cumulative_accuracy": 0.8114754098360656
    },
    {
      "epoch": 0.39863460967646186,
      "grad_norm": 1.8014007806777954,
      "learning_rate": 3.7604541751681904e-06,
      "loss": 0.0331,
      "step": 1343,
      "video_reward_cumulative_accuracy": 0.8116157855547282
    },
    {
      "epoch": 0.39893143365983974,
      "grad_norm": 2.2490646839141846,
      "learning_rate": 3.7582164673553888e-06,
      "loss": 0.0227,
      "step": 1344,
      "video_reward_cumulative_accuracy": 0.8117559523809523
    },
    {
      "epoch": 0.39922825764321757,
      "grad_norm": 3.8205676078796387,
      "learning_rate": 3.7559774087250906e-06,
      "loss": 0.0826,
      "step": 1345,
      "video_reward_cumulative_accuracy": 0.8118959107806691
    },
    {
      "epoch": 0.39952508162659545,
      "grad_norm": 4.000797271728516,
      "learning_rate": 3.753737001681142e-06,
      "loss": 0.0942,
      "step": 1346,
      "video_reward_cumulative_accuracy": 0.812035661218425
    },
    {
      "epoch": 0.39982190560997327,
      "grad_norm": 3.239428758621216,
      "learning_rate": 3.7514952486288365e-06,
      "loss": 0.0449,
      "step": 1347,
      "video_reward_cumulative_accuracy": 0.811804008908686
    },
    {
      "epoch": 0.40011872959335115,
      "grad_norm": 4.8066725730896,
      "learning_rate": 3.7492521519749146e-06,
      "loss": 0.0516,
      "step": 1348,
      "video_reward_cumulative_accuracy": 0.8119436201780416
    },
    {
      "epoch": 0.400415553576729,
      "grad_norm": 3.3413074016571045,
      "learning_rate": 3.7470077141275578e-06,
      "loss": 0.0927,
      "step": 1349,
      "video_reward_cumulative_accuracy": 0.8117123795404003
    },
    {
      "epoch": 0.40071237756010686,
      "grad_norm": 1.3113895654678345,
      "learning_rate": 3.744761937496389e-06,
      "loss": 0.0308,
      "step": 1350,
      "video_reward_cumulative_accuracy": 0.8118518518518518
    },
    {
      "epoch": 0.40100920154348474,
      "grad_norm": 1.7896422147750854,
      "learning_rate": 3.742514824492465e-06,
      "loss": 0.0702,
      "step": 1351,
      "video_reward_cumulative_accuracy": 0.8119911176905995
    },
    {
      "epoch": 0.40130602552686256,
      "grad_norm": 3.303739309310913,
      "learning_rate": 3.740266377528282e-06,
      "loss": 0.0711,
      "step": 1352,
      "video_reward_cumulative_accuracy": 0.8121301775147929
    },
    {
      "epoch": 0.40160284951024044,
      "grad_norm": 0.9642285108566284,
      "learning_rate": 3.738016599017766e-06,
      "loss": 0.0306,
      "step": 1353,
      "video_reward_cumulative_accuracy": 0.8122690317812269
    },
    {
      "epoch": 0.40189967349361827,
      "grad_norm": 1.783601999282837,
      "learning_rate": 3.735765491376271e-06,
      "loss": 0.041,
      "step": 1354,
      "video_reward_cumulative_accuracy": 0.8124076809453471
    },
    {
      "epoch": 0.40219649747699615,
      "grad_norm": 2.2338671684265137,
      "learning_rate": 3.733513057020581e-06,
      "loss": 0.0219,
      "step": 1355,
      "video_reward_cumulative_accuracy": 0.8125461254612546
    },
    {
      "epoch": 0.402493321460374,
      "grad_norm": 2.4188389778137207,
      "learning_rate": 3.731259298368902e-06,
      "loss": 0.042,
      "step": 1356,
      "video_reward_cumulative_accuracy": 0.8126843657817109
    },
    {
      "epoch": 0.40279014544375186,
      "grad_norm": 2.109005928039551,
      "learning_rate": 3.7290042178408625e-06,
      "loss": 0.0795,
      "step": 1357,
      "video_reward_cumulative_accuracy": 0.8124539425202653
    },
    {
      "epoch": 0.40308696942712974,
      "grad_norm": 2.0904476642608643,
      "learning_rate": 3.726747817857511e-06,
      "loss": 0.039,
      "step": 1358,
      "video_reward_cumulative_accuracy": 0.8125920471281296
    },
    {
      "epoch": 0.40338379341050756,
      "grad_norm": 3.161112070083618,
      "learning_rate": 3.7244901008413127e-06,
      "loss": 0.0728,
      "step": 1359,
      "video_reward_cumulative_accuracy": 0.8123620309050773
    },
    {
      "epoch": 0.40368061739388544,
      "grad_norm": 2.363586187362671,
      "learning_rate": 3.7222310692161434e-06,
      "loss": 0.0416,
      "step": 1360,
      "video_reward_cumulative_accuracy": 0.8125
    },
    {
      "epoch": 0.40397744137726327,
      "grad_norm": 1.192459225654602,
      "learning_rate": 3.7199707254072953e-06,
      "loss": 0.0394,
      "step": 1361,
      "video_reward_cumulative_accuracy": 0.8126377663482733
    },
    {
      "epoch": 0.40427426536064115,
      "grad_norm": 2.6609785556793213,
      "learning_rate": 3.7177090718414654e-06,
      "loss": 0.1119,
      "step": 1362,
      "video_reward_cumulative_accuracy": 0.8124082232011748
    },
    {
      "epoch": 0.404571089344019,
      "grad_norm": 3.815920352935791,
      "learning_rate": 3.7154461109467586e-06,
      "loss": 0.072,
      "step": 1363,
      "video_reward_cumulative_accuracy": 0.8125458547322084
    },
    {
      "epoch": 0.40486791332739686,
      "grad_norm": 2.569744110107422,
      "learning_rate": 3.713181845152684e-06,
      "loss": 0.025,
      "step": 1364,
      "video_reward_cumulative_accuracy": 0.8126832844574781
    },
    {
      "epoch": 0.40516473731077474,
      "grad_norm": 3.3158631324768066,
      "learning_rate": 3.710916276890149e-06,
      "loss": 0.0523,
      "step": 1365,
      "video_reward_cumulative_accuracy": 0.8128205128205128
    },
    {
      "epoch": 0.40546156129415256,
      "grad_norm": 3.6916356086730957,
      "learning_rate": 3.7086494085914632e-06,
      "loss": 0.0656,
      "step": 1366,
      "video_reward_cumulative_accuracy": 0.8129575402635432
    },
    {
      "epoch": 0.40575838527753044,
      "grad_norm": 1.8868242502212524,
      "learning_rate": 3.7063812426903273e-06,
      "loss": 0.031,
      "step": 1367,
      "video_reward_cumulative_accuracy": 0.8127286027798098
    },
    {
      "epoch": 0.40605520926090827,
      "grad_norm": 3.946322441101074,
      "learning_rate": 3.7041117816218396e-06,
      "loss": 0.068,
      "step": 1368,
      "video_reward_cumulative_accuracy": 0.8128654970760234
    },
    {
      "epoch": 0.40635203324428615,
      "grad_norm": 4.210629940032959,
      "learning_rate": 3.7018410278224852e-06,
      "loss": 0.0726,
      "step": 1369,
      "video_reward_cumulative_accuracy": 0.8122717311906501
    },
    {
      "epoch": 0.406648857227664,
      "grad_norm": 3.0957443714141846,
      "learning_rate": 3.69956898373014e-06,
      "loss": 0.0363,
      "step": 1370,
      "video_reward_cumulative_accuracy": 0.8124087591240876
    },
    {
      "epoch": 0.40694568121104185,
      "grad_norm": 1.1315875053405762,
      "learning_rate": 3.697295651784063e-06,
      "loss": 0.0365,
      "step": 1371,
      "video_reward_cumulative_accuracy": 0.812545587162655
    },
    {
      "epoch": 0.40724250519441973,
      "grad_norm": 4.435636043548584,
      "learning_rate": 3.695021034424897e-06,
      "loss": 0.0564,
      "step": 1372,
      "video_reward_cumulative_accuracy": 0.8126822157434402
    },
    {
      "epoch": 0.40753932917779756,
      "grad_norm": 1.2530689239501953,
      "learning_rate": 3.692745134094665e-06,
      "loss": 0.03,
      "step": 1373,
      "video_reward_cumulative_accuracy": 0.8128186453022578
    },
    {
      "epoch": 0.40783615316117544,
      "grad_norm": 1.9480600357055664,
      "learning_rate": 3.690467953236766e-06,
      "loss": 0.0375,
      "step": 1374,
      "video_reward_cumulative_accuracy": 0.8125909752547307
    },
    {
      "epoch": 0.40813297714455327,
      "grad_norm": 1.831527590751648,
      "learning_rate": 3.6881894942959752e-06,
      "loss": 0.0614,
      "step": 1375,
      "video_reward_cumulative_accuracy": 0.8127272727272727
    },
    {
      "epoch": 0.40842980112793115,
      "grad_norm": 2.9821012020111084,
      "learning_rate": 3.6859097597184395e-06,
      "loss": 0.0336,
      "step": 1376,
      "video_reward_cumulative_accuracy": 0.8128633720930233
    },
    {
      "epoch": 0.40872662511130897,
      "grad_norm": 1.6577091217041016,
      "learning_rate": 3.6836287519516745e-06,
      "loss": 0.0375,
      "step": 1377,
      "video_reward_cumulative_accuracy": 0.8126361655773421
    },
    {
      "epoch": 0.40902344909468685,
      "grad_norm": 3.604968309402466,
      "learning_rate": 3.681346473444565e-06,
      "loss": 0.0396,
      "step": 1378,
      "video_reward_cumulative_accuracy": 0.8127721335268505
    },
    {
      "epoch": 0.40932027307806473,
      "grad_norm": 4.176747798919678,
      "learning_rate": 3.6790629266473564e-06,
      "loss": 0.0433,
      "step": 1379,
      "video_reward_cumulative_accuracy": 0.8129079042784626
    },
    {
      "epoch": 0.40961709706144256,
      "grad_norm": 1.8375120162963867,
      "learning_rate": 3.676778114011659e-06,
      "loss": 0.0673,
      "step": 1380,
      "video_reward_cumulative_accuracy": 0.8130434782608695
    },
    {
      "epoch": 0.40991392104482044,
      "grad_norm": 1.4766967296600342,
      "learning_rate": 3.6744920379904407e-06,
      "loss": 0.0376,
      "step": 1381,
      "video_reward_cumulative_accuracy": 0.8131788559015206
    },
    {
      "epoch": 0.41021074502819826,
      "grad_norm": 1.3402959108352661,
      "learning_rate": 3.6722047010380265e-06,
      "loss": 0.0401,
      "step": 1382,
      "video_reward_cumulative_accuracy": 0.8133140376266281
    },
    {
      "epoch": 0.41050756901157615,
      "grad_norm": 2.443718671798706,
      "learning_rate": 3.669916105610094e-06,
      "loss": 0.0345,
      "step": 1383,
      "video_reward_cumulative_accuracy": 0.8134490238611713
    },
    {
      "epoch": 0.41080439299495397,
      "grad_norm": 2.9310362339019775,
      "learning_rate": 3.667626254163673e-06,
      "loss": 0.0351,
      "step": 1384,
      "video_reward_cumulative_accuracy": 0.8132225433526011
    },
    {
      "epoch": 0.41110121697833185,
      "grad_norm": 1.3766952753067017,
      "learning_rate": 3.665335149157141e-06,
      "loss": 0.0203,
      "step": 1385,
      "video_reward_cumulative_accuracy": 0.8133574007220217
    },
    {
      "epoch": 0.41139804096170973,
      "grad_norm": 2.6975274085998535,
      "learning_rate": 3.6630427930502215e-06,
      "loss": 0.0513,
      "step": 1386,
      "video_reward_cumulative_accuracy": 0.8131313131313131
    },
    {
      "epoch": 0.41169486494508756,
      "grad_norm": 2.027492046356201,
      "learning_rate": 3.6607491883039807e-06,
      "loss": 0.071,
      "step": 1387,
      "video_reward_cumulative_accuracy": 0.8132660418168709
    },
    {
      "epoch": 0.41199168892846544,
      "grad_norm": 1.6064057350158691,
      "learning_rate": 3.658454337380827e-06,
      "loss": 0.0425,
      "step": 1388,
      "video_reward_cumulative_accuracy": 0.8134005763688761
    },
    {
      "epoch": 0.41228851291184326,
      "grad_norm": 2.689882755279541,
      "learning_rate": 3.6561582427445053e-06,
      "loss": 0.0549,
      "step": 1389,
      "video_reward_cumulative_accuracy": 0.8135349172066235
    },
    {
      "epoch": 0.41258533689522114,
      "grad_norm": 2.3749701976776123,
      "learning_rate": 3.653860906860096e-06,
      "loss": 0.0514,
      "step": 1390,
      "video_reward_cumulative_accuracy": 0.8133093525179856
    },
    {
      "epoch": 0.41288216087859897,
      "grad_norm": 2.138916015625,
      "learning_rate": 3.651562332194012e-06,
      "loss": 0.0303,
      "step": 1391,
      "video_reward_cumulative_accuracy": 0.8134435657800144
    },
    {
      "epoch": 0.41317898486197685,
      "grad_norm": 3.9567198753356934,
      "learning_rate": 3.6492625212139964e-06,
      "loss": 0.0721,
      "step": 1392,
      "video_reward_cumulative_accuracy": 0.8135775862068966
    },
    {
      "epoch": 0.41347580884535473,
      "grad_norm": 1.3105418682098389,
      "learning_rate": 3.6469614763891193e-06,
      "loss": 0.0371,
      "step": 1393,
      "video_reward_cumulative_accuracy": 0.8137114142139268
    },
    {
      "epoch": 0.41377263282873256,
      "grad_norm": 1.6768875122070312,
      "learning_rate": 3.644659200189776e-06,
      "loss": 0.0369,
      "step": 1394,
      "video_reward_cumulative_accuracy": 0.8134863701578192
    },
    {
      "epoch": 0.41406945681211044,
      "grad_norm": 3.7469892501831055,
      "learning_rate": 3.6423556950876827e-06,
      "loss": 0.0554,
      "step": 1395,
      "video_reward_cumulative_accuracy": 0.8136200716845878
    },
    {
      "epoch": 0.41436628079548826,
      "grad_norm": 5.655117511749268,
      "learning_rate": 3.6400509635558766e-06,
      "loss": 0.0512,
      "step": 1396,
      "video_reward_cumulative_accuracy": 0.8133954154727794
    },
    {
      "epoch": 0.41466310477886614,
      "grad_norm": 3.703136682510376,
      "learning_rate": 3.6377450080687106e-06,
      "loss": 0.0283,
      "step": 1397,
      "video_reward_cumulative_accuracy": 0.813528990694345
    },
    {
      "epoch": 0.41495992876224397,
      "grad_norm": 3.908426284790039,
      "learning_rate": 3.635437831101851e-06,
      "loss": 0.0451,
      "step": 1398,
      "video_reward_cumulative_accuracy": 0.8136623748211731
    },
    {
      "epoch": 0.41525675274562185,
      "grad_norm": 1.4794903993606567,
      "learning_rate": 3.633129435132277e-06,
      "loss": 0.0248,
      "step": 1399,
      "video_reward_cumulative_accuracy": 0.813795568263045
    },
    {
      "epoch": 0.41555357672899973,
      "grad_norm": 1.4713983535766602,
      "learning_rate": 3.630819822638275e-06,
      "loss": 0.0451,
      "step": 1400,
      "video_reward_cumulative_accuracy": 0.8135714285714286
    },
    {
      "epoch": 0.41555357672899973,
      "eval_runtime": 130.0037,
      "eval_samples_per_second": 6.069,
      "eval_steps_per_second": 0.762,
      "eval_test_set_accuracy": 0.773989898989899,
      "step": 1400
    },
    {
      "epoch": 0.41585040071237755,
      "grad_norm": 2.494872570037842,
      "learning_rate": 3.6285089960994396e-06,
      "loss": 0.0338,
      "step": 1401,
      "video_reward_cumulative_accuracy": 0.8137044967880086
    },
    {
      "epoch": 0.41614722469575544,
      "grad_norm": 3.971022605895996,
      "learning_rate": 3.626196957996666e-06,
      "loss": 0.081,
      "step": 1402,
      "video_reward_cumulative_accuracy": 0.8138373751783167
    },
    {
      "epoch": 0.41644404867913326,
      "grad_norm": 2.763796806335449,
      "learning_rate": 3.6238837108121514e-06,
      "loss": 0.0512,
      "step": 1403,
      "video_reward_cumulative_accuracy": 0.8136136849607983
    },
    {
      "epoch": 0.41674087266251114,
      "grad_norm": 3.586524724960327,
      "learning_rate": 3.6215692570293924e-06,
      "loss": 0.0805,
      "step": 1404,
      "video_reward_cumulative_accuracy": 0.8137464387464387
    },
    {
      "epoch": 0.41703769664588897,
      "grad_norm": 2.187155246734619,
      "learning_rate": 3.619253599133178e-06,
      "loss": 0.0365,
      "step": 1405,
      "video_reward_cumulative_accuracy": 0.8135231316725978
    },
    {
      "epoch": 0.41733452062926685,
      "grad_norm": 3.1447842121124268,
      "learning_rate": 3.6169367396095935e-06,
      "loss": 0.0494,
      "step": 1406,
      "video_reward_cumulative_accuracy": 0.8133001422475107
    },
    {
      "epoch": 0.41763134461264473,
      "grad_norm": 1.2427921295166016,
      "learning_rate": 3.6146186809460114e-06,
      "loss": 0.0147,
      "step": 1407,
      "video_reward_cumulative_accuracy": 0.8134328358208955
    },
    {
      "epoch": 0.41792816859602255,
      "grad_norm": 1.1575847864151,
      "learning_rate": 3.612299425631093e-06,
      "loss": 0.0468,
      "step": 1408,
      "video_reward_cumulative_accuracy": 0.8132102272727273
    },
    {
      "epoch": 0.41822499257940043,
      "grad_norm": 0.6622688174247742,
      "learning_rate": 3.609978976154784e-06,
      "loss": 0.0116,
      "step": 1409,
      "video_reward_cumulative_accuracy": 0.8133427963094393
    },
    {
      "epoch": 0.41852181656277826,
      "grad_norm": 1.4234910011291504,
      "learning_rate": 3.6076573350083112e-06,
      "loss": 0.0449,
      "step": 1410,
      "video_reward_cumulative_accuracy": 0.8134751773049645
    },
    {
      "epoch": 0.41881864054615614,
      "grad_norm": 0.7136000990867615,
      "learning_rate": 3.605334504684183e-06,
      "loss": 0.0299,
      "step": 1411,
      "video_reward_cumulative_accuracy": 0.8132530120481928
    },
    {
      "epoch": 0.41911546452953397,
      "grad_norm": 2.231410503387451,
      "learning_rate": 3.6030104876761835e-06,
      "loss": 0.0417,
      "step": 1412,
      "video_reward_cumulative_accuracy": 0.8133852691218131
    },
    {
      "epoch": 0.41941228851291185,
      "grad_norm": 2.5650246143341064,
      "learning_rate": 3.600685286479369e-06,
      "loss": 0.0438,
      "step": 1413,
      "video_reward_cumulative_accuracy": 0.813517338995046
    },
    {
      "epoch": 0.4197091124962897,
      "grad_norm": 3.8068923950195312,
      "learning_rate": 3.59835890359007e-06,
      "loss": 0.0378,
      "step": 1414,
      "video_reward_cumulative_accuracy": 0.8136492220650636
    },
    {
      "epoch": 0.42000593647966755,
      "grad_norm": 2.6522464752197266,
      "learning_rate": 3.5960313415058833e-06,
      "loss": 0.0443,
      "step": 1415,
      "video_reward_cumulative_accuracy": 0.8134275618374558
    },
    {
      "epoch": 0.42030276046304543,
      "grad_norm": 2.89424729347229,
      "learning_rate": 3.5937026027256738e-06,
      "loss": 0.0213,
      "step": 1416,
      "video_reward_cumulative_accuracy": 0.8135593220338984
    },
    {
      "epoch": 0.42059958444642326,
      "grad_norm": 3.648902177810669,
      "learning_rate": 3.591372689749567e-06,
      "loss": 0.1216,
      "step": 1417,
      "video_reward_cumulative_accuracy": 0.8136908962597036
    },
    {
      "epoch": 0.42089640842980114,
      "grad_norm": 1.9664138555526733,
      "learning_rate": 3.5890416050789523e-06,
      "loss": 0.0361,
      "step": 1418,
      "video_reward_cumulative_accuracy": 0.8138222849083215
    },
    {
      "epoch": 0.42119323241317896,
      "grad_norm": 3.4354248046875,
      "learning_rate": 3.586709351216474e-06,
      "loss": 0.0391,
      "step": 1419,
      "video_reward_cumulative_accuracy": 0.8136011275546159
    },
    {
      "epoch": 0.42149005639655684,
      "grad_norm": 1.596327543258667,
      "learning_rate": 3.5843759306660344e-06,
      "loss": 0.0905,
      "step": 1420,
      "video_reward_cumulative_accuracy": 0.8133802816901409
    },
    {
      "epoch": 0.4217868803799347,
      "grad_norm": 3.7419114112854004,
      "learning_rate": 3.5820413459327863e-06,
      "loss": 0.0678,
      "step": 1421,
      "video_reward_cumulative_accuracy": 0.812807881773399
    },
    {
      "epoch": 0.42208370436331255,
      "grad_norm": 3.569519281387329,
      "learning_rate": 3.579705599523132e-06,
      "loss": 0.047,
      "step": 1422,
      "video_reward_cumulative_accuracy": 0.8129395218002813
    },
    {
      "epoch": 0.42238052834669043,
      "grad_norm": 1.55231511592865,
      "learning_rate": 3.5773686939447226e-06,
      "loss": 0.0314,
      "step": 1423,
      "video_reward_cumulative_accuracy": 0.8130709768095573
    },
    {
      "epoch": 0.42267735233006826,
      "grad_norm": 2.8076114654541016,
      "learning_rate": 3.575030631706454e-06,
      "loss": 0.0511,
      "step": 1424,
      "video_reward_cumulative_accuracy": 0.8128511235955056
    },
    {
      "epoch": 0.42297417631344614,
      "grad_norm": 1.3293800354003906,
      "learning_rate": 3.5726914153184624e-06,
      "loss": 0.0334,
      "step": 1425,
      "video_reward_cumulative_accuracy": 0.8129824561403509
    },
    {
      "epoch": 0.42327100029682396,
      "grad_norm": 2.4649341106414795,
      "learning_rate": 3.570351047292123e-06,
      "loss": 0.0397,
      "step": 1426,
      "video_reward_cumulative_accuracy": 0.8131136044880786
    },
    {
      "epoch": 0.42356782428020184,
      "grad_norm": 3.885298490524292,
      "learning_rate": 3.5680095301400497e-06,
      "loss": 0.0345,
      "step": 1427,
      "video_reward_cumulative_accuracy": 0.8132445690259286
    },
    {
      "epoch": 0.4238646482635797,
      "grad_norm": 2.974383592605591,
      "learning_rate": 3.565666866376086e-06,
      "loss": 0.0572,
      "step": 1428,
      "video_reward_cumulative_accuracy": 0.8130252100840336
    },
    {
      "epoch": 0.42416147224695755,
      "grad_norm": 1.509018898010254,
      "learning_rate": 3.5633230585153093e-06,
      "loss": 0.0386,
      "step": 1429,
      "video_reward_cumulative_accuracy": 0.8131560531840448
    },
    {
      "epoch": 0.42445829623033543,
      "grad_norm": 1.3774346113204956,
      "learning_rate": 3.5609781090740264e-06,
      "loss": 0.0231,
      "step": 1430,
      "video_reward_cumulative_accuracy": 0.8132867132867133
    },
    {
      "epoch": 0.42475512021371326,
      "grad_norm": 2.268357276916504,
      "learning_rate": 3.558632020569768e-06,
      "loss": 0.045,
      "step": 1431,
      "video_reward_cumulative_accuracy": 0.8134171907756813
    },
    {
      "epoch": 0.42505194419709114,
      "grad_norm": 2.4706010818481445,
      "learning_rate": 3.5562847955212863e-06,
      "loss": 0.0662,
      "step": 1432,
      "video_reward_cumulative_accuracy": 0.8131983240223464
    },
    {
      "epoch": 0.42534876818046896,
      "grad_norm": 1.930578589439392,
      "learning_rate": 3.553936436448556e-06,
      "loss": 0.0474,
      "step": 1433,
      "video_reward_cumulative_accuracy": 0.8133286810886252
    },
    {
      "epoch": 0.42564559216384684,
      "grad_norm": 2.0366480350494385,
      "learning_rate": 3.551586945872769e-06,
      "loss": 0.061,
      "step": 1434,
      "video_reward_cumulative_accuracy": 0.8131101813110181
    },
    {
      "epoch": 0.4259424161472247,
      "grad_norm": 2.4363696575164795,
      "learning_rate": 3.5492363263163305e-06,
      "loss": 0.0386,
      "step": 1435,
      "video_reward_cumulative_accuracy": 0.8132404181184669
    },
    {
      "epoch": 0.42623924013060255,
      "grad_norm": 1.8315794467926025,
      "learning_rate": 3.546884580302859e-06,
      "loss": 0.0375,
      "step": 1436,
      "video_reward_cumulative_accuracy": 0.8133704735376045
    },
    {
      "epoch": 0.42653606411398043,
      "grad_norm": 2.6749696731567383,
      "learning_rate": 3.544531710357183e-06,
      "loss": 0.1026,
      "step": 1437,
      "video_reward_cumulative_accuracy": 0.8131524008350731
    },
    {
      "epoch": 0.42683288809735825,
      "grad_norm": 1.775738000869751,
      "learning_rate": 3.5421777190053354e-06,
      "loss": 0.0308,
      "step": 1438,
      "video_reward_cumulative_accuracy": 0.8132823365785814
    },
    {
      "epoch": 0.42712971208073613,
      "grad_norm": 2.429361581802368,
      "learning_rate": 3.539822608774555e-06,
      "loss": 0.0293,
      "step": 1439,
      "video_reward_cumulative_accuracy": 0.8134120917303683
    },
    {
      "epoch": 0.42742653606411396,
      "grad_norm": 1.1443023681640625,
      "learning_rate": 3.537466382193282e-06,
      "loss": 0.0219,
      "step": 1440,
      "video_reward_cumulative_accuracy": 0.8135416666666667
    },
    {
      "epoch": 0.42772336004749184,
      "grad_norm": 2.1525886058807373,
      "learning_rate": 3.535109041791153e-06,
      "loss": 0.0612,
      "step": 1441,
      "video_reward_cumulative_accuracy": 0.8136710617626648
    },
    {
      "epoch": 0.4280201840308697,
      "grad_norm": 2.423384666442871,
      "learning_rate": 3.532750590099002e-06,
      "loss": 0.0459,
      "step": 1442,
      "video_reward_cumulative_accuracy": 0.8134535367545076
    },
    {
      "epoch": 0.42831700801424755,
      "grad_norm": 1.831072449684143,
      "learning_rate": 3.5303910296488565e-06,
      "loss": 0.0212,
      "step": 1443,
      "video_reward_cumulative_accuracy": 0.8135828135828136
    },
    {
      "epoch": 0.4286138319976254,
      "grad_norm": 3.769604444503784,
      "learning_rate": 3.528030362973933e-06,
      "loss": 0.0522,
      "step": 1444,
      "video_reward_cumulative_accuracy": 0.8133656509695291
    },
    {
      "epoch": 0.42891065598100325,
      "grad_norm": 2.6289186477661133,
      "learning_rate": 3.525668592608637e-06,
      "loss": 0.0766,
      "step": 1445,
      "video_reward_cumulative_accuracy": 0.8134948096885813
    },
    {
      "epoch": 0.42920747996438113,
      "grad_norm": 2.1730971336364746,
      "learning_rate": 3.523305721088558e-06,
      "loss": 0.0222,
      "step": 1446,
      "video_reward_cumulative_accuracy": 0.8136237897648686
    },
    {
      "epoch": 0.42950430394775896,
      "grad_norm": 1.4084819555282593,
      "learning_rate": 3.5209417509504668e-06,
      "loss": 0.0793,
      "step": 1447,
      "video_reward_cumulative_accuracy": 0.813752591568763
    },
    {
      "epoch": 0.42980112793113684,
      "grad_norm": 1.7031943798065186,
      "learning_rate": 3.518576684732316e-06,
      "loss": 0.0489,
      "step": 1448,
      "video_reward_cumulative_accuracy": 0.8138812154696132
    },
    {
      "epoch": 0.4300979519145147,
      "grad_norm": 2.925882339477539,
      "learning_rate": 3.5162105249732336e-06,
      "loss": 0.0379,
      "step": 1449,
      "video_reward_cumulative_accuracy": 0.8136645962732919
    },
    {
      "epoch": 0.43039477589789255,
      "grad_norm": 0.6623610854148865,
      "learning_rate": 3.5138432742135215e-06,
      "loss": 0.023,
      "step": 1450,
      "video_reward_cumulative_accuracy": 0.8137931034482758
    },
    {
      "epoch": 0.4306915998812704,
      "grad_norm": 1.5418504476547241,
      "learning_rate": 3.511474934994653e-06,
      "loss": 0.0315,
      "step": 1451,
      "video_reward_cumulative_accuracy": 0.8135768435561681
    },
    {
      "epoch": 0.43098842386464825,
      "grad_norm": 1.7840099334716797,
      "learning_rate": 3.509105509859271e-06,
      "loss": 0.0258,
      "step": 1452,
      "video_reward_cumulative_accuracy": 0.8137052341597796
    },
    {
      "epoch": 0.43128524784802613,
      "grad_norm": 3.910229444503784,
      "learning_rate": 3.5067350013511816e-06,
      "loss": 0.0932,
      "step": 1453,
      "video_reward_cumulative_accuracy": 0.8134893324156917
    },
    {
      "epoch": 0.43158207183140396,
      "grad_norm": 1.7669485807418823,
      "learning_rate": 3.5043634120153572e-06,
      "loss": 0.0423,
      "step": 1454,
      "video_reward_cumulative_accuracy": 0.813617606602476
    },
    {
      "epoch": 0.43187889581478184,
      "grad_norm": 0.9480779767036438,
      "learning_rate": 3.5019907443979297e-06,
      "loss": 0.0301,
      "step": 1455,
      "video_reward_cumulative_accuracy": 0.813745704467354
    },
    {
      "epoch": 0.4321757197981597,
      "grad_norm": 2.241234540939331,
      "learning_rate": 3.4996170010461862e-06,
      "loss": 0.0298,
      "step": 1456,
      "video_reward_cumulative_accuracy": 0.8138736263736264
    },
    {
      "epoch": 0.43247254378153754,
      "grad_norm": 2.01543927192688,
      "learning_rate": 3.497242184508571e-06,
      "loss": 0.0604,
      "step": 1457,
      "video_reward_cumulative_accuracy": 0.8140013726835964
    },
    {
      "epoch": 0.4327693677649154,
      "grad_norm": 1.9135305881500244,
      "learning_rate": 3.4948662973346816e-06,
      "loss": 0.0274,
      "step": 1458,
      "video_reward_cumulative_accuracy": 0.8141289437585734
    },
    {
      "epoch": 0.43306619174829325,
      "grad_norm": 1.8279916048049927,
      "learning_rate": 3.492489342075262e-06,
      "loss": 0.0504,
      "step": 1459,
      "video_reward_cumulative_accuracy": 0.8142563399588759
    },
    {
      "epoch": 0.43336301573167113,
      "grad_norm": 1.939100980758667,
      "learning_rate": 3.4901113212822057e-06,
      "loss": 0.0561,
      "step": 1460,
      "video_reward_cumulative_accuracy": 0.8143835616438356
    },
    {
      "epoch": 0.43365983971504896,
      "grad_norm": 1.3775911331176758,
      "learning_rate": 3.487732237508547e-06,
      "loss": 0.0456,
      "step": 1461,
      "video_reward_cumulative_accuracy": 0.8145106091718002
    },
    {
      "epoch": 0.43395666369842684,
      "grad_norm": 3.4904792308807373,
      "learning_rate": 3.485352093308465e-06,
      "loss": 0.0448,
      "step": 1462,
      "video_reward_cumulative_accuracy": 0.8146374829001368
    },
    {
      "epoch": 0.4342534876818047,
      "grad_norm": 1.0064564943313599,
      "learning_rate": 3.4829708912372746e-06,
      "loss": 0.0191,
      "step": 1463,
      "video_reward_cumulative_accuracy": 0.8147641831852358
    },
    {
      "epoch": 0.43455031166518254,
      "grad_norm": 2.0086288452148438,
      "learning_rate": 3.4805886338514277e-06,
      "loss": 0.0285,
      "step": 1464,
      "video_reward_cumulative_accuracy": 0.8145491803278688
    },
    {
      "epoch": 0.4348471356485604,
      "grad_norm": 2.064359664916992,
      "learning_rate": 3.4782053237085083e-06,
      "loss": 0.0519,
      "step": 1465,
      "video_reward_cumulative_accuracy": 0.8143344709897611
    },
    {
      "epoch": 0.43514395963193825,
      "grad_norm": 3.986988067626953,
      "learning_rate": 3.4758209633672313e-06,
      "loss": 0.081,
      "step": 1466,
      "video_reward_cumulative_accuracy": 0.8144611186903138
    },
    {
      "epoch": 0.43544078361531613,
      "grad_norm": 3.117048740386963,
      "learning_rate": 3.47343555538744e-06,
      "loss": 0.0216,
      "step": 1467,
      "video_reward_cumulative_accuracy": 0.8142467620995228
    },
    {
      "epoch": 0.43573760759869395,
      "grad_norm": 2.8893699645996094,
      "learning_rate": 3.4710491023300997e-06,
      "loss": 0.0428,
      "step": 1468,
      "video_reward_cumulative_accuracy": 0.8140326975476839
    },
    {
      "epoch": 0.43603443158207184,
      "grad_norm": 3.7498586177825928,
      "learning_rate": 3.468661606757301e-06,
      "loss": 0.0808,
      "step": 1469,
      "video_reward_cumulative_accuracy": 0.8138189244383934
    },
    {
      "epoch": 0.4363312555654497,
      "grad_norm": 1.7329144477844238,
      "learning_rate": 3.4662730712322514e-06,
      "loss": 0.0348,
      "step": 1470,
      "video_reward_cumulative_accuracy": 0.8136054421768707
    },
    {
      "epoch": 0.43662807954882754,
      "grad_norm": 5.551042079925537,
      "learning_rate": 3.4638834983192743e-06,
      "loss": 0.0883,
      "step": 1471,
      "video_reward_cumulative_accuracy": 0.8133922501699524
    },
    {
      "epoch": 0.4369249035322054,
      "grad_norm": 1.6959829330444336,
      "learning_rate": 3.4614928905838103e-06,
      "loss": 0.0174,
      "step": 1472,
      "video_reward_cumulative_accuracy": 0.8135190217391305
    },
    {
      "epoch": 0.43722172751558325,
      "grad_norm": 1.2532111406326294,
      "learning_rate": 3.4591012505924078e-06,
      "loss": 0.0122,
      "step": 1473,
      "video_reward_cumulative_accuracy": 0.8136456211812627
    },
    {
      "epoch": 0.43751855149896113,
      "grad_norm": 0.6892161965370178,
      "learning_rate": 3.4567085809127247e-06,
      "loss": 0.0065,
      "step": 1474,
      "video_reward_cumulative_accuracy": 0.8137720488466758
    },
    {
      "epoch": 0.43781537548233895,
      "grad_norm": 3.311598539352417,
      "learning_rate": 3.4543148841135243e-06,
      "loss": 0.0672,
      "step": 1475,
      "video_reward_cumulative_accuracy": 0.8135593220338984
    },
    {
      "epoch": 0.43811219946571683,
      "grad_norm": 1.2820119857788086,
      "learning_rate": 3.4519201627646713e-06,
      "loss": 0.0293,
      "step": 1476,
      "video_reward_cumulative_accuracy": 0.8133468834688347
    },
    {
      "epoch": 0.43840902344909466,
      "grad_norm": 1.7546663284301758,
      "learning_rate": 3.4495244194371337e-06,
      "loss": 0.0629,
      "step": 1477,
      "video_reward_cumulative_accuracy": 0.8134732566012187
    },
    {
      "epoch": 0.43870584743247254,
      "grad_norm": 3.1134088039398193,
      "learning_rate": 3.447127656702971e-06,
      "loss": 0.061,
      "step": 1478,
      "video_reward_cumulative_accuracy": 0.8135994587280109
    },
    {
      "epoch": 0.4390026714158504,
      "grad_norm": 3.5772175788879395,
      "learning_rate": 3.444729877135345e-06,
      "loss": 0.0409,
      "step": 1479,
      "video_reward_cumulative_accuracy": 0.8137254901960784
    },
    {
      "epoch": 0.43929949539922825,
      "grad_norm": 1.2507808208465576,
      "learning_rate": 3.4423310833085015e-06,
      "loss": 0.0542,
      "step": 1480,
      "video_reward_cumulative_accuracy": 0.8135135135135135
    },
    {
      "epoch": 0.4395963193826061,
      "grad_norm": 0.4887540936470032,
      "learning_rate": 3.4399312777977794e-06,
      "loss": 0.0112,
      "step": 1481,
      "video_reward_cumulative_accuracy": 0.8136394328156651
    },
    {
      "epoch": 0.43989314336598395,
      "grad_norm": 0.6733037233352661,
      "learning_rate": 3.437530463179604e-06,
      "loss": 0.0139,
      "step": 1482,
      "video_reward_cumulative_accuracy": 0.8137651821862348
    },
    {
      "epoch": 0.44018996734936183,
      "grad_norm": 0.46859636902809143,
      "learning_rate": 3.4351286420314807e-06,
      "loss": 0.0161,
      "step": 1483,
      "video_reward_cumulative_accuracy": 0.8138907619689818
    },
    {
      "epoch": 0.44048679133273966,
      "grad_norm": 1.171273946762085,
      "learning_rate": 3.4327258169319986e-06,
      "loss": 0.0282,
      "step": 1484,
      "video_reward_cumulative_accuracy": 0.8140161725067385
    },
    {
      "epoch": 0.44078361531611754,
      "grad_norm": 1.2421537637710571,
      "learning_rate": 3.4303219904608244e-06,
      "loss": 0.0144,
      "step": 1485,
      "video_reward_cumulative_accuracy": 0.8138047138047138
    },
    {
      "epoch": 0.4410804392994954,
      "grad_norm": 1.5912413597106934,
      "learning_rate": 3.427917165198698e-06,
      "loss": 0.0317,
      "step": 1486,
      "video_reward_cumulative_accuracy": 0.8135935397039031
    },
    {
      "epoch": 0.44137726328287324,
      "grad_norm": 2.529520034790039,
      "learning_rate": 3.425511343727434e-06,
      "loss": 0.04,
      "step": 1487,
      "video_reward_cumulative_accuracy": 0.8137188971082717
    },
    {
      "epoch": 0.4416740872662511,
      "grad_norm": 2.3800694942474365,
      "learning_rate": 3.4231045286299136e-06,
      "loss": 0.0557,
      "step": 1488,
      "video_reward_cumulative_accuracy": 0.8138440860215054
    },
    {
      "epoch": 0.44197091124962895,
      "grad_norm": 1.2274895906448364,
      "learning_rate": 3.4206967224900885e-06,
      "loss": 0.0194,
      "step": 1489,
      "video_reward_cumulative_accuracy": 0.8139691067830759
    },
    {
      "epoch": 0.44226773523300683,
      "grad_norm": 1.3150626420974731,
      "learning_rate": 3.41828792789297e-06,
      "loss": 0.0124,
      "step": 1490,
      "video_reward_cumulative_accuracy": 0.8140939597315436
    },
    {
      "epoch": 0.44256455921638466,
      "grad_norm": 1.3226598501205444,
      "learning_rate": 3.415878147424634e-06,
      "loss": 0.0129,
      "step": 1491,
      "video_reward_cumulative_accuracy": 0.8142186452045607
    },
    {
      "epoch": 0.44286138319976254,
      "grad_norm": 2.370067596435547,
      "learning_rate": 3.413467383672214e-06,
      "loss": 0.025,
      "step": 1492,
      "video_reward_cumulative_accuracy": 0.814343163538874
    },
    {
      "epoch": 0.4431582071831404,
      "grad_norm": 2.3997597694396973,
      "learning_rate": 3.411055639223898e-06,
      "loss": 0.0187,
      "step": 1493,
      "video_reward_cumulative_accuracy": 0.8144675150703282
    },
    {
      "epoch": 0.44345503116651824,
      "grad_norm": 1.877609133720398,
      "learning_rate": 3.4086429166689296e-06,
      "loss": 0.0527,
      "step": 1494,
      "video_reward_cumulative_accuracy": 0.8145917001338688
    },
    {
      "epoch": 0.4437518551498961,
      "grad_norm": 4.125174522399902,
      "learning_rate": 3.4062292185975987e-06,
      "loss": 0.0701,
      "step": 1495,
      "video_reward_cumulative_accuracy": 0.8147157190635451
    },
    {
      "epoch": 0.44404867913327395,
      "grad_norm": 2.3468966484069824,
      "learning_rate": 3.403814547601244e-06,
      "loss": 0.0234,
      "step": 1496,
      "video_reward_cumulative_accuracy": 0.8145053475935828
    },
    {
      "epoch": 0.44434550311665183,
      "grad_norm": 0.8067638874053955,
      "learning_rate": 3.4013989062722514e-06,
      "loss": 0.0121,
      "step": 1497,
      "video_reward_cumulative_accuracy": 0.814629258517034
    },
    {
      "epoch": 0.44464232710002966,
      "grad_norm": 1.2667425870895386,
      "learning_rate": 3.398982297204045e-06,
      "loss": 0.0718,
      "step": 1498,
      "video_reward_cumulative_accuracy": 0.8147530040053405
    },
    {
      "epoch": 0.44493915108340754,
      "grad_norm": 4.346756935119629,
      "learning_rate": 3.396564722991089e-06,
      "loss": 0.037,
      "step": 1499,
      "video_reward_cumulative_accuracy": 0.8148765843895931
    },
    {
      "epoch": 0.4452359750667854,
      "grad_norm": 2.6099324226379395,
      "learning_rate": 3.394146186228885e-06,
      "loss": 0.0565,
      "step": 1500,
      "video_reward_cumulative_accuracy": 0.815
    },
    {
      "epoch": 0.44553279905016324,
      "grad_norm": 2.9593958854675293,
      "learning_rate": 3.3917266895139654e-06,
      "loss": 0.0548,
      "step": 1501,
      "video_reward_cumulative_accuracy": 0.8147901399067289
    },
    {
      "epoch": 0.4458296230335411,
      "grad_norm": 1.9937952756881714,
      "learning_rate": 3.389306235443896e-06,
      "loss": 0.0267,
      "step": 1502,
      "video_reward_cumulative_accuracy": 0.8145805592543276
    },
    {
      "epoch": 0.44612644701691895,
      "grad_norm": 2.4806485176086426,
      "learning_rate": 3.3868848266172693e-06,
      "loss": 0.0222,
      "step": 1503,
      "video_reward_cumulative_accuracy": 0.8143712574850299
    },
    {
      "epoch": 0.44642327100029683,
      "grad_norm": 4.427879333496094,
      "learning_rate": 3.384462465633702e-06,
      "loss": 0.0498,
      "step": 1504,
      "video_reward_cumulative_accuracy": 0.8144946808510638
    },
    {
      "epoch": 0.44672009498367465,
      "grad_norm": 3.623908519744873,
      "learning_rate": 3.3820391550938337e-06,
      "loss": 0.0426,
      "step": 1505,
      "video_reward_cumulative_accuracy": 0.8142857142857143
    },
    {
      "epoch": 0.44701691896705253,
      "grad_norm": 3.808448076248169,
      "learning_rate": 3.3796148975993236e-06,
      "loss": 0.0388,
      "step": 1506,
      "video_reward_cumulative_accuracy": 0.8144090305444888
    },
    {
      "epoch": 0.4473137429504304,
      "grad_norm": 1.8826570510864258,
      "learning_rate": 3.3771896957528476e-06,
      "loss": 0.0414,
      "step": 1507,
      "video_reward_cumulative_accuracy": 0.814200398142004
    },
    {
      "epoch": 0.44761056693380824,
      "grad_norm": 2.2933990955352783,
      "learning_rate": 3.374763552158095e-06,
      "loss": 0.0396,
      "step": 1508,
      "video_reward_cumulative_accuracy": 0.8143236074270557
    },
    {
      "epoch": 0.4479073909171861,
      "grad_norm": 2.446279764175415,
      "learning_rate": 3.372336469419767e-06,
      "loss": 0.0834,
      "step": 1509,
      "video_reward_cumulative_accuracy": 0.8144466534128562
    },
    {
      "epoch": 0.44820421490056395,
      "grad_norm": 1.7556294202804565,
      "learning_rate": 3.3699084501435717e-06,
      "loss": 0.0329,
      "step": 1510,
      "video_reward_cumulative_accuracy": 0.8142384105960265
    },
    {
      "epoch": 0.4485010388839418,
      "grad_norm": 1.7487667798995972,
      "learning_rate": 3.3674794969362235e-06,
      "loss": 0.0511,
      "step": 1511,
      "video_reward_cumulative_accuracy": 0.814361350099272
    },
    {
      "epoch": 0.44879786286731965,
      "grad_norm": 2.5053646564483643,
      "learning_rate": 3.365049612405441e-06,
      "loss": 0.0534,
      "step": 1512,
      "video_reward_cumulative_accuracy": 0.814484126984127
    },
    {
      "epoch": 0.44909468685069753,
      "grad_norm": 1.7758805751800537,
      "learning_rate": 3.3626187991599384e-06,
      "loss": 0.0223,
      "step": 1513,
      "video_reward_cumulative_accuracy": 0.8146067415730337
    },
    {
      "epoch": 0.4493915108340754,
      "grad_norm": 3.1137726306915283,
      "learning_rate": 3.3601870598094317e-06,
      "loss": 0.0652,
      "step": 1514,
      "video_reward_cumulative_accuracy": 0.8147291941875826
    },
    {
      "epoch": 0.44968833481745324,
      "grad_norm": 1.9859057664871216,
      "learning_rate": 3.3577543969646287e-06,
      "loss": 0.0781,
      "step": 1515,
      "video_reward_cumulative_accuracy": 0.8148514851485148
    },
    {
      "epoch": 0.4499851588008311,
      "grad_norm": 2.5349478721618652,
      "learning_rate": 3.3553208132372284e-06,
      "loss": 0.056,
      "step": 1516,
      "video_reward_cumulative_accuracy": 0.8149736147757256
    },
    {
      "epoch": 0.45028198278420895,
      "grad_norm": 1.9917017221450806,
      "learning_rate": 3.35288631123992e-06,
      "loss": 0.0526,
      "step": 1517,
      "video_reward_cumulative_accuracy": 0.8147659854976929
    },
    {
      "epoch": 0.4505788067675868,
      "grad_norm": 2.3147571086883545,
      "learning_rate": 3.3504508935863776e-06,
      "loss": 0.0395,
      "step": 1518,
      "video_reward_cumulative_accuracy": 0.8148880105401844
    },
    {
      "epoch": 0.45087563075096465,
      "grad_norm": 3.1221325397491455,
      "learning_rate": 3.3480145628912574e-06,
      "loss": 0.0416,
      "step": 1519,
      "video_reward_cumulative_accuracy": 0.815009874917709
    },
    {
      "epoch": 0.45117245473434253,
      "grad_norm": 3.894681692123413,
      "learning_rate": 3.3455773217701977e-06,
      "loss": 0.0472,
      "step": 1520,
      "video_reward_cumulative_accuracy": 0.8151315789473684
    },
    {
      "epoch": 0.4514692787177204,
      "grad_norm": 2.071953773498535,
      "learning_rate": 3.343139172839813e-06,
      "loss": 0.0382,
      "step": 1521,
      "video_reward_cumulative_accuracy": 0.8152531229454306
    },
    {
      "epoch": 0.45176610270109824,
      "grad_norm": 1.4821226596832275,
      "learning_rate": 3.3407001187176934e-06,
      "loss": 0.0516,
      "step": 1522,
      "video_reward_cumulative_accuracy": 0.8153745072273325
    },
    {
      "epoch": 0.4520629266844761,
      "grad_norm": 2.996478796005249,
      "learning_rate": 3.3382601620224e-06,
      "loss": 0.0383,
      "step": 1523,
      "video_reward_cumulative_accuracy": 0.8154957321076822
    },
    {
      "epoch": 0.45235975066785394,
      "grad_norm": 0.9077578783035278,
      "learning_rate": 3.335819305373463e-06,
      "loss": 0.0113,
      "step": 1524,
      "video_reward_cumulative_accuracy": 0.8156167979002624
    },
    {
      "epoch": 0.4526565746512318,
      "grad_norm": 3.7463269233703613,
      "learning_rate": 3.333377551391379e-06,
      "loss": 0.065,
      "step": 1525,
      "video_reward_cumulative_accuracy": 0.8157377049180328
    },
    {
      "epoch": 0.45295339863460965,
      "grad_norm": 2.012558698654175,
      "learning_rate": 3.3309349026976074e-06,
      "loss": 0.0298,
      "step": 1526,
      "video_reward_cumulative_accuracy": 0.8158584534731324
    },
    {
      "epoch": 0.45325022261798753,
      "grad_norm": 2.4637279510498047,
      "learning_rate": 3.3284913619145697e-06,
      "loss": 0.0271,
      "step": 1527,
      "video_reward_cumulative_accuracy": 0.8159790438768828
    },
    {
      "epoch": 0.4535470466013654,
      "grad_norm": 1.1083879470825195,
      "learning_rate": 3.3260469316656435e-06,
      "loss": 0.0653,
      "step": 1528,
      "video_reward_cumulative_accuracy": 0.8157722513089005
    },
    {
      "epoch": 0.45384387058474324,
      "grad_norm": 0.8266122937202454,
      "learning_rate": 3.3236016145751616e-06,
      "loss": 0.0347,
      "step": 1529,
      "video_reward_cumulative_accuracy": 0.815892740353172
    },
    {
      "epoch": 0.4541406945681211,
      "grad_norm": 1.2026057243347168,
      "learning_rate": 3.32115541326841e-06,
      "loss": 0.035,
      "step": 1530,
      "video_reward_cumulative_accuracy": 0.8160130718954248
    },
    {
      "epoch": 0.45443751855149894,
      "grad_norm": 1.0709147453308105,
      "learning_rate": 3.3187083303716218e-06,
      "loss": 0.0366,
      "step": 1531,
      "video_reward_cumulative_accuracy": 0.8154800783801437
    },
    {
      "epoch": 0.4547343425348768,
      "grad_norm": 2.988943338394165,
      "learning_rate": 3.3162603685119795e-06,
      "loss": 0.075,
      "step": 1532,
      "video_reward_cumulative_accuracy": 0.8152741514360313
    },
    {
      "epoch": 0.45503116651825465,
      "grad_norm": 4.098608493804932,
      "learning_rate": 3.3138115303176073e-06,
      "loss": 0.0878,
      "step": 1533,
      "video_reward_cumulative_accuracy": 0.8153946510110893
    },
    {
      "epoch": 0.45532799050163253,
      "grad_norm": 1.185163140296936,
      "learning_rate": 3.31136181841757e-06,
      "loss": 0.0365,
      "step": 1534,
      "video_reward_cumulative_accuracy": 0.8155149934810951
    },
    {
      "epoch": 0.4556248144850104,
      "grad_norm": 1.2512524127960205,
      "learning_rate": 3.308911235441873e-06,
      "loss": 0.0201,
      "step": 1535,
      "video_reward_cumulative_accuracy": 0.8156351791530945
    },
    {
      "epoch": 0.45592163846838824,
      "grad_norm": 1.520028829574585,
      "learning_rate": 3.306459784021452e-06,
      "loss": 0.0322,
      "step": 1536,
      "video_reward_cumulative_accuracy": 0.8154296875
    },
    {
      "epoch": 0.4562184624517661,
      "grad_norm": 1.225994348526001,
      "learning_rate": 3.304007466788181e-06,
      "loss": 0.0554,
      "step": 1537,
      "video_reward_cumulative_accuracy": 0.815224463240078
    },
    {
      "epoch": 0.45651528643514394,
      "grad_norm": 2.5382115840911865,
      "learning_rate": 3.301554286374859e-06,
      "loss": 0.0308,
      "step": 1538,
      "video_reward_cumulative_accuracy": 0.8153446033810143
    },
    {
      "epoch": 0.4568121104185218,
      "grad_norm": 1.67540442943573,
      "learning_rate": 3.2991002454152133e-06,
      "loss": 0.0605,
      "step": 1539,
      "video_reward_cumulative_accuracy": 0.8151397011046134
    },
    {
      "epoch": 0.45710893440189965,
      "grad_norm": 0.42339617013931274,
      "learning_rate": 3.2966453465438954e-06,
      "loss": 0.011,
      "step": 1540,
      "video_reward_cumulative_accuracy": 0.8152597402597402
    },
    {
      "epoch": 0.4574057583852775,
      "grad_norm": 1.9776630401611328,
      "learning_rate": 3.294189592396477e-06,
      "loss": 0.0485,
      "step": 1541,
      "video_reward_cumulative_accuracy": 0.8153796236210253
    },
    {
      "epoch": 0.4577025823686554,
      "grad_norm": 3.960407018661499,
      "learning_rate": 3.29173298560945e-06,
      "loss": 0.0464,
      "step": 1542,
      "video_reward_cumulative_accuracy": 0.8154993514915694
    },
    {
      "epoch": 0.45799940635203323,
      "grad_norm": 0.9101660251617432,
      "learning_rate": 3.289275528820218e-06,
      "loss": 0.0242,
      "step": 1543,
      "video_reward_cumulative_accuracy": 0.8156189241736876
    },
    {
      "epoch": 0.4582962303354111,
      "grad_norm": 6.294965744018555,
      "learning_rate": 3.2868172246671005e-06,
      "loss": 0.0793,
      "step": 1544,
      "video_reward_cumulative_accuracy": 0.8157383419689119
    },
    {
      "epoch": 0.45859305431878894,
      "grad_norm": 2.3557205200195312,
      "learning_rate": 3.2843580757893266e-06,
      "loss": 0.0603,
      "step": 1545,
      "video_reward_cumulative_accuracy": 0.8158576051779936
    },
    {
      "epoch": 0.4588898783021668,
      "grad_norm": 3.9995932579040527,
      "learning_rate": 3.28189808482703e-06,
      "loss": 0.0902,
      "step": 1546,
      "video_reward_cumulative_accuracy": 0.815653298835705
    },
    {
      "epoch": 0.45918670228554465,
      "grad_norm": 1.1430221796035767,
      "learning_rate": 3.2794372544212495e-06,
      "loss": 0.0306,
      "step": 1547,
      "video_reward_cumulative_accuracy": 0.8157724628312863
    },
    {
      "epoch": 0.4594835262689225,
      "grad_norm": 4.331371784210205,
      "learning_rate": 3.2769755872139264e-06,
      "loss": 0.0725,
      "step": 1548,
      "video_reward_cumulative_accuracy": 0.8158914728682171
    },
    {
      "epoch": 0.4597803502523004,
      "grad_norm": 2.8517324924468994,
      "learning_rate": 3.274513085847899e-06,
      "loss": 0.0492,
      "step": 1549,
      "video_reward_cumulative_accuracy": 0.815687540348612
    },
    {
      "epoch": 0.46007717423567823,
      "grad_norm": 2.0116770267486572,
      "learning_rate": 3.272049752966901e-06,
      "loss": 0.0484,
      "step": 1550,
      "video_reward_cumulative_accuracy": 0.8158064516129032
    },
    {
      "epoch": 0.4603739982190561,
      "grad_norm": 2.2900784015655518,
      "learning_rate": 3.2695855912155605e-06,
      "loss": 0.0372,
      "step": 1551,
      "video_reward_cumulative_accuracy": 0.8156028368794326
    },
    {
      "epoch": 0.46067082220243394,
      "grad_norm": 2.2841272354125977,
      "learning_rate": 3.2671206032393926e-06,
      "loss": 0.0331,
      "step": 1552,
      "video_reward_cumulative_accuracy": 0.8150773195876289
    },
    {
      "epoch": 0.4609676461858118,
      "grad_norm": 1.335290789604187,
      "learning_rate": 3.264654791684803e-06,
      "loss": 0.0229,
      "step": 1553,
      "video_reward_cumulative_accuracy": 0.815196394075982
    },
    {
      "epoch": 0.46126447016918964,
      "grad_norm": 4.430738925933838,
      "learning_rate": 3.2621881591990784e-06,
      "loss": 0.0468,
      "step": 1554,
      "video_reward_cumulative_accuracy": 0.8153153153153153
    },
    {
      "epoch": 0.4615612941525675,
      "grad_norm": 5.130397319793701,
      "learning_rate": 3.2597207084303893e-06,
      "loss": 0.0863,
      "step": 1555,
      "video_reward_cumulative_accuracy": 0.8154340836012862
    },
    {
      "epoch": 0.4618581181359454,
      "grad_norm": 2.924949884414673,
      "learning_rate": 3.2572524420277825e-06,
      "loss": 0.0296,
      "step": 1556,
      "video_reward_cumulative_accuracy": 0.8152313624678663
    },
    {
      "epoch": 0.46215494211932323,
      "grad_norm": 1.930640697479248,
      "learning_rate": 3.2547833626411812e-06,
      "loss": 0.0361,
      "step": 1557,
      "video_reward_cumulative_accuracy": 0.8153500321130379
    },
    {
      "epoch": 0.4624517661027011,
      "grad_norm": 2.353710651397705,
      "learning_rate": 3.2523134729213833e-06,
      "loss": 0.0365,
      "step": 1558,
      "video_reward_cumulative_accuracy": 0.8151476251604621
    },
    {
      "epoch": 0.46274859008607894,
      "grad_norm": 2.0358400344848633,
      "learning_rate": 3.2498427755200546e-06,
      "loss": 0.0564,
      "step": 1559,
      "video_reward_cumulative_accuracy": 0.8152661962796665
    },
    {
      "epoch": 0.4630454140694568,
      "grad_norm": 2.5255966186523438,
      "learning_rate": 3.2473712730897282e-06,
      "loss": 0.0233,
      "step": 1560,
      "video_reward_cumulative_accuracy": 0.8153846153846154
    },
    {
      "epoch": 0.46334223805283464,
      "grad_norm": 0.2955355644226074,
      "learning_rate": 3.244898968283802e-06,
      "loss": 0.006,
      "step": 1561,
      "video_reward_cumulative_accuracy": 0.8155028827674567
    },
    {
      "epoch": 0.4636390620362125,
      "grad_norm": 1.5781440734863281,
      "learning_rate": 3.2424258637565347e-06,
      "loss": 0.0168,
      "step": 1562,
      "video_reward_cumulative_accuracy": 0.8156209987195903
    },
    {
      "epoch": 0.4639358860195904,
      "grad_norm": 1.639114260673523,
      "learning_rate": 3.239951962163045e-06,
      "loss": 0.0222,
      "step": 1563,
      "video_reward_cumulative_accuracy": 0.8154190658989123
    },
    {
      "epoch": 0.46423271000296823,
      "grad_norm": 0.8644540309906006,
      "learning_rate": 3.2374772661593055e-06,
      "loss": 0.0186,
      "step": 1564,
      "video_reward_cumulative_accuracy": 0.815537084398977
    },
    {
      "epoch": 0.4645295339863461,
      "grad_norm": 1.5920159816741943,
      "learning_rate": 3.235001778402143e-06,
      "loss": 0.0297,
      "step": 1565,
      "video_reward_cumulative_accuracy": 0.8156549520766773
    },
    {
      "epoch": 0.46482635796972394,
      "grad_norm": 2.091097354888916,
      "learning_rate": 3.232525501549234e-06,
      "loss": 0.0579,
      "step": 1566,
      "video_reward_cumulative_accuracy": 0.815772669220945
    },
    {
      "epoch": 0.4651231819531018,
      "grad_norm": 3.484055995941162,
      "learning_rate": 3.230048438259102e-06,
      "loss": 0.0441,
      "step": 1567,
      "video_reward_cumulative_accuracy": 0.8155711550733886
    },
    {
      "epoch": 0.46542000593647964,
      "grad_norm": 1.8032653331756592,
      "learning_rate": 3.227570591191114e-06,
      "loss": 0.0827,
      "step": 1568,
      "video_reward_cumulative_accuracy": 0.8156887755102041
    },
    {
      "epoch": 0.4657168299198575,
      "grad_norm": 2.3294694423675537,
      "learning_rate": 3.22509196300548e-06,
      "loss": 0.046,
      "step": 1569,
      "video_reward_cumulative_accuracy": 0.815806246016571
    },
    {
      "epoch": 0.4660136539032354,
      "grad_norm": 1.2451170682907104,
      "learning_rate": 3.222612556363249e-06,
      "loss": 0.0456,
      "step": 1570,
      "video_reward_cumulative_accuracy": 0.8159235668789809
    },
    {
      "epoch": 0.46631047788661323,
      "grad_norm": 2.33011531829834,
      "learning_rate": 3.2201323739263024e-06,
      "loss": 0.0337,
      "step": 1571,
      "video_reward_cumulative_accuracy": 0.8160407383831955
    },
    {
      "epoch": 0.4666073018699911,
      "grad_norm": 3.825652599334717,
      "learning_rate": 3.217651418357359e-06,
      "loss": 0.0352,
      "step": 1572,
      "video_reward_cumulative_accuracy": 0.8158396946564885
    },
    {
      "epoch": 0.46690412585336893,
      "grad_norm": 2.4924354553222656,
      "learning_rate": 3.2151696923199636e-06,
      "loss": 0.07,
      "step": 1573,
      "video_reward_cumulative_accuracy": 0.8156389065479974
    },
    {
      "epoch": 0.4672009498367468,
      "grad_norm": 3.4220986366271973,
      "learning_rate": 3.2126871984784907e-06,
      "loss": 0.079,
      "step": 1574,
      "video_reward_cumulative_accuracy": 0.8157560355781448
    },
    {
      "epoch": 0.46749777382012464,
      "grad_norm": 2.504857301712036,
      "learning_rate": 3.210203939498139e-06,
      "loss": 0.0278,
      "step": 1575,
      "video_reward_cumulative_accuracy": 0.8158730158730159
    },
    {
      "epoch": 0.4677945978035025,
      "grad_norm": 2.0332424640655518,
      "learning_rate": 3.207719918044927e-06,
      "loss": 0.0329,
      "step": 1576,
      "video_reward_cumulative_accuracy": 0.815989847715736
    },
    {
      "epoch": 0.4680914217868804,
      "grad_norm": 1.2692821025848389,
      "learning_rate": 3.205235136785693e-06,
      "loss": 0.0211,
      "step": 1577,
      "video_reward_cumulative_accuracy": 0.8161065313887127
    },
    {
      "epoch": 0.4683882457702582,
      "grad_norm": 1.8608229160308838,
      "learning_rate": 3.202749598388092e-06,
      "loss": 0.0441,
      "step": 1578,
      "video_reward_cumulative_accuracy": 0.8162230671736375
    },
    {
      "epoch": 0.4686850697536361,
      "grad_norm": 0.5689178109169006,
      "learning_rate": 3.200263305520589e-06,
      "loss": 0.0146,
      "step": 1579,
      "video_reward_cumulative_accuracy": 0.8163394553514883
    },
    {
      "epoch": 0.46898189373701393,
      "grad_norm": 1.689761996269226,
      "learning_rate": 3.197776260852462e-06,
      "loss": 0.0347,
      "step": 1580,
      "video_reward_cumulative_accuracy": 0.8161392405063291
    },
    {
      "epoch": 0.4692787177203918,
      "grad_norm": 2.7706823348999023,
      "learning_rate": 3.195288467053795e-06,
      "loss": 0.0643,
      "step": 1581,
      "video_reward_cumulative_accuracy": 0.8162555344718533
    },
    {
      "epoch": 0.46957554170376964,
      "grad_norm": 1.1557561159133911,
      "learning_rate": 3.1927999267954746e-06,
      "loss": 0.0424,
      "step": 1582,
      "video_reward_cumulative_accuracy": 0.8163716814159292
    },
    {
      "epoch": 0.4698723656871475,
      "grad_norm": 2.010145902633667,
      "learning_rate": 3.1903106427491914e-06,
      "loss": 0.0498,
      "step": 1583,
      "video_reward_cumulative_accuracy": 0.8164876816171825
    },
    {
      "epoch": 0.4701691896705254,
      "grad_norm": 3.788320302963257,
      "learning_rate": 3.1878206175874334e-06,
      "loss": 0.0303,
      "step": 1584,
      "video_reward_cumulative_accuracy": 0.8166035353535354
    },
    {
      "epoch": 0.4704660136539032,
      "grad_norm": 3.469613552093506,
      "learning_rate": 3.1853298539834836e-06,
      "loss": 0.0477,
      "step": 1585,
      "video_reward_cumulative_accuracy": 0.8167192429022082
    },
    {
      "epoch": 0.4707628376372811,
      "grad_norm": 5.445047378540039,
      "learning_rate": 3.1828383546114196e-06,
      "loss": 0.0825,
      "step": 1586,
      "video_reward_cumulative_accuracy": 0.8168348045397226
    },
    {
      "epoch": 0.47105966162065893,
      "grad_norm": 2.0806350708007812,
      "learning_rate": 3.180346122146106e-06,
      "loss": 0.0379,
      "step": 1587,
      "video_reward_cumulative_accuracy": 0.8169502205419029
    },
    {
      "epoch": 0.4713564856040368,
      "grad_norm": 1.694346308708191,
      "learning_rate": 3.177853159263199e-06,
      "loss": 0.0271,
      "step": 1588,
      "video_reward_cumulative_accuracy": 0.8167506297229219
    },
    {
      "epoch": 0.47165330958741464,
      "grad_norm": 2.332949161529541,
      "learning_rate": 3.1753594686391343e-06,
      "loss": 0.0367,
      "step": 1589,
      "video_reward_cumulative_accuracy": 0.8165512901195721
    },
    {
      "epoch": 0.4719501335707925,
      "grad_norm": 0.6793799996376038,
      "learning_rate": 3.1728650529511308e-06,
      "loss": 0.0154,
      "step": 1590,
      "video_reward_cumulative_accuracy": 0.8166666666666667
    },
    {
      "epoch": 0.4722469575541704,
      "grad_norm": 3.225214958190918,
      "learning_rate": 3.1703699148771866e-06,
      "loss": 0.0366,
      "step": 1591,
      "video_reward_cumulative_accuracy": 0.816781898177247
    },
    {
      "epoch": 0.4725437815375482,
      "grad_norm": 1.2964482307434082,
      "learning_rate": 3.167874057096073e-06,
      "loss": 0.0323,
      "step": 1592,
      "video_reward_cumulative_accuracy": 0.8168969849246231
    },
    {
      "epoch": 0.4728406055209261,
      "grad_norm": 1.0534178018569946,
      "learning_rate": 3.1653774822873378e-06,
      "loss": 0.0419,
      "step": 1593,
      "video_reward_cumulative_accuracy": 0.8166980539861896
    },
    {
      "epoch": 0.47313742950430393,
      "grad_norm": 3.556938886642456,
      "learning_rate": 3.162880193131296e-06,
      "loss": 0.04,
      "step": 1594,
      "video_reward_cumulative_accuracy": 0.8164993726474279
    },
    {
      "epoch": 0.4734342534876818,
      "grad_norm": 2.3933658599853516,
      "learning_rate": 3.1603821923090277e-06,
      "loss": 0.031,
      "step": 1595,
      "video_reward_cumulative_accuracy": 0.8166144200626959
    },
    {
      "epoch": 0.47373107747105964,
      "grad_norm": 2.3574721813201904,
      "learning_rate": 3.157883482502382e-06,
      "loss": 0.0415,
      "step": 1596,
      "video_reward_cumulative_accuracy": 0.8167293233082706
    },
    {
      "epoch": 0.4740279014544375,
      "grad_norm": 3.107463836669922,
      "learning_rate": 3.155384066393964e-06,
      "loss": 0.0538,
      "step": 1597,
      "video_reward_cumulative_accuracy": 0.8168440826549781
    },
    {
      "epoch": 0.4743247254378154,
      "grad_norm": 1.9954248666763306,
      "learning_rate": 3.1528839466671413e-06,
      "loss": 0.0545,
      "step": 1598,
      "video_reward_cumulative_accuracy": 0.8169586983729662
    },
    {
      "epoch": 0.4746215494211932,
      "grad_norm": 4.566195964813232,
      "learning_rate": 3.1503831260060346e-06,
      "loss": 0.0645,
      "step": 1599,
      "video_reward_cumulative_accuracy": 0.8170731707317073
    },
    {
      "epoch": 0.4749183734045711,
      "grad_norm": 1.2144123315811157,
      "learning_rate": 3.1478816070955164e-06,
      "loss": 0.0793,
      "step": 1600,
      "video_reward_cumulative_accuracy": 0.816875
    },
    {
      "epoch": 0.4749183734045711,
      "eval_runtime": 148.9352,
      "eval_samples_per_second": 5.298,
      "eval_steps_per_second": 0.665,
      "eval_test_set_accuracy": 0.7815656565656566,
      "step": 1600
    },
    {
      "epoch": 0.47521519738794893,
      "grad_norm": 1.2719308137893677,
      "learning_rate": 3.1453793926212125e-06,
      "loss": 0.0114,
      "step": 1601,
      "video_reward_cumulative_accuracy": 0.8169893816364772
    },
    {
      "epoch": 0.4755120213713268,
      "grad_norm": 1.4081724882125854,
      "learning_rate": 3.1428764852694905e-06,
      "loss": 0.0274,
      "step": 1602,
      "video_reward_cumulative_accuracy": 0.8167915106117354
    },
    {
      "epoch": 0.47580884535470463,
      "grad_norm": 2.737161874771118,
      "learning_rate": 3.1403728877274662e-06,
      "loss": 0.0492,
      "step": 1603,
      "video_reward_cumulative_accuracy": 0.8165938864628821
    },
    {
      "epoch": 0.4761056693380825,
      "grad_norm": 3.401216983795166,
      "learning_rate": 3.137868602682993e-06,
      "loss": 0.0512,
      "step": 1604,
      "video_reward_cumulative_accuracy": 0.816708229426434
    },
    {
      "epoch": 0.4764024933214604,
      "grad_norm": 3.201937437057495,
      "learning_rate": 3.1353636328246652e-06,
      "loss": 0.0277,
      "step": 1605,
      "video_reward_cumulative_accuracy": 0.8165109034267912
    },
    {
      "epoch": 0.4766993173048382,
      "grad_norm": 4.124434471130371,
      "learning_rate": 3.1328579808418103e-06,
      "loss": 0.043,
      "step": 1606,
      "video_reward_cumulative_accuracy": 0.8166251556662516
    },
    {
      "epoch": 0.4769961412882161,
      "grad_norm": 2.2116482257843018,
      "learning_rate": 3.1303516494244897e-06,
      "loss": 0.0553,
      "step": 1607,
      "video_reward_cumulative_accuracy": 0.8167392657125078
    },
    {
      "epoch": 0.4772929652715939,
      "grad_norm": 2.262800455093384,
      "learning_rate": 3.127844641263493e-06,
      "loss": 0.0599,
      "step": 1608,
      "video_reward_cumulative_accuracy": 0.8165422885572139
    },
    {
      "epoch": 0.4775897892549718,
      "grad_norm": 3.7271931171417236,
      "learning_rate": 3.1253369590503357e-06,
      "loss": 0.0817,
      "step": 1609,
      "video_reward_cumulative_accuracy": 0.8160348042262274
    },
    {
      "epoch": 0.47788661323834963,
      "grad_norm": 1.643730878829956,
      "learning_rate": 3.12282860547726e-06,
      "loss": 0.0279,
      "step": 1610,
      "video_reward_cumulative_accuracy": 0.8158385093167702
    },
    {
      "epoch": 0.4781834372217275,
      "grad_norm": 2.743765115737915,
      "learning_rate": 3.1203195832372256e-06,
      "loss": 0.0265,
      "step": 1611,
      "video_reward_cumulative_accuracy": 0.8159528243327127
    },
    {
      "epoch": 0.4784802612051054,
      "grad_norm": 3.5921216011047363,
      "learning_rate": 3.1178098950239118e-06,
      "loss": 0.0393,
      "step": 1612,
      "video_reward_cumulative_accuracy": 0.8160669975186104
    },
    {
      "epoch": 0.4787770851884832,
      "grad_norm": 1.2250019311904907,
      "learning_rate": 3.115299543531713e-06,
      "loss": 0.0227,
      "step": 1613,
      "video_reward_cumulative_accuracy": 0.8161810291382517
    },
    {
      "epoch": 0.4790739091718611,
      "grad_norm": 1.2587252855300903,
      "learning_rate": 3.1127885314557343e-06,
      "loss": 0.0438,
      "step": 1614,
      "video_reward_cumulative_accuracy": 0.8162949194547707
    },
    {
      "epoch": 0.4793707331552389,
      "grad_norm": 2.4114818572998047,
      "learning_rate": 3.110276861491791e-06,
      "loss": 0.0305,
      "step": 1615,
      "video_reward_cumulative_accuracy": 0.8164086687306501
    },
    {
      "epoch": 0.4796675571386168,
      "grad_norm": 0.43257880210876465,
      "learning_rate": 3.107764536336405e-06,
      "loss": 0.0134,
      "step": 1616,
      "video_reward_cumulative_accuracy": 0.8165222772277227
    },
    {
      "epoch": 0.47996438112199463,
      "grad_norm": 2.9027011394500732,
      "learning_rate": 3.1052515586868005e-06,
      "loss": 0.0472,
      "step": 1617,
      "video_reward_cumulative_accuracy": 0.8166357452071737
    },
    {
      "epoch": 0.4802612051053725,
      "grad_norm": 3.9004299640655518,
      "learning_rate": 3.102737931240904e-06,
      "loss": 0.0582,
      "step": 1618,
      "video_reward_cumulative_accuracy": 0.8167490729295427
    },
    {
      "epoch": 0.4805580290887504,
      "grad_norm": 1.6789156198501587,
      "learning_rate": 3.1002236566973383e-06,
      "loss": 0.044,
      "step": 1619,
      "video_reward_cumulative_accuracy": 0.8168622606547251
    },
    {
      "epoch": 0.4808548530721282,
      "grad_norm": 0.7176075577735901,
      "learning_rate": 3.09770873775542e-06,
      "loss": 0.0199,
      "step": 1620,
      "video_reward_cumulative_accuracy": 0.8169753086419753
    },
    {
      "epoch": 0.4811516770555061,
      "grad_norm": 3.140882968902588,
      "learning_rate": 3.09519317711516e-06,
      "loss": 0.0341,
      "step": 1621,
      "video_reward_cumulative_accuracy": 0.8170882171499074
    },
    {
      "epoch": 0.4814485010388839,
      "grad_norm": 4.167765140533447,
      "learning_rate": 3.092676977477256e-06,
      "loss": 0.069,
      "step": 1622,
      "video_reward_cumulative_accuracy": 0.8172009864364982
    },
    {
      "epoch": 0.4817453250222618,
      "grad_norm": 2.755486011505127,
      "learning_rate": 3.090160141543092e-06,
      "loss": 0.0635,
      "step": 1623,
      "video_reward_cumulative_accuracy": 0.8173136167590881
    },
    {
      "epoch": 0.48204214900563963,
      "grad_norm": 2.705613136291504,
      "learning_rate": 3.087642672014738e-06,
      "loss": 0.0451,
      "step": 1624,
      "video_reward_cumulative_accuracy": 0.8174261083743842
    },
    {
      "epoch": 0.4823389729890175,
      "grad_norm": 0.6087374091148376,
      "learning_rate": 3.085124571594939e-06,
      "loss": 0.0153,
      "step": 1625,
      "video_reward_cumulative_accuracy": 0.8175384615384615
    },
    {
      "epoch": 0.4826357969723954,
      "grad_norm": 4.505397319793701,
      "learning_rate": 3.0826058429871226e-06,
      "loss": 0.0659,
      "step": 1626,
      "video_reward_cumulative_accuracy": 0.817650676506765
    },
    {
      "epoch": 0.4829326209557732,
      "grad_norm": 1.5764565467834473,
      "learning_rate": 3.0800864888953863e-06,
      "loss": 0.0229,
      "step": 1627,
      "video_reward_cumulative_accuracy": 0.8177627535341119
    },
    {
      "epoch": 0.4832294449391511,
      "grad_norm": 1.974806785583496,
      "learning_rate": 3.077566512024503e-06,
      "loss": 0.0307,
      "step": 1628,
      "video_reward_cumulative_accuracy": 0.8178746928746928
    },
    {
      "epoch": 0.4835262689225289,
      "grad_norm": 2.2565455436706543,
      "learning_rate": 3.0750459150799116e-06,
      "loss": 0.0528,
      "step": 1629,
      "video_reward_cumulative_accuracy": 0.8173726212400245
    },
    {
      "epoch": 0.4838230929059068,
      "grad_norm": 2.1708600521087646,
      "learning_rate": 3.0725247007677172e-06,
      "loss": 0.0366,
      "step": 1630,
      "video_reward_cumulative_accuracy": 0.8171779141104294
    },
    {
      "epoch": 0.48411991688928463,
      "grad_norm": 0.7493535280227661,
      "learning_rate": 3.0700028717946895e-06,
      "loss": 0.0177,
      "step": 1631,
      "video_reward_cumulative_accuracy": 0.8172900061312078
    },
    {
      "epoch": 0.4844167408726625,
      "grad_norm": 0.8744514584541321,
      "learning_rate": 3.0674804308682547e-06,
      "loss": 0.0233,
      "step": 1632,
      "video_reward_cumulative_accuracy": 0.8174019607843137
    },
    {
      "epoch": 0.4847135648560404,
      "grad_norm": 2.157686710357666,
      "learning_rate": 3.0649573806965006e-06,
      "loss": 0.0401,
      "step": 1633,
      "video_reward_cumulative_accuracy": 0.8172075933864054
    },
    {
      "epoch": 0.4850103888394182,
      "grad_norm": 3.5518863201141357,
      "learning_rate": 3.0624337239881636e-06,
      "loss": 0.091,
      "step": 1634,
      "video_reward_cumulative_accuracy": 0.8170134638922889
    },
    {
      "epoch": 0.4853072128227961,
      "grad_norm": 2.3240246772766113,
      "learning_rate": 3.0599094634526348e-06,
      "loss": 0.0462,
      "step": 1635,
      "video_reward_cumulative_accuracy": 0.8168195718654434
    },
    {
      "epoch": 0.4856040368061739,
      "grad_norm": 1.841416358947754,
      "learning_rate": 3.057384601799955e-06,
      "loss": 0.0536,
      "step": 1636,
      "video_reward_cumulative_accuracy": 0.8166259168704156
    },
    {
      "epoch": 0.4859008607895518,
      "grad_norm": 1.3698524236679077,
      "learning_rate": 3.0548591417408054e-06,
      "loss": 0.0268,
      "step": 1637,
      "video_reward_cumulative_accuracy": 0.8167379352474038
    },
    {
      "epoch": 0.48619768477292963,
      "grad_norm": 8.311427116394043,
      "learning_rate": 3.0523330859865147e-06,
      "loss": 0.0834,
      "step": 1638,
      "video_reward_cumulative_accuracy": 0.8168498168498168
    },
    {
      "epoch": 0.4864945087563075,
      "grad_norm": 2.798309803009033,
      "learning_rate": 3.0498064372490475e-06,
      "loss": 0.0553,
      "step": 1639,
      "video_reward_cumulative_accuracy": 0.8169615619280048
    },
    {
      "epoch": 0.4867913327396854,
      "grad_norm": 1.4771977663040161,
      "learning_rate": 3.0472791982410065e-06,
      "loss": 0.0298,
      "step": 1640,
      "video_reward_cumulative_accuracy": 0.8170731707317073
    },
    {
      "epoch": 0.4870881567230632,
      "grad_norm": 2.279690742492676,
      "learning_rate": 3.0447513716756294e-06,
      "loss": 0.0534,
      "step": 1641,
      "video_reward_cumulative_accuracy": 0.8165752589884216
    },
    {
      "epoch": 0.4873849807064411,
      "grad_norm": 1.9072357416152954,
      "learning_rate": 3.0422229602667825e-06,
      "loss": 0.0476,
      "step": 1642,
      "video_reward_cumulative_accuracy": 0.8163824604141291
    },
    {
      "epoch": 0.4876818046898189,
      "grad_norm": 1.5728148221969604,
      "learning_rate": 3.0396939667289597e-06,
      "loss": 0.0234,
      "step": 1643,
      "video_reward_cumulative_accuracy": 0.8164942178940962
    },
    {
      "epoch": 0.4879786286731968,
      "grad_norm": 5.686618804931641,
      "learning_rate": 3.0371643937772834e-06,
      "loss": 0.0733,
      "step": 1644,
      "video_reward_cumulative_accuracy": 0.8166058394160584
    },
    {
      "epoch": 0.4882754526565746,
      "grad_norm": 2.5927038192749023,
      "learning_rate": 3.0346342441274935e-06,
      "loss": 0.0404,
      "step": 1645,
      "video_reward_cumulative_accuracy": 0.8164133738601824
    },
    {
      "epoch": 0.4885722766399525,
      "grad_norm": 3.340865135192871,
      "learning_rate": 3.0321035204959524e-06,
      "loss": 0.0583,
      "step": 1646,
      "video_reward_cumulative_accuracy": 0.8165249088699879
    },
    {
      "epoch": 0.4888691006233304,
      "grad_norm": 1.8082395792007446,
      "learning_rate": 3.029572225599637e-06,
      "loss": 0.1195,
      "step": 1647,
      "video_reward_cumulative_accuracy": 0.8166363084395871
    },
    {
      "epoch": 0.4891659246067082,
      "grad_norm": 2.37727427482605,
      "learning_rate": 3.0270403621561387e-06,
      "loss": 0.0565,
      "step": 1648,
      "video_reward_cumulative_accuracy": 0.816747572815534
    },
    {
      "epoch": 0.4894627485900861,
      "grad_norm": 2.865441083908081,
      "learning_rate": 3.024507932883659e-06,
      "loss": 0.0404,
      "step": 1649,
      "video_reward_cumulative_accuracy": 0.8168587022437841
    },
    {
      "epoch": 0.4897595725734639,
      "grad_norm": 1.271600604057312,
      "learning_rate": 3.0219749405010054e-06,
      "loss": 0.0444,
      "step": 1650,
      "video_reward_cumulative_accuracy": 0.816969696969697
    },
    {
      "epoch": 0.4900563965568418,
      "grad_norm": 0.5346439480781555,
      "learning_rate": 3.019441387727591e-06,
      "loss": 0.012,
      "step": 1651,
      "video_reward_cumulative_accuracy": 0.8170805572380375
    },
    {
      "epoch": 0.4903532205402196,
      "grad_norm": 1.969221830368042,
      "learning_rate": 3.016907277283431e-06,
      "loss": 0.0382,
      "step": 1652,
      "video_reward_cumulative_accuracy": 0.8171912832929782
    },
    {
      "epoch": 0.4906500445235975,
      "grad_norm": 1.0193865299224854,
      "learning_rate": 3.014372611889139e-06,
      "loss": 0.0287,
      "step": 1653,
      "video_reward_cumulative_accuracy": 0.8173018753781004
    },
    {
      "epoch": 0.4909468685069754,
      "grad_norm": 2.115161418914795,
      "learning_rate": 3.011837394265925e-06,
      "loss": 0.0778,
      "step": 1654,
      "video_reward_cumulative_accuracy": 0.8174123337363967
    },
    {
      "epoch": 0.4912436924903532,
      "grad_norm": 4.27302360534668,
      "learning_rate": 3.0093016271355894e-06,
      "loss": 0.0571,
      "step": 1655,
      "video_reward_cumulative_accuracy": 0.8172205438066465
    },
    {
      "epoch": 0.4915405164737311,
      "grad_norm": 3.0853607654571533,
      "learning_rate": 3.0067653132205267e-06,
      "loss": 0.0414,
      "step": 1656,
      "video_reward_cumulative_accuracy": 0.8170289855072463
    },
    {
      "epoch": 0.4918373404571089,
      "grad_norm": 1.4236749410629272,
      "learning_rate": 3.0042284552437138e-06,
      "loss": 0.0427,
      "step": 1657,
      "video_reward_cumulative_accuracy": 0.8168376584188292
    },
    {
      "epoch": 0.4921341644404868,
      "grad_norm": 3.9889345169067383,
      "learning_rate": 3.0016910559287147e-06,
      "loss": 0.0583,
      "step": 1658,
      "video_reward_cumulative_accuracy": 0.8166465621230398
    },
    {
      "epoch": 0.4924309884238646,
      "grad_norm": 3.3688066005706787,
      "learning_rate": 2.999153117999675e-06,
      "loss": 0.0509,
      "step": 1659,
      "video_reward_cumulative_accuracy": 0.8164556962025317
    },
    {
      "epoch": 0.4927278124072425,
      "grad_norm": 1.3700438737869263,
      "learning_rate": 2.996614644181316e-06,
      "loss": 0.0319,
      "step": 1660,
      "video_reward_cumulative_accuracy": 0.816566265060241
    },
    {
      "epoch": 0.4930246363906204,
      "grad_norm": 1.2768040895462036,
      "learning_rate": 2.9940756371989366e-06,
      "loss": 0.0296,
      "step": 1661,
      "video_reward_cumulative_accuracy": 0.8166767007826611
    },
    {
      "epoch": 0.4933214603739982,
      "grad_norm": 1.5340852737426758,
      "learning_rate": 2.9915360997784066e-06,
      "loss": 0.0312,
      "step": 1662,
      "video_reward_cumulative_accuracy": 0.8164861612515042
    },
    {
      "epoch": 0.4936182843573761,
      "grad_norm": 2.8251636028289795,
      "learning_rate": 2.9889960346461653e-06,
      "loss": 0.0578,
      "step": 1663,
      "video_reward_cumulative_accuracy": 0.8162958508719182
    },
    {
      "epoch": 0.4939151083407539,
      "grad_norm": 1.959545612335205,
      "learning_rate": 2.9864554445292205e-06,
      "loss": 0.0421,
      "step": 1664,
      "video_reward_cumulative_accuracy": 0.81640625
    },
    {
      "epoch": 0.4942119323241318,
      "grad_norm": 1.7720321416854858,
      "learning_rate": 2.9839143321551415e-06,
      "loss": 0.0273,
      "step": 1665,
      "video_reward_cumulative_accuracy": 0.8165165165165165
    },
    {
      "epoch": 0.4945087563075096,
      "grad_norm": 3.3178257942199707,
      "learning_rate": 2.9813727002520597e-06,
      "loss": 0.0837,
      "step": 1666,
      "video_reward_cumulative_accuracy": 0.8166266506602641
    },
    {
      "epoch": 0.4948055802908875,
      "grad_norm": 1.7163565158843994,
      "learning_rate": 2.9788305515486636e-06,
      "loss": 0.0305,
      "step": 1667,
      "video_reward_cumulative_accuracy": 0.8167366526694662
    },
    {
      "epoch": 0.4951024042742654,
      "grad_norm": 2.1592905521392822,
      "learning_rate": 2.9762878887741956e-06,
      "loss": 0.0309,
      "step": 1668,
      "video_reward_cumulative_accuracy": 0.8168465227817746
    },
    {
      "epoch": 0.4953992282576432,
      "grad_norm": 0.8975669741630554,
      "learning_rate": 2.973744714658452e-06,
      "loss": 0.0316,
      "step": 1669,
      "video_reward_cumulative_accuracy": 0.816956261234272
    },
    {
      "epoch": 0.4956960522410211,
      "grad_norm": 1.1875498294830322,
      "learning_rate": 2.9712010319317765e-06,
      "loss": 0.0149,
      "step": 1670,
      "video_reward_cumulative_accuracy": 0.8167664670658683
    },
    {
      "epoch": 0.4959928762243989,
      "grad_norm": 1.2564858198165894,
      "learning_rate": 2.968656843325059e-06,
      "loss": 0.0371,
      "step": 1671,
      "video_reward_cumulative_accuracy": 0.8168761220825853
    },
    {
      "epoch": 0.4962897002077768,
      "grad_norm": 3.0329394340515137,
      "learning_rate": 2.966112151569734e-06,
      "loss": 0.0305,
      "step": 1672,
      "video_reward_cumulative_accuracy": 0.8166866028708134
    },
    {
      "epoch": 0.4965865241911546,
      "grad_norm": 1.6530811786651611,
      "learning_rate": 2.963566959397774e-06,
      "loss": 0.0277,
      "step": 1673,
      "video_reward_cumulative_accuracy": 0.8164973102211596
    },
    {
      "epoch": 0.4968833481745325,
      "grad_norm": 4.417179107666016,
      "learning_rate": 2.9610212695416908e-06,
      "loss": 0.058,
      "step": 1674,
      "video_reward_cumulative_accuracy": 0.8166069295101553
    },
    {
      "epoch": 0.4971801721579104,
      "grad_norm": 3.6994571685791016,
      "learning_rate": 2.958475084734529e-06,
      "loss": 0.0651,
      "step": 1675,
      "video_reward_cumulative_accuracy": 0.8167164179104478
    },
    {
      "epoch": 0.4974769961412882,
      "grad_norm": 1.733941912651062,
      "learning_rate": 2.955928407709864e-06,
      "loss": 0.0476,
      "step": 1676,
      "video_reward_cumulative_accuracy": 0.8168257756563246
    },
    {
      "epoch": 0.4977738201246661,
      "grad_norm": 1.2677801847457886,
      "learning_rate": 2.953381241201804e-06,
      "loss": 0.0159,
      "step": 1677,
      "video_reward_cumulative_accuracy": 0.8169350029815146
    },
    {
      "epoch": 0.4980706441080439,
      "grad_norm": 1.808666467666626,
      "learning_rate": 2.9508335879449764e-06,
      "loss": 0.0442,
      "step": 1678,
      "video_reward_cumulative_accuracy": 0.8170441001191895
    },
    {
      "epoch": 0.4983674680914218,
      "grad_norm": 1.5872184038162231,
      "learning_rate": 2.9482854506745353e-06,
      "loss": 0.0499,
      "step": 1679,
      "video_reward_cumulative_accuracy": 0.8171530673019655
    },
    {
      "epoch": 0.4986642920747996,
      "grad_norm": 0.5910095572471619,
      "learning_rate": 2.945736832126153e-06,
      "loss": 0.0082,
      "step": 1680,
      "video_reward_cumulative_accuracy": 0.8172619047619047
    },
    {
      "epoch": 0.4989611160581775,
      "grad_norm": 3.666095018386841,
      "learning_rate": 2.9431877350360198e-06,
      "loss": 0.0307,
      "step": 1681,
      "video_reward_cumulative_accuracy": 0.8173706127305176
    },
    {
      "epoch": 0.4992579400415554,
      "grad_norm": 0.6903228163719177,
      "learning_rate": 2.9406381621408374e-06,
      "loss": 0.0174,
      "step": 1682,
      "video_reward_cumulative_accuracy": 0.8174791914387634
    },
    {
      "epoch": 0.4995547640249332,
      "grad_norm": 1.7546206712722778,
      "learning_rate": 2.9380881161778214e-06,
      "loss": 0.0198,
      "step": 1683,
      "video_reward_cumulative_accuracy": 0.8175876411170528
    },
    {
      "epoch": 0.4998515880083111,
      "grad_norm": 1.842877745628357,
      "learning_rate": 2.9355375998846923e-06,
      "loss": 0.0213,
      "step": 1684,
      "video_reward_cumulative_accuracy": 0.8176959619952494
    },
    {
      "epoch": 0.5001484119916889,
      "grad_norm": 2.9342010021209717,
      "learning_rate": 2.932986615999678e-06,
      "loss": 0.0577,
      "step": 1685,
      "video_reward_cumulative_accuracy": 0.8175074183976261
    },
    {
      "epoch": 0.5004452359750667,
      "grad_norm": 4.135309219360352,
      "learning_rate": 2.9304351672615067e-06,
      "loss": 0.0362,
      "step": 1686,
      "video_reward_cumulative_accuracy": 0.8173190984578885
    },
    {
      "epoch": 0.5007420599584447,
      "grad_norm": 3.366182804107666,
      "learning_rate": 2.9278832564094064e-06,
      "loss": 0.0511,
      "step": 1687,
      "video_reward_cumulative_accuracy": 0.8174273858921162
    },
    {
      "epoch": 0.5010388839418225,
      "grad_norm": 2.525951623916626,
      "learning_rate": 2.9253308861831e-06,
      "loss": 0.0335,
      "step": 1688,
      "video_reward_cumulative_accuracy": 0.8175355450236966
    },
    {
      "epoch": 0.5013357079252003,
      "grad_norm": 3.1717841625213623,
      "learning_rate": 2.9227780593228063e-06,
      "loss": 0.0498,
      "step": 1689,
      "video_reward_cumulative_accuracy": 0.8173475429248076
    },
    {
      "epoch": 0.5016325319085783,
      "grad_norm": 4.952225208282471,
      "learning_rate": 2.9202247785692323e-06,
      "loss": 0.0765,
      "step": 1690,
      "video_reward_cumulative_accuracy": 0.8168639053254438
    },
    {
      "epoch": 0.5019293558919561,
      "grad_norm": 2.3946175575256348,
      "learning_rate": 2.9176710466635718e-06,
      "loss": 0.0809,
      "step": 1691,
      "video_reward_cumulative_accuracy": 0.8166765227675932
    },
    {
      "epoch": 0.5022261798753339,
      "grad_norm": 1.2514123916625977,
      "learning_rate": 2.915116866347505e-06,
      "loss": 0.03,
      "step": 1692,
      "video_reward_cumulative_accuracy": 0.8167848699763594
    },
    {
      "epoch": 0.5025230038587117,
      "grad_norm": 1.9969528913497925,
      "learning_rate": 2.9125622403631913e-06,
      "loss": 0.017,
      "step": 1693,
      "video_reward_cumulative_accuracy": 0.8168930891907856
    },
    {
      "epoch": 0.5028198278420897,
      "grad_norm": 1.2726820707321167,
      "learning_rate": 2.9100071714532706e-06,
      "loss": 0.043,
      "step": 1694,
      "video_reward_cumulative_accuracy": 0.8170011806375442
    },
    {
      "epoch": 0.5031166518254675,
      "grad_norm": 1.732360601425171,
      "learning_rate": 2.907451662360857e-06,
      "loss": 0.0243,
      "step": 1695,
      "video_reward_cumulative_accuracy": 0.8171091445427728
    },
    {
      "epoch": 0.5034134758088453,
      "grad_norm": 0.7770466804504395,
      "learning_rate": 2.904895715829537e-06,
      "loss": 0.0144,
      "step": 1696,
      "video_reward_cumulative_accuracy": 0.8172169811320755
    },
    {
      "epoch": 0.5037102997922233,
      "grad_norm": 1.9971357583999634,
      "learning_rate": 2.902339334603369e-06,
      "loss": 0.0488,
      "step": 1697,
      "video_reward_cumulative_accuracy": 0.8173246906305245
    },
    {
      "epoch": 0.5040071237756011,
      "grad_norm": 2.280515670776367,
      "learning_rate": 2.8997825214268743e-06,
      "loss": 0.0486,
      "step": 1698,
      "video_reward_cumulative_accuracy": 0.8171378091872792
    },
    {
      "epoch": 0.5043039477589789,
      "grad_norm": 4.495890140533447,
      "learning_rate": 2.8972252790450413e-06,
      "loss": 0.082,
      "step": 1699,
      "video_reward_cumulative_accuracy": 0.8169511477339612
    },
    {
      "epoch": 0.5046007717423567,
      "grad_norm": 1.4922055006027222,
      "learning_rate": 2.8946676102033167e-06,
      "loss": 0.0249,
      "step": 1700,
      "video_reward_cumulative_accuracy": 0.8170588235294117
    },
    {
      "epoch": 0.5048975957257347,
      "grad_norm": 1.0637956857681274,
      "learning_rate": 2.892109517647607e-06,
      "loss": 0.0296,
      "step": 1701,
      "video_reward_cumulative_accuracy": 0.8168724279835391
    },
    {
      "epoch": 0.5051944197091125,
      "grad_norm": 1.7560207843780518,
      "learning_rate": 2.8895510041242737e-06,
      "loss": 0.0522,
      "step": 1702,
      "video_reward_cumulative_accuracy": 0.8169800235017627
    },
    {
      "epoch": 0.5054912436924903,
      "grad_norm": 0.3437102735042572,
      "learning_rate": 2.886992072380128e-06,
      "loss": 0.0042,
      "step": 1703,
      "video_reward_cumulative_accuracy": 0.8170874926600118
    },
    {
      "epoch": 0.5057880676758683,
      "grad_norm": 3.053436279296875,
      "learning_rate": 2.884432725162433e-06,
      "loss": 0.0345,
      "step": 1704,
      "video_reward_cumulative_accuracy": 0.8171948356807511
    },
    {
      "epoch": 0.5060848916592461,
      "grad_norm": 1.0694425106048584,
      "learning_rate": 2.8818729652188936e-06,
      "loss": 0.0315,
      "step": 1705,
      "video_reward_cumulative_accuracy": 0.817008797653959
    },
    {
      "epoch": 0.5063817156426239,
      "grad_norm": 3.757838249206543,
      "learning_rate": 2.879312795297663e-06,
      "loss": 0.0712,
      "step": 1706,
      "video_reward_cumulative_accuracy": 0.8171160609613131
    },
    {
      "epoch": 0.5066785396260017,
      "grad_norm": 2.922731637954712,
      "learning_rate": 2.8767522181473323e-06,
      "loss": 0.0432,
      "step": 1707,
      "video_reward_cumulative_accuracy": 0.81693028705331
    },
    {
      "epoch": 0.5069753636093797,
      "grad_norm": 2.4191317558288574,
      "learning_rate": 2.8741912365169276e-06,
      "loss": 0.0722,
      "step": 1708,
      "video_reward_cumulative_accuracy": 0.8167447306791569
    },
    {
      "epoch": 0.5072721875927575,
      "grad_norm": 1.132919192314148,
      "learning_rate": 2.8716298531559133e-06,
      "loss": 0.0104,
      "step": 1709,
      "video_reward_cumulative_accuracy": 0.8168519602106495
    },
    {
      "epoch": 0.5075690115761353,
      "grad_norm": 3.451291561126709,
      "learning_rate": 2.8690680708141814e-06,
      "loss": 0.1011,
      "step": 1710,
      "video_reward_cumulative_accuracy": 0.8169590643274853
    },
    {
      "epoch": 0.5078658355595133,
      "grad_norm": 2.044468879699707,
      "learning_rate": 2.866505892242055e-06,
      "loss": 0.0415,
      "step": 1711,
      "video_reward_cumulative_accuracy": 0.8170660432495617
    },
    {
      "epoch": 0.5081626595428911,
      "grad_norm": 1.3463438749313354,
      "learning_rate": 2.8639433201902807e-06,
      "loss": 0.0201,
      "step": 1712,
      "video_reward_cumulative_accuracy": 0.8171728971962616
    },
    {
      "epoch": 0.5084594835262689,
      "grad_norm": 1.1717925071716309,
      "learning_rate": 2.8613803574100284e-06,
      "loss": 0.0327,
      "step": 1713,
      "video_reward_cumulative_accuracy": 0.8172796263864565
    },
    {
      "epoch": 0.5087563075096467,
      "grad_norm": 2.5041465759277344,
      "learning_rate": 2.858817006652888e-06,
      "loss": 0.0391,
      "step": 1714,
      "video_reward_cumulative_accuracy": 0.8170945157526255
    },
    {
      "epoch": 0.5090531314930247,
      "grad_norm": 2.035304546356201,
      "learning_rate": 2.8562532706708655e-06,
      "loss": 0.017,
      "step": 1715,
      "video_reward_cumulative_accuracy": 0.817201166180758
    },
    {
      "epoch": 0.5093499554764025,
      "grad_norm": 2.6891417503356934,
      "learning_rate": 2.853689152216379e-06,
      "loss": 0.0498,
      "step": 1716,
      "video_reward_cumulative_accuracy": 0.8173076923076923
    },
    {
      "epoch": 0.5096467794597803,
      "grad_norm": 4.318187713623047,
      "learning_rate": 2.8511246540422597e-06,
      "loss": 0.0503,
      "step": 1717,
      "video_reward_cumulative_accuracy": 0.8174140943506115
    },
    {
      "epoch": 0.5099436034431583,
      "grad_norm": 5.709010124206543,
      "learning_rate": 2.848559778901745e-06,
      "loss": 0.062,
      "step": 1718,
      "video_reward_cumulative_accuracy": 0.8175203725261933
    },
    {
      "epoch": 0.5102404274265361,
      "grad_norm": 1.210877537727356,
      "learning_rate": 2.845994529548477e-06,
      "loss": 0.0096,
      "step": 1719,
      "video_reward_cumulative_accuracy": 0.8176265270506108
    },
    {
      "epoch": 0.5105372514099139,
      "grad_norm": 1.9540245532989502,
      "learning_rate": 2.8434289087365002e-06,
      "loss": 0.041,
      "step": 1720,
      "video_reward_cumulative_accuracy": 0.8177325581395349
    },
    {
      "epoch": 0.5108340753932917,
      "grad_norm": 2.498683452606201,
      "learning_rate": 2.8408629192202574e-06,
      "loss": 0.0548,
      "step": 1721,
      "video_reward_cumulative_accuracy": 0.8178384660081348
    },
    {
      "epoch": 0.5111308993766697,
      "grad_norm": 1.3648625612258911,
      "learning_rate": 2.8382965637545877e-06,
      "loss": 0.0349,
      "step": 1722,
      "video_reward_cumulative_accuracy": 0.8179442508710801
    },
    {
      "epoch": 0.5114277233600475,
      "grad_norm": 2.748464345932007,
      "learning_rate": 2.835729845094722e-06,
      "loss": 0.0527,
      "step": 1723,
      "video_reward_cumulative_accuracy": 0.818049912942542
    },
    {
      "epoch": 0.5117245473434253,
      "grad_norm": 1.2629681825637817,
      "learning_rate": 2.8331627659962824e-06,
      "loss": 0.0243,
      "step": 1724,
      "video_reward_cumulative_accuracy": 0.8181554524361949
    },
    {
      "epoch": 0.5120213713268033,
      "grad_norm": 2.213495969772339,
      "learning_rate": 2.8305953292152785e-06,
      "loss": 0.035,
      "step": 1725,
      "video_reward_cumulative_accuracy": 0.8182608695652174
    },
    {
      "epoch": 0.5123181953101811,
      "grad_norm": 2.6524288654327393,
      "learning_rate": 2.8280275375081e-06,
      "loss": 0.0457,
      "step": 1726,
      "video_reward_cumulative_accuracy": 0.8180764774044033
    },
    {
      "epoch": 0.5126150192935589,
      "grad_norm": 2.8354032039642334,
      "learning_rate": 2.8254593936315243e-06,
      "loss": 0.0827,
      "step": 1727,
      "video_reward_cumulative_accuracy": 0.8178922987840185
    },
    {
      "epoch": 0.5129118432769367,
      "grad_norm": 2.953294515609741,
      "learning_rate": 2.8228909003427003e-06,
      "loss": 0.0621,
      "step": 1728,
      "video_reward_cumulative_accuracy": 0.8179976851851852
    },
    {
      "epoch": 0.5132086672603147,
      "grad_norm": 3.763993263244629,
      "learning_rate": 2.820322060399156e-06,
      "loss": 0.0425,
      "step": 1729,
      "video_reward_cumulative_accuracy": 0.818102949681897
    },
    {
      "epoch": 0.5135054912436925,
      "grad_norm": 1.8136401176452637,
      "learning_rate": 2.817752876558789e-06,
      "loss": 0.0226,
      "step": 1730,
      "video_reward_cumulative_accuracy": 0.8182080924855492
    },
    {
      "epoch": 0.5138023152270703,
      "grad_norm": 0.8390701413154602,
      "learning_rate": 2.81518335157987e-06,
      "loss": 0.0311,
      "step": 1731,
      "video_reward_cumulative_accuracy": 0.818313113807048
    },
    {
      "epoch": 0.5140991392104483,
      "grad_norm": 1.5557044744491577,
      "learning_rate": 2.8126134882210313e-06,
      "loss": 0.0402,
      "step": 1732,
      "video_reward_cumulative_accuracy": 0.8181293302540416
    },
    {
      "epoch": 0.5143959631938261,
      "grad_norm": 2.67010235786438,
      "learning_rate": 2.8100432892412723e-06,
      "loss": 0.0301,
      "step": 1733,
      "video_reward_cumulative_accuracy": 0.8182342758222735
    },
    {
      "epoch": 0.5146927871772039,
      "grad_norm": 2.489520311355591,
      "learning_rate": 2.8074727573999495e-06,
      "loss": 0.0829,
      "step": 1734,
      "video_reward_cumulative_accuracy": 0.8183391003460208
    },
    {
      "epoch": 0.5149896111605817,
      "grad_norm": 2.488553524017334,
      "learning_rate": 2.8049018954567797e-06,
      "loss": 0.0745,
      "step": 1735,
      "video_reward_cumulative_accuracy": 0.8181556195965418
    },
    {
      "epoch": 0.5152864351439597,
      "grad_norm": 3.8210175037384033,
      "learning_rate": 2.802330706171831e-06,
      "loss": 0.049,
      "step": 1736,
      "video_reward_cumulative_accuracy": 0.8179723502304147
    },
    {
      "epoch": 0.5155832591273375,
      "grad_norm": 7.683956623077393,
      "learning_rate": 2.799759192305526e-06,
      "loss": 0.0934,
      "step": 1737,
      "video_reward_cumulative_accuracy": 0.8177892918825561
    },
    {
      "epoch": 0.5158800831107153,
      "grad_norm": 1.5301347970962524,
      "learning_rate": 2.7971873566186347e-06,
      "loss": 0.0483,
      "step": 1738,
      "video_reward_cumulative_accuracy": 0.8178941311852704
    },
    {
      "epoch": 0.5161769070940933,
      "grad_norm": 0.7959959506988525,
      "learning_rate": 2.7946152018722714e-06,
      "loss": 0.0287,
      "step": 1739,
      "video_reward_cumulative_accuracy": 0.8177113283496262
    },
    {
      "epoch": 0.5164737310774711,
      "grad_norm": 3.1695709228515625,
      "learning_rate": 2.7920427308278946e-06,
      "loss": 0.034,
      "step": 1740,
      "video_reward_cumulative_accuracy": 0.8178160919540229
    },
    {
      "epoch": 0.5167705550608489,
      "grad_norm": 4.1620917320251465,
      "learning_rate": 2.7894699462473008e-06,
      "loss": 0.039,
      "step": 1741,
      "video_reward_cumulative_accuracy": 0.8179207352096496
    },
    {
      "epoch": 0.5170673790442267,
      "grad_norm": 1.3403260707855225,
      "learning_rate": 2.7868968508926242e-06,
      "loss": 0.0228,
      "step": 1742,
      "video_reward_cumulative_accuracy": 0.8180252583237658
    },
    {
      "epoch": 0.5173642030276047,
      "grad_norm": 2.3487467765808105,
      "learning_rate": 2.784323447526333e-06,
      "loss": 0.0681,
      "step": 1743,
      "video_reward_cumulative_accuracy": 0.8181296615031555
    },
    {
      "epoch": 0.5176610270109825,
      "grad_norm": 1.9424347877502441,
      "learning_rate": 2.7817497389112247e-06,
      "loss": 0.0378,
      "step": 1744,
      "video_reward_cumulative_accuracy": 0.8182339449541285
    },
    {
      "epoch": 0.5179578509943603,
      "grad_norm": 2.208332061767578,
      "learning_rate": 2.779175727810426e-06,
      "loss": 0.0513,
      "step": 1745,
      "video_reward_cumulative_accuracy": 0.8177650429799427
    },
    {
      "epoch": 0.5182546749777382,
      "grad_norm": 4.050232887268066,
      "learning_rate": 2.7766014169873874e-06,
      "loss": 0.0422,
      "step": 1746,
      "video_reward_cumulative_accuracy": 0.8178694158075601
    },
    {
      "epoch": 0.5185514989611161,
      "grad_norm": 1.5156047344207764,
      "learning_rate": 2.7740268092058813e-06,
      "loss": 0.0397,
      "step": 1747,
      "video_reward_cumulative_accuracy": 0.8179736691471093
    },
    {
      "epoch": 0.5188483229444939,
      "grad_norm": 0.790824294090271,
      "learning_rate": 2.771451907229999e-06,
      "loss": 0.0114,
      "step": 1748,
      "video_reward_cumulative_accuracy": 0.8180778032036613
    },
    {
      "epoch": 0.5191451469278717,
      "grad_norm": 0.7043279409408569,
      "learning_rate": 2.7688767138241474e-06,
      "loss": 0.017,
      "step": 1749,
      "video_reward_cumulative_accuracy": 0.8181818181818182
    },
    {
      "epoch": 0.5194419709112497,
      "grad_norm": 2.117793560028076,
      "learning_rate": 2.7663012317530474e-06,
      "loss": 0.0303,
      "step": 1750,
      "video_reward_cumulative_accuracy": 0.818
    },
    {
      "epoch": 0.5197387948946275,
      "grad_norm": 2.9384334087371826,
      "learning_rate": 2.7637254637817284e-06,
      "loss": 0.066,
      "step": 1751,
      "video_reward_cumulative_accuracy": 0.817818389491719
    },
    {
      "epoch": 0.5200356188780053,
      "grad_norm": 5.217811584472656,
      "learning_rate": 2.7611494126755276e-06,
      "loss": 0.0593,
      "step": 1752,
      "video_reward_cumulative_accuracy": 0.817351598173516
    },
    {
      "epoch": 0.5203324428613832,
      "grad_norm": 4.0168890953063965,
      "learning_rate": 2.7585730812000855e-06,
      "loss": 0.0452,
      "step": 1753,
      "video_reward_cumulative_accuracy": 0.8171705647461495
    },
    {
      "epoch": 0.5206292668447611,
      "grad_norm": 2.7809596061706543,
      "learning_rate": 2.755996472121344e-06,
      "loss": 0.043,
      "step": 1754,
      "video_reward_cumulative_accuracy": 0.8169897377423033
    },
    {
      "epoch": 0.5209260908281389,
      "grad_norm": 2.7551541328430176,
      "learning_rate": 2.753419588205544e-06,
      "loss": 0.0537,
      "step": 1755,
      "video_reward_cumulative_accuracy": 0.8170940170940171
    },
    {
      "epoch": 0.5212229148115167,
      "grad_norm": 1.8034993410110474,
      "learning_rate": 2.750842432219219e-06,
      "loss": 0.0245,
      "step": 1756,
      "video_reward_cumulative_accuracy": 0.8171981776765376
    },
    {
      "epoch": 0.5215197387948947,
      "grad_norm": 2.4644577503204346,
      "learning_rate": 2.7482650069291976e-06,
      "loss": 0.0258,
      "step": 1757,
      "video_reward_cumulative_accuracy": 0.8170176437108708
    },
    {
      "epoch": 0.5218165627782725,
      "grad_norm": 3.7700116634368896,
      "learning_rate": 2.745687315102595e-06,
      "loss": 0.0812,
      "step": 1758,
      "video_reward_cumulative_accuracy": 0.8168373151308305
    },
    {
      "epoch": 0.5221133867616503,
      "grad_norm": 2.102640390396118,
      "learning_rate": 2.743109359506813e-06,
      "loss": 0.0272,
      "step": 1759,
      "video_reward_cumulative_accuracy": 0.816941444002274
    },
    {
      "epoch": 0.5224102107450282,
      "grad_norm": 3.0133395195007324,
      "learning_rate": 2.7405311429095384e-06,
      "loss": 0.0467,
      "step": 1760,
      "video_reward_cumulative_accuracy": 0.8170454545454545
    },
    {
      "epoch": 0.5227070347284061,
      "grad_norm": 4.504801273345947,
      "learning_rate": 2.7379526680787365e-06,
      "loss": 0.0576,
      "step": 1761,
      "video_reward_cumulative_accuracy": 0.8168654173764907
    },
    {
      "epoch": 0.5230038587117839,
      "grad_norm": 1.1885194778442383,
      "learning_rate": 2.7353739377826503e-06,
      "loss": 0.0166,
      "step": 1762,
      "video_reward_cumulative_accuracy": 0.8169693530079455
    },
    {
      "epoch": 0.5233006826951617,
      "grad_norm": 2.73569393157959,
      "learning_rate": 2.7327949547897977e-06,
      "loss": 0.0413,
      "step": 1763,
      "video_reward_cumulative_accuracy": 0.8170731707317073
    },
    {
      "epoch": 0.5235975066785397,
      "grad_norm": 4.12099027633667,
      "learning_rate": 2.7302157218689655e-06,
      "loss": 0.0539,
      "step": 1764,
      "video_reward_cumulative_accuracy": 0.8171768707482994
    },
    {
      "epoch": 0.5238943306619175,
      "grad_norm": 2.6001250743865967,
      "learning_rate": 2.7276362417892124e-06,
      "loss": 0.034,
      "step": 1765,
      "video_reward_cumulative_accuracy": 0.8172804532577904
    },
    {
      "epoch": 0.5241911546452953,
      "grad_norm": 2.1774399280548096,
      "learning_rate": 2.7250565173198596e-06,
      "loss": 0.0154,
      "step": 1766,
      "video_reward_cumulative_accuracy": 0.8173839184597962
    },
    {
      "epoch": 0.5244879786286732,
      "grad_norm": 3.843973159790039,
      "learning_rate": 2.722476551230491e-06,
      "loss": 0.0978,
      "step": 1767,
      "video_reward_cumulative_accuracy": 0.8174872665534805
    },
    {
      "epoch": 0.5247848026120511,
      "grad_norm": 1.590867280960083,
      "learning_rate": 2.7198963462909534e-06,
      "loss": 0.0226,
      "step": 1768,
      "video_reward_cumulative_accuracy": 0.8175904977375565
    },
    {
      "epoch": 0.5250816265954289,
      "grad_norm": 4.675069808959961,
      "learning_rate": 2.717315905271344e-06,
      "loss": 0.1181,
      "step": 1769,
      "video_reward_cumulative_accuracy": 0.8174109666478236
    },
    {
      "epoch": 0.5253784505788067,
      "grad_norm": 1.5898845195770264,
      "learning_rate": 2.714735230942019e-06,
      "loss": 0.0133,
      "step": 1770,
      "video_reward_cumulative_accuracy": 0.8175141242937853
    },
    {
      "epoch": 0.5256752745621847,
      "grad_norm": 2.406005382537842,
      "learning_rate": 2.712154326073581e-06,
      "loss": 0.042,
      "step": 1771,
      "video_reward_cumulative_accuracy": 0.8176171654432524
    },
    {
      "epoch": 0.5259720985455625,
      "grad_norm": 1.935448408126831,
      "learning_rate": 2.709573193436883e-06,
      "loss": 0.0373,
      "step": 1772,
      "video_reward_cumulative_accuracy": 0.8174379232505643
    },
    {
      "epoch": 0.5262689225289403,
      "grad_norm": 1.7007020711898804,
      "learning_rate": 2.7069918358030218e-06,
      "loss": 0.0311,
      "step": 1773,
      "video_reward_cumulative_accuracy": 0.817258883248731
    },
    {
      "epoch": 0.5265657465123182,
      "grad_norm": 5.44350528717041,
      "learning_rate": 2.7044102559433346e-06,
      "loss": 0.0672,
      "step": 1774,
      "video_reward_cumulative_accuracy": 0.8170800450958287
    },
    {
      "epoch": 0.5268625704956961,
      "grad_norm": 2.1259396076202393,
      "learning_rate": 2.701828456629398e-06,
      "loss": 0.0689,
      "step": 1775,
      "video_reward_cumulative_accuracy": 0.8169014084507042
    },
    {
      "epoch": 0.5271593944790739,
      "grad_norm": 0.9994587302207947,
      "learning_rate": 2.699246440633023e-06,
      "loss": 0.015,
      "step": 1776,
      "video_reward_cumulative_accuracy": 0.8170045045045045
    },
    {
      "epoch": 0.5274562184624517,
      "grad_norm": 2.5973446369171143,
      "learning_rate": 2.696664210726257e-06,
      "loss": 0.0958,
      "step": 1777,
      "video_reward_cumulative_accuracy": 0.8171074845244795
    },
    {
      "epoch": 0.5277530424458297,
      "grad_norm": 2.029411792755127,
      "learning_rate": 2.694081769681373e-06,
      "loss": 0.0194,
      "step": 1778,
      "video_reward_cumulative_accuracy": 0.8172103487064117
    },
    {
      "epoch": 0.5280498664292075,
      "grad_norm": 1.6319355964660645,
      "learning_rate": 2.6914991202708707e-06,
      "loss": 0.0408,
      "step": 1779,
      "video_reward_cumulative_accuracy": 0.8173130972456436
    },
    {
      "epoch": 0.5283466904125853,
      "grad_norm": 1.4673457145690918,
      "learning_rate": 2.6889162652674776e-06,
      "loss": 0.0176,
      "step": 1780,
      "video_reward_cumulative_accuracy": 0.8174157303370787
    },
    {
      "epoch": 0.5286435143959632,
      "grad_norm": 2.3411591053009033,
      "learning_rate": 2.6863332074441374e-06,
      "loss": 0.0476,
      "step": 1781,
      "video_reward_cumulative_accuracy": 0.8172375070185289
    },
    {
      "epoch": 0.5289403383793411,
      "grad_norm": 2.2734508514404297,
      "learning_rate": 2.6837499495740144e-06,
      "loss": 0.0589,
      "step": 1782,
      "video_reward_cumulative_accuracy": 0.8173400673400674
    },
    {
      "epoch": 0.5292371623627189,
      "grad_norm": 4.031787872314453,
      "learning_rate": 2.681166494430486e-06,
      "loss": 0.0424,
      "step": 1783,
      "video_reward_cumulative_accuracy": 0.8174425126191811
    },
    {
      "epoch": 0.5295339863460967,
      "grad_norm": 3.090514898300171,
      "learning_rate": 2.6785828447871415e-06,
      "loss": 0.0261,
      "step": 1784,
      "video_reward_cumulative_accuracy": 0.8175448430493274
    },
    {
      "epoch": 0.5298308103294747,
      "grad_norm": 0.7824661731719971,
      "learning_rate": 2.6759990034177814e-06,
      "loss": 0.0154,
      "step": 1785,
      "video_reward_cumulative_accuracy": 0.8176470588235294
    },
    {
      "epoch": 0.5301276343128525,
      "grad_norm": 1.5541051626205444,
      "learning_rate": 2.6734149730964085e-06,
      "loss": 0.0278,
      "step": 1786,
      "video_reward_cumulative_accuracy": 0.8177491601343785
    },
    {
      "epoch": 0.5304244582962303,
      "grad_norm": 1.8912038803100586,
      "learning_rate": 2.6708307565972307e-06,
      "loss": 0.0368,
      "step": 1787,
      "video_reward_cumulative_accuracy": 0.8178511471740347
    },
    {
      "epoch": 0.5307212822796082,
      "grad_norm": 0.8161097168922424,
      "learning_rate": 2.668246356694656e-06,
      "loss": 0.0143,
      "step": 1788,
      "video_reward_cumulative_accuracy": 0.8179530201342282
    },
    {
      "epoch": 0.5310181062629861,
      "grad_norm": 3.4581801891326904,
      "learning_rate": 2.6656617761632863e-06,
      "loss": 0.0676,
      "step": 1789,
      "video_reward_cumulative_accuracy": 0.8180547792062605
    },
    {
      "epoch": 0.5313149302463639,
      "grad_norm": 0.8631388545036316,
      "learning_rate": 2.6630770177779218e-06,
      "loss": 0.0121,
      "step": 1790,
      "video_reward_cumulative_accuracy": 0.8181564245810056
    },
    {
      "epoch": 0.5316117542297417,
      "grad_norm": 1.1447865962982178,
      "learning_rate": 2.660492084313551e-06,
      "loss": 0.0248,
      "step": 1791,
      "video_reward_cumulative_accuracy": 0.8182579564489112
    },
    {
      "epoch": 0.5319085782131197,
      "grad_norm": 2.4453208446502686,
      "learning_rate": 2.657906978545351e-06,
      "loss": 0.0264,
      "step": 1792,
      "video_reward_cumulative_accuracy": 0.8180803571428571
    },
    {
      "epoch": 0.5322054021964975,
      "grad_norm": 2.468322515487671,
      "learning_rate": 2.6553217032486832e-06,
      "loss": 0.0487,
      "step": 1793,
      "video_reward_cumulative_accuracy": 0.8181818181818182
    },
    {
      "epoch": 0.5325022261798753,
      "grad_norm": 0.8765484094619751,
      "learning_rate": 2.6527362611990915e-06,
      "loss": 0.0165,
      "step": 1794,
      "video_reward_cumulative_accuracy": 0.8182831661092531
    },
    {
      "epoch": 0.5327990501632532,
      "grad_norm": 2.160796880722046,
      "learning_rate": 2.6501506551722995e-06,
      "loss": 0.0564,
      "step": 1795,
      "video_reward_cumulative_accuracy": 0.8181058495821727
    },
    {
      "epoch": 0.5330958741466311,
      "grad_norm": 2.3637046813964844,
      "learning_rate": 2.6475648879442055e-06,
      "loss": 0.0582,
      "step": 1796,
      "video_reward_cumulative_accuracy": 0.8182071269487751
    },
    {
      "epoch": 0.5333926981300089,
      "grad_norm": 1.2972532510757446,
      "learning_rate": 2.6449789622908823e-06,
      "loss": 0.0191,
      "step": 1797,
      "video_reward_cumulative_accuracy": 0.8183082915971063
    },
    {
      "epoch": 0.5336895221133867,
      "grad_norm": 3.382450580596924,
      "learning_rate": 2.6423928809885716e-06,
      "loss": 0.0522,
      "step": 1798,
      "video_reward_cumulative_accuracy": 0.818131256952169
    },
    {
      "epoch": 0.5339863460967647,
      "grad_norm": 2.019676685333252,
      "learning_rate": 2.639806646813683e-06,
      "loss": 0.0699,
      "step": 1799,
      "video_reward_cumulative_accuracy": 0.8182323513062812
    },
    {
      "epoch": 0.5342831700801425,
      "grad_norm": 2.9581692218780518,
      "learning_rate": 2.6372202625427897e-06,
      "loss": 0.0817,
      "step": 1800,
      "video_reward_cumulative_accuracy": 0.8183333333333334
    },
    {
      "epoch": 0.5342831700801425,
      "eval_runtime": 131.6405,
      "eval_samples_per_second": 5.994,
      "eval_steps_per_second": 0.752,
      "eval_test_set_accuracy": 0.7765151515151515,
      "step": 1800
    },
    {
      "epoch": 0.5345799940635203,
      "grad_norm": 3.738384485244751,
      "learning_rate": 2.6346337309526265e-06,
      "loss": 0.0749,
      "step": 1801,
      "video_reward_cumulative_accuracy": 0.8181565796779567
    },
    {
      "epoch": 0.5348768180468982,
      "grad_norm": 1.4811760187149048,
      "learning_rate": 2.6320470548200848e-06,
      "loss": 0.0336,
      "step": 1802,
      "video_reward_cumulative_accuracy": 0.8182574916759157
    },
    {
      "epoch": 0.5351736420302761,
      "grad_norm": 1.9211323261260986,
      "learning_rate": 2.6294602369222145e-06,
      "loss": 0.0304,
      "step": 1803,
      "video_reward_cumulative_accuracy": 0.8183582917359956
    },
    {
      "epoch": 0.5354704660136539,
      "grad_norm": 0.7293126583099365,
      "learning_rate": 2.6268732800362147e-06,
      "loss": 0.0101,
      "step": 1804,
      "video_reward_cumulative_accuracy": 0.8184589800443459
    },
    {
      "epoch": 0.5357672899970317,
      "grad_norm": 2.35774827003479,
      "learning_rate": 2.624286186939435e-06,
      "loss": 0.0468,
      "step": 1805,
      "video_reward_cumulative_accuracy": 0.8182825484764543
    },
    {
      "epoch": 0.5360641139804097,
      "grad_norm": 3.0999953746795654,
      "learning_rate": 2.62169896040937e-06,
      "loss": 0.0384,
      "step": 1806,
      "video_reward_cumulative_accuracy": 0.8183831672203765
    },
    {
      "epoch": 0.5363609379637875,
      "grad_norm": 4.446628093719482,
      "learning_rate": 2.6191116032236598e-06,
      "loss": 0.0707,
      "step": 1807,
      "video_reward_cumulative_accuracy": 0.8184836745987825
    },
    {
      "epoch": 0.5366577619471653,
      "grad_norm": 2.9089622497558594,
      "learning_rate": 2.616524118160082e-06,
      "loss": 0.0499,
      "step": 1808,
      "video_reward_cumulative_accuracy": 0.8185840707964602
    },
    {
      "epoch": 0.5369545859305432,
      "grad_norm": 0.42697280645370483,
      "learning_rate": 2.613936507996554e-06,
      "loss": 0.0118,
      "step": 1809,
      "video_reward_cumulative_accuracy": 0.8186843559977889
    },
    {
      "epoch": 0.5372514099139211,
      "grad_norm": 2.4700889587402344,
      "learning_rate": 2.611348775511127e-06,
      "loss": 0.06,
      "step": 1810,
      "video_reward_cumulative_accuracy": 0.8187845303867404
    },
    {
      "epoch": 0.5375482338972989,
      "grad_norm": 1.9913703203201294,
      "learning_rate": 2.6087609234819822e-06,
      "loss": 0.0386,
      "step": 1811,
      "video_reward_cumulative_accuracy": 0.8188845941468802
    },
    {
      "epoch": 0.5378450578806767,
      "grad_norm": 2.4547126293182373,
      "learning_rate": 2.606172954687429e-06,
      "loss": 0.0696,
      "step": 1812,
      "video_reward_cumulative_accuracy": 0.8189845474613686
    },
    {
      "epoch": 0.5381418818640546,
      "grad_norm": 2.6594624519348145,
      "learning_rate": 2.603584871905905e-06,
      "loss": 0.065,
      "step": 1813,
      "video_reward_cumulative_accuracy": 0.8188086045228903
    },
    {
      "epoch": 0.5384387058474325,
      "grad_norm": 1.8410438299179077,
      "learning_rate": 2.600996677915967e-06,
      "loss": 0.0422,
      "step": 1814,
      "video_reward_cumulative_accuracy": 0.8189084895259096
    },
    {
      "epoch": 0.5387355298308103,
      "grad_norm": 1.4541661739349365,
      "learning_rate": 2.598408375496292e-06,
      "loss": 0.0367,
      "step": 1815,
      "video_reward_cumulative_accuracy": 0.8190082644628099
    },
    {
      "epoch": 0.5390323538141882,
      "grad_norm": 0.9136159420013428,
      "learning_rate": 2.5958199674256755e-06,
      "loss": 0.0166,
      "step": 1816,
      "video_reward_cumulative_accuracy": 0.8191079295154186
    },
    {
      "epoch": 0.5393291777975661,
      "grad_norm": 3.7377233505249023,
      "learning_rate": 2.5932314564830237e-06,
      "loss": 0.0694,
      "step": 1817,
      "video_reward_cumulative_accuracy": 0.8192074848651624
    },
    {
      "epoch": 0.5396260017809439,
      "grad_norm": 3.6235744953155518,
      "learning_rate": 2.5906428454473546e-06,
      "loss": 0.0713,
      "step": 1818,
      "video_reward_cumulative_accuracy": 0.819031903190319
    },
    {
      "epoch": 0.5399228257643217,
      "grad_norm": 3.179281711578369,
      "learning_rate": 2.588054137097793e-06,
      "loss": 0.0389,
      "step": 1819,
      "video_reward_cumulative_accuracy": 0.8191313908741067
    },
    {
      "epoch": 0.5402196497476996,
      "grad_norm": 2.8550491333007812,
      "learning_rate": 2.5854653342135687e-06,
      "loss": 0.0421,
      "step": 1820,
      "video_reward_cumulative_accuracy": 0.8192307692307692
    },
    {
      "epoch": 0.5405164737310775,
      "grad_norm": 4.346277713775635,
      "learning_rate": 2.5828764395740135e-06,
      "loss": 0.0625,
      "step": 1821,
      "video_reward_cumulative_accuracy": 0.8193300384404174
    },
    {
      "epoch": 0.5408132977144553,
      "grad_norm": 2.5140342712402344,
      "learning_rate": 2.5802874559585567e-06,
      "loss": 0.0429,
      "step": 1822,
      "video_reward_cumulative_accuracy": 0.8194291986827662
    },
    {
      "epoch": 0.5411101216978332,
      "grad_norm": 6.247716903686523,
      "learning_rate": 2.5776983861467237e-06,
      "loss": 0.0708,
      "step": 1823,
      "video_reward_cumulative_accuracy": 0.8195282501371366
    },
    {
      "epoch": 0.5414069456812111,
      "grad_norm": 3.389479875564575,
      "learning_rate": 2.575109232918131e-06,
      "loss": 0.0473,
      "step": 1824,
      "video_reward_cumulative_accuracy": 0.8196271929824561
    },
    {
      "epoch": 0.5417037696645889,
      "grad_norm": 2.464885711669922,
      "learning_rate": 2.5725199990524874e-06,
      "loss": 0.0297,
      "step": 1825,
      "video_reward_cumulative_accuracy": 0.8197260273972603
    },
    {
      "epoch": 0.5420005936479667,
      "grad_norm": 1.1972076892852783,
      "learning_rate": 2.569930687329586e-06,
      "loss": 0.0302,
      "step": 1826,
      "video_reward_cumulative_accuracy": 0.8198247535596933
    },
    {
      "epoch": 0.5422974176313446,
      "grad_norm": 1.1544277667999268,
      "learning_rate": 2.567341300529305e-06,
      "loss": 0.0154,
      "step": 1827,
      "video_reward_cumulative_accuracy": 0.8199233716475096
    },
    {
      "epoch": 0.5425942416147225,
      "grad_norm": 1.3733747005462646,
      "learning_rate": 2.5647518414316015e-06,
      "loss": 0.0441,
      "step": 1828,
      "video_reward_cumulative_accuracy": 0.8200218818380745
    },
    {
      "epoch": 0.5428910655981003,
      "grad_norm": 2.372936964035034,
      "learning_rate": 2.562162312816511e-06,
      "loss": 0.0401,
      "step": 1829,
      "video_reward_cumulative_accuracy": 0.8201202843083653
    },
    {
      "epoch": 0.5431878895814782,
      "grad_norm": 2.063004970550537,
      "learning_rate": 2.559572717464145e-06,
      "loss": 0.0189,
      "step": 1830,
      "video_reward_cumulative_accuracy": 0.8199453551912569
    },
    {
      "epoch": 0.5434847135648561,
      "grad_norm": 2.8820412158966064,
      "learning_rate": 2.556983058154685e-06,
      "loss": 0.0546,
      "step": 1831,
      "video_reward_cumulative_accuracy": 0.8200436919716002
    },
    {
      "epoch": 0.5437815375482339,
      "grad_norm": 1.9526046514511108,
      "learning_rate": 2.5543933376683805e-06,
      "loss": 0.0197,
      "step": 1832,
      "video_reward_cumulative_accuracy": 0.82014192139738
    },
    {
      "epoch": 0.5440783615316117,
      "grad_norm": 2.945932149887085,
      "learning_rate": 2.5518035587855492e-06,
      "loss": 0.06,
      "step": 1833,
      "video_reward_cumulative_accuracy": 0.8202400436442989
    },
    {
      "epoch": 0.5443751855149896,
      "grad_norm": 1.299325942993164,
      "learning_rate": 2.5492137242865706e-06,
      "loss": 0.0312,
      "step": 1834,
      "video_reward_cumulative_accuracy": 0.8203380588876772
    },
    {
      "epoch": 0.5446720094983675,
      "grad_norm": 1.5175635814666748,
      "learning_rate": 2.5466238369518808e-06,
      "loss": 0.0433,
      "step": 1835,
      "video_reward_cumulative_accuracy": 0.8204359673024523
    },
    {
      "epoch": 0.5449688334817453,
      "grad_norm": 3.6290009021759033,
      "learning_rate": 2.544033899561978e-06,
      "loss": 0.0684,
      "step": 1836,
      "video_reward_cumulative_accuracy": 0.8205337690631809
    },
    {
      "epoch": 0.5452656574651232,
      "grad_norm": 2.739287853240967,
      "learning_rate": 2.5414439148974096e-06,
      "loss": 0.0701,
      "step": 1837,
      "video_reward_cumulative_accuracy": 0.8206314643440392
    },
    {
      "epoch": 0.5455624814485011,
      "grad_norm": 2.4160091876983643,
      "learning_rate": 2.5388538857387756e-06,
      "loss": 0.0424,
      "step": 1838,
      "video_reward_cumulative_accuracy": 0.8204570184983678
    },
    {
      "epoch": 0.5458593054318789,
      "grad_norm": 2.7636618614196777,
      "learning_rate": 2.5362638148667256e-06,
      "loss": 0.0486,
      "step": 1839,
      "video_reward_cumulative_accuracy": 0.8205546492659054
    },
    {
      "epoch": 0.5461561294152567,
      "grad_norm": 1.2934203147888184,
      "learning_rate": 2.5336737050619497e-06,
      "loss": 0.0313,
      "step": 1840,
      "video_reward_cumulative_accuracy": 0.8206521739130435
    },
    {
      "epoch": 0.5464529533986346,
      "grad_norm": 1.7498849630355835,
      "learning_rate": 2.5310835591051837e-06,
      "loss": 0.0527,
      "step": 1841,
      "video_reward_cumulative_accuracy": 0.8207495926127105
    },
    {
      "epoch": 0.5467497773820125,
      "grad_norm": 2.3050217628479004,
      "learning_rate": 2.528493379777199e-06,
      "loss": 0.0268,
      "step": 1842,
      "video_reward_cumulative_accuracy": 0.8208469055374593
    },
    {
      "epoch": 0.5470466013653903,
      "grad_norm": 1.1211016178131104,
      "learning_rate": 2.5259031698588065e-06,
      "loss": 0.0133,
      "step": 1843,
      "video_reward_cumulative_accuracy": 0.8209441128594682
    },
    {
      "epoch": 0.5473434253487682,
      "grad_norm": 2.8683953285217285,
      "learning_rate": 2.523312932130847e-06,
      "loss": 0.0691,
      "step": 1844,
      "video_reward_cumulative_accuracy": 0.8210412147505423
    },
    {
      "epoch": 0.5476402493321461,
      "grad_norm": 2.4542722702026367,
      "learning_rate": 2.5207226693741914e-06,
      "loss": 0.0429,
      "step": 1845,
      "video_reward_cumulative_accuracy": 0.8208672086720867
    },
    {
      "epoch": 0.5479370733155239,
      "grad_norm": 3.8734562397003174,
      "learning_rate": 2.5181323843697403e-06,
      "loss": 0.0458,
      "step": 1846,
      "video_reward_cumulative_accuracy": 0.820964247020585
    },
    {
      "epoch": 0.5482338972989017,
      "grad_norm": 2.5339853763580322,
      "learning_rate": 2.5155420798984137e-06,
      "loss": 0.0254,
      "step": 1847,
      "video_reward_cumulative_accuracy": 0.821061180292366
    },
    {
      "epoch": 0.5485307212822796,
      "grad_norm": 1.4046697616577148,
      "learning_rate": 2.512951758741156e-06,
      "loss": 0.0352,
      "step": 1848,
      "video_reward_cumulative_accuracy": 0.8211580086580087
    },
    {
      "epoch": 0.5488275452656575,
      "grad_norm": 0.8442341089248657,
      "learning_rate": 2.510361423678929e-06,
      "loss": 0.024,
      "step": 1849,
      "video_reward_cumulative_accuracy": 0.8209843158464034
    },
    {
      "epoch": 0.5491243692490353,
      "grad_norm": 1.8290486335754395,
      "learning_rate": 2.5077710774927067e-06,
      "loss": 0.0147,
      "step": 1850,
      "video_reward_cumulative_accuracy": 0.8210810810810811
    },
    {
      "epoch": 0.5494211932324132,
      "grad_norm": 1.443580150604248,
      "learning_rate": 2.5051807229634796e-06,
      "loss": 0.0335,
      "step": 1851,
      "video_reward_cumulative_accuracy": 0.8211777417612102
    },
    {
      "epoch": 0.549718017215791,
      "grad_norm": 0.5750948190689087,
      "learning_rate": 2.5025903628722427e-06,
      "loss": 0.0131,
      "step": 1852,
      "video_reward_cumulative_accuracy": 0.8212742980561555
    },
    {
      "epoch": 0.5500148411991689,
      "grad_norm": 0.9644943475723267,
      "learning_rate": 2.5e-06,
      "loss": 0.0187,
      "step": 1853,
      "video_reward_cumulative_accuracy": 0.8213707501349163
    },
    {
      "epoch": 0.5503116651825467,
      "grad_norm": 2.259012460708618,
      "learning_rate": 2.4974096371277577e-06,
      "loss": 0.0195,
      "step": 1854,
      "video_reward_cumulative_accuracy": 0.8214670981661273
    },
    {
      "epoch": 0.5506084891659246,
      "grad_norm": 4.043064117431641,
      "learning_rate": 2.4948192770365217e-06,
      "loss": 0.0473,
      "step": 1855,
      "video_reward_cumulative_accuracy": 0.8215633423180593
    },
    {
      "epoch": 0.5509053131493025,
      "grad_norm": 2.6557042598724365,
      "learning_rate": 2.4922289225072937e-06,
      "loss": 0.0347,
      "step": 1856,
      "video_reward_cumulative_accuracy": 0.8213900862068966
    },
    {
      "epoch": 0.5512021371326803,
      "grad_norm": 3.153779983520508,
      "learning_rate": 2.4896385763210725e-06,
      "loss": 0.0883,
      "step": 1857,
      "video_reward_cumulative_accuracy": 0.821486268174475
    },
    {
      "epoch": 0.5514989611160582,
      "grad_norm": 2.566129446029663,
      "learning_rate": 2.4870482412588444e-06,
      "loss": 0.0617,
      "step": 1858,
      "video_reward_cumulative_accuracy": 0.8215823466092572
    },
    {
      "epoch": 0.551795785099436,
      "grad_norm": 1.7508662939071655,
      "learning_rate": 2.484457920101587e-06,
      "loss": 0.0305,
      "step": 1859,
      "video_reward_cumulative_accuracy": 0.8216783216783217
    },
    {
      "epoch": 0.5520926090828139,
      "grad_norm": 2.019207715988159,
      "learning_rate": 2.4818676156302605e-06,
      "loss": 0.0521,
      "step": 1860,
      "video_reward_cumulative_accuracy": 0.8217741935483871
    },
    {
      "epoch": 0.5523894330661917,
      "grad_norm": 0.6667674779891968,
      "learning_rate": 2.4792773306258085e-06,
      "loss": 0.011,
      "step": 1861,
      "video_reward_cumulative_accuracy": 0.8218699623858141
    },
    {
      "epoch": 0.5526862570495696,
      "grad_norm": 1.5519354343414307,
      "learning_rate": 2.4766870678691538e-06,
      "loss": 0.0503,
      "step": 1862,
      "video_reward_cumulative_accuracy": 0.8219656283566058
    },
    {
      "epoch": 0.5529830810329475,
      "grad_norm": 3.2907981872558594,
      "learning_rate": 2.474096830141194e-06,
      "loss": 0.0484,
      "step": 1863,
      "video_reward_cumulative_accuracy": 0.8217928073000537
    },
    {
      "epoch": 0.5532799050163253,
      "grad_norm": 1.4493234157562256,
      "learning_rate": 2.4715066202228017e-06,
      "loss": 0.0132,
      "step": 1864,
      "video_reward_cumulative_accuracy": 0.8218884120171673
    },
    {
      "epoch": 0.5535767289997032,
      "grad_norm": 0.8373463749885559,
      "learning_rate": 2.4689164408948176e-06,
      "loss": 0.0103,
      "step": 1865,
      "video_reward_cumulative_accuracy": 0.8219839142091153
    },
    {
      "epoch": 0.553873552983081,
      "grad_norm": 1.3251855373382568,
      "learning_rate": 2.4663262949380508e-06,
      "loss": 0.0186,
      "step": 1866,
      "video_reward_cumulative_accuracy": 0.8220793140407289
    },
    {
      "epoch": 0.5541703769664589,
      "grad_norm": 2.757843255996704,
      "learning_rate": 2.4637361851332752e-06,
      "loss": 0.0747,
      "step": 1867,
      "video_reward_cumulative_accuracy": 0.8221746116764863
    },
    {
      "epoch": 0.5544672009498367,
      "grad_norm": 3.380035877227783,
      "learning_rate": 2.4611461142612243e-06,
      "loss": 0.0661,
      "step": 1868,
      "video_reward_cumulative_accuracy": 0.8222698072805139
    },
    {
      "epoch": 0.5547640249332146,
      "grad_norm": 1.7979135513305664,
      "learning_rate": 2.4585560851025917e-06,
      "loss": 0.0395,
      "step": 1869,
      "video_reward_cumulative_accuracy": 0.8223649010165864
    },
    {
      "epoch": 0.5550608489165925,
      "grad_norm": 1.7438832521438599,
      "learning_rate": 2.455966100438023e-06,
      "loss": 0.0389,
      "step": 1870,
      "video_reward_cumulative_accuracy": 0.8224598930481284
    },
    {
      "epoch": 0.5553576728999703,
      "grad_norm": 2.5413877964019775,
      "learning_rate": 2.4533761630481205e-06,
      "loss": 0.0202,
      "step": 1871,
      "video_reward_cumulative_accuracy": 0.8225547835382149
    },
    {
      "epoch": 0.5556544968833482,
      "grad_norm": 2.7013587951660156,
      "learning_rate": 2.45078627571343e-06,
      "loss": 0.0537,
      "step": 1872,
      "video_reward_cumulative_accuracy": 0.8226495726495726
    },
    {
      "epoch": 0.555951320866726,
      "grad_norm": 2.5262835025787354,
      "learning_rate": 2.4481964412144508e-06,
      "loss": 0.0388,
      "step": 1873,
      "video_reward_cumulative_accuracy": 0.8227442605445809
    },
    {
      "epoch": 0.5562481448501039,
      "grad_norm": 1.8535455465316772,
      "learning_rate": 2.4456066623316203e-06,
      "loss": 0.0714,
      "step": 1874,
      "video_reward_cumulative_accuracy": 0.8228388473852721
    },
    {
      "epoch": 0.5565449688334817,
      "grad_norm": 2.635227680206299,
      "learning_rate": 2.4430169418453157e-06,
      "loss": 0.0385,
      "step": 1875,
      "video_reward_cumulative_accuracy": 0.8229333333333333
    },
    {
      "epoch": 0.5568417928168596,
      "grad_norm": 1.0287766456604004,
      "learning_rate": 2.4404272825358564e-06,
      "loss": 0.0146,
      "step": 1876,
      "video_reward_cumulative_accuracy": 0.8230277185501066
    },
    {
      "epoch": 0.5571386168002375,
      "grad_norm": 0.640048086643219,
      "learning_rate": 2.4378376871834896e-06,
      "loss": 0.0152,
      "step": 1877,
      "video_reward_cumulative_accuracy": 0.8231220031965903
    },
    {
      "epoch": 0.5574354407836153,
      "grad_norm": 1.0190867185592651,
      "learning_rate": 2.435248158568399e-06,
      "loss": 0.0137,
      "step": 1878,
      "video_reward_cumulative_accuracy": 0.8232161874334398
    },
    {
      "epoch": 0.5577322647669932,
      "grad_norm": 0.6473628282546997,
      "learning_rate": 2.4326586994706964e-06,
      "loss": 0.0099,
      "step": 1879,
      "video_reward_cumulative_accuracy": 0.8233102714209686
    },
    {
      "epoch": 0.558029088750371,
      "grad_norm": 1.6340287923812866,
      "learning_rate": 2.430069312670414e-06,
      "loss": 0.0222,
      "step": 1880,
      "video_reward_cumulative_accuracy": 0.823404255319149
    },
    {
      "epoch": 0.5583259127337489,
      "grad_norm": 2.285609245300293,
      "learning_rate": 2.4274800009475134e-06,
      "loss": 0.06,
      "step": 1881,
      "video_reward_cumulative_accuracy": 0.823498139287613
    },
    {
      "epoch": 0.5586227367171267,
      "grad_norm": 2.4804632663726807,
      "learning_rate": 2.42489076708187e-06,
      "loss": 0.0291,
      "step": 1882,
      "video_reward_cumulative_accuracy": 0.8235919234856536
    },
    {
      "epoch": 0.5589195607005046,
      "grad_norm": 2.7268261909484863,
      "learning_rate": 2.422301613853278e-06,
      "loss": 0.0443,
      "step": 1883,
      "video_reward_cumulative_accuracy": 0.8236856080722251
    },
    {
      "epoch": 0.5592163846838825,
      "grad_norm": 3.1525275707244873,
      "learning_rate": 2.419712544041444e-06,
      "loss": 0.0388,
      "step": 1884,
      "video_reward_cumulative_accuracy": 0.8237791932059448
    },
    {
      "epoch": 0.5595132086672603,
      "grad_norm": 1.7583348751068115,
      "learning_rate": 2.4171235604259865e-06,
      "loss": 0.0213,
      "step": 1885,
      "video_reward_cumulative_accuracy": 0.8238726790450929
    },
    {
      "epoch": 0.5598100326506382,
      "grad_norm": 4.774654388427734,
      "learning_rate": 2.4145346657864318e-06,
      "loss": 0.0528,
      "step": 1886,
      "video_reward_cumulative_accuracy": 0.823966065747614
    },
    {
      "epoch": 0.560106856634016,
      "grad_norm": 2.143681526184082,
      "learning_rate": 2.4119458629022077e-06,
      "loss": 0.0185,
      "step": 1887,
      "video_reward_cumulative_accuracy": 0.8240593534711181
    },
    {
      "epoch": 0.5604036806173939,
      "grad_norm": 1.6760286092758179,
      "learning_rate": 2.4093571545526466e-06,
      "loss": 0.0488,
      "step": 1888,
      "video_reward_cumulative_accuracy": 0.8241525423728814
    },
    {
      "epoch": 0.5607005046007717,
      "grad_norm": 0.8435209393501282,
      "learning_rate": 2.406768543516977e-06,
      "loss": 0.0108,
      "step": 1889,
      "video_reward_cumulative_accuracy": 0.8242456326098465
    },
    {
      "epoch": 0.5609973285841496,
      "grad_norm": 3.637840747833252,
      "learning_rate": 2.404180032574325e-06,
      "loss": 0.085,
      "step": 1890,
      "video_reward_cumulative_accuracy": 0.8243386243386244
    },
    {
      "epoch": 0.5612941525675275,
      "grad_norm": 0.5115882158279419,
      "learning_rate": 2.4015916245037086e-06,
      "loss": 0.0259,
      "step": 1891,
      "video_reward_cumulative_accuracy": 0.8244315177154945
    },
    {
      "epoch": 0.5615909765509053,
      "grad_norm": 1.5231218338012695,
      "learning_rate": 2.3990033220840344e-06,
      "loss": 0.0462,
      "step": 1892,
      "video_reward_cumulative_accuracy": 0.8242600422832981
    },
    {
      "epoch": 0.5618878005342832,
      "grad_norm": 2.774663209915161,
      "learning_rate": 2.3964151280940963e-06,
      "loss": 0.0213,
      "step": 1893,
      "video_reward_cumulative_accuracy": 0.8243528790279979
    },
    {
      "epoch": 0.562184624517661,
      "grad_norm": 7.451237201690674,
      "learning_rate": 2.3938270453125717e-06,
      "loss": 0.0661,
      "step": 1894,
      "video_reward_cumulative_accuracy": 0.8244456177402323
    },
    {
      "epoch": 0.5624814485010389,
      "grad_norm": 3.062457323074341,
      "learning_rate": 2.3912390765180195e-06,
      "loss": 0.018,
      "step": 1895,
      "video_reward_cumulative_accuracy": 0.8242744063324539
    },
    {
      "epoch": 0.5627782724844167,
      "grad_norm": 5.17927885055542,
      "learning_rate": 2.3886512244888737e-06,
      "loss": 0.071,
      "step": 1896,
      "video_reward_cumulative_accuracy": 0.8238396624472574
    },
    {
      "epoch": 0.5630750964677946,
      "grad_norm": 2.72430682182312,
      "learning_rate": 2.386063492003446e-06,
      "loss": 0.0373,
      "step": 1897,
      "video_reward_cumulative_accuracy": 0.8236689509752241
    },
    {
      "epoch": 0.5633719204511725,
      "grad_norm": 1.81307053565979,
      "learning_rate": 2.3834758818399185e-06,
      "loss": 0.0787,
      "step": 1898,
      "video_reward_cumulative_accuracy": 0.8232349841938883
    },
    {
      "epoch": 0.5636687444345503,
      "grad_norm": 2.822479248046875,
      "learning_rate": 2.3808883967763415e-06,
      "loss": 0.0702,
      "step": 1899,
      "video_reward_cumulative_accuracy": 0.8230647709320695
    },
    {
      "epoch": 0.5639655684179282,
      "grad_norm": 3.3593389987945557,
      "learning_rate": 2.378301039590631e-06,
      "loss": 0.0254,
      "step": 1900,
      "video_reward_cumulative_accuracy": 0.8231578947368421
    },
    {
      "epoch": 0.564262392401306,
      "grad_norm": 4.469653129577637,
      "learning_rate": 2.3757138130605662e-06,
      "loss": 0.0387,
      "step": 1901,
      "video_reward_cumulative_accuracy": 0.823250920568122
    },
    {
      "epoch": 0.5645592163846839,
      "grad_norm": 1.8031344413757324,
      "learning_rate": 2.3731267199637857e-06,
      "loss": 0.0324,
      "step": 1902,
      "video_reward_cumulative_accuracy": 0.8233438485804416
    },
    {
      "epoch": 0.5648560403680617,
      "grad_norm": 2.311720132827759,
      "learning_rate": 2.370539763077786e-06,
      "loss": 0.0386,
      "step": 1903,
      "video_reward_cumulative_accuracy": 0.8234366789280084
    },
    {
      "epoch": 0.5651528643514396,
      "grad_norm": 2.129380464553833,
      "learning_rate": 2.3679529451799156e-06,
      "loss": 0.0571,
      "step": 1904,
      "video_reward_cumulative_accuracy": 0.823266806722689
    },
    {
      "epoch": 0.5654496883348175,
      "grad_norm": 1.209018349647522,
      "learning_rate": 2.3653662690473747e-06,
      "loss": 0.0215,
      "step": 1905,
      "video_reward_cumulative_accuracy": 0.8233595800524934
    },
    {
      "epoch": 0.5657465123181953,
      "grad_norm": 2.74529767036438,
      "learning_rate": 2.3627797374572107e-06,
      "loss": 0.0363,
      "step": 1906,
      "video_reward_cumulative_accuracy": 0.8234522560335782
    },
    {
      "epoch": 0.5660433363015732,
      "grad_norm": 2.243732452392578,
      "learning_rate": 2.3601933531863182e-06,
      "loss": 0.0252,
      "step": 1907,
      "video_reward_cumulative_accuracy": 0.8235448348190876
    },
    {
      "epoch": 0.566340160284951,
      "grad_norm": 2.2682273387908936,
      "learning_rate": 2.357607119011429e-06,
      "loss": 0.0386,
      "step": 1908,
      "video_reward_cumulative_accuracy": 0.8236373165618449
    },
    {
      "epoch": 0.5666369842683289,
      "grad_norm": 1.7562425136566162,
      "learning_rate": 2.355021037709118e-06,
      "loss": 0.0565,
      "step": 1909,
      "video_reward_cumulative_accuracy": 0.8237297014143531
    },
    {
      "epoch": 0.5669338082517067,
      "grad_norm": 1.4634464979171753,
      "learning_rate": 2.352435112055795e-06,
      "loss": 0.0267,
      "step": 1910,
      "video_reward_cumulative_accuracy": 0.8238219895287958
    },
    {
      "epoch": 0.5672306322350846,
      "grad_norm": 1.8789516687393188,
      "learning_rate": 2.3498493448277013e-06,
      "loss": 0.0227,
      "step": 1911,
      "video_reward_cumulative_accuracy": 0.8239141810570382
    },
    {
      "epoch": 0.5675274562184625,
      "grad_norm": 2.6937637329101562,
      "learning_rate": 2.3472637388009094e-06,
      "loss": 0.0524,
      "step": 1912,
      "video_reward_cumulative_accuracy": 0.8240062761506276
    },
    {
      "epoch": 0.5678242802018403,
      "grad_norm": 1.9026590585708618,
      "learning_rate": 2.3446782967513176e-06,
      "loss": 0.04,
      "step": 1913,
      "video_reward_cumulative_accuracy": 0.8240982749607946
    },
    {
      "epoch": 0.5681211041852182,
      "grad_norm": 2.191349744796753,
      "learning_rate": 2.3420930214546496e-06,
      "loss": 0.0194,
      "step": 1914,
      "video_reward_cumulative_accuracy": 0.8241901776384535
    },
    {
      "epoch": 0.568417928168596,
      "grad_norm": 0.43110188841819763,
      "learning_rate": 2.3395079156864493e-06,
      "loss": 0.0067,
      "step": 1915,
      "video_reward_cumulative_accuracy": 0.8242819843342036
    },
    {
      "epoch": 0.5687147521519739,
      "grad_norm": 2.1239190101623535,
      "learning_rate": 2.3369229822220782e-06,
      "loss": 0.0494,
      "step": 1916,
      "video_reward_cumulative_accuracy": 0.8243736951983298
    },
    {
      "epoch": 0.5690115761353517,
      "grad_norm": 2.330325126647949,
      "learning_rate": 2.3343382238367145e-06,
      "loss": 0.0496,
      "step": 1917,
      "video_reward_cumulative_accuracy": 0.8244653103808033
    },
    {
      "epoch": 0.5693084001187296,
      "grad_norm": 2.316889524459839,
      "learning_rate": 2.331753643305345e-06,
      "loss": 0.0605,
      "step": 1918,
      "video_reward_cumulative_accuracy": 0.82429614181439
    },
    {
      "epoch": 0.5696052241021075,
      "grad_norm": 2.9871466159820557,
      "learning_rate": 2.3291692434027705e-06,
      "loss": 0.0446,
      "step": 1919,
      "video_reward_cumulative_accuracy": 0.8243877019280875
    },
    {
      "epoch": 0.5699020480854853,
      "grad_norm": 2.9490387439727783,
      "learning_rate": 2.326585026903592e-06,
      "loss": 0.03,
      "step": 1920,
      "video_reward_cumulative_accuracy": 0.8244791666666667
    },
    {
      "epoch": 0.5701988720688632,
      "grad_norm": 2.3850908279418945,
      "learning_rate": 2.324000996582219e-06,
      "loss": 0.0334,
      "step": 1921,
      "video_reward_cumulative_accuracy": 0.8243102550754815
    },
    {
      "epoch": 0.570495696052241,
      "grad_norm": 1.3517158031463623,
      "learning_rate": 2.3214171552128594e-06,
      "loss": 0.0335,
      "step": 1922,
      "video_reward_cumulative_accuracy": 0.8244016649323621
    },
    {
      "epoch": 0.5707925200356189,
      "grad_norm": 2.155529022216797,
      "learning_rate": 2.3188335055695145e-06,
      "loss": 0.0204,
      "step": 1923,
      "video_reward_cumulative_accuracy": 0.8244929797191888
    },
    {
      "epoch": 0.5710893440189967,
      "grad_norm": 2.1936349868774414,
      "learning_rate": 2.316250050425987e-06,
      "loss": 0.0398,
      "step": 1924,
      "video_reward_cumulative_accuracy": 0.8243243243243243
    },
    {
      "epoch": 0.5713861680023746,
      "grad_norm": 3.196364164352417,
      "learning_rate": 2.3136667925558635e-06,
      "loss": 0.0479,
      "step": 1925,
      "video_reward_cumulative_accuracy": 0.8244155844155844
    },
    {
      "epoch": 0.5716829919857525,
      "grad_norm": 1.847775936126709,
      "learning_rate": 2.311083734732523e-06,
      "loss": 0.0229,
      "step": 1926,
      "video_reward_cumulative_accuracy": 0.8245067497403946
    },
    {
      "epoch": 0.5719798159691303,
      "grad_norm": 0.9182643890380859,
      "learning_rate": 2.30850087972913e-06,
      "loss": 0.0187,
      "step": 1927,
      "video_reward_cumulative_accuracy": 0.8245978204462896
    },
    {
      "epoch": 0.5722766399525082,
      "grad_norm": 1.595245599746704,
      "learning_rate": 2.3059182303186276e-06,
      "loss": 0.0361,
      "step": 1928,
      "video_reward_cumulative_accuracy": 0.8246887966804979
    },
    {
      "epoch": 0.572573463935886,
      "grad_norm": 1.5071243047714233,
      "learning_rate": 2.303335789273744e-06,
      "loss": 0.028,
      "step": 1929,
      "video_reward_cumulative_accuracy": 0.8247796785899429
    },
    {
      "epoch": 0.5728702879192639,
      "grad_norm": 1.241547703742981,
      "learning_rate": 2.3007535593669773e-06,
      "loss": 0.0086,
      "step": 1930,
      "video_reward_cumulative_accuracy": 0.8248704663212435
    },
    {
      "epoch": 0.5731671119026417,
      "grad_norm": 1.961064100265503,
      "learning_rate": 2.2981715433706037e-06,
      "loss": 0.0262,
      "step": 1931,
      "video_reward_cumulative_accuracy": 0.824702226825479
    },
    {
      "epoch": 0.5734639358860196,
      "grad_norm": 0.9366904497146606,
      "learning_rate": 2.2955897440566667e-06,
      "loss": 0.0133,
      "step": 1932,
      "video_reward_cumulative_accuracy": 0.8247929606625258
    },
    {
      "epoch": 0.5737607598693975,
      "grad_norm": 1.7824345827102661,
      "learning_rate": 2.2930081641969782e-06,
      "loss": 0.0164,
      "step": 1933,
      "video_reward_cumulative_accuracy": 0.8248836006207967
    },
    {
      "epoch": 0.5740575838527753,
      "grad_norm": 2.103070020675659,
      "learning_rate": 2.2904268065631174e-06,
      "loss": 0.0494,
      "step": 1934,
      "video_reward_cumulative_accuracy": 0.8249741468459152
    },
    {
      "epoch": 0.5743544078361532,
      "grad_norm": 1.738095760345459,
      "learning_rate": 2.2878456739264197e-06,
      "loss": 0.0459,
      "step": 1935,
      "video_reward_cumulative_accuracy": 0.8250645994832041
    },
    {
      "epoch": 0.574651231819531,
      "grad_norm": 2.6681344509124756,
      "learning_rate": 2.2852647690579823e-06,
      "loss": 0.1033,
      "step": 1936,
      "video_reward_cumulative_accuracy": 0.824896694214876
    },
    {
      "epoch": 0.5749480558029089,
      "grad_norm": 2.3271491527557373,
      "learning_rate": 2.2826840947286566e-06,
      "loss": 0.0543,
      "step": 1937,
      "video_reward_cumulative_accuracy": 0.8249870934434693
    },
    {
      "epoch": 0.5752448797862867,
      "grad_norm": 3.099133014678955,
      "learning_rate": 2.2801036537090475e-06,
      "loss": 0.0862,
      "step": 1938,
      "video_reward_cumulative_accuracy": 0.8248194014447885
    },
    {
      "epoch": 0.5755417037696646,
      "grad_norm": 2.7922987937927246,
      "learning_rate": 2.2775234487695093e-06,
      "loss": 0.0294,
      "step": 1939,
      "video_reward_cumulative_accuracy": 0.8249097472924187
    },
    {
      "epoch": 0.5758385277530425,
      "grad_norm": 2.1268813610076904,
      "learning_rate": 2.2749434826801416e-06,
      "loss": 0.0671,
      "step": 1940,
      "video_reward_cumulative_accuracy": 0.825
    },
    {
      "epoch": 0.5761353517364203,
      "grad_norm": 3.367703676223755,
      "learning_rate": 2.272363758210789e-06,
      "loss": 0.0779,
      "step": 1941,
      "video_reward_cumulative_accuracy": 0.8250901597114889
    },
    {
      "epoch": 0.5764321757197982,
      "grad_norm": 4.295740127563477,
      "learning_rate": 2.2697842781310354e-06,
      "loss": 0.0447,
      "step": 1942,
      "video_reward_cumulative_accuracy": 0.8251802265705458
    },
    {
      "epoch": 0.576728999703176,
      "grad_norm": 6.525967597961426,
      "learning_rate": 2.2672050452102036e-06,
      "loss": 0.0787,
      "step": 1943,
      "video_reward_cumulative_accuracy": 0.8252702007205353
    },
    {
      "epoch": 0.5770258236865539,
      "grad_norm": 3.3833274841308594,
      "learning_rate": 2.26462606221735e-06,
      "loss": 0.0647,
      "step": 1944,
      "video_reward_cumulative_accuracy": 0.8253600823045267
    },
    {
      "epoch": 0.5773226476699317,
      "grad_norm": 1.5874844789505005,
      "learning_rate": 2.262047331921264e-06,
      "loss": 0.0274,
      "step": 1945,
      "video_reward_cumulative_accuracy": 0.8254498714652956
    },
    {
      "epoch": 0.5776194716533096,
      "grad_norm": 1.2541552782058716,
      "learning_rate": 2.259468857090462e-06,
      "loss": 0.0108,
      "step": 1946,
      "video_reward_cumulative_accuracy": 0.8255395683453237
    },
    {
      "epoch": 0.5779162956366874,
      "grad_norm": 1.4068396091461182,
      "learning_rate": 2.2568906404931878e-06,
      "loss": 0.0234,
      "step": 1947,
      "video_reward_cumulative_accuracy": 0.8256291730868002
    },
    {
      "epoch": 0.5782131196200653,
      "grad_norm": 1.276092290878296,
      "learning_rate": 2.254312684897406e-06,
      "loss": 0.0289,
      "step": 1948,
      "video_reward_cumulative_accuracy": 0.8257186858316222
    },
    {
      "epoch": 0.5785099436034432,
      "grad_norm": 0.7035172581672668,
      "learning_rate": 2.2517349930708032e-06,
      "loss": 0.0199,
      "step": 1949,
      "video_reward_cumulative_accuracy": 0.8258081067213956
    },
    {
      "epoch": 0.578806767586821,
      "grad_norm": 1.7570953369140625,
      "learning_rate": 2.2491575677807813e-06,
      "loss": 0.0319,
      "step": 1950,
      "video_reward_cumulative_accuracy": 0.8258974358974359
    },
    {
      "epoch": 0.5791035915701989,
      "grad_norm": 2.2827887535095215,
      "learning_rate": 2.2465804117944568e-06,
      "loss": 0.0531,
      "step": 1951,
      "video_reward_cumulative_accuracy": 0.8259866735007688
    },
    {
      "epoch": 0.5794004155535767,
      "grad_norm": 0.8664276003837585,
      "learning_rate": 2.244003527878656e-06,
      "loss": 0.0131,
      "step": 1952,
      "video_reward_cumulative_accuracy": 0.8260758196721312
    },
    {
      "epoch": 0.5796972395369546,
      "grad_norm": 0.4808574914932251,
      "learning_rate": 2.2414269187999153e-06,
      "loss": 0.0139,
      "step": 1953,
      "video_reward_cumulative_accuracy": 0.8261648745519713
    },
    {
      "epoch": 0.5799940635203324,
      "grad_norm": 3.057589054107666,
      "learning_rate": 2.2388505873244728e-06,
      "loss": 0.0282,
      "step": 1954,
      "video_reward_cumulative_accuracy": 0.8259979529170931
    },
    {
      "epoch": 0.5802908875037103,
      "grad_norm": 1.7676241397857666,
      "learning_rate": 2.2362745362182724e-06,
      "loss": 0.0718,
      "step": 1955,
      "video_reward_cumulative_accuracy": 0.8258312020460358
    },
    {
      "epoch": 0.5805877114870882,
      "grad_norm": 1.0364630222320557,
      "learning_rate": 2.2336987682469534e-06,
      "loss": 0.017,
      "step": 1956,
      "video_reward_cumulative_accuracy": 0.825920245398773
    },
    {
      "epoch": 0.580884535470466,
      "grad_norm": 1.8541654348373413,
      "learning_rate": 2.2311232861758526e-06,
      "loss": 0.0202,
      "step": 1957,
      "video_reward_cumulative_accuracy": 0.8260091977516607
    },
    {
      "epoch": 0.5811813594538439,
      "grad_norm": 1.1879740953445435,
      "learning_rate": 2.228548092770002e-06,
      "loss": 0.0549,
      "step": 1958,
      "video_reward_cumulative_accuracy": 0.8260980592441267
    },
    {
      "epoch": 0.5814781834372217,
      "grad_norm": 1.5490862131118774,
      "learning_rate": 2.2259731907941195e-06,
      "loss": 0.0214,
      "step": 1959,
      "video_reward_cumulative_accuracy": 0.8261868300153139
    },
    {
      "epoch": 0.5817750074205996,
      "grad_norm": 2.618912696838379,
      "learning_rate": 2.223398583012614e-06,
      "loss": 0.0376,
      "step": 1960,
      "video_reward_cumulative_accuracy": 0.8262755102040816
    },
    {
      "epoch": 0.5820718314039774,
      "grad_norm": 1.805620551109314,
      "learning_rate": 2.2208242721895744e-06,
      "loss": 0.0346,
      "step": 1961,
      "video_reward_cumulative_accuracy": 0.8263640999490056
    },
    {
      "epoch": 0.5823686553873553,
      "grad_norm": 4.237667083740234,
      "learning_rate": 2.2182502610887757e-06,
      "loss": 0.0546,
      "step": 1962,
      "video_reward_cumulative_accuracy": 0.8264525993883792
    },
    {
      "epoch": 0.5826654793707332,
      "grad_norm": 0.6842634677886963,
      "learning_rate": 2.215676552473668e-06,
      "loss": 0.005,
      "step": 1963,
      "video_reward_cumulative_accuracy": 0.826541008660214
    },
    {
      "epoch": 0.582962303354111,
      "grad_norm": 2.4333693981170654,
      "learning_rate": 2.213103149107376e-06,
      "loss": 0.0525,
      "step": 1964,
      "video_reward_cumulative_accuracy": 0.8263747454175153
    },
    {
      "epoch": 0.5832591273374889,
      "grad_norm": 2.396888256072998,
      "learning_rate": 2.210530053752701e-06,
      "loss": 0.0805,
      "step": 1965,
      "video_reward_cumulative_accuracy": 0.8264631043256997
    },
    {
      "epoch": 0.5835559513208667,
      "grad_norm": 2.7673487663269043,
      "learning_rate": 2.2079572691721063e-06,
      "loss": 0.0213,
      "step": 1966,
      "video_reward_cumulative_accuracy": 0.8262970498474059
    },
    {
      "epoch": 0.5838527753042446,
      "grad_norm": 1.4659632444381714,
      "learning_rate": 2.20538479812773e-06,
      "loss": 0.023,
      "step": 1967,
      "video_reward_cumulative_accuracy": 0.8263853584138282
    },
    {
      "epoch": 0.5841495992876224,
      "grad_norm": 2.001253366470337,
      "learning_rate": 2.2028126433813657e-06,
      "loss": 0.0304,
      "step": 1968,
      "video_reward_cumulative_accuracy": 0.8264735772357723
    },
    {
      "epoch": 0.5844464232710003,
      "grad_norm": 2.858795166015625,
      "learning_rate": 2.200240807694474e-06,
      "loss": 0.0827,
      "step": 1969,
      "video_reward_cumulative_accuracy": 0.8263077704418487
    },
    {
      "epoch": 0.5847432472543782,
      "grad_norm": 1.0106351375579834,
      "learning_rate": 2.19766929382817e-06,
      "loss": 0.035,
      "step": 1970,
      "video_reward_cumulative_accuracy": 0.8263959390862944
    },
    {
      "epoch": 0.585040071237756,
      "grad_norm": 1.513059377670288,
      "learning_rate": 2.195098104543221e-06,
      "loss": 0.019,
      "step": 1971,
      "video_reward_cumulative_accuracy": 0.8264840182648402
    },
    {
      "epoch": 0.5853368952211339,
      "grad_norm": 1.704352855682373,
      "learning_rate": 2.1925272426000514e-06,
      "loss": 0.0135,
      "step": 1972,
      "video_reward_cumulative_accuracy": 0.8265720081135902
    },
    {
      "epoch": 0.5856337192045117,
      "grad_norm": 3.4350345134735107,
      "learning_rate": 2.189956710758729e-06,
      "loss": 0.0736,
      "step": 1973,
      "video_reward_cumulative_accuracy": 0.8264064875823619
    },
    {
      "epoch": 0.5859305431878896,
      "grad_norm": 0.5892782807350159,
      "learning_rate": 2.1873865117789682e-06,
      "loss": 0.0074,
      "step": 1974,
      "video_reward_cumulative_accuracy": 0.8264944275582573
    },
    {
      "epoch": 0.5862273671712674,
      "grad_norm": 0.523690938949585,
      "learning_rate": 2.184816648420131e-06,
      "loss": 0.0095,
      "step": 1975,
      "video_reward_cumulative_accuracy": 0.8265822784810126
    },
    {
      "epoch": 0.5865241911546453,
      "grad_norm": 3.0676653385162354,
      "learning_rate": 2.1822471234412106e-06,
      "loss": 0.0585,
      "step": 1976,
      "video_reward_cumulative_accuracy": 0.8266700404858299
    },
    {
      "epoch": 0.5868210151380232,
      "grad_norm": 1.3661112785339355,
      "learning_rate": 2.1796779396008456e-06,
      "loss": 0.0415,
      "step": 1977,
      "video_reward_cumulative_accuracy": 0.8265048052604957
    },
    {
      "epoch": 0.587117839121401,
      "grad_norm": 2.5101301670074463,
      "learning_rate": 2.177109099657301e-06,
      "loss": 0.0227,
      "step": 1978,
      "video_reward_cumulative_accuracy": 0.826592517694641
    },
    {
      "epoch": 0.5874146631047789,
      "grad_norm": 1.466486930847168,
      "learning_rate": 2.174540606368477e-06,
      "loss": 0.054,
      "step": 1979,
      "video_reward_cumulative_accuracy": 0.8266801414855988
    },
    {
      "epoch": 0.5877114870881567,
      "grad_norm": 1.7742908000946045,
      "learning_rate": 2.1719724624919004e-06,
      "loss": 0.0465,
      "step": 1980,
      "video_reward_cumulative_accuracy": 0.8267676767676768
    },
    {
      "epoch": 0.5880083110715346,
      "grad_norm": 5.307931423187256,
      "learning_rate": 2.169404670784722e-06,
      "loss": 0.07,
      "step": 1981,
      "video_reward_cumulative_accuracy": 0.8268551236749117
    },
    {
      "epoch": 0.5883051350549124,
      "grad_norm": 2.4654183387756348,
      "learning_rate": 2.1668372340037184e-06,
      "loss": 0.0548,
      "step": 1982,
      "video_reward_cumulative_accuracy": 0.8269424823410696
    },
    {
      "epoch": 0.5886019590382903,
      "grad_norm": 3.1083977222442627,
      "learning_rate": 2.164270154905279e-06,
      "loss": 0.0294,
      "step": 1983,
      "video_reward_cumulative_accuracy": 0.827029752899647
    },
    {
      "epoch": 0.5888987830216682,
      "grad_norm": 2.782560348510742,
      "learning_rate": 2.1617034362454136e-06,
      "loss": 0.0229,
      "step": 1984,
      "video_reward_cumulative_accuracy": 0.827116935483871
    },
    {
      "epoch": 0.589195607005046,
      "grad_norm": 3.644629955291748,
      "learning_rate": 2.1591370807797434e-06,
      "loss": 0.0401,
      "step": 1985,
      "video_reward_cumulative_accuracy": 0.8272040302267003
    },
    {
      "epoch": 0.5894924309884239,
      "grad_norm": 3.797820806503296,
      "learning_rate": 2.1565710912635006e-06,
      "loss": 0.0408,
      "step": 1986,
      "video_reward_cumulative_accuracy": 0.8272910372608258
    },
    {
      "epoch": 0.5897892549718017,
      "grad_norm": 2.0654091835021973,
      "learning_rate": 2.154005470451524e-06,
      "loss": 0.0144,
      "step": 1987,
      "video_reward_cumulative_accuracy": 0.8273779567186713
    },
    {
      "epoch": 0.5900860789551796,
      "grad_norm": 2.2619528770446777,
      "learning_rate": 2.1514402210982558e-06,
      "loss": 0.0629,
      "step": 1988,
      "video_reward_cumulative_accuracy": 0.8272132796780685
    },
    {
      "epoch": 0.5903829029385574,
      "grad_norm": 3.204058885574341,
      "learning_rate": 2.148875345957741e-06,
      "loss": 0.0629,
      "step": 1989,
      "video_reward_cumulative_accuracy": 0.8273001508295625
    },
    {
      "epoch": 0.5906797269219353,
      "grad_norm": 2.4990057945251465,
      "learning_rate": 2.1463108477836217e-06,
      "loss": 0.0596,
      "step": 1990,
      "video_reward_cumulative_accuracy": 0.8273869346733669
    },
    {
      "epoch": 0.5909765509053132,
      "grad_norm": 1.8199396133422852,
      "learning_rate": 2.1437467293291357e-06,
      "loss": 0.0306,
      "step": 1991,
      "video_reward_cumulative_accuracy": 0.8274736313410347
    },
    {
      "epoch": 0.591273374888691,
      "grad_norm": 1.290522813796997,
      "learning_rate": 2.1411829933471124e-06,
      "loss": 0.0254,
      "step": 1992,
      "video_reward_cumulative_accuracy": 0.8273092369477911
    },
    {
      "epoch": 0.5915701988720689,
      "grad_norm": 0.8122701048851013,
      "learning_rate": 2.138619642589972e-06,
      "loss": 0.0428,
      "step": 1993,
      "video_reward_cumulative_accuracy": 0.8273958855995986
    },
    {
      "epoch": 0.5918670228554467,
      "grad_norm": 1.304606318473816,
      "learning_rate": 2.13605667980972e-06,
      "loss": 0.0408,
      "step": 1994,
      "video_reward_cumulative_accuracy": 0.827482447342026
    },
    {
      "epoch": 0.5921638468388246,
      "grad_norm": 0.6447356343269348,
      "learning_rate": 2.1334941077579457e-06,
      "loss": 0.0113,
      "step": 1995,
      "video_reward_cumulative_accuracy": 0.8275689223057644
    },
    {
      "epoch": 0.5924606708222024,
      "grad_norm": 1.6157499551773071,
      "learning_rate": 2.1309319291858194e-06,
      "loss": 0.0341,
      "step": 1996,
      "video_reward_cumulative_accuracy": 0.8276553106212425
    },
    {
      "epoch": 0.5927574948055803,
      "grad_norm": 1.3407137393951416,
      "learning_rate": 2.1283701468440875e-06,
      "loss": 0.0139,
      "step": 1997,
      "video_reward_cumulative_accuracy": 0.827741612418628
    },
    {
      "epoch": 0.5930543187889582,
      "grad_norm": 2.7344970703125,
      "learning_rate": 2.1258087634830724e-06,
      "loss": 0.0588,
      "step": 1998,
      "video_reward_cumulative_accuracy": 0.8275775775775776
    },
    {
      "epoch": 0.593351142772336,
      "grad_norm": 2.2847628593444824,
      "learning_rate": 2.1232477818526685e-06,
      "loss": 0.0199,
      "step": 1999,
      "video_reward_cumulative_accuracy": 0.8276638319159579
    },
    {
      "epoch": 0.5936479667557139,
      "grad_norm": 1.1096928119659424,
      "learning_rate": 2.120687204702337e-06,
      "loss": 0.0169,
      "step": 2000,
      "video_reward_cumulative_accuracy": 0.82775
    },
    {
      "epoch": 0.5936479667557139,
      "eval_runtime": 130.6244,
      "eval_samples_per_second": 6.04,
      "eval_steps_per_second": 0.758,
      "eval_test_set_accuracy": 0.8106060606060606,
      "step": 2000
    },
    {
      "epoch": 0.5939447907390917,
      "grad_norm": 0.6623875498771667,
      "learning_rate": 2.118127034781107e-06,
      "loss": 0.017,
      "step": 2001,
      "video_reward_cumulative_accuracy": 0.8278360819590205
    },
    {
      "epoch": 0.5942416147224696,
      "grad_norm": 1.5880417823791504,
      "learning_rate": 2.1155672748375684e-06,
      "loss": 0.0249,
      "step": 2002,
      "video_reward_cumulative_accuracy": 0.827922077922078
    },
    {
      "epoch": 0.5945384387058474,
      "grad_norm": 0.9980058670043945,
      "learning_rate": 2.1130079276198727e-06,
      "loss": 0.0209,
      "step": 2003,
      "video_reward_cumulative_accuracy": 0.828007988017973
    },
    {
      "epoch": 0.5948352626892253,
      "grad_norm": 1.9351149797439575,
      "learning_rate": 2.1104489958757267e-06,
      "loss": 0.028,
      "step": 2004,
      "video_reward_cumulative_accuracy": 0.8278443113772455
    },
    {
      "epoch": 0.5951320866726032,
      "grad_norm": 1.2848634719848633,
      "learning_rate": 2.107890482352393e-06,
      "loss": 0.0193,
      "step": 2005,
      "video_reward_cumulative_accuracy": 0.827930174563591
    },
    {
      "epoch": 0.595428910655981,
      "grad_norm": 2.291006565093994,
      "learning_rate": 2.105332389796684e-06,
      "loss": 0.0395,
      "step": 2006,
      "video_reward_cumulative_accuracy": 0.8280159521435693
    },
    {
      "epoch": 0.5957257346393589,
      "grad_norm": 1.295036792755127,
      "learning_rate": 2.1027747209549596e-06,
      "loss": 0.0176,
      "step": 2007,
      "video_reward_cumulative_accuracy": 0.828101644245142
    },
    {
      "epoch": 0.5960225586227367,
      "grad_norm": 1.5013102293014526,
      "learning_rate": 2.1002174785731265e-06,
      "loss": 0.0294,
      "step": 2008,
      "video_reward_cumulative_accuracy": 0.828187250996016
    },
    {
      "epoch": 0.5963193826061146,
      "grad_norm": 1.3362897634506226,
      "learning_rate": 2.097660665396632e-06,
      "loss": 0.0279,
      "step": 2009,
      "video_reward_cumulative_accuracy": 0.8282727725236436
    },
    {
      "epoch": 0.5966162065894924,
      "grad_norm": 2.3554601669311523,
      "learning_rate": 2.0951042841704628e-06,
      "loss": 0.0542,
      "step": 2010,
      "video_reward_cumulative_accuracy": 0.8278606965174129
    },
    {
      "epoch": 0.5969130305728703,
      "grad_norm": 2.2153587341308594,
      "learning_rate": 2.0925483376391437e-06,
      "loss": 0.0155,
      "step": 2011,
      "video_reward_cumulative_accuracy": 0.8279462953754351
    },
    {
      "epoch": 0.5972098545562482,
      "grad_norm": 2.6395490169525146,
      "learning_rate": 2.08999282854673e-06,
      "loss": 0.0452,
      "step": 2012,
      "video_reward_cumulative_accuracy": 0.8280318091451292
    },
    {
      "epoch": 0.597506678539626,
      "grad_norm": 2.8601341247558594,
      "learning_rate": 2.08743775963681e-06,
      "loss": 0.0375,
      "step": 2013,
      "video_reward_cumulative_accuracy": 0.8281172379533035
    },
    {
      "epoch": 0.5978035025230038,
      "grad_norm": 1.6802117824554443,
      "learning_rate": 2.0848831336524956e-06,
      "loss": 0.0371,
      "step": 2014,
      "video_reward_cumulative_accuracy": 0.8282025819265144
    },
    {
      "epoch": 0.5981003265063817,
      "grad_norm": 2.9565298557281494,
      "learning_rate": 2.0823289533364295e-06,
      "loss": 0.0639,
      "step": 2015,
      "video_reward_cumulative_accuracy": 0.828287841191067
    },
    {
      "epoch": 0.5983971504897596,
      "grad_norm": 3.7333922386169434,
      "learning_rate": 2.0797752214307685e-06,
      "loss": 0.0546,
      "step": 2016,
      "video_reward_cumulative_accuracy": 0.8283730158730159
    },
    {
      "epoch": 0.5986939744731374,
      "grad_norm": 1.6850907802581787,
      "learning_rate": 2.077221940677194e-06,
      "loss": 0.03,
      "step": 2017,
      "video_reward_cumulative_accuracy": 0.8284581060981656
    },
    {
      "epoch": 0.5989907984565153,
      "grad_norm": 0.4525964856147766,
      "learning_rate": 2.0746691138169013e-06,
      "loss": 0.0098,
      "step": 2018,
      "video_reward_cumulative_accuracy": 0.8285431119920713
    },
    {
      "epoch": 0.5992876224398932,
      "grad_norm": 1.5810643434524536,
      "learning_rate": 2.0721167435905945e-06,
      "loss": 0.0129,
      "step": 2019,
      "video_reward_cumulative_accuracy": 0.8283803863298663
    },
    {
      "epoch": 0.599584446423271,
      "grad_norm": 3.38840389251709,
      "learning_rate": 2.069564832738495e-06,
      "loss": 0.0289,
      "step": 2020,
      "video_reward_cumulative_accuracy": 0.8284653465346534
    },
    {
      "epoch": 0.5998812704066488,
      "grad_norm": 2.073776960372925,
      "learning_rate": 2.067013384000323e-06,
      "loss": 0.0237,
      "step": 2021,
      "video_reward_cumulative_accuracy": 0.8285502226620485
    },
    {
      "epoch": 0.6001780943900267,
      "grad_norm": 3.2106335163116455,
      "learning_rate": 2.0644624001153073e-06,
      "loss": 0.0942,
      "step": 2022,
      "video_reward_cumulative_accuracy": 0.8283877349159249
    },
    {
      "epoch": 0.6004749183734046,
      "grad_norm": 2.6965174674987793,
      "learning_rate": 2.06191188382218e-06,
      "loss": 0.0288,
      "step": 2023,
      "video_reward_cumulative_accuracy": 0.828225407810183
    },
    {
      "epoch": 0.6007717423567824,
      "grad_norm": 0.7816161513328552,
      "learning_rate": 2.0593618378591625e-06,
      "loss": 0.0159,
      "step": 2024,
      "video_reward_cumulative_accuracy": 0.8283102766798419
    },
    {
      "epoch": 0.6010685663401603,
      "grad_norm": 2.3955113887786865,
      "learning_rate": 2.0568122649639815e-06,
      "loss": 0.0364,
      "step": 2025,
      "video_reward_cumulative_accuracy": 0.828395061728395
    },
    {
      "epoch": 0.6013653903235382,
      "grad_norm": 2.5972840785980225,
      "learning_rate": 2.0542631678738478e-06,
      "loss": 0.0567,
      "step": 2026,
      "video_reward_cumulative_accuracy": 0.8284797630799605
    },
    {
      "epoch": 0.601662214306916,
      "grad_norm": 2.920619249343872,
      "learning_rate": 2.051714549325466e-06,
      "loss": 0.0282,
      "step": 2027,
      "video_reward_cumulative_accuracy": 0.8285643808584114
    },
    {
      "epoch": 0.6019590382902938,
      "grad_norm": 2.846386432647705,
      "learning_rate": 2.049166412055025e-06,
      "loss": 0.0385,
      "step": 2028,
      "video_reward_cumulative_accuracy": 0.8286489151873767
    },
    {
      "epoch": 0.6022558622736717,
      "grad_norm": 1.5138801336288452,
      "learning_rate": 2.046618758798197e-06,
      "loss": 0.0298,
      "step": 2029,
      "video_reward_cumulative_accuracy": 0.8287333661902415
    },
    {
      "epoch": 0.6025526862570496,
      "grad_norm": 3.822578191757202,
      "learning_rate": 2.0440715922901362e-06,
      "loss": 0.0489,
      "step": 2030,
      "video_reward_cumulative_accuracy": 0.8288177339901478
    },
    {
      "epoch": 0.6028495102404274,
      "grad_norm": 4.538577556610107,
      "learning_rate": 2.041524915265472e-06,
      "loss": 0.056,
      "step": 2031,
      "video_reward_cumulative_accuracy": 0.828902018709995
    },
    {
      "epoch": 0.6031463342238053,
      "grad_norm": 2.920750141143799,
      "learning_rate": 2.0389787304583105e-06,
      "loss": 0.0301,
      "step": 2032,
      "video_reward_cumulative_accuracy": 0.828986220472441
    },
    {
      "epoch": 0.6034431582071832,
      "grad_norm": 2.3497962951660156,
      "learning_rate": 2.0364330406022265e-06,
      "loss": 0.0964,
      "step": 2033,
      "video_reward_cumulative_accuracy": 0.8288243974422036
    },
    {
      "epoch": 0.603739982190561,
      "grad_norm": 3.301518440246582,
      "learning_rate": 2.033887848430267e-06,
      "loss": 0.057,
      "step": 2034,
      "video_reward_cumulative_accuracy": 0.8289085545722714
    },
    {
      "epoch": 0.6040368061739388,
      "grad_norm": 2.4687845706939697,
      "learning_rate": 2.031343156674942e-06,
      "loss": 0.1124,
      "step": 2035,
      "video_reward_cumulative_accuracy": 0.8287469287469288
    },
    {
      "epoch": 0.6043336301573167,
      "grad_norm": 4.89956521987915,
      "learning_rate": 2.0287989680682247e-06,
      "loss": 0.0747,
      "step": 2036,
      "video_reward_cumulative_accuracy": 0.8288310412573674
    },
    {
      "epoch": 0.6046304541406946,
      "grad_norm": 3.3470458984375,
      "learning_rate": 2.026255285341549e-06,
      "loss": 0.0345,
      "step": 2037,
      "video_reward_cumulative_accuracy": 0.8289150711831125
    },
    {
      "epoch": 0.6049272781240724,
      "grad_norm": 1.8060578107833862,
      "learning_rate": 2.023712111225805e-06,
      "loss": 0.0265,
      "step": 2038,
      "video_reward_cumulative_accuracy": 0.8289990186457311
    },
    {
      "epoch": 0.6052241021074503,
      "grad_norm": 4.508866310119629,
      "learning_rate": 2.0211694484513376e-06,
      "loss": 0.0483,
      "step": 2039,
      "video_reward_cumulative_accuracy": 0.8288376655223149
    },
    {
      "epoch": 0.6055209260908282,
      "grad_norm": 1.380003571510315,
      "learning_rate": 2.0186272997479407e-06,
      "loss": 0.0247,
      "step": 2040,
      "video_reward_cumulative_accuracy": 0.828921568627451
    },
    {
      "epoch": 0.605817750074206,
      "grad_norm": 2.6239430904388428,
      "learning_rate": 2.016085667844859e-06,
      "loss": 0.0568,
      "step": 2041,
      "video_reward_cumulative_accuracy": 0.8290053895149436
    },
    {
      "epoch": 0.6061145740575838,
      "grad_norm": 3.0193066596984863,
      "learning_rate": 2.0135445554707803e-06,
      "loss": 0.0539,
      "step": 2042,
      "video_reward_cumulative_accuracy": 0.8290891283055828
    },
    {
      "epoch": 0.6064113980409617,
      "grad_norm": 2.1342217922210693,
      "learning_rate": 2.011003965353835e-06,
      "loss": 0.0406,
      "step": 2043,
      "video_reward_cumulative_accuracy": 0.8291727851199217
    },
    {
      "epoch": 0.6067082220243396,
      "grad_norm": 2.42179536819458,
      "learning_rate": 2.008463900221595e-06,
      "loss": 0.0604,
      "step": 2044,
      "video_reward_cumulative_accuracy": 0.8292563600782779
    },
    {
      "epoch": 0.6070050460077174,
      "grad_norm": 3.034480333328247,
      "learning_rate": 2.0059243628010643e-06,
      "loss": 0.0389,
      "step": 2045,
      "video_reward_cumulative_accuracy": 0.8290953545232274
    },
    {
      "epoch": 0.6073018699910953,
      "grad_norm": 1.7863432168960571,
      "learning_rate": 2.0033853558186845e-06,
      "loss": 0.0353,
      "step": 2046,
      "video_reward_cumulative_accuracy": 0.8291788856304986
    },
    {
      "epoch": 0.6075986939744732,
      "grad_norm": 1.3945716619491577,
      "learning_rate": 2.0008468820003257e-06,
      "loss": 0.0178,
      "step": 2047,
      "video_reward_cumulative_accuracy": 0.8290180752320468
    },
    {
      "epoch": 0.607895517957851,
      "grad_norm": 1.2314127683639526,
      "learning_rate": 1.9983089440712853e-06,
      "loss": 0.0172,
      "step": 2048,
      "video_reward_cumulative_accuracy": 0.8291015625
    },
    {
      "epoch": 0.6081923419412288,
      "grad_norm": 2.0700814723968506,
      "learning_rate": 1.995771544756287e-06,
      "loss": 0.0232,
      "step": 2049,
      "video_reward_cumulative_accuracy": 0.8289409468033186
    },
    {
      "epoch": 0.6084891659246067,
      "grad_norm": 2.8075079917907715,
      "learning_rate": 1.993234686779474e-06,
      "loss": 0.034,
      "step": 2050,
      "video_reward_cumulative_accuracy": 0.8290243902439024
    },
    {
      "epoch": 0.6087859899079846,
      "grad_norm": 0.5491511225700378,
      "learning_rate": 1.990698372864411e-06,
      "loss": 0.0198,
      "step": 2051,
      "video_reward_cumulative_accuracy": 0.8291077523159435
    },
    {
      "epoch": 0.6090828138913624,
      "grad_norm": 0.6947237253189087,
      "learning_rate": 1.9881626057340757e-06,
      "loss": 0.019,
      "step": 2052,
      "video_reward_cumulative_accuracy": 0.8291910331384016
    },
    {
      "epoch": 0.6093796378747403,
      "grad_norm": 2.1727752685546875,
      "learning_rate": 1.9856273881108613e-06,
      "loss": 0.0855,
      "step": 2053,
      "video_reward_cumulative_accuracy": 0.8292742328300049
    },
    {
      "epoch": 0.6096764618581182,
      "grad_norm": 1.6275689601898193,
      "learning_rate": 1.9830927227165697e-06,
      "loss": 0.0308,
      "step": 2054,
      "video_reward_cumulative_accuracy": 0.8293573515092503
    },
    {
      "epoch": 0.609973285841496,
      "grad_norm": 2.2714242935180664,
      "learning_rate": 1.9805586122724095e-06,
      "loss": 0.0478,
      "step": 2055,
      "video_reward_cumulative_accuracy": 0.8294403892944039
    },
    {
      "epoch": 0.6102701098248738,
      "grad_norm": 1.9769221544265747,
      "learning_rate": 1.978025059498996e-06,
      "loss": 0.0581,
      "step": 2056,
      "video_reward_cumulative_accuracy": 0.829523346303502
    },
    {
      "epoch": 0.6105669338082517,
      "grad_norm": 0.7704261541366577,
      "learning_rate": 1.9754920671163418e-06,
      "loss": 0.0099,
      "step": 2057,
      "video_reward_cumulative_accuracy": 0.829606222654351
    },
    {
      "epoch": 0.6108637577916296,
      "grad_norm": 2.209693193435669,
      "learning_rate": 1.972959637843861e-06,
      "loss": 0.0349,
      "step": 2058,
      "video_reward_cumulative_accuracy": 0.8296890184645287
    },
    {
      "epoch": 0.6111605817750074,
      "grad_norm": 0.5300698280334473,
      "learning_rate": 1.9704277744003632e-06,
      "loss": 0.0127,
      "step": 2059,
      "video_reward_cumulative_accuracy": 0.8297717338513841
    },
    {
      "epoch": 0.6114574057583853,
      "grad_norm": 2.17396879196167,
      "learning_rate": 1.967896479504048e-06,
      "loss": 0.0377,
      "step": 2060,
      "video_reward_cumulative_accuracy": 0.8298543689320388
    },
    {
      "epoch": 0.6117542297417632,
      "grad_norm": 2.26611328125,
      "learning_rate": 1.9653657558725077e-06,
      "loss": 0.0517,
      "step": 2061,
      "video_reward_cumulative_accuracy": 0.8296943231441049
    },
    {
      "epoch": 0.612051053725141,
      "grad_norm": 1.2621009349822998,
      "learning_rate": 1.962835606222717e-06,
      "loss": 0.0343,
      "step": 2062,
      "video_reward_cumulative_accuracy": 0.8297769156159069
    },
    {
      "epoch": 0.6123478777085188,
      "grad_norm": 1.5711784362792969,
      "learning_rate": 1.9603060332710415e-06,
      "loss": 0.0368,
      "step": 2063,
      "video_reward_cumulative_accuracy": 0.8298594280174503
    },
    {
      "epoch": 0.6126447016918967,
      "grad_norm": 2.2621207237243652,
      "learning_rate": 1.9577770397332184e-06,
      "loss": 0.0516,
      "step": 2064,
      "video_reward_cumulative_accuracy": 0.8296996124031008
    },
    {
      "epoch": 0.6129415256752746,
      "grad_norm": 2.1944663524627686,
      "learning_rate": 1.955248628324371e-06,
      "loss": 0.0442,
      "step": 2065,
      "video_reward_cumulative_accuracy": 0.8295399515738499
    },
    {
      "epoch": 0.6132383496586524,
      "grad_norm": 2.246171236038208,
      "learning_rate": 1.9527208017589944e-06,
      "loss": 0.0516,
      "step": 2066,
      "video_reward_cumulative_accuracy": 0.829622458857696
    },
    {
      "epoch": 0.6135351736420303,
      "grad_norm": 1.9881268739700317,
      "learning_rate": 1.950193562750953e-06,
      "loss": 0.034,
      "step": 2067,
      "video_reward_cumulative_accuracy": 0.8297048863086599
    },
    {
      "epoch": 0.6138319976254082,
      "grad_norm": 0.437326580286026,
      "learning_rate": 1.947666914013487e-06,
      "loss": 0.0082,
      "step": 2068,
      "video_reward_cumulative_accuracy": 0.8297872340425532
    },
    {
      "epoch": 0.614128821608786,
      "grad_norm": 2.9777653217315674,
      "learning_rate": 1.945140858259195e-06,
      "loss": 0.0259,
      "step": 2069,
      "video_reward_cumulative_accuracy": 0.8298695021749638
    },
    {
      "epoch": 0.6144256455921638,
      "grad_norm": 1.040705919265747,
      "learning_rate": 1.9426153982000455e-06,
      "loss": 0.0141,
      "step": 2070,
      "video_reward_cumulative_accuracy": 0.8299516908212561
    },
    {
      "epoch": 0.6147224695755417,
      "grad_norm": 1.5954067707061768,
      "learning_rate": 1.9400905365473656e-06,
      "loss": 0.033,
      "step": 2071,
      "video_reward_cumulative_accuracy": 0.8297923708353453
    },
    {
      "epoch": 0.6150192935589196,
      "grad_norm": 4.421472549438477,
      "learning_rate": 1.937566276011837e-06,
      "loss": 0.0657,
      "step": 2072,
      "video_reward_cumulative_accuracy": 0.8298745173745173
    },
    {
      "epoch": 0.6153161175422974,
      "grad_norm": 0.8702653050422668,
      "learning_rate": 1.935042619303501e-06,
      "loss": 0.0172,
      "step": 2073,
      "video_reward_cumulative_accuracy": 0.8299565846599132
    },
    {
      "epoch": 0.6156129415256753,
      "grad_norm": 1.1119422912597656,
      "learning_rate": 1.9325195691317457e-06,
      "loss": 0.0224,
      "step": 2074,
      "video_reward_cumulative_accuracy": 0.8300385728061717
    },
    {
      "epoch": 0.6159097655090532,
      "grad_norm": 0.5069667100906372,
      "learning_rate": 1.929997128205312e-06,
      "loss": 0.008,
      "step": 2075,
      "video_reward_cumulative_accuracy": 0.8301204819277108
    },
    {
      "epoch": 0.616206589492431,
      "grad_norm": 3.037325143814087,
      "learning_rate": 1.927475299232283e-06,
      "loss": 0.0442,
      "step": 2076,
      "video_reward_cumulative_accuracy": 0.8299614643545279
    },
    {
      "epoch": 0.6165034134758088,
      "grad_norm": 1.0800257921218872,
      "learning_rate": 1.924954084920089e-06,
      "loss": 0.0153,
      "step": 2077,
      "video_reward_cumulative_accuracy": 0.8300433317284545
    },
    {
      "epoch": 0.6168002374591867,
      "grad_norm": 2.045151710510254,
      "learning_rate": 1.922433487975498e-06,
      "loss": 0.0427,
      "step": 2078,
      "video_reward_cumulative_accuracy": 0.8298845043310876
    },
    {
      "epoch": 0.6170970614425646,
      "grad_norm": 2.991358518600464,
      "learning_rate": 1.919913511104614e-06,
      "loss": 0.0411,
      "step": 2079,
      "video_reward_cumulative_accuracy": 0.8297258297258298
    },
    {
      "epoch": 0.6173938854259424,
      "grad_norm": 1.7816275358200073,
      "learning_rate": 1.9173941570128786e-06,
      "loss": 0.0149,
      "step": 2080,
      "video_reward_cumulative_accuracy": 0.8298076923076924
    },
    {
      "epoch": 0.6176907094093202,
      "grad_norm": 1.7904562950134277,
      "learning_rate": 1.9148754284050616e-06,
      "loss": 0.0377,
      "step": 2081,
      "video_reward_cumulative_accuracy": 0.829889476213359
    },
    {
      "epoch": 0.6179875333926982,
      "grad_norm": 3.2209877967834473,
      "learning_rate": 1.9123573279852632e-06,
      "loss": 0.0225,
      "step": 2082,
      "video_reward_cumulative_accuracy": 0.829971181556196
    },
    {
      "epoch": 0.618284357376076,
      "grad_norm": 0.7754690051078796,
      "learning_rate": 1.9098398584569085e-06,
      "loss": 0.0197,
      "step": 2083,
      "video_reward_cumulative_accuracy": 0.8300528084493519
    },
    {
      "epoch": 0.6185811813594538,
      "grad_norm": 1.7929531335830688,
      "learning_rate": 1.9073230225227451e-06,
      "loss": 0.039,
      "step": 2084,
      "video_reward_cumulative_accuracy": 0.82989443378119
    },
    {
      "epoch": 0.6188780053428317,
      "grad_norm": 1.5716784000396729,
      "learning_rate": 1.9048068228848412e-06,
      "loss": 0.0188,
      "step": 2085,
      "video_reward_cumulative_accuracy": 0.8299760191846522
    },
    {
      "epoch": 0.6191748293262096,
      "grad_norm": 2.760315179824829,
      "learning_rate": 1.9022912622445808e-06,
      "loss": 0.0232,
      "step": 2086,
      "video_reward_cumulative_accuracy": 0.8300575263662512
    },
    {
      "epoch": 0.6194716533095874,
      "grad_norm": 7.752729892730713,
      "learning_rate": 1.8997763433026631e-06,
      "loss": 0.0878,
      "step": 2087,
      "video_reward_cumulative_accuracy": 0.8298993770963105
    },
    {
      "epoch": 0.6197684772929652,
      "grad_norm": 2.39508056640625,
      "learning_rate": 1.8972620687590964e-06,
      "loss": 0.0234,
      "step": 2088,
      "video_reward_cumulative_accuracy": 0.8299808429118773
    },
    {
      "epoch": 0.6200653012763432,
      "grad_norm": 3.0315792560577393,
      "learning_rate": 1.8947484413131996e-06,
      "loss": 0.0888,
      "step": 2089,
      "video_reward_cumulative_accuracy": 0.8300622307324078
    },
    {
      "epoch": 0.620362125259721,
      "grad_norm": 0.3297179341316223,
      "learning_rate": 1.892235463663596e-06,
      "loss": 0.0049,
      "step": 2090,
      "video_reward_cumulative_accuracy": 0.8301435406698564
    },
    {
      "epoch": 0.6206589492430988,
      "grad_norm": 0.5340771675109863,
      "learning_rate": 1.8897231385082096e-06,
      "loss": 0.0084,
      "step": 2091,
      "video_reward_cumulative_accuracy": 0.8302247728359636
    },
    {
      "epoch": 0.6209557732264767,
      "grad_norm": 2.871993064880371,
      "learning_rate": 1.8872114685442665e-06,
      "loss": 0.0711,
      "step": 2092,
      "video_reward_cumulative_accuracy": 0.8303059273422562
    },
    {
      "epoch": 0.6212525972098546,
      "grad_norm": 1.4503148794174194,
      "learning_rate": 1.8847004564682878e-06,
      "loss": 0.0436,
      "step": 2093,
      "video_reward_cumulative_accuracy": 0.8303870043000477
    },
    {
      "epoch": 0.6215494211932324,
      "grad_norm": 3.7459793090820312,
      "learning_rate": 1.8821901049760882e-06,
      "loss": 0.0717,
      "step": 2094,
      "video_reward_cumulative_accuracy": 0.8304680038204394
    },
    {
      "epoch": 0.6218462451766102,
      "grad_norm": 2.9643142223358154,
      "learning_rate": 1.879680416762775e-06,
      "loss": 0.0366,
      "step": 2095,
      "video_reward_cumulative_accuracy": 0.830310262529833
    },
    {
      "epoch": 0.6221430691599882,
      "grad_norm": 1.8001595735549927,
      "learning_rate": 1.8771713945227404e-06,
      "loss": 0.0272,
      "step": 2096,
      "video_reward_cumulative_accuracy": 0.8303912213740458
    },
    {
      "epoch": 0.622439893143366,
      "grad_norm": 3.3852388858795166,
      "learning_rate": 1.8746630409496647e-06,
      "loss": 0.0394,
      "step": 2097,
      "video_reward_cumulative_accuracy": 0.8304721030042919
    },
    {
      "epoch": 0.6227367171267438,
      "grad_norm": 2.9447176456451416,
      "learning_rate": 1.872155358736508e-06,
      "loss": 0.0385,
      "step": 2098,
      "video_reward_cumulative_accuracy": 0.8305529075309819
    },
    {
      "epoch": 0.6230335411101217,
      "grad_norm": 2.6223206520080566,
      "learning_rate": 1.8696483505755114e-06,
      "loss": 0.0507,
      "step": 2099,
      "video_reward_cumulative_accuracy": 0.8303954263935207
    },
    {
      "epoch": 0.6233303650934996,
      "grad_norm": 1.8533953428268433,
      "learning_rate": 1.8671420191581901e-06,
      "loss": 0.0151,
      "step": 2100,
      "video_reward_cumulative_accuracy": 0.8304761904761905
    },
    {
      "epoch": 0.6236271890768774,
      "grad_norm": 2.846639633178711,
      "learning_rate": 1.8646363671753354e-06,
      "loss": 0.0545,
      "step": 2101,
      "video_reward_cumulative_accuracy": 0.8305568776772966
    },
    {
      "epoch": 0.6239240130602552,
      "grad_norm": 2.0987446308135986,
      "learning_rate": 1.8621313973170074e-06,
      "loss": 0.0465,
      "step": 2102,
      "video_reward_cumulative_accuracy": 0.8306374881065651
    },
    {
      "epoch": 0.6242208370436332,
      "grad_norm": 2.0528335571289062,
      "learning_rate": 1.8596271122725346e-06,
      "loss": 0.0421,
      "step": 2103,
      "video_reward_cumulative_accuracy": 0.8304802662862577
    },
    {
      "epoch": 0.624517661027011,
      "grad_norm": 1.371273159980774,
      "learning_rate": 1.8571235147305106e-06,
      "loss": 0.0196,
      "step": 2104,
      "video_reward_cumulative_accuracy": 0.8305608365019012
    },
    {
      "epoch": 0.6248144850103888,
      "grad_norm": 1.087815761566162,
      "learning_rate": 1.8546206073787882e-06,
      "loss": 0.0236,
      "step": 2105,
      "video_reward_cumulative_accuracy": 0.8306413301662707
    },
    {
      "epoch": 0.6251113089937667,
      "grad_norm": 0.9951589703559875,
      "learning_rate": 1.8521183929044834e-06,
      "loss": 0.0072,
      "step": 2106,
      "video_reward_cumulative_accuracy": 0.8307217473884141
    },
    {
      "epoch": 0.6254081329771446,
      "grad_norm": 1.7407307624816895,
      "learning_rate": 1.8496168739939662e-06,
      "loss": 0.0414,
      "step": 2107,
      "video_reward_cumulative_accuracy": 0.8305647840531561
    },
    {
      "epoch": 0.6257049569605224,
      "grad_norm": 0.9107375741004944,
      "learning_rate": 1.8471160533328591e-06,
      "loss": 0.0086,
      "step": 2108,
      "video_reward_cumulative_accuracy": 0.8306451612903226
    },
    {
      "epoch": 0.6260017809439002,
      "grad_norm": 1.3645784854888916,
      "learning_rate": 1.844615933606037e-06,
      "loss": 0.0305,
      "step": 2109,
      "video_reward_cumulative_accuracy": 0.8307254623044097
    },
    {
      "epoch": 0.6262986049272781,
      "grad_norm": 0.9860436320304871,
      "learning_rate": 1.8421165174976191e-06,
      "loss": 0.0254,
      "step": 2110,
      "video_reward_cumulative_accuracy": 0.8308056872037914
    },
    {
      "epoch": 0.626595428910656,
      "grad_norm": 1.1706582307815552,
      "learning_rate": 1.8396178076909735e-06,
      "loss": 0.0133,
      "step": 2111,
      "video_reward_cumulative_accuracy": 0.8306489815253434
    },
    {
      "epoch": 0.6268922528940338,
      "grad_norm": 2.995161533355713,
      "learning_rate": 1.8371198068687051e-06,
      "loss": 0.0276,
      "step": 2112,
      "video_reward_cumulative_accuracy": 0.8307291666666666
    },
    {
      "epoch": 0.6271890768774117,
      "grad_norm": 3.4480783939361572,
      "learning_rate": 1.8346225177126622e-06,
      "loss": 0.0283,
      "step": 2113,
      "video_reward_cumulative_accuracy": 0.8308092759110269
    },
    {
      "epoch": 0.6274859008607896,
      "grad_norm": 4.289238929748535,
      "learning_rate": 1.8321259429039276e-06,
      "loss": 0.0667,
      "step": 2114,
      "video_reward_cumulative_accuracy": 0.8306527909176916
    },
    {
      "epoch": 0.6277827248441674,
      "grad_norm": 4.29648494720459,
      "learning_rate": 1.829630085122814e-06,
      "loss": 0.0629,
      "step": 2115,
      "video_reward_cumulative_accuracy": 0.8304964539007093
    },
    {
      "epoch": 0.6280795488275452,
      "grad_norm": 1.3143110275268555,
      "learning_rate": 1.8271349470488703e-06,
      "loss": 0.0287,
      "step": 2116,
      "video_reward_cumulative_accuracy": 0.8305765595463138
    },
    {
      "epoch": 0.6283763728109231,
      "grad_norm": 2.627892255783081,
      "learning_rate": 1.8246405313608668e-06,
      "loss": 0.055,
      "step": 2117,
      "video_reward_cumulative_accuracy": 0.8306565895134624
    },
    {
      "epoch": 0.628673196794301,
      "grad_norm": 4.371956825256348,
      "learning_rate": 1.8221468407368009e-06,
      "loss": 0.0693,
      "step": 2118,
      "video_reward_cumulative_accuracy": 0.8307365439093485
    },
    {
      "epoch": 0.6289700207776788,
      "grad_norm": 1.8598741292953491,
      "learning_rate": 1.8196538778538941e-06,
      "loss": 0.0516,
      "step": 2119,
      "video_reward_cumulative_accuracy": 0.8308164228409627
    },
    {
      "epoch": 0.6292668447610567,
      "grad_norm": 1.7819162607192993,
      "learning_rate": 1.8171616453885806e-06,
      "loss": 0.029,
      "step": 2120,
      "video_reward_cumulative_accuracy": 0.8308962264150943
    },
    {
      "epoch": 0.6295636687444346,
      "grad_norm": 1.686316967010498,
      "learning_rate": 1.8146701460165172e-06,
      "loss": 0.0179,
      "step": 2121,
      "video_reward_cumulative_accuracy": 0.830975954738331
    },
    {
      "epoch": 0.6298604927278124,
      "grad_norm": 3.108572006225586,
      "learning_rate": 1.8121793824125677e-06,
      "loss": 0.0561,
      "step": 2122,
      "video_reward_cumulative_accuracy": 0.8310556079170593
    },
    {
      "epoch": 0.6301573167111902,
      "grad_norm": 1.9357826709747314,
      "learning_rate": 1.80968935725081e-06,
      "loss": 0.0569,
      "step": 2123,
      "video_reward_cumulative_accuracy": 0.8311351860574658
    },
    {
      "epoch": 0.6304541406945681,
      "grad_norm": 5.203329563140869,
      "learning_rate": 1.8072000732045265e-06,
      "loss": 0.0402,
      "step": 2124,
      "video_reward_cumulative_accuracy": 0.8312146892655368
    },
    {
      "epoch": 0.630750964677946,
      "grad_norm": 1.325329065322876,
      "learning_rate": 1.804711532946206e-06,
      "loss": 0.0609,
      "step": 2125,
      "video_reward_cumulative_accuracy": 0.8312941176470589
    },
    {
      "epoch": 0.6310477886613238,
      "grad_norm": 3.6725287437438965,
      "learning_rate": 1.8022237391475389e-06,
      "loss": 0.1029,
      "step": 2126,
      "video_reward_cumulative_accuracy": 0.83137347130762
    },
    {
      "epoch": 0.6313446126447017,
      "grad_norm": 1.4435230493545532,
      "learning_rate": 1.7997366944794116e-06,
      "loss": 0.0419,
      "step": 2127,
      "video_reward_cumulative_accuracy": 0.8314527503526093
    },
    {
      "epoch": 0.6316414366280796,
      "grad_norm": 1.3416098356246948,
      "learning_rate": 1.7972504016119092e-06,
      "loss": 0.0154,
      "step": 2128,
      "video_reward_cumulative_accuracy": 0.8315319548872181
    },
    {
      "epoch": 0.6319382606114574,
      "grad_norm": 2.8020572662353516,
      "learning_rate": 1.7947648632143075e-06,
      "loss": 0.0215,
      "step": 2129,
      "video_reward_cumulative_accuracy": 0.8316110850164397
    },
    {
      "epoch": 0.6322350845948352,
      "grad_norm": 0.8301799893379211,
      "learning_rate": 1.7922800819550737e-06,
      "loss": 0.0204,
      "step": 2130,
      "video_reward_cumulative_accuracy": 0.8316901408450704
    },
    {
      "epoch": 0.6325319085782131,
      "grad_norm": 0.9084307551383972,
      "learning_rate": 1.7897960605018623e-06,
      "loss": 0.008,
      "step": 2131,
      "video_reward_cumulative_accuracy": 0.83176912247771
    },
    {
      "epoch": 0.632828732561591,
      "grad_norm": 1.8782029151916504,
      "learning_rate": 1.78731280152151e-06,
      "loss": 0.0184,
      "step": 2132,
      "video_reward_cumulative_accuracy": 0.8318480300187617
    },
    {
      "epoch": 0.6331255565449688,
      "grad_norm": 1.1400971412658691,
      "learning_rate": 1.7848303076800378e-06,
      "loss": 0.0157,
      "step": 2133,
      "video_reward_cumulative_accuracy": 0.8319268635724332
    },
    {
      "epoch": 0.6334223805283467,
      "grad_norm": 1.0256128311157227,
      "learning_rate": 1.7823485816426422e-06,
      "loss": 0.0287,
      "step": 2134,
      "video_reward_cumulative_accuracy": 0.8317713214620431
    },
    {
      "epoch": 0.6337192045117246,
      "grad_norm": 3.9153847694396973,
      "learning_rate": 1.7798676260736986e-06,
      "loss": 0.0366,
      "step": 2135,
      "video_reward_cumulative_accuracy": 0.831615925058548
    },
    {
      "epoch": 0.6340160284951024,
      "grad_norm": 3.1156997680664062,
      "learning_rate": 1.7773874436367521e-06,
      "loss": 0.0212,
      "step": 2136,
      "video_reward_cumulative_accuracy": 0.8316947565543071
    },
    {
      "epoch": 0.6343128524784802,
      "grad_norm": 1.2179923057556152,
      "learning_rate": 1.77490803699452e-06,
      "loss": 0.03,
      "step": 2137,
      "video_reward_cumulative_accuracy": 0.8317735142723444
    },
    {
      "epoch": 0.6346096764618581,
      "grad_norm": 1.7540775537490845,
      "learning_rate": 1.7724294088088867e-06,
      "loss": 0.0142,
      "step": 2138,
      "video_reward_cumulative_accuracy": 0.8318521983161834
    },
    {
      "epoch": 0.634906500445236,
      "grad_norm": 0.7738943696022034,
      "learning_rate": 1.769951561740899e-06,
      "loss": 0.0245,
      "step": 2139,
      "video_reward_cumulative_accuracy": 0.8319308087891538
    },
    {
      "epoch": 0.6352033244286138,
      "grad_norm": 1.5957173109054565,
      "learning_rate": 1.7674744984507668e-06,
      "loss": 0.028,
      "step": 2140,
      "video_reward_cumulative_accuracy": 0.8320093457943926
    },
    {
      "epoch": 0.6355001484119916,
      "grad_norm": 4.45468282699585,
      "learning_rate": 1.7649982215978573e-06,
      "loss": 0.0393,
      "step": 2141,
      "video_reward_cumulative_accuracy": 0.8320878094348435
    },
    {
      "epoch": 0.6357969723953696,
      "grad_norm": 2.190316677093506,
      "learning_rate": 1.7625227338406946e-06,
      "loss": 0.0443,
      "step": 2142,
      "video_reward_cumulative_accuracy": 0.8321661998132587
    },
    {
      "epoch": 0.6360937963787474,
      "grad_norm": 1.6457760334014893,
      "learning_rate": 1.7600480378369555e-06,
      "loss": 0.0134,
      "step": 2143,
      "video_reward_cumulative_accuracy": 0.8322445170321978
    },
    {
      "epoch": 0.6363906203621252,
      "grad_norm": 2.3591673374176025,
      "learning_rate": 1.7575741362434655e-06,
      "loss": 0.0413,
      "step": 2144,
      "video_reward_cumulative_accuracy": 0.831856343283582
    },
    {
      "epoch": 0.6366874443455031,
      "grad_norm": 2.5493083000183105,
      "learning_rate": 1.7551010317161987e-06,
      "loss": 0.0269,
      "step": 2145,
      "video_reward_cumulative_accuracy": 0.8319347319347319
    },
    {
      "epoch": 0.636984268328881,
      "grad_norm": 1.5170857906341553,
      "learning_rate": 1.7526287269102724e-06,
      "loss": 0.0253,
      "step": 2146,
      "video_reward_cumulative_accuracy": 0.8320130475302889
    },
    {
      "epoch": 0.6372810923122588,
      "grad_norm": 1.9697927236557007,
      "learning_rate": 1.750157224479946e-06,
      "loss": 0.0429,
      "step": 2147,
      "video_reward_cumulative_accuracy": 0.8320912901723335
    },
    {
      "epoch": 0.6375779162956366,
      "grad_norm": 1.7776871919631958,
      "learning_rate": 1.7476865270786169e-06,
      "loss": 0.0185,
      "step": 2148,
      "video_reward_cumulative_accuracy": 0.832169459962756
    },
    {
      "epoch": 0.6378747402790146,
      "grad_norm": 1.8674986362457275,
      "learning_rate": 1.7452166373588185e-06,
      "loss": 0.0131,
      "step": 2149,
      "video_reward_cumulative_accuracy": 0.8322475570032574
    },
    {
      "epoch": 0.6381715642623924,
      "grad_norm": 4.821762561798096,
      "learning_rate": 1.7427475579722186e-06,
      "loss": 0.1067,
      "step": 2150,
      "video_reward_cumulative_accuracy": 0.8323255813953488
    },
    {
      "epoch": 0.6384683882457702,
      "grad_norm": 1.7846288681030273,
      "learning_rate": 1.7402792915696115e-06,
      "loss": 0.0251,
      "step": 2151,
      "video_reward_cumulative_accuracy": 0.8324035332403533
    },
    {
      "epoch": 0.6387652122291481,
      "grad_norm": 3.2803213596343994,
      "learning_rate": 1.7378118408009227e-06,
      "loss": 0.045,
      "step": 2152,
      "video_reward_cumulative_accuracy": 0.8322490706319703
    },
    {
      "epoch": 0.639062036212526,
      "grad_norm": 3.1670420169830322,
      "learning_rate": 1.7353452083151975e-06,
      "loss": 0.0336,
      "step": 2153,
      "video_reward_cumulative_accuracy": 0.8323269856014863
    },
    {
      "epoch": 0.6393588601959038,
      "grad_norm": 0.46488067507743835,
      "learning_rate": 1.7328793967606072e-06,
      "loss": 0.006,
      "step": 2154,
      "video_reward_cumulative_accuracy": 0.8324048282265553
    },
    {
      "epoch": 0.6396556841792816,
      "grad_norm": 2.008270740509033,
      "learning_rate": 1.7304144087844405e-06,
      "loss": 0.0287,
      "step": 2155,
      "video_reward_cumulative_accuracy": 0.8324825986078886
    },
    {
      "epoch": 0.6399525081626596,
      "grad_norm": 4.751814365386963,
      "learning_rate": 1.7279502470330994e-06,
      "loss": 0.1412,
      "step": 2156,
      "video_reward_cumulative_accuracy": 0.8325602968460112
    },
    {
      "epoch": 0.6402493321460374,
      "grad_norm": 0.5087360143661499,
      "learning_rate": 1.7254869141521026e-06,
      "loss": 0.0166,
      "step": 2157,
      "video_reward_cumulative_accuracy": 0.832637923041261
    },
    {
      "epoch": 0.6405461561294152,
      "grad_norm": 2.7528774738311768,
      "learning_rate": 1.723024412786074e-06,
      "loss": 0.1185,
      "step": 2158,
      "video_reward_cumulative_accuracy": 0.8322520852641334
    },
    {
      "epoch": 0.6408429801127931,
      "grad_norm": 2.7754266262054443,
      "learning_rate": 1.7205627455787515e-06,
      "loss": 0.0346,
      "step": 2159,
      "video_reward_cumulative_accuracy": 0.8323297823066235
    },
    {
      "epoch": 0.641139804096171,
      "grad_norm": 2.181504726409912,
      "learning_rate": 1.7181019151729709e-06,
      "loss": 0.054,
      "step": 2160,
      "video_reward_cumulative_accuracy": 0.8324074074074074
    },
    {
      "epoch": 0.6414366280795488,
      "grad_norm": 1.7188265323638916,
      "learning_rate": 1.7156419242106736e-06,
      "loss": 0.0377,
      "step": 2161,
      "video_reward_cumulative_accuracy": 0.8324849606663581
    },
    {
      "epoch": 0.6417334520629266,
      "grad_norm": 3.2530951499938965,
      "learning_rate": 1.7131827753329e-06,
      "loss": 0.0481,
      "step": 2162,
      "video_reward_cumulative_accuracy": 0.8323311748381128
    },
    {
      "epoch": 0.6420302760463046,
      "grad_norm": 0.4359917938709259,
      "learning_rate": 1.710724471179782e-06,
      "loss": 0.0043,
      "step": 2163,
      "video_reward_cumulative_accuracy": 0.8324086916319926
    },
    {
      "epoch": 0.6423271000296824,
      "grad_norm": 2.456068754196167,
      "learning_rate": 1.7082670143905516e-06,
      "loss": 0.0374,
      "step": 2164,
      "video_reward_cumulative_accuracy": 0.8324861367837338
    },
    {
      "epoch": 0.6426239240130602,
      "grad_norm": 1.3380742073059082,
      "learning_rate": 1.7058104076035237e-06,
      "loss": 0.0174,
      "step": 2165,
      "video_reward_cumulative_accuracy": 0.8325635103926097
    },
    {
      "epoch": 0.6429207479964381,
      "grad_norm": 2.8565738201141357,
      "learning_rate": 1.7033546534561046e-06,
      "loss": 0.0186,
      "step": 2166,
      "video_reward_cumulative_accuracy": 0.832409972299169
    },
    {
      "epoch": 0.643217571979816,
      "grad_norm": 3.0799217224121094,
      "learning_rate": 1.7008997545847878e-06,
      "loss": 0.066,
      "step": 2167,
      "video_reward_cumulative_accuracy": 0.8322565759113982
    },
    {
      "epoch": 0.6435143959631938,
      "grad_norm": 1.7654461860656738,
      "learning_rate": 1.6984457136251415e-06,
      "loss": 0.0339,
      "step": 2168,
      "video_reward_cumulative_accuracy": 0.8323339483394834
    },
    {
      "epoch": 0.6438112199465716,
      "grad_norm": 4.95688533782959,
      "learning_rate": 1.69599253321182e-06,
      "loss": 0.0901,
      "step": 2169,
      "video_reward_cumulative_accuracy": 0.8321807284462887
    },
    {
      "epoch": 0.6441080439299496,
      "grad_norm": 1.675530195236206,
      "learning_rate": 1.6935402159785482e-06,
      "loss": 0.0691,
      "step": 2170,
      "video_reward_cumulative_accuracy": 0.8320276497695852
    },
    {
      "epoch": 0.6444048679133274,
      "grad_norm": 2.567598342895508,
      "learning_rate": 1.6910887645581288e-06,
      "loss": 0.0519,
      "step": 2171,
      "video_reward_cumulative_accuracy": 0.8321050207277753
    },
    {
      "epoch": 0.6447016918967052,
      "grad_norm": 2.3529388904571533,
      "learning_rate": 1.6886381815824304e-06,
      "loss": 0.0239,
      "step": 2172,
      "video_reward_cumulative_accuracy": 0.8321823204419889
    },
    {
      "epoch": 0.6449985158800831,
      "grad_norm": 3.154841899871826,
      "learning_rate": 1.6861884696823935e-06,
      "loss": 0.0572,
      "step": 2173,
      "video_reward_cumulative_accuracy": 0.8322595490105844
    },
    {
      "epoch": 0.645295339863461,
      "grad_norm": 2.80783748626709,
      "learning_rate": 1.6837396314880216e-06,
      "loss": 0.0874,
      "step": 2174,
      "video_reward_cumulative_accuracy": 0.8323367065317387
    },
    {
      "epoch": 0.6455921638468388,
      "grad_norm": 1.9993150234222412,
      "learning_rate": 1.681291669628379e-06,
      "loss": 0.0253,
      "step": 2175,
      "video_reward_cumulative_accuracy": 0.8324137931034483
    },
    {
      "epoch": 0.6458889878302166,
      "grad_norm": 2.1698851585388184,
      "learning_rate": 1.6788445867315918e-06,
      "loss": 0.0296,
      "step": 2176,
      "video_reward_cumulative_accuracy": 0.8324908088235294
    },
    {
      "epoch": 0.6461858118135946,
      "grad_norm": 3.3127455711364746,
      "learning_rate": 1.6763983854248395e-06,
      "loss": 0.0626,
      "step": 2177,
      "video_reward_cumulative_accuracy": 0.8325677537896188
    },
    {
      "epoch": 0.6464826357969724,
      "grad_norm": 2.9646496772766113,
      "learning_rate": 1.6739530683343574e-06,
      "loss": 0.0443,
      "step": 2178,
      "video_reward_cumulative_accuracy": 0.8321854912764004
    },
    {
      "epoch": 0.6467794597803502,
      "grad_norm": 3.9705727100372314,
      "learning_rate": 1.6715086380854311e-06,
      "loss": 0.0516,
      "step": 2179,
      "video_reward_cumulative_accuracy": 0.8322625057365765
    },
    {
      "epoch": 0.647076283763728,
      "grad_norm": 1.3406922817230225,
      "learning_rate": 1.669065097302393e-06,
      "loss": 0.0174,
      "step": 2180,
      "video_reward_cumulative_accuracy": 0.8323394495412844
    },
    {
      "epoch": 0.647373107747106,
      "grad_norm": 0.7483333349227905,
      "learning_rate": 1.666622448608622e-06,
      "loss": 0.0206,
      "step": 2181,
      "video_reward_cumulative_accuracy": 0.832416322787712
    },
    {
      "epoch": 0.6476699317304838,
      "grad_norm": 1.2349746227264404,
      "learning_rate": 1.6641806946265377e-06,
      "loss": 0.0168,
      "step": 2182,
      "video_reward_cumulative_accuracy": 0.8324931255728689
    },
    {
      "epoch": 0.6479667557138616,
      "grad_norm": 1.7513976097106934,
      "learning_rate": 1.6617398379776008e-06,
      "loss": 0.0558,
      "step": 2183,
      "video_reward_cumulative_accuracy": 0.8323408153916628
    },
    {
      "epoch": 0.6482635796972396,
      "grad_norm": 0.5160261988639832,
      "learning_rate": 1.6592998812823072e-06,
      "loss": 0.0109,
      "step": 2184,
      "video_reward_cumulative_accuracy": 0.8324175824175825
    },
    {
      "epoch": 0.6485604036806174,
      "grad_norm": 5.037443161010742,
      "learning_rate": 1.6568608271601873e-06,
      "loss": 0.0496,
      "step": 2185,
      "video_reward_cumulative_accuracy": 0.8324942791762013
    },
    {
      "epoch": 0.6488572276639952,
      "grad_norm": 2.4052493572235107,
      "learning_rate": 1.6544226782298033e-06,
      "loss": 0.0281,
      "step": 2186,
      "video_reward_cumulative_accuracy": 0.8325709057639524
    },
    {
      "epoch": 0.649154051647373,
      "grad_norm": 2.0196239948272705,
      "learning_rate": 1.6519854371087434e-06,
      "loss": 0.0943,
      "step": 2187,
      "video_reward_cumulative_accuracy": 0.8324188385916781
    },
    {
      "epoch": 0.649450875630751,
      "grad_norm": 1.2571297883987427,
      "learning_rate": 1.6495491064136239e-06,
      "loss": 0.0215,
      "step": 2188,
      "video_reward_cumulative_accuracy": 0.8324954296160878
    },
    {
      "epoch": 0.6497476996141288,
      "grad_norm": 1.217132806777954,
      "learning_rate": 1.6471136887600805e-06,
      "loss": 0.017,
      "step": 2189,
      "video_reward_cumulative_accuracy": 0.832571950662403
    },
    {
      "epoch": 0.6500445235975066,
      "grad_norm": 2.48207426071167,
      "learning_rate": 1.6446791867627718e-06,
      "loss": 0.0539,
      "step": 2190,
      "video_reward_cumulative_accuracy": 0.832648401826484
    },
    {
      "epoch": 0.6503413475808846,
      "grad_norm": 4.670328617095947,
      "learning_rate": 1.642245603035372e-06,
      "loss": 0.0683,
      "step": 2191,
      "video_reward_cumulative_accuracy": 0.8327247832040164
    },
    {
      "epoch": 0.6506381715642624,
      "grad_norm": 0.8069223761558533,
      "learning_rate": 1.6398129401905687e-06,
      "loss": 0.032,
      "step": 2192,
      "video_reward_cumulative_accuracy": 0.8328010948905109
    },
    {
      "epoch": 0.6509349955476402,
      "grad_norm": 3.4688570499420166,
      "learning_rate": 1.6373812008400623e-06,
      "loss": 0.0296,
      "step": 2193,
      "video_reward_cumulative_accuracy": 0.8326493388052896
    },
    {
      "epoch": 0.651231819531018,
      "grad_norm": 1.3955700397491455,
      "learning_rate": 1.6349503875945599e-06,
      "loss": 0.041,
      "step": 2194,
      "video_reward_cumulative_accuracy": 0.8327256153144941
    },
    {
      "epoch": 0.651528643514396,
      "grad_norm": 0.7285477519035339,
      "learning_rate": 1.632520503063777e-06,
      "loss": 0.0208,
      "step": 2195,
      "video_reward_cumulative_accuracy": 0.8328018223234624
    },
    {
      "epoch": 0.6518254674977738,
      "grad_norm": 1.377070426940918,
      "learning_rate": 1.630091549856429e-06,
      "loss": 0.0357,
      "step": 2196,
      "video_reward_cumulative_accuracy": 0.8328779599271403
    },
    {
      "epoch": 0.6521222914811516,
      "grad_norm": 1.5354235172271729,
      "learning_rate": 1.6276635305802336e-06,
      "loss": 0.0356,
      "step": 2197,
      "video_reward_cumulative_accuracy": 0.8329540282203004
    },
    {
      "epoch": 0.6524191154645296,
      "grad_norm": 0.5859040021896362,
      "learning_rate": 1.6252364478419057e-06,
      "loss": 0.0181,
      "step": 2198,
      "video_reward_cumulative_accuracy": 0.8330300272975433
    },
    {
      "epoch": 0.6527159394479074,
      "grad_norm": 0.6209362745285034,
      "learning_rate": 1.622810304247153e-06,
      "loss": 0.0127,
      "step": 2199,
      "video_reward_cumulative_accuracy": 0.833105957253297
    },
    {
      "epoch": 0.6530127634312852,
      "grad_norm": 1.3316088914871216,
      "learning_rate": 1.6203851024006779e-06,
      "loss": 0.0219,
      "step": 2200,
      "video_reward_cumulative_accuracy": 0.8331818181818181
    },
    {
      "epoch": 0.6530127634312852,
      "eval_runtime": 132.7968,
      "eval_samples_per_second": 5.941,
      "eval_steps_per_second": 0.745,
      "eval_test_set_accuracy": 0.7878787878787878,
      "step": 2200
    },
    {
      "epoch": 0.653309587414663,
      "grad_norm": 1.4014110565185547,
      "learning_rate": 1.6179608449061671e-06,
      "loss": 0.0302,
      "step": 2201,
      "video_reward_cumulative_accuracy": 0.8330304407087687
    },
    {
      "epoch": 0.653606411398041,
      "grad_norm": 1.8992540836334229,
      "learning_rate": 1.6155375343662986e-06,
      "loss": 0.0529,
      "step": 2202,
      "video_reward_cumulative_accuracy": 0.8331062670299727
    },
    {
      "epoch": 0.6539032353814188,
      "grad_norm": 0.6547995209693909,
      "learning_rate": 1.6131151733827314e-06,
      "loss": 0.0144,
      "step": 2203,
      "video_reward_cumulative_accuracy": 0.833182024512029
    },
    {
      "epoch": 0.6542000593647966,
      "grad_norm": 4.633205890655518,
      "learning_rate": 1.6106937645561042e-06,
      "loss": 0.0385,
      "step": 2204,
      "video_reward_cumulative_accuracy": 0.8332577132486388
    },
    {
      "epoch": 0.6544968833481746,
      "grad_norm": 0.8929312229156494,
      "learning_rate": 1.6082733104860354e-06,
      "loss": 0.0146,
      "step": 2205,
      "video_reward_cumulative_accuracy": 0.8333333333333334
    },
    {
      "epoch": 0.6547937073315524,
      "grad_norm": 1.7991684675216675,
      "learning_rate": 1.6058538137711155e-06,
      "loss": 0.0463,
      "step": 2206,
      "video_reward_cumulative_accuracy": 0.8334088848594742
    },
    {
      "epoch": 0.6550905313149302,
      "grad_norm": 1.2529010772705078,
      "learning_rate": 1.603435277008912e-06,
      "loss": 0.0193,
      "step": 2207,
      "video_reward_cumulative_accuracy": 0.8334843679202537
    },
    {
      "epoch": 0.655387355298308,
      "grad_norm": 2.0492186546325684,
      "learning_rate": 1.6010177027959556e-06,
      "loss": 0.0362,
      "step": 2208,
      "video_reward_cumulative_accuracy": 0.8335597826086957
    },
    {
      "epoch": 0.655684179281686,
      "grad_norm": 3.027116060256958,
      "learning_rate": 1.598601093727749e-06,
      "loss": 0.0604,
      "step": 2209,
      "video_reward_cumulative_accuracy": 0.833635129017655
    },
    {
      "epoch": 0.6559810032650638,
      "grad_norm": 2.721015691757202,
      "learning_rate": 1.5961854523987569e-06,
      "loss": 0.0486,
      "step": 2210,
      "video_reward_cumulative_accuracy": 0.833710407239819
    },
    {
      "epoch": 0.6562778272484416,
      "grad_norm": 0.9648367166519165,
      "learning_rate": 1.5937707814024024e-06,
      "loss": 0.0094,
      "step": 2211,
      "video_reward_cumulative_accuracy": 0.833785617367707
    },
    {
      "epoch": 0.6565746512318196,
      "grad_norm": 1.7013683319091797,
      "learning_rate": 1.5913570833310721e-06,
      "loss": 0.0201,
      "step": 2212,
      "video_reward_cumulative_accuracy": 0.8338607594936709
    },
    {
      "epoch": 0.6568714752151974,
      "grad_norm": 1.8337125778198242,
      "learning_rate": 1.5889443607761023e-06,
      "loss": 0.0281,
      "step": 2213,
      "video_reward_cumulative_accuracy": 0.8339358337098961
    },
    {
      "epoch": 0.6571682991985752,
      "grad_norm": 3.3444020748138428,
      "learning_rate": 1.5865326163277862e-06,
      "loss": 0.0687,
      "step": 2214,
      "video_reward_cumulative_accuracy": 0.8337850045167118
    },
    {
      "epoch": 0.657465123181953,
      "grad_norm": 1.9684284925460815,
      "learning_rate": 1.584121852575367e-06,
      "loss": 0.0323,
      "step": 2215,
      "video_reward_cumulative_accuracy": 0.8338600451467268
    },
    {
      "epoch": 0.657761947165331,
      "grad_norm": 5.16708517074585,
      "learning_rate": 1.5817120721070302e-06,
      "loss": 0.055,
      "step": 2216,
      "video_reward_cumulative_accuracy": 0.8339350180505415
    },
    {
      "epoch": 0.6580587711487088,
      "grad_norm": 1.9357199668884277,
      "learning_rate": 1.579303277509913e-06,
      "loss": 0.0191,
      "step": 2217,
      "video_reward_cumulative_accuracy": 0.8340099233198015
    },
    {
      "epoch": 0.6583555951320866,
      "grad_norm": 1.5035184621810913,
      "learning_rate": 1.5768954713700868e-06,
      "loss": 0.0106,
      "step": 2218,
      "video_reward_cumulative_accuracy": 0.8340847610459874
    },
    {
      "epoch": 0.6586524191154646,
      "grad_norm": 2.6335108280181885,
      "learning_rate": 1.574488656272567e-06,
      "loss": 0.0271,
      "step": 2219,
      "video_reward_cumulative_accuracy": 0.8341595313204146
    },
    {
      "epoch": 0.6589492430988424,
      "grad_norm": 2.186413049697876,
      "learning_rate": 1.5720828348013022e-06,
      "loss": 0.031,
      "step": 2220,
      "video_reward_cumulative_accuracy": 0.8342342342342343
    },
    {
      "epoch": 0.6592460670822202,
      "grad_norm": 0.6132137775421143,
      "learning_rate": 1.5696780095391762e-06,
      "loss": 0.0234,
      "step": 2221,
      "video_reward_cumulative_accuracy": 0.8343088698784331
    },
    {
      "epoch": 0.659542891065598,
      "grad_norm": 2.6216671466827393,
      "learning_rate": 1.5672741830680022e-06,
      "loss": 0.0649,
      "step": 2222,
      "video_reward_cumulative_accuracy": 0.8343834383438344
    },
    {
      "epoch": 0.659839715048976,
      "grad_norm": 1.9002763032913208,
      "learning_rate": 1.5648713579685201e-06,
      "loss": 0.036,
      "step": 2223,
      "video_reward_cumulative_accuracy": 0.8342330184435448
    },
    {
      "epoch": 0.6601365390323538,
      "grad_norm": 4.694214820861816,
      "learning_rate": 1.5624695368203975e-06,
      "loss": 0.0338,
      "step": 2224,
      "video_reward_cumulative_accuracy": 0.8343075539568345
    },
    {
      "epoch": 0.6604333630157316,
      "grad_norm": 2.101926326751709,
      "learning_rate": 1.560068722202221e-06,
      "loss": 0.0163,
      "step": 2225,
      "video_reward_cumulative_accuracy": 0.8343820224719101
    },
    {
      "epoch": 0.6607301869991096,
      "grad_norm": 2.703768491744995,
      "learning_rate": 1.557668916691499e-06,
      "loss": 0.0342,
      "step": 2226,
      "video_reward_cumulative_accuracy": 0.8344564240790656
    },
    {
      "epoch": 0.6610270109824874,
      "grad_norm": 1.3713093996047974,
      "learning_rate": 1.555270122864656e-06,
      "loss": 0.0089,
      "step": 2227,
      "video_reward_cumulative_accuracy": 0.8345307588684329
    },
    {
      "epoch": 0.6613238349658652,
      "grad_norm": 1.0959382057189941,
      "learning_rate": 1.5528723432970288e-06,
      "loss": 0.0285,
      "step": 2228,
      "video_reward_cumulative_accuracy": 0.8346050269299821
    },
    {
      "epoch": 0.661620658949243,
      "grad_norm": 2.450881242752075,
      "learning_rate": 1.5504755805628677e-06,
      "loss": 0.026,
      "step": 2229,
      "video_reward_cumulative_accuracy": 0.8346792283535217
    },
    {
      "epoch": 0.661917482932621,
      "grad_norm": 3.218003749847412,
      "learning_rate": 1.548079837235329e-06,
      "loss": 0.0422,
      "step": 2230,
      "video_reward_cumulative_accuracy": 0.8345291479820628
    },
    {
      "epoch": 0.6622143069159988,
      "grad_norm": 3.611631393432617,
      "learning_rate": 1.545685115886477e-06,
      "loss": 0.1359,
      "step": 2231,
      "video_reward_cumulative_accuracy": 0.834603316898252
    },
    {
      "epoch": 0.6625111308993766,
      "grad_norm": 3.1366591453552246,
      "learning_rate": 1.5432914190872757e-06,
      "loss": 0.1005,
      "step": 2232,
      "video_reward_cumulative_accuracy": 0.8346774193548387
    },
    {
      "epoch": 0.6628079548827546,
      "grad_norm": 1.874556064605713,
      "learning_rate": 1.5408987494075924e-06,
      "loss": 0.0187,
      "step": 2233,
      "video_reward_cumulative_accuracy": 0.8345275414240931
    },
    {
      "epoch": 0.6631047788661324,
      "grad_norm": 2.389155864715576,
      "learning_rate": 1.53850710941619e-06,
      "loss": 0.0388,
      "step": 2234,
      "video_reward_cumulative_accuracy": 0.8346016114592659
    },
    {
      "epoch": 0.6634016028495102,
      "grad_norm": 2.063230037689209,
      "learning_rate": 1.5361165016807261e-06,
      "loss": 0.0311,
      "step": 2235,
      "video_reward_cumulative_accuracy": 0.834675615212528
    },
    {
      "epoch": 0.663698426832888,
      "grad_norm": 2.24172043800354,
      "learning_rate": 1.5337269287677497e-06,
      "loss": 0.0262,
      "step": 2236,
      "video_reward_cumulative_accuracy": 0.8347495527728086
    },
    {
      "epoch": 0.663995250816266,
      "grad_norm": 2.5830023288726807,
      "learning_rate": 1.5313383932426996e-06,
      "loss": 0.0807,
      "step": 2237,
      "video_reward_cumulative_accuracy": 0.834823424228878
    },
    {
      "epoch": 0.6642920747996438,
      "grad_norm": 2.467978000640869,
      "learning_rate": 1.5289508976699007e-06,
      "loss": 0.0606,
      "step": 2238,
      "video_reward_cumulative_accuracy": 0.8348972296693477
    },
    {
      "epoch": 0.6645888987830216,
      "grad_norm": 2.111464500427246,
      "learning_rate": 1.5265644446125606e-06,
      "loss": 0.0398,
      "step": 2239,
      "video_reward_cumulative_accuracy": 0.8349709691826709
    },
    {
      "epoch": 0.6648857227663996,
      "grad_norm": 2.0798873901367188,
      "learning_rate": 1.5241790366327685e-06,
      "loss": 0.0338,
      "step": 2240,
      "video_reward_cumulative_accuracy": 0.8350446428571429
    },
    {
      "epoch": 0.6651825467497774,
      "grad_norm": 0.6554487943649292,
      "learning_rate": 1.5217946762914924e-06,
      "loss": 0.0063,
      "step": 2241,
      "video_reward_cumulative_accuracy": 0.8351182507809014
    },
    {
      "epoch": 0.6654793707331552,
      "grad_norm": 0.8652382493019104,
      "learning_rate": 1.5194113661485727e-06,
      "loss": 0.0122,
      "step": 2242,
      "video_reward_cumulative_accuracy": 0.8351917930419268
    },
    {
      "epoch": 0.665776194716533,
      "grad_norm": 1.6087634563446045,
      "learning_rate": 1.5170291087627258e-06,
      "loss": 0.0285,
      "step": 2243,
      "video_reward_cumulative_accuracy": 0.8352652697280428
    },
    {
      "epoch": 0.666073018699911,
      "grad_norm": 2.6194751262664795,
      "learning_rate": 1.5146479066915355e-06,
      "loss": 0.0324,
      "step": 2244,
      "video_reward_cumulative_accuracy": 0.8351158645276292
    },
    {
      "epoch": 0.6663698426832888,
      "grad_norm": 1.7327265739440918,
      "learning_rate": 1.5122677624914528e-06,
      "loss": 0.0704,
      "step": 2245,
      "video_reward_cumulative_accuracy": 0.834966592427617
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.7121922373771667,
      "learning_rate": 1.5098886787177951e-06,
      "loss": 0.0064,
      "step": 2246,
      "video_reward_cumulative_accuracy": 0.835040071237756
    },
    {
      "epoch": 0.6669634906500446,
      "grad_norm": 2.6367523670196533,
      "learning_rate": 1.507510657924738e-06,
      "loss": 0.0327,
      "step": 2247,
      "video_reward_cumulative_accuracy": 0.835113484646195
    },
    {
      "epoch": 0.6672603146334224,
      "grad_norm": 4.55868673324585,
      "learning_rate": 1.5051337026653195e-06,
      "loss": 0.048,
      "step": 2248,
      "video_reward_cumulative_accuracy": 0.8351868327402135
    },
    {
      "epoch": 0.6675571386168002,
      "grad_norm": 3.684610605239868,
      "learning_rate": 1.502757815491429e-06,
      "loss": 0.0326,
      "step": 2249,
      "video_reward_cumulative_accuracy": 0.8352601156069365
    },
    {
      "epoch": 0.667853962600178,
      "grad_norm": 3.4382266998291016,
      "learning_rate": 1.5003829989538154e-06,
      "loss": 0.0588,
      "step": 2250,
      "video_reward_cumulative_accuracy": 0.8351111111111111
    },
    {
      "epoch": 0.668150786583556,
      "grad_norm": 2.8827478885650635,
      "learning_rate": 1.4980092556020713e-06,
      "loss": 0.041,
      "step": 2251,
      "video_reward_cumulative_accuracy": 0.8351843625055531
    },
    {
      "epoch": 0.6684476105669338,
      "grad_norm": 0.7824203968048096,
      "learning_rate": 1.495636587984643e-06,
      "loss": 0.0075,
      "step": 2252,
      "video_reward_cumulative_accuracy": 0.8352575488454707
    },
    {
      "epoch": 0.6687444345503116,
      "grad_norm": 1.5358774662017822,
      "learning_rate": 1.4932649986488192e-06,
      "loss": 0.0158,
      "step": 2253,
      "video_reward_cumulative_accuracy": 0.8353306702174877
    },
    {
      "epoch": 0.6690412585336896,
      "grad_norm": 0.3571893572807312,
      "learning_rate": 1.4908944901407296e-06,
      "loss": 0.0068,
      "step": 2254,
      "video_reward_cumulative_accuracy": 0.8354037267080745
    },
    {
      "epoch": 0.6693380825170674,
      "grad_norm": 5.035340785980225,
      "learning_rate": 1.488525065005348e-06,
      "loss": 0.0604,
      "step": 2255,
      "video_reward_cumulative_accuracy": 0.8352549889135255
    },
    {
      "epoch": 0.6696349065004452,
      "grad_norm": 4.127224922180176,
      "learning_rate": 1.4861567257864795e-06,
      "loss": 0.0443,
      "step": 2256,
      "video_reward_cumulative_accuracy": 0.8353280141843972
    },
    {
      "epoch": 0.669931730483823,
      "grad_norm": 1.52211594581604,
      "learning_rate": 1.4837894750267664e-06,
      "loss": 0.0399,
      "step": 2257,
      "video_reward_cumulative_accuracy": 0.835400974745237
    },
    {
      "epoch": 0.670228554467201,
      "grad_norm": 2.7589707374572754,
      "learning_rate": 1.481423315267685e-06,
      "loss": 0.0633,
      "step": 2258,
      "video_reward_cumulative_accuracy": 0.8354738706820195
    },
    {
      "epoch": 0.6705253784505788,
      "grad_norm": 1.0256385803222656,
      "learning_rate": 1.479058249049533e-06,
      "loss": 0.0133,
      "step": 2259,
      "video_reward_cumulative_accuracy": 0.8355467020805666
    },
    {
      "epoch": 0.6708222024339566,
      "grad_norm": 1.3333526849746704,
      "learning_rate": 1.4766942789114432e-06,
      "loss": 0.0201,
      "step": 2260,
      "video_reward_cumulative_accuracy": 0.8353982300884956
    },
    {
      "epoch": 0.6711190264173346,
      "grad_norm": 0.8754663467407227,
      "learning_rate": 1.4743314073913636e-06,
      "loss": 0.0151,
      "step": 2261,
      "video_reward_cumulative_accuracy": 0.8354710305174702
    },
    {
      "epoch": 0.6714158504007124,
      "grad_norm": 3.1899867057800293,
      "learning_rate": 1.4719696370260678e-06,
      "loss": 0.0595,
      "step": 2262,
      "video_reward_cumulative_accuracy": 0.8353227232537578
    },
    {
      "epoch": 0.6717126743840902,
      "grad_norm": 0.2183128446340561,
      "learning_rate": 1.469608970351144e-06,
      "loss": 0.004,
      "step": 2263,
      "video_reward_cumulative_accuracy": 0.8353954927087937
    },
    {
      "epoch": 0.672009498367468,
      "grad_norm": 2.6553597450256348,
      "learning_rate": 1.4672494099009981e-06,
      "loss": 0.0242,
      "step": 2264,
      "video_reward_cumulative_accuracy": 0.8354681978798587
    },
    {
      "epoch": 0.672306322350846,
      "grad_norm": 1.7975043058395386,
      "learning_rate": 1.4648909582088482e-06,
      "loss": 0.0358,
      "step": 2265,
      "video_reward_cumulative_accuracy": 0.8355408388520972
    },
    {
      "epoch": 0.6726031463342238,
      "grad_norm": 2.4716291427612305,
      "learning_rate": 1.4625336178067189e-06,
      "loss": 0.0977,
      "step": 2266,
      "video_reward_cumulative_accuracy": 0.8351721094439541
    },
    {
      "epoch": 0.6728999703176016,
      "grad_norm": 1.4434208869934082,
      "learning_rate": 1.4601773912254457e-06,
      "loss": 0.0303,
      "step": 2267,
      "video_reward_cumulative_accuracy": 0.8352448169386855
    },
    {
      "epoch": 0.6731967943009796,
      "grad_norm": 0.3788398504257202,
      "learning_rate": 1.4578222809946655e-06,
      "loss": 0.0153,
      "step": 2268,
      "video_reward_cumulative_accuracy": 0.8353174603174603
    },
    {
      "epoch": 0.6734936182843574,
      "grad_norm": 2.074415683746338,
      "learning_rate": 1.4554682896428179e-06,
      "loss": 0.0558,
      "step": 2269,
      "video_reward_cumulative_accuracy": 0.8351696782723667
    },
    {
      "epoch": 0.6737904422677352,
      "grad_norm": 6.2886962890625,
      "learning_rate": 1.4531154196971414e-06,
      "loss": 0.0606,
      "step": 2270,
      "video_reward_cumulative_accuracy": 0.8352422907488987
    },
    {
      "epoch": 0.674087266251113,
      "grad_norm": 3.100346803665161,
      "learning_rate": 1.45076367368367e-06,
      "loss": 0.027,
      "step": 2271,
      "video_reward_cumulative_accuracy": 0.8350946719506825
    },
    {
      "epoch": 0.674384090234491,
      "grad_norm": 2.0418272018432617,
      "learning_rate": 1.4484130541272323e-06,
      "loss": 0.0412,
      "step": 2272,
      "video_reward_cumulative_accuracy": 0.8351672535211268
    },
    {
      "epoch": 0.6746809142178688,
      "grad_norm": 1.8124505281448364,
      "learning_rate": 1.4460635635514448e-06,
      "loss": 0.0292,
      "step": 2273,
      "video_reward_cumulative_accuracy": 0.8352397712274527
    },
    {
      "epoch": 0.6749777382012466,
      "grad_norm": 3.0479700565338135,
      "learning_rate": 1.443715204478715e-06,
      "loss": 0.0704,
      "step": 2274,
      "video_reward_cumulative_accuracy": 0.8353122251539138
    },
    {
      "epoch": 0.6752745621846246,
      "grad_norm": 1.432740330696106,
      "learning_rate": 1.4413679794302325e-06,
      "loss": 0.0208,
      "step": 2275,
      "video_reward_cumulative_accuracy": 0.8353846153846154
    },
    {
      "epoch": 0.6755713861680024,
      "grad_norm": 1.0187022686004639,
      "learning_rate": 1.4390218909259731e-06,
      "loss": 0.0187,
      "step": 2276,
      "video_reward_cumulative_accuracy": 0.835456942003515
    },
    {
      "epoch": 0.6758682101513802,
      "grad_norm": 3.2267208099365234,
      "learning_rate": 1.436676941484691e-06,
      "loss": 0.0574,
      "step": 2277,
      "video_reward_cumulative_accuracy": 0.8353096179183136
    },
    {
      "epoch": 0.676165034134758,
      "grad_norm": 1.9733569622039795,
      "learning_rate": 1.4343331336239151e-06,
      "loss": 0.031,
      "step": 2278,
      "video_reward_cumulative_accuracy": 0.8353819139596137
    },
    {
      "epoch": 0.676461858118136,
      "grad_norm": 1.0220005512237549,
      "learning_rate": 1.4319904698599524e-06,
      "loss": 0.0171,
      "step": 2279,
      "video_reward_cumulative_accuracy": 0.8354541465555068
    },
    {
      "epoch": 0.6767586821015138,
      "grad_norm": 0.9054021239280701,
      "learning_rate": 1.4296489527078777e-06,
      "loss": 0.0106,
      "step": 2280,
      "video_reward_cumulative_accuracy": 0.8355263157894737
    },
    {
      "epoch": 0.6770555060848916,
      "grad_norm": 1.9927374124526978,
      "learning_rate": 1.4273085846815374e-06,
      "loss": 0.0309,
      "step": 2281,
      "video_reward_cumulative_accuracy": 0.8355984217448488
    },
    {
      "epoch": 0.6773523300682696,
      "grad_norm": 1.0571626424789429,
      "learning_rate": 1.4249693682935462e-06,
      "loss": 0.0116,
      "step": 2282,
      "video_reward_cumulative_accuracy": 0.8356704645048203
    },
    {
      "epoch": 0.6776491540516474,
      "grad_norm": 2.752652883529663,
      "learning_rate": 1.4226313060552774e-06,
      "loss": 0.0646,
      "step": 2283,
      "video_reward_cumulative_accuracy": 0.835742444152431
    },
    {
      "epoch": 0.6779459780350252,
      "grad_norm": 1.6572239398956299,
      "learning_rate": 1.4202944004768694e-06,
      "loss": 0.0332,
      "step": 2284,
      "video_reward_cumulative_accuracy": 0.8358143607705779
    },
    {
      "epoch": 0.678242802018403,
      "grad_norm": 2.04713773727417,
      "learning_rate": 1.4179586540672152e-06,
      "loss": 0.0624,
      "step": 2285,
      "video_reward_cumulative_accuracy": 0.8358862144420132
    },
    {
      "epoch": 0.678539626001781,
      "grad_norm": 0.3329226076602936,
      "learning_rate": 1.4156240693339663e-06,
      "loss": 0.0062,
      "step": 2286,
      "video_reward_cumulative_accuracy": 0.8359580052493438
    },
    {
      "epoch": 0.6788364499851588,
      "grad_norm": 2.5659382343292236,
      "learning_rate": 1.4132906487835263e-06,
      "loss": 0.0549,
      "step": 2287,
      "video_reward_cumulative_accuracy": 0.8360297332750328
    },
    {
      "epoch": 0.6791332739685366,
      "grad_norm": 2.2131476402282715,
      "learning_rate": 1.4109583949210481e-06,
      "loss": 0.0267,
      "step": 2288,
      "video_reward_cumulative_accuracy": 0.8361013986013986
    },
    {
      "epoch": 0.6794300979519146,
      "grad_norm": 0.8246326446533203,
      "learning_rate": 1.408627310250434e-06,
      "loss": 0.0205,
      "step": 2289,
      "video_reward_cumulative_accuracy": 0.836173001310616
    },
    {
      "epoch": 0.6797269219352924,
      "grad_norm": 1.7747602462768555,
      "learning_rate": 1.4062973972743277e-06,
      "loss": 0.0255,
      "step": 2290,
      "video_reward_cumulative_accuracy": 0.8362445414847162
    },
    {
      "epoch": 0.6800237459186702,
      "grad_norm": 2.40305495262146,
      "learning_rate": 1.4039686584941176e-06,
      "loss": 0.0255,
      "step": 2291,
      "video_reward_cumulative_accuracy": 0.8360977738978612
    },
    {
      "epoch": 0.680320569902048,
      "grad_norm": 1.9818377494812012,
      "learning_rate": 1.4016410964099308e-06,
      "loss": 0.0512,
      "step": 2292,
      "video_reward_cumulative_accuracy": 0.8361692844677138
    },
    {
      "epoch": 0.680617393885426,
      "grad_norm": 2.252643585205078,
      "learning_rate": 1.3993147135206311e-06,
      "loss": 0.0324,
      "step": 2293,
      "video_reward_cumulative_accuracy": 0.8362407326646315
    },
    {
      "epoch": 0.6809142178688038,
      "grad_norm": 2.190392017364502,
      "learning_rate": 1.3969895123238177e-06,
      "loss": 0.0207,
      "step": 2294,
      "video_reward_cumulative_accuracy": 0.8360941586748039
    },
    {
      "epoch": 0.6812110418521816,
      "grad_norm": 2.94880747795105,
      "learning_rate": 1.3946654953158176e-06,
      "loss": 0.0357,
      "step": 2295,
      "video_reward_cumulative_accuracy": 0.8359477124183007
    },
    {
      "epoch": 0.6815078658355596,
      "grad_norm": 1.3520139455795288,
      "learning_rate": 1.3923426649916894e-06,
      "loss": 0.0097,
      "step": 2296,
      "video_reward_cumulative_accuracy": 0.8360191637630662
    },
    {
      "epoch": 0.6818046898189374,
      "grad_norm": 2.1631641387939453,
      "learning_rate": 1.3900210238452169e-06,
      "loss": 0.0406,
      "step": 2297,
      "video_reward_cumulative_accuracy": 0.8360905528950805
    },
    {
      "epoch": 0.6821015138023152,
      "grad_norm": 3.2488834857940674,
      "learning_rate": 1.3877005743689087e-06,
      "loss": 0.055,
      "step": 2298,
      "video_reward_cumulative_accuracy": 0.8361618798955613
    },
    {
      "epoch": 0.682398337785693,
      "grad_norm": 3.1709814071655273,
      "learning_rate": 1.3853813190539899e-06,
      "loss": 0.0387,
      "step": 2299,
      "video_reward_cumulative_accuracy": 0.8362331448455851
    },
    {
      "epoch": 0.682695161769071,
      "grad_norm": 5.1996307373046875,
      "learning_rate": 1.3830632603904075e-06,
      "loss": 0.0409,
      "step": 2300,
      "video_reward_cumulative_accuracy": 0.836304347826087
    },
    {
      "epoch": 0.6829919857524488,
      "grad_norm": 1.6898528337478638,
      "learning_rate": 1.3807464008668225e-06,
      "loss": 0.0352,
      "step": 2301,
      "video_reward_cumulative_accuracy": 0.8363754889178618
    },
    {
      "epoch": 0.6832888097358266,
      "grad_norm": 2.885385513305664,
      "learning_rate": 1.3784307429706084e-06,
      "loss": 0.0583,
      "step": 2302,
      "video_reward_cumulative_accuracy": 0.8364465682015638
    },
    {
      "epoch": 0.6835856337192046,
      "grad_norm": 0.6500884890556335,
      "learning_rate": 1.3761162891878496e-06,
      "loss": 0.0198,
      "step": 2303,
      "video_reward_cumulative_accuracy": 0.8363004776378636
    },
    {
      "epoch": 0.6838824577025824,
      "grad_norm": 2.2626075744628906,
      "learning_rate": 1.3738030420033349e-06,
      "loss": 0.0381,
      "step": 2304,
      "video_reward_cumulative_accuracy": 0.8363715277777778
    },
    {
      "epoch": 0.6841792816859602,
      "grad_norm": 2.9060208797454834,
      "learning_rate": 1.3714910039005608e-06,
      "loss": 0.0655,
      "step": 2305,
      "video_reward_cumulative_accuracy": 0.8364425162689805
    },
    {
      "epoch": 0.684476105669338,
      "grad_norm": 0.6458223462104797,
      "learning_rate": 1.3691801773617247e-06,
      "loss": 0.0079,
      "step": 2306,
      "video_reward_cumulative_accuracy": 0.8365134431916739
    },
    {
      "epoch": 0.684772929652716,
      "grad_norm": 0.7338875532150269,
      "learning_rate": 1.3668705648677227e-06,
      "loss": 0.0113,
      "step": 2307,
      "video_reward_cumulative_accuracy": 0.8365843086259211
    },
    {
      "epoch": 0.6850697536360938,
      "grad_norm": 2.2893717288970947,
      "learning_rate": 1.3645621688981497e-06,
      "loss": 0.0306,
      "step": 2308,
      "video_reward_cumulative_accuracy": 0.8366551126516465
    },
    {
      "epoch": 0.6853665776194716,
      "grad_norm": 0.5800947546958923,
      "learning_rate": 1.3622549919312902e-06,
      "loss": 0.0128,
      "step": 2309,
      "video_reward_cumulative_accuracy": 0.8367258553486357
    },
    {
      "epoch": 0.6856634016028496,
      "grad_norm": 5.060973167419434,
      "learning_rate": 1.3599490364441236e-06,
      "loss": 0.0676,
      "step": 2310,
      "video_reward_cumulative_accuracy": 0.8367965367965368
    },
    {
      "epoch": 0.6859602255862274,
      "grad_norm": 0.9198673367500305,
      "learning_rate": 1.3576443049123175e-06,
      "loss": 0.0215,
      "step": 2311,
      "video_reward_cumulative_accuracy": 0.8368671570748594
    },
    {
      "epoch": 0.6862570495696052,
      "grad_norm": 1.2726801633834839,
      "learning_rate": 1.3553407998102243e-06,
      "loss": 0.0265,
      "step": 2312,
      "video_reward_cumulative_accuracy": 0.8369377162629758
    },
    {
      "epoch": 0.686553873552983,
      "grad_norm": 0.5441017746925354,
      "learning_rate": 1.3530385236108817e-06,
      "loss": 0.0066,
      "step": 2313,
      "video_reward_cumulative_accuracy": 0.8370082144401211
    },
    {
      "epoch": 0.686850697536361,
      "grad_norm": 5.122513771057129,
      "learning_rate": 1.3507374787860045e-06,
      "loss": 0.0473,
      "step": 2314,
      "video_reward_cumulative_accuracy": 0.8370786516853933
    },
    {
      "epoch": 0.6871475215197388,
      "grad_norm": 3.541785478591919,
      "learning_rate": 1.3484376678059885e-06,
      "loss": 0.0438,
      "step": 2315,
      "video_reward_cumulative_accuracy": 0.8371490280777538
    },
    {
      "epoch": 0.6874443455031166,
      "grad_norm": 1.8629510402679443,
      "learning_rate": 1.3461390931399044e-06,
      "loss": 0.05,
      "step": 2316,
      "video_reward_cumulative_accuracy": 0.8372193436960277
    },
    {
      "epoch": 0.6877411694864946,
      "grad_norm": 2.798170328140259,
      "learning_rate": 1.3438417572554947e-06,
      "loss": 0.0348,
      "step": 2317,
      "video_reward_cumulative_accuracy": 0.8372895986189037
    },
    {
      "epoch": 0.6880379934698724,
      "grad_norm": 1.2799954414367676,
      "learning_rate": 1.3415456626191737e-06,
      "loss": 0.0092,
      "step": 2318,
      "video_reward_cumulative_accuracy": 0.8373597929249353
    },
    {
      "epoch": 0.6883348174532502,
      "grad_norm": 2.5710394382476807,
      "learning_rate": 1.33925081169602e-06,
      "loss": 0.0219,
      "step": 2319,
      "video_reward_cumulative_accuracy": 0.8374299266925399
    },
    {
      "epoch": 0.688631641436628,
      "grad_norm": 2.2735490798950195,
      "learning_rate": 1.3369572069497802e-06,
      "loss": 0.0518,
      "step": 2320,
      "video_reward_cumulative_accuracy": 0.8375
    },
    {
      "epoch": 0.688928465420006,
      "grad_norm": 2.3334121704101562,
      "learning_rate": 1.3346648508428595e-06,
      "loss": 0.0304,
      "step": 2321,
      "video_reward_cumulative_accuracy": 0.8375700129254632
    },
    {
      "epoch": 0.6892252894033838,
      "grad_norm": 2.004912853240967,
      "learning_rate": 1.3323737458363278e-06,
      "loss": 0.0474,
      "step": 2322,
      "video_reward_cumulative_accuracy": 0.8376399655469423
    },
    {
      "epoch": 0.6895221133867616,
      "grad_norm": 3.0432021617889404,
      "learning_rate": 1.3300838943899064e-06,
      "loss": 0.0394,
      "step": 2323,
      "video_reward_cumulative_accuracy": 0.837709857942316
    },
    {
      "epoch": 0.6898189373701396,
      "grad_norm": 0.710309624671936,
      "learning_rate": 1.327795298961974e-06,
      "loss": 0.0132,
      "step": 2324,
      "video_reward_cumulative_accuracy": 0.8377796901893287
    },
    {
      "epoch": 0.6901157613535174,
      "grad_norm": 1.954226016998291,
      "learning_rate": 1.3255079620095602e-06,
      "loss": 0.0161,
      "step": 2325,
      "video_reward_cumulative_accuracy": 0.8378494623655914
    },
    {
      "epoch": 0.6904125853368952,
      "grad_norm": 2.4498023986816406,
      "learning_rate": 1.323221885988341e-06,
      "loss": 0.0582,
      "step": 2326,
      "video_reward_cumulative_accuracy": 0.8377042132416165
    },
    {
      "epoch": 0.690709409320273,
      "grad_norm": 0.965714693069458,
      "learning_rate": 1.3209370733526444e-06,
      "loss": 0.013,
      "step": 2327,
      "video_reward_cumulative_accuracy": 0.8377739578856898
    },
    {
      "epoch": 0.691006233303651,
      "grad_norm": 1.6976726055145264,
      "learning_rate": 1.3186535265554363e-06,
      "loss": 0.0317,
      "step": 2328,
      "video_reward_cumulative_accuracy": 0.8378436426116839
    },
    {
      "epoch": 0.6913030572870288,
      "grad_norm": 2.1996965408325195,
      "learning_rate": 1.3163712480483255e-06,
      "loss": 0.0466,
      "step": 2329,
      "video_reward_cumulative_accuracy": 0.8379132674967797
    },
    {
      "epoch": 0.6915998812704066,
      "grad_norm": 5.554974555969238,
      "learning_rate": 1.3140902402815616e-06,
      "loss": 0.038,
      "step": 2330,
      "video_reward_cumulative_accuracy": 0.8379828326180258
    },
    {
      "epoch": 0.6918967052537845,
      "grad_norm": 2.487802028656006,
      "learning_rate": 1.3118105057040245e-06,
      "loss": 0.0667,
      "step": 2331,
      "video_reward_cumulative_accuracy": 0.838052338052338
    },
    {
      "epoch": 0.6921935292371624,
      "grad_norm": 2.990607976913452,
      "learning_rate": 1.3095320467632344e-06,
      "loss": 0.0428,
      "step": 2332,
      "video_reward_cumulative_accuracy": 0.8381217838765008
    },
    {
      "epoch": 0.6924903532205402,
      "grad_norm": 1.0717166662216187,
      "learning_rate": 1.3072548659053353e-06,
      "loss": 0.0196,
      "step": 2333,
      "video_reward_cumulative_accuracy": 0.8381911701671667
    },
    {
      "epoch": 0.692787177203918,
      "grad_norm": 2.585355043411255,
      "learning_rate": 1.3049789655751039e-06,
      "loss": 0.0557,
      "step": 2334,
      "video_reward_cumulative_accuracy": 0.8382604970008569
    },
    {
      "epoch": 0.693084001187296,
      "grad_norm": 1.229258418083191,
      "learning_rate": 1.3027043482159378e-06,
      "loss": 0.0284,
      "step": 2335,
      "video_reward_cumulative_accuracy": 0.8383297644539615
    },
    {
      "epoch": 0.6933808251706738,
      "grad_norm": 1.2362085580825806,
      "learning_rate": 1.3004310162698598e-06,
      "loss": 0.025,
      "step": 2336,
      "video_reward_cumulative_accuracy": 0.8383989726027398
    },
    {
      "epoch": 0.6936776491540516,
      "grad_norm": 2.241290807723999,
      "learning_rate": 1.298158972177515e-06,
      "loss": 0.0327,
      "step": 2337,
      "video_reward_cumulative_accuracy": 0.8382541720154044
    },
    {
      "epoch": 0.6939744731374295,
      "grad_norm": 2.196943998336792,
      "learning_rate": 1.2958882183781612e-06,
      "loss": 0.0565,
      "step": 2338,
      "video_reward_cumulative_accuracy": 0.838109495295124
    },
    {
      "epoch": 0.6942712971208074,
      "grad_norm": 1.276810884475708,
      "learning_rate": 1.2936187573096737e-06,
      "loss": 0.0237,
      "step": 2339,
      "video_reward_cumulative_accuracy": 0.837964942283027
    },
    {
      "epoch": 0.6945681211041852,
      "grad_norm": 2.0675230026245117,
      "learning_rate": 1.2913505914085384e-06,
      "loss": 0.0823,
      "step": 2340,
      "video_reward_cumulative_accuracy": 0.8378205128205128
    },
    {
      "epoch": 0.694864945087563,
      "grad_norm": 1.5794941186904907,
      "learning_rate": 1.2890837231098513e-06,
      "loss": 0.0305,
      "step": 2341,
      "video_reward_cumulative_accuracy": 0.8378897906877403
    },
    {
      "epoch": 0.695161769070941,
      "grad_norm": 1.307797908782959,
      "learning_rate": 1.2868181548473168e-06,
      "loss": 0.0242,
      "step": 2342,
      "video_reward_cumulative_accuracy": 0.8379590093936806
    },
    {
      "epoch": 0.6954585930543188,
      "grad_norm": 1.9544386863708496,
      "learning_rate": 1.2845538890532416e-06,
      "loss": 0.0246,
      "step": 2343,
      "video_reward_cumulative_accuracy": 0.8380281690140845
    },
    {
      "epoch": 0.6957554170376966,
      "grad_norm": 2.473175048828125,
      "learning_rate": 1.2822909281585359e-06,
      "loss": 0.0998,
      "step": 2344,
      "video_reward_cumulative_accuracy": 0.8380972696245734
    },
    {
      "epoch": 0.6960522410210745,
      "grad_norm": 2.0167837142944336,
      "learning_rate": 1.280029274592706e-06,
      "loss": 0.0363,
      "step": 2345,
      "video_reward_cumulative_accuracy": 0.8381663113006397
    },
    {
      "epoch": 0.6963490650044524,
      "grad_norm": 0.8653875589370728,
      "learning_rate": 1.2777689307838572e-06,
      "loss": 0.0146,
      "step": 2346,
      "video_reward_cumulative_accuracy": 0.8382352941176471
    },
    {
      "epoch": 0.6966458889878302,
      "grad_norm": 2.326350688934326,
      "learning_rate": 1.2755098991586884e-06,
      "loss": 0.0474,
      "step": 2347,
      "video_reward_cumulative_accuracy": 0.8383042181508309
    },
    {
      "epoch": 0.696942712971208,
      "grad_norm": 3.8789632320404053,
      "learning_rate": 1.273252182142489e-06,
      "loss": 0.0332,
      "step": 2348,
      "video_reward_cumulative_accuracy": 0.8383730834752982
    },
    {
      "epoch": 0.697239536954586,
      "grad_norm": 1.4527256488800049,
      "learning_rate": 1.2709957821591384e-06,
      "loss": 0.0275,
      "step": 2349,
      "video_reward_cumulative_accuracy": 0.8384418901660281
    },
    {
      "epoch": 0.6975363609379638,
      "grad_norm": 2.494121551513672,
      "learning_rate": 1.2687407016310992e-06,
      "loss": 0.0569,
      "step": 2350,
      "video_reward_cumulative_accuracy": 0.8382978723404255
    },
    {
      "epoch": 0.6978331849213416,
      "grad_norm": 0.8342524170875549,
      "learning_rate": 1.2664869429794197e-06,
      "loss": 0.0104,
      "step": 2351,
      "video_reward_cumulative_accuracy": 0.8383666524883029
    },
    {
      "epoch": 0.6981300089047195,
      "grad_norm": 0.7167454361915588,
      "learning_rate": 1.2642345086237294e-06,
      "loss": 0.0146,
      "step": 2352,
      "video_reward_cumulative_accuracy": 0.8384353741496599
    },
    {
      "epoch": 0.6984268328880974,
      "grad_norm": 3.59609317779541,
      "learning_rate": 1.261983400982234e-06,
      "loss": 0.0414,
      "step": 2353,
      "video_reward_cumulative_accuracy": 0.838504037399065
    },
    {
      "epoch": 0.6987236568714752,
      "grad_norm": 2.267436981201172,
      "learning_rate": 1.2597336224717183e-06,
      "loss": 0.0416,
      "step": 2354,
      "video_reward_cumulative_accuracy": 0.8383602378929482
    },
    {
      "epoch": 0.699020480854853,
      "grad_norm": 2.883449077606201,
      "learning_rate": 1.257485175507535e-06,
      "loss": 0.038,
      "step": 2355,
      "video_reward_cumulative_accuracy": 0.8384288747346073
    },
    {
      "epoch": 0.699317304838231,
      "grad_norm": 0.7676532864570618,
      "learning_rate": 1.255238062503612e-06,
      "loss": 0.0109,
      "step": 2356,
      "video_reward_cumulative_accuracy": 0.8384974533106961
    },
    {
      "epoch": 0.6996141288216088,
      "grad_norm": 0.7922206521034241,
      "learning_rate": 1.2529922858724422e-06,
      "loss": 0.0178,
      "step": 2357,
      "video_reward_cumulative_accuracy": 0.8385659736953754
    },
    {
      "epoch": 0.6999109528049866,
      "grad_norm": 1.5223472118377686,
      "learning_rate": 1.2507478480250862e-06,
      "loss": 0.0353,
      "step": 2358,
      "video_reward_cumulative_accuracy": 0.8386344359626803
    },
    {
      "epoch": 0.7002077767883645,
      "grad_norm": 2.4586706161499023,
      "learning_rate": 1.2485047513711643e-06,
      "loss": 0.0442,
      "step": 2359,
      "video_reward_cumulative_accuracy": 0.8387028401865197
    },
    {
      "epoch": 0.7005046007717424,
      "grad_norm": 3.2379069328308105,
      "learning_rate": 1.2462629983188586e-06,
      "loss": 0.0436,
      "step": 2360,
      "video_reward_cumulative_accuracy": 0.8387711864406779
    },
    {
      "epoch": 0.7008014247551202,
      "grad_norm": 1.5388333797454834,
      "learning_rate": 1.2440225912749096e-06,
      "loss": 0.0233,
      "step": 2361,
      "video_reward_cumulative_accuracy": 0.838839474798814
    },
    {
      "epoch": 0.701098248738498,
      "grad_norm": 1.7202107906341553,
      "learning_rate": 1.241783532644611e-06,
      "loss": 0.0366,
      "step": 2362,
      "video_reward_cumulative_accuracy": 0.8389077053344624
    },
    {
      "epoch": 0.701395072721876,
      "grad_norm": 2.6380770206451416,
      "learning_rate": 1.2395458248318107e-06,
      "loss": 0.0371,
      "step": 2363,
      "video_reward_cumulative_accuracy": 0.8387642826914938
    },
    {
      "epoch": 0.7016918967052538,
      "grad_norm": 1.3252801895141602,
      "learning_rate": 1.2373094702389031e-06,
      "loss": 0.015,
      "step": 2364,
      "video_reward_cumulative_accuracy": 0.8388324873096447
    },
    {
      "epoch": 0.7019887206886316,
      "grad_norm": 2.323251724243164,
      "learning_rate": 1.2350744712668332e-06,
      "loss": 0.0574,
      "step": 2365,
      "video_reward_cumulative_accuracy": 0.8386892177589852
    },
    {
      "epoch": 0.7022855446720095,
      "grad_norm": 2.107527017593384,
      "learning_rate": 1.2328408303150892e-06,
      "loss": 0.0387,
      "step": 2366,
      "video_reward_cumulative_accuracy": 0.8387573964497042
    },
    {
      "epoch": 0.7025823686553874,
      "grad_norm": 2.007369041442871,
      "learning_rate": 1.2306085497817016e-06,
      "loss": 0.0187,
      "step": 2367,
      "video_reward_cumulative_accuracy": 0.8388255175327418
    },
    {
      "epoch": 0.7028791926387652,
      "grad_norm": 2.632237195968628,
      "learning_rate": 1.2283776320632409e-06,
      "loss": 0.0215,
      "step": 2368,
      "video_reward_cumulative_accuracy": 0.838893581081081
    },
    {
      "epoch": 0.703176016622143,
      "grad_norm": 2.760110378265381,
      "learning_rate": 1.2261480795548123e-06,
      "loss": 0.0942,
      "step": 2369,
      "video_reward_cumulative_accuracy": 0.8389615871675813
    },
    {
      "epoch": 0.703472840605521,
      "grad_norm": 1.2644137144088745,
      "learning_rate": 1.223919894650058e-06,
      "loss": 0.0105,
      "step": 2370,
      "video_reward_cumulative_accuracy": 0.8390295358649789
    },
    {
      "epoch": 0.7037696645888988,
      "grad_norm": 1.7368650436401367,
      "learning_rate": 1.2216930797411486e-06,
      "loss": 0.0209,
      "step": 2371,
      "video_reward_cumulative_accuracy": 0.8390974272458878
    },
    {
      "epoch": 0.7040664885722766,
      "grad_norm": 1.736447811126709,
      "learning_rate": 1.2194676372187886e-06,
      "loss": 0.0279,
      "step": 2372,
      "video_reward_cumulative_accuracy": 0.8389544688026982
    },
    {
      "epoch": 0.7043633125556545,
      "grad_norm": 2.79758620262146,
      "learning_rate": 1.2172435694722064e-06,
      "loss": 0.0739,
      "step": 2373,
      "video_reward_cumulative_accuracy": 0.838811630847029
    },
    {
      "epoch": 0.7046601365390324,
      "grad_norm": 0.7806415557861328,
      "learning_rate": 1.2150208788891533e-06,
      "loss": 0.0124,
      "step": 2374,
      "video_reward_cumulative_accuracy": 0.8388795282224094
    },
    {
      "epoch": 0.7049569605224102,
      "grad_norm": 1.3390443325042725,
      "learning_rate": 1.2127995678559042e-06,
      "loss": 0.015,
      "step": 2375,
      "video_reward_cumulative_accuracy": 0.8389473684210527
    },
    {
      "epoch": 0.705253784505788,
      "grad_norm": 2.772890567779541,
      "learning_rate": 1.2105796387572514e-06,
      "loss": 0.0162,
      "step": 2376,
      "video_reward_cumulative_accuracy": 0.8390151515151515
    },
    {
      "epoch": 0.705550608489166,
      "grad_norm": 1.042292594909668,
      "learning_rate": 1.2083610939765031e-06,
      "loss": 0.0178,
      "step": 2377,
      "video_reward_cumulative_accuracy": 0.8390828775767775
    },
    {
      "epoch": 0.7058474324725438,
      "grad_norm": 2.8607852458953857,
      "learning_rate": 1.2061439358954862e-06,
      "loss": 0.0205,
      "step": 2378,
      "video_reward_cumulative_accuracy": 0.8391505466778806
    },
    {
      "epoch": 0.7061442564559216,
      "grad_norm": 1.2580045461654663,
      "learning_rate": 1.203928166894532e-06,
      "loss": 0.0118,
      "step": 2379,
      "video_reward_cumulative_accuracy": 0.8392181588902901
    },
    {
      "epoch": 0.7064410804392995,
      "grad_norm": 2.3520500659942627,
      "learning_rate": 1.2017137893524851e-06,
      "loss": 0.0244,
      "step": 2380,
      "video_reward_cumulative_accuracy": 0.8392857142857143
    },
    {
      "epoch": 0.7067379044226774,
      "grad_norm": 0.5174872875213623,
      "learning_rate": 1.1995008056466933e-06,
      "loss": 0.0071,
      "step": 2381,
      "video_reward_cumulative_accuracy": 0.8393532129357413
    },
    {
      "epoch": 0.7070347284060552,
      "grad_norm": 1.8015003204345703,
      "learning_rate": 1.19728921815301e-06,
      "loss": 0.019,
      "step": 2382,
      "video_reward_cumulative_accuracy": 0.8394206549118388
    },
    {
      "epoch": 0.707331552389433,
      "grad_norm": 1.3630971908569336,
      "learning_rate": 1.1950790292457893e-06,
      "loss": 0.0423,
      "step": 2383,
      "video_reward_cumulative_accuracy": 0.8394880402853546
    },
    {
      "epoch": 0.707628376372811,
      "grad_norm": 3.022733449935913,
      "learning_rate": 1.1928702412978833e-06,
      "loss": 0.066,
      "step": 2384,
      "video_reward_cumulative_accuracy": 0.8395553691275168
    },
    {
      "epoch": 0.7079252003561888,
      "grad_norm": 1.3637797832489014,
      "learning_rate": 1.1906628566806414e-06,
      "loss": 0.0447,
      "step": 2385,
      "video_reward_cumulative_accuracy": 0.8394129979035639
    },
    {
      "epoch": 0.7082220243395666,
      "grad_norm": 0.23082542419433594,
      "learning_rate": 1.188456877763903e-06,
      "loss": 0.0028,
      "step": 2386,
      "video_reward_cumulative_accuracy": 0.8394803017602682
    },
    {
      "epoch": 0.7085188483229445,
      "grad_norm": 3.9995596408843994,
      "learning_rate": 1.1862523069160017e-06,
      "loss": 0.0492,
      "step": 2387,
      "video_reward_cumulative_accuracy": 0.8395475492249685
    },
    {
      "epoch": 0.7088156723063224,
      "grad_norm": 2.762430429458618,
      "learning_rate": 1.1840491465037584e-06,
      "loss": 0.0473,
      "step": 2388,
      "video_reward_cumulative_accuracy": 0.8396147403685092
    },
    {
      "epoch": 0.7091124962897002,
      "grad_norm": 1.9774202108383179,
      "learning_rate": 1.1818473988924797e-06,
      "loss": 0.0493,
      "step": 2389,
      "video_reward_cumulative_accuracy": 0.8396818752616158
    },
    {
      "epoch": 0.709409320273078,
      "grad_norm": 1.2784936428070068,
      "learning_rate": 1.179647066445956e-06,
      "loss": 0.0216,
      "step": 2390,
      "video_reward_cumulative_accuracy": 0.8397489539748954
    },
    {
      "epoch": 0.709706144256456,
      "grad_norm": 3.470581531524658,
      "learning_rate": 1.177448151526456e-06,
      "loss": 0.0223,
      "step": 2391,
      "video_reward_cumulative_accuracy": 0.8398159765788373
    },
    {
      "epoch": 0.7100029682398338,
      "grad_norm": 2.1586861610412598,
      "learning_rate": 1.1752506564947294e-06,
      "loss": 0.0198,
      "step": 2392,
      "video_reward_cumulative_accuracy": 0.8398829431438127
    },
    {
      "epoch": 0.7102997922232116,
      "grad_norm": 2.96563982963562,
      "learning_rate": 1.1730545837099999e-06,
      "loss": 0.071,
      "step": 2393,
      "video_reward_cumulative_accuracy": 0.8399498537400752
    },
    {
      "epoch": 0.7105966162065895,
      "grad_norm": 1.8292112350463867,
      "learning_rate": 1.1708599355299662e-06,
      "loss": 0.0207,
      "step": 2394,
      "video_reward_cumulative_accuracy": 0.8400167084377611
    },
    {
      "epoch": 0.7108934401899674,
      "grad_norm": 4.007872581481934,
      "learning_rate": 1.168666714310794e-06,
      "loss": 0.0726,
      "step": 2395,
      "video_reward_cumulative_accuracy": 0.8400835073068893
    },
    {
      "epoch": 0.7111902641733452,
      "grad_norm": 3.1843671798706055,
      "learning_rate": 1.1664749224071203e-06,
      "loss": 0.0496,
      "step": 2396,
      "video_reward_cumulative_accuracy": 0.8401502504173622
    },
    {
      "epoch": 0.711487088156723,
      "grad_norm": 3.3738343715667725,
      "learning_rate": 1.1642845621720463e-06,
      "loss": 0.0419,
      "step": 2397,
      "video_reward_cumulative_accuracy": 0.8402169378389653
    },
    {
      "epoch": 0.711783912140101,
      "grad_norm": 1.7037007808685303,
      "learning_rate": 1.1620956359571364e-06,
      "loss": 0.0587,
      "step": 2398,
      "video_reward_cumulative_accuracy": 0.8402835696413679
    },
    {
      "epoch": 0.7120807361234788,
      "grad_norm": 2.8686163425445557,
      "learning_rate": 1.1599081461124161e-06,
      "loss": 0.0247,
      "step": 2399,
      "video_reward_cumulative_accuracy": 0.8403501458941226
    },
    {
      "epoch": 0.7123775601068566,
      "grad_norm": 0.6228769421577454,
      "learning_rate": 1.1577220949863663e-06,
      "loss": 0.0058,
      "step": 2400,
      "video_reward_cumulative_accuracy": 0.8404166666666667
    },
    {
      "epoch": 0.7123775601068566,
      "eval_runtime": 130.8618,
      "eval_samples_per_second": 6.029,
      "eval_steps_per_second": 0.757,
      "eval_test_set_accuracy": 0.8131313131313131,
      "step": 2400
    },
    {
      "epoch": 0.7126743840902345,
      "grad_norm": 1.7672585248947144,
      "learning_rate": 1.155537484925926e-06,
      "loss": 0.0586,
      "step": 2401,
      "video_reward_cumulative_accuracy": 0.8404831320283215
    },
    {
      "epoch": 0.7129712080736124,
      "grad_norm": 2.9688684940338135,
      "learning_rate": 1.153354318276486e-06,
      "loss": 0.0505,
      "step": 2402,
      "video_reward_cumulative_accuracy": 0.8405495420482931
    },
    {
      "epoch": 0.7132680320569902,
      "grad_norm": 2.505089044570923,
      "learning_rate": 1.1511725973818879e-06,
      "loss": 0.0257,
      "step": 2403,
      "video_reward_cumulative_accuracy": 0.8406158967956721
    },
    {
      "epoch": 0.713564856040368,
      "grad_norm": 0.5991393327713013,
      "learning_rate": 1.1489923245844214e-06,
      "loss": 0.0085,
      "step": 2404,
      "video_reward_cumulative_accuracy": 0.8406821963394343
    },
    {
      "epoch": 0.713861680023746,
      "grad_norm": 1.6461894512176514,
      "learning_rate": 1.1468135022248195e-06,
      "loss": 0.0169,
      "step": 2405,
      "video_reward_cumulative_accuracy": 0.8407484407484408
    },
    {
      "epoch": 0.7141585040071238,
      "grad_norm": 3.0698747634887695,
      "learning_rate": 1.14463613264226e-06,
      "loss": 0.0623,
      "step": 2406,
      "video_reward_cumulative_accuracy": 0.8408146300914381
    },
    {
      "epoch": 0.7144553279905016,
      "grad_norm": 2.5160300731658936,
      "learning_rate": 1.1424602181743609e-06,
      "loss": 0.066,
      "step": 2407,
      "video_reward_cumulative_accuracy": 0.8406730369754881
    },
    {
      "epoch": 0.7147521519738795,
      "grad_norm": 1.7137246131896973,
      "learning_rate": 1.1402857611571772e-06,
      "loss": 0.0148,
      "step": 2408,
      "video_reward_cumulative_accuracy": 0.8407392026578073
    },
    {
      "epoch": 0.7150489759572574,
      "grad_norm": 1.8538068532943726,
      "learning_rate": 1.1381127639252005e-06,
      "loss": 0.0292,
      "step": 2409,
      "video_reward_cumulative_accuracy": 0.8408053134080531
    },
    {
      "epoch": 0.7153457999406352,
      "grad_norm": 4.450146675109863,
      "learning_rate": 1.1359412288113526e-06,
      "loss": 0.031,
      "step": 2410,
      "video_reward_cumulative_accuracy": 0.8404564315352697
    },
    {
      "epoch": 0.715642623924013,
      "grad_norm": 1.273587703704834,
      "learning_rate": 1.133771158146988e-06,
      "loss": 0.0124,
      "step": 2411,
      "video_reward_cumulative_accuracy": 0.8405226047283285
    },
    {
      "epoch": 0.715939447907391,
      "grad_norm": 2.9038286209106445,
      "learning_rate": 1.131602554261888e-06,
      "loss": 0.0557,
      "step": 2412,
      "video_reward_cumulative_accuracy": 0.8405887230514096
    },
    {
      "epoch": 0.7162362718907688,
      "grad_norm": 1.5233856439590454,
      "learning_rate": 1.1294354194842597e-06,
      "loss": 0.0135,
      "step": 2413,
      "video_reward_cumulative_accuracy": 0.8406547865727311
    },
    {
      "epoch": 0.7165330958741466,
      "grad_norm": 0.3505679666996002,
      "learning_rate": 1.1272697561407334e-06,
      "loss": 0.004,
      "step": 2414,
      "video_reward_cumulative_accuracy": 0.8407207953603977
    },
    {
      "epoch": 0.7168299198575245,
      "grad_norm": 3.6674511432647705,
      "learning_rate": 1.125105566556357e-06,
      "loss": 0.0877,
      "step": 2415,
      "video_reward_cumulative_accuracy": 0.8405797101449275
    },
    {
      "epoch": 0.7171267438409024,
      "grad_norm": 0.8542490601539612,
      "learning_rate": 1.1229428530546002e-06,
      "loss": 0.0147,
      "step": 2416,
      "video_reward_cumulative_accuracy": 0.8406456953642384
    },
    {
      "epoch": 0.7174235678242802,
      "grad_norm": 2.0247857570648193,
      "learning_rate": 1.1207816179573427e-06,
      "loss": 0.0484,
      "step": 2417,
      "video_reward_cumulative_accuracy": 0.8407116259826231
    },
    {
      "epoch": 0.717720391807658,
      "grad_norm": 1.7398759126663208,
      "learning_rate": 1.1186218635848838e-06,
      "loss": 0.0595,
      "step": 2418,
      "video_reward_cumulative_accuracy": 0.8407775020678246
    },
    {
      "epoch": 0.718017215791036,
      "grad_norm": 1.2904212474822998,
      "learning_rate": 1.1164635922559273e-06,
      "loss": 0.07,
      "step": 2419,
      "video_reward_cumulative_accuracy": 0.8408433236874742
    },
    {
      "epoch": 0.7183140397744138,
      "grad_norm": 3.129547595977783,
      "learning_rate": 1.114306806287587e-06,
      "loss": 0.0425,
      "step": 2420,
      "video_reward_cumulative_accuracy": 0.8409090909090909
    },
    {
      "epoch": 0.7186108637577916,
      "grad_norm": 3.4823479652404785,
      "learning_rate": 1.1121515079953834e-06,
      "loss": 0.0369,
      "step": 2421,
      "video_reward_cumulative_accuracy": 0.8409748038000826
    },
    {
      "epoch": 0.7189076877411695,
      "grad_norm": 2.5278398990631104,
      "learning_rate": 1.1099976996932357e-06,
      "loss": 0.0397,
      "step": 2422,
      "video_reward_cumulative_accuracy": 0.8410404624277457
    },
    {
      "epoch": 0.7192045117245474,
      "grad_norm": 1.669782280921936,
      "learning_rate": 1.1078453836934697e-06,
      "loss": 0.0514,
      "step": 2423,
      "video_reward_cumulative_accuracy": 0.8406933553446141
    },
    {
      "epoch": 0.7195013357079252,
      "grad_norm": 1.4766911268234253,
      "learning_rate": 1.1056945623068023e-06,
      "loss": 0.026,
      "step": 2424,
      "video_reward_cumulative_accuracy": 0.8407590759075908
    },
    {
      "epoch": 0.719798159691303,
      "grad_norm": 2.538355588912964,
      "learning_rate": 1.1035452378423512e-06,
      "loss": 0.0246,
      "step": 2425,
      "video_reward_cumulative_accuracy": 0.8408247422680413
    },
    {
      "epoch": 0.7200949836746809,
      "grad_norm": 2.352414131164551,
      "learning_rate": 1.1013974126076243e-06,
      "loss": 0.0457,
      "step": 2426,
      "video_reward_cumulative_accuracy": 0.8408903544929925
    },
    {
      "epoch": 0.7203918076580588,
      "grad_norm": 1.7153733968734741,
      "learning_rate": 1.0992510889085187e-06,
      "loss": 0.0419,
      "step": 2427,
      "video_reward_cumulative_accuracy": 0.8409559126493613
    },
    {
      "epoch": 0.7206886316414366,
      "grad_norm": 4.937928199768066,
      "learning_rate": 1.0971062690493242e-06,
      "loss": 0.045,
      "step": 2428,
      "video_reward_cumulative_accuracy": 0.8410214168039539
    },
    {
      "epoch": 0.7209854556248145,
      "grad_norm": 1.5569862127304077,
      "learning_rate": 1.0949629553327106e-06,
      "loss": 0.0175,
      "step": 2429,
      "video_reward_cumulative_accuracy": 0.8410868670234665
    },
    {
      "epoch": 0.7212822796081924,
      "grad_norm": 0.807574987411499,
      "learning_rate": 1.0928211500597355e-06,
      "loss": 0.0236,
      "step": 2430,
      "video_reward_cumulative_accuracy": 0.8411522633744856
    },
    {
      "epoch": 0.7215791035915702,
      "grad_norm": 1.268849492073059,
      "learning_rate": 1.0906808555298323e-06,
      "loss": 0.0497,
      "step": 2431,
      "video_reward_cumulative_accuracy": 0.8410119292472233
    },
    {
      "epoch": 0.721875927574948,
      "grad_norm": 3.1053664684295654,
      "learning_rate": 1.088542074040816e-06,
      "loss": 0.0356,
      "step": 2432,
      "video_reward_cumulative_accuracy": 0.841077302631579
    },
    {
      "epoch": 0.7221727515583259,
      "grad_norm": 2.7275187969207764,
      "learning_rate": 1.0864048078888758e-06,
      "loss": 0.0367,
      "step": 2433,
      "video_reward_cumulative_accuracy": 0.8411426222770243
    },
    {
      "epoch": 0.7224695755417038,
      "grad_norm": 3.4648077487945557,
      "learning_rate": 1.084269059368575e-06,
      "loss": 0.041,
      "step": 2434,
      "video_reward_cumulative_accuracy": 0.8412078882497945
    },
    {
      "epoch": 0.7227663995250816,
      "grad_norm": 1.614362359046936,
      "learning_rate": 1.0821348307728478e-06,
      "loss": 0.0259,
      "step": 2435,
      "video_reward_cumulative_accuracy": 0.8412731006160165
    },
    {
      "epoch": 0.7230632235084595,
      "grad_norm": 3.655402898788452,
      "learning_rate": 1.0800021243929931e-06,
      "loss": 0.0496,
      "step": 2436,
      "video_reward_cumulative_accuracy": 0.8411330049261084
    },
    {
      "epoch": 0.7233600474918374,
      "grad_norm": 2.584467649459839,
      "learning_rate": 1.0778709425186801e-06,
      "loss": 0.0659,
      "step": 2437,
      "video_reward_cumulative_accuracy": 0.8409930242100944
    },
    {
      "epoch": 0.7236568714752152,
      "grad_norm": 3.22719669342041,
      "learning_rate": 1.0757412874379386e-06,
      "loss": 0.0443,
      "step": 2438,
      "video_reward_cumulative_accuracy": 0.8408531583264971
    },
    {
      "epoch": 0.723953695458593,
      "grad_norm": 0.4424673318862915,
      "learning_rate": 1.0736131614371602e-06,
      "loss": 0.0074,
      "step": 2439,
      "video_reward_cumulative_accuracy": 0.8409184091840919
    },
    {
      "epoch": 0.7242505194419709,
      "grad_norm": 0.7209199070930481,
      "learning_rate": 1.0714865668010962e-06,
      "loss": 0.03,
      "step": 2440,
      "video_reward_cumulative_accuracy": 0.840983606557377
    },
    {
      "epoch": 0.7245473434253488,
      "grad_norm": 2.2565574645996094,
      "learning_rate": 1.0693615058128502e-06,
      "loss": 0.0298,
      "step": 2441,
      "video_reward_cumulative_accuracy": 0.8410487505120852
    },
    {
      "epoch": 0.7248441674087266,
      "grad_norm": 2.199859857559204,
      "learning_rate": 1.0672379807538818e-06,
      "loss": 0.0788,
      "step": 2442,
      "video_reward_cumulative_accuracy": 0.8411138411138411
    },
    {
      "epoch": 0.7251409913921045,
      "grad_norm": 1.1875553131103516,
      "learning_rate": 1.0651159939040017e-06,
      "loss": 0.0301,
      "step": 2443,
      "video_reward_cumulative_accuracy": 0.8409742120343839
    },
    {
      "epoch": 0.7254378153754824,
      "grad_norm": 2.6030995845794678,
      "learning_rate": 1.0629955475413691e-06,
      "loss": 0.0283,
      "step": 2444,
      "video_reward_cumulative_accuracy": 0.8410392798690671
    },
    {
      "epoch": 0.7257346393588602,
      "grad_norm": 2.654780864715576,
      "learning_rate": 1.0608766439424895e-06,
      "loss": 0.0215,
      "step": 2445,
      "video_reward_cumulative_accuracy": 0.8411042944785276
    },
    {
      "epoch": 0.726031463342238,
      "grad_norm": 1.8764584064483643,
      "learning_rate": 1.0587592853822096e-06,
      "loss": 0.038,
      "step": 2446,
      "video_reward_cumulative_accuracy": 0.8411692559280458
    },
    {
      "epoch": 0.7263282873256159,
      "grad_norm": 2.492081642150879,
      "learning_rate": 1.0566434741337204e-06,
      "loss": 0.0329,
      "step": 2447,
      "video_reward_cumulative_accuracy": 0.8412341642827953
    },
    {
      "epoch": 0.7266251113089938,
      "grad_norm": 2.2990763187408447,
      "learning_rate": 1.0545292124685506e-06,
      "loss": 0.0288,
      "step": 2448,
      "video_reward_cumulative_accuracy": 0.8412990196078431
    },
    {
      "epoch": 0.7269219352923716,
      "grad_norm": 1.3020623922348022,
      "learning_rate": 1.0524165026565655e-06,
      "loss": 0.0178,
      "step": 2449,
      "video_reward_cumulative_accuracy": 0.8413638219681503
    },
    {
      "epoch": 0.7272187592757495,
      "grad_norm": 0.8552646040916443,
      "learning_rate": 1.0503053469659647e-06,
      "loss": 0.0229,
      "step": 2450,
      "video_reward_cumulative_accuracy": 0.8414285714285714
    },
    {
      "epoch": 0.7275155832591274,
      "grad_norm": 2.16868257522583,
      "learning_rate": 1.0481957476632773e-06,
      "loss": 0.0564,
      "step": 2451,
      "video_reward_cumulative_accuracy": 0.8412892696858425
    },
    {
      "epoch": 0.7278124072425052,
      "grad_norm": 0.4563276767730713,
      "learning_rate": 1.0460877070133634e-06,
      "loss": 0.012,
      "step": 2452,
      "video_reward_cumulative_accuracy": 0.8413539967373572
    },
    {
      "epoch": 0.728109231225883,
      "grad_norm": 3.3756730556488037,
      "learning_rate": 1.0439812272794096e-06,
      "loss": 0.0633,
      "step": 2453,
      "video_reward_cumulative_accuracy": 0.8414186710150836
    },
    {
      "epoch": 0.7284060552092609,
      "grad_norm": 5.538758754730225,
      "learning_rate": 1.0418763107229271e-06,
      "loss": 0.0483,
      "step": 2454,
      "video_reward_cumulative_accuracy": 0.841483292583537
    },
    {
      "epoch": 0.7287028791926388,
      "grad_norm": 1.5109965801239014,
      "learning_rate": 1.0397729596037463e-06,
      "loss": 0.0185,
      "step": 2455,
      "video_reward_cumulative_accuracy": 0.8415478615071283
    },
    {
      "epoch": 0.7289997031760166,
      "grad_norm": 2.8143222332000732,
      "learning_rate": 1.0376711761800196e-06,
      "loss": 0.0553,
      "step": 2456,
      "video_reward_cumulative_accuracy": 0.8416123778501629
    },
    {
      "epoch": 0.7292965271593945,
      "grad_norm": 1.900389313697815,
      "learning_rate": 1.0355709627082155e-06,
      "loss": 0.0637,
      "step": 2457,
      "video_reward_cumulative_accuracy": 0.8414733414733415
    },
    {
      "epoch": 0.7295933511427724,
      "grad_norm": 2.070751905441284,
      "learning_rate": 1.0334723214431175e-06,
      "loss": 0.0273,
      "step": 2458,
      "video_reward_cumulative_accuracy": 0.8415378356387306
    },
    {
      "epoch": 0.7298901751261502,
      "grad_norm": 3.6097755432128906,
      "learning_rate": 1.031375254637821e-06,
      "loss": 0.0496,
      "step": 2459,
      "video_reward_cumulative_accuracy": 0.8413989426596177
    },
    {
      "epoch": 0.730186999109528,
      "grad_norm": 2.63653302192688,
      "learning_rate": 1.0292797645437288e-06,
      "loss": 0.0594,
      "step": 2460,
      "video_reward_cumulative_accuracy": 0.8412601626016261
    },
    {
      "epoch": 0.7304838230929059,
      "grad_norm": 3.204415798187256,
      "learning_rate": 1.0271858534105547e-06,
      "loss": 0.0269,
      "step": 2461,
      "video_reward_cumulative_accuracy": 0.8411214953271028
    },
    {
      "epoch": 0.7307806470762838,
      "grad_norm": 1.9313973188400269,
      "learning_rate": 1.0250935234863147e-06,
      "loss": 0.0336,
      "step": 2462,
      "video_reward_cumulative_accuracy": 0.8411860276198213
    },
    {
      "epoch": 0.7310774710596616,
      "grad_norm": 1.1721389293670654,
      "learning_rate": 1.0230027770173282e-06,
      "loss": 0.0178,
      "step": 2463,
      "video_reward_cumulative_accuracy": 0.8412505075111653
    },
    {
      "epoch": 0.7313742950430395,
      "grad_norm": 2.0116591453552246,
      "learning_rate": 1.0209136162482155e-06,
      "loss": 0.026,
      "step": 2464,
      "video_reward_cumulative_accuracy": 0.841314935064935
    },
    {
      "epoch": 0.7316711190264173,
      "grad_norm": 2.902461290359497,
      "learning_rate": 1.0188260434218919e-06,
      "loss": 0.0487,
      "step": 2465,
      "video_reward_cumulative_accuracy": 0.8411764705882353
    },
    {
      "epoch": 0.7319679430097952,
      "grad_norm": 0.5362818241119385,
      "learning_rate": 1.0167400607795708e-06,
      "loss": 0.0077,
      "step": 2466,
      "video_reward_cumulative_accuracy": 0.8412408759124088
    },
    {
      "epoch": 0.732264766993173,
      "grad_norm": 1.2604457139968872,
      "learning_rate": 1.0146556705607544e-06,
      "loss": 0.0254,
      "step": 2467,
      "video_reward_cumulative_accuracy": 0.841305229023105
    },
    {
      "epoch": 0.7325615909765509,
      "grad_norm": 0.365590900182724,
      "learning_rate": 1.012572875003241e-06,
      "loss": 0.0057,
      "step": 2468,
      "video_reward_cumulative_accuracy": 0.8413695299837926
    },
    {
      "epoch": 0.7328584149599288,
      "grad_norm": 0.8860239386558533,
      "learning_rate": 1.0104916763431133e-06,
      "loss": 0.0209,
      "step": 2469,
      "video_reward_cumulative_accuracy": 0.8414337788578372
    },
    {
      "epoch": 0.7331552389433066,
      "grad_norm": 1.316934585571289,
      "learning_rate": 1.0084120768147385e-06,
      "loss": 0.0112,
      "step": 2470,
      "video_reward_cumulative_accuracy": 0.841497975708502
    },
    {
      "epoch": 0.7334520629266845,
      "grad_norm": 1.1632755994796753,
      "learning_rate": 1.00633407865077e-06,
      "loss": 0.0526,
      "step": 2471,
      "video_reward_cumulative_accuracy": 0.8413597733711048
    },
    {
      "epoch": 0.7337488869100623,
      "grad_norm": 0.5752823352813721,
      "learning_rate": 1.0042576840821394e-06,
      "loss": 0.0115,
      "step": 2472,
      "video_reward_cumulative_accuracy": 0.8414239482200647
    },
    {
      "epoch": 0.7340457108934402,
      "grad_norm": 2.8331027030944824,
      "learning_rate": 1.0021828953380572e-06,
      "loss": 0.0209,
      "step": 2473,
      "video_reward_cumulative_accuracy": 0.8414880711686211
    },
    {
      "epoch": 0.734342534876818,
      "grad_norm": 3.890144109725952,
      "learning_rate": 1.0001097146460134e-06,
      "loss": 0.0889,
      "step": 2474,
      "video_reward_cumulative_accuracy": 0.8413500404203719
    },
    {
      "epoch": 0.7346393588601959,
      "grad_norm": 1.1951637268066406,
      "learning_rate": 9.980381442317661e-07,
      "loss": 0.0127,
      "step": 2475,
      "video_reward_cumulative_accuracy": 0.8414141414141414
    },
    {
      "epoch": 0.7349361828435738,
      "grad_norm": 1.1834206581115723,
      "learning_rate": 9.959681863193489e-07,
      "loss": 0.0174,
      "step": 2476,
      "video_reward_cumulative_accuracy": 0.8414781906300485
    },
    {
      "epoch": 0.7352330068269516,
      "grad_norm": 0.8492854833602905,
      "learning_rate": 9.938998431310604e-07,
      "loss": 0.0202,
      "step": 2477,
      "video_reward_cumulative_accuracy": 0.8415421881308034
    },
    {
      "epoch": 0.7355298308103295,
      "grad_norm": 1.2624728679656982,
      "learning_rate": 9.918331168874693e-07,
      "loss": 0.0083,
      "step": 2478,
      "video_reward_cumulative_accuracy": 0.8416061339790153
    },
    {
      "epoch": 0.7358266547937073,
      "grad_norm": 2.332775831222534,
      "learning_rate": 9.897680098074063e-07,
      "loss": 0.0323,
      "step": 2479,
      "video_reward_cumulative_accuracy": 0.8416700282371924
    },
    {
      "epoch": 0.7361234787770852,
      "grad_norm": 2.9723246097564697,
      "learning_rate": 9.877045241079647e-07,
      "loss": 0.0763,
      "step": 2480,
      "video_reward_cumulative_accuracy": 0.8415322580645161
    },
    {
      "epoch": 0.736420302760463,
      "grad_norm": 1.4844021797180176,
      "learning_rate": 9.85642662004497e-07,
      "loss": 0.0088,
      "step": 2481,
      "video_reward_cumulative_accuracy": 0.841596130592503
    },
    {
      "epoch": 0.7367171267438409,
      "grad_norm": 0.8543074727058411,
      "learning_rate": 9.835824257106112e-07,
      "loss": 0.019,
      "step": 2482,
      "video_reward_cumulative_accuracy": 0.8416599516518937
    },
    {
      "epoch": 0.7370139507272188,
      "grad_norm": 1.366849422454834,
      "learning_rate": 9.815238174381711e-07,
      "loss": 0.0092,
      "step": 2483,
      "video_reward_cumulative_accuracy": 0.8417237213048732
    },
    {
      "epoch": 0.7373107747105966,
      "grad_norm": 0.7628781199455261,
      "learning_rate": 9.794668393972932e-07,
      "loss": 0.0266,
      "step": 2484,
      "video_reward_cumulative_accuracy": 0.8417874396135265
    },
    {
      "epoch": 0.7376075986939745,
      "grad_norm": 3.3191943168640137,
      "learning_rate": 9.774114937963425e-07,
      "loss": 0.0279,
      "step": 2485,
      "video_reward_cumulative_accuracy": 0.8418511066398391
    },
    {
      "epoch": 0.7379044226773523,
      "grad_norm": 0.4921557903289795,
      "learning_rate": 9.753577828419331e-07,
      "loss": 0.0094,
      "step": 2486,
      "video_reward_cumulative_accuracy": 0.8419147224456959
    },
    {
      "epoch": 0.7382012466607302,
      "grad_norm": 5.423759937286377,
      "learning_rate": 9.73305708738921e-07,
      "loss": 0.0882,
      "step": 2487,
      "video_reward_cumulative_accuracy": 0.8415761962203459
    },
    {
      "epoch": 0.738498070644108,
      "grad_norm": 2.609516143798828,
      "learning_rate": 9.712552736904085e-07,
      "loss": 0.02,
      "step": 2488,
      "video_reward_cumulative_accuracy": 0.8416398713826366
    },
    {
      "epoch": 0.7387948946274859,
      "grad_norm": 2.0234172344207764,
      "learning_rate": 9.69206479897736e-07,
      "loss": 0.0207,
      "step": 2489,
      "video_reward_cumulative_accuracy": 0.8417034953796706
    },
    {
      "epoch": 0.7390917186108638,
      "grad_norm": 2.902467966079712,
      "learning_rate": 9.671593295604836e-07,
      "loss": 0.0226,
      "step": 2490,
      "video_reward_cumulative_accuracy": 0.8417670682730923
    },
    {
      "epoch": 0.7393885425942416,
      "grad_norm": 1.8308614492416382,
      "learning_rate": 9.65113824876464e-07,
      "loss": 0.0383,
      "step": 2491,
      "video_reward_cumulative_accuracy": 0.841830590124448
    },
    {
      "epoch": 0.7396853665776195,
      "grad_norm": 0.8829131722450256,
      "learning_rate": 9.63069968041726e-07,
      "loss": 0.0106,
      "step": 2492,
      "video_reward_cumulative_accuracy": 0.8418940609951846
    },
    {
      "epoch": 0.7399821905609973,
      "grad_norm": 1.45628821849823,
      "learning_rate": 9.610277612505483e-07,
      "loss": 0.0142,
      "step": 2493,
      "video_reward_cumulative_accuracy": 0.8419574809466506
    },
    {
      "epoch": 0.7402790145443752,
      "grad_norm": 2.1599154472351074,
      "learning_rate": 9.58987206695438e-07,
      "loss": 0.0342,
      "step": 2494,
      "video_reward_cumulative_accuracy": 0.8420208500400962
    },
    {
      "epoch": 0.740575838527753,
      "grad_norm": 3.008652687072754,
      "learning_rate": 9.569483065671294e-07,
      "loss": 0.0649,
      "step": 2495,
      "video_reward_cumulative_accuracy": 0.8420841683366733
    },
    {
      "epoch": 0.7408726625111309,
      "grad_norm": 1.7300156354904175,
      "learning_rate": 9.549110630545783e-07,
      "loss": 0.025,
      "step": 2496,
      "video_reward_cumulative_accuracy": 0.8419471153846154
    },
    {
      "epoch": 0.7411694864945088,
      "grad_norm": 1.144446611404419,
      "learning_rate": 9.528754783449634e-07,
      "loss": 0.0149,
      "step": 2497,
      "video_reward_cumulative_accuracy": 0.842010412494994
    },
    {
      "epoch": 0.7414663104778866,
      "grad_norm": 0.29633480310440063,
      "learning_rate": 9.508415546236829e-07,
      "loss": 0.004,
      "step": 2498,
      "video_reward_cumulative_accuracy": 0.8420736589271417
    },
    {
      "epoch": 0.7417631344612645,
      "grad_norm": 2.104227304458618,
      "learning_rate": 9.488092940743516e-07,
      "loss": 0.0251,
      "step": 2499,
      "video_reward_cumulative_accuracy": 0.8421368547418968
    },
    {
      "epoch": 0.7420599584446423,
      "grad_norm": 2.1143417358398438,
      "learning_rate": 9.467786988787989e-07,
      "loss": 0.0524,
      "step": 2500,
      "video_reward_cumulative_accuracy": 0.8422
    },
    {
      "epoch": 0.7423567824280202,
      "grad_norm": 3.5316734313964844,
      "learning_rate": 9.447497712170642e-07,
      "loss": 0.0873,
      "step": 2501,
      "video_reward_cumulative_accuracy": 0.8422630947620952
    },
    {
      "epoch": 0.742653606411398,
      "grad_norm": 1.2431542873382568,
      "learning_rate": 9.427225132673992e-07,
      "loss": 0.0786,
      "step": 2502,
      "video_reward_cumulative_accuracy": 0.842326139088729
    },
    {
      "epoch": 0.7429504303947759,
      "grad_norm": 0.4430326819419861,
      "learning_rate": 9.406969272062619e-07,
      "loss": 0.0069,
      "step": 2503,
      "video_reward_cumulative_accuracy": 0.8423891330403516
    },
    {
      "epoch": 0.7432472543781538,
      "grad_norm": 2.5912365913391113,
      "learning_rate": 9.386730152083156e-07,
      "loss": 0.0423,
      "step": 2504,
      "video_reward_cumulative_accuracy": 0.8422523961661342
    },
    {
      "epoch": 0.7435440783615316,
      "grad_norm": 1.016870379447937,
      "learning_rate": 9.366507794464275e-07,
      "loss": 0.0096,
      "step": 2505,
      "video_reward_cumulative_accuracy": 0.8423153692614771
    },
    {
      "epoch": 0.7438409023449095,
      "grad_norm": 0.46760135889053345,
      "learning_rate": 9.346302220916619e-07,
      "loss": 0.004,
      "step": 2506,
      "video_reward_cumulative_accuracy": 0.8423782920989625
    },
    {
      "epoch": 0.7441377263282873,
      "grad_norm": 2.0241291522979736,
      "learning_rate": 9.326113453132848e-07,
      "loss": 0.051,
      "step": 2507,
      "video_reward_cumulative_accuracy": 0.8424411647387315
    },
    {
      "epoch": 0.7444345503116652,
      "grad_norm": 1.800049901008606,
      "learning_rate": 9.305941512787542e-07,
      "loss": 0.0245,
      "step": 2508,
      "video_reward_cumulative_accuracy": 0.8425039872408293
    },
    {
      "epoch": 0.744731374295043,
      "grad_norm": 0.5981684923171997,
      "learning_rate": 9.28578642153726e-07,
      "loss": 0.0119,
      "step": 2509,
      "video_reward_cumulative_accuracy": 0.8425667596652052
    },
    {
      "epoch": 0.7450281982784209,
      "grad_norm": 2.190657615661621,
      "learning_rate": 9.265648201020447e-07,
      "loss": 0.0158,
      "step": 2510,
      "video_reward_cumulative_accuracy": 0.8426294820717132
    },
    {
      "epoch": 0.7453250222617988,
      "grad_norm": 3.271556854248047,
      "learning_rate": 9.245526872857424e-07,
      "loss": 0.0234,
      "step": 2511,
      "video_reward_cumulative_accuracy": 0.8426921545201115
    },
    {
      "epoch": 0.7456218462451766,
      "grad_norm": 1.8033744096755981,
      "learning_rate": 9.225422458650404e-07,
      "loss": 0.0207,
      "step": 2512,
      "video_reward_cumulative_accuracy": 0.8427547770700637
    },
    {
      "epoch": 0.7459186702285545,
      "grad_norm": 1.6773443222045898,
      "learning_rate": 9.205334979983402e-07,
      "loss": 0.0244,
      "step": 2513,
      "video_reward_cumulative_accuracy": 0.8428173497811381
    },
    {
      "epoch": 0.7462154942119323,
      "grad_norm": 1.4466123580932617,
      "learning_rate": 9.185264458422313e-07,
      "loss": 0.0266,
      "step": 2514,
      "video_reward_cumulative_accuracy": 0.8428798727128083
    },
    {
      "epoch": 0.7465123181953102,
      "grad_norm": 0.8426374793052673,
      "learning_rate": 9.165210915514758e-07,
      "loss": 0.0225,
      "step": 2515,
      "video_reward_cumulative_accuracy": 0.8429423459244533
    },
    {
      "epoch": 0.746809142178688,
      "grad_norm": 1.8962173461914062,
      "learning_rate": 9.145174372790178e-07,
      "loss": 0.0292,
      "step": 2516,
      "video_reward_cumulative_accuracy": 0.8430047694753577
    },
    {
      "epoch": 0.7471059661620659,
      "grad_norm": 2.5202438831329346,
      "learning_rate": 9.125154851759749e-07,
      "loss": 0.0394,
      "step": 2517,
      "video_reward_cumulative_accuracy": 0.843067143424712
    },
    {
      "epoch": 0.7474027901454438,
      "grad_norm": 1.6238250732421875,
      "learning_rate": 9.105152373916346e-07,
      "loss": 0.069,
      "step": 2518,
      "video_reward_cumulative_accuracy": 0.8431294678316124
    },
    {
      "epoch": 0.7476996141288216,
      "grad_norm": 1.0544601678848267,
      "learning_rate": 9.085166960734604e-07,
      "loss": 0.0164,
      "step": 2519,
      "video_reward_cumulative_accuracy": 0.8431917427550615
    },
    {
      "epoch": 0.7479964381121995,
      "grad_norm": 2.0650112628936768,
      "learning_rate": 9.06519863367078e-07,
      "loss": 0.0742,
      "step": 2520,
      "video_reward_cumulative_accuracy": 0.8432539682539683
    },
    {
      "epoch": 0.7482932620955773,
      "grad_norm": 1.7210766077041626,
      "learning_rate": 9.045247414162817e-07,
      "loss": 0.0288,
      "step": 2521,
      "video_reward_cumulative_accuracy": 0.843316144387148
    },
    {
      "epoch": 0.7485900860789552,
      "grad_norm": 2.06630539894104,
      "learning_rate": 9.025313323630297e-07,
      "loss": 0.0206,
      "step": 2522,
      "video_reward_cumulative_accuracy": 0.8431800158604282
    },
    {
      "epoch": 0.748886910062333,
      "grad_norm": 2.9529051780700684,
      "learning_rate": 9.005396383474371e-07,
      "loss": 0.0575,
      "step": 2523,
      "video_reward_cumulative_accuracy": 0.8428458184700753
    },
    {
      "epoch": 0.7491837340457109,
      "grad_norm": 1.8153632879257202,
      "learning_rate": 8.985496615077849e-07,
      "loss": 0.0322,
      "step": 2524,
      "video_reward_cumulative_accuracy": 0.8429080824088748
    },
    {
      "epoch": 0.7494805580290888,
      "grad_norm": 2.185244560241699,
      "learning_rate": 8.965614039805029e-07,
      "loss": 0.0312,
      "step": 2525,
      "video_reward_cumulative_accuracy": 0.8429702970297029
    },
    {
      "epoch": 0.7497773820124666,
      "grad_norm": 0.35279035568237305,
      "learning_rate": 8.945748679001808e-07,
      "loss": 0.0041,
      "step": 2526,
      "video_reward_cumulative_accuracy": 0.8430324623911323
    },
    {
      "epoch": 0.7500742059958445,
      "grad_norm": 1.9734653234481812,
      "learning_rate": 8.925900553995564e-07,
      "loss": 0.0319,
      "step": 2527,
      "video_reward_cumulative_accuracy": 0.8430945785516423
    },
    {
      "epoch": 0.7503710299792223,
      "grad_norm": 1.5152698755264282,
      "learning_rate": 8.906069686095189e-07,
      "loss": 0.0241,
      "step": 2528,
      "video_reward_cumulative_accuracy": 0.8431566455696202
    },
    {
      "epoch": 0.7506678539626002,
      "grad_norm": 0.6552301049232483,
      "learning_rate": 8.886256096591048e-07,
      "loss": 0.0108,
      "step": 2529,
      "video_reward_cumulative_accuracy": 0.843218663503361
    },
    {
      "epoch": 0.750964677945978,
      "grad_norm": 3.156954050064087,
      "learning_rate": 8.866459806754948e-07,
      "loss": 0.0368,
      "step": 2530,
      "video_reward_cumulative_accuracy": 0.8432806324110672
    },
    {
      "epoch": 0.7512615019293559,
      "grad_norm": 1.5369415283203125,
      "learning_rate": 8.84668083784014e-07,
      "loss": 0.0243,
      "step": 2531,
      "video_reward_cumulative_accuracy": 0.8433425523508494
    },
    {
      "epoch": 0.7515583259127337,
      "grad_norm": 2.561065673828125,
      "learning_rate": 8.82691921108125e-07,
      "loss": 0.0483,
      "step": 2532,
      "video_reward_cumulative_accuracy": 0.8434044233807267
    },
    {
      "epoch": 0.7518551498961116,
      "grad_norm": 3.3253390789031982,
      "learning_rate": 8.807174947694308e-07,
      "loss": 0.0232,
      "step": 2533,
      "video_reward_cumulative_accuracy": 0.8434662455586261
    },
    {
      "epoch": 0.7521519738794895,
      "grad_norm": 2.2705724239349365,
      "learning_rate": 8.787448068876697e-07,
      "loss": 0.0315,
      "step": 2534,
      "video_reward_cumulative_accuracy": 0.8435280189423836
    },
    {
      "epoch": 0.7524487978628673,
      "grad_norm": 4.962090015411377,
      "learning_rate": 8.76773859580714e-07,
      "loss": 0.0608,
      "step": 2535,
      "video_reward_cumulative_accuracy": 0.8435897435897436
    },
    {
      "epoch": 0.7527456218462452,
      "grad_norm": 1.0470224618911743,
      "learning_rate": 8.748046549645675e-07,
      "loss": 0.0121,
      "step": 2536,
      "video_reward_cumulative_accuracy": 0.8436514195583596
    },
    {
      "epoch": 0.753042445829623,
      "grad_norm": 1.1866475343704224,
      "learning_rate": 8.72837195153361e-07,
      "loss": 0.0349,
      "step": 2537,
      "video_reward_cumulative_accuracy": 0.8437130469057943
    },
    {
      "epoch": 0.7533392698130009,
      "grad_norm": 1.4927583932876587,
      "learning_rate": 8.70871482259354e-07,
      "loss": 0.0145,
      "step": 2538,
      "video_reward_cumulative_accuracy": 0.8435776201733649
    },
    {
      "epoch": 0.7536360937963787,
      "grad_norm": 1.2378430366516113,
      "learning_rate": 8.689075183929304e-07,
      "loss": 0.015,
      "step": 2539,
      "video_reward_cumulative_accuracy": 0.8436392280425364
    },
    {
      "epoch": 0.7539329177797566,
      "grad_norm": 2.65450382232666,
      "learning_rate": 8.669453056625959e-07,
      "loss": 0.0736,
      "step": 2540,
      "video_reward_cumulative_accuracy": 0.8437007874015748
    },
    {
      "epoch": 0.7542297417631345,
      "grad_norm": 2.1863057613372803,
      "learning_rate": 8.649848461749772e-07,
      "loss": 0.0301,
      "step": 2541,
      "video_reward_cumulative_accuracy": 0.8437622983077528
    },
    {
      "epoch": 0.7545265657465123,
      "grad_norm": 1.4505633115768433,
      "learning_rate": 8.630261420348162e-07,
      "loss": 0.0232,
      "step": 2542,
      "video_reward_cumulative_accuracy": 0.8438237608182534
    },
    {
      "epoch": 0.7548233897298902,
      "grad_norm": 0.5751045942306519,
      "learning_rate": 8.610691953449727e-07,
      "loss": 0.0088,
      "step": 2543,
      "video_reward_cumulative_accuracy": 0.8438851749901691
    },
    {
      "epoch": 0.755120213713268,
      "grad_norm": 1.236505150794983,
      "learning_rate": 8.591140082064189e-07,
      "loss": 0.018,
      "step": 2544,
      "video_reward_cumulative_accuracy": 0.8439465408805031
    },
    {
      "epoch": 0.7554170376966459,
      "grad_norm": 1.4471156597137451,
      "learning_rate": 8.571605827182381e-07,
      "loss": 0.0264,
      "step": 2545,
      "video_reward_cumulative_accuracy": 0.844007858546169
    },
    {
      "epoch": 0.7557138616800237,
      "grad_norm": 1.338175892829895,
      "learning_rate": 8.552089209776226e-07,
      "loss": 0.0155,
      "step": 2546,
      "video_reward_cumulative_accuracy": 0.8440691280439906
    },
    {
      "epoch": 0.7560106856634016,
      "grad_norm": 1.3986073732376099,
      "learning_rate": 8.532590250798695e-07,
      "loss": 0.0298,
      "step": 2547,
      "video_reward_cumulative_accuracy": 0.8439340400471143
    },
    {
      "epoch": 0.7563075096467795,
      "grad_norm": 3.191577911376953,
      "learning_rate": 8.513108971183817e-07,
      "loss": 0.0398,
      "step": 2548,
      "video_reward_cumulative_accuracy": 0.8439952904238619
    },
    {
      "epoch": 0.7566043336301573,
      "grad_norm": 1.3186044692993164,
      "learning_rate": 8.493645391846642e-07,
      "loss": 0.0241,
      "step": 2549,
      "video_reward_cumulative_accuracy": 0.8440564927422519
    },
    {
      "epoch": 0.7569011576135352,
      "grad_norm": 3.0393431186676025,
      "learning_rate": 8.474199533683214e-07,
      "loss": 0.0545,
      "step": 2550,
      "video_reward_cumulative_accuracy": 0.8441176470588235
    },
    {
      "epoch": 0.757197981596913,
      "grad_norm": 2.7258641719818115,
      "learning_rate": 8.454771417570537e-07,
      "loss": 0.0403,
      "step": 2551,
      "video_reward_cumulative_accuracy": 0.8441787534300275
    },
    {
      "epoch": 0.7574948055802909,
      "grad_norm": 1.2162940502166748,
      "learning_rate": 8.435361064366585e-07,
      "loss": 0.0148,
      "step": 2552,
      "video_reward_cumulative_accuracy": 0.8442398119122257
    },
    {
      "epoch": 0.7577916295636687,
      "grad_norm": 0.32078516483306885,
      "learning_rate": 8.415968494910253e-07,
      "loss": 0.0036,
      "step": 2553,
      "video_reward_cumulative_accuracy": 0.8443008225616921
    },
    {
      "epoch": 0.7580884535470466,
      "grad_norm": 1.9234449863433838,
      "learning_rate": 8.396593730021355e-07,
      "loss": 0.0207,
      "step": 2554,
      "video_reward_cumulative_accuracy": 0.8441660140955364
    },
    {
      "epoch": 0.7583852775304245,
      "grad_norm": 4.319919586181641,
      "learning_rate": 8.377236790500584e-07,
      "loss": 0.0487,
      "step": 2555,
      "video_reward_cumulative_accuracy": 0.8442270058708415
    },
    {
      "epoch": 0.7586821015138023,
      "grad_norm": 1.7034393548965454,
      "learning_rate": 8.357897697129477e-07,
      "loss": 0.0126,
      "step": 2556,
      "video_reward_cumulative_accuracy": 0.8442879499217527
    },
    {
      "epoch": 0.7589789254971802,
      "grad_norm": 1.2667155265808105,
      "learning_rate": 8.338576470670437e-07,
      "loss": 0.0073,
      "step": 2557,
      "video_reward_cumulative_accuracy": 0.8443488463042628
    },
    {
      "epoch": 0.759275749480558,
      "grad_norm": 2.665156126022339,
      "learning_rate": 8.319273131866675e-07,
      "loss": 0.0465,
      "step": 2558,
      "video_reward_cumulative_accuracy": 0.8444096950742768
    },
    {
      "epoch": 0.7595725734639359,
      "grad_norm": 2.237888813018799,
      "learning_rate": 8.299987701442203e-07,
      "loss": 0.0296,
      "step": 2559,
      "video_reward_cumulative_accuracy": 0.8444704962876124
    },
    {
      "epoch": 0.7598693974473137,
      "grad_norm": 2.12754225730896,
      "learning_rate": 8.280720200101805e-07,
      "loss": 0.015,
      "step": 2560,
      "video_reward_cumulative_accuracy": 0.84453125
    },
    {
      "epoch": 0.7601662214306916,
      "grad_norm": 2.578101873397827,
      "learning_rate": 8.261470648530998e-07,
      "loss": 0.0259,
      "step": 2561,
      "video_reward_cumulative_accuracy": 0.8445919562670832
    },
    {
      "epoch": 0.7604630454140695,
      "grad_norm": 2.2189407348632812,
      "learning_rate": 8.242239067396063e-07,
      "loss": 0.0262,
      "step": 2562,
      "video_reward_cumulative_accuracy": 0.8446526151444185
    },
    {
      "epoch": 0.7607598693974473,
      "grad_norm": 1.3384156227111816,
      "learning_rate": 8.223025477343944e-07,
      "loss": 0.0228,
      "step": 2563,
      "video_reward_cumulative_accuracy": 0.8445181428014046
    },
    {
      "epoch": 0.7610566933808252,
      "grad_norm": 1.1982171535491943,
      "learning_rate": 8.203829899002316e-07,
      "loss": 0.0104,
      "step": 2564,
      "video_reward_cumulative_accuracy": 0.8445787831513261
    },
    {
      "epoch": 0.761353517364203,
      "grad_norm": 1.45271897315979,
      "learning_rate": 8.1846523529795e-07,
      "loss": 0.0164,
      "step": 2565,
      "video_reward_cumulative_accuracy": 0.8446393762183236
    },
    {
      "epoch": 0.7616503413475809,
      "grad_norm": 1.5878190994262695,
      "learning_rate": 8.165492859864435e-07,
      "loss": 0.0238,
      "step": 2566,
      "video_reward_cumulative_accuracy": 0.8445050662509743
    },
    {
      "epoch": 0.7619471653309587,
      "grad_norm": 2.7507424354553223,
      "learning_rate": 8.146351440226711e-07,
      "loss": 0.0443,
      "step": 2567,
      "video_reward_cumulative_accuracy": 0.8445656408258668
    },
    {
      "epoch": 0.7622439893143366,
      "grad_norm": 2.033236503601074,
      "learning_rate": 8.127228114616484e-07,
      "loss": 0.0223,
      "step": 2568,
      "video_reward_cumulative_accuracy": 0.844626168224299
    },
    {
      "epoch": 0.7625408132977145,
      "grad_norm": 0.4424172043800354,
      "learning_rate": 8.108122903564502e-07,
      "loss": 0.0082,
      "step": 2569,
      "video_reward_cumulative_accuracy": 0.8446866485013624
    },
    {
      "epoch": 0.7628376372810923,
      "grad_norm": 4.878910541534424,
      "learning_rate": 8.089035827582087e-07,
      "loss": 0.0613,
      "step": 2570,
      "video_reward_cumulative_accuracy": 0.8447470817120623
    },
    {
      "epoch": 0.7631344612644702,
      "grad_norm": 0.8770198822021484,
      "learning_rate": 8.069966907161042e-07,
      "loss": 0.0104,
      "step": 2571,
      "video_reward_cumulative_accuracy": 0.8448074679113186
    },
    {
      "epoch": 0.763431285247848,
      "grad_norm": 1.469495415687561,
      "learning_rate": 8.05091616277372e-07,
      "loss": 0.0102,
      "step": 2572,
      "video_reward_cumulative_accuracy": 0.8448678071539658
    },
    {
      "epoch": 0.7637281092312259,
      "grad_norm": 1.270909070968628,
      "learning_rate": 8.031883614872929e-07,
      "loss": 0.0271,
      "step": 2573,
      "video_reward_cumulative_accuracy": 0.8449280994947532
    },
    {
      "epoch": 0.7640249332146037,
      "grad_norm": 3.988424301147461,
      "learning_rate": 8.012869283891967e-07,
      "loss": 0.0606,
      "step": 2574,
      "video_reward_cumulative_accuracy": 0.844988344988345
    },
    {
      "epoch": 0.7643217571979816,
      "grad_norm": 2.989523410797119,
      "learning_rate": 7.993873190244558e-07,
      "loss": 0.0208,
      "step": 2575,
      "video_reward_cumulative_accuracy": 0.8448543689320388
    },
    {
      "epoch": 0.7646185811813595,
      "grad_norm": 0.9885690212249756,
      "learning_rate": 7.974895354324857e-07,
      "loss": 0.0226,
      "step": 2576,
      "video_reward_cumulative_accuracy": 0.8449145962732919
    },
    {
      "epoch": 0.7649154051647373,
      "grad_norm": 2.960150718688965,
      "learning_rate": 7.955935796507419e-07,
      "loss": 0.049,
      "step": 2577,
      "video_reward_cumulative_accuracy": 0.8449747768723321
    },
    {
      "epoch": 0.7652122291481152,
      "grad_norm": 3.4820029735565186,
      "learning_rate": 7.936994537147155e-07,
      "loss": 0.0499,
      "step": 2578,
      "video_reward_cumulative_accuracy": 0.8446470131885182
    },
    {
      "epoch": 0.765509053131493,
      "grad_norm": 2.2043135166168213,
      "learning_rate": 7.91807159657935e-07,
      "loss": 0.0192,
      "step": 2579,
      "video_reward_cumulative_accuracy": 0.8447072508724311
    },
    {
      "epoch": 0.7658058771148709,
      "grad_norm": 2.675544023513794,
      "learning_rate": 7.89916699511962e-07,
      "loss": 0.0514,
      "step": 2580,
      "video_reward_cumulative_accuracy": 0.8447674418604652
    },
    {
      "epoch": 0.7661027010982487,
      "grad_norm": 2.820864200592041,
      "learning_rate": 7.880280753063891e-07,
      "loss": 0.0133,
      "step": 2581,
      "video_reward_cumulative_accuracy": 0.8448275862068966
    },
    {
      "epoch": 0.7663995250816266,
      "grad_norm": 3.3666510581970215,
      "learning_rate": 7.86141289068838e-07,
      "loss": 0.0393,
      "step": 2582,
      "video_reward_cumulative_accuracy": 0.8448876839659178
    },
    {
      "epoch": 0.7666963490650045,
      "grad_norm": 3.5256621837615967,
      "learning_rate": 7.842563428249555e-07,
      "loss": 0.0183,
      "step": 2583,
      "video_reward_cumulative_accuracy": 0.8449477351916377
    },
    {
      "epoch": 0.7669931730483823,
      "grad_norm": 0.805884063243866,
      "learning_rate": 7.823732385984154e-07,
      "loss": 0.0258,
      "step": 2584,
      "video_reward_cumulative_accuracy": 0.8450077399380805
    },
    {
      "epoch": 0.7672899970317602,
      "grad_norm": 2.2179319858551025,
      "learning_rate": 7.804919784109124e-07,
      "loss": 0.0197,
      "step": 2585,
      "video_reward_cumulative_accuracy": 0.8450676982591876
    },
    {
      "epoch": 0.767586821015138,
      "grad_norm": 2.208432674407959,
      "learning_rate": 7.786125642821632e-07,
      "loss": 0.0248,
      "step": 2586,
      "video_reward_cumulative_accuracy": 0.8451276102088167
    },
    {
      "epoch": 0.7678836449985159,
      "grad_norm": 1.296506643295288,
      "learning_rate": 7.767349982298992e-07,
      "loss": 0.0078,
      "step": 2587,
      "video_reward_cumulative_accuracy": 0.8451874758407422
    },
    {
      "epoch": 0.7681804689818937,
      "grad_norm": 2.701901435852051,
      "learning_rate": 7.748592822698708e-07,
      "loss": 0.02,
      "step": 2588,
      "video_reward_cumulative_accuracy": 0.8450540958268934
    },
    {
      "epoch": 0.7684772929652716,
      "grad_norm": 4.324398517608643,
      "learning_rate": 7.729854184158411e-07,
      "loss": 0.033,
      "step": 2589,
      "video_reward_cumulative_accuracy": 0.8451139436075705
    },
    {
      "epoch": 0.7687741169486495,
      "grad_norm": 2.3187663555145264,
      "learning_rate": 7.711134086795852e-07,
      "loss": 0.0229,
      "step": 2590,
      "video_reward_cumulative_accuracy": 0.8451737451737452
    },
    {
      "epoch": 0.7690709409320273,
      "grad_norm": 3.8253369331359863,
      "learning_rate": 7.692432550708873e-07,
      "loss": 0.0558,
      "step": 2591,
      "video_reward_cumulative_accuracy": 0.8450405248938634
    },
    {
      "epoch": 0.7693677649154052,
      "grad_norm": 2.7652409076690674,
      "learning_rate": 7.673749595975378e-07,
      "loss": 0.0173,
      "step": 2592,
      "video_reward_cumulative_accuracy": 0.8451003086419753
    },
    {
      "epoch": 0.769664588898783,
      "grad_norm": 4.698938846588135,
      "learning_rate": 7.655085242653337e-07,
      "loss": 0.0321,
      "step": 2593,
      "video_reward_cumulative_accuracy": 0.8449672194369456
    },
    {
      "epoch": 0.7699614128821609,
      "grad_norm": 3.0443601608276367,
      "learning_rate": 7.636439510780747e-07,
      "loss": 0.0465,
      "step": 2594,
      "video_reward_cumulative_accuracy": 0.8450269853508096
    },
    {
      "epoch": 0.7702582368655387,
      "grad_norm": 3.421412706375122,
      "learning_rate": 7.617812420375611e-07,
      "loss": 0.0844,
      "step": 2595,
      "video_reward_cumulative_accuracy": 0.8450867052023121
    },
    {
      "epoch": 0.7705550608489166,
      "grad_norm": 1.4718027114868164,
      "learning_rate": 7.599203991435924e-07,
      "loss": 0.0188,
      "step": 2596,
      "video_reward_cumulative_accuracy": 0.8451463790446841
    },
    {
      "epoch": 0.7708518848322945,
      "grad_norm": 2.6113193035125732,
      "learning_rate": 7.580614243939627e-07,
      "loss": 0.0317,
      "step": 2597,
      "video_reward_cumulative_accuracy": 0.8452060069310743
    },
    {
      "epoch": 0.7711487088156723,
      "grad_norm": 3.532366991043091,
      "learning_rate": 7.562043197844626e-07,
      "loss": 0.0406,
      "step": 2598,
      "video_reward_cumulative_accuracy": 0.8452655889145496
    },
    {
      "epoch": 0.7714455327990501,
      "grad_norm": 1.3378589153289795,
      "learning_rate": 7.543490873088738e-07,
      "loss": 0.0172,
      "step": 2599,
      "video_reward_cumulative_accuracy": 0.8453251250480954
    },
    {
      "epoch": 0.771742356782428,
      "grad_norm": 6.215091228485107,
      "learning_rate": 7.52495728958969e-07,
      "loss": 0.0305,
      "step": 2600,
      "video_reward_cumulative_accuracy": 0.8453846153846154
    },
    {
      "epoch": 0.771742356782428,
      "eval_runtime": 132.3938,
      "eval_samples_per_second": 5.959,
      "eval_steps_per_second": 0.748,
      "eval_test_set_accuracy": 0.821969696969697,
      "step": 2600
    },
    {
      "epoch": 0.7720391807658059,
      "grad_norm": 2.3559024333953857,
      "learning_rate": 7.506442467245084e-07,
      "loss": 0.0387,
      "step": 2601,
      "video_reward_cumulative_accuracy": 0.845444059976932
    },
    {
      "epoch": 0.7723360047491837,
      "grad_norm": 3.2431516647338867,
      "learning_rate": 7.487946425932372e-07,
      "loss": 0.0431,
      "step": 2602,
      "video_reward_cumulative_accuracy": 0.8453112990007686
    },
    {
      "epoch": 0.7726328287325616,
      "grad_norm": 3.5290749073028564,
      "learning_rate": 7.469469185508854e-07,
      "loss": 0.0461,
      "step": 2603,
      "video_reward_cumulative_accuracy": 0.8453707260852862
    },
    {
      "epoch": 0.7729296527159395,
      "grad_norm": 4.477185249328613,
      "learning_rate": 7.451010765811628e-07,
      "loss": 0.0644,
      "step": 2604,
      "video_reward_cumulative_accuracy": 0.8452380952380952
    },
    {
      "epoch": 0.7732264766993173,
      "grad_norm": 1.7302701473236084,
      "learning_rate": 7.432571186657614e-07,
      "loss": 0.0116,
      "step": 2605,
      "video_reward_cumulative_accuracy": 0.8452975047984644
    },
    {
      "epoch": 0.7735233006826951,
      "grad_norm": 1.9252644777297974,
      "learning_rate": 7.414150467843498e-07,
      "loss": 0.027,
      "step": 2606,
      "video_reward_cumulative_accuracy": 0.8453568687643899
    },
    {
      "epoch": 0.773820124666073,
      "grad_norm": 3.0413153171539307,
      "learning_rate": 7.395748629145685e-07,
      "loss": 0.088,
      "step": 2607,
      "video_reward_cumulative_accuracy": 0.8454161871883391
    },
    {
      "epoch": 0.7741169486494509,
      "grad_norm": 2.653510093688965,
      "learning_rate": 7.37736569032036e-07,
      "loss": 0.0589,
      "step": 2608,
      "video_reward_cumulative_accuracy": 0.8452837423312883
    },
    {
      "epoch": 0.7744137726328287,
      "grad_norm": 2.2542154788970947,
      "learning_rate": 7.359001671103361e-07,
      "loss": 0.0261,
      "step": 2609,
      "video_reward_cumulative_accuracy": 0.8453430433116137
    },
    {
      "epoch": 0.7747105966162066,
      "grad_norm": 1.7998321056365967,
      "learning_rate": 7.340656591210279e-07,
      "loss": 0.0194,
      "step": 2610,
      "video_reward_cumulative_accuracy": 0.8454022988505747
    },
    {
      "epoch": 0.7750074205995845,
      "grad_norm": 2.0335445404052734,
      "learning_rate": 7.322330470336314e-07,
      "loss": 0.0157,
      "step": 2611,
      "video_reward_cumulative_accuracy": 0.845461509000383
    },
    {
      "epoch": 0.7753042445829623,
      "grad_norm": 3.780298948287964,
      "learning_rate": 7.304023328156345e-07,
      "loss": 0.0582,
      "step": 2612,
      "video_reward_cumulative_accuracy": 0.84552067381317
    },
    {
      "epoch": 0.7756010685663401,
      "grad_norm": 2.430021286010742,
      "learning_rate": 7.285735184324872e-07,
      "loss": 0.0478,
      "step": 2613,
      "video_reward_cumulative_accuracy": 0.8455797933409873
    },
    {
      "epoch": 0.775897892549718,
      "grad_norm": 2.428281784057617,
      "learning_rate": 7.267466058475969e-07,
      "loss": 0.0268,
      "step": 2614,
      "video_reward_cumulative_accuracy": 0.8456388676358072
    },
    {
      "epoch": 0.7761947165330959,
      "grad_norm": 3.825349807739258,
      "learning_rate": 7.249215970223347e-07,
      "loss": 0.033,
      "step": 2615,
      "video_reward_cumulative_accuracy": 0.8455066921606118
    },
    {
      "epoch": 0.7764915405164737,
      "grad_norm": 3.4468047618865967,
      "learning_rate": 7.230984939160227e-07,
      "loss": 0.0454,
      "step": 2616,
      "video_reward_cumulative_accuracy": 0.845565749235474
    },
    {
      "epoch": 0.7767883644998516,
      "grad_norm": 2.420203447341919,
      "learning_rate": 7.2127729848594e-07,
      "loss": 0.0323,
      "step": 2617,
      "video_reward_cumulative_accuracy": 0.8456247611769201
    },
    {
      "epoch": 0.7770851884832295,
      "grad_norm": 0.8875495195388794,
      "learning_rate": 7.194580126873155e-07,
      "loss": 0.0189,
      "step": 2618,
      "video_reward_cumulative_accuracy": 0.8456837280366692
    },
    {
      "epoch": 0.7773820124666073,
      "grad_norm": 0.7100759148597717,
      "learning_rate": 7.176406384733289e-07,
      "loss": 0.0083,
      "step": 2619,
      "video_reward_cumulative_accuracy": 0.8457426498663612
    },
    {
      "epoch": 0.7776788364499851,
      "grad_norm": 2.6363914012908936,
      "learning_rate": 7.158251777951103e-07,
      "loss": 0.0294,
      "step": 2620,
      "video_reward_cumulative_accuracy": 0.8456106870229008
    },
    {
      "epoch": 0.777975660433363,
      "grad_norm": 3.5254852771759033,
      "learning_rate": 7.140116326017304e-07,
      "loss": 0.0296,
      "step": 2621,
      "video_reward_cumulative_accuracy": 0.8456695917588707
    },
    {
      "epoch": 0.7782724844167409,
      "grad_norm": 2.1141459941864014,
      "learning_rate": 7.122000048402078e-07,
      "loss": 0.0356,
      "step": 2622,
      "video_reward_cumulative_accuracy": 0.8457284515636918
    },
    {
      "epoch": 0.7785693084001187,
      "grad_norm": 1.737390398979187,
      "learning_rate": 7.10390296455499e-07,
      "loss": 0.0434,
      "step": 2623,
      "video_reward_cumulative_accuracy": 0.8457872664887534
    },
    {
      "epoch": 0.7788661323834966,
      "grad_norm": 1.159108281135559,
      "learning_rate": 7.085825093905025e-07,
      "loss": 0.0135,
      "step": 2624,
      "video_reward_cumulative_accuracy": 0.8458460365853658
    },
    {
      "epoch": 0.7791629563668745,
      "grad_norm": 5.79228401184082,
      "learning_rate": 7.06776645586053e-07,
      "loss": 0.0594,
      "step": 2625,
      "video_reward_cumulative_accuracy": 0.8459047619047619
    },
    {
      "epoch": 0.7794597803502523,
      "grad_norm": 1.6138088703155518,
      "learning_rate": 7.049727069809206e-07,
      "loss": 0.0134,
      "step": 2626,
      "video_reward_cumulative_accuracy": 0.845963442498096
    },
    {
      "epoch": 0.7797566043336301,
      "grad_norm": 4.311069011688232,
      "learning_rate": 7.031706955118095e-07,
      "loss": 0.0717,
      "step": 2627,
      "video_reward_cumulative_accuracy": 0.8460220784164446
    },
    {
      "epoch": 0.780053428317008,
      "grad_norm": 1.8174479007720947,
      "learning_rate": 7.013706131133522e-07,
      "loss": 0.0319,
      "step": 2628,
      "video_reward_cumulative_accuracy": 0.8460806697108066
    },
    {
      "epoch": 0.7803502523003859,
      "grad_norm": 1.904004693031311,
      "learning_rate": 6.995724617181124e-07,
      "loss": 0.0252,
      "step": 2629,
      "video_reward_cumulative_accuracy": 0.8461392164321034
    },
    {
      "epoch": 0.7806470762837637,
      "grad_norm": 2.597764015197754,
      "learning_rate": 6.977762432565805e-07,
      "loss": 0.0188,
      "step": 2630,
      "video_reward_cumulative_accuracy": 0.8461977186311787
    },
    {
      "epoch": 0.7809439002671416,
      "grad_norm": 2.199734926223755,
      "learning_rate": 6.95981959657171e-07,
      "loss": 0.0377,
      "step": 2631,
      "video_reward_cumulative_accuracy": 0.8462561763587989
    },
    {
      "epoch": 0.7812407242505195,
      "grad_norm": 0.7549028396606445,
      "learning_rate": 6.941896128462227e-07,
      "loss": 0.018,
      "step": 2632,
      "video_reward_cumulative_accuracy": 0.8463145896656535
    },
    {
      "epoch": 0.7815375482338973,
      "grad_norm": 2.8155016899108887,
      "learning_rate": 6.923992047479921e-07,
      "loss": 0.0286,
      "step": 2633,
      "video_reward_cumulative_accuracy": 0.8463729586023547
    },
    {
      "epoch": 0.7818343722172751,
      "grad_norm": 2.58642578125,
      "learning_rate": 6.906107372846568e-07,
      "loss": 0.0239,
      "step": 2634,
      "video_reward_cumulative_accuracy": 0.8462414578587699
    },
    {
      "epoch": 0.782131196200653,
      "grad_norm": 2.169006109237671,
      "learning_rate": 6.888242123763103e-07,
      "loss": 0.0208,
      "step": 2635,
      "video_reward_cumulative_accuracy": 0.8462998102466793
    },
    {
      "epoch": 0.7824280201840309,
      "grad_norm": 2.2981460094451904,
      "learning_rate": 6.870396319409602e-07,
      "loss": 0.0529,
      "step": 2636,
      "video_reward_cumulative_accuracy": 0.8463581183611533
    },
    {
      "epoch": 0.7827248441674087,
      "grad_norm": 1.9309078454971313,
      "learning_rate": 6.852569978945281e-07,
      "loss": 0.0217,
      "step": 2637,
      "video_reward_cumulative_accuracy": 0.8464163822525598
    },
    {
      "epoch": 0.7830216681507866,
      "grad_norm": 0.8763541579246521,
      "learning_rate": 6.834763121508428e-07,
      "loss": 0.0146,
      "step": 2638,
      "video_reward_cumulative_accuracy": 0.8464746019711903
    },
    {
      "epoch": 0.7833184921341645,
      "grad_norm": 3.7167813777923584,
      "learning_rate": 6.816975766216441e-07,
      "loss": 0.0478,
      "step": 2639,
      "video_reward_cumulative_accuracy": 0.8465327775672603
    },
    {
      "epoch": 0.7836153161175423,
      "grad_norm": 4.534986972808838,
      "learning_rate": 6.799207932165772e-07,
      "loss": 0.045,
      "step": 2640,
      "video_reward_cumulative_accuracy": 0.8464015151515152
    },
    {
      "epoch": 0.7839121401009201,
      "grad_norm": 1.1529324054718018,
      "learning_rate": 6.781459638431923e-07,
      "loss": 0.0177,
      "step": 2641,
      "video_reward_cumulative_accuracy": 0.8464596743657705
    },
    {
      "epoch": 0.784208964084298,
      "grad_norm": 1.5981630086898804,
      "learning_rate": 6.763730904069393e-07,
      "loss": 0.0107,
      "step": 2642,
      "video_reward_cumulative_accuracy": 0.8465177895533686
    },
    {
      "epoch": 0.7845057880676759,
      "grad_norm": 2.033008575439453,
      "learning_rate": 6.746021748111709e-07,
      "loss": 0.0226,
      "step": 2643,
      "video_reward_cumulative_accuracy": 0.846575860764283
    },
    {
      "epoch": 0.7848026120510537,
      "grad_norm": 0.6989650726318359,
      "learning_rate": 6.728332189571368e-07,
      "loss": 0.007,
      "step": 2644,
      "video_reward_cumulative_accuracy": 0.8466338880484114
    },
    {
      "epoch": 0.7850994360344316,
      "grad_norm": 1.581715703010559,
      "learning_rate": 6.710662247439831e-07,
      "loss": 0.0276,
      "step": 2645,
      "video_reward_cumulative_accuracy": 0.8466918714555766
    },
    {
      "epoch": 0.7853962600178095,
      "grad_norm": 1.8581857681274414,
      "learning_rate": 6.693011940687499e-07,
      "loss": 0.0321,
      "step": 2646,
      "video_reward_cumulative_accuracy": 0.8465608465608465
    },
    {
      "epoch": 0.7856930840011873,
      "grad_norm": 1.9918158054351807,
      "learning_rate": 6.675381288263675e-07,
      "loss": 0.0228,
      "step": 2647,
      "video_reward_cumulative_accuracy": 0.8466188137514167
    },
    {
      "epoch": 0.7859899079845651,
      "grad_norm": 2.124476194381714,
      "learning_rate": 6.657770309096584e-07,
      "loss": 0.011,
      "step": 2648,
      "video_reward_cumulative_accuracy": 0.8466767371601208
    },
    {
      "epoch": 0.786286731967943,
      "grad_norm": 2.0029237270355225,
      "learning_rate": 6.640179022093324e-07,
      "loss": 0.0261,
      "step": 2649,
      "video_reward_cumulative_accuracy": 0.846734616836542
    },
    {
      "epoch": 0.7865835559513209,
      "grad_norm": 1.5002411603927612,
      "learning_rate": 6.622607446139844e-07,
      "loss": 0.0173,
      "step": 2650,
      "video_reward_cumulative_accuracy": 0.8467924528301887
    },
    {
      "epoch": 0.7868803799346987,
      "grad_norm": 2.7715702056884766,
      "learning_rate": 6.605055600100945e-07,
      "loss": 0.0615,
      "step": 2651,
      "video_reward_cumulative_accuracy": 0.8466616371180686
    },
    {
      "epoch": 0.7871772039180766,
      "grad_norm": 2.444265604019165,
      "learning_rate": 6.587523502820226e-07,
      "loss": 0.0486,
      "step": 2652,
      "video_reward_cumulative_accuracy": 0.8467194570135747
    },
    {
      "epoch": 0.7874740279014545,
      "grad_norm": 1.8649511337280273,
      "learning_rate": 6.570011173120108e-07,
      "loss": 0.043,
      "step": 2653,
      "video_reward_cumulative_accuracy": 0.8467772333207689
    },
    {
      "epoch": 0.7877708518848323,
      "grad_norm": 0.8448922634124756,
      "learning_rate": 6.552518629801752e-07,
      "loss": 0.0105,
      "step": 2654,
      "video_reward_cumulative_accuracy": 0.8468349660889224
    },
    {
      "epoch": 0.7880676758682101,
      "grad_norm": 2.1769087314605713,
      "learning_rate": 6.535045891645125e-07,
      "loss": 0.0765,
      "step": 2655,
      "video_reward_cumulative_accuracy": 0.8467043314500942
    },
    {
      "epoch": 0.788364499851588,
      "grad_norm": 3.1270945072174072,
      "learning_rate": 6.517592977408909e-07,
      "loss": 0.045,
      "step": 2656,
      "video_reward_cumulative_accuracy": 0.8467620481927711
    },
    {
      "epoch": 0.7886613238349659,
      "grad_norm": 2.3830227851867676,
      "learning_rate": 6.500159905830484e-07,
      "loss": 0.0181,
      "step": 2657,
      "video_reward_cumulative_accuracy": 0.8468197214904027
    },
    {
      "epoch": 0.7889581478183437,
      "grad_norm": 2.3455231189727783,
      "learning_rate": 6.48274669562596e-07,
      "loss": 0.0461,
      "step": 2658,
      "video_reward_cumulative_accuracy": 0.8466892400300978
    },
    {
      "epoch": 0.7892549718017215,
      "grad_norm": 1.0843396186828613,
      "learning_rate": 6.465353365490093e-07,
      "loss": 0.0176,
      "step": 2659,
      "video_reward_cumulative_accuracy": 0.8467468973298232
    },
    {
      "epoch": 0.7895517957850995,
      "grad_norm": 1.672133207321167,
      "learning_rate": 6.447979934096313e-07,
      "loss": 0.0171,
      "step": 2660,
      "video_reward_cumulative_accuracy": 0.8468045112781954
    },
    {
      "epoch": 0.7898486197684773,
      "grad_norm": 2.30859112739563,
      "learning_rate": 6.430626420096703e-07,
      "loss": 0.0343,
      "step": 2661,
      "video_reward_cumulative_accuracy": 0.8468620819240887
    },
    {
      "epoch": 0.7901454437518551,
      "grad_norm": 0.6453092098236084,
      "learning_rate": 6.413292842121927e-07,
      "loss": 0.0085,
      "step": 2662,
      "video_reward_cumulative_accuracy": 0.8469196093163035
    },
    {
      "epoch": 0.790442267735233,
      "grad_norm": 2.6562001705169678,
      "learning_rate": 6.395979218781276e-07,
      "loss": 0.0279,
      "step": 2663,
      "video_reward_cumulative_accuracy": 0.8469770935035674
    },
    {
      "epoch": 0.7907390917186109,
      "grad_norm": 0.6483455300331116,
      "learning_rate": 6.37868556866259e-07,
      "loss": 0.013,
      "step": 2664,
      "video_reward_cumulative_accuracy": 0.8470345345345346
    },
    {
      "epoch": 0.7910359157019887,
      "grad_norm": 2.082960605621338,
      "learning_rate": 6.361411910332288e-07,
      "loss": 0.027,
      "step": 2665,
      "video_reward_cumulative_accuracy": 0.8470919324577861
    },
    {
      "epoch": 0.7913327396853665,
      "grad_norm": 2.668041944503784,
      "learning_rate": 6.34415826233532e-07,
      "loss": 0.0209,
      "step": 2666,
      "video_reward_cumulative_accuracy": 0.8471492873218305
    },
    {
      "epoch": 0.7916295636687445,
      "grad_norm": 1.8569388389587402,
      "learning_rate": 6.326924643195151e-07,
      "loss": 0.027,
      "step": 2667,
      "video_reward_cumulative_accuracy": 0.8470191226096738
    },
    {
      "epoch": 0.7919263876521223,
      "grad_norm": 2.4845709800720215,
      "learning_rate": 6.309711071413752e-07,
      "loss": 0.0211,
      "step": 2668,
      "video_reward_cumulative_accuracy": 0.8470764617691154
    },
    {
      "epoch": 0.7922232116355001,
      "grad_norm": 3.088460922241211,
      "learning_rate": 6.292517565471548e-07,
      "loss": 0.0389,
      "step": 2669,
      "video_reward_cumulative_accuracy": 0.8471337579617835
    },
    {
      "epoch": 0.792520035618878,
      "grad_norm": 1.7615797519683838,
      "learning_rate": 6.275344143827442e-07,
      "loss": 0.0808,
      "step": 2670,
      "video_reward_cumulative_accuracy": 0.8471910112359551
    },
    {
      "epoch": 0.7928168596022559,
      "grad_norm": 6.1424360275268555,
      "learning_rate": 6.258190824918772e-07,
      "loss": 0.0601,
      "step": 2671,
      "video_reward_cumulative_accuracy": 0.8472482216398353
    },
    {
      "epoch": 0.7931136835856337,
      "grad_norm": 3.102992534637451,
      "learning_rate": 6.241057627161287e-07,
      "loss": 0.0441,
      "step": 2672,
      "video_reward_cumulative_accuracy": 0.8473053892215568
    },
    {
      "epoch": 0.7934105075690115,
      "grad_norm": 0.6833072900772095,
      "learning_rate": 6.223944568949147e-07,
      "loss": 0.005,
      "step": 2673,
      "video_reward_cumulative_accuracy": 0.8473625140291807
    },
    {
      "epoch": 0.7937073315523895,
      "grad_norm": 3.3606889247894287,
      "learning_rate": 6.206851668654867e-07,
      "loss": 0.0639,
      "step": 2674,
      "video_reward_cumulative_accuracy": 0.8474195961106956
    },
    {
      "epoch": 0.7940041555357673,
      "grad_norm": 2.6462979316711426,
      "learning_rate": 6.189778944629343e-07,
      "loss": 0.0401,
      "step": 2675,
      "video_reward_cumulative_accuracy": 0.8474766355140186
    },
    {
      "epoch": 0.7943009795191451,
      "grad_norm": 2.362200975418091,
      "learning_rate": 6.172726415201796e-07,
      "loss": 0.0143,
      "step": 2676,
      "video_reward_cumulative_accuracy": 0.8475336322869955
    },
    {
      "epoch": 0.794597803502523,
      "grad_norm": 3.2839114665985107,
      "learning_rate": 6.155694098679785e-07,
      "loss": 0.0293,
      "step": 2677,
      "video_reward_cumulative_accuracy": 0.8475905864774
    },
    {
      "epoch": 0.7948946274859009,
      "grad_norm": 3.1356849670410156,
      "learning_rate": 6.138682013349137e-07,
      "loss": 0.0445,
      "step": 2678,
      "video_reward_cumulative_accuracy": 0.8472740851381628
    },
    {
      "epoch": 0.7951914514692787,
      "grad_norm": 3.2600576877593994,
      "learning_rate": 6.121690177473983e-07,
      "loss": 0.0273,
      "step": 2679,
      "video_reward_cumulative_accuracy": 0.847331093691676
    },
    {
      "epoch": 0.7954882754526565,
      "grad_norm": 2.7981717586517334,
      "learning_rate": 6.104718609296709e-07,
      "loss": 0.0729,
      "step": 2680,
      "video_reward_cumulative_accuracy": 0.8473880597014926
    },
    {
      "epoch": 0.7957850994360345,
      "grad_norm": 0.5687323212623596,
      "learning_rate": 6.087767327037944e-07,
      "loss": 0.0221,
      "step": 2681,
      "video_reward_cumulative_accuracy": 0.8474449832152182
    },
    {
      "epoch": 0.7960819234194123,
      "grad_norm": 2.2972726821899414,
      "learning_rate": 6.070836348896536e-07,
      "loss": 0.0153,
      "step": 2682,
      "video_reward_cumulative_accuracy": 0.8475018642803878
    },
    {
      "epoch": 0.7963787474027901,
      "grad_norm": 0.8837341666221619,
      "learning_rate": 6.053925693049523e-07,
      "loss": 0.0174,
      "step": 2683,
      "video_reward_cumulative_accuracy": 0.8475587029444651
    },
    {
      "epoch": 0.796675571386168,
      "grad_norm": 3.468062400817871,
      "learning_rate": 6.037035377652143e-07,
      "loss": 0.0426,
      "step": 2684,
      "video_reward_cumulative_accuracy": 0.8476154992548435
    },
    {
      "epoch": 0.7969723953695459,
      "grad_norm": 2.4997260570526123,
      "learning_rate": 6.020165420837786e-07,
      "loss": 0.0243,
      "step": 2685,
      "video_reward_cumulative_accuracy": 0.8476722532588454
    },
    {
      "epoch": 0.7972692193529237,
      "grad_norm": 2.9322338104248047,
      "learning_rate": 6.003315840717991e-07,
      "loss": 0.0768,
      "step": 2686,
      "video_reward_cumulative_accuracy": 0.847728965003723
    },
    {
      "epoch": 0.7975660433363015,
      "grad_norm": 2.4431042671203613,
      "learning_rate": 5.986486655382423e-07,
      "loss": 0.0188,
      "step": 2687,
      "video_reward_cumulative_accuracy": 0.847785634536658
    },
    {
      "epoch": 0.7978628673196795,
      "grad_norm": 1.0281823873519897,
      "learning_rate": 5.96967788289883e-07,
      "loss": 0.0198,
      "step": 2688,
      "video_reward_cumulative_accuracy": 0.8478422619047619
    },
    {
      "epoch": 0.7981596913030573,
      "grad_norm": 3.702180862426758,
      "learning_rate": 5.95288954131307e-07,
      "loss": 0.0398,
      "step": 2689,
      "video_reward_cumulative_accuracy": 0.847712904425437
    },
    {
      "epoch": 0.7984565152864351,
      "grad_norm": 3.876319646835327,
      "learning_rate": 5.93612164864906e-07,
      "loss": 0.0481,
      "step": 2690,
      "video_reward_cumulative_accuracy": 0.8477695167286246
    },
    {
      "epoch": 0.798753339269813,
      "grad_norm": 1.6658989191055298,
      "learning_rate": 5.919374222908753e-07,
      "loss": 0.0348,
      "step": 2691,
      "video_reward_cumulative_accuracy": 0.8478260869565217
    },
    {
      "epoch": 0.7990501632531909,
      "grad_norm": 1.9231022596359253,
      "learning_rate": 5.902647282072149e-07,
      "loss": 0.0492,
      "step": 2692,
      "video_reward_cumulative_accuracy": 0.8478826151560178
    },
    {
      "epoch": 0.7993469872365687,
      "grad_norm": 1.0569583177566528,
      "learning_rate": 5.885940844097226e-07,
      "loss": 0.0217,
      "step": 2693,
      "video_reward_cumulative_accuracy": 0.8475677682881545
    },
    {
      "epoch": 0.7996438112199465,
      "grad_norm": 0.5432111024856567,
      "learning_rate": 5.869254926919976e-07,
      "loss": 0.011,
      "step": 2694,
      "video_reward_cumulative_accuracy": 0.8476243504083147
    },
    {
      "epoch": 0.7999406352033245,
      "grad_norm": 1.2595707178115845,
      "learning_rate": 5.852589548454346e-07,
      "loss": 0.0299,
      "step": 2695,
      "video_reward_cumulative_accuracy": 0.8476808905380334
    },
    {
      "epoch": 0.8002374591867023,
      "grad_norm": 1.7451565265655518,
      "learning_rate": 5.835944726592241e-07,
      "loss": 0.0576,
      "step": 2696,
      "video_reward_cumulative_accuracy": 0.8477373887240356
    },
    {
      "epoch": 0.8005342831700801,
      "grad_norm": 2.6311933994293213,
      "learning_rate": 5.8193204792035e-07,
      "loss": 0.0461,
      "step": 2697,
      "video_reward_cumulative_accuracy": 0.8477938450129774
    },
    {
      "epoch": 0.800831107153458,
      "grad_norm": 1.7497179508209229,
      "learning_rate": 5.802716824135849e-07,
      "loss": 0.0388,
      "step": 2698,
      "video_reward_cumulative_accuracy": 0.8478502594514455
    },
    {
      "epoch": 0.8011279311368359,
      "grad_norm": 0.3311484754085541,
      "learning_rate": 5.786133779214939e-07,
      "loss": 0.0024,
      "step": 2699,
      "video_reward_cumulative_accuracy": 0.8479066320859577
    },
    {
      "epoch": 0.8014247551202137,
      "grad_norm": 2.232048749923706,
      "learning_rate": 5.769571362244258e-07,
      "loss": 0.0189,
      "step": 2700,
      "video_reward_cumulative_accuracy": 0.8479629629629629
    },
    {
      "epoch": 0.8017215791035915,
      "grad_norm": 1.2579126358032227,
      "learning_rate": 5.753029591005197e-07,
      "loss": 0.0104,
      "step": 2701,
      "video_reward_cumulative_accuracy": 0.8480192521288412
    },
    {
      "epoch": 0.8020184030869695,
      "grad_norm": 1.1070455312728882,
      "learning_rate": 5.736508483256931e-07,
      "loss": 0.0259,
      "step": 2702,
      "video_reward_cumulative_accuracy": 0.8478904515173945
    },
    {
      "epoch": 0.8023152270703473,
      "grad_norm": 1.2897884845733643,
      "learning_rate": 5.720008056736476e-07,
      "loss": 0.0082,
      "step": 2703,
      "video_reward_cumulative_accuracy": 0.8479467258601554
    },
    {
      "epoch": 0.8026120510537251,
      "grad_norm": 2.983745813369751,
      "learning_rate": 5.703528329158653e-07,
      "loss": 0.037,
      "step": 2704,
      "video_reward_cumulative_accuracy": 0.8478180473372781
    },
    {
      "epoch": 0.802908875037103,
      "grad_norm": 3.134145975112915,
      "learning_rate": 5.687069318216027e-07,
      "loss": 0.0506,
      "step": 2705,
      "video_reward_cumulative_accuracy": 0.8476894639556377
    },
    {
      "epoch": 0.8032056990204809,
      "grad_norm": 1.839669108390808,
      "learning_rate": 5.670631041578969e-07,
      "loss": 0.0386,
      "step": 2706,
      "video_reward_cumulative_accuracy": 0.8477457501847746
    },
    {
      "epoch": 0.8035025230038587,
      "grad_norm": 1.0478723049163818,
      "learning_rate": 5.654213516895549e-07,
      "loss": 0.0117,
      "step": 2707,
      "video_reward_cumulative_accuracy": 0.8478019948282232
    },
    {
      "epoch": 0.8037993469872365,
      "grad_norm": 2.9105279445648193,
      "learning_rate": 5.637816761791573e-07,
      "loss": 0.0334,
      "step": 2708,
      "video_reward_cumulative_accuracy": 0.8478581979320532
    },
    {
      "epoch": 0.8040961709706145,
      "grad_norm": 0.682174026966095,
      "learning_rate": 5.621440793870564e-07,
      "loss": 0.0109,
      "step": 2709,
      "video_reward_cumulative_accuracy": 0.8479143595422666
    },
    {
      "epoch": 0.8043929949539923,
      "grad_norm": 1.712660312652588,
      "learning_rate": 5.605085630713686e-07,
      "loss": 0.0253,
      "step": 2710,
      "video_reward_cumulative_accuracy": 0.847970479704797
    },
    {
      "epoch": 0.8046898189373701,
      "grad_norm": 1.6642423868179321,
      "learning_rate": 5.588751289879823e-07,
      "loss": 0.0174,
      "step": 2711,
      "video_reward_cumulative_accuracy": 0.8480265584655109
    },
    {
      "epoch": 0.804986642920748,
      "grad_norm": 0.8261018395423889,
      "learning_rate": 5.572437788905455e-07,
      "loss": 0.0173,
      "step": 2712,
      "video_reward_cumulative_accuracy": 0.8480825958702065
    },
    {
      "epoch": 0.8052834669041259,
      "grad_norm": 1.6964601278305054,
      "learning_rate": 5.556145145304722e-07,
      "loss": 0.0515,
      "step": 2713,
      "video_reward_cumulative_accuracy": 0.8481385919646148
    },
    {
      "epoch": 0.8055802908875037,
      "grad_norm": 1.0702115297317505,
      "learning_rate": 5.53987337656935e-07,
      "loss": 0.015,
      "step": 2714,
      "video_reward_cumulative_accuracy": 0.8481945467943994
    },
    {
      "epoch": 0.8058771148708815,
      "grad_norm": 3.9482622146606445,
      "learning_rate": 5.523622500168651e-07,
      "loss": 0.0711,
      "step": 2715,
      "video_reward_cumulative_accuracy": 0.8482504604051565
    },
    {
      "epoch": 0.8061739388542595,
      "grad_norm": 1.610306739807129,
      "learning_rate": 5.507392533549549e-07,
      "loss": 0.0158,
      "step": 2716,
      "video_reward_cumulative_accuracy": 0.8483063328424153
    },
    {
      "epoch": 0.8064707628376373,
      "grad_norm": 2.1032094955444336,
      "learning_rate": 5.491183494136462e-07,
      "loss": 0.0246,
      "step": 2717,
      "video_reward_cumulative_accuracy": 0.8483621641516378
    },
    {
      "epoch": 0.8067675868210151,
      "grad_norm": 2.930027484893799,
      "learning_rate": 5.474995399331385e-07,
      "loss": 0.0331,
      "step": 2718,
      "video_reward_cumulative_accuracy": 0.8484179543782193
    },
    {
      "epoch": 0.807064410804393,
      "grad_norm": 3.511140823364258,
      "learning_rate": 5.458828266513788e-07,
      "loss": 0.0481,
      "step": 2719,
      "video_reward_cumulative_accuracy": 0.8482898124310408
    },
    {
      "epoch": 0.8073612347877709,
      "grad_norm": 1.0248184204101562,
      "learning_rate": 5.442682113040674e-07,
      "loss": 0.0169,
      "step": 2720,
      "video_reward_cumulative_accuracy": 0.8481617647058823
    },
    {
      "epoch": 0.8076580587711487,
      "grad_norm": 0.7856757044792175,
      "learning_rate": 5.426556956246495e-07,
      "loss": 0.0092,
      "step": 2721,
      "video_reward_cumulative_accuracy": 0.8482175670709298
    },
    {
      "epoch": 0.8079548827545265,
      "grad_norm": 5.376564979553223,
      "learning_rate": 5.410452813443182e-07,
      "loss": 0.0535,
      "step": 2722,
      "video_reward_cumulative_accuracy": 0.8482733284349743
    },
    {
      "epoch": 0.8082517067379045,
      "grad_norm": 4.981514930725098,
      "learning_rate": 5.394369701920096e-07,
      "loss": 0.0552,
      "step": 2723,
      "video_reward_cumulative_accuracy": 0.8483290488431876
    },
    {
      "epoch": 0.8085485307212823,
      "grad_norm": 0.30428871512413025,
      "learning_rate": 5.378307638944008e-07,
      "loss": 0.0053,
      "step": 2724,
      "video_reward_cumulative_accuracy": 0.8483847283406755
    },
    {
      "epoch": 0.8088453547046601,
      "grad_norm": 0.45453497767448425,
      "learning_rate": 5.362266641759103e-07,
      "loss": 0.0064,
      "step": 2725,
      "video_reward_cumulative_accuracy": 0.848440366972477
    },
    {
      "epoch": 0.809142178688038,
      "grad_norm": 0.8478443026542664,
      "learning_rate": 5.346246727586954e-07,
      "loss": 0.0151,
      "step": 2726,
      "video_reward_cumulative_accuracy": 0.8484959647835657
    },
    {
      "epoch": 0.8094390026714159,
      "grad_norm": 1.4090654850006104,
      "learning_rate": 5.330247913626494e-07,
      "loss": 0.0297,
      "step": 2727,
      "video_reward_cumulative_accuracy": 0.8485515218188485
    },
    {
      "epoch": 0.8097358266547937,
      "grad_norm": 1.971437931060791,
      "learning_rate": 5.314270217054004e-07,
      "loss": 0.0297,
      "step": 2728,
      "video_reward_cumulative_accuracy": 0.8486070381231672
    },
    {
      "epoch": 0.8100326506381715,
      "grad_norm": 1.2329754829406738,
      "learning_rate": 5.298313655023083e-07,
      "loss": 0.0111,
      "step": 2729,
      "video_reward_cumulative_accuracy": 0.8486625137412972
    },
    {
      "epoch": 0.8103294746215495,
      "grad_norm": 3.727600574493408,
      "learning_rate": 5.282378244664655e-07,
      "loss": 0.0434,
      "step": 2730,
      "video_reward_cumulative_accuracy": 0.8487179487179487
    },
    {
      "epoch": 0.8106262986049273,
      "grad_norm": 2.156374454498291,
      "learning_rate": 5.266464003086927e-07,
      "loss": 0.0274,
      "step": 2731,
      "video_reward_cumulative_accuracy": 0.8485902599780301
    },
    {
      "epoch": 0.8109231225883051,
      "grad_norm": 4.032077789306641,
      "learning_rate": 5.250570947375383e-07,
      "loss": 0.0297,
      "step": 2732,
      "video_reward_cumulative_accuracy": 0.8484626647144948
    },
    {
      "epoch": 0.811219946571683,
      "grad_norm": 2.540410280227661,
      "learning_rate": 5.234699094592771e-07,
      "loss": 0.0311,
      "step": 2733,
      "video_reward_cumulative_accuracy": 0.8485181119648738
    },
    {
      "epoch": 0.8115167705550609,
      "grad_norm": 1.1667871475219727,
      "learning_rate": 5.21884846177905e-07,
      "loss": 0.0121,
      "step": 2734,
      "video_reward_cumulative_accuracy": 0.8485735186539868
    },
    {
      "epoch": 0.8118135945384387,
      "grad_norm": 4.324409484863281,
      "learning_rate": 5.203019065951417e-07,
      "loss": 0.0714,
      "step": 2735,
      "video_reward_cumulative_accuracy": 0.8486288848263254
    },
    {
      "epoch": 0.8121104185218165,
      "grad_norm": 1.3035205602645874,
      "learning_rate": 5.187210924104269e-07,
      "loss": 0.0293,
      "step": 2736,
      "video_reward_cumulative_accuracy": 0.8486842105263158
    },
    {
      "epoch": 0.8124072425051945,
      "grad_norm": 1.0958853960037231,
      "learning_rate": 5.171424053209184e-07,
      "loss": 0.0078,
      "step": 2737,
      "video_reward_cumulative_accuracy": 0.8487394957983193
    },
    {
      "epoch": 0.8127040664885723,
      "grad_norm": 1.4998290538787842,
      "learning_rate": 5.155658470214889e-07,
      "loss": 0.0165,
      "step": 2738,
      "video_reward_cumulative_accuracy": 0.8487947406866326
    },
    {
      "epoch": 0.8130008904719501,
      "grad_norm": 3.2503786087036133,
      "learning_rate": 5.139914192047271e-07,
      "loss": 0.0309,
      "step": 2739,
      "video_reward_cumulative_accuracy": 0.8486673968601679
    },
    {
      "epoch": 0.813297714455328,
      "grad_norm": 3.0941522121429443,
      "learning_rate": 5.124191235609344e-07,
      "loss": 0.0287,
      "step": 2740,
      "video_reward_cumulative_accuracy": 0.8485401459854015
    },
    {
      "epoch": 0.8135945384387059,
      "grad_norm": 1.8579998016357422,
      "learning_rate": 5.108489617781226e-07,
      "loss": 0.0179,
      "step": 2741,
      "video_reward_cumulative_accuracy": 0.848595403137541
    },
    {
      "epoch": 0.8138913624220837,
      "grad_norm": 0.7049815654754639,
      "learning_rate": 5.092809355420137e-07,
      "loss": 0.0053,
      "step": 2742,
      "video_reward_cumulative_accuracy": 0.8486506199854121
    },
    {
      "epoch": 0.8141881864054615,
      "grad_norm": 1.6860101222991943,
      "learning_rate": 5.077150465360342e-07,
      "loss": 0.0217,
      "step": 2743,
      "video_reward_cumulative_accuracy": 0.8487057965730952
    },
    {
      "epoch": 0.8144850103888395,
      "grad_norm": 2.035961389541626,
      "learning_rate": 5.06151296441319e-07,
      "loss": 0.0333,
      "step": 2744,
      "video_reward_cumulative_accuracy": 0.8487609329446064
    },
    {
      "epoch": 0.8147818343722173,
      "grad_norm": 3.6466519832611084,
      "learning_rate": 5.045896869367056e-07,
      "loss": 0.0462,
      "step": 2745,
      "video_reward_cumulative_accuracy": 0.8486338797814208
    },
    {
      "epoch": 0.8150786583555951,
      "grad_norm": 2.12888503074646,
      "learning_rate": 5.030302196987333e-07,
      "loss": 0.0177,
      "step": 2746,
      "video_reward_cumulative_accuracy": 0.8486890021849963
    },
    {
      "epoch": 0.815375482338973,
      "grad_norm": 0.45268264412879944,
      "learning_rate": 5.014728964016422e-07,
      "loss": 0.0052,
      "step": 2747,
      "video_reward_cumulative_accuracy": 0.8487440844557699
    },
    {
      "epoch": 0.8156723063223509,
      "grad_norm": 3.4227840900421143,
      "learning_rate": 4.999177187173685e-07,
      "loss": 0.1001,
      "step": 2748,
      "video_reward_cumulative_accuracy": 0.8487991266375546
    },
    {
      "epoch": 0.8159691303057287,
      "grad_norm": 1.468361735343933,
      "learning_rate": 4.983646883155479e-07,
      "loss": 0.0192,
      "step": 2749,
      "video_reward_cumulative_accuracy": 0.8488541287740997
    },
    {
      "epoch": 0.8162659542891065,
      "grad_norm": 3.7149956226348877,
      "learning_rate": 4.968138068635076e-07,
      "loss": 0.0808,
      "step": 2750,
      "video_reward_cumulative_accuracy": 0.8487272727272728
    },
    {
      "epoch": 0.8165627782724845,
      "grad_norm": 1.8487035036087036,
      "learning_rate": 4.952650760262706e-07,
      "loss": 0.0174,
      "step": 2751,
      "video_reward_cumulative_accuracy": 0.8487822609960014
    },
    {
      "epoch": 0.8168596022558623,
      "grad_norm": 3.4142134189605713,
      "learning_rate": 4.937184974665504e-07,
      "loss": 0.0341,
      "step": 2752,
      "video_reward_cumulative_accuracy": 0.8488372093023255
    },
    {
      "epoch": 0.8171564262392401,
      "grad_norm": 2.0607471466064453,
      "learning_rate": 4.921740728447474e-07,
      "loss": 0.033,
      "step": 2753,
      "video_reward_cumulative_accuracy": 0.8487104976389394
    },
    {
      "epoch": 0.8174532502226179,
      "grad_norm": 0.7195733189582825,
      "learning_rate": 4.906318038189531e-07,
      "loss": 0.0067,
      "step": 2754,
      "video_reward_cumulative_accuracy": 0.8487654320987654
    },
    {
      "epoch": 0.8177500742059959,
      "grad_norm": 0.9425991177558899,
      "learning_rate": 4.890916920449415e-07,
      "loss": 0.0147,
      "step": 2755,
      "video_reward_cumulative_accuracy": 0.8488203266787658
    },
    {
      "epoch": 0.8180468981893737,
      "grad_norm": 6.290672302246094,
      "learning_rate": 4.87553739176172e-07,
      "loss": 0.0799,
      "step": 2756,
      "video_reward_cumulative_accuracy": 0.8488751814223512
    },
    {
      "epoch": 0.8183437221727515,
      "grad_norm": 3.862820863723755,
      "learning_rate": 4.860179468637882e-07,
      "loss": 0.0691,
      "step": 2757,
      "video_reward_cumulative_accuracy": 0.848929996372869
    },
    {
      "epoch": 0.8186405461561295,
      "grad_norm": 1.6942017078399658,
      "learning_rate": 4.844843167566104e-07,
      "loss": 0.0183,
      "step": 2758,
      "video_reward_cumulative_accuracy": 0.8489847715736041
    },
    {
      "epoch": 0.8189373701395073,
      "grad_norm": 4.168092727661133,
      "learning_rate": 4.829528505011405e-07,
      "loss": 0.091,
      "step": 2759,
      "video_reward_cumulative_accuracy": 0.8488582819862269
    },
    {
      "epoch": 0.8192341941228851,
      "grad_norm": 0.7059163451194763,
      "learning_rate": 4.81423549741555e-07,
      "loss": 0.0061,
      "step": 2760,
      "video_reward_cumulative_accuracy": 0.8489130434782609
    },
    {
      "epoch": 0.8195310181062629,
      "grad_norm": 1.3192722797393799,
      "learning_rate": 4.798964161197075e-07,
      "loss": 0.0222,
      "step": 2761,
      "video_reward_cumulative_accuracy": 0.8489677653024267
    },
    {
      "epoch": 0.8198278420896409,
      "grad_norm": 0.7440000772476196,
      "learning_rate": 4.78371451275124e-07,
      "loss": 0.0124,
      "step": 2762,
      "video_reward_cumulative_accuracy": 0.8490224475018103
    },
    {
      "epoch": 0.8201246660730187,
      "grad_norm": 3.4163613319396973,
      "learning_rate": 4.768486568450018e-07,
      "loss": 0.0335,
      "step": 2763,
      "video_reward_cumulative_accuracy": 0.8490770901194354
    },
    {
      "epoch": 0.8204214900563965,
      "grad_norm": 3.9325835704803467,
      "learning_rate": 4.7532803446420997e-07,
      "loss": 0.038,
      "step": 2764,
      "video_reward_cumulative_accuracy": 0.8491316931982634
    },
    {
      "epoch": 0.8207183140397745,
      "grad_norm": 2.8216254711151123,
      "learning_rate": 4.7380958576528247e-07,
      "loss": 0.0307,
      "step": 2765,
      "video_reward_cumulative_accuracy": 0.849005424954792
    },
    {
      "epoch": 0.8210151380231523,
      "grad_norm": 0.6501257419586182,
      "learning_rate": 4.722933123784221e-07,
      "loss": 0.0089,
      "step": 2766,
      "video_reward_cumulative_accuracy": 0.849060014461316
    },
    {
      "epoch": 0.8213119620065301,
      "grad_norm": 5.257883071899414,
      "learning_rate": 4.707792159314956e-07,
      "loss": 0.0805,
      "step": 2767,
      "video_reward_cumulative_accuracy": 0.8487531622696061
    },
    {
      "epoch": 0.8216087859899079,
      "grad_norm": 2.2783634662628174,
      "learning_rate": 4.6926729805003234e-07,
      "loss": 0.0209,
      "step": 2768,
      "video_reward_cumulative_accuracy": 0.848807803468208
    },
    {
      "epoch": 0.8219056099732859,
      "grad_norm": 2.1635773181915283,
      "learning_rate": 4.677575603572235e-07,
      "loss": 0.0443,
      "step": 2769,
      "video_reward_cumulative_accuracy": 0.8488624052004333
    },
    {
      "epoch": 0.8222024339566637,
      "grad_norm": 0.6879743933677673,
      "learning_rate": 4.6625000447391795e-07,
      "loss": 0.0059,
      "step": 2770,
      "video_reward_cumulative_accuracy": 0.8489169675090252
    },
    {
      "epoch": 0.8224992579400415,
      "grad_norm": 3.005458116531372,
      "learning_rate": 4.647446320186236e-07,
      "loss": 0.1152,
      "step": 2771,
      "video_reward_cumulative_accuracy": 0.8489714904366654
    },
    {
      "epoch": 0.8227960819234195,
      "grad_norm": 3.669316291809082,
      "learning_rate": 4.6324144460750427e-07,
      "loss": 0.032,
      "step": 2772,
      "video_reward_cumulative_accuracy": 0.849025974025974
    },
    {
      "epoch": 0.8230929059067973,
      "grad_norm": 2.6024012565612793,
      "learning_rate": 4.6174044385437765e-07,
      "loss": 0.0662,
      "step": 2773,
      "video_reward_cumulative_accuracy": 0.8489001081860801
    },
    {
      "epoch": 0.8233897298901751,
      "grad_norm": 1.4586189985275269,
      "learning_rate": 4.602416313707131e-07,
      "loss": 0.0176,
      "step": 2774,
      "video_reward_cumulative_accuracy": 0.8489545782263879
    },
    {
      "epoch": 0.8236865538735529,
      "grad_norm": 1.6324635744094849,
      "learning_rate": 4.5874500876563144e-07,
      "loss": 0.0279,
      "step": 2775,
      "video_reward_cumulative_accuracy": 0.849009009009009
    },
    {
      "epoch": 0.8239833778569309,
      "grad_norm": 0.6080179810523987,
      "learning_rate": 4.572505776459024e-07,
      "loss": 0.0066,
      "step": 2776,
      "video_reward_cumulative_accuracy": 0.8490634005763689
    },
    {
      "epoch": 0.8242802018403087,
      "grad_norm": 2.1260578632354736,
      "learning_rate": 4.557583396159429e-07,
      "loss": 0.0368,
      "step": 2777,
      "video_reward_cumulative_accuracy": 0.8489377025567159
    },
    {
      "epoch": 0.8245770258236865,
      "grad_norm": 3.0380239486694336,
      "learning_rate": 4.542682962778161e-07,
      "loss": 0.0527,
      "step": 2778,
      "video_reward_cumulative_accuracy": 0.8489920806335494
    },
    {
      "epoch": 0.8248738498070645,
      "grad_norm": 1.556333065032959,
      "learning_rate": 4.5278044923122654e-07,
      "loss": 0.0195,
      "step": 2779,
      "video_reward_cumulative_accuracy": 0.8488664987405542
    },
    {
      "epoch": 0.8251706737904423,
      "grad_norm": 1.5159677267074585,
      "learning_rate": 4.512948000735234e-07,
      "loss": 0.0095,
      "step": 2780,
      "video_reward_cumulative_accuracy": 0.8489208633093526
    },
    {
      "epoch": 0.8254674977738201,
      "grad_norm": 0.4575834274291992,
      "learning_rate": 4.498113503996948e-07,
      "loss": 0.0059,
      "step": 2781,
      "video_reward_cumulative_accuracy": 0.8489751887810141
    },
    {
      "epoch": 0.8257643217571979,
      "grad_norm": 1.370509147644043,
      "learning_rate": 4.4833010180236836e-07,
      "loss": 0.0194,
      "step": 2782,
      "video_reward_cumulative_accuracy": 0.8490294751976994
    },
    {
      "epoch": 0.8260611457405759,
      "grad_norm": 1.5616401433944702,
      "learning_rate": 4.4685105587180895e-07,
      "loss": 0.0262,
      "step": 2783,
      "video_reward_cumulative_accuracy": 0.8490837226015092
    },
    {
      "epoch": 0.8263579697239537,
      "grad_norm": 0.9546812772750854,
      "learning_rate": 4.453742141959141e-07,
      "loss": 0.0074,
      "step": 2784,
      "video_reward_cumulative_accuracy": 0.8491379310344828
    },
    {
      "epoch": 0.8266547937073315,
      "grad_norm": 1.1593352556228638,
      "learning_rate": 4.4389957836021765e-07,
      "loss": 0.0387,
      "step": 2785,
      "video_reward_cumulative_accuracy": 0.8491921005385996
    },
    {
      "epoch": 0.8269516176907095,
      "grad_norm": 1.6280272006988525,
      "learning_rate": 4.424271499478844e-07,
      "loss": 0.0173,
      "step": 2786,
      "video_reward_cumulative_accuracy": 0.8492462311557789
    },
    {
      "epoch": 0.8272484416740873,
      "grad_norm": 3.1349103450775146,
      "learning_rate": 4.409569305397088e-07,
      "loss": 0.0256,
      "step": 2787,
      "video_reward_cumulative_accuracy": 0.8493003229278795
    },
    {
      "epoch": 0.8275452656574651,
      "grad_norm": 1.798938274383545,
      "learning_rate": 4.394889217141152e-07,
      "loss": 0.0205,
      "step": 2788,
      "video_reward_cumulative_accuracy": 0.8493543758967002
    },
    {
      "epoch": 0.8278420896408429,
      "grad_norm": 1.2877916097640991,
      "learning_rate": 4.38023125047152e-07,
      "loss": 0.0248,
      "step": 2789,
      "video_reward_cumulative_accuracy": 0.84940839010398
    },
    {
      "epoch": 0.8281389136242209,
      "grad_norm": 1.9247994422912598,
      "learning_rate": 4.365595421124949e-07,
      "loss": 0.037,
      "step": 2790,
      "video_reward_cumulative_accuracy": 0.8494623655913979
    },
    {
      "epoch": 0.8284357376075987,
      "grad_norm": 1.2538220882415771,
      "learning_rate": 4.35098174481442e-07,
      "loss": 0.041,
      "step": 2791,
      "video_reward_cumulative_accuracy": 0.8495163024005733
    },
    {
      "epoch": 0.8287325615909765,
      "grad_norm": 0.8949556946754456,
      "learning_rate": 4.336390237229138e-07,
      "loss": 0.0087,
      "step": 2792,
      "video_reward_cumulative_accuracy": 0.8495702005730659
    },
    {
      "epoch": 0.8290293855743545,
      "grad_norm": 2.420616626739502,
      "learning_rate": 4.321820914034502e-07,
      "loss": 0.0267,
      "step": 2793,
      "video_reward_cumulative_accuracy": 0.849624060150376
    },
    {
      "epoch": 0.8293262095577323,
      "grad_norm": 2.788456916809082,
      "learning_rate": 4.307273790872091e-07,
      "loss": 0.0342,
      "step": 2794,
      "video_reward_cumulative_accuracy": 0.8496778811739442
    },
    {
      "epoch": 0.8296230335411101,
      "grad_norm": 1.2072490453720093,
      "learning_rate": 4.292748883359657e-07,
      "loss": 0.0171,
      "step": 2795,
      "video_reward_cumulative_accuracy": 0.8497316636851521
    },
    {
      "epoch": 0.8299198575244879,
      "grad_norm": 2.1791484355926514,
      "learning_rate": 4.278246207091083e-07,
      "loss": 0.0144,
      "step": 2796,
      "video_reward_cumulative_accuracy": 0.8497854077253219
    },
    {
      "epoch": 0.8302166815078659,
      "grad_norm": 1.2288272380828857,
      "learning_rate": 4.263765777636425e-07,
      "loss": 0.0077,
      "step": 2797,
      "video_reward_cumulative_accuracy": 0.8498391133357168
    },
    {
      "epoch": 0.8305135054912437,
      "grad_norm": 0.8437438011169434,
      "learning_rate": 4.2493076105418114e-07,
      "loss": 0.0226,
      "step": 2798,
      "video_reward_cumulative_accuracy": 0.8498927805575411
    },
    {
      "epoch": 0.8308103294746215,
      "grad_norm": 1.1709120273590088,
      "learning_rate": 4.2348717213294923e-07,
      "loss": 0.0083,
      "step": 2799,
      "video_reward_cumulative_accuracy": 0.84994640943194
    },
    {
      "epoch": 0.8311071534579995,
      "grad_norm": 1.575054407119751,
      "learning_rate": 4.2204581254978034e-07,
      "loss": 0.0187,
      "step": 2800,
      "video_reward_cumulative_accuracy": 0.85
    },
    {
      "epoch": 0.8311071534579995,
      "eval_runtime": 131.0431,
      "eval_samples_per_second": 6.021,
      "eval_steps_per_second": 0.755,
      "eval_test_set_accuracy": 0.8194444444444444,
      "step": 2800
    },
    {
      "epoch": 0.8314039774413773,
      "grad_norm": 1.4664020538330078,
      "learning_rate": 4.2060668385211196e-07,
      "loss": 0.0194,
      "step": 2801,
      "video_reward_cumulative_accuracy": 0.850053552302749
    },
    {
      "epoch": 0.8317008014247551,
      "grad_norm": 3.3089983463287354,
      "learning_rate": 4.1916978758499095e-07,
      "loss": 0.0237,
      "step": 2802,
      "video_reward_cumulative_accuracy": 0.8501070663811563
    },
    {
      "epoch": 0.8319976254081329,
      "grad_norm": 1.9900091886520386,
      "learning_rate": 4.1773512529106305e-07,
      "loss": 0.039,
      "step": 2803,
      "video_reward_cumulative_accuracy": 0.8499821619693186
    },
    {
      "epoch": 0.8322944493915109,
      "grad_norm": 1.6034806966781616,
      "learning_rate": 4.163026985105778e-07,
      "loss": 0.0162,
      "step": 2804,
      "video_reward_cumulative_accuracy": 0.8500356633380884
    },
    {
      "epoch": 0.8325912733748887,
      "grad_norm": 4.051023960113525,
      "learning_rate": 4.1487250878138567e-07,
      "loss": 0.0284,
      "step": 2805,
      "video_reward_cumulative_accuracy": 0.8500891265597148
    },
    {
      "epoch": 0.8328880973582665,
      "grad_norm": 1.4414976835250854,
      "learning_rate": 4.134445576389315e-07,
      "loss": 0.0089,
      "step": 2806,
      "video_reward_cumulative_accuracy": 0.8501425516749822
    },
    {
      "epoch": 0.8331849213416445,
      "grad_norm": 2.868297815322876,
      "learning_rate": 4.1201884661626253e-07,
      "loss": 0.051,
      "step": 2807,
      "video_reward_cumulative_accuracy": 0.8501959387246171
    },
    {
      "epoch": 0.8334817453250223,
      "grad_norm": 1.3796963691711426,
      "learning_rate": 4.105953772440158e-07,
      "loss": 0.0085,
      "step": 2808,
      "video_reward_cumulative_accuracy": 0.8502492877492878
    },
    {
      "epoch": 0.8337785693084001,
      "grad_norm": 5.215771198272705,
      "learning_rate": 4.091741510504249e-07,
      "loss": 0.0674,
      "step": 2809,
      "video_reward_cumulative_accuracy": 0.8503025987896048
    },
    {
      "epoch": 0.8340753932917779,
      "grad_norm": 1.1552796363830566,
      "learning_rate": 4.0775516956131327e-07,
      "loss": 0.0084,
      "step": 2810,
      "video_reward_cumulative_accuracy": 0.850355871886121
    },
    {
      "epoch": 0.8343722172751559,
      "grad_norm": 4.928788661956787,
      "learning_rate": 4.0633843430009563e-07,
      "loss": 0.0486,
      "step": 2811,
      "video_reward_cumulative_accuracy": 0.8502312344361437
    },
    {
      "epoch": 0.8346690412585337,
      "grad_norm": 0.6429322957992554,
      "learning_rate": 4.049239467877747e-07,
      "loss": 0.0093,
      "step": 2812,
      "video_reward_cumulative_accuracy": 0.8502844950213371
    },
    {
      "epoch": 0.8349658652419115,
      "grad_norm": 4.207029819488525,
      "learning_rate": 4.0351170854294017e-07,
      "loss": 0.0345,
      "step": 2813,
      "video_reward_cumulative_accuracy": 0.8503377177390686
    },
    {
      "epoch": 0.8352626892252895,
      "grad_norm": 1.94601309299469,
      "learning_rate": 4.0210172108176767e-07,
      "loss": 0.036,
      "step": 2814,
      "video_reward_cumulative_accuracy": 0.8503909026297086
    },
    {
      "epoch": 0.8355595132086673,
      "grad_norm": 0.2354949563741684,
      "learning_rate": 4.0069398591801423e-07,
      "loss": 0.003,
      "step": 2815,
      "video_reward_cumulative_accuracy": 0.8504440497335701
    },
    {
      "epoch": 0.8358563371920451,
      "grad_norm": 2.0666418075561523,
      "learning_rate": 3.9928850456302073e-07,
      "loss": 0.045,
      "step": 2816,
      "video_reward_cumulative_accuracy": 0.8504971590909091
    },
    {
      "epoch": 0.8361531611754229,
      "grad_norm": 0.41040000319480896,
      "learning_rate": 3.9788527852570813e-07,
      "loss": 0.0077,
      "step": 2817,
      "video_reward_cumulative_accuracy": 0.850550230741924
    },
    {
      "epoch": 0.8364499851588009,
      "grad_norm": 4.305946350097656,
      "learning_rate": 3.964843093125753e-07,
      "loss": 0.0499,
      "step": 2818,
      "video_reward_cumulative_accuracy": 0.8506032647267565
    },
    {
      "epoch": 0.8367468091421787,
      "grad_norm": 1.3210865259170532,
      "learning_rate": 3.950855984276994e-07,
      "loss": 0.0141,
      "step": 2819,
      "video_reward_cumulative_accuracy": 0.8506562610854913
    },
    {
      "epoch": 0.8370436331255565,
      "grad_norm": 1.932438850402832,
      "learning_rate": 3.936891473727314e-07,
      "loss": 0.0332,
      "step": 2820,
      "video_reward_cumulative_accuracy": 0.8507092198581561
    },
    {
      "epoch": 0.8373404571089345,
      "grad_norm": 1.8300782442092896,
      "learning_rate": 3.9229495764689734e-07,
      "loss": 0.0406,
      "step": 2821,
      "video_reward_cumulative_accuracy": 0.8507621410847217
    },
    {
      "epoch": 0.8376372810923123,
      "grad_norm": 0.9342535734176636,
      "learning_rate": 3.9090303074699546e-07,
      "loss": 0.0265,
      "step": 2822,
      "video_reward_cumulative_accuracy": 0.8506378454996456
    },
    {
      "epoch": 0.8379341050756901,
      "grad_norm": 2.6911323070526123,
      "learning_rate": 3.89513368167394e-07,
      "loss": 0.0273,
      "step": 2823,
      "video_reward_cumulative_accuracy": 0.8506907545164718
    },
    {
      "epoch": 0.8382309290590679,
      "grad_norm": 1.113362431526184,
      "learning_rate": 3.881259714000318e-07,
      "loss": 0.0098,
      "step": 2824,
      "video_reward_cumulative_accuracy": 0.8507436260623229
    },
    {
      "epoch": 0.8385277530424459,
      "grad_norm": 2.1424875259399414,
      "learning_rate": 3.8674084193441235e-07,
      "loss": 0.0223,
      "step": 2825,
      "video_reward_cumulative_accuracy": 0.8507964601769912
    },
    {
      "epoch": 0.8388245770258237,
      "grad_norm": 1.2196357250213623,
      "learning_rate": 3.8535798125760695e-07,
      "loss": 0.0245,
      "step": 2826,
      "video_reward_cumulative_accuracy": 0.8508492569002123
    },
    {
      "epoch": 0.8391214010092015,
      "grad_norm": 1.4248687028884888,
      "learning_rate": 3.839773908542513e-07,
      "loss": 0.0097,
      "step": 2827,
      "video_reward_cumulative_accuracy": 0.8509020162716661
    },
    {
      "epoch": 0.8394182249925795,
      "grad_norm": 3.166527032852173,
      "learning_rate": 3.8259907220654286e-07,
      "loss": 0.0677,
      "step": 2828,
      "video_reward_cumulative_accuracy": 0.8509547383309759
    },
    {
      "epoch": 0.8397150489759573,
      "grad_norm": 1.9461520910263062,
      "learning_rate": 3.81223026794241e-07,
      "loss": 0.0228,
      "step": 2829,
      "video_reward_cumulative_accuracy": 0.8510074231177094
    },
    {
      "epoch": 0.8400118729593351,
      "grad_norm": 4.6057353019714355,
      "learning_rate": 3.798492560946632e-07,
      "loss": 0.092,
      "step": 2830,
      "video_reward_cumulative_accuracy": 0.8508833922261484
    },
    {
      "epoch": 0.8403086969427129,
      "grad_norm": 1.7150532007217407,
      "learning_rate": 3.7847776158268594e-07,
      "loss": 0.0519,
      "step": 2831,
      "video_reward_cumulative_accuracy": 0.8507594489579654
    },
    {
      "epoch": 0.8406055209260909,
      "grad_norm": 4.691923141479492,
      "learning_rate": 3.771085447307418e-07,
      "loss": 0.0646,
      "step": 2832,
      "video_reward_cumulative_accuracy": 0.850635593220339
    },
    {
      "epoch": 0.8409023449094687,
      "grad_norm": 1.0979514122009277,
      "learning_rate": 3.757416070088185e-07,
      "loss": 0.0091,
      "step": 2833,
      "video_reward_cumulative_accuracy": 0.8506883162725026
    },
    {
      "epoch": 0.8411991688928465,
      "grad_norm": 0.6728662848472595,
      "learning_rate": 3.7437694988445517e-07,
      "loss": 0.0061,
      "step": 2834,
      "video_reward_cumulative_accuracy": 0.8507410021171489
    },
    {
      "epoch": 0.8414959928762245,
      "grad_norm": 3.404707193374634,
      "learning_rate": 3.730145748227443e-07,
      "loss": 0.0272,
      "step": 2835,
      "video_reward_cumulative_accuracy": 0.8507936507936508
    },
    {
      "epoch": 0.8417928168596023,
      "grad_norm": 1.896315097808838,
      "learning_rate": 3.716544832863275e-07,
      "loss": 0.0159,
      "step": 2836,
      "video_reward_cumulative_accuracy": 0.8506699576868829
    },
    {
      "epoch": 0.8420896408429801,
      "grad_norm": 1.7696223258972168,
      "learning_rate": 3.702966767353958e-07,
      "loss": 0.052,
      "step": 2837,
      "video_reward_cumulative_accuracy": 0.8505463517800493
    },
    {
      "epoch": 0.8423864648263579,
      "grad_norm": 2.6428275108337402,
      "learning_rate": 3.6894115662768596e-07,
      "loss": 0.0275,
      "step": 2838,
      "video_reward_cumulative_accuracy": 0.8504228329809725
    },
    {
      "epoch": 0.8426832888097359,
      "grad_norm": 1.312769889831543,
      "learning_rate": 3.675879244184799e-07,
      "loss": 0.0219,
      "step": 2839,
      "video_reward_cumulative_accuracy": 0.850475519549137
    },
    {
      "epoch": 0.8429801127931137,
      "grad_norm": 2.8779304027557373,
      "learning_rate": 3.66236981560604e-07,
      "loss": 0.0236,
      "step": 2840,
      "video_reward_cumulative_accuracy": 0.8503521126760564
    },
    {
      "epoch": 0.8432769367764915,
      "grad_norm": 3.4229025840759277,
      "learning_rate": 3.6488832950442644e-07,
      "loss": 0.0779,
      "step": 2841,
      "video_reward_cumulative_accuracy": 0.8502287926786343
    },
    {
      "epoch": 0.8435737607598695,
      "grad_norm": 1.8767544031143188,
      "learning_rate": 3.635419696978565e-07,
      "loss": 0.0558,
      "step": 2842,
      "video_reward_cumulative_accuracy": 0.8501055594651654
    },
    {
      "epoch": 0.8438705847432473,
      "grad_norm": 0.48670053482055664,
      "learning_rate": 3.621979035863421e-07,
      "loss": 0.0078,
      "step": 2843,
      "video_reward_cumulative_accuracy": 0.8501582835033415
    },
    {
      "epoch": 0.8441674087266251,
      "grad_norm": 0.923923134803772,
      "learning_rate": 3.6085613261286816e-07,
      "loss": 0.0089,
      "step": 2844,
      "video_reward_cumulative_accuracy": 0.8502109704641351
    },
    {
      "epoch": 0.8444642327100029,
      "grad_norm": 2.631030797958374,
      "learning_rate": 3.5951665821795686e-07,
      "loss": 0.0444,
      "step": 2845,
      "video_reward_cumulative_accuracy": 0.8500878734622144
    },
    {
      "epoch": 0.8447610566933809,
      "grad_norm": 2.941474437713623,
      "learning_rate": 3.5817948183966224e-07,
      "loss": 0.0307,
      "step": 2846,
      "video_reward_cumulative_accuracy": 0.8501405481377372
    },
    {
      "epoch": 0.8450578806767587,
      "grad_norm": 1.852303385734558,
      "learning_rate": 3.5684460491357457e-07,
      "loss": 0.0394,
      "step": 2847,
      "video_reward_cumulative_accuracy": 0.8501931858096242
    },
    {
      "epoch": 0.8453547046601365,
      "grad_norm": 2.0034775733947754,
      "learning_rate": 3.5551202887281423e-07,
      "loss": 0.0212,
      "step": 2848,
      "video_reward_cumulative_accuracy": 0.8502457865168539
    },
    {
      "epoch": 0.8456515286435144,
      "grad_norm": 1.9412490129470825,
      "learning_rate": 3.541817551480292e-07,
      "loss": 0.037,
      "step": 2849,
      "video_reward_cumulative_accuracy": 0.8502983502983503
    },
    {
      "epoch": 0.8459483526268923,
      "grad_norm": 1.5758916139602661,
      "learning_rate": 3.528537851673988e-07,
      "loss": 0.0143,
      "step": 2850,
      "video_reward_cumulative_accuracy": 0.8501754385964913
    },
    {
      "epoch": 0.8462451766102701,
      "grad_norm": 2.0527806282043457,
      "learning_rate": 3.5152812035662674e-07,
      "loss": 0.0376,
      "step": 2851,
      "video_reward_cumulative_accuracy": 0.8500526131182041
    },
    {
      "epoch": 0.8465420005936479,
      "grad_norm": 1.3531450033187866,
      "learning_rate": 3.502047621389426e-07,
      "loss": 0.0074,
      "step": 2852,
      "video_reward_cumulative_accuracy": 0.8501051893408135
    },
    {
      "epoch": 0.8468388245770259,
      "grad_norm": 0.5980221033096313,
      "learning_rate": 3.488837119351018e-07,
      "loss": 0.0049,
      "step": 2853,
      "video_reward_cumulative_accuracy": 0.8501577287066246
    },
    {
      "epoch": 0.8471356485604037,
      "grad_norm": 1.4160398244857788,
      "learning_rate": 3.4756497116337826e-07,
      "loss": 0.0129,
      "step": 2854,
      "video_reward_cumulative_accuracy": 0.8502102312543798
    },
    {
      "epoch": 0.8474324725437815,
      "grad_norm": 1.262010097503662,
      "learning_rate": 3.4624854123956916e-07,
      "loss": 0.0192,
      "step": 2855,
      "video_reward_cumulative_accuracy": 0.850262697022767
    },
    {
      "epoch": 0.8477292965271594,
      "grad_norm": 0.3952392041683197,
      "learning_rate": 3.449344235769886e-07,
      "loss": 0.0078,
      "step": 2856,
      "video_reward_cumulative_accuracy": 0.8503151260504201
    },
    {
      "epoch": 0.8480261205105373,
      "grad_norm": 2.0614068508148193,
      "learning_rate": 3.4362261958647e-07,
      "loss": 0.0281,
      "step": 2857,
      "video_reward_cumulative_accuracy": 0.8503675183759188
    },
    {
      "epoch": 0.8483229444939151,
      "grad_norm": 3.423657178878784,
      "learning_rate": 3.423131306763627e-07,
      "loss": 0.0277,
      "step": 2858,
      "video_reward_cumulative_accuracy": 0.8504198740377886
    },
    {
      "epoch": 0.8486197684772929,
      "grad_norm": 0.3777397871017456,
      "learning_rate": 3.4100595825252965e-07,
      "loss": 0.005,
      "step": 2859,
      "video_reward_cumulative_accuracy": 0.8504721930745016
    },
    {
      "epoch": 0.8489165924606709,
      "grad_norm": 0.7437081336975098,
      "learning_rate": 3.3970110371834814e-07,
      "loss": 0.0104,
      "step": 2860,
      "video_reward_cumulative_accuracy": 0.8505244755244755
    },
    {
      "epoch": 0.8492134164440487,
      "grad_norm": 1.1904757022857666,
      "learning_rate": 3.3839856847470485e-07,
      "loss": 0.0124,
      "step": 2861,
      "video_reward_cumulative_accuracy": 0.8505767214260748
    },
    {
      "epoch": 0.8495102404274265,
      "grad_norm": 3.3361258506774902,
      "learning_rate": 3.3709835391999846e-07,
      "loss": 0.0594,
      "step": 2862,
      "video_reward_cumulative_accuracy": 0.8504542278127184
    },
    {
      "epoch": 0.8498070644108044,
      "grad_norm": 5.005155086517334,
      "learning_rate": 3.3580046145013534e-07,
      "loss": 0.0646,
      "step": 2863,
      "video_reward_cumulative_accuracy": 0.8503318197694726
    },
    {
      "epoch": 0.8501038883941823,
      "grad_norm": 1.4068641662597656,
      "learning_rate": 3.3450489245852926e-07,
      "loss": 0.0191,
      "step": 2864,
      "video_reward_cumulative_accuracy": 0.8502094972067039
    },
    {
      "epoch": 0.8504007123775601,
      "grad_norm": 1.33848237991333,
      "learning_rate": 3.3321164833609976e-07,
      "loss": 0.0272,
      "step": 2865,
      "video_reward_cumulative_accuracy": 0.8502617801047121
    },
    {
      "epoch": 0.8506975363609379,
      "grad_norm": 1.8083772659301758,
      "learning_rate": 3.319207304712688e-07,
      "loss": 0.0599,
      "step": 2866,
      "video_reward_cumulative_accuracy": 0.8503140265177949
    },
    {
      "epoch": 0.8509943603443159,
      "grad_norm": 3.3464255332946777,
      "learning_rate": 3.306321402499627e-07,
      "loss": 0.0327,
      "step": 2867,
      "video_reward_cumulative_accuracy": 0.8503662364841298
    },
    {
      "epoch": 0.8512911843276937,
      "grad_norm": 3.124976873397827,
      "learning_rate": 3.2934587905560756e-07,
      "loss": 0.0613,
      "step": 2868,
      "video_reward_cumulative_accuracy": 0.850418410041841
    },
    {
      "epoch": 0.8515880083110715,
      "grad_norm": 4.9768171310424805,
      "learning_rate": 3.2806194826913107e-07,
      "loss": 0.0197,
      "step": 2869,
      "video_reward_cumulative_accuracy": 0.8504705472289996
    },
    {
      "epoch": 0.8518848322944494,
      "grad_norm": 3.0971407890319824,
      "learning_rate": 3.267803492689556e-07,
      "loss": 0.0494,
      "step": 2870,
      "video_reward_cumulative_accuracy": 0.8505226480836237
    },
    {
      "epoch": 0.8521816562778273,
      "grad_norm": 0.6683565974235535,
      "learning_rate": 3.2550108343100293e-07,
      "loss": 0.0072,
      "step": 2871,
      "video_reward_cumulative_accuracy": 0.8505747126436781
    },
    {
      "epoch": 0.8524784802612051,
      "grad_norm": 0.6179012060165405,
      "learning_rate": 3.242241521286893e-07,
      "loss": 0.0036,
      "step": 2872,
      "video_reward_cumulative_accuracy": 0.8506267409470752
    },
    {
      "epoch": 0.8527753042445829,
      "grad_norm": 2.777172327041626,
      "learning_rate": 3.2294955673292437e-07,
      "loss": 0.023,
      "step": 2873,
      "video_reward_cumulative_accuracy": 0.8506787330316742
    },
    {
      "epoch": 0.8530721282279609,
      "grad_norm": 3.996541976928711,
      "learning_rate": 3.2167729861211026e-07,
      "loss": 0.0392,
      "step": 2874,
      "video_reward_cumulative_accuracy": 0.8505567153792624
    },
    {
      "epoch": 0.8533689522113387,
      "grad_norm": 1.6711208820343018,
      "learning_rate": 3.2040737913213853e-07,
      "loss": 0.0176,
      "step": 2875,
      "video_reward_cumulative_accuracy": 0.8506086956521739
    },
    {
      "epoch": 0.8536657761947165,
      "grad_norm": 0.736202597618103,
      "learning_rate": 3.1913979965639166e-07,
      "loss": 0.0054,
      "step": 2876,
      "video_reward_cumulative_accuracy": 0.8506606397774688
    },
    {
      "epoch": 0.8539626001780944,
      "grad_norm": 1.9681017398834229,
      "learning_rate": 3.178745615457393e-07,
      "loss": 0.0178,
      "step": 2877,
      "video_reward_cumulative_accuracy": 0.8507125477928398
    },
    {
      "epoch": 0.8542594241614723,
      "grad_norm": 4.273847579956055,
      "learning_rate": 3.1661166615853723e-07,
      "loss": 0.0847,
      "step": 2878,
      "video_reward_cumulative_accuracy": 0.8505906879777624
    },
    {
      "epoch": 0.8545562481448501,
      "grad_norm": 2.556535243988037,
      "learning_rate": 3.153511148506269e-07,
      "loss": 0.0457,
      "step": 2879,
      "video_reward_cumulative_accuracy": 0.8506425842306357
    },
    {
      "epoch": 0.8548530721282279,
      "grad_norm": 3.7858211994171143,
      "learning_rate": 3.140929089753311e-07,
      "loss": 0.0359,
      "step": 2880,
      "video_reward_cumulative_accuracy": 0.8506944444444444
    },
    {
      "epoch": 0.8551498961116059,
      "grad_norm": 3.342463254928589,
      "learning_rate": 3.128370498834571e-07,
      "loss": 0.0899,
      "step": 2881,
      "video_reward_cumulative_accuracy": 0.8505727178063173
    },
    {
      "epoch": 0.8554467200949837,
      "grad_norm": 3.1436612606048584,
      "learning_rate": 3.1158353892329075e-07,
      "loss": 0.0864,
      "step": 2882,
      "video_reward_cumulative_accuracy": 0.8506245662734212
    },
    {
      "epoch": 0.8557435440783615,
      "grad_norm": 0.46163490414619446,
      "learning_rate": 3.1033237744059805e-07,
      "loss": 0.0036,
      "step": 2883,
      "video_reward_cumulative_accuracy": 0.8506763787721123
    },
    {
      "epoch": 0.8560403680617394,
      "grad_norm": 1.36380934715271,
      "learning_rate": 3.090835667786232e-07,
      "loss": 0.0211,
      "step": 2884,
      "video_reward_cumulative_accuracy": 0.8507281553398058
    },
    {
      "epoch": 0.8563371920451173,
      "grad_norm": 5.0028977394104,
      "learning_rate": 3.078371082780843e-07,
      "loss": 0.0455,
      "step": 2885,
      "video_reward_cumulative_accuracy": 0.8506065857885615
    },
    {
      "epoch": 0.8566340160284951,
      "grad_norm": 3.535642147064209,
      "learning_rate": 3.065930032771763e-07,
      "loss": 0.0395,
      "step": 2886,
      "video_reward_cumulative_accuracy": 0.8506583506583506
    },
    {
      "epoch": 0.8569308400118729,
      "grad_norm": 0.9238957166671753,
      "learning_rate": 3.053512531115654e-07,
      "loss": 0.0094,
      "step": 2887,
      "video_reward_cumulative_accuracy": 0.8507100796674749
    },
    {
      "epoch": 0.8572276639952509,
      "grad_norm": 1.2704198360443115,
      "learning_rate": 3.041118591143924e-07,
      "loss": 0.0114,
      "step": 2888,
      "video_reward_cumulative_accuracy": 0.8507617728531855
    },
    {
      "epoch": 0.8575244879786287,
      "grad_norm": 5.546009063720703,
      "learning_rate": 3.0287482261626727e-07,
      "loss": 0.1227,
      "step": 2889,
      "video_reward_cumulative_accuracy": 0.8506403599861544
    },
    {
      "epoch": 0.8578213119620065,
      "grad_norm": 0.7070528268814087,
      "learning_rate": 3.016401449452674e-07,
      "loss": 0.011,
      "step": 2890,
      "video_reward_cumulative_accuracy": 0.8506920415224913
    },
    {
      "epoch": 0.8581181359453844,
      "grad_norm": 2.1009104251861572,
      "learning_rate": 3.0040782742694037e-07,
      "loss": 0.0506,
      "step": 2891,
      "video_reward_cumulative_accuracy": 0.8507436873054306
    },
    {
      "epoch": 0.8584149599287623,
      "grad_norm": 1.1483750343322754,
      "learning_rate": 2.991778713842969e-07,
      "loss": 0.025,
      "step": 2892,
      "video_reward_cumulative_accuracy": 0.8507952973720608
    },
    {
      "epoch": 0.8587117839121401,
      "grad_norm": 0.6957268714904785,
      "learning_rate": 2.979502781378163e-07,
      "loss": 0.0083,
      "step": 2893,
      "video_reward_cumulative_accuracy": 0.8508468717594193
    },
    {
      "epoch": 0.8590086078955179,
      "grad_norm": 1.5722553730010986,
      "learning_rate": 2.967250490054377e-07,
      "loss": 0.044,
      "step": 2894,
      "video_reward_cumulative_accuracy": 0.850898410504492
    },
    {
      "epoch": 0.8593054318788959,
      "grad_norm": 1.9897538423538208,
      "learning_rate": 2.955021853025639e-07,
      "loss": 0.0717,
      "step": 2895,
      "video_reward_cumulative_accuracy": 0.8509499136442141
    },
    {
      "epoch": 0.8596022558622737,
      "grad_norm": 1.8997362852096558,
      "learning_rate": 2.942816883420582e-07,
      "loss": 0.0146,
      "step": 2896,
      "video_reward_cumulative_accuracy": 0.8510013812154696
    },
    {
      "epoch": 0.8598990798456515,
      "grad_norm": 1.694429636001587,
      "learning_rate": 2.9306355943424097e-07,
      "loss": 0.0154,
      "step": 2897,
      "video_reward_cumulative_accuracy": 0.8510528132550915
    },
    {
      "epoch": 0.8601959038290294,
      "grad_norm": 2.0843210220336914,
      "learning_rate": 2.91847799886894e-07,
      "loss": 0.0171,
      "step": 2898,
      "video_reward_cumulative_accuracy": 0.851104209799862
    },
    {
      "epoch": 0.8604927278124073,
      "grad_norm": 3.370720386505127,
      "learning_rate": 2.9063441100525167e-07,
      "loss": 0.0258,
      "step": 2899,
      "video_reward_cumulative_accuracy": 0.8509830976198689
    },
    {
      "epoch": 0.8607895517957851,
      "grad_norm": 1.6068284511566162,
      "learning_rate": 2.8942339409200523e-07,
      "loss": 0.0278,
      "step": 2900,
      "video_reward_cumulative_accuracy": 0.8510344827586207
    },
    {
      "epoch": 0.8610863757791629,
      "grad_norm": 1.0114235877990723,
      "learning_rate": 2.88214750447299e-07,
      "loss": 0.0117,
      "step": 2901,
      "video_reward_cumulative_accuracy": 0.8510858324715616
    },
    {
      "epoch": 0.8613831997625409,
      "grad_norm": 1.6765480041503906,
      "learning_rate": 2.8700848136872823e-07,
      "loss": 0.0457,
      "step": 2902,
      "video_reward_cumulative_accuracy": 0.8511371467953136
    },
    {
      "epoch": 0.8616800237459187,
      "grad_norm": 4.060369968414307,
      "learning_rate": 2.858045881513416e-07,
      "loss": 0.0417,
      "step": 2903,
      "video_reward_cumulative_accuracy": 0.8510161901481227
    },
    {
      "epoch": 0.8619768477292965,
      "grad_norm": 2.511009693145752,
      "learning_rate": 2.846030720876339e-07,
      "loss": 0.0231,
      "step": 2904,
      "video_reward_cumulative_accuracy": 0.8510674931129476
    },
    {
      "epoch": 0.8622736717126744,
      "grad_norm": 1.7390601634979248,
      "learning_rate": 2.834039344675504e-07,
      "loss": 0.0318,
      "step": 2905,
      "video_reward_cumulative_accuracy": 0.851118760757315
    },
    {
      "epoch": 0.8625704956960523,
      "grad_norm": 2.4160759449005127,
      "learning_rate": 2.8220717657848037e-07,
      "loss": 0.0218,
      "step": 2906,
      "video_reward_cumulative_accuracy": 0.8511699931176876
    },
    {
      "epoch": 0.8628673196794301,
      "grad_norm": 2.386310338973999,
      "learning_rate": 2.8101279970526e-07,
      "loss": 0.0381,
      "step": 2907,
      "video_reward_cumulative_accuracy": 0.8512211902304782
    },
    {
      "epoch": 0.8631641436628079,
      "grad_norm": 1.4243983030319214,
      "learning_rate": 2.798208051301693e-07,
      "loss": 0.0189,
      "step": 2908,
      "video_reward_cumulative_accuracy": 0.8512723521320495
    },
    {
      "epoch": 0.8634609676461859,
      "grad_norm": 2.750420331954956,
      "learning_rate": 2.786311941329298e-07,
      "loss": 0.0442,
      "step": 2909,
      "video_reward_cumulative_accuracy": 0.8511515984874527
    },
    {
      "epoch": 0.8637577916295637,
      "grad_norm": 1.8915764093399048,
      "learning_rate": 2.774439679907051e-07,
      "loss": 0.0549,
      "step": 2910,
      "video_reward_cumulative_accuracy": 0.8512027491408934
    },
    {
      "epoch": 0.8640546156129415,
      "grad_norm": 2.3890373706817627,
      "learning_rate": 2.762591279780963e-07,
      "loss": 0.0192,
      "step": 2911,
      "video_reward_cumulative_accuracy": 0.8512538646513226
    },
    {
      "epoch": 0.8643514395963194,
      "grad_norm": 2.8765714168548584,
      "learning_rate": 2.7507667536714496e-07,
      "loss": 0.0201,
      "step": 2912,
      "video_reward_cumulative_accuracy": 0.851304945054945
    },
    {
      "epoch": 0.8646482635796973,
      "grad_norm": 0.4579935073852539,
      "learning_rate": 2.738966114273287e-07,
      "loss": 0.0051,
      "step": 2913,
      "video_reward_cumulative_accuracy": 0.8513559903879162
    },
    {
      "epoch": 0.8649450875630751,
      "grad_norm": 1.4378376007080078,
      "learning_rate": 2.727189374255604e-07,
      "loss": 0.0094,
      "step": 2914,
      "video_reward_cumulative_accuracy": 0.8514070006863418
    },
    {
      "epoch": 0.8652419115464529,
      "grad_norm": 1.244295358657837,
      "learning_rate": 2.715436546261882e-07,
      "loss": 0.0155,
      "step": 2915,
      "video_reward_cumulative_accuracy": 0.8514579759862779
    },
    {
      "epoch": 0.8655387355298308,
      "grad_norm": 2.9890527725219727,
      "learning_rate": 2.703707642909914e-07,
      "loss": 0.0832,
      "step": 2916,
      "video_reward_cumulative_accuracy": 0.8513374485596708
    },
    {
      "epoch": 0.8658355595132087,
      "grad_norm": 0.4092223644256592,
      "learning_rate": 2.6920026767918163e-07,
      "loss": 0.0084,
      "step": 2917,
      "video_reward_cumulative_accuracy": 0.8513884127528283
    },
    {
      "epoch": 0.8661323834965865,
      "grad_norm": 3.3004024028778076,
      "learning_rate": 2.680321660474011e-07,
      "loss": 0.054,
      "step": 2918,
      "video_reward_cumulative_accuracy": 0.8514393420150789
    },
    {
      "epoch": 0.8664292074799644,
      "grad_norm": 2.0161333084106445,
      "learning_rate": 2.6686646064971983e-07,
      "loss": 0.0137,
      "step": 2919,
      "video_reward_cumulative_accuracy": 0.8514902363823227
    },
    {
      "epoch": 0.8667260314633423,
      "grad_norm": 1.7455112934112549,
      "learning_rate": 2.6570315273763663e-07,
      "loss": 0.0391,
      "step": 2920,
      "video_reward_cumulative_accuracy": 0.8515410958904109
    },
    {
      "epoch": 0.8670228554467201,
      "grad_norm": 1.8997581005096436,
      "learning_rate": 2.6454224356007417e-07,
      "loss": 0.0372,
      "step": 2921,
      "video_reward_cumulative_accuracy": 0.8515919205751455
    },
    {
      "epoch": 0.8673196794300979,
      "grad_norm": 3.565519094467163,
      "learning_rate": 2.633837343633816e-07,
      "loss": 0.0423,
      "step": 2922,
      "video_reward_cumulative_accuracy": 0.8516427104722792
    },
    {
      "epoch": 0.8676165034134758,
      "grad_norm": 2.546748399734497,
      "learning_rate": 2.622276263913312e-07,
      "loss": 0.0663,
      "step": 2923,
      "video_reward_cumulative_accuracy": 0.8515224084844338
    },
    {
      "epoch": 0.8679133273968537,
      "grad_norm": 0.6215806603431702,
      "learning_rate": 2.6107392088511706e-07,
      "loss": 0.0154,
      "step": 2924,
      "video_reward_cumulative_accuracy": 0.8515731874145007
    },
    {
      "epoch": 0.8682101513802315,
      "grad_norm": 2.480520009994507,
      "learning_rate": 2.5992261908335454e-07,
      "loss": 0.0223,
      "step": 2925,
      "video_reward_cumulative_accuracy": 0.8516239316239316
    },
    {
      "epoch": 0.8685069753636094,
      "grad_norm": 2.851381301879883,
      "learning_rate": 2.587737222220765e-07,
      "loss": 0.0466,
      "step": 2926,
      "video_reward_cumulative_accuracy": 0.8515037593984962
    },
    {
      "epoch": 0.8688037993469873,
      "grad_norm": 1.1734604835510254,
      "learning_rate": 2.576272315347361e-07,
      "loss": 0.0104,
      "step": 2927,
      "video_reward_cumulative_accuracy": 0.8515544926545952
    },
    {
      "epoch": 0.8691006233303651,
      "grad_norm": 2.432842493057251,
      "learning_rate": 2.5648314825220233e-07,
      "loss": 0.0306,
      "step": 2928,
      "video_reward_cumulative_accuracy": 0.8516051912568307
    },
    {
      "epoch": 0.8693974473137429,
      "grad_norm": 1.94566011428833,
      "learning_rate": 2.5534147360276014e-07,
      "loss": 0.0697,
      "step": 2929,
      "video_reward_cumulative_accuracy": 0.8514851485148515
    },
    {
      "epoch": 0.8696942712971208,
      "grad_norm": 3.096381902694702,
      "learning_rate": 2.542022088121068e-07,
      "loss": 0.0651,
      "step": 2930,
      "video_reward_cumulative_accuracy": 0.8515358361774744
    },
    {
      "epoch": 0.8699910952804987,
      "grad_norm": 1.368943452835083,
      "learning_rate": 2.530653551033546e-07,
      "loss": 0.0131,
      "step": 2931,
      "video_reward_cumulative_accuracy": 0.8515864892528148
    },
    {
      "epoch": 0.8702879192638765,
      "grad_norm": 4.917116165161133,
      "learning_rate": 2.519309136970258e-07,
      "loss": 0.0782,
      "step": 2932,
      "video_reward_cumulative_accuracy": 0.8516371077762619
    },
    {
      "epoch": 0.8705847432472544,
      "grad_norm": 3.473444700241089,
      "learning_rate": 2.507988858110538e-07,
      "loss": 0.0313,
      "step": 2933,
      "video_reward_cumulative_accuracy": 0.8516876917831572
    },
    {
      "epoch": 0.8708815672306323,
      "grad_norm": 0.6671600937843323,
      "learning_rate": 2.4966927266078077e-07,
      "loss": 0.008,
      "step": 2934,
      "video_reward_cumulative_accuracy": 0.8517382413087935
    },
    {
      "epoch": 0.8711783912140101,
      "grad_norm": 0.5415387749671936,
      "learning_rate": 2.4854207545895515e-07,
      "loss": 0.0091,
      "step": 2935,
      "video_reward_cumulative_accuracy": 0.8517887563884157
    },
    {
      "epoch": 0.8714752151973879,
      "grad_norm": 1.7057019472122192,
      "learning_rate": 2.474172954157328e-07,
      "loss": 0.0371,
      "step": 2936,
      "video_reward_cumulative_accuracy": 0.8518392370572208
    },
    {
      "epoch": 0.8717720391807658,
      "grad_norm": 1.5639770030975342,
      "learning_rate": 2.462949337386744e-07,
      "loss": 0.0343,
      "step": 2937,
      "video_reward_cumulative_accuracy": 0.8518896833503575
    },
    {
      "epoch": 0.8720688631641437,
      "grad_norm": 2.864452362060547,
      "learning_rate": 2.4517499163274395e-07,
      "loss": 0.0471,
      "step": 2938,
      "video_reward_cumulative_accuracy": 0.8519400953029271
    },
    {
      "epoch": 0.8723656871475215,
      "grad_norm": 2.2662036418914795,
      "learning_rate": 2.4405747030030903e-07,
      "loss": 0.0436,
      "step": 2939,
      "video_reward_cumulative_accuracy": 0.851990472949983
    },
    {
      "epoch": 0.8726625111308994,
      "grad_norm": 0.40300530195236206,
      "learning_rate": 2.4294237094113576e-07,
      "loss": 0.0072,
      "step": 2940,
      "video_reward_cumulative_accuracy": 0.8520408163265306
    },
    {
      "epoch": 0.8729593351142773,
      "grad_norm": 5.474096298217773,
      "learning_rate": 2.418296947523927e-07,
      "loss": 0.0834,
      "step": 2941,
      "video_reward_cumulative_accuracy": 0.8520911254675281
    },
    {
      "epoch": 0.8732561590976551,
      "grad_norm": 2.9174013137817383,
      "learning_rate": 2.4071944292864414e-07,
      "loss": 0.0285,
      "step": 2942,
      "video_reward_cumulative_accuracy": 0.8521414004078858
    },
    {
      "epoch": 0.8735529830810329,
      "grad_norm": 2.5676088333129883,
      "learning_rate": 2.3961161666185476e-07,
      "loss": 0.027,
      "step": 2943,
      "video_reward_cumulative_accuracy": 0.8521916411824668
    },
    {
      "epoch": 0.8738498070644108,
      "grad_norm": 2.0752131938934326,
      "learning_rate": 2.385062171413838e-07,
      "loss": 0.0428,
      "step": 2944,
      "video_reward_cumulative_accuracy": 0.8522418478260869
    },
    {
      "epoch": 0.8741466310477887,
      "grad_norm": 1.9137473106384277,
      "learning_rate": 2.3740324555398346e-07,
      "loss": 0.0525,
      "step": 2945,
      "video_reward_cumulative_accuracy": 0.8522920203735145
    },
    {
      "epoch": 0.8744434550311665,
      "grad_norm": 2.5203845500946045,
      "learning_rate": 2.3630270308380221e-07,
      "loss": 0.0348,
      "step": 2946,
      "video_reward_cumulative_accuracy": 0.8521724372029871
    },
    {
      "epoch": 0.8747402790145444,
      "grad_norm": 1.3323040008544922,
      "learning_rate": 2.352045909123779e-07,
      "loss": 0.0112,
      "step": 2947,
      "video_reward_cumulative_accuracy": 0.8522225992534781
    },
    {
      "epoch": 0.8750371029979223,
      "grad_norm": 0.23800607025623322,
      "learning_rate": 2.3410891021864058e-07,
      "loss": 0.002,
      "step": 2948,
      "video_reward_cumulative_accuracy": 0.8522727272727273
    },
    {
      "epoch": 0.8753339269813001,
      "grad_norm": 1.832039475440979,
      "learning_rate": 2.3301566217891148e-07,
      "loss": 0.0263,
      "step": 2949,
      "video_reward_cumulative_accuracy": 0.8523228212953543
    },
    {
      "epoch": 0.8756307509646779,
      "grad_norm": 2.428018569946289,
      "learning_rate": 2.31924847966897e-07,
      "loss": 0.0579,
      "step": 2950,
      "video_reward_cumulative_accuracy": 0.8520338983050847
    },
    {
      "epoch": 0.8759275749480558,
      "grad_norm": 1.4113751649856567,
      "learning_rate": 2.3083646875369293e-07,
      "loss": 0.0297,
      "step": 2951,
      "video_reward_cumulative_accuracy": 0.8520840393087089
    },
    {
      "epoch": 0.8762243989314337,
      "grad_norm": 2.2765519618988037,
      "learning_rate": 2.2975052570777896e-07,
      "loss": 0.0271,
      "step": 2952,
      "video_reward_cumulative_accuracy": 0.8519647696476965
    },
    {
      "epoch": 0.8765212229148115,
      "grad_norm": 1.4904448986053467,
      "learning_rate": 2.2866701999502083e-07,
      "loss": 0.0244,
      "step": 2953,
      "video_reward_cumulative_accuracy": 0.8520149001015916
    },
    {
      "epoch": 0.8768180468981893,
      "grad_norm": 2.649986505508423,
      "learning_rate": 2.275859527786675e-07,
      "loss": 0.0505,
      "step": 2954,
      "video_reward_cumulative_accuracy": 0.8518957345971564
    },
    {
      "epoch": 0.8771148708815673,
      "grad_norm": 2.690307140350342,
      "learning_rate": 2.2650732521934891e-07,
      "loss": 0.0813,
      "step": 2955,
      "video_reward_cumulative_accuracy": 0.8519458544839256
    },
    {
      "epoch": 0.8774116948649451,
      "grad_norm": 1.8081152439117432,
      "learning_rate": 2.2543113847507735e-07,
      "loss": 0.0534,
      "step": 2956,
      "video_reward_cumulative_accuracy": 0.8519959404600812
    },
    {
      "epoch": 0.8777085188483229,
      "grad_norm": 1.8062191009521484,
      "learning_rate": 2.2435739370124277e-07,
      "loss": 0.0196,
      "step": 2957,
      "video_reward_cumulative_accuracy": 0.852045992560027
    },
    {
      "epoch": 0.8780053428317008,
      "grad_norm": 1.1374989748001099,
      "learning_rate": 2.2328609205061442e-07,
      "loss": 0.0244,
      "step": 2958,
      "video_reward_cumulative_accuracy": 0.8520960108181204
    },
    {
      "epoch": 0.8783021668150787,
      "grad_norm": 0.8241883516311646,
      "learning_rate": 2.2221723467333922e-07,
      "loss": 0.022,
      "step": 2959,
      "video_reward_cumulative_accuracy": 0.8521459952686719
    },
    {
      "epoch": 0.8785989907984565,
      "grad_norm": 2.666593551635742,
      "learning_rate": 2.21150822716939e-07,
      "loss": 0.0145,
      "step": 2960,
      "video_reward_cumulative_accuracy": 0.8521959459459459
    },
    {
      "epoch": 0.8788958147818343,
      "grad_norm": 1.3375494480133057,
      "learning_rate": 2.2008685732631096e-07,
      "loss": 0.0177,
      "step": 2961,
      "video_reward_cumulative_accuracy": 0.8522458628841607
    },
    {
      "epoch": 0.8791926387652123,
      "grad_norm": 3.6023035049438477,
      "learning_rate": 2.1902533964372448e-07,
      "loss": 0.0534,
      "step": 2962,
      "video_reward_cumulative_accuracy": 0.8521269412559082
    },
    {
      "epoch": 0.8794894627485901,
      "grad_norm": 3.426396608352661,
      "learning_rate": 2.1796627080882205e-07,
      "loss": 0.0531,
      "step": 2963,
      "video_reward_cumulative_accuracy": 0.8521768477894026
    },
    {
      "epoch": 0.8797862867319679,
      "grad_norm": 2.153428077697754,
      "learning_rate": 2.1690965195861668e-07,
      "loss": 0.0237,
      "step": 2964,
      "video_reward_cumulative_accuracy": 0.8522267206477733
    },
    {
      "epoch": 0.8800831107153458,
      "grad_norm": 1.4342325925827026,
      "learning_rate": 2.1585548422749236e-07,
      "loss": 0.0163,
      "step": 2965,
      "video_reward_cumulative_accuracy": 0.8522765598650928
    },
    {
      "epoch": 0.8803799346987237,
      "grad_norm": 1.447503924369812,
      "learning_rate": 2.1480376874719876e-07,
      "loss": 0.0233,
      "step": 2966,
      "video_reward_cumulative_accuracy": 0.8523263654753878
    },
    {
      "epoch": 0.8806767586821015,
      "grad_norm": 2.423767328262329,
      "learning_rate": 2.1375450664685577e-07,
      "loss": 0.0742,
      "step": 2967,
      "video_reward_cumulative_accuracy": 0.8523761375126391
    },
    {
      "epoch": 0.8809735826654793,
      "grad_norm": 2.4350879192352295,
      "learning_rate": 2.1270769905294752e-07,
      "loss": 0.0423,
      "step": 2968,
      "video_reward_cumulative_accuracy": 0.8524258760107817
    },
    {
      "epoch": 0.8812704066488573,
      "grad_norm": 4.910870552062988,
      "learning_rate": 2.1166334708932367e-07,
      "loss": 0.051,
      "step": 2969,
      "video_reward_cumulative_accuracy": 0.8524755810037049
    },
    {
      "epoch": 0.8815672306322351,
      "grad_norm": 3.4264378547668457,
      "learning_rate": 2.1062145187719818e-07,
      "loss": 0.0618,
      "step": 2970,
      "video_reward_cumulative_accuracy": 0.8525252525252526
    },
    {
      "epoch": 0.8818640546156129,
      "grad_norm": 0.8235085010528564,
      "learning_rate": 2.0958201453514515e-07,
      "loss": 0.0156,
      "step": 2971,
      "video_reward_cumulative_accuracy": 0.8525748906092225
    },
    {
      "epoch": 0.8821608785989908,
      "grad_norm": 3.6554598808288574,
      "learning_rate": 2.0854503617910278e-07,
      "loss": 0.0275,
      "step": 2972,
      "video_reward_cumulative_accuracy": 0.8526244952893675
    },
    {
      "epoch": 0.8824577025823687,
      "grad_norm": 1.463263750076294,
      "learning_rate": 2.0751051792236714e-07,
      "loss": 0.022,
      "step": 2973,
      "video_reward_cumulative_accuracy": 0.8526740665993946
    },
    {
      "epoch": 0.8827545265657465,
      "grad_norm": 2.6287763118743896,
      "learning_rate": 2.0647846087559459e-07,
      "loss": 0.0397,
      "step": 2974,
      "video_reward_cumulative_accuracy": 0.8527236045729657
    },
    {
      "epoch": 0.8830513505491243,
      "grad_norm": 1.2779161930084229,
      "learning_rate": 2.0544886614679848e-07,
      "loss": 0.0286,
      "step": 2975,
      "video_reward_cumulative_accuracy": 0.8526050420168068
    },
    {
      "epoch": 0.8833481745325023,
      "grad_norm": 2.465494394302368,
      "learning_rate": 2.0442173484134826e-07,
      "loss": 0.0588,
      "step": 2976,
      "video_reward_cumulative_accuracy": 0.8526545698924731
    },
    {
      "epoch": 0.8836449985158801,
      "grad_norm": 1.980695128440857,
      "learning_rate": 2.033970680619693e-07,
      "loss": 0.0563,
      "step": 2977,
      "video_reward_cumulative_accuracy": 0.8527040644944575
    },
    {
      "epoch": 0.8839418224992579,
      "grad_norm": 1.9491227865219116,
      "learning_rate": 2.023748669087408e-07,
      "loss": 0.0332,
      "step": 2978,
      "video_reward_cumulative_accuracy": 0.8525856279382136
    },
    {
      "epoch": 0.8842386464826358,
      "grad_norm": 2.006883144378662,
      "learning_rate": 2.0135513247909493e-07,
      "loss": 0.0141,
      "step": 2979,
      "video_reward_cumulative_accuracy": 0.8526351124538436
    },
    {
      "epoch": 0.8845354704660137,
      "grad_norm": 2.424649715423584,
      "learning_rate": 2.0033786586781624e-07,
      "loss": 0.0493,
      "step": 2980,
      "video_reward_cumulative_accuracy": 0.85251677852349
    },
    {
      "epoch": 0.8848322944493915,
      "grad_norm": 2.046546697616577,
      "learning_rate": 1.9932306816703773e-07,
      "loss": 0.084,
      "step": 2981,
      "video_reward_cumulative_accuracy": 0.8525662529352567
    },
    {
      "epoch": 0.8851291184327693,
      "grad_norm": 1.7148088216781616,
      "learning_rate": 1.9831074046624488e-07,
      "loss": 0.0261,
      "step": 2982,
      "video_reward_cumulative_accuracy": 0.85261569416499
    },
    {
      "epoch": 0.8854259424161472,
      "grad_norm": 1.020740032196045,
      "learning_rate": 1.9730088385226774e-07,
      "loss": 0.01,
      "step": 2983,
      "video_reward_cumulative_accuracy": 0.852665102246061
    },
    {
      "epoch": 0.8857227663995251,
      "grad_norm": 1.4954924583435059,
      "learning_rate": 1.9629349940928715e-07,
      "loss": 0.0195,
      "step": 2984,
      "video_reward_cumulative_accuracy": 0.8527144772117963
    },
    {
      "epoch": 0.8860195903829029,
      "grad_norm": 0.6353304982185364,
      "learning_rate": 1.952885882188277e-07,
      "loss": 0.023,
      "step": 2985,
      "video_reward_cumulative_accuracy": 0.8527638190954774
    },
    {
      "epoch": 0.8863164143662808,
      "grad_norm": 1.529733657836914,
      "learning_rate": 1.9428615135975855e-07,
      "loss": 0.018,
      "step": 2986,
      "video_reward_cumulative_accuracy": 0.8528131279303416
    },
    {
      "epoch": 0.8866132383496587,
      "grad_norm": 2.5808000564575195,
      "learning_rate": 1.9328618990829384e-07,
      "loss": 0.0279,
      "step": 2987,
      "video_reward_cumulative_accuracy": 0.8528624037495816
    },
    {
      "epoch": 0.8869100623330365,
      "grad_norm": 1.363269329071045,
      "learning_rate": 1.9228870493798763e-07,
      "loss": 0.0136,
      "step": 2988,
      "video_reward_cumulative_accuracy": 0.8529116465863453
    },
    {
      "epoch": 0.8872068863164143,
      "grad_norm": 0.9487797021865845,
      "learning_rate": 1.912936975197388e-07,
      "loss": 0.0137,
      "step": 2989,
      "video_reward_cumulative_accuracy": 0.852960856473737
    },
    {
      "epoch": 0.8875037102997922,
      "grad_norm": 0.6261467337608337,
      "learning_rate": 1.9030116872178317e-07,
      "loss": 0.0065,
      "step": 2990,
      "video_reward_cumulative_accuracy": 0.8530100334448161
    },
    {
      "epoch": 0.8878005342831701,
      "grad_norm": 1.148743987083435,
      "learning_rate": 1.8931111960969694e-07,
      "loss": 0.0226,
      "step": 2991,
      "video_reward_cumulative_accuracy": 0.8530591775325977
    },
    {
      "epoch": 0.8880973582665479,
      "grad_norm": 2.52793025970459,
      "learning_rate": 1.8832355124639463e-07,
      "loss": 0.0447,
      "step": 2992,
      "video_reward_cumulative_accuracy": 0.852774064171123
    },
    {
      "epoch": 0.8883941822499258,
      "grad_norm": 1.2899773120880127,
      "learning_rate": 1.873384646921253e-07,
      "loss": 0.0289,
      "step": 2993,
      "video_reward_cumulative_accuracy": 0.8528232542599399
    },
    {
      "epoch": 0.8886910062333037,
      "grad_norm": 1.8950523138046265,
      "learning_rate": 1.8635586100447633e-07,
      "loss": 0.0585,
      "step": 2994,
      "video_reward_cumulative_accuracy": 0.852872411489646
    },
    {
      "epoch": 0.8889878302166815,
      "grad_norm": 2.048049211502075,
      "learning_rate": 1.8537574123836748e-07,
      "loss": 0.0715,
      "step": 2995,
      "video_reward_cumulative_accuracy": 0.8529215358931552
    },
    {
      "epoch": 0.8892846542000593,
      "grad_norm": 1.89472496509552,
      "learning_rate": 1.843981064460529e-07,
      "loss": 0.0298,
      "step": 2996,
      "video_reward_cumulative_accuracy": 0.8529706275033377
    },
    {
      "epoch": 0.8895814781834372,
      "grad_norm": 2.2065141201019287,
      "learning_rate": 1.8342295767711794e-07,
      "loss": 0.0269,
      "step": 2997,
      "video_reward_cumulative_accuracy": 0.8530196863530197
    },
    {
      "epoch": 0.8898783021668151,
      "grad_norm": 1.0499368906021118,
      "learning_rate": 1.8245029597847907e-07,
      "loss": 0.006,
      "step": 2998,
      "video_reward_cumulative_accuracy": 0.8530687124749833
    },
    {
      "epoch": 0.8901751261501929,
      "grad_norm": 3.8206775188446045,
      "learning_rate": 1.8148012239438434e-07,
      "loss": 0.033,
      "step": 2999,
      "video_reward_cumulative_accuracy": 0.8531177059019673
    },
    {
      "epoch": 0.8904719501335708,
      "grad_norm": 3.880343198776245,
      "learning_rate": 1.8051243796640805e-07,
      "loss": 0.0258,
      "step": 3000,
      "video_reward_cumulative_accuracy": 0.8531666666666666
    },
    {
      "epoch": 0.8904719501335708,
      "eval_runtime": 132.9639,
      "eval_samples_per_second": 5.934,
      "eval_steps_per_second": 0.745,
      "eval_test_set_accuracy": 0.8169191919191919,
      "step": 3000
    },
    {
      "epoch": 0.8907687741169487,
      "grad_norm": 1.0296834707260132,
      "learning_rate": 1.7954724373345445e-07,
      "loss": 0.0095,
      "step": 3001,
      "video_reward_cumulative_accuracy": 0.8532155948017327
    },
    {
      "epoch": 0.8910655981003265,
      "grad_norm": 1.028411626815796,
      "learning_rate": 1.7858454073175185e-07,
      "loss": 0.0088,
      "step": 3002,
      "video_reward_cumulative_accuracy": 0.8532644903397735
    },
    {
      "epoch": 0.8913624220837043,
      "grad_norm": 3.756903886795044,
      "learning_rate": 1.776243299948563e-07,
      "loss": 0.0585,
      "step": 3003,
      "video_reward_cumulative_accuracy": 0.8533133533133533
    },
    {
      "epoch": 0.8916592460670822,
      "grad_norm": 2.9036705493927,
      "learning_rate": 1.7666661255364704e-07,
      "loss": 0.0412,
      "step": 3004,
      "video_reward_cumulative_accuracy": 0.8533621837549934
    },
    {
      "epoch": 0.8919560700504601,
      "grad_norm": 2.294867992401123,
      "learning_rate": 1.7571138943632688e-07,
      "loss": 0.0249,
      "step": 3005,
      "video_reward_cumulative_accuracy": 0.8534109816971713
    },
    {
      "epoch": 0.8922528940338379,
      "grad_norm": 3.16927170753479,
      "learning_rate": 1.7475866166842048e-07,
      "loss": 0.0349,
      "step": 3006,
      "video_reward_cumulative_accuracy": 0.853459747172322
    },
    {
      "epoch": 0.8925497180172158,
      "grad_norm": 1.4870845079421997,
      "learning_rate": 1.73808430272773e-07,
      "loss": 0.0282,
      "step": 3007,
      "video_reward_cumulative_accuracy": 0.8535084802128368
    },
    {
      "epoch": 0.8928465420005937,
      "grad_norm": 2.188786029815674,
      "learning_rate": 1.728606962695506e-07,
      "loss": 0.0509,
      "step": 3008,
      "video_reward_cumulative_accuracy": 0.8535571808510638
    },
    {
      "epoch": 0.8931433659839715,
      "grad_norm": 3.7729499340057373,
      "learning_rate": 1.7191546067623772e-07,
      "loss": 0.0906,
      "step": 3009,
      "video_reward_cumulative_accuracy": 0.8534396809571286
    },
    {
      "epoch": 0.8934401899673493,
      "grad_norm": 3.361833095550537,
      "learning_rate": 1.7097272450763646e-07,
      "loss": 0.0877,
      "step": 3010,
      "video_reward_cumulative_accuracy": 0.8533222591362126
    },
    {
      "epoch": 0.8937370139507272,
      "grad_norm": 2.5203890800476074,
      "learning_rate": 1.7003248877586558e-07,
      "loss": 0.0243,
      "step": 3011,
      "video_reward_cumulative_accuracy": 0.8533709730986383
    },
    {
      "epoch": 0.8940338379341051,
      "grad_norm": 1.8145414590835571,
      "learning_rate": 1.6909475449035929e-07,
      "loss": 0.0231,
      "step": 3012,
      "video_reward_cumulative_accuracy": 0.8534196547144755
    },
    {
      "epoch": 0.8943306619174829,
      "grad_norm": 3.811539888381958,
      "learning_rate": 1.6815952265786638e-07,
      "loss": 0.0447,
      "step": 3013,
      "video_reward_cumulative_accuracy": 0.8533023564553601
    },
    {
      "epoch": 0.8946274859008608,
      "grad_norm": 3.9783248901367188,
      "learning_rate": 1.6722679428244903e-07,
      "loss": 0.0442,
      "step": 3014,
      "video_reward_cumulative_accuracy": 0.8533510285335103
    },
    {
      "epoch": 0.8949243098842387,
      "grad_norm": 0.7299622893333435,
      "learning_rate": 1.6629657036548175e-07,
      "loss": 0.01,
      "step": 3015,
      "video_reward_cumulative_accuracy": 0.8533996683250414
    },
    {
      "epoch": 0.8952211338676165,
      "grad_norm": 3.1430046558380127,
      "learning_rate": 1.6536885190565066e-07,
      "loss": 0.0237,
      "step": 3016,
      "video_reward_cumulative_accuracy": 0.8532824933687002
    },
    {
      "epoch": 0.8955179578509943,
      "grad_norm": 1.1799105405807495,
      "learning_rate": 1.6444363989895046e-07,
      "loss": 0.0112,
      "step": 3017,
      "video_reward_cumulative_accuracy": 0.8533311236327478
    },
    {
      "epoch": 0.8958147818343722,
      "grad_norm": 2.950824022293091,
      "learning_rate": 1.6352093533868658e-07,
      "loss": 0.0471,
      "step": 3018,
      "video_reward_cumulative_accuracy": 0.8532140490390987
    },
    {
      "epoch": 0.8961116058177501,
      "grad_norm": 2.92531156539917,
      "learning_rate": 1.6260073921547215e-07,
      "loss": 0.0304,
      "step": 3019,
      "video_reward_cumulative_accuracy": 0.8532626697581981
    },
    {
      "epoch": 0.8964084298011279,
      "grad_norm": 2.9492204189300537,
      "learning_rate": 1.616830525172272e-07,
      "loss": 0.0292,
      "step": 3020,
      "video_reward_cumulative_accuracy": 0.8533112582781457
    },
    {
      "epoch": 0.8967052537845058,
      "grad_norm": 1.7324609756469727,
      "learning_rate": 1.6076787622917673e-07,
      "loss": 0.0184,
      "step": 3021,
      "video_reward_cumulative_accuracy": 0.8533598146309169
    },
    {
      "epoch": 0.8970020777678837,
      "grad_norm": 2.3193140029907227,
      "learning_rate": 1.5985521133385168e-07,
      "loss": 0.0551,
      "step": 3022,
      "video_reward_cumulative_accuracy": 0.8534083388484447
    },
    {
      "epoch": 0.8972989017512615,
      "grad_norm": 3.8793790340423584,
      "learning_rate": 1.5894505881108635e-07,
      "loss": 0.0541,
      "step": 3023,
      "video_reward_cumulative_accuracy": 0.8531260337413166
    },
    {
      "epoch": 0.8975957257346393,
      "grad_norm": 1.8072460889816284,
      "learning_rate": 1.58037419638018e-07,
      "loss": 0.0564,
      "step": 3024,
      "video_reward_cumulative_accuracy": 0.8531746031746031
    },
    {
      "epoch": 0.8978925497180172,
      "grad_norm": 3.4717845916748047,
      "learning_rate": 1.5713229478908577e-07,
      "loss": 0.0491,
      "step": 3025,
      "video_reward_cumulative_accuracy": 0.8532231404958678
    },
    {
      "epoch": 0.8981893737013951,
      "grad_norm": 0.3415747880935669,
      "learning_rate": 1.562296852360279e-07,
      "loss": 0.0053,
      "step": 3026,
      "video_reward_cumulative_accuracy": 0.8532716457369465
    },
    {
      "epoch": 0.8984861976847729,
      "grad_norm": 1.9290162324905396,
      "learning_rate": 1.5532959194788395e-07,
      "loss": 0.035,
      "step": 3027,
      "video_reward_cumulative_accuracy": 0.8529897588371325
    },
    {
      "epoch": 0.8987830216681508,
      "grad_norm": 3.8427798748016357,
      "learning_rate": 1.5443201589099149e-07,
      "loss": 0.0378,
      "step": 3028,
      "video_reward_cumulative_accuracy": 0.8530383091149274
    },
    {
      "epoch": 0.8990798456515287,
      "grad_norm": 1.119760513305664,
      "learning_rate": 1.5353695802898556e-07,
      "loss": 0.0185,
      "step": 3029,
      "video_reward_cumulative_accuracy": 0.8530868273357544
    },
    {
      "epoch": 0.8993766696349065,
      "grad_norm": 1.9450827836990356,
      "learning_rate": 1.52644419322798e-07,
      "loss": 0.0364,
      "step": 3030,
      "video_reward_cumulative_accuracy": 0.8531353135313532
    },
    {
      "epoch": 0.8996734936182843,
      "grad_norm": 1.9632068872451782,
      "learning_rate": 1.5175440073065485e-07,
      "loss": 0.0607,
      "step": 3031,
      "video_reward_cumulative_accuracy": 0.8531837677334213
    },
    {
      "epoch": 0.8999703176016622,
      "grad_norm": 0.8110266327857971,
      "learning_rate": 1.508669032080781e-07,
      "loss": 0.0176,
      "step": 3032,
      "video_reward_cumulative_accuracy": 0.8532321899736148
    },
    {
      "epoch": 0.9002671415850401,
      "grad_norm": 1.5525119304656982,
      "learning_rate": 1.499819277078818e-07,
      "loss": 0.0092,
      "step": 3033,
      "video_reward_cumulative_accuracy": 0.8532805802835477
    },
    {
      "epoch": 0.9005639655684179,
      "grad_norm": 1.3226412534713745,
      "learning_rate": 1.4909947518017387e-07,
      "loss": 0.0238,
      "step": 3034,
      "video_reward_cumulative_accuracy": 0.8533289386947923
    },
    {
      "epoch": 0.9008607895517958,
      "grad_norm": 0.9626790881156921,
      "learning_rate": 1.4821954657235266e-07,
      "loss": 0.012,
      "step": 3035,
      "video_reward_cumulative_accuracy": 0.8533772652388797
    },
    {
      "epoch": 0.9011576135351737,
      "grad_norm": 0.8461338877677917,
      "learning_rate": 1.4734214282910664e-07,
      "loss": 0.0223,
      "step": 3036,
      "video_reward_cumulative_accuracy": 0.8532608695652174
    },
    {
      "epoch": 0.9014544375185515,
      "grad_norm": 4.338505268096924,
      "learning_rate": 1.4646726489241397e-07,
      "loss": 0.0458,
      "step": 3037,
      "video_reward_cumulative_accuracy": 0.8531445505432993
    },
    {
      "epoch": 0.9017512615019293,
      "grad_norm": 1.510209560394287,
      "learning_rate": 1.4559491370154083e-07,
      "loss": 0.0465,
      "step": 3038,
      "video_reward_cumulative_accuracy": 0.8531928900592495
    },
    {
      "epoch": 0.9020480854853072,
      "grad_norm": 2.6341168880462646,
      "learning_rate": 1.4472509019304053e-07,
      "loss": 0.0466,
      "step": 3039,
      "video_reward_cumulative_accuracy": 0.8532411977624218
    },
    {
      "epoch": 0.9023449094686851,
      "grad_norm": 2.911071300506592,
      "learning_rate": 1.4385779530075444e-07,
      "loss": 0.0256,
      "step": 3040,
      "video_reward_cumulative_accuracy": 0.8532894736842105
    },
    {
      "epoch": 0.9026417334520629,
      "grad_norm": 2.444882869720459,
      "learning_rate": 1.4299302995580634e-07,
      "loss": 0.027,
      "step": 3041,
      "video_reward_cumulative_accuracy": 0.8533377178559685
    },
    {
      "epoch": 0.9029385574354408,
      "grad_norm": 4.529541969299316,
      "learning_rate": 1.4213079508660688e-07,
      "loss": 0.051,
      "step": 3042,
      "video_reward_cumulative_accuracy": 0.8532215647600263
    },
    {
      "epoch": 0.9032353814188187,
      "grad_norm": 3.1979563236236572,
      "learning_rate": 1.412710916188481e-07,
      "loss": 0.0291,
      "step": 3043,
      "video_reward_cumulative_accuracy": 0.8532697995399277
    },
    {
      "epoch": 0.9035322054021965,
      "grad_norm": 3.461008071899414,
      "learning_rate": 1.4041392047550506e-07,
      "loss": 0.0434,
      "step": 3044,
      "video_reward_cumulative_accuracy": 0.8533180026281209
    },
    {
      "epoch": 0.9038290293855743,
      "grad_norm": 3.109524726867676,
      "learning_rate": 1.3955928257683465e-07,
      "loss": 0.0798,
      "step": 3045,
      "video_reward_cumulative_accuracy": 0.8533661740558293
    },
    {
      "epoch": 0.9041258533689522,
      "grad_norm": 3.351715326309204,
      "learning_rate": 1.3870717884037321e-07,
      "loss": 0.0346,
      "step": 3046,
      "video_reward_cumulative_accuracy": 0.853414313854235
    },
    {
      "epoch": 0.9044226773523301,
      "grad_norm": 1.3447494506835938,
      "learning_rate": 1.3785761018093757e-07,
      "loss": 0.0162,
      "step": 3047,
      "video_reward_cumulative_accuracy": 0.8534624220544799
    },
    {
      "epoch": 0.9047195013357079,
      "grad_norm": 4.213351249694824,
      "learning_rate": 1.3701057751062146e-07,
      "loss": 0.0712,
      "step": 3048,
      "video_reward_cumulative_accuracy": 0.853510498687664
    },
    {
      "epoch": 0.9050163253190858,
      "grad_norm": 2.7610223293304443,
      "learning_rate": 1.3616608173879636e-07,
      "loss": 0.0292,
      "step": 3049,
      "video_reward_cumulative_accuracy": 0.8535585437848475
    },
    {
      "epoch": 0.9053131493024636,
      "grad_norm": 0.9333323240280151,
      "learning_rate": 1.3532412377211119e-07,
      "loss": 0.0066,
      "step": 3050,
      "video_reward_cumulative_accuracy": 0.8536065573770492
    },
    {
      "epoch": 0.9056099732858415,
      "grad_norm": 2.758444309234619,
      "learning_rate": 1.3448470451448896e-07,
      "loss": 0.0446,
      "step": 3051,
      "video_reward_cumulative_accuracy": 0.8536545394952475
    },
    {
      "epoch": 0.9059067972692193,
      "grad_norm": 1.3979527950286865,
      "learning_rate": 1.3364782486712857e-07,
      "loss": 0.016,
      "step": 3052,
      "video_reward_cumulative_accuracy": 0.8535386631716907
    },
    {
      "epoch": 0.9062036212525972,
      "grad_norm": 2.014826774597168,
      "learning_rate": 1.3281348572850045e-07,
      "loss": 0.0658,
      "step": 3053,
      "video_reward_cumulative_accuracy": 0.853422862757943
    },
    {
      "epoch": 0.9065004452359751,
      "grad_norm": 1.4265682697296143,
      "learning_rate": 1.3198168799434947e-07,
      "loss": 0.0116,
      "step": 3054,
      "video_reward_cumulative_accuracy": 0.8534708578912901
    },
    {
      "epoch": 0.9067972692193529,
      "grad_norm": 1.420961618423462,
      "learning_rate": 1.3115243255769072e-07,
      "loss": 0.0359,
      "step": 3055,
      "video_reward_cumulative_accuracy": 0.853518821603928
    },
    {
      "epoch": 0.9070940932027308,
      "grad_norm": 1.8929790258407593,
      "learning_rate": 1.3032572030881097e-07,
      "loss": 0.0078,
      "step": 3056,
      "video_reward_cumulative_accuracy": 0.8535667539267016
    },
    {
      "epoch": 0.9073909171861086,
      "grad_norm": 3.999789237976074,
      "learning_rate": 1.295015521352652e-07,
      "loss": 0.0465,
      "step": 3057,
      "video_reward_cumulative_accuracy": 0.8536146548904154
    },
    {
      "epoch": 0.9076877411694865,
      "grad_norm": 1.7095073461532593,
      "learning_rate": 1.2867992892187846e-07,
      "loss": 0.0234,
      "step": 3058,
      "video_reward_cumulative_accuracy": 0.8534990189666448
    },
    {
      "epoch": 0.9079845651528643,
      "grad_norm": 1.0274499654769897,
      "learning_rate": 1.2786085155074318e-07,
      "loss": 0.0247,
      "step": 3059,
      "video_reward_cumulative_accuracy": 0.8533834586466166
    },
    {
      "epoch": 0.9082813891362422,
      "grad_norm": 3.9192311763763428,
      "learning_rate": 1.2704432090121815e-07,
      "loss": 0.0385,
      "step": 3060,
      "video_reward_cumulative_accuracy": 0.8534313725490196
    },
    {
      "epoch": 0.9085782131196201,
      "grad_norm": 0.8952836394309998,
      "learning_rate": 1.2623033784992855e-07,
      "loss": 0.0113,
      "step": 3061,
      "video_reward_cumulative_accuracy": 0.8534792551453774
    },
    {
      "epoch": 0.9088750371029979,
      "grad_norm": 0.603430449962616,
      "learning_rate": 1.2541890327076368e-07,
      "loss": 0.0136,
      "step": 3062,
      "video_reward_cumulative_accuracy": 0.8535271064663619
    },
    {
      "epoch": 0.9091718610863758,
      "grad_norm": 1.0425788164138794,
      "learning_rate": 1.246100180348775e-07,
      "loss": 0.0135,
      "step": 3063,
      "video_reward_cumulative_accuracy": 0.8535749265426053
    },
    {
      "epoch": 0.9094686850697536,
      "grad_norm": 0.720369279384613,
      "learning_rate": 1.2380368301068728e-07,
      "loss": 0.0103,
      "step": 3064,
      "video_reward_cumulative_accuracy": 0.8536227154046997
    },
    {
      "epoch": 0.9097655090531315,
      "grad_norm": 1.9932835102081299,
      "learning_rate": 1.2299989906387116e-07,
      "loss": 0.0127,
      "step": 3065,
      "video_reward_cumulative_accuracy": 0.8536704730831974
    },
    {
      "epoch": 0.9100623330365093,
      "grad_norm": 1.871851921081543,
      "learning_rate": 1.221986670573702e-07,
      "loss": 0.0446,
      "step": 3066,
      "video_reward_cumulative_accuracy": 0.8537181996086106
    },
    {
      "epoch": 0.9103591570198872,
      "grad_norm": 3.508359909057617,
      "learning_rate": 1.2139998785138386e-07,
      "loss": 0.0373,
      "step": 3067,
      "video_reward_cumulative_accuracy": 0.8537658950114118
    },
    {
      "epoch": 0.9106559810032651,
      "grad_norm": 1.359955072402954,
      "learning_rate": 1.20603862303372e-07,
      "loss": 0.0153,
      "step": 3068,
      "video_reward_cumulative_accuracy": 0.8538135593220338
    },
    {
      "epoch": 0.9109528049866429,
      "grad_norm": 0.9213302135467529,
      "learning_rate": 1.1981029126805293e-07,
      "loss": 0.0142,
      "step": 3069,
      "video_reward_cumulative_accuracy": 0.85386119257087
    },
    {
      "epoch": 0.9112496289700208,
      "grad_norm": 0.6175004839897156,
      "learning_rate": 1.1901927559740178e-07,
      "loss": 0.0092,
      "step": 3070,
      "video_reward_cumulative_accuracy": 0.8539087947882736
    },
    {
      "epoch": 0.9115464529533986,
      "grad_norm": 1.5162551403045654,
      "learning_rate": 1.1823081614065146e-07,
      "loss": 0.0549,
      "step": 3071,
      "video_reward_cumulative_accuracy": 0.8537935525887334
    },
    {
      "epoch": 0.9118432769367765,
      "grad_norm": 0.9067859053611755,
      "learning_rate": 1.1744491374428845e-07,
      "loss": 0.0102,
      "step": 3072,
      "video_reward_cumulative_accuracy": 0.8538411458333334
    },
    {
      "epoch": 0.9121401009201543,
      "grad_norm": 1.3547265529632568,
      "learning_rate": 1.1666156925205619e-07,
      "loss": 0.048,
      "step": 3073,
      "video_reward_cumulative_accuracy": 0.8538887081028311
    },
    {
      "epoch": 0.9124369249035322,
      "grad_norm": 2.882599353790283,
      "learning_rate": 1.158807835049508e-07,
      "loss": 0.0496,
      "step": 3074,
      "video_reward_cumulative_accuracy": 0.8537735849056604
    },
    {
      "epoch": 0.9127337488869101,
      "grad_norm": 2.568582773208618,
      "learning_rate": 1.151025573412215e-07,
      "loss": 0.0347,
      "step": 3075,
      "video_reward_cumulative_accuracy": 0.8536585365853658
    },
    {
      "epoch": 0.9130305728702879,
      "grad_norm": 1.8911688327789307,
      "learning_rate": 1.1432689159636995e-07,
      "loss": 0.0527,
      "step": 3076,
      "video_reward_cumulative_accuracy": 0.85370611183355
    },
    {
      "epoch": 0.9133273968536658,
      "grad_norm": 1.7498602867126465,
      "learning_rate": 1.1355378710314779e-07,
      "loss": 0.0607,
      "step": 3077,
      "video_reward_cumulative_accuracy": 0.853753656158596
    },
    {
      "epoch": 0.9136242208370436,
      "grad_norm": 2.1445226669311523,
      "learning_rate": 1.1278324469155888e-07,
      "loss": 0.0548,
      "step": 3078,
      "video_reward_cumulative_accuracy": 0.8538011695906432
    },
    {
      "epoch": 0.9139210448204215,
      "grad_norm": 2.0121655464172363,
      "learning_rate": 1.120152651888537e-07,
      "loss": 0.0366,
      "step": 3079,
      "video_reward_cumulative_accuracy": 0.8538486521597921
    },
    {
      "epoch": 0.9142178688037993,
      "grad_norm": 2.319936990737915,
      "learning_rate": 1.1124984941953465e-07,
      "loss": 0.0509,
      "step": 3080,
      "video_reward_cumulative_accuracy": 0.8538961038961039
    },
    {
      "epoch": 0.9145146927871772,
      "grad_norm": 3.709453582763672,
      "learning_rate": 1.1048699820534831e-07,
      "loss": 0.0419,
      "step": 3081,
      "video_reward_cumulative_accuracy": 0.8539435248296008
    },
    {
      "epoch": 0.914811516770555,
      "grad_norm": 1.580238938331604,
      "learning_rate": 1.0972671236529037e-07,
      "loss": 0.0369,
      "step": 3082,
      "video_reward_cumulative_accuracy": 0.853990914990266
    },
    {
      "epoch": 0.9151083407539329,
      "grad_norm": 1.9665991067886353,
      "learning_rate": 1.0896899271560152e-07,
      "loss": 0.0545,
      "step": 3083,
      "video_reward_cumulative_accuracy": 0.8540382744080441
    },
    {
      "epoch": 0.9154051647373108,
      "grad_norm": 1.6465821266174316,
      "learning_rate": 1.0821384006976631e-07,
      "loss": 0.0158,
      "step": 3084,
      "video_reward_cumulative_accuracy": 0.8540856031128404
    },
    {
      "epoch": 0.9157019887206886,
      "grad_norm": 3.7942326068878174,
      "learning_rate": 1.074612552385157e-07,
      "loss": 0.0206,
      "step": 3085,
      "video_reward_cumulative_accuracy": 0.8541329011345219
    },
    {
      "epoch": 0.9159988127040665,
      "grad_norm": 0.7322973012924194,
      "learning_rate": 1.0671123902982166e-07,
      "loss": 0.015,
      "step": 3086,
      "video_reward_cumulative_accuracy": 0.8541801685029164
    },
    {
      "epoch": 0.9162956366874443,
      "grad_norm": 1.750848412513733,
      "learning_rate": 1.0596379224889986e-07,
      "loss": 0.0467,
      "step": 3087,
      "video_reward_cumulative_accuracy": 0.8540654356980888
    },
    {
      "epoch": 0.9165924606708222,
      "grad_norm": 3.0623202323913574,
      "learning_rate": 1.0521891569820698e-07,
      "loss": 0.05,
      "step": 3088,
      "video_reward_cumulative_accuracy": 0.8541126943005182
    },
    {
      "epoch": 0.9168892846542,
      "grad_norm": 1.9967219829559326,
      "learning_rate": 1.0447661017743971e-07,
      "loss": 0.0225,
      "step": 3089,
      "video_reward_cumulative_accuracy": 0.8539980576238265
    },
    {
      "epoch": 0.9171861086375779,
      "grad_norm": 1.6754310131072998,
      "learning_rate": 1.0373687648353586e-07,
      "loss": 0.016,
      "step": 3090,
      "video_reward_cumulative_accuracy": 0.8538834951456311
    },
    {
      "epoch": 0.9174829326209558,
      "grad_norm": 3.090818166732788,
      "learning_rate": 1.02999715410671e-07,
      "loss": 0.0264,
      "step": 3091,
      "video_reward_cumulative_accuracy": 0.8539307667421546
    },
    {
      "epoch": 0.9177797566043336,
      "grad_norm": 0.48801377415657043,
      "learning_rate": 1.0226512775025899e-07,
      "loss": 0.0097,
      "step": 3092,
      "video_reward_cumulative_accuracy": 0.8539780077619664
    },
    {
      "epoch": 0.9180765805877115,
      "grad_norm": 0.5541077852249146,
      "learning_rate": 1.015331142909512e-07,
      "loss": 0.0132,
      "step": 3093,
      "video_reward_cumulative_accuracy": 0.8538635628839315
    },
    {
      "epoch": 0.9183734045710893,
      "grad_norm": 6.074105739593506,
      "learning_rate": 1.0080367581863425e-07,
      "loss": 0.0787,
      "step": 3094,
      "video_reward_cumulative_accuracy": 0.8537491919844861
    },
    {
      "epoch": 0.9186702285544672,
      "grad_norm": 1.0763957500457764,
      "learning_rate": 1.0007681311643258e-07,
      "loss": 0.0124,
      "step": 3095,
      "video_reward_cumulative_accuracy": 0.8537964458804523
    },
    {
      "epoch": 0.918967052537845,
      "grad_norm": 1.8836250305175781,
      "learning_rate": 9.935252696470305e-08,
      "loss": 0.027,
      "step": 3096,
      "video_reward_cumulative_accuracy": 0.853843669250646
    },
    {
      "epoch": 0.9192638765212229,
      "grad_norm": 0.8730959296226501,
      "learning_rate": 9.863081814103725e-08,
      "loss": 0.0128,
      "step": 3097,
      "video_reward_cumulative_accuracy": 0.8538908621246367
    },
    {
      "epoch": 0.9195607005046008,
      "grad_norm": 1.2798937559127808,
      "learning_rate": 9.791168742025958e-08,
      "loss": 0.0128,
      "step": 3098,
      "video_reward_cumulative_accuracy": 0.8539380245319561
    },
    {
      "epoch": 0.9198575244879786,
      "grad_norm": 1.4854799509048462,
      "learning_rate": 9.719513557442661e-08,
      "loss": 0.0283,
      "step": 3099,
      "video_reward_cumulative_accuracy": 0.8539851565020975
    },
    {
      "epoch": 0.9201543484713565,
      "grad_norm": 1.6215635538101196,
      "learning_rate": 9.648116337282631e-08,
      "loss": 0.0177,
      "step": 3100,
      "video_reward_cumulative_accuracy": 0.8540322580645161
    },
    {
      "epoch": 0.9204511724547343,
      "grad_norm": 5.3873209953308105,
      "learning_rate": 9.57697715819772e-08,
      "loss": 0.0611,
      "step": 3101,
      "video_reward_cumulative_accuracy": 0.8540793292486295
    },
    {
      "epoch": 0.9207479964381122,
      "grad_norm": 1.7452203035354614,
      "learning_rate": 9.506096096562806e-08,
      "loss": 0.0156,
      "step": 3102,
      "video_reward_cumulative_accuracy": 0.8541263700838169
    },
    {
      "epoch": 0.92104482042149,
      "grad_norm": 1.4887933731079102,
      "learning_rate": 9.435473228475462e-08,
      "loss": 0.0424,
      "step": 3103,
      "video_reward_cumulative_accuracy": 0.8541733805994199
    },
    {
      "epoch": 0.9213416444048679,
      "grad_norm": 2.0373613834381104,
      "learning_rate": 9.365108629756259e-08,
      "loss": 0.0173,
      "step": 3104,
      "video_reward_cumulative_accuracy": 0.8542203608247423
    },
    {
      "epoch": 0.9216384683882458,
      "grad_norm": 4.531852722167969,
      "learning_rate": 9.295002375948436e-08,
      "loss": 0.0994,
      "step": 3105,
      "video_reward_cumulative_accuracy": 0.8541062801932368
    },
    {
      "epoch": 0.9219352923716236,
      "grad_norm": 0.6652273535728455,
      "learning_rate": 9.225154542317843e-08,
      "loss": 0.0133,
      "step": 3106,
      "video_reward_cumulative_accuracy": 0.8541532517707663
    },
    {
      "epoch": 0.9222321163550015,
      "grad_norm": 0.861679196357727,
      "learning_rate": 9.15556520385294e-08,
      "loss": 0.0291,
      "step": 3107,
      "video_reward_cumulative_accuracy": 0.854200193112327
    },
    {
      "epoch": 0.9225289403383793,
      "grad_norm": 2.65281343460083,
      "learning_rate": 9.086234435264574e-08,
      "loss": 0.0827,
      "step": 3108,
      "video_reward_cumulative_accuracy": 0.8540862290862291
    },
    {
      "epoch": 0.9228257643217572,
      "grad_norm": 1.5000420808792114,
      "learning_rate": 9.017162310986067e-08,
      "loss": 0.0087,
      "step": 3109,
      "video_reward_cumulative_accuracy": 0.8541331617883564
    },
    {
      "epoch": 0.923122588305135,
      "grad_norm": 0.5715723633766174,
      "learning_rate": 8.94834890517307e-08,
      "loss": 0.0047,
      "step": 3110,
      "video_reward_cumulative_accuracy": 0.8541800643086817
    },
    {
      "epoch": 0.9234194122885129,
      "grad_norm": 2.6026268005371094,
      "learning_rate": 8.879794291703464e-08,
      "loss": 0.0407,
      "step": 3111,
      "video_reward_cumulative_accuracy": 0.8542269366763099
    },
    {
      "epoch": 0.9237162362718908,
      "grad_norm": 1.6955796480178833,
      "learning_rate": 8.811498544177316e-08,
      "loss": 0.0212,
      "step": 3112,
      "video_reward_cumulative_accuracy": 0.8542737789203085
    },
    {
      "epoch": 0.9240130602552686,
      "grad_norm": 2.5438053607940674,
      "learning_rate": 8.743461735916642e-08,
      "loss": 0.039,
      "step": 3113,
      "video_reward_cumulative_accuracy": 0.8543205910697077
    },
    {
      "epoch": 0.9243098842386465,
      "grad_norm": 2.37009859085083,
      "learning_rate": 8.675683939965595e-08,
      "loss": 0.0194,
      "step": 3114,
      "video_reward_cumulative_accuracy": 0.8543673731535003
    },
    {
      "epoch": 0.9246067082220243,
      "grad_norm": 3.996877908706665,
      "learning_rate": 8.608165229090248e-08,
      "loss": 0.0388,
      "step": 3115,
      "video_reward_cumulative_accuracy": 0.8540930979133227
    },
    {
      "epoch": 0.9249035322054022,
      "grad_norm": 1.1192494630813599,
      "learning_rate": 8.540905675778504e-08,
      "loss": 0.0274,
      "step": 3116,
      "video_reward_cumulative_accuracy": 0.8541399229781772
    },
    {
      "epoch": 0.92520035618878,
      "grad_norm": 3.1228842735290527,
      "learning_rate": 8.473905352239936e-08,
      "loss": 0.0702,
      "step": 3117,
      "video_reward_cumulative_accuracy": 0.8541867179980751
    },
    {
      "epoch": 0.9254971801721579,
      "grad_norm": 2.0726311206817627,
      "learning_rate": 8.407164330405976e-08,
      "loss": 0.0352,
      "step": 3118,
      "video_reward_cumulative_accuracy": 0.8542334830019244
    },
    {
      "epoch": 0.9257940041555358,
      "grad_norm": 2.6938111782073975,
      "learning_rate": 8.34068268192953e-08,
      "loss": 0.0524,
      "step": 3119,
      "video_reward_cumulative_accuracy": 0.8542802180185957
    },
    {
      "epoch": 0.9260908281389136,
      "grad_norm": 2.6164710521698,
      "learning_rate": 8.27446047818517e-08,
      "loss": 0.0353,
      "step": 3120,
      "video_reward_cumulative_accuracy": 0.854326923076923
    },
    {
      "epoch": 0.9263876521222915,
      "grad_norm": 2.0380940437316895,
      "learning_rate": 8.208497790268833e-08,
      "loss": 0.0319,
      "step": 3121,
      "video_reward_cumulative_accuracy": 0.8543735982057034
    },
    {
      "epoch": 0.9266844761056693,
      "grad_norm": 1.6423115730285645,
      "learning_rate": 8.142794688997812e-08,
      "loss": 0.014,
      "step": 3122,
      "video_reward_cumulative_accuracy": 0.8544202434336964
    },
    {
      "epoch": 0.9269813000890472,
      "grad_norm": 2.5781912803649902,
      "learning_rate": 8.077351244910825e-08,
      "loss": 0.0401,
      "step": 3123,
      "video_reward_cumulative_accuracy": 0.8543067563240474
    },
    {
      "epoch": 0.927278124072425,
      "grad_norm": 2.5099053382873535,
      "learning_rate": 8.012167528267723e-08,
      "loss": 0.0258,
      "step": 3124,
      "video_reward_cumulative_accuracy": 0.8541933418693982
    },
    {
      "epoch": 0.9275749480558029,
      "grad_norm": 1.2233213186264038,
      "learning_rate": 7.947243609049581e-08,
      "loss": 0.0303,
      "step": 3125,
      "video_reward_cumulative_accuracy": 0.85408
    },
    {
      "epoch": 0.9278717720391808,
      "grad_norm": 2.0083823204040527,
      "learning_rate": 7.882579556958536e-08,
      "loss": 0.0336,
      "step": 3126,
      "video_reward_cumulative_accuracy": 0.8541266794625719
    },
    {
      "epoch": 0.9281685960225586,
      "grad_norm": 2.4612114429473877,
      "learning_rate": 7.818175441417692e-08,
      "loss": 0.0207,
      "step": 3127,
      "video_reward_cumulative_accuracy": 0.8541733290693956
    },
    {
      "epoch": 0.9284654200059365,
      "grad_norm": 2.7933738231658936,
      "learning_rate": 7.754031331571127e-08,
      "loss": 0.0375,
      "step": 3128,
      "video_reward_cumulative_accuracy": 0.8542199488491049
    },
    {
      "epoch": 0.9287622439893143,
      "grad_norm": 3.2718217372894287,
      "learning_rate": 7.690147296283757e-08,
      "loss": 0.0352,
      "step": 3129,
      "video_reward_cumulative_accuracy": 0.8541067433684884
    },
    {
      "epoch": 0.9290590679726922,
      "grad_norm": 0.4795750081539154,
      "learning_rate": 7.626523404141328e-08,
      "loss": 0.0169,
      "step": 3130,
      "video_reward_cumulative_accuracy": 0.8539936102236422
    },
    {
      "epoch": 0.92935589195607,
      "grad_norm": 2.361750602722168,
      "learning_rate": 7.563159723450259e-08,
      "loss": 0.0193,
      "step": 3131,
      "video_reward_cumulative_accuracy": 0.8540402427339508
    },
    {
      "epoch": 0.9296527159394479,
      "grad_norm": 1.1606436967849731,
      "learning_rate": 7.500056322237576e-08,
      "loss": 0.0291,
      "step": 3132,
      "video_reward_cumulative_accuracy": 0.8540868454661558
    },
    {
      "epoch": 0.9299495399228258,
      "grad_norm": 2.2710154056549072,
      "learning_rate": 7.437213268250948e-08,
      "loss": 0.0486,
      "step": 3133,
      "video_reward_cumulative_accuracy": 0.8539738270028726
    },
    {
      "epoch": 0.9302463639062036,
      "grad_norm": 1.7540185451507568,
      "learning_rate": 7.374630628958462e-08,
      "loss": 0.0182,
      "step": 3134,
      "video_reward_cumulative_accuracy": 0.8540204211869815
    },
    {
      "epoch": 0.9305431878895815,
      "grad_norm": 0.9131705164909363,
      "learning_rate": 7.312308471548624e-08,
      "loss": 0.0137,
      "step": 3135,
      "video_reward_cumulative_accuracy": 0.854066985645933
    },
    {
      "epoch": 0.9308400118729593,
      "grad_norm": 1.74937105178833,
      "learning_rate": 7.250246862930411e-08,
      "loss": 0.0159,
      "step": 3136,
      "video_reward_cumulative_accuracy": 0.8541135204081632
    },
    {
      "epoch": 0.9311368358563372,
      "grad_norm": 0.4030010402202606,
      "learning_rate": 7.188445869732913e-08,
      "loss": 0.0111,
      "step": 3137,
      "video_reward_cumulative_accuracy": 0.854160025502072
    },
    {
      "epoch": 0.931433659839715,
      "grad_norm": 4.948916435241699,
      "learning_rate": 7.12690555830553e-08,
      "loss": 0.065,
      "step": 3138,
      "video_reward_cumulative_accuracy": 0.8542065009560229
    },
    {
      "epoch": 0.9317304838230929,
      "grad_norm": 3.3688721656799316,
      "learning_rate": 7.065625994717717e-08,
      "loss": 0.0579,
      "step": 3139,
      "video_reward_cumulative_accuracy": 0.8542529467983434
    },
    {
      "epoch": 0.9320273078064708,
      "grad_norm": 3.2429494857788086,
      "learning_rate": 7.004607244759071e-08,
      "loss": 0.0308,
      "step": 3140,
      "video_reward_cumulative_accuracy": 0.8542993630573248
    },
    {
      "epoch": 0.9323241317898486,
      "grad_norm": 2.271911144256592,
      "learning_rate": 6.943849373939132e-08,
      "loss": 0.0129,
      "step": 3141,
      "video_reward_cumulative_accuracy": 0.8543457497612226
    },
    {
      "epoch": 0.9326209557732265,
      "grad_norm": 3.0307109355926514,
      "learning_rate": 6.883352447487363e-08,
      "loss": 0.03,
      "step": 3142,
      "video_reward_cumulative_accuracy": 0.8543921069382558
    },
    {
      "epoch": 0.9329177797566043,
      "grad_norm": 3.5836822986602783,
      "learning_rate": 6.823116530353113e-08,
      "loss": 0.0431,
      "step": 3143,
      "video_reward_cumulative_accuracy": 0.8544384346166083
    },
    {
      "epoch": 0.9332146037399822,
      "grad_norm": 2.957613706588745,
      "learning_rate": 6.763141687205432e-08,
      "loss": 0.0648,
      "step": 3144,
      "video_reward_cumulative_accuracy": 0.8544847328244275
    },
    {
      "epoch": 0.93351142772336,
      "grad_norm": 1.7011269330978394,
      "learning_rate": 6.703427982433202e-08,
      "loss": 0.0163,
      "step": 3145,
      "video_reward_cumulative_accuracy": 0.8545310015898251
    },
    {
      "epoch": 0.9338082517067379,
      "grad_norm": 0.9397748112678528,
      "learning_rate": 6.64397548014481e-08,
      "loss": 0.0156,
      "step": 3146,
      "video_reward_cumulative_accuracy": 0.8545772409408773
    },
    {
      "epoch": 0.9341050756901158,
      "grad_norm": 0.7569989562034607,
      "learning_rate": 6.584784244168335e-08,
      "loss": 0.0134,
      "step": 3147,
      "video_reward_cumulative_accuracy": 0.8546234509056244
    },
    {
      "epoch": 0.9344018996734936,
      "grad_norm": 2.637448310852051,
      "learning_rate": 6.525854338051335e-08,
      "loss": 0.0423,
      "step": 3148,
      "video_reward_cumulative_accuracy": 0.8546696315120712
    },
    {
      "epoch": 0.9346987236568715,
      "grad_norm": 2.607063055038452,
      "learning_rate": 6.467185825060728e-08,
      "loss": 0.0455,
      "step": 3149,
      "video_reward_cumulative_accuracy": 0.8547157827881867
    },
    {
      "epoch": 0.9349955476402493,
      "grad_norm": 1.5537196397781372,
      "learning_rate": 6.408778768182883e-08,
      "loss": 0.0456,
      "step": 3150,
      "video_reward_cumulative_accuracy": 0.8546031746031746
    },
    {
      "epoch": 0.9352923716236272,
      "grad_norm": 2.6270835399627686,
      "learning_rate": 6.350633230123443e-08,
      "loss": 0.0361,
      "step": 3151,
      "video_reward_cumulative_accuracy": 0.8544906378927325
    },
    {
      "epoch": 0.935589195607005,
      "grad_norm": 2.062340259552002,
      "learning_rate": 6.292749273307308e-08,
      "loss": 0.0188,
      "step": 3152,
      "video_reward_cumulative_accuracy": 0.8545368020304569
    },
    {
      "epoch": 0.9358860195903829,
      "grad_norm": 1.0614312887191772,
      "learning_rate": 6.23512695987849e-08,
      "loss": 0.0088,
      "step": 3153,
      "video_reward_cumulative_accuracy": 0.8545829368855059
    },
    {
      "epoch": 0.9361828435737608,
      "grad_norm": 1.1237103939056396,
      "learning_rate": 6.177766351700115e-08,
      "loss": 0.0129,
      "step": 3154,
      "video_reward_cumulative_accuracy": 0.8546290424857323
    },
    {
      "epoch": 0.9364796675571386,
      "grad_norm": 3.2817189693450928,
      "learning_rate": 6.120667510354422e-08,
      "loss": 0.0625,
      "step": 3155,
      "video_reward_cumulative_accuracy": 0.8546751188589541
    },
    {
      "epoch": 0.9367764915405165,
      "grad_norm": 2.6169803142547607,
      "learning_rate": 6.063830497142514e-08,
      "loss": 0.0305,
      "step": 3156,
      "video_reward_cumulative_accuracy": 0.8547211660329531
    },
    {
      "epoch": 0.9370733155238943,
      "grad_norm": 2.9755945205688477,
      "learning_rate": 6.007255373084498e-08,
      "loss": 0.0336,
      "step": 3157,
      "video_reward_cumulative_accuracy": 0.8547671840354767
    },
    {
      "epoch": 0.9373701395072722,
      "grad_norm": 0.9379553198814392,
      "learning_rate": 5.950942198919202e-08,
      "loss": 0.0127,
      "step": 3158,
      "video_reward_cumulative_accuracy": 0.8548131728942369
    },
    {
      "epoch": 0.93766696349065,
      "grad_norm": 1.1103352308273315,
      "learning_rate": 5.8948910351042943e-08,
      "loss": 0.0217,
      "step": 3159,
      "video_reward_cumulative_accuracy": 0.8548591326369104
    },
    {
      "epoch": 0.9379637874740279,
      "grad_norm": 2.294884443283081,
      "learning_rate": 5.839101941816166e-08,
      "loss": 0.0284,
      "step": 3160,
      "video_reward_cumulative_accuracy": 0.854746835443038
    },
    {
      "epoch": 0.9382606114574058,
      "grad_norm": 1.6113910675048828,
      "learning_rate": 5.783574978949796e-08,
      "loss": 0.0242,
      "step": 3161,
      "video_reward_cumulative_accuracy": 0.8547927870926922
    },
    {
      "epoch": 0.9385574354407836,
      "grad_norm": 1.6590948104858398,
      "learning_rate": 5.728310206118776e-08,
      "loss": 0.0327,
      "step": 3162,
      "video_reward_cumulative_accuracy": 0.8548387096774194
    },
    {
      "epoch": 0.9388542594241615,
      "grad_norm": 0.5105149149894714,
      "learning_rate": 5.6733076826552267e-08,
      "loss": 0.0084,
      "step": 3163,
      "video_reward_cumulative_accuracy": 0.8548846032247865
    },
    {
      "epoch": 0.9391510834075393,
      "grad_norm": 2.3923985958099365,
      "learning_rate": 5.618567467609637e-08,
      "loss": 0.0233,
      "step": 3164,
      "video_reward_cumulative_accuracy": 0.8549304677623262
    },
    {
      "epoch": 0.9394479073909172,
      "grad_norm": 2.449561834335327,
      "learning_rate": 5.564089619751023e-08,
      "loss": 0.0187,
      "step": 3165,
      "video_reward_cumulative_accuracy": 0.8549763033175355
    },
    {
      "epoch": 0.939744731374295,
      "grad_norm": 0.8511427044868469,
      "learning_rate": 5.509874197566573e-08,
      "loss": 0.015,
      "step": 3166,
      "video_reward_cumulative_accuracy": 0.8550221099178774
    },
    {
      "epoch": 0.9400415553576729,
      "grad_norm": 2.5197205543518066,
      "learning_rate": 5.455921259261837e-08,
      "loss": 0.0333,
      "step": 3167,
      "video_reward_cumulative_accuracy": 0.8550678875907799
    },
    {
      "epoch": 0.9403383793410508,
      "grad_norm": 2.1884961128234863,
      "learning_rate": 5.40223086276051e-08,
      "loss": 0.0673,
      "step": 3168,
      "video_reward_cumulative_accuracy": 0.8551136363636364
    },
    {
      "epoch": 0.9406352033244286,
      "grad_norm": 2.423147678375244,
      "learning_rate": 5.348803065704483e-08,
      "loss": 0.0129,
      "step": 3169,
      "video_reward_cumulative_accuracy": 0.8550015777847901
    },
    {
      "epoch": 0.9409320273078065,
      "grad_norm": 2.657482624053955,
      "learning_rate": 5.2956379254536226e-08,
      "loss": 0.058,
      "step": 3170,
      "video_reward_cumulative_accuracy": 0.8550473186119874
    },
    {
      "epoch": 0.9412288512911843,
      "grad_norm": 3.016252040863037,
      "learning_rate": 5.2427354990859106e-08,
      "loss": 0.028,
      "step": 3171,
      "video_reward_cumulative_accuracy": 0.8550930305897193
    },
    {
      "epoch": 0.9415256752745622,
      "grad_norm": 2.315845012664795,
      "learning_rate": 5.190095843397275e-08,
      "loss": 0.0678,
      "step": 3172,
      "video_reward_cumulative_accuracy": 0.8551387137452712
    },
    {
      "epoch": 0.94182249925794,
      "grad_norm": 0.9423714876174927,
      "learning_rate": 5.137719014901399e-08,
      "loss": 0.0241,
      "step": 3173,
      "video_reward_cumulative_accuracy": 0.8551843681058935
    },
    {
      "epoch": 0.9421193232413179,
      "grad_norm": 1.8504040241241455,
      "learning_rate": 5.0856050698299684e-08,
      "loss": 0.0228,
      "step": 3174,
      "video_reward_cumulative_accuracy": 0.855072463768116
    },
    {
      "epoch": 0.9424161472246958,
      "grad_norm": 1.4434008598327637,
      "learning_rate": 5.0337540641322846e-08,
      "loss": 0.0293,
      "step": 3175,
      "video_reward_cumulative_accuracy": 0.8551181102362204
    },
    {
      "epoch": 0.9427129712080736,
      "grad_norm": 2.862295627593994,
      "learning_rate": 4.9821660534755125e-08,
      "loss": 0.0318,
      "step": 3176,
      "video_reward_cumulative_accuracy": 0.8550062972292192
    },
    {
      "epoch": 0.9430097951914514,
      "grad_norm": 1.7346522808074951,
      "learning_rate": 4.930841093244349e-08,
      "loss": 0.0158,
      "step": 3177,
      "video_reward_cumulative_accuracy": 0.8550519357884797
    },
    {
      "epoch": 0.9433066191748293,
      "grad_norm": 3.134310722351074,
      "learning_rate": 4.8797792385411325e-08,
      "loss": 0.0237,
      "step": 3178,
      "video_reward_cumulative_accuracy": 0.85509754562618
    },
    {
      "epoch": 0.9436034431582072,
      "grad_norm": 1.7474472522735596,
      "learning_rate": 4.828980544185735e-08,
      "loss": 0.0242,
      "step": 3179,
      "video_reward_cumulative_accuracy": 0.8551431267694244
    },
    {
      "epoch": 0.943900267141585,
      "grad_norm": 4.279950141906738,
      "learning_rate": 4.77844506471542e-08,
      "loss": 0.0626,
      "step": 3180,
      "video_reward_cumulative_accuracy": 0.855188679245283
    },
    {
      "epoch": 0.9441970911249629,
      "grad_norm": 2.3081438541412354,
      "learning_rate": 4.728172854385038e-08,
      "loss": 0.0226,
      "step": 3181,
      "video_reward_cumulative_accuracy": 0.8552342030807922
    },
    {
      "epoch": 0.9444939151083408,
      "grad_norm": 1.8738723993301392,
      "learning_rate": 4.678163967166582e-08,
      "loss": 0.0248,
      "step": 3182,
      "video_reward_cumulative_accuracy": 0.8552796983029541
    },
    {
      "epoch": 0.9447907390917186,
      "grad_norm": 3.566354990005493,
      "learning_rate": 4.628418456749495e-08,
      "loss": 0.0372,
      "step": 3183,
      "video_reward_cumulative_accuracy": 0.8551680804272699
    },
    {
      "epoch": 0.9450875630750964,
      "grad_norm": 4.133174419403076,
      "learning_rate": 4.5789363765404436e-08,
      "loss": 0.0472,
      "step": 3184,
      "video_reward_cumulative_accuracy": 0.855213567839196
    },
    {
      "epoch": 0.9453843870584743,
      "grad_norm": 2.822946310043335,
      "learning_rate": 4.529717779663129e-08,
      "loss": 0.0269,
      "step": 3185,
      "video_reward_cumulative_accuracy": 0.8551020408163266
    },
    {
      "epoch": 0.9456812110418522,
      "grad_norm": 4.163548946380615,
      "learning_rate": 4.4807627189586425e-08,
      "loss": 0.0885,
      "step": 3186,
      "video_reward_cumulative_accuracy": 0.8551475204017577
    },
    {
      "epoch": 0.94597803502523,
      "grad_norm": 2.597421169281006,
      "learning_rate": 4.432071246984859e-08,
      "loss": 0.0321,
      "step": 3187,
      "video_reward_cumulative_accuracy": 0.8551929714465014
    },
    {
      "epoch": 0.9462748590086079,
      "grad_norm": 3.691058397293091,
      "learning_rate": 4.383643416016908e-08,
      "loss": 0.0655,
      "step": 3188,
      "video_reward_cumulative_accuracy": 0.855081555834379
    },
    {
      "epoch": 0.9465716829919858,
      "grad_norm": 2.430384874343872,
      "learning_rate": 4.3354792780467004e-08,
      "loss": 0.026,
      "step": 3189,
      "video_reward_cumulative_accuracy": 0.8549702100972092
    },
    {
      "epoch": 0.9468685069753636,
      "grad_norm": 2.202420234680176,
      "learning_rate": 4.287578884783122e-08,
      "loss": 0.0367,
      "step": 3190,
      "video_reward_cumulative_accuracy": 0.8550156739811913
    },
    {
      "epoch": 0.9471653309587414,
      "grad_norm": 2.3409032821655273,
      "learning_rate": 4.2399422876518995e-08,
      "loss": 0.0338,
      "step": 3191,
      "video_reward_cumulative_accuracy": 0.8550611093701034
    },
    {
      "epoch": 0.9474621549421193,
      "grad_norm": 1.7721819877624512,
      "learning_rate": 4.192569537795538e-08,
      "loss": 0.0257,
      "step": 3192,
      "video_reward_cumulative_accuracy": 0.8551065162907269
    },
    {
      "epoch": 0.9477589789254972,
      "grad_norm": 1.6074609756469727,
      "learning_rate": 4.145460686073327e-08,
      "loss": 0.0371,
      "step": 3193,
      "video_reward_cumulative_accuracy": 0.8549953022236142
    },
    {
      "epoch": 0.948055802908875,
      "grad_norm": 2.8421127796173096,
      "learning_rate": 4.098615783061144e-08,
      "loss": 0.1067,
      "step": 3194,
      "video_reward_cumulative_accuracy": 0.8550407013149656
    },
    {
      "epoch": 0.9483526268922529,
      "grad_norm": 0.6036382913589478,
      "learning_rate": 4.0520348790515084e-08,
      "loss": 0.0188,
      "step": 3195,
      "video_reward_cumulative_accuracy": 0.8550860719874804
    },
    {
      "epoch": 0.9486494508756308,
      "grad_norm": 2.0517990589141846,
      "learning_rate": 4.005718024053612e-08,
      "loss": 0.024,
      "step": 3196,
      "video_reward_cumulative_accuracy": 0.8549749687108886
    },
    {
      "epoch": 0.9489462748590086,
      "grad_norm": 1.565876841545105,
      "learning_rate": 3.959665267793067e-08,
      "loss": 0.0138,
      "step": 3197,
      "video_reward_cumulative_accuracy": 0.8550203315608382
    },
    {
      "epoch": 0.9492430988423864,
      "grad_norm": 2.432234525680542,
      "learning_rate": 3.91387665971199e-08,
      "loss": 0.0496,
      "step": 3198,
      "video_reward_cumulative_accuracy": 0.8550656660412758
    },
    {
      "epoch": 0.9495399228257643,
      "grad_norm": 0.7143085598945618,
      "learning_rate": 3.868352248968865e-08,
      "loss": 0.0061,
      "step": 3199,
      "video_reward_cumulative_accuracy": 0.8551109721788058
    },
    {
      "epoch": 0.9498367468091422,
      "grad_norm": 1.3272526264190674,
      "learning_rate": 3.823092084438568e-08,
      "loss": 0.0249,
      "step": 3200,
      "video_reward_cumulative_accuracy": 0.85515625
    },
    {
      "epoch": 0.9498367468091422,
      "eval_runtime": 136.0857,
      "eval_samples_per_second": 5.798,
      "eval_steps_per_second": 0.727,
      "eval_test_set_accuracy": 0.8320707070707071,
      "step": 3200
    },
    {
      "epoch": 0.95013357079252,
      "grad_norm": 1.0475165843963623,
      "learning_rate": 3.778096214712285e-08,
      "loss": 0.0085,
      "step": 3201,
      "video_reward_cumulative_accuracy": 0.8552014995313965
    },
    {
      "epoch": 0.9504303947758979,
      "grad_norm": 2.317171335220337,
      "learning_rate": 3.733364688097485e-08,
      "loss": 0.0783,
      "step": 3202,
      "video_reward_cumulative_accuracy": 0.8552467207995003
    },
    {
      "epoch": 0.9507272187592758,
      "grad_norm": 2.8476126194000244,
      "learning_rate": 3.6888975526177815e-08,
      "loss": 0.0534,
      "step": 3203,
      "video_reward_cumulative_accuracy": 0.8552919138307836
    },
    {
      "epoch": 0.9510240427426536,
      "grad_norm": 3.3761157989501953,
      "learning_rate": 3.6446948560129314e-08,
      "loss": 0.0177,
      "step": 3204,
      "video_reward_cumulative_accuracy": 0.8553370786516854
    },
    {
      "epoch": 0.9513208667260314,
      "grad_norm": 1.30337655544281,
      "learning_rate": 3.600756645738834e-08,
      "loss": 0.0228,
      "step": 3205,
      "video_reward_cumulative_accuracy": 0.8553822152886116
    },
    {
      "epoch": 0.9516176907094093,
      "grad_norm": 4.232937812805176,
      "learning_rate": 3.557082968967423e-08,
      "loss": 0.0748,
      "step": 3206,
      "video_reward_cumulative_accuracy": 0.8552713661883967
    },
    {
      "epoch": 0.9519145146927872,
      "grad_norm": 3.1819849014282227,
      "learning_rate": 3.5136738725866646e-08,
      "loss": 0.0287,
      "step": 3207,
      "video_reward_cumulative_accuracy": 0.8553164951668226
    },
    {
      "epoch": 0.952211338676165,
      "grad_norm": 0.5942104458808899,
      "learning_rate": 3.47052940320039e-08,
      "loss": 0.0064,
      "step": 3208,
      "video_reward_cumulative_accuracy": 0.8553615960099751
    },
    {
      "epoch": 0.9525081626595429,
      "grad_norm": 2.6708426475524902,
      "learning_rate": 3.4276496071284084e-08,
      "loss": 0.0283,
      "step": 3209,
      "video_reward_cumulative_accuracy": 0.855406668744157
    },
    {
      "epoch": 0.9528049866429208,
      "grad_norm": 0.9444103240966797,
      "learning_rate": 3.385034530406311e-08,
      "loss": 0.0048,
      "step": 3210,
      "video_reward_cumulative_accuracy": 0.8554517133956386
    },
    {
      "epoch": 0.9531018106262986,
      "grad_norm": 1.708574652671814,
      "learning_rate": 3.34268421878553e-08,
      "loss": 0.0249,
      "step": 3211,
      "video_reward_cumulative_accuracy": 0.8554967299906571
    },
    {
      "epoch": 0.9533986346096764,
      "grad_norm": 1.0022554397583008,
      "learning_rate": 3.300598717733278e-08,
      "loss": 0.0133,
      "step": 3212,
      "video_reward_cumulative_accuracy": 0.8555417185554172
    },
    {
      "epoch": 0.9536954585930543,
      "grad_norm": 1.676685094833374,
      "learning_rate": 3.258778072432356e-08,
      "loss": 0.0193,
      "step": 3213,
      "video_reward_cumulative_accuracy": 0.8555866791160909
    },
    {
      "epoch": 0.9539922825764322,
      "grad_norm": 2.026139259338379,
      "learning_rate": 3.217222327781322e-08,
      "loss": 0.0297,
      "step": 3214,
      "video_reward_cumulative_accuracy": 0.8556316116988176
    },
    {
      "epoch": 0.95428910655981,
      "grad_norm": 0.3578716516494751,
      "learning_rate": 3.175931528394294e-08,
      "loss": 0.009,
      "step": 3215,
      "video_reward_cumulative_accuracy": 0.8556765163297045
    },
    {
      "epoch": 0.9545859305431879,
      "grad_norm": 1.398017406463623,
      "learning_rate": 3.134905718600978e-08,
      "loss": 0.0171,
      "step": 3216,
      "video_reward_cumulative_accuracy": 0.8557213930348259
    },
    {
      "epoch": 0.9548827545265658,
      "grad_norm": 2.376380205154419,
      "learning_rate": 3.094144942446531e-08,
      "loss": 0.0243,
      "step": 3217,
      "video_reward_cumulative_accuracy": 0.8557662418402238
    },
    {
      "epoch": 0.9551795785099436,
      "grad_norm": 2.7739336490631104,
      "learning_rate": 3.053649243691587e-08,
      "loss": 0.0437,
      "step": 3218,
      "video_reward_cumulative_accuracy": 0.8556556867619639
    },
    {
      "epoch": 0.9554764024933214,
      "grad_norm": 2.0257623195648193,
      "learning_rate": 3.013418665812257e-08,
      "loss": 0.0569,
      "step": 3219,
      "video_reward_cumulative_accuracy": 0.8557005281143212
    },
    {
      "epoch": 0.9557732264766993,
      "grad_norm": 2.442366600036621,
      "learning_rate": 2.973453251999936e-08,
      "loss": 0.0582,
      "step": 3220,
      "video_reward_cumulative_accuracy": 0.8557453416149068
    },
    {
      "epoch": 0.9560700504600772,
      "grad_norm": 1.158838152885437,
      "learning_rate": 2.933753045161386e-08,
      "loss": 0.0107,
      "step": 3221,
      "video_reward_cumulative_accuracy": 0.8557901272896616
    },
    {
      "epoch": 0.956366874443455,
      "grad_norm": 1.9528629779815674,
      "learning_rate": 2.8943180879186517e-08,
      "loss": 0.0212,
      "step": 3222,
      "video_reward_cumulative_accuracy": 0.8558348851644941
    },
    {
      "epoch": 0.9566636984268329,
      "grad_norm": 2.6688528060913086,
      "learning_rate": 2.85514842260895e-08,
      "loss": 0.0283,
      "step": 3223,
      "video_reward_cumulative_accuracy": 0.8557244802978592
    },
    {
      "epoch": 0.9569605224102108,
      "grad_norm": 0.49328845739364624,
      "learning_rate": 2.8162440912847532e-08,
      "loss": 0.0061,
      "step": 3224,
      "video_reward_cumulative_accuracy": 0.8557692307692307
    },
    {
      "epoch": 0.9572573463935886,
      "grad_norm": 4.010133266448975,
      "learning_rate": 2.7776051357135957e-08,
      "loss": 0.0418,
      "step": 3225,
      "video_reward_cumulative_accuracy": 0.8558139534883721
    },
    {
      "epoch": 0.9575541703769664,
      "grad_norm": 1.154459834098816,
      "learning_rate": 2.7392315973781835e-08,
      "loss": 0.0174,
      "step": 3226,
      "video_reward_cumulative_accuracy": 0.8558586484810912
    },
    {
      "epoch": 0.9578509943603443,
      "grad_norm": 1.608305811882019,
      "learning_rate": 2.7011235174762284e-08,
      "loss": 0.0243,
      "step": 3227,
      "video_reward_cumulative_accuracy": 0.8559033157731639
    },
    {
      "epoch": 0.9581478183437222,
      "grad_norm": 0.8377227783203125,
      "learning_rate": 2.6632809369204205e-08,
      "loss": 0.0106,
      "step": 3228,
      "video_reward_cumulative_accuracy": 0.8559479553903345
    },
    {
      "epoch": 0.9584446423271,
      "grad_norm": 2.1696717739105225,
      "learning_rate": 2.6257038963385106e-08,
      "loss": 0.0256,
      "step": 3229,
      "video_reward_cumulative_accuracy": 0.8559925673583153
    },
    {
      "epoch": 0.9587414663104779,
      "grad_norm": 1.0289219617843628,
      "learning_rate": 2.588392436073034e-08,
      "loss": 0.0131,
      "step": 3230,
      "video_reward_cumulative_accuracy": 0.8558823529411764
    },
    {
      "epoch": 0.9590382902938558,
      "grad_norm": 0.3926885426044464,
      "learning_rate": 2.5513465961814475e-08,
      "loss": 0.0062,
      "step": 3231,
      "video_reward_cumulative_accuracy": 0.8559269575982668
    },
    {
      "epoch": 0.9593351142772336,
      "grad_norm": 1.4411845207214355,
      "learning_rate": 2.5145664164361593e-08,
      "loss": 0.0107,
      "step": 3232,
      "video_reward_cumulative_accuracy": 0.8559715346534653
    },
    {
      "epoch": 0.9596319382606114,
      "grad_norm": 0.6198068857192993,
      "learning_rate": 2.4780519363241663e-08,
      "loss": 0.0051,
      "step": 3233,
      "video_reward_cumulative_accuracy": 0.8560160841323848
    },
    {
      "epoch": 0.9599287622439893,
      "grad_norm": 1.5043619871139526,
      "learning_rate": 2.4418031950473597e-08,
      "loss": 0.0278,
      "step": 3234,
      "video_reward_cumulative_accuracy": 0.8560606060606061
    },
    {
      "epoch": 0.9602255862273672,
      "grad_norm": 2.1934850215911865,
      "learning_rate": 2.405820231522249e-08,
      "loss": 0.0246,
      "step": 3235,
      "video_reward_cumulative_accuracy": 0.855950540958269
    },
    {
      "epoch": 0.960522410210745,
      "grad_norm": 3.164400100708008,
      "learning_rate": 2.3701030843800433e-08,
      "loss": 0.025,
      "step": 3236,
      "video_reward_cumulative_accuracy": 0.8559950556242274
    },
    {
      "epoch": 0.9608192341941229,
      "grad_norm": 0.8490694761276245,
      "learning_rate": 2.334651791966569e-08,
      "loss": 0.0187,
      "step": 3237,
      "video_reward_cumulative_accuracy": 0.8560395427865307
    },
    {
      "epoch": 0.9611160581775008,
      "grad_norm": 1.7095571756362915,
      "learning_rate": 2.2994663923422422e-08,
      "loss": 0.0225,
      "step": 3238,
      "video_reward_cumulative_accuracy": 0.8560840024706609
    },
    {
      "epoch": 0.9614128821608786,
      "grad_norm": 1.3501557111740112,
      "learning_rate": 2.2645469232820127e-08,
      "loss": 0.0204,
      "step": 3239,
      "video_reward_cumulative_accuracy": 0.8561284347020686
    },
    {
      "epoch": 0.9617097061442564,
      "grad_norm": 0.568364143371582,
      "learning_rate": 2.229893422275281e-08,
      "loss": 0.0066,
      "step": 3240,
      "video_reward_cumulative_accuracy": 0.8561728395061728
    },
    {
      "epoch": 0.9620065301276343,
      "grad_norm": 1.0702372789382935,
      "learning_rate": 2.1955059265259815e-08,
      "loss": 0.0162,
      "step": 3241,
      "video_reward_cumulative_accuracy": 0.8562172169083616
    },
    {
      "epoch": 0.9623033541110122,
      "grad_norm": 4.149465560913086,
      "learning_rate": 2.161384472952416e-08,
      "loss": 0.0673,
      "step": 3242,
      "video_reward_cumulative_accuracy": 0.8562615669339914
    },
    {
      "epoch": 0.96260017809439,
      "grad_norm": 2.0406556129455566,
      "learning_rate": 2.1275290981872532e-08,
      "loss": 0.0408,
      "step": 3243,
      "video_reward_cumulative_accuracy": 0.8563058896083873
    },
    {
      "epoch": 0.9628970020777678,
      "grad_norm": 1.5414153337478638,
      "learning_rate": 2.0939398385775578e-08,
      "loss": 0.0101,
      "step": 3244,
      "video_reward_cumulative_accuracy": 0.8563501849568435
    },
    {
      "epoch": 0.9631938260611458,
      "grad_norm": 3.055986166000366,
      "learning_rate": 2.0606167301846503e-08,
      "loss": 0.0537,
      "step": 3245,
      "video_reward_cumulative_accuracy": 0.8563944530046225
    },
    {
      "epoch": 0.9634906500445236,
      "grad_norm": 1.648927927017212,
      "learning_rate": 2.0275598087841075e-08,
      "loss": 0.0365,
      "step": 3246,
      "video_reward_cumulative_accuracy": 0.8564386937769563
    },
    {
      "epoch": 0.9637874740279014,
      "grad_norm": 3.5667824745178223,
      "learning_rate": 1.994769109865735e-08,
      "loss": 0.0396,
      "step": 3247,
      "video_reward_cumulative_accuracy": 0.8564829072990453
    },
    {
      "epoch": 0.9640842980112793,
      "grad_norm": 1.4304312467575073,
      "learning_rate": 1.962244668633595e-08,
      "loss": 0.0193,
      "step": 3248,
      "video_reward_cumulative_accuracy": 0.8565270935960592
    },
    {
      "epoch": 0.9643811219946572,
      "grad_norm": 4.079548358917236,
      "learning_rate": 1.9299865200057556e-08,
      "loss": 0.0538,
      "step": 3249,
      "video_reward_cumulative_accuracy": 0.8564173591874423
    },
    {
      "epoch": 0.964677945978035,
      "grad_norm": 1.535839557647705,
      "learning_rate": 1.8979946986145137e-08,
      "loss": 0.0123,
      "step": 3250,
      "video_reward_cumulative_accuracy": 0.8564615384615385
    },
    {
      "epoch": 0.9649747699614128,
      "grad_norm": 2.697875738143921,
      "learning_rate": 1.8662692388061733e-08,
      "loss": 0.03,
      "step": 3251,
      "video_reward_cumulative_accuracy": 0.8565056905567517
    },
    {
      "epoch": 0.9652715939447908,
      "grad_norm": 2.8388845920562744,
      "learning_rate": 1.8348101746410994e-08,
      "loss": 0.0312,
      "step": 3252,
      "video_reward_cumulative_accuracy": 0.856549815498155
    },
    {
      "epoch": 0.9655684179281686,
      "grad_norm": 3.0264766216278076,
      "learning_rate": 1.803617539893665e-08,
      "loss": 0.0642,
      "step": 3253,
      "video_reward_cumulative_accuracy": 0.85659391331079
    },
    {
      "epoch": 0.9658652419115464,
      "grad_norm": 1.1789016723632812,
      "learning_rate": 1.772691368052165e-08,
      "loss": 0.0103,
      "step": 3254,
      "video_reward_cumulative_accuracy": 0.8566379840196681
    },
    {
      "epoch": 0.9661620658949243,
      "grad_norm": 2.1506567001342773,
      "learning_rate": 1.742031692318874e-08,
      "loss": 0.0225,
      "step": 3255,
      "video_reward_cumulative_accuracy": 0.8566820276497696
    },
    {
      "epoch": 0.9664588898783022,
      "grad_norm": 1.4436240196228027,
      "learning_rate": 1.711638545609906e-08,
      "loss": 0.0403,
      "step": 3256,
      "video_reward_cumulative_accuracy": 0.8565724815724816
    },
    {
      "epoch": 0.96675571386168,
      "grad_norm": 0.8938013911247253,
      "learning_rate": 1.681511960555271e-08,
      "loss": 0.0166,
      "step": 3257,
      "video_reward_cumulative_accuracy": 0.8566165182683451
    },
    {
      "epoch": 0.9670525378450578,
      "grad_norm": 2.3233988285064697,
      "learning_rate": 1.651651969498791e-08,
      "loss": 0.0305,
      "step": 3258,
      "video_reward_cumulative_accuracy": 0.8566605279312461
    },
    {
      "epoch": 0.9673493618284358,
      "grad_norm": 0.6429560780525208,
      "learning_rate": 1.6220586044980448e-08,
      "loss": 0.0046,
      "step": 3259,
      "video_reward_cumulative_accuracy": 0.8567045105860693
    },
    {
      "epoch": 0.9676461858118136,
      "grad_norm": 1.2214471101760864,
      "learning_rate": 1.592731897324368e-08,
      "loss": 0.0175,
      "step": 3260,
      "video_reward_cumulative_accuracy": 0.8567484662576688
    },
    {
      "epoch": 0.9679430097951914,
      "grad_norm": 2.7271018028259277,
      "learning_rate": 1.5636718794628523e-08,
      "loss": 0.0793,
      "step": 3261,
      "video_reward_cumulative_accuracy": 0.8567923949708678
    },
    {
      "epoch": 0.9682398337785693,
      "grad_norm": 2.33453106880188,
      "learning_rate": 1.5348785821122648e-08,
      "loss": 0.0387,
      "step": 3262,
      "video_reward_cumulative_accuracy": 0.8568362967504598
    },
    {
      "epoch": 0.9685366577619472,
      "grad_norm": 0.8599418997764587,
      "learning_rate": 1.5063520361849604e-08,
      "loss": 0.0055,
      "step": 3263,
      "video_reward_cumulative_accuracy": 0.8568801716212074
    },
    {
      "epoch": 0.968833481745325,
      "grad_norm": 0.911806583404541,
      "learning_rate": 1.4780922723069968e-08,
      "loss": 0.0228,
      "step": 3264,
      "video_reward_cumulative_accuracy": 0.8569240196078431
    },
    {
      "epoch": 0.9691303057287028,
      "grad_norm": 1.4930540323257446,
      "learning_rate": 1.4500993208179382e-08,
      "loss": 0.0299,
      "step": 3265,
      "video_reward_cumulative_accuracy": 0.8569678407350689
    },
    {
      "epoch": 0.9694271297120808,
      "grad_norm": 2.40919828414917,
      "learning_rate": 1.4223732117709387e-08,
      "loss": 0.064,
      "step": 3266,
      "video_reward_cumulative_accuracy": 0.8570116350275566
    },
    {
      "epoch": 0.9697239536954586,
      "grad_norm": 1.3808552026748657,
      "learning_rate": 1.3949139749326601e-08,
      "loss": 0.0271,
      "step": 3267,
      "video_reward_cumulative_accuracy": 0.8570554025099479
    },
    {
      "epoch": 0.9700207776788364,
      "grad_norm": 1.2729172706604004,
      "learning_rate": 1.367721639783326e-08,
      "loss": 0.0252,
      "step": 3268,
      "video_reward_cumulative_accuracy": 0.8570991432068543
    },
    {
      "epoch": 0.9703176016622143,
      "grad_norm": 5.71325159072876,
      "learning_rate": 1.3407962355164728e-08,
      "loss": 0.0391,
      "step": 3269,
      "video_reward_cumulative_accuracy": 0.8571428571428571
    },
    {
      "epoch": 0.9706144256455922,
      "grad_norm": 0.38675758242607117,
      "learning_rate": 1.3141377910391718e-08,
      "loss": 0.0065,
      "step": 3270,
      "video_reward_cumulative_accuracy": 0.8571865443425076
    },
    {
      "epoch": 0.97091124962897,
      "grad_norm": 1.710315227508545,
      "learning_rate": 1.2877463349718067e-08,
      "loss": 0.0198,
      "step": 3271,
      "video_reward_cumulative_accuracy": 0.8572302048303271
    },
    {
      "epoch": 0.9712080736123478,
      "grad_norm": 1.825723648071289,
      "learning_rate": 1.2616218956482407e-08,
      "loss": 0.0223,
      "step": 3272,
      "video_reward_cumulative_accuracy": 0.8572738386308069
    },
    {
      "epoch": 0.9715048975957258,
      "grad_norm": 0.4373869299888611,
      "learning_rate": 1.2357645011155106e-08,
      "loss": 0.0071,
      "step": 3273,
      "video_reward_cumulative_accuracy": 0.8573174457684082
    },
    {
      "epoch": 0.9718017215791036,
      "grad_norm": 2.230316400527954,
      "learning_rate": 1.2101741791341049e-08,
      "loss": 0.02,
      "step": 3274,
      "video_reward_cumulative_accuracy": 0.8573610262675626
    },
    {
      "epoch": 0.9720985455624814,
      "grad_norm": 3.0452535152435303,
      "learning_rate": 1.1848509571777133e-08,
      "loss": 0.0309,
      "step": 3275,
      "video_reward_cumulative_accuracy": 0.8574045801526717
    },
    {
      "epoch": 0.9723953695458593,
      "grad_norm": 1.0447007417678833,
      "learning_rate": 1.1597948624332278e-08,
      "loss": 0.0155,
      "step": 3276,
      "video_reward_cumulative_accuracy": 0.8574481074481074
    },
    {
      "epoch": 0.9726921935292372,
      "grad_norm": 3.0469367504119873,
      "learning_rate": 1.1350059218008248e-08,
      "loss": 0.0523,
      "step": 3277,
      "video_reward_cumulative_accuracy": 0.8574916081782118
    },
    {
      "epoch": 0.972989017512615,
      "grad_norm": 1.8677681684494019,
      "learning_rate": 1.1104841618938545e-08,
      "loss": 0.0189,
      "step": 3278,
      "video_reward_cumulative_accuracy": 0.8575350823672971
    },
    {
      "epoch": 0.9732858414959928,
      "grad_norm": 0.8737713694572449,
      "learning_rate": 1.0862296090387859e-08,
      "loss": 0.0115,
      "step": 3279,
      "video_reward_cumulative_accuracy": 0.8575785300396462
    },
    {
      "epoch": 0.9735826654793708,
      "grad_norm": 1.362595796585083,
      "learning_rate": 1.0622422892752338e-08,
      "loss": 0.0202,
      "step": 3280,
      "video_reward_cumulative_accuracy": 0.8576219512195122
    },
    {
      "epoch": 0.9738794894627486,
      "grad_norm": 4.023233413696289,
      "learning_rate": 1.0385222283559037e-08,
      "loss": 0.071,
      "step": 3281,
      "video_reward_cumulative_accuracy": 0.8576653459311185
    },
    {
      "epoch": 0.9741763134461264,
      "grad_norm": 4.000472545623779,
      "learning_rate": 1.0150694517466198e-08,
      "loss": 0.0249,
      "step": 3282,
      "video_reward_cumulative_accuracy": 0.8577087141986593
    },
    {
      "epoch": 0.9744731374295043,
      "grad_norm": 0.7309911251068115,
      "learning_rate": 9.918839846261852e-09,
      "loss": 0.0066,
      "step": 3283,
      "video_reward_cumulative_accuracy": 0.8577520560462991
    },
    {
      "epoch": 0.9747699614128822,
      "grad_norm": 1.770171880722046,
      "learning_rate": 9.689658518864664e-09,
      "loss": 0.0367,
      "step": 3284,
      "video_reward_cumulative_accuracy": 0.8576431181485993
    },
    {
      "epoch": 0.97506678539626,
      "grad_norm": 2.3775479793548584,
      "learning_rate": 9.463150781322816e-09,
      "loss": 0.019,
      "step": 3285,
      "video_reward_cumulative_accuracy": 0.8576864535768646
    },
    {
      "epoch": 0.9753636093796378,
      "grad_norm": 3.345357894897461,
      "learning_rate": 9.239316876814564e-09,
      "loss": 0.0785,
      "step": 3286,
      "video_reward_cumulative_accuracy": 0.8577297626293365
    },
    {
      "epoch": 0.9756604333630158,
      "grad_norm": 1.9170372486114502,
      "learning_rate": 9.018157045647124e-09,
      "loss": 0.0236,
      "step": 3287,
      "video_reward_cumulative_accuracy": 0.8577730453300882
    },
    {
      "epoch": 0.9759572573463936,
      "grad_norm": 1.4506341218948364,
      "learning_rate": 8.799671525257236e-09,
      "loss": 0.0337,
      "step": 3288,
      "video_reward_cumulative_accuracy": 0.8576642335766423
    },
    {
      "epoch": 0.9762540813297714,
      "grad_norm": 3.2569446563720703,
      "learning_rate": 8.583860550210043e-09,
      "loss": 0.0416,
      "step": 3289,
      "video_reward_cumulative_accuracy": 0.857707509881423
    },
    {
      "epoch": 0.9765509053131493,
      "grad_norm": 2.6264233589172363,
      "learning_rate": 8.370724352199933e-09,
      "loss": 0.0574,
      "step": 3290,
      "video_reward_cumulative_accuracy": 0.8577507598784194
    },
    {
      "epoch": 0.9768477292965272,
      "grad_norm": 0.6166547536849976,
      "learning_rate": 8.160263160049143e-09,
      "loss": 0.0137,
      "step": 3291,
      "video_reward_cumulative_accuracy": 0.8577939835916135
    },
    {
      "epoch": 0.977144553279905,
      "grad_norm": 2.100522041320801,
      "learning_rate": 7.952477199708042e-09,
      "loss": 0.0219,
      "step": 3292,
      "video_reward_cumulative_accuracy": 0.8578371810449574
    },
    {
      "epoch": 0.9774413772632828,
      "grad_norm": 1.396987795829773,
      "learning_rate": 7.747366694255409e-09,
      "loss": 0.0129,
      "step": 3293,
      "video_reward_cumulative_accuracy": 0.8578803522623747
    },
    {
      "epoch": 0.9777382012466608,
      "grad_norm": 0.7728352546691895,
      "learning_rate": 7.544931863896765e-09,
      "loss": 0.0088,
      "step": 3294,
      "video_reward_cumulative_accuracy": 0.8579234972677595
    },
    {
      "epoch": 0.9780350252300386,
      "grad_norm": 0.9085843563079834,
      "learning_rate": 7.345172925966038e-09,
      "loss": 0.0196,
      "step": 3295,
      "video_reward_cumulative_accuracy": 0.8579666160849773
    },
    {
      "epoch": 0.9783318492134164,
      "grad_norm": 1.8223828077316284,
      "learning_rate": 7.148090094923343e-09,
      "loss": 0.0362,
      "step": 3296,
      "video_reward_cumulative_accuracy": 0.8580097087378641
    },
    {
      "epoch": 0.9786286731967943,
      "grad_norm": 5.303034782409668,
      "learning_rate": 6.953683582356652e-09,
      "loss": 0.099,
      "step": 3297,
      "video_reward_cumulative_accuracy": 0.8579011222323324
    },
    {
      "epoch": 0.9789254971801722,
      "grad_norm": 2.5379104614257812,
      "learning_rate": 6.76195359698012e-09,
      "loss": 0.0362,
      "step": 3298,
      "video_reward_cumulative_accuracy": 0.8577926015767131
    },
    {
      "epoch": 0.97922232116355,
      "grad_norm": 2.565481185913086,
      "learning_rate": 6.57290034463437e-09,
      "loss": 0.0284,
      "step": 3299,
      "video_reward_cumulative_accuracy": 0.8578357077902394
    },
    {
      "epoch": 0.9795191451469278,
      "grad_norm": 2.1413698196411133,
      "learning_rate": 6.386524028286489e-09,
      "loss": 0.0321,
      "step": 3300,
      "video_reward_cumulative_accuracy": 0.8578787878787879
    },
    {
      "epoch": 0.9798159691303058,
      "grad_norm": 2.323068857192993,
      "learning_rate": 6.202824848029476e-09,
      "loss": 0.0302,
      "step": 3301,
      "video_reward_cumulative_accuracy": 0.8579218418661012
    },
    {
      "epoch": 0.9801127931136836,
      "grad_norm": 1.3499592542648315,
      "learning_rate": 6.021803001082238e-09,
      "loss": 0.0093,
      "step": 3302,
      "video_reward_cumulative_accuracy": 0.8579648697758934
    },
    {
      "epoch": 0.9804096170970614,
      "grad_norm": 0.8288122415542603,
      "learning_rate": 5.843458681789594e-09,
      "loss": 0.0155,
      "step": 3303,
      "video_reward_cumulative_accuracy": 0.8580078716318499
    },
    {
      "epoch": 0.9807064410804393,
      "grad_norm": 3.430920124053955,
      "learning_rate": 5.66779208162116e-09,
      "loss": 0.0318,
      "step": 3304,
      "video_reward_cumulative_accuracy": 0.8580508474576272
    },
    {
      "epoch": 0.9810032650638172,
      "grad_norm": 3.632520914077759,
      "learning_rate": 5.4948033891721875e-09,
      "loss": 0.0411,
      "step": 3305,
      "video_reward_cumulative_accuracy": 0.8580937972768532
    },
    {
      "epoch": 0.981300089047195,
      "grad_norm": 2.451488494873047,
      "learning_rate": 5.3244927901627274e-09,
      "loss": 0.0354,
      "step": 3306,
      "video_reward_cumulative_accuracy": 0.8581367211131277
    },
    {
      "epoch": 0.9815969130305728,
      "grad_norm": 1.2364314794540405,
      "learning_rate": 5.1568604674376295e-09,
      "loss": 0.0326,
      "step": 3307,
      "video_reward_cumulative_accuracy": 0.8581796189900212
    },
    {
      "epoch": 0.9818937370139508,
      "grad_norm": 2.0246939659118652,
      "learning_rate": 4.991906600966823e-09,
      "loss": 0.0493,
      "step": 3308,
      "video_reward_cumulative_accuracy": 0.8580713422007256
    },
    {
      "epoch": 0.9821905609973286,
      "grad_norm": 3.2378106117248535,
      "learning_rate": 4.829631367844201e-09,
      "loss": 0.0415,
      "step": 3309,
      "video_reward_cumulative_accuracy": 0.858114233907525
    },
    {
      "epoch": 0.9824873849807064,
      "grad_norm": 3.8841092586517334,
      "learning_rate": 4.670034942287904e-09,
      "loss": 0.0413,
      "step": 3310,
      "video_reward_cumulative_accuracy": 0.8581570996978852
    },
    {
      "epoch": 0.9827842089640842,
      "grad_norm": 0.6869316101074219,
      "learning_rate": 4.51311749564004e-09,
      "loss": 0.0038,
      "step": 3311,
      "video_reward_cumulative_accuracy": 0.8581999395952884
    },
    {
      "epoch": 0.9830810329474622,
      "grad_norm": 1.650780439376831,
      "learning_rate": 4.358879196366961e-09,
      "loss": 0.012,
      "step": 3312,
      "video_reward_cumulative_accuracy": 0.8582427536231884
    },
    {
      "epoch": 0.98337785693084,
      "grad_norm": 1.0909397602081299,
      "learning_rate": 4.207320210058153e-09,
      "loss": 0.007,
      "step": 3313,
      "video_reward_cumulative_accuracy": 0.8582855418050106
    },
    {
      "epoch": 0.9836746809142178,
      "grad_norm": 0.3308217227458954,
      "learning_rate": 4.058440699427346e-09,
      "loss": 0.0066,
      "step": 3314,
      "video_reward_cumulative_accuracy": 0.8583283041641521
    },
    {
      "epoch": 0.9839715048975958,
      "grad_norm": 1.89927077293396,
      "learning_rate": 3.9122408243105755e-09,
      "loss": 0.0213,
      "step": 3315,
      "video_reward_cumulative_accuracy": 0.8583710407239818
    },
    {
      "epoch": 0.9842683288809736,
      "grad_norm": 2.1102702617645264,
      "learning_rate": 3.768720741668119e-09,
      "loss": 0.0126,
      "step": 3316,
      "video_reward_cumulative_accuracy": 0.8584137515078407
    },
    {
      "epoch": 0.9845651528643514,
      "grad_norm": 2.2954890727996826,
      "learning_rate": 3.6278806055825566e-09,
      "loss": 0.0284,
      "step": 3317,
      "video_reward_cumulative_accuracy": 0.8584564365390414
    },
    {
      "epoch": 0.9848619768477292,
      "grad_norm": 2.33181095123291,
      "learning_rate": 3.489720567259325e-09,
      "loss": 0.0763,
      "step": 3318,
      "video_reward_cumulative_accuracy": 0.858499095840868
    },
    {
      "epoch": 0.9851588008311072,
      "grad_norm": 4.192658424377441,
      "learning_rate": 3.3542407750264404e-09,
      "loss": 0.0394,
      "step": 3319,
      "video_reward_cumulative_accuracy": 0.8585417294365773
    },
    {
      "epoch": 0.985455624814485,
      "grad_norm": 1.9729253053665161,
      "learning_rate": 3.2214413743353323e-09,
      "loss": 0.0377,
      "step": 3320,
      "video_reward_cumulative_accuracy": 0.858433734939759
    },
    {
      "epoch": 0.9857524487978628,
      "grad_norm": 0.8455713987350464,
      "learning_rate": 3.0913225077580653e-09,
      "loss": 0.0084,
      "step": 3321,
      "video_reward_cumulative_accuracy": 0.8584763625414031
    },
    {
      "epoch": 0.9860492727812408,
      "grad_norm": 1.2491599321365356,
      "learning_rate": 2.9638843149906725e-09,
      "loss": 0.0166,
      "step": 3322,
      "video_reward_cumulative_accuracy": 0.8585189644792294
    },
    {
      "epoch": 0.9863460967646186,
      "grad_norm": 2.424283981323242,
      "learning_rate": 2.839126932850378e-09,
      "loss": 0.0389,
      "step": 3323,
      "video_reward_cumulative_accuracy": 0.8585615407764069
    },
    {
      "epoch": 0.9866429207479964,
      "grad_norm": 1.9569655656814575,
      "learning_rate": 2.7170504952755972e-09,
      "loss": 0.0246,
      "step": 3324,
      "video_reward_cumulative_accuracy": 0.858453670276775
    },
    {
      "epoch": 0.9869397447313742,
      "grad_norm": 2.326342821121216,
      "learning_rate": 2.5976551333281586e-09,
      "loss": 0.0267,
      "step": 3325,
      "video_reward_cumulative_accuracy": 0.8584962406015038
    },
    {
      "epoch": 0.9872365687147522,
      "grad_norm": 3.129794120788574,
      "learning_rate": 2.4809409751899718e-09,
      "loss": 0.086,
      "step": 3326,
      "video_reward_cumulative_accuracy": 0.8582381238725195
    },
    {
      "epoch": 0.98753339269813,
      "grad_norm": 1.876774787902832,
      "learning_rate": 2.3669081461652476e-09,
      "loss": 0.0505,
      "step": 3327,
      "video_reward_cumulative_accuracy": 0.8581304478509167
    },
    {
      "epoch": 0.9878302166815078,
      "grad_norm": 0.5072069764137268,
      "learning_rate": 2.2555567686791124e-09,
      "loss": 0.0061,
      "step": 3328,
      "video_reward_cumulative_accuracy": 0.8581730769230769
    },
    {
      "epoch": 0.9881270406648858,
      "grad_norm": 1.15468168258667,
      "learning_rate": 2.1468869622781608e-09,
      "loss": 0.046,
      "step": 3329,
      "video_reward_cumulative_accuracy": 0.8582156803844998
    },
    {
      "epoch": 0.9884238646482636,
      "grad_norm": 1.5895243883132935,
      "learning_rate": 2.040898843630179e-09,
      "loss": 0.0176,
      "step": 3330,
      "video_reward_cumulative_accuracy": 0.8582582582582583
    },
    {
      "epoch": 0.9887206886316414,
      "grad_norm": 2.2743732929229736,
      "learning_rate": 1.9375925265235907e-09,
      "loss": 0.0536,
      "step": 3331,
      "video_reward_cumulative_accuracy": 0.8583008105673972
    },
    {
      "epoch": 0.9890175126150192,
      "grad_norm": 0.7520642280578613,
      "learning_rate": 1.8369681218677327e-09,
      "loss": 0.0133,
      "step": 3332,
      "video_reward_cumulative_accuracy": 0.8581932773109243
    },
    {
      "epoch": 0.9893143365983972,
      "grad_norm": 1.7850524187088013,
      "learning_rate": 1.739025737692579e-09,
      "loss": 0.0138,
      "step": 3333,
      "video_reward_cumulative_accuracy": 0.858085808580858
    },
    {
      "epoch": 0.989611160581775,
      "grad_norm": 0.7477326989173889,
      "learning_rate": 1.64376547914874e-09,
      "loss": 0.0129,
      "step": 3334,
      "video_reward_cumulative_accuracy": 0.8579784043191362
    },
    {
      "epoch": 0.9899079845651528,
      "grad_norm": 2.771977663040161,
      "learning_rate": 1.551187448507463e-09,
      "loss": 0.0591,
      "step": 3335,
      "video_reward_cumulative_accuracy": 0.8578710644677661
    },
    {
      "epoch": 0.9902048085485308,
      "grad_norm": 5.376701831817627,
      "learning_rate": 1.4612917451603536e-09,
      "loss": 0.0271,
      "step": 3336,
      "video_reward_cumulative_accuracy": 0.8579136690647482
    },
    {
      "epoch": 0.9905016325319086,
      "grad_norm": 1.8040037155151367,
      "learning_rate": 1.3740784656190998e-09,
      "loss": 0.0504,
      "step": 3337,
      "video_reward_cumulative_accuracy": 0.8578064129457597
    },
    {
      "epoch": 0.9907984565152864,
      "grad_norm": 1.739055871963501,
      "learning_rate": 1.2895477035154703e-09,
      "loss": 0.0359,
      "step": 3338,
      "video_reward_cumulative_accuracy": 0.8578490113840623
    },
    {
      "epoch": 0.9910952804986642,
      "grad_norm": 2.0168232917785645,
      "learning_rate": 1.2076995496015931e-09,
      "loss": 0.04,
      "step": 3339,
      "video_reward_cumulative_accuracy": 0.8578915843066787
    },
    {
      "epoch": 0.9913921044820422,
      "grad_norm": 1.7344579696655273,
      "learning_rate": 1.1285340917494004e-09,
      "loss": 0.0256,
      "step": 3340,
      "video_reward_cumulative_accuracy": 0.857934131736527
    },
    {
      "epoch": 0.99168892846542,
      "grad_norm": 1.7917364835739136,
      "learning_rate": 1.0520514149506278e-09,
      "loss": 0.021,
      "step": 3341,
      "video_reward_cumulative_accuracy": 0.857976653696498
    },
    {
      "epoch": 0.9919857524487978,
      "grad_norm": 1.345489740371704,
      "learning_rate": 9.782516013168154e-10,
      "loss": 0.0126,
      "step": 3342,
      "video_reward_cumulative_accuracy": 0.8580191502094554
    },
    {
      "epoch": 0.9922825764321758,
      "grad_norm": 0.9671461582183838,
      "learning_rate": 9.071347300793065e-10,
      "loss": 0.0065,
      "step": 3343,
      "video_reward_cumulative_accuracy": 0.8580616212982352
    },
    {
      "epoch": 0.9925794004155536,
      "grad_norm": 1.9350098371505737,
      "learning_rate": 8.387008775889716e-10,
      "loss": 0.0142,
      "step": 3344,
      "video_reward_cumulative_accuracy": 0.8581040669856459
    },
    {
      "epoch": 0.9928762243989314,
      "grad_norm": 1.1504771709442139,
      "learning_rate": 7.729501173162068e-10,
      "loss": 0.009,
      "step": 3345,
      "video_reward_cumulative_accuracy": 0.8581464872944694
    },
    {
      "epoch": 0.9931730483823092,
      "grad_norm": 1.2408806085586548,
      "learning_rate": 7.098825198509351e-10,
      "loss": 0.0086,
      "step": 3346,
      "video_reward_cumulative_accuracy": 0.8581888822474597
    },
    {
      "epoch": 0.9934698723656872,
      "grad_norm": 3.347973108291626,
      "learning_rate": 6.494981529020505e-10,
      "loss": 0.0431,
      "step": 3347,
      "video_reward_cumulative_accuracy": 0.8582312518673438
    },
    {
      "epoch": 0.993766696349065,
      "grad_norm": 1.9659161567687988,
      "learning_rate": 5.91797081298251e-10,
      "loss": 0.0124,
      "step": 3348,
      "video_reward_cumulative_accuracy": 0.8581242532855436
    },
    {
      "epoch": 0.9940635203324428,
      "grad_norm": 4.217858791351318,
      "learning_rate": 5.367793669874832e-10,
      "loss": 0.0681,
      "step": 3349,
      "video_reward_cumulative_accuracy": 0.8581666169005673
    },
    {
      "epoch": 0.9943603443158208,
      "grad_norm": 2.9570226669311523,
      "learning_rate": 4.844450690358327e-10,
      "loss": 0.0385,
      "step": 3350,
      "video_reward_cumulative_accuracy": 0.8582089552238806
    },
    {
      "epoch": 0.9946571682991986,
      "grad_norm": 0.8007720708847046,
      "learning_rate": 4.347942436300212e-10,
      "loss": 0.0163,
      "step": 3351,
      "video_reward_cumulative_accuracy": 0.8582512682781259
    },
    {
      "epoch": 0.9949539922825764,
      "grad_norm": 1.299566626548767,
      "learning_rate": 3.8782694407463184e-10,
      "loss": 0.0321,
      "step": 3352,
      "video_reward_cumulative_accuracy": 0.8582935560859188
    },
    {
      "epoch": 0.9952508162659542,
      "grad_norm": 2.04730486869812,
      "learning_rate": 3.435432207937739e-10,
      "loss": 0.0355,
      "step": 3353,
      "video_reward_cumulative_accuracy": 0.8583358186698479
    },
    {
      "epoch": 0.9955476402493322,
      "grad_norm": 2.532416343688965,
      "learning_rate": 3.019431213299728e-10,
      "loss": 0.0196,
      "step": 3354,
      "video_reward_cumulative_accuracy": 0.8583780560524746
    },
    {
      "epoch": 0.99584446423271,
      "grad_norm": 1.875337839126587,
      "learning_rate": 2.6302669034555807e-10,
      "loss": 0.0339,
      "step": 3355,
      "video_reward_cumulative_accuracy": 0.8584202682563339
    },
    {
      "epoch": 0.9961412882160878,
      "grad_norm": 2.1054089069366455,
      "learning_rate": 2.2679396962071999e-10,
      "loss": 0.0239,
      "step": 3356,
      "video_reward_cumulative_accuracy": 0.8584624553039333
    },
    {
      "epoch": 0.9964381121994658,
      "grad_norm": 2.2614123821258545,
      "learning_rate": 1.9324499805489783e-10,
      "loss": 0.0458,
      "step": 3357,
      "video_reward_cumulative_accuracy": 0.858504617217754
    },
    {
      "epoch": 0.9967349361828436,
      "grad_norm": 1.6584018468856812,
      "learning_rate": 1.6237981166622451e-10,
      "loss": 0.0176,
      "step": 3358,
      "video_reward_cumulative_accuracy": 0.8585467540202502
    },
    {
      "epoch": 0.9970317601662214,
      "grad_norm": 2.8550844192504883,
      "learning_rate": 1.341984435912491e-10,
      "loss": 0.0352,
      "step": 3359,
      "video_reward_cumulative_accuracy": 0.8585888657338494
    },
    {
      "epoch": 0.9973285841495992,
      "grad_norm": 3.7079176902770996,
      "learning_rate": 1.0870092408576949e-10,
      "loss": 0.0719,
      "step": 3360,
      "video_reward_cumulative_accuracy": 0.8584821428571429
    },
    {
      "epoch": 0.9976254081329772,
      "grad_norm": 1.3505829572677612,
      "learning_rate": 8.588728052344453e-11,
      "loss": 0.0224,
      "step": 3361,
      "video_reward_cumulative_accuracy": 0.8585242487354954
    },
    {
      "epoch": 0.997922232116355,
      "grad_norm": 1.7025647163391113,
      "learning_rate": 6.575753739718193e-11,
      "loss": 0.0293,
      "step": 3362,
      "video_reward_cumulative_accuracy": 0.8585663295657346
    },
    {
      "epoch": 0.9982190560997328,
      "grad_norm": 2.158022403717041,
      "learning_rate": 4.8311716318028e-11,
      "loss": 0.0603,
      "step": 3363,
      "video_reward_cumulative_accuracy": 0.8586083853702052
    },
    {
      "epoch": 0.9985158800831108,
      "grad_norm": 2.8020150661468506,
      "learning_rate": 3.354983601600026e-11,
      "loss": 0.0205,
      "step": 3364,
      "video_reward_cumulative_accuracy": 0.8586504161712247
    },
    {
      "epoch": 0.9988127040664886,
      "grad_norm": 3.6079211235046387,
      "learning_rate": 2.1471912339532386e-11,
      "loss": 0.0442,
      "step": 3365,
      "video_reward_cumulative_accuracy": 0.8585438335809806
    },
    {
      "epoch": 0.9991095280498664,
      "grad_norm": 1.7951223850250244,
      "learning_rate": 1.2077958254919087e-11,
      "loss": 0.0145,
      "step": 3366,
      "video_reward_cumulative_accuracy": 0.8585858585858586
    },
    {
      "epoch": 0.9994063520332442,
      "grad_norm": 2.862751007080078,
      "learning_rate": 5.367983847981428e-12,
      "loss": 0.0244,
      "step": 3367,
      "video_reward_cumulative_accuracy": 0.8584793584793585
    },
    {
      "epoch": 0.9997031760166222,
      "grad_norm": 1.4409502744674683,
      "learning_rate": 1.3419963221239506e-12,
      "loss": 0.0109,
      "step": 3368,
      "video_reward_cumulative_accuracy": 0.858521377672209
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1117860078811646,
      "learning_rate": 0.0,
      "loss": 0.0134,
      "step": 3369,
      "video_reward_cumulative_accuracy": 0.8585633719204512
    }
  ],
  "logging_steps": 1,
  "max_steps": 3369,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6663849150126752e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}