{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 3369, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00029682398337785694, "grad_norm": 16.833446502685547, "learning_rate": 1.483679525222552e-08, "loss": 0.1938, "step": 1, "video_reward_cumulative_accuracy": 0.5 }, { "epoch": 0.0005936479667557139, "grad_norm": 24.454694747924805, "learning_rate": 2.967359050445104e-08, "loss": 0.3887, "step": 2, "video_reward_cumulative_accuracy": 0.5 }, { "epoch": 0.0008904719501335708, "grad_norm": 21.62911605834961, "learning_rate": 4.451038575667656e-08, "loss": 0.2016, "step": 3, "video_reward_cumulative_accuracy": 0.5 }, { "epoch": 0.0011872959335114278, "grad_norm": 18.787561416625977, "learning_rate": 5.934718100890208e-08, "loss": 0.1834, "step": 4, "video_reward_cumulative_accuracy": 0.625 }, { "epoch": 0.0014841199168892847, "grad_norm": 25.317777633666992, "learning_rate": 7.418397626112761e-08, "loss": 0.3673, "step": 5, "video_reward_cumulative_accuracy": 0.7 }, { "epoch": 0.0017809439002671415, "grad_norm": 12.732484817504883, "learning_rate": 8.902077151335312e-08, "loss": 0.1034, "step": 6, "video_reward_cumulative_accuracy": 0.5833333333333334 }, { "epoch": 0.0020777678836449986, "grad_norm": 20.14723777770996, "learning_rate": 1.0385756676557864e-07, "loss": 0.2094, "step": 7, "video_reward_cumulative_accuracy": 0.5714285714285714 }, { "epoch": 0.0023745918670228555, "grad_norm": 33.73411178588867, "learning_rate": 1.1869436201780416e-07, "loss": 0.3328, "step": 8, "video_reward_cumulative_accuracy": 0.5625 }, { "epoch": 0.0026714158504007124, "grad_norm": 21.074481964111328, "learning_rate": 1.3353115727002968e-07, "loss": 0.1414, "step": 9, "video_reward_cumulative_accuracy": 0.5555555555555556 }, { "epoch": 0.0029682398337785693, "grad_norm": 23.00200080871582, "learning_rate": 1.4836795252225522e-07, "loss": 0.4463, "step": 10, "video_reward_cumulative_accuracy": 0.55 }, { "epoch": 0.003265063817156426, "grad_norm": 23.951406478881836, "learning_rate": 1.6320474777448073e-07, "loss": 0.3231, "step": 11, "video_reward_cumulative_accuracy": 0.5909090909090909 }, { "epoch": 0.003561887800534283, "grad_norm": 25.632526397705078, "learning_rate": 1.7804154302670624e-07, "loss": 0.2746, "step": 12, "video_reward_cumulative_accuracy": 0.625 }, { "epoch": 0.00385871178391214, "grad_norm": 17.966325759887695, "learning_rate": 1.9287833827893176e-07, "loss": 0.224, "step": 13, "video_reward_cumulative_accuracy": 0.6538461538461539 }, { "epoch": 0.004155535767289997, "grad_norm": 27.712692260742188, "learning_rate": 2.0771513353115727e-07, "loss": 0.1764, "step": 14, "video_reward_cumulative_accuracy": 0.6071428571428571 }, { "epoch": 0.004452359750667854, "grad_norm": 24.923372268676758, "learning_rate": 2.225519287833828e-07, "loss": 0.2788, "step": 15, "video_reward_cumulative_accuracy": 0.6333333333333333 }, { "epoch": 0.004749183734045711, "grad_norm": 13.765485763549805, "learning_rate": 2.3738872403560833e-07, "loss": 0.1745, "step": 16, "video_reward_cumulative_accuracy": 0.65625 }, { "epoch": 0.0050460077174235675, "grad_norm": 17.97304344177246, "learning_rate": 2.5222551928783384e-07, "loss": 0.2362, "step": 17, "video_reward_cumulative_accuracy": 0.6470588235294118 }, { "epoch": 0.005342831700801425, "grad_norm": 10.485559463500977, "learning_rate": 2.6706231454005935e-07, "loss": 0.1295, "step": 18, "video_reward_cumulative_accuracy": 0.6666666666666666 }, { "epoch": 0.005639655684179281, "grad_norm": 15.357043266296387, "learning_rate": 2.8189910979228487e-07, "loss": 0.2338, "step": 19, "video_reward_cumulative_accuracy": 0.6842105263157895 }, { "epoch": 0.005936479667557139, "grad_norm": 15.526144027709961, "learning_rate": 2.9673590504451043e-07, "loss": 0.2443, "step": 20, "video_reward_cumulative_accuracy": 0.7 }, { "epoch": 0.006233303650934996, "grad_norm": 19.776208877563477, "learning_rate": 3.1157270029673595e-07, "loss": 0.1632, "step": 21, "video_reward_cumulative_accuracy": 0.6904761904761905 }, { "epoch": 0.006530127634312852, "grad_norm": 31.176301956176758, "learning_rate": 3.2640949554896146e-07, "loss": 0.3122, "step": 22, "video_reward_cumulative_accuracy": 0.6590909090909091 }, { "epoch": 0.00682695161769071, "grad_norm": 12.820406913757324, "learning_rate": 3.41246290801187e-07, "loss": 0.1422, "step": 23, "video_reward_cumulative_accuracy": 0.6521739130434783 }, { "epoch": 0.007123775601068566, "grad_norm": 12.706981658935547, "learning_rate": 3.560830860534125e-07, "loss": 0.169, "step": 24, "video_reward_cumulative_accuracy": 0.6458333333333334 }, { "epoch": 0.0074205995844464235, "grad_norm": 16.90671157836914, "learning_rate": 3.70919881305638e-07, "loss": 0.1716, "step": 25, "video_reward_cumulative_accuracy": 0.64 }, { "epoch": 0.00771742356782428, "grad_norm": 12.562002182006836, "learning_rate": 3.857566765578635e-07, "loss": 0.1565, "step": 26, "video_reward_cumulative_accuracy": 0.6538461538461539 }, { "epoch": 0.008014247551202136, "grad_norm": 10.192179679870605, "learning_rate": 4.005934718100891e-07, "loss": 0.1055, "step": 27, "video_reward_cumulative_accuracy": 0.6481481481481481 }, { "epoch": 0.008311071534579995, "grad_norm": 15.415340423583984, "learning_rate": 4.1543026706231454e-07, "loss": 0.2231, "step": 28, "video_reward_cumulative_accuracy": 0.6428571428571429 }, { "epoch": 0.008607895517957851, "grad_norm": 21.16411781311035, "learning_rate": 4.302670623145401e-07, "loss": 0.3278, "step": 29, "video_reward_cumulative_accuracy": 0.6379310344827587 }, { "epoch": 0.008904719501335707, "grad_norm": 21.195640563964844, "learning_rate": 4.451038575667656e-07, "loss": 0.2429, "step": 30, "video_reward_cumulative_accuracy": 0.6333333333333333 }, { "epoch": 0.009201543484713566, "grad_norm": 10.36475658416748, "learning_rate": 4.5994065281899114e-07, "loss": 0.125, "step": 31, "video_reward_cumulative_accuracy": 0.6290322580645161 }, { "epoch": 0.009498367468091422, "grad_norm": 14.530949592590332, "learning_rate": 4.7477744807121665e-07, "loss": 0.2392, "step": 32, "video_reward_cumulative_accuracy": 0.640625 }, { "epoch": 0.009795191451469279, "grad_norm": 17.089160919189453, "learning_rate": 4.896142433234421e-07, "loss": 0.2253, "step": 33, "video_reward_cumulative_accuracy": 0.6363636363636364 }, { "epoch": 0.010092015434847135, "grad_norm": 14.45820426940918, "learning_rate": 5.044510385756677e-07, "loss": 0.2298, "step": 34, "video_reward_cumulative_accuracy": 0.6323529411764706 }, { "epoch": 0.010388839418224993, "grad_norm": 12.284329414367676, "learning_rate": 5.192878338278932e-07, "loss": 0.0988, "step": 35, "video_reward_cumulative_accuracy": 0.6428571428571429 }, { "epoch": 0.01068566340160285, "grad_norm": 14.295129776000977, "learning_rate": 5.341246290801187e-07, "loss": 0.2106, "step": 36, "video_reward_cumulative_accuracy": 0.6388888888888888 }, { "epoch": 0.010982487384980706, "grad_norm": 7.684972286224365, "learning_rate": 5.489614243323443e-07, "loss": 0.0939, "step": 37, "video_reward_cumulative_accuracy": 0.6486486486486487 }, { "epoch": 0.011279311368358563, "grad_norm": 9.849855422973633, "learning_rate": 5.637982195845697e-07, "loss": 0.1303, "step": 38, "video_reward_cumulative_accuracy": 0.6578947368421053 }, { "epoch": 0.01157613535173642, "grad_norm": 10.725906372070312, "learning_rate": 5.786350148367953e-07, "loss": 0.1924, "step": 39, "video_reward_cumulative_accuracy": 0.6538461538461539 }, { "epoch": 0.011872959335114277, "grad_norm": 9.24576473236084, "learning_rate": 5.934718100890209e-07, "loss": 0.108, "step": 40, "video_reward_cumulative_accuracy": 0.6375 }, { "epoch": 0.012169783318492134, "grad_norm": 6.677659511566162, "learning_rate": 6.083086053412463e-07, "loss": 0.0879, "step": 41, "video_reward_cumulative_accuracy": 0.6463414634146342 }, { "epoch": 0.012466607301869992, "grad_norm": 7.723426342010498, "learning_rate": 6.231454005934719e-07, "loss": 0.073, "step": 42, "video_reward_cumulative_accuracy": 0.6428571428571429 }, { "epoch": 0.012763431285247848, "grad_norm": 10.541435241699219, "learning_rate": 6.379821958456974e-07, "loss": 0.1862, "step": 43, "video_reward_cumulative_accuracy": 0.6395348837209303 }, { "epoch": 0.013060255268625705, "grad_norm": 7.638758659362793, "learning_rate": 6.528189910979229e-07, "loss": 0.1184, "step": 44, "video_reward_cumulative_accuracy": 0.6477272727272727 }, { "epoch": 0.013357079252003561, "grad_norm": 10.385604858398438, "learning_rate": 6.676557863501485e-07, "loss": 0.1647, "step": 45, "video_reward_cumulative_accuracy": 0.6555555555555556 }, { "epoch": 0.01365390323538142, "grad_norm": 6.365200996398926, "learning_rate": 6.82492581602374e-07, "loss": 0.0985, "step": 46, "video_reward_cumulative_accuracy": 0.6413043478260869 }, { "epoch": 0.013950727218759276, "grad_norm": 7.856001853942871, "learning_rate": 6.973293768545995e-07, "loss": 0.1468, "step": 47, "video_reward_cumulative_accuracy": 0.6382978723404256 }, { "epoch": 0.014247551202137132, "grad_norm": 6.851215839385986, "learning_rate": 7.12166172106825e-07, "loss": 0.0869, "step": 48, "video_reward_cumulative_accuracy": 0.6354166666666666 }, { "epoch": 0.014544375185514989, "grad_norm": 7.089118003845215, "learning_rate": 7.270029673590504e-07, "loss": 0.1313, "step": 49, "video_reward_cumulative_accuracy": 0.6326530612244898 }, { "epoch": 0.014841199168892847, "grad_norm": 3.806837558746338, "learning_rate": 7.41839762611276e-07, "loss": 0.0955, "step": 50, "video_reward_cumulative_accuracy": 0.63 }, { "epoch": 0.015138023152270703, "grad_norm": 3.002065420150757, "learning_rate": 7.566765578635016e-07, "loss": 0.0829, "step": 51, "video_reward_cumulative_accuracy": 0.6372549019607843 }, { "epoch": 0.01543484713564856, "grad_norm": 8.006941795349121, "learning_rate": 7.71513353115727e-07, "loss": 0.1208, "step": 52, "video_reward_cumulative_accuracy": 0.6442307692307693 }, { "epoch": 0.015731671119026416, "grad_norm": 4.04028844833374, "learning_rate": 7.863501483679525e-07, "loss": 0.0905, "step": 53, "video_reward_cumulative_accuracy": 0.6415094339622641 }, { "epoch": 0.016028495102404273, "grad_norm": 11.572624206542969, "learning_rate": 8.011869436201782e-07, "loss": 0.1112, "step": 54, "video_reward_cumulative_accuracy": 0.6296296296296297 }, { "epoch": 0.016325319085782133, "grad_norm": 2.590923309326172, "learning_rate": 8.160237388724036e-07, "loss": 0.0886, "step": 55, "video_reward_cumulative_accuracy": 0.6363636363636364 }, { "epoch": 0.01662214306915999, "grad_norm": 6.2428717613220215, "learning_rate": 8.308605341246291e-07, "loss": 0.1035, "step": 56, "video_reward_cumulative_accuracy": 0.6339285714285714 }, { "epoch": 0.016918967052537846, "grad_norm": 5.258223056793213, "learning_rate": 8.456973293768548e-07, "loss": 0.0816, "step": 57, "video_reward_cumulative_accuracy": 0.6403508771929824 }, { "epoch": 0.017215791035915702, "grad_norm": 2.0548617839813232, "learning_rate": 8.605341246290802e-07, "loss": 0.083, "step": 58, "video_reward_cumulative_accuracy": 0.6379310344827587 }, { "epoch": 0.01751261501929356, "grad_norm": 6.927907466888428, "learning_rate": 8.753709198813057e-07, "loss": 0.1002, "step": 59, "video_reward_cumulative_accuracy": 0.635593220338983 }, { "epoch": 0.017809439002671415, "grad_norm": 6.766244411468506, "learning_rate": 8.902077151335312e-07, "loss": 0.096, "step": 60, "video_reward_cumulative_accuracy": 0.6416666666666667 }, { "epoch": 0.01810626298604927, "grad_norm": 3.500997304916382, "learning_rate": 9.050445103857568e-07, "loss": 0.0837, "step": 61, "video_reward_cumulative_accuracy": 0.6475409836065574 }, { "epoch": 0.01840308696942713, "grad_norm": 2.852778196334839, "learning_rate": 9.198813056379823e-07, "loss": 0.0913, "step": 62, "video_reward_cumulative_accuracy": 0.6451612903225806 }, { "epoch": 0.018699910952804988, "grad_norm": 10.269067764282227, "learning_rate": 9.347181008902077e-07, "loss": 0.0754, "step": 63, "video_reward_cumulative_accuracy": 0.6507936507936508 }, { "epoch": 0.018996734936182844, "grad_norm": 3.8665616512298584, "learning_rate": 9.495548961424333e-07, "loss": 0.0886, "step": 64, "video_reward_cumulative_accuracy": 0.65625 }, { "epoch": 0.0192935589195607, "grad_norm": 2.6777122020721436, "learning_rate": 9.643916913946588e-07, "loss": 0.0877, "step": 65, "video_reward_cumulative_accuracy": 0.6615384615384615 }, { "epoch": 0.019590382902938557, "grad_norm": 1.6913548707962036, "learning_rate": 9.792284866468842e-07, "loss": 0.0684, "step": 66, "video_reward_cumulative_accuracy": 0.6666666666666666 }, { "epoch": 0.019887206886316414, "grad_norm": 2.17547345161438, "learning_rate": 9.9406528189911e-07, "loss": 0.0903, "step": 67, "video_reward_cumulative_accuracy": 0.6716417910447762 }, { "epoch": 0.02018403086969427, "grad_norm": 9.660552978515625, "learning_rate": 1.0089020771513354e-06, "loss": 0.1137, "step": 68, "video_reward_cumulative_accuracy": 0.6617647058823529 }, { "epoch": 0.020480854853072127, "grad_norm": 6.081414222717285, "learning_rate": 1.0237388724035608e-06, "loss": 0.0836, "step": 69, "video_reward_cumulative_accuracy": 0.6594202898550725 }, { "epoch": 0.020777678836449986, "grad_norm": 5.362737655639648, "learning_rate": 1.0385756676557865e-06, "loss": 0.0828, "step": 70, "video_reward_cumulative_accuracy": 0.6642857142857143 }, { "epoch": 0.021074502819827843, "grad_norm": 3.4765052795410156, "learning_rate": 1.053412462908012e-06, "loss": 0.0678, "step": 71, "video_reward_cumulative_accuracy": 0.6619718309859155 }, { "epoch": 0.0213713268032057, "grad_norm": 3.868277072906494, "learning_rate": 1.0682492581602374e-06, "loss": 0.0704, "step": 72, "video_reward_cumulative_accuracy": 0.6666666666666666 }, { "epoch": 0.021668150786583556, "grad_norm": 2.056610107421875, "learning_rate": 1.083086053412463e-06, "loss": 0.0699, "step": 73, "video_reward_cumulative_accuracy": 0.6712328767123288 }, { "epoch": 0.021964974769961412, "grad_norm": 4.672041893005371, "learning_rate": 1.0979228486646885e-06, "loss": 0.0789, "step": 74, "video_reward_cumulative_accuracy": 0.668918918918919 }, { "epoch": 0.02226179875333927, "grad_norm": 10.368182182312012, "learning_rate": 1.112759643916914e-06, "loss": 0.0785, "step": 75, "video_reward_cumulative_accuracy": 0.6666666666666666 }, { "epoch": 0.022558622736717125, "grad_norm": 5.704588890075684, "learning_rate": 1.1275964391691395e-06, "loss": 0.0781, "step": 76, "video_reward_cumulative_accuracy": 0.6710526315789473 }, { "epoch": 0.022855446720094985, "grad_norm": 4.419633388519287, "learning_rate": 1.1424332344213651e-06, "loss": 0.0859, "step": 77, "video_reward_cumulative_accuracy": 0.6623376623376623 }, { "epoch": 0.02315227070347284, "grad_norm": 8.239113807678223, "learning_rate": 1.1572700296735906e-06, "loss": 0.0967, "step": 78, "video_reward_cumulative_accuracy": 0.6538461538461539 }, { "epoch": 0.023449094686850698, "grad_norm": 2.648416519165039, "learning_rate": 1.172106824925816e-06, "loss": 0.0906, "step": 79, "video_reward_cumulative_accuracy": 0.6582278481012658 }, { "epoch": 0.023745918670228554, "grad_norm": 6.028584003448486, "learning_rate": 1.1869436201780417e-06, "loss": 0.0789, "step": 80, "video_reward_cumulative_accuracy": 0.65625 }, { "epoch": 0.02404274265360641, "grad_norm": 4.417842864990234, "learning_rate": 1.2017804154302672e-06, "loss": 0.0756, "step": 81, "video_reward_cumulative_accuracy": 0.654320987654321 }, { "epoch": 0.024339566636984267, "grad_norm": 5.482295513153076, "learning_rate": 1.2166172106824927e-06, "loss": 0.0518, "step": 82, "video_reward_cumulative_accuracy": 0.6585365853658537 }, { "epoch": 0.024636390620362124, "grad_norm": 3.1841440200805664, "learning_rate": 1.2314540059347183e-06, "loss": 0.0881, "step": 83, "video_reward_cumulative_accuracy": 0.6566265060240963 }, { "epoch": 0.024933214603739984, "grad_norm": 5.477931976318359, "learning_rate": 1.2462908011869438e-06, "loss": 0.0712, "step": 84, "video_reward_cumulative_accuracy": 0.6547619047619048 }, { "epoch": 0.02523003858711784, "grad_norm": 2.8534862995147705, "learning_rate": 1.2611275964391693e-06, "loss": 0.0796, "step": 85, "video_reward_cumulative_accuracy": 0.6588235294117647 }, { "epoch": 0.025526862570495697, "grad_norm": 6.181415557861328, "learning_rate": 1.2759643916913947e-06, "loss": 0.0667, "step": 86, "video_reward_cumulative_accuracy": 0.6569767441860465 }, { "epoch": 0.025823686553873553, "grad_norm": 4.147750377655029, "learning_rate": 1.2908011869436202e-06, "loss": 0.0738, "step": 87, "video_reward_cumulative_accuracy": 0.6609195402298851 }, { "epoch": 0.02612051053725141, "grad_norm": 4.225668907165527, "learning_rate": 1.3056379821958458e-06, "loss": 0.1034, "step": 88, "video_reward_cumulative_accuracy": 0.6647727272727273 }, { "epoch": 0.026417334520629266, "grad_norm": 2.278589963912964, "learning_rate": 1.3204747774480713e-06, "loss": 0.0505, "step": 89, "video_reward_cumulative_accuracy": 0.6685393258426966 }, { "epoch": 0.026714158504007122, "grad_norm": 6.009743690490723, "learning_rate": 1.335311572700297e-06, "loss": 0.0665, "step": 90, "video_reward_cumulative_accuracy": 0.6722222222222223 }, { "epoch": 0.027010982487384982, "grad_norm": 3.622457265853882, "learning_rate": 1.3501483679525224e-06, "loss": 0.0803, "step": 91, "video_reward_cumulative_accuracy": 0.6703296703296703 }, { "epoch": 0.02730780647076284, "grad_norm": 2.3656132221221924, "learning_rate": 1.364985163204748e-06, "loss": 0.0679, "step": 92, "video_reward_cumulative_accuracy": 0.6739130434782609 }, { "epoch": 0.027604630454140695, "grad_norm": 5.818204879760742, "learning_rate": 1.3798219584569734e-06, "loss": 0.0931, "step": 93, "video_reward_cumulative_accuracy": 0.6720430107526881 }, { "epoch": 0.027901454437518552, "grad_norm": 3.5401792526245117, "learning_rate": 1.394658753709199e-06, "loss": 0.0723, "step": 94, "video_reward_cumulative_accuracy": 0.675531914893617 }, { "epoch": 0.028198278420896408, "grad_norm": 6.389036178588867, "learning_rate": 1.4094955489614245e-06, "loss": 0.0554, "step": 95, "video_reward_cumulative_accuracy": 0.6789473684210526 }, { "epoch": 0.028495102404274265, "grad_norm": 3.825948476791382, "learning_rate": 1.42433234421365e-06, "loss": 0.0705, "step": 96, "video_reward_cumulative_accuracy": 0.6822916666666666 }, { "epoch": 0.02879192638765212, "grad_norm": 3.565723180770874, "learning_rate": 1.4391691394658754e-06, "loss": 0.0737, "step": 97, "video_reward_cumulative_accuracy": 0.6855670103092784 }, { "epoch": 0.029088750371029978, "grad_norm": 2.599555253982544, "learning_rate": 1.4540059347181009e-06, "loss": 0.0468, "step": 98, "video_reward_cumulative_accuracy": 0.6887755102040817 }, { "epoch": 0.029385574354407838, "grad_norm": 2.7549595832824707, "learning_rate": 1.4688427299703265e-06, "loss": 0.0644, "step": 99, "video_reward_cumulative_accuracy": 0.6919191919191919 }, { "epoch": 0.029682398337785694, "grad_norm": 5.881991386413574, "learning_rate": 1.483679525222552e-06, "loss": 0.0955, "step": 100, "video_reward_cumulative_accuracy": 0.685 }, { "epoch": 0.02997922232116355, "grad_norm": 2.9013118743896484, "learning_rate": 1.4985163204747777e-06, "loss": 0.0603, "step": 101, "video_reward_cumulative_accuracy": 0.6881188118811881 }, { "epoch": 0.030276046304541407, "grad_norm": 3.3732762336730957, "learning_rate": 1.5133531157270031e-06, "loss": 0.0615, "step": 102, "video_reward_cumulative_accuracy": 0.6911764705882353 }, { "epoch": 0.030572870287919263, "grad_norm": 4.168172359466553, "learning_rate": 1.5281899109792286e-06, "loss": 0.1154, "step": 103, "video_reward_cumulative_accuracy": 0.6893203883495146 }, { "epoch": 0.03086969427129712, "grad_norm": 3.426560163497925, "learning_rate": 1.543026706231454e-06, "loss": 0.0481, "step": 104, "video_reward_cumulative_accuracy": 0.6923076923076923 }, { "epoch": 0.031166518254674976, "grad_norm": 8.7980375289917, "learning_rate": 1.5578635014836795e-06, "loss": 0.116, "step": 105, "video_reward_cumulative_accuracy": 0.6952380952380952 }, { "epoch": 0.03146334223805283, "grad_norm": 3.171637773513794, "learning_rate": 1.572700296735905e-06, "loss": 0.0592, "step": 106, "video_reward_cumulative_accuracy": 0.6933962264150944 }, { "epoch": 0.03176016622143069, "grad_norm": 2.1238746643066406, "learning_rate": 1.5875370919881309e-06, "loss": 0.0498, "step": 107, "video_reward_cumulative_accuracy": 0.6962616822429907 }, { "epoch": 0.032056990204808546, "grad_norm": 3.829890727996826, "learning_rate": 1.6023738872403563e-06, "loss": 0.0897, "step": 108, "video_reward_cumulative_accuracy": 0.6990740740740741 }, { "epoch": 0.032353814188186406, "grad_norm": 3.1230406761169434, "learning_rate": 1.6172106824925818e-06, "loss": 0.0446, "step": 109, "video_reward_cumulative_accuracy": 0.7018348623853211 }, { "epoch": 0.032650638171564265, "grad_norm": 5.97599983215332, "learning_rate": 1.6320474777448073e-06, "loss": 0.1308, "step": 110, "video_reward_cumulative_accuracy": 0.7045454545454546 }, { "epoch": 0.03294746215494212, "grad_norm": 2.300419330596924, "learning_rate": 1.6468842729970327e-06, "loss": 0.0632, "step": 111, "video_reward_cumulative_accuracy": 0.7072072072072072 }, { "epoch": 0.03324428613831998, "grad_norm": 2.3834316730499268, "learning_rate": 1.6617210682492582e-06, "loss": 0.0585, "step": 112, "video_reward_cumulative_accuracy": 0.7098214285714286 }, { "epoch": 0.03354111012169783, "grad_norm": 2.218229293823242, "learning_rate": 1.6765578635014836e-06, "loss": 0.0574, "step": 113, "video_reward_cumulative_accuracy": 0.7079646017699115 }, { "epoch": 0.03383793410507569, "grad_norm": 6.411158084869385, "learning_rate": 1.6913946587537095e-06, "loss": 0.0608, "step": 114, "video_reward_cumulative_accuracy": 0.706140350877193 }, { "epoch": 0.034134758088453544, "grad_norm": 3.1801657676696777, "learning_rate": 1.706231454005935e-06, "loss": 0.0894, "step": 115, "video_reward_cumulative_accuracy": 0.7043478260869566 }, { "epoch": 0.034431582071831404, "grad_norm": 6.5272536277771, "learning_rate": 1.7210682492581604e-06, "loss": 0.0919, "step": 116, "video_reward_cumulative_accuracy": 0.7025862068965517 }, { "epoch": 0.034728406055209264, "grad_norm": 4.742123603820801, "learning_rate": 1.735905044510386e-06, "loss": 0.0324, "step": 117, "video_reward_cumulative_accuracy": 0.7051282051282052 }, { "epoch": 0.03502523003858712, "grad_norm": 3.718932628631592, "learning_rate": 1.7507418397626114e-06, "loss": 0.0611, "step": 118, "video_reward_cumulative_accuracy": 0.7033898305084746 }, { "epoch": 0.03532205402196498, "grad_norm": 4.320330619812012, "learning_rate": 1.7655786350148368e-06, "loss": 0.054, "step": 119, "video_reward_cumulative_accuracy": 0.7058823529411765 }, { "epoch": 0.03561887800534283, "grad_norm": 4.672208786010742, "learning_rate": 1.7804154302670625e-06, "loss": 0.0525, "step": 120, "video_reward_cumulative_accuracy": 0.7083333333333334 }, { "epoch": 0.03591570198872069, "grad_norm": 2.6272125244140625, "learning_rate": 1.7952522255192882e-06, "loss": 0.0652, "step": 121, "video_reward_cumulative_accuracy": 0.7107438016528925 }, { "epoch": 0.03621252597209854, "grad_norm": 1.9446464776992798, "learning_rate": 1.8100890207715136e-06, "loss": 0.043, "step": 122, "video_reward_cumulative_accuracy": 0.7131147540983607 }, { "epoch": 0.0365093499554764, "grad_norm": 4.1938910484313965, "learning_rate": 1.824925816023739e-06, "loss": 0.1023, "step": 123, "video_reward_cumulative_accuracy": 0.7154471544715447 }, { "epoch": 0.03680617393885426, "grad_norm": 3.999626874923706, "learning_rate": 1.8397626112759646e-06, "loss": 0.062, "step": 124, "video_reward_cumulative_accuracy": 0.7137096774193549 }, { "epoch": 0.037102997922232116, "grad_norm": 2.076876640319824, "learning_rate": 1.85459940652819e-06, "loss": 0.0479, "step": 125, "video_reward_cumulative_accuracy": 0.716 }, { "epoch": 0.037399821905609976, "grad_norm": 6.920149326324463, "learning_rate": 1.8694362017804155e-06, "loss": 0.0538, "step": 126, "video_reward_cumulative_accuracy": 0.7182539682539683 }, { "epoch": 0.03769664588898783, "grad_norm": 2.615006923675537, "learning_rate": 1.8842729970326411e-06, "loss": 0.0352, "step": 127, "video_reward_cumulative_accuracy": 0.7204724409448819 }, { "epoch": 0.03799346987236569, "grad_norm": 2.167612075805664, "learning_rate": 1.8991097922848666e-06, "loss": 0.0397, "step": 128, "video_reward_cumulative_accuracy": 0.72265625 }, { "epoch": 0.03829029385574354, "grad_norm": 3.5347766876220703, "learning_rate": 1.9139465875370923e-06, "loss": 0.0699, "step": 129, "video_reward_cumulative_accuracy": 0.7209302325581395 }, { "epoch": 0.0385871178391214, "grad_norm": 6.354689121246338, "learning_rate": 1.9287833827893175e-06, "loss": 0.0554, "step": 130, "video_reward_cumulative_accuracy": 0.7192307692307692 }, { "epoch": 0.03888394182249926, "grad_norm": 2.4313793182373047, "learning_rate": 1.943620178041543e-06, "loss": 0.0529, "step": 131, "video_reward_cumulative_accuracy": 0.7175572519083969 }, { "epoch": 0.039180765805877114, "grad_norm": 5.276021480560303, "learning_rate": 1.9584569732937684e-06, "loss": 0.0958, "step": 132, "video_reward_cumulative_accuracy": 0.7159090909090909 }, { "epoch": 0.039477589789254974, "grad_norm": 6.521552562713623, "learning_rate": 1.9732937685459945e-06, "loss": 0.0698, "step": 133, "video_reward_cumulative_accuracy": 0.7142857142857143 }, { "epoch": 0.03977441377263283, "grad_norm": 4.296199798583984, "learning_rate": 1.98813056379822e-06, "loss": 0.0632, "step": 134, "video_reward_cumulative_accuracy": 0.7089552238805971 }, { "epoch": 0.04007123775601069, "grad_norm": 5.775026321411133, "learning_rate": 2.0029673590504455e-06, "loss": 0.1013, "step": 135, "video_reward_cumulative_accuracy": 0.7111111111111111 }, { "epoch": 0.04036806173938854, "grad_norm": 2.675187349319458, "learning_rate": 2.0178041543026707e-06, "loss": 0.0515, "step": 136, "video_reward_cumulative_accuracy": 0.7132352941176471 }, { "epoch": 0.0406648857227664, "grad_norm": 6.260149955749512, "learning_rate": 2.0326409495548964e-06, "loss": 0.0883, "step": 137, "video_reward_cumulative_accuracy": 0.7153284671532847 }, { "epoch": 0.04096170970614425, "grad_norm": 11.186830520629883, "learning_rate": 2.0474777448071216e-06, "loss": 0.0918, "step": 138, "video_reward_cumulative_accuracy": 0.717391304347826 }, { "epoch": 0.04125853368952211, "grad_norm": 6.043707847595215, "learning_rate": 2.0623145400593473e-06, "loss": 0.085, "step": 139, "video_reward_cumulative_accuracy": 0.7158273381294964 }, { "epoch": 0.04155535767289997, "grad_norm": 9.900096893310547, "learning_rate": 2.077151335311573e-06, "loss": 0.0977, "step": 140, "video_reward_cumulative_accuracy": 0.7178571428571429 }, { "epoch": 0.041852181656277826, "grad_norm": 2.8422248363494873, "learning_rate": 2.0919881305637987e-06, "loss": 0.0399, "step": 141, "video_reward_cumulative_accuracy": 0.7163120567375887 }, { "epoch": 0.042149005639655686, "grad_norm": 2.6769790649414062, "learning_rate": 2.106824925816024e-06, "loss": 0.0355, "step": 142, "video_reward_cumulative_accuracy": 0.7183098591549296 }, { "epoch": 0.04244582962303354, "grad_norm": 3.1609225273132324, "learning_rate": 2.1216617210682496e-06, "loss": 0.0677, "step": 143, "video_reward_cumulative_accuracy": 0.7132867132867133 }, { "epoch": 0.0427426536064114, "grad_norm": 2.4717013835906982, "learning_rate": 2.136498516320475e-06, "loss": 0.0365, "step": 144, "video_reward_cumulative_accuracy": 0.7118055555555556 }, { "epoch": 0.04303947758978925, "grad_norm": 8.373668670654297, "learning_rate": 2.1513353115727005e-06, "loss": 0.1185, "step": 145, "video_reward_cumulative_accuracy": 0.7103448275862069 }, { "epoch": 0.04333630157316711, "grad_norm": 2.4484148025512695, "learning_rate": 2.166172106824926e-06, "loss": 0.0479, "step": 146, "video_reward_cumulative_accuracy": 0.7123287671232876 }, { "epoch": 0.04363312555654497, "grad_norm": 4.690200328826904, "learning_rate": 2.1810089020771514e-06, "loss": 0.0706, "step": 147, "video_reward_cumulative_accuracy": 0.7142857142857143 }, { "epoch": 0.043929949539922825, "grad_norm": 5.8376898765563965, "learning_rate": 2.195845697329377e-06, "loss": 0.0849, "step": 148, "video_reward_cumulative_accuracy": 0.7162162162162162 }, { "epoch": 0.044226773523300685, "grad_norm": 2.741074562072754, "learning_rate": 2.2106824925816028e-06, "loss": 0.0741, "step": 149, "video_reward_cumulative_accuracy": 0.7114093959731543 }, { "epoch": 0.04452359750667854, "grad_norm": 5.629610061645508, "learning_rate": 2.225519287833828e-06, "loss": 0.0525, "step": 150, "video_reward_cumulative_accuracy": 0.71 }, { "epoch": 0.0448204214900564, "grad_norm": 6.277879238128662, "learning_rate": 2.2403560830860537e-06, "loss": 0.0803, "step": 151, "video_reward_cumulative_accuracy": 0.7086092715231788 }, { "epoch": 0.04511724547343425, "grad_norm": 2.9074411392211914, "learning_rate": 2.255192878338279e-06, "loss": 0.0607, "step": 152, "video_reward_cumulative_accuracy": 0.7105263157894737 }, { "epoch": 0.04541406945681211, "grad_norm": 8.067234992980957, "learning_rate": 2.2700296735905046e-06, "loss": 0.0987, "step": 153, "video_reward_cumulative_accuracy": 0.7091503267973857 }, { "epoch": 0.04571089344018997, "grad_norm": 3.610557794570923, "learning_rate": 2.2848664688427303e-06, "loss": 0.0549, "step": 154, "video_reward_cumulative_accuracy": 0.7077922077922078 }, { "epoch": 0.04600771742356782, "grad_norm": 3.5607757568359375, "learning_rate": 2.2997032640949555e-06, "loss": 0.0864, "step": 155, "video_reward_cumulative_accuracy": 0.7064516129032258 }, { "epoch": 0.04630454140694568, "grad_norm": 4.017952919006348, "learning_rate": 2.314540059347181e-06, "loss": 0.0522, "step": 156, "video_reward_cumulative_accuracy": 0.7083333333333334 }, { "epoch": 0.046601365390323536, "grad_norm": 4.041268825531006, "learning_rate": 2.3293768545994065e-06, "loss": 0.0456, "step": 157, "video_reward_cumulative_accuracy": 0.7101910828025477 }, { "epoch": 0.046898189373701396, "grad_norm": 2.6721463203430176, "learning_rate": 2.344213649851632e-06, "loss": 0.0379, "step": 158, "video_reward_cumulative_accuracy": 0.7120253164556962 }, { "epoch": 0.04719501335707925, "grad_norm": 5.769506931304932, "learning_rate": 2.359050445103858e-06, "loss": 0.0505, "step": 159, "video_reward_cumulative_accuracy": 0.710691823899371 }, { "epoch": 0.04749183734045711, "grad_norm": 2.384072780609131, "learning_rate": 2.3738872403560835e-06, "loss": 0.0713, "step": 160, "video_reward_cumulative_accuracy": 0.70625 }, { "epoch": 0.04778866132383497, "grad_norm": 4.968862533569336, "learning_rate": 2.3887240356083087e-06, "loss": 0.0545, "step": 161, "video_reward_cumulative_accuracy": 0.7080745341614907 }, { "epoch": 0.04808548530721282, "grad_norm": 2.6680426597595215, "learning_rate": 2.4035608308605344e-06, "loss": 0.0545, "step": 162, "video_reward_cumulative_accuracy": 0.7098765432098766 }, { "epoch": 0.04838230929059068, "grad_norm": 5.463686943054199, "learning_rate": 2.4183976261127596e-06, "loss": 0.0798, "step": 163, "video_reward_cumulative_accuracy": 0.7085889570552147 }, { "epoch": 0.048679133273968535, "grad_norm": 4.160032749176025, "learning_rate": 2.4332344213649853e-06, "loss": 0.0885, "step": 164, "video_reward_cumulative_accuracy": 0.7042682926829268 }, { "epoch": 0.048975957257346395, "grad_norm": 8.451370239257812, "learning_rate": 2.4480712166172106e-06, "loss": 0.0836, "step": 165, "video_reward_cumulative_accuracy": 0.706060606060606 }, { "epoch": 0.04927278124072425, "grad_norm": 2.2728710174560547, "learning_rate": 2.4629080118694367e-06, "loss": 0.039, "step": 166, "video_reward_cumulative_accuracy": 0.7078313253012049 }, { "epoch": 0.04956960522410211, "grad_norm": 6.519056797027588, "learning_rate": 2.477744807121662e-06, "loss": 0.0594, "step": 167, "video_reward_cumulative_accuracy": 0.7065868263473054 }, { "epoch": 0.04986642920747997, "grad_norm": 11.774324417114258, "learning_rate": 2.4925816023738876e-06, "loss": 0.1125, "step": 168, "video_reward_cumulative_accuracy": 0.7083333333333334 }, { "epoch": 0.05016325319085782, "grad_norm": 1.901655673980713, "learning_rate": 2.507418397626113e-06, "loss": 0.0589, "step": 169, "video_reward_cumulative_accuracy": 0.7100591715976331 }, { "epoch": 0.05046007717423568, "grad_norm": 1.9185417890548706, "learning_rate": 2.5222551928783385e-06, "loss": 0.041, "step": 170, "video_reward_cumulative_accuracy": 0.711764705882353 }, { "epoch": 0.05075690115761353, "grad_norm": 1.7406995296478271, "learning_rate": 2.5370919881305638e-06, "loss": 0.0625, "step": 171, "video_reward_cumulative_accuracy": 0.7105263157894737 }, { "epoch": 0.05105372514099139, "grad_norm": 2.75722074508667, "learning_rate": 2.5519287833827894e-06, "loss": 0.036, "step": 172, "video_reward_cumulative_accuracy": 0.7122093023255814 }, { "epoch": 0.051350549124369246, "grad_norm": 4.977455139160156, "learning_rate": 2.5667655786350147e-06, "loss": 0.0586, "step": 173, "video_reward_cumulative_accuracy": 0.7109826589595376 }, { "epoch": 0.051647373107747106, "grad_norm": 3.5028724670410156, "learning_rate": 2.5816023738872403e-06, "loss": 0.0368, "step": 174, "video_reward_cumulative_accuracy": 0.7126436781609196 }, { "epoch": 0.051944197091124966, "grad_norm": 4.553066730499268, "learning_rate": 2.5964391691394664e-06, "loss": 0.042, "step": 175, "video_reward_cumulative_accuracy": 0.7142857142857143 }, { "epoch": 0.05224102107450282, "grad_norm": 5.41193962097168, "learning_rate": 2.6112759643916917e-06, "loss": 0.0806, "step": 176, "video_reward_cumulative_accuracy": 0.7159090909090909 }, { "epoch": 0.05253784505788068, "grad_norm": 2.606978178024292, "learning_rate": 2.6261127596439174e-06, "loss": 0.0336, "step": 177, "video_reward_cumulative_accuracy": 0.7175141242937854 }, { "epoch": 0.05283466904125853, "grad_norm": 3.2309417724609375, "learning_rate": 2.6409495548961426e-06, "loss": 0.0476, "step": 178, "video_reward_cumulative_accuracy": 0.7191011235955056 }, { "epoch": 0.05313149302463639, "grad_norm": 4.6940178871154785, "learning_rate": 2.6557863501483683e-06, "loss": 0.0644, "step": 179, "video_reward_cumulative_accuracy": 0.7178770949720671 }, { "epoch": 0.053428317008014245, "grad_norm": 4.964528560638428, "learning_rate": 2.670623145400594e-06, "loss": 0.0668, "step": 180, "video_reward_cumulative_accuracy": 0.7166666666666667 }, { "epoch": 0.053725140991392105, "grad_norm": 1.1753082275390625, "learning_rate": 2.685459940652819e-06, "loss": 0.0107, "step": 181, "video_reward_cumulative_accuracy": 0.7182320441988951 }, { "epoch": 0.054021964974769965, "grad_norm": 3.13619065284729, "learning_rate": 2.700296735905045e-06, "loss": 0.0513, "step": 182, "video_reward_cumulative_accuracy": 0.717032967032967 }, { "epoch": 0.05431878895814782, "grad_norm": 8.615299224853516, "learning_rate": 2.71513353115727e-06, "loss": 0.1667, "step": 183, "video_reward_cumulative_accuracy": 0.7158469945355191 }, { "epoch": 0.05461561294152568, "grad_norm": 8.474091529846191, "learning_rate": 2.729970326409496e-06, "loss": 0.0565, "step": 184, "video_reward_cumulative_accuracy": 0.717391304347826 }, { "epoch": 0.05491243692490353, "grad_norm": 3.5511362552642822, "learning_rate": 2.744807121661721e-06, "loss": 0.0296, "step": 185, "video_reward_cumulative_accuracy": 0.7189189189189189 }, { "epoch": 0.05520926090828139, "grad_norm": 3.176490306854248, "learning_rate": 2.7596439169139467e-06, "loss": 0.0326, "step": 186, "video_reward_cumulative_accuracy": 0.7204301075268817 }, { "epoch": 0.055506084891659244, "grad_norm": 4.008922100067139, "learning_rate": 2.774480712166172e-06, "loss": 0.0519, "step": 187, "video_reward_cumulative_accuracy": 0.7192513368983957 }, { "epoch": 0.055802908875037104, "grad_norm": 6.8299560546875, "learning_rate": 2.789317507418398e-06, "loss": 0.1015, "step": 188, "video_reward_cumulative_accuracy": 0.7154255319148937 }, { "epoch": 0.05609973285841496, "grad_norm": 2.7319183349609375, "learning_rate": 2.8041543026706237e-06, "loss": 0.0582, "step": 189, "video_reward_cumulative_accuracy": 0.716931216931217 }, { "epoch": 0.056396556841792816, "grad_norm": 7.440029621124268, "learning_rate": 2.818991097922849e-06, "loss": 0.0758, "step": 190, "video_reward_cumulative_accuracy": 0.7157894736842105 }, { "epoch": 0.056693380825170676, "grad_norm": 8.07271957397461, "learning_rate": 2.8338278931750747e-06, "loss": 0.0519, "step": 191, "video_reward_cumulative_accuracy": 0.7172774869109948 }, { "epoch": 0.05699020480854853, "grad_norm": 4.536227703094482, "learning_rate": 2.8486646884273e-06, "loss": 0.0574, "step": 192, "video_reward_cumulative_accuracy": 0.71875 }, { "epoch": 0.05728702879192639, "grad_norm": 2.7957005500793457, "learning_rate": 2.8635014836795256e-06, "loss": 0.0303, "step": 193, "video_reward_cumulative_accuracy": 0.7176165803108808 }, { "epoch": 0.05758385277530424, "grad_norm": 2.4321742057800293, "learning_rate": 2.878338278931751e-06, "loss": 0.0494, "step": 194, "video_reward_cumulative_accuracy": 0.7190721649484536 }, { "epoch": 0.0578806767586821, "grad_norm": 3.885902166366577, "learning_rate": 2.8931750741839765e-06, "loss": 0.0772, "step": 195, "video_reward_cumulative_accuracy": 0.7205128205128205 }, { "epoch": 0.058177500742059955, "grad_norm": 6.06294584274292, "learning_rate": 2.9080118694362018e-06, "loss": 0.0446, "step": 196, "video_reward_cumulative_accuracy": 0.7219387755102041 }, { "epoch": 0.058474324725437815, "grad_norm": 2.9600000381469727, "learning_rate": 2.9228486646884274e-06, "loss": 0.0723, "step": 197, "video_reward_cumulative_accuracy": 0.7233502538071066 }, { "epoch": 0.058771148708815675, "grad_norm": 5.309525012969971, "learning_rate": 2.937685459940653e-06, "loss": 0.0678, "step": 198, "video_reward_cumulative_accuracy": 0.7247474747474747 }, { "epoch": 0.05906797269219353, "grad_norm": 6.021256446838379, "learning_rate": 2.9525222551928783e-06, "loss": 0.1086, "step": 199, "video_reward_cumulative_accuracy": 0.7236180904522613 }, { "epoch": 0.05936479667557139, "grad_norm": 4.920889377593994, "learning_rate": 2.967359050445104e-06, "loss": 0.0766, "step": 200, "video_reward_cumulative_accuracy": 0.7225 }, { "epoch": 0.05936479667557139, "eval_runtime": 129.5769, "eval_samples_per_second": 6.089, "eval_steps_per_second": 0.764, "eval_test_set_accuracy": 0.696969696969697, "step": 200 }, { "epoch": 0.05966162065894924, "grad_norm": 8.54822063446045, "learning_rate": 2.9821958456973297e-06, "loss": 0.0915, "step": 201, "video_reward_cumulative_accuracy": 0.7213930348258707 }, { "epoch": 0.0599584446423271, "grad_norm": 3.513885974884033, "learning_rate": 2.9970326409495554e-06, "loss": 0.0686, "step": 202, "video_reward_cumulative_accuracy": 0.7227722772277227 }, { "epoch": 0.060255268625704954, "grad_norm": 4.311375141143799, "learning_rate": 3.011869436201781e-06, "loss": 0.0586, "step": 203, "video_reward_cumulative_accuracy": 0.7192118226600985 }, { "epoch": 0.060552092609082814, "grad_norm": 4.355630397796631, "learning_rate": 3.0267062314540063e-06, "loss": 0.0683, "step": 204, "video_reward_cumulative_accuracy": 0.7181372549019608 }, { "epoch": 0.060848916592460674, "grad_norm": 5.103359699249268, "learning_rate": 3.041543026706232e-06, "loss": 0.0869, "step": 205, "video_reward_cumulative_accuracy": 0.7195121951219512 }, { "epoch": 0.06114574057583853, "grad_norm": 4.328181266784668, "learning_rate": 3.056379821958457e-06, "loss": 0.0628, "step": 206, "video_reward_cumulative_accuracy": 0.720873786407767 }, { "epoch": 0.06144256455921639, "grad_norm": 2.0452539920806885, "learning_rate": 3.071216617210683e-06, "loss": 0.0691, "step": 207, "video_reward_cumulative_accuracy": 0.7222222222222222 }, { "epoch": 0.06173938854259424, "grad_norm": 2.371507167816162, "learning_rate": 3.086053412462908e-06, "loss": 0.0485, "step": 208, "video_reward_cumulative_accuracy": 0.7235576923076923 }, { "epoch": 0.0620362125259721, "grad_norm": 4.104339122772217, "learning_rate": 3.100890207715134e-06, "loss": 0.0686, "step": 209, "video_reward_cumulative_accuracy": 0.7248803827751196 }, { "epoch": 0.06233303650934995, "grad_norm": 8.020886421203613, "learning_rate": 3.115727002967359e-06, "loss": 0.0653, "step": 210, "video_reward_cumulative_accuracy": 0.7238095238095238 }, { "epoch": 0.06262986049272781, "grad_norm": 2.7191717624664307, "learning_rate": 3.1305637982195847e-06, "loss": 0.0474, "step": 211, "video_reward_cumulative_accuracy": 0.7251184834123223 }, { "epoch": 0.06292668447610567, "grad_norm": 2.388265609741211, "learning_rate": 3.14540059347181e-06, "loss": 0.0625, "step": 212, "video_reward_cumulative_accuracy": 0.7264150943396226 }, { "epoch": 0.06322350845948353, "grad_norm": 2.529482841491699, "learning_rate": 3.1602373887240356e-06, "loss": 0.054, "step": 213, "video_reward_cumulative_accuracy": 0.7253521126760564 }, { "epoch": 0.06352033244286139, "grad_norm": 5.697103977203369, "learning_rate": 3.1750741839762617e-06, "loss": 0.0667, "step": 214, "video_reward_cumulative_accuracy": 0.7242990654205608 }, { "epoch": 0.06381715642623924, "grad_norm": 2.470099449157715, "learning_rate": 3.189910979228487e-06, "loss": 0.0767, "step": 215, "video_reward_cumulative_accuracy": 0.7255813953488373 }, { "epoch": 0.06411398040961709, "grad_norm": 2.413121223449707, "learning_rate": 3.2047477744807127e-06, "loss": 0.0707, "step": 216, "video_reward_cumulative_accuracy": 0.7222222222222222 }, { "epoch": 0.06441080439299496, "grad_norm": 4.319202899932861, "learning_rate": 3.219584569732938e-06, "loss": 0.0661, "step": 217, "video_reward_cumulative_accuracy": 0.7235023041474654 }, { "epoch": 0.06470762837637281, "grad_norm": 10.807517051696777, "learning_rate": 3.2344213649851636e-06, "loss": 0.1928, "step": 218, "video_reward_cumulative_accuracy": 0.7201834862385321 }, { "epoch": 0.06500445235975066, "grad_norm": 3.2382359504699707, "learning_rate": 3.2492581602373893e-06, "loss": 0.0677, "step": 219, "video_reward_cumulative_accuracy": 0.7214611872146118 }, { "epoch": 0.06530127634312853, "grad_norm": 3.427091121673584, "learning_rate": 3.2640949554896145e-06, "loss": 0.0519, "step": 220, "video_reward_cumulative_accuracy": 0.7204545454545455 }, { "epoch": 0.06559810032650638, "grad_norm": 5.226832389831543, "learning_rate": 3.27893175074184e-06, "loss": 0.0667, "step": 221, "video_reward_cumulative_accuracy": 0.7194570135746606 }, { "epoch": 0.06589492430988424, "grad_norm": 4.402886390686035, "learning_rate": 3.2937685459940654e-06, "loss": 0.0532, "step": 222, "video_reward_cumulative_accuracy": 0.7207207207207207 }, { "epoch": 0.06619174829326209, "grad_norm": 5.255795001983643, "learning_rate": 3.308605341246291e-06, "loss": 0.0755, "step": 223, "video_reward_cumulative_accuracy": 0.7219730941704036 }, { "epoch": 0.06648857227663996, "grad_norm": 8.409960746765137, "learning_rate": 3.3234421364985163e-06, "loss": 0.1155, "step": 224, "video_reward_cumulative_accuracy": 0.7232142857142857 }, { "epoch": 0.06678539626001781, "grad_norm": 8.938908576965332, "learning_rate": 3.338278931750742e-06, "loss": 0.1178, "step": 225, "video_reward_cumulative_accuracy": 0.7222222222222222 }, { "epoch": 0.06708222024339566, "grad_norm": 8.283513069152832, "learning_rate": 3.3531157270029673e-06, "loss": 0.1187, "step": 226, "video_reward_cumulative_accuracy": 0.7234513274336283 }, { "epoch": 0.06737904422677353, "grad_norm": 6.216405391693115, "learning_rate": 3.3679525222551934e-06, "loss": 0.0735, "step": 227, "video_reward_cumulative_accuracy": 0.7224669603524229 }, { "epoch": 0.06767586821015138, "grad_norm": 4.862206935882568, "learning_rate": 3.382789317507419e-06, "loss": 0.0792, "step": 228, "video_reward_cumulative_accuracy": 0.7236842105263158 }, { "epoch": 0.06797269219352924, "grad_norm": 2.139225959777832, "learning_rate": 3.3976261127596443e-06, "loss": 0.0682, "step": 229, "video_reward_cumulative_accuracy": 0.7248908296943232 }, { "epoch": 0.06826951617690709, "grad_norm": 3.5411124229431152, "learning_rate": 3.41246290801187e-06, "loss": 0.0719, "step": 230, "video_reward_cumulative_accuracy": 0.7260869565217392 }, { "epoch": 0.06856634016028496, "grad_norm": 3.569051742553711, "learning_rate": 3.427299703264095e-06, "loss": 0.0443, "step": 231, "video_reward_cumulative_accuracy": 0.7272727272727273 }, { "epoch": 0.06886316414366281, "grad_norm": 5.477877616882324, "learning_rate": 3.442136498516321e-06, "loss": 0.0875, "step": 232, "video_reward_cumulative_accuracy": 0.7262931034482759 }, { "epoch": 0.06915998812704066, "grad_norm": 4.547797203063965, "learning_rate": 3.456973293768546e-06, "loss": 0.0537, "step": 233, "video_reward_cumulative_accuracy": 0.7274678111587983 }, { "epoch": 0.06945681211041853, "grad_norm": 3.6796634197235107, "learning_rate": 3.471810089020772e-06, "loss": 0.0563, "step": 234, "video_reward_cumulative_accuracy": 0.7264957264957265 }, { "epoch": 0.06975363609379638, "grad_norm": 1.6680197715759277, "learning_rate": 3.486646884272997e-06, "loss": 0.0447, "step": 235, "video_reward_cumulative_accuracy": 0.7276595744680852 }, { "epoch": 0.07005046007717423, "grad_norm": 3.814924478530884, "learning_rate": 3.5014836795252227e-06, "loss": 0.0532, "step": 236, "video_reward_cumulative_accuracy": 0.7266949152542372 }, { "epoch": 0.07034728406055209, "grad_norm": 2.366469383239746, "learning_rate": 3.5163204747774484e-06, "loss": 0.0393, "step": 237, "video_reward_cumulative_accuracy": 0.7278481012658228 }, { "epoch": 0.07064410804392995, "grad_norm": 5.3031206130981445, "learning_rate": 3.5311572700296736e-06, "loss": 0.0637, "step": 238, "video_reward_cumulative_accuracy": 0.7289915966386554 }, { "epoch": 0.07094093202730781, "grad_norm": 2.591233491897583, "learning_rate": 3.5459940652818993e-06, "loss": 0.0779, "step": 239, "video_reward_cumulative_accuracy": 0.7301255230125523 }, { "epoch": 0.07123775601068566, "grad_norm": 5.400726795196533, "learning_rate": 3.560830860534125e-06, "loss": 0.0881, "step": 240, "video_reward_cumulative_accuracy": 0.73125 }, { "epoch": 0.07153457999406353, "grad_norm": 6.161452293395996, "learning_rate": 3.5756676557863507e-06, "loss": 0.1075, "step": 241, "video_reward_cumulative_accuracy": 0.7323651452282157 }, { "epoch": 0.07183140397744138, "grad_norm": 4.78709077835083, "learning_rate": 3.5905044510385763e-06, "loss": 0.0733, "step": 242, "video_reward_cumulative_accuracy": 0.7334710743801653 }, { "epoch": 0.07212822796081923, "grad_norm": 2.570233106613159, "learning_rate": 3.6053412462908016e-06, "loss": 0.0624, "step": 243, "video_reward_cumulative_accuracy": 0.7345679012345679 }, { "epoch": 0.07242505194419709, "grad_norm": 2.175311803817749, "learning_rate": 3.6201780415430273e-06, "loss": 0.0712, "step": 244, "video_reward_cumulative_accuracy": 0.735655737704918 }, { "epoch": 0.07272187592757495, "grad_norm": 2.6615543365478516, "learning_rate": 3.6350148367952525e-06, "loss": 0.0627, "step": 245, "video_reward_cumulative_accuracy": 0.736734693877551 }, { "epoch": 0.0730186999109528, "grad_norm": 3.832801580429077, "learning_rate": 3.649851632047478e-06, "loss": 0.0511, "step": 246, "video_reward_cumulative_accuracy": 0.7378048780487805 }, { "epoch": 0.07331552389433066, "grad_norm": 2.9405055046081543, "learning_rate": 3.6646884272997034e-06, "loss": 0.0773, "step": 247, "video_reward_cumulative_accuracy": 0.7388663967611336 }, { "epoch": 0.07361234787770853, "grad_norm": 1.5752394199371338, "learning_rate": 3.679525222551929e-06, "loss": 0.0362, "step": 248, "video_reward_cumulative_accuracy": 0.7399193548387096 }, { "epoch": 0.07390917186108638, "grad_norm": 2.3977644443511963, "learning_rate": 3.6943620178041544e-06, "loss": 0.0669, "step": 249, "video_reward_cumulative_accuracy": 0.7409638554216867 }, { "epoch": 0.07420599584446423, "grad_norm": 2.2234885692596436, "learning_rate": 3.70919881305638e-06, "loss": 0.0717, "step": 250, "video_reward_cumulative_accuracy": 0.74 }, { "epoch": 0.07450281982784208, "grad_norm": 2.77751088142395, "learning_rate": 3.7240356083086053e-06, "loss": 0.0726, "step": 251, "video_reward_cumulative_accuracy": 0.7410358565737052 }, { "epoch": 0.07479964381121995, "grad_norm": 2.137138843536377, "learning_rate": 3.738872403560831e-06, "loss": 0.046, "step": 252, "video_reward_cumulative_accuracy": 0.7420634920634921 }, { "epoch": 0.0750964677945978, "grad_norm": 3.0533056259155273, "learning_rate": 3.753709198813057e-06, "loss": 0.038, "step": 253, "video_reward_cumulative_accuracy": 0.7430830039525692 }, { "epoch": 0.07539329177797566, "grad_norm": 1.6132036447525024, "learning_rate": 3.7685459940652823e-06, "loss": 0.0406, "step": 254, "video_reward_cumulative_accuracy": 0.7440944881889764 }, { "epoch": 0.07569011576135352, "grad_norm": 1.4988843202590942, "learning_rate": 3.783382789317508e-06, "loss": 0.039, "step": 255, "video_reward_cumulative_accuracy": 0.7450980392156863 }, { "epoch": 0.07598693974473138, "grad_norm": 3.1757826805114746, "learning_rate": 3.7982195845697332e-06, "loss": 0.0607, "step": 256, "video_reward_cumulative_accuracy": 0.74609375 }, { "epoch": 0.07628376372810923, "grad_norm": 5.67103385925293, "learning_rate": 3.813056379821959e-06, "loss": 0.0528, "step": 257, "video_reward_cumulative_accuracy": 0.7470817120622568 }, { "epoch": 0.07658058771148708, "grad_norm": 2.0683236122131348, "learning_rate": 3.8278931750741846e-06, "loss": 0.0275, "step": 258, "video_reward_cumulative_accuracy": 0.748062015503876 }, { "epoch": 0.07687741169486495, "grad_norm": 4.611932277679443, "learning_rate": 3.84272997032641e-06, "loss": 0.0939, "step": 259, "video_reward_cumulative_accuracy": 0.747104247104247 }, { "epoch": 0.0771742356782428, "grad_norm": 4.704992294311523, "learning_rate": 3.857566765578635e-06, "loss": 0.1056, "step": 260, "video_reward_cumulative_accuracy": 0.7480769230769231 }, { "epoch": 0.07747105966162066, "grad_norm": 6.33126974105835, "learning_rate": 3.872403560830861e-06, "loss": 0.1326, "step": 261, "video_reward_cumulative_accuracy": 0.7490421455938697 }, { "epoch": 0.07776788364499852, "grad_norm": 2.4778928756713867, "learning_rate": 3.887240356083086e-06, "loss": 0.0467, "step": 262, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.07806470762837638, "grad_norm": 11.440229415893555, "learning_rate": 3.902077151335312e-06, "loss": 0.1549, "step": 263, "video_reward_cumulative_accuracy": 0.7490494296577946 }, { "epoch": 0.07836153161175423, "grad_norm": 4.551571369171143, "learning_rate": 3.916913946587537e-06, "loss": 0.0389, "step": 264, "video_reward_cumulative_accuracy": 0.7481060606060606 }, { "epoch": 0.07865835559513208, "grad_norm": 4.240347385406494, "learning_rate": 3.931750741839763e-06, "loss": 0.0743, "step": 265, "video_reward_cumulative_accuracy": 0.7490566037735849 }, { "epoch": 0.07895517957850995, "grad_norm": 7.178493976593018, "learning_rate": 3.946587537091989e-06, "loss": 0.083, "step": 266, "video_reward_cumulative_accuracy": 0.7481203007518797 }, { "epoch": 0.0792520035618878, "grad_norm": 8.066522598266602, "learning_rate": 3.961424332344214e-06, "loss": 0.0835, "step": 267, "video_reward_cumulative_accuracy": 0.7471910112359551 }, { "epoch": 0.07954882754526565, "grad_norm": 6.96315860748291, "learning_rate": 3.97626112759644e-06, "loss": 0.0599, "step": 268, "video_reward_cumulative_accuracy": 0.7481343283582089 }, { "epoch": 0.07984565152864351, "grad_norm": 2.7894434928894043, "learning_rate": 3.991097922848665e-06, "loss": 0.0418, "step": 269, "video_reward_cumulative_accuracy": 0.7490706319702602 }, { "epoch": 0.08014247551202137, "grad_norm": 2.1485440731048584, "learning_rate": 4.005934718100891e-06, "loss": 0.058, "step": 270, "video_reward_cumulative_accuracy": 0.7481481481481481 }, { "epoch": 0.08043929949539923, "grad_norm": 3.671163320541382, "learning_rate": 4.020771513353116e-06, "loss": 0.0447, "step": 271, "video_reward_cumulative_accuracy": 0.7490774907749077 }, { "epoch": 0.08073612347877708, "grad_norm": 2.1427624225616455, "learning_rate": 4.0356083086053414e-06, "loss": 0.0365, "step": 272, "video_reward_cumulative_accuracy": 0.7481617647058824 }, { "epoch": 0.08103294746215495, "grad_norm": 2.3186142444610596, "learning_rate": 4.050445103857567e-06, "loss": 0.0578, "step": 273, "video_reward_cumulative_accuracy": 0.7472527472527473 }, { "epoch": 0.0813297714455328, "grad_norm": 7.067409515380859, "learning_rate": 4.065281899109793e-06, "loss": 0.1045, "step": 274, "video_reward_cumulative_accuracy": 0.7463503649635036 }, { "epoch": 0.08162659542891065, "grad_norm": 0.9794759154319763, "learning_rate": 4.080118694362018e-06, "loss": 0.0146, "step": 275, "video_reward_cumulative_accuracy": 0.7472727272727273 }, { "epoch": 0.0819234194122885, "grad_norm": 5.27300500869751, "learning_rate": 4.094955489614243e-06, "loss": 0.0394, "step": 276, "video_reward_cumulative_accuracy": 0.7481884057971014 }, { "epoch": 0.08222024339566637, "grad_norm": 4.100281238555908, "learning_rate": 4.109792284866469e-06, "loss": 0.0616, "step": 277, "video_reward_cumulative_accuracy": 0.7490974729241877 }, { "epoch": 0.08251706737904423, "grad_norm": 4.657914638519287, "learning_rate": 4.124629080118695e-06, "loss": 0.0861, "step": 278, "video_reward_cumulative_accuracy": 0.7482014388489209 }, { "epoch": 0.08281389136242208, "grad_norm": 4.093924045562744, "learning_rate": 4.139465875370921e-06, "loss": 0.048, "step": 279, "video_reward_cumulative_accuracy": 0.7491039426523297 }, { "epoch": 0.08311071534579995, "grad_norm": 7.382143020629883, "learning_rate": 4.154302670623146e-06, "loss": 0.0895, "step": 280, "video_reward_cumulative_accuracy": 0.7464285714285714 }, { "epoch": 0.0834075393291778, "grad_norm": 11.922904968261719, "learning_rate": 4.169139465875371e-06, "loss": 0.1006, "step": 281, "video_reward_cumulative_accuracy": 0.7473309608540926 }, { "epoch": 0.08370436331255565, "grad_norm": 3.6098527908325195, "learning_rate": 4.183976261127597e-06, "loss": 0.0253, "step": 282, "video_reward_cumulative_accuracy": 0.74822695035461 }, { "epoch": 0.0840011872959335, "grad_norm": 4.491762161254883, "learning_rate": 4.1988130563798226e-06, "loss": 0.0394, "step": 283, "video_reward_cumulative_accuracy": 0.7473498233215548 }, { "epoch": 0.08429801127931137, "grad_norm": 9.078802108764648, "learning_rate": 4.213649851632048e-06, "loss": 0.1382, "step": 284, "video_reward_cumulative_accuracy": 0.7464788732394366 }, { "epoch": 0.08459483526268922, "grad_norm": 4.624719619750977, "learning_rate": 4.228486646884273e-06, "loss": 0.0321, "step": 285, "video_reward_cumulative_accuracy": 0.7473684210526316 }, { "epoch": 0.08489165924606708, "grad_norm": 4.962191104888916, "learning_rate": 4.243323442136499e-06, "loss": 0.1042, "step": 286, "video_reward_cumulative_accuracy": 0.7482517482517482 }, { "epoch": 0.08518848322944494, "grad_norm": 3.402569055557251, "learning_rate": 4.258160237388724e-06, "loss": 0.0473, "step": 287, "video_reward_cumulative_accuracy": 0.7491289198606271 }, { "epoch": 0.0854853072128228, "grad_norm": 5.989389419555664, "learning_rate": 4.27299703264095e-06, "loss": 0.1256, "step": 288, "video_reward_cumulative_accuracy": 0.7482638888888888 }, { "epoch": 0.08578213119620065, "grad_norm": 7.128279685974121, "learning_rate": 4.287833827893175e-06, "loss": 0.0807, "step": 289, "video_reward_cumulative_accuracy": 0.7491349480968859 }, { "epoch": 0.0860789551795785, "grad_norm": 3.677250623703003, "learning_rate": 4.302670623145401e-06, "loss": 0.0731, "step": 290, "video_reward_cumulative_accuracy": 0.7482758620689656 }, { "epoch": 0.08637577916295637, "grad_norm": 3.578486680984497, "learning_rate": 4.317507418397626e-06, "loss": 0.0695, "step": 291, "video_reward_cumulative_accuracy": 0.7491408934707904 }, { "epoch": 0.08667260314633422, "grad_norm": 2.7287142276763916, "learning_rate": 4.332344213649852e-06, "loss": 0.0301, "step": 292, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.08696942712971208, "grad_norm": 2.3789162635803223, "learning_rate": 4.347181008902078e-06, "loss": 0.0378, "step": 293, "video_reward_cumulative_accuracy": 0.7491467576791809 }, { "epoch": 0.08726625111308994, "grad_norm": 1.2172014713287354, "learning_rate": 4.362017804154303e-06, "loss": 0.0288, "step": 294, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.0875630750964678, "grad_norm": 4.625653266906738, "learning_rate": 4.376854599406529e-06, "loss": 0.096, "step": 295, "video_reward_cumulative_accuracy": 0.7491525423728813 }, { "epoch": 0.08785989907984565, "grad_norm": 3.8820786476135254, "learning_rate": 4.391691394658754e-06, "loss": 0.0471, "step": 296, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.0881567230632235, "grad_norm": 5.01461935043335, "learning_rate": 4.4065281899109794e-06, "loss": 0.0686, "step": 297, "video_reward_cumulative_accuracy": 0.7508417508417509 }, { "epoch": 0.08845354704660137, "grad_norm": 8.346698760986328, "learning_rate": 4.4213649851632055e-06, "loss": 0.1304, "step": 298, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.08875037102997922, "grad_norm": 2.789457082748413, "learning_rate": 4.436201780415431e-06, "loss": 0.0618, "step": 299, "video_reward_cumulative_accuracy": 0.7508361204013378 }, { "epoch": 0.08904719501335707, "grad_norm": 4.110484600067139, "learning_rate": 4.451038575667656e-06, "loss": 0.091, "step": 300, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.08934401899673494, "grad_norm": 1.834205150604248, "learning_rate": 4.465875370919881e-06, "loss": 0.0305, "step": 301, "video_reward_cumulative_accuracy": 0.7508305647840532 }, { "epoch": 0.0896408429801128, "grad_norm": 3.296640157699585, "learning_rate": 4.480712166172107e-06, "loss": 0.0954, "step": 302, "video_reward_cumulative_accuracy": 0.75 }, { "epoch": 0.08993766696349065, "grad_norm": 1.8095667362213135, "learning_rate": 4.495548961424333e-06, "loss": 0.0251, "step": 303, "video_reward_cumulative_accuracy": 0.7508250825082509 }, { "epoch": 0.0902344909468685, "grad_norm": 4.808000564575195, "learning_rate": 4.510385756676558e-06, "loss": 0.0786, "step": 304, "video_reward_cumulative_accuracy": 0.7516447368421053 }, { "epoch": 0.09053131493024637, "grad_norm": 4.050952911376953, "learning_rate": 4.525222551928784e-06, "loss": 0.0847, "step": 305, "video_reward_cumulative_accuracy": 0.7524590163934426 }, { "epoch": 0.09082813891362422, "grad_norm": 6.732780933380127, "learning_rate": 4.540059347181009e-06, "loss": 0.1121, "step": 306, "video_reward_cumulative_accuracy": 0.7532679738562091 }, { "epoch": 0.09112496289700207, "grad_norm": 5.494693279266357, "learning_rate": 4.554896142433235e-06, "loss": 0.0543, "step": 307, "video_reward_cumulative_accuracy": 0.754071661237785 }, { "epoch": 0.09142178688037994, "grad_norm": 2.1636104583740234, "learning_rate": 4.5697329376854606e-06, "loss": 0.0511, "step": 308, "video_reward_cumulative_accuracy": 0.7532467532467533 }, { "epoch": 0.0917186108637578, "grad_norm": 6.281263828277588, "learning_rate": 4.584569732937686e-06, "loss": 0.0951, "step": 309, "video_reward_cumulative_accuracy": 0.7540453074433657 }, { "epoch": 0.09201543484713565, "grad_norm": 3.543318033218384, "learning_rate": 4.599406528189911e-06, "loss": 0.0731, "step": 310, "video_reward_cumulative_accuracy": 0.7532258064516129 }, { "epoch": 0.0923122588305135, "grad_norm": 2.5978658199310303, "learning_rate": 4.614243323442137e-06, "loss": 0.0714, "step": 311, "video_reward_cumulative_accuracy": 0.7540192926045016 }, { "epoch": 0.09260908281389137, "grad_norm": 2.553865909576416, "learning_rate": 4.629080118694362e-06, "loss": 0.0742, "step": 312, "video_reward_cumulative_accuracy": 0.7548076923076923 }, { "epoch": 0.09290590679726922, "grad_norm": 1.7062216997146606, "learning_rate": 4.643916913946588e-06, "loss": 0.0572, "step": 313, "video_reward_cumulative_accuracy": 0.7539936102236422 }, { "epoch": 0.09320273078064707, "grad_norm": 1.2176084518432617, "learning_rate": 4.658753709198813e-06, "loss": 0.049, "step": 314, "video_reward_cumulative_accuracy": 0.7547770700636943 }, { "epoch": 0.09349955476402494, "grad_norm": 1.2772020101547241, "learning_rate": 4.673590504451039e-06, "loss": 0.0449, "step": 315, "video_reward_cumulative_accuracy": 0.753968253968254 }, { "epoch": 0.09379637874740279, "grad_norm": 1.6296868324279785, "learning_rate": 4.688427299703264e-06, "loss": 0.043, "step": 316, "video_reward_cumulative_accuracy": 0.754746835443038 }, { "epoch": 0.09409320273078065, "grad_norm": 2.408234119415283, "learning_rate": 4.7032640949554895e-06, "loss": 0.0572, "step": 317, "video_reward_cumulative_accuracy": 0.7555205047318612 }, { "epoch": 0.0943900267141585, "grad_norm": 1.397177815437317, "learning_rate": 4.718100890207716e-06, "loss": 0.0276, "step": 318, "video_reward_cumulative_accuracy": 0.7562893081761006 }, { "epoch": 0.09468685069753636, "grad_norm": 2.097848415374756, "learning_rate": 4.732937685459941e-06, "loss": 0.0501, "step": 319, "video_reward_cumulative_accuracy": 0.7570532915360502 }, { "epoch": 0.09498367468091422, "grad_norm": 2.369933605194092, "learning_rate": 4.747774480712167e-06, "loss": 0.0377, "step": 320, "video_reward_cumulative_accuracy": 0.7578125 }, { "epoch": 0.09528049866429207, "grad_norm": 2.974731922149658, "learning_rate": 4.762611275964392e-06, "loss": 0.0609, "step": 321, "video_reward_cumulative_accuracy": 0.7570093457943925 }, { "epoch": 0.09557732264766994, "grad_norm": 2.996340036392212, "learning_rate": 4.7774480712166174e-06, "loss": 0.0795, "step": 322, "video_reward_cumulative_accuracy": 0.7577639751552795 }, { "epoch": 0.09587414663104779, "grad_norm": 2.6763088703155518, "learning_rate": 4.7922848664688435e-06, "loss": 0.067, "step": 323, "video_reward_cumulative_accuracy": 0.7585139318885449 }, { "epoch": 0.09617097061442564, "grad_norm": 3.8970932960510254, "learning_rate": 4.807121661721069e-06, "loss": 0.0637, "step": 324, "video_reward_cumulative_accuracy": 0.7592592592592593 }, { "epoch": 0.0964677945978035, "grad_norm": 3.0511136054992676, "learning_rate": 4.821958456973294e-06, "loss": 0.0545, "step": 325, "video_reward_cumulative_accuracy": 0.76 }, { "epoch": 0.09676461858118136, "grad_norm": 2.455991744995117, "learning_rate": 4.836795252225519e-06, "loss": 0.052, "step": 326, "video_reward_cumulative_accuracy": 0.75920245398773 }, { "epoch": 0.09706144256455922, "grad_norm": 5.562345504760742, "learning_rate": 4.851632047477745e-06, "loss": 0.0677, "step": 327, "video_reward_cumulative_accuracy": 0.7584097859327217 }, { "epoch": 0.09735826654793707, "grad_norm": 3.8642868995666504, "learning_rate": 4.866468842729971e-06, "loss": 0.0758, "step": 328, "video_reward_cumulative_accuracy": 0.7576219512195121 }, { "epoch": 0.09765509053131494, "grad_norm": 5.224849224090576, "learning_rate": 4.881305637982196e-06, "loss": 0.0496, "step": 329, "video_reward_cumulative_accuracy": 0.7583586626139818 }, { "epoch": 0.09795191451469279, "grad_norm": 2.613288402557373, "learning_rate": 4.896142433234421e-06, "loss": 0.0323, "step": 330, "video_reward_cumulative_accuracy": 0.759090909090909 }, { "epoch": 0.09824873849807064, "grad_norm": 1.3308014869689941, "learning_rate": 4.910979228486647e-06, "loss": 0.0467, "step": 331, "video_reward_cumulative_accuracy": 0.7598187311178247 }, { "epoch": 0.0985455624814485, "grad_norm": 2.2056570053100586, "learning_rate": 4.925816023738873e-06, "loss": 0.0739, "step": 332, "video_reward_cumulative_accuracy": 0.7605421686746988 }, { "epoch": 0.09884238646482636, "grad_norm": 1.1088576316833496, "learning_rate": 4.9406528189910986e-06, "loss": 0.0498, "step": 333, "video_reward_cumulative_accuracy": 0.7597597597597597 }, { "epoch": 0.09913921044820422, "grad_norm": 1.563938856124878, "learning_rate": 4.955489614243324e-06, "loss": 0.0737, "step": 334, "video_reward_cumulative_accuracy": 0.7604790419161677 }, { "epoch": 0.09943603443158207, "grad_norm": 1.9494915008544922, "learning_rate": 4.970326409495549e-06, "loss": 0.0488, "step": 335, "video_reward_cumulative_accuracy": 0.7611940298507462 }, { "epoch": 0.09973285841495994, "grad_norm": 4.285027980804443, "learning_rate": 4.985163204747775e-06, "loss": 0.0958, "step": 336, "video_reward_cumulative_accuracy": 0.7619047619047619 }, { "epoch": 0.10002968239833779, "grad_norm": 1.7529289722442627, "learning_rate": 5e-06, "loss": 0.0616, "step": 337, "video_reward_cumulative_accuracy": 0.7611275964391692 }, { "epoch": 0.10032650638171564, "grad_norm": 2.010082960128784, "learning_rate": 4.999998658003678e-06, "loss": 0.0854, "step": 338, "video_reward_cumulative_accuracy": 0.7618343195266272 }, { "epoch": 0.1006233303650935, "grad_norm": 2.320046901702881, "learning_rate": 4.9999946320161525e-06, "loss": 0.0489, "step": 339, "video_reward_cumulative_accuracy": 0.7625368731563422 }, { "epoch": 0.10092015434847136, "grad_norm": 3.510748863220215, "learning_rate": 4.999987922041746e-06, "loss": 0.1, "step": 340, "video_reward_cumulative_accuracy": 0.7602941176470588 }, { "epoch": 0.10121697833184921, "grad_norm": 4.800668239593506, "learning_rate": 4.999978528087661e-06, "loss": 0.0532, "step": 341, "video_reward_cumulative_accuracy": 0.7609970674486803 }, { "epoch": 0.10151380231522707, "grad_norm": 3.035673141479492, "learning_rate": 4.999966450163984e-06, "loss": 0.0732, "step": 342, "video_reward_cumulative_accuracy": 0.7616959064327485 }, { "epoch": 0.10181062629860493, "grad_norm": 1.4098176956176758, "learning_rate": 4.999951688283682e-06, "loss": 0.0352, "step": 343, "video_reward_cumulative_accuracy": 0.7623906705539358 }, { "epoch": 0.10210745028198279, "grad_norm": 2.0432138442993164, "learning_rate": 4.999934242462603e-06, "loss": 0.0546, "step": 344, "video_reward_cumulative_accuracy": 0.7630813953488372 }, { "epoch": 0.10240427426536064, "grad_norm": 3.4673523902893066, "learning_rate": 4.999914112719477e-06, "loss": 0.0746, "step": 345, "video_reward_cumulative_accuracy": 0.763768115942029 }, { "epoch": 0.10270109824873849, "grad_norm": 1.0772874355316162, "learning_rate": 4.9998912990759146e-06, "loss": 0.049, "step": 346, "video_reward_cumulative_accuracy": 0.7644508670520231 }, { "epoch": 0.10299792223211636, "grad_norm": 2.790788412094116, "learning_rate": 4.999865801556409e-06, "loss": 0.0442, "step": 347, "video_reward_cumulative_accuracy": 0.7651296829971181 }, { "epoch": 0.10329474621549421, "grad_norm": 3.4424827098846436, "learning_rate": 4.999837620188334e-06, "loss": 0.0384, "step": 348, "video_reward_cumulative_accuracy": 0.7658045977011494 }, { "epoch": 0.10359157019887207, "grad_norm": 2.485424280166626, "learning_rate": 4.999806755001946e-06, "loss": 0.0333, "step": 349, "video_reward_cumulative_accuracy": 0.7664756446991404 }, { "epoch": 0.10388839418224993, "grad_norm": 1.6122627258300781, "learning_rate": 4.999773206030379e-06, "loss": 0.0689, "step": 350, "video_reward_cumulative_accuracy": 0.7671428571428571 }, { "epoch": 0.10418521816562779, "grad_norm": 3.075976848602295, "learning_rate": 4.999736973309655e-06, "loss": 0.0376, "step": 351, "video_reward_cumulative_accuracy": 0.7663817663817664 }, { "epoch": 0.10448204214900564, "grad_norm": 3.550297260284424, "learning_rate": 4.99969805687867e-06, "loss": 0.048, "step": 352, "video_reward_cumulative_accuracy": 0.7670454545454546 }, { "epoch": 0.10477886613238349, "grad_norm": 6.603730201721191, "learning_rate": 4.999656456779207e-06, "loss": 0.0667, "step": 353, "video_reward_cumulative_accuracy": 0.7662889518413598 }, { "epoch": 0.10507569011576136, "grad_norm": 1.7593069076538086, "learning_rate": 4.999612173055926e-06, "loss": 0.0376, "step": 354, "video_reward_cumulative_accuracy": 0.7669491525423728 }, { "epoch": 0.10537251409913921, "grad_norm": 1.126291275024414, "learning_rate": 4.99956520575637e-06, "loss": 0.0315, "step": 355, "video_reward_cumulative_accuracy": 0.7676056338028169 }, { "epoch": 0.10566933808251706, "grad_norm": 5.531168460845947, "learning_rate": 4.999515554930965e-06, "loss": 0.0701, "step": 356, "video_reward_cumulative_accuracy": 0.7682584269662921 }, { "epoch": 0.10596616206589493, "grad_norm": 1.5562381744384766, "learning_rate": 4.999463220633013e-06, "loss": 0.0385, "step": 357, "video_reward_cumulative_accuracy": 0.7675070028011205 }, { "epoch": 0.10626298604927278, "grad_norm": 11.684016227722168, "learning_rate": 4.999408202918702e-06, "loss": 0.1559, "step": 358, "video_reward_cumulative_accuracy": 0.7667597765363129 }, { "epoch": 0.10655981003265064, "grad_norm": 3.8814454078674316, "learning_rate": 4.999350501847098e-06, "loss": 0.0488, "step": 359, "video_reward_cumulative_accuracy": 0.7674094707520891 }, { "epoch": 0.10685663401602849, "grad_norm": 8.800138473510742, "learning_rate": 4.999290117480149e-06, "loss": 0.1598, "step": 360, "video_reward_cumulative_accuracy": 0.7680555555555556 }, { "epoch": 0.10715345799940636, "grad_norm": 4.154999256134033, "learning_rate": 4.999227049882684e-06, "loss": 0.0748, "step": 361, "video_reward_cumulative_accuracy": 0.7686980609418282 }, { "epoch": 0.10745028198278421, "grad_norm": 4.171562671661377, "learning_rate": 4.999161299122411e-06, "loss": 0.0392, "step": 362, "video_reward_cumulative_accuracy": 0.7693370165745856 }, { "epoch": 0.10774710596616206, "grad_norm": 3.6571145057678223, "learning_rate": 4.9990928652699214e-06, "loss": 0.0314, "step": 363, "video_reward_cumulative_accuracy": 0.7699724517906336 }, { "epoch": 0.10804392994953993, "grad_norm": 12.863832473754883, "learning_rate": 4.999021748398684e-06, "loss": 0.1173, "step": 364, "video_reward_cumulative_accuracy": 0.7678571428571429 }, { "epoch": 0.10834075393291778, "grad_norm": 4.573801517486572, "learning_rate": 4.99894794858505e-06, "loss": 0.0748, "step": 365, "video_reward_cumulative_accuracy": 0.7684931506849315 }, { "epoch": 0.10863757791629564, "grad_norm": 7.55157470703125, "learning_rate": 4.998871465908251e-06, "loss": 0.1099, "step": 366, "video_reward_cumulative_accuracy": 0.76775956284153 }, { "epoch": 0.10893440189967349, "grad_norm": 2.933627128601074, "learning_rate": 4.998792300450399e-06, "loss": 0.0819, "step": 367, "video_reward_cumulative_accuracy": 0.7683923705722071 }, { "epoch": 0.10923122588305136, "grad_norm": 6.3735575675964355, "learning_rate": 4.998710452296485e-06, "loss": 0.0566, "step": 368, "video_reward_cumulative_accuracy": 0.7690217391304348 }, { "epoch": 0.10952804986642921, "grad_norm": 1.9880731105804443, "learning_rate": 4.9986259215343814e-06, "loss": 0.0467, "step": 369, "video_reward_cumulative_accuracy": 0.7696476964769647 }, { "epoch": 0.10982487384980706, "grad_norm": 5.522644996643066, "learning_rate": 4.99853870825484e-06, "loss": 0.0861, "step": 370, "video_reward_cumulative_accuracy": 0.768918918918919 }, { "epoch": 0.11012169783318491, "grad_norm": 4.956112861633301, "learning_rate": 4.998448812551493e-06, "loss": 0.0501, "step": 371, "video_reward_cumulative_accuracy": 0.7695417789757413 }, { "epoch": 0.11041852181656278, "grad_norm": 4.254426956176758, "learning_rate": 4.998356234520851e-06, "loss": 0.0427, "step": 372, "video_reward_cumulative_accuracy": 0.7701612903225806 }, { "epoch": 0.11071534579994063, "grad_norm": 3.666555166244507, "learning_rate": 4.998260974262308e-06, "loss": 0.079, "step": 373, "video_reward_cumulative_accuracy": 0.7694369973190348 }, { "epoch": 0.11101216978331849, "grad_norm": 3.9171857833862305, "learning_rate": 4.998163031878133e-06, "loss": 0.0739, "step": 374, "video_reward_cumulative_accuracy": 0.7687165775401069 }, { "epoch": 0.11130899376669635, "grad_norm": 4.026345252990723, "learning_rate": 4.998062407473477e-06, "loss": 0.0506, "step": 375, "video_reward_cumulative_accuracy": 0.7693333333333333 }, { "epoch": 0.11160581775007421, "grad_norm": 1.3979490995407104, "learning_rate": 4.99795910115637e-06, "loss": 0.0479, "step": 376, "video_reward_cumulative_accuracy": 0.7686170212765957 }, { "epoch": 0.11190264173345206, "grad_norm": 1.5546613931655884, "learning_rate": 4.997853113037722e-06, "loss": 0.0323, "step": 377, "video_reward_cumulative_accuracy": 0.7692307692307693 }, { "epoch": 0.11219946571682991, "grad_norm": 2.0145232677459717, "learning_rate": 4.997744443231321e-06, "loss": 0.0471, "step": 378, "video_reward_cumulative_accuracy": 0.7698412698412699 }, { "epoch": 0.11249628970020778, "grad_norm": 3.335103750228882, "learning_rate": 4.9976330918538356e-06, "loss": 0.0416, "step": 379, "video_reward_cumulative_accuracy": 0.7704485488126649 }, { "epoch": 0.11279311368358563, "grad_norm": 3.545008897781372, "learning_rate": 4.99751905902481e-06, "loss": 0.039, "step": 380, "video_reward_cumulative_accuracy": 0.7710526315789473 }, { "epoch": 0.11308993766696349, "grad_norm": 6.1864190101623535, "learning_rate": 4.997402344866672e-06, "loss": 0.0831, "step": 381, "video_reward_cumulative_accuracy": 0.7703412073490814 }, { "epoch": 0.11338676165034135, "grad_norm": 1.5058016777038574, "learning_rate": 4.997282949504725e-06, "loss": 0.0472, "step": 382, "video_reward_cumulative_accuracy": 0.7709424083769634 }, { "epoch": 0.1136835856337192, "grad_norm": 5.477189064025879, "learning_rate": 4.99716087306715e-06, "loss": 0.0518, "step": 383, "video_reward_cumulative_accuracy": 0.7702349869451697 }, { "epoch": 0.11398040961709706, "grad_norm": 2.9288504123687744, "learning_rate": 4.99703611568501e-06, "loss": 0.0564, "step": 384, "video_reward_cumulative_accuracy": 0.76953125 }, { "epoch": 0.11427723360047491, "grad_norm": 3.441972017288208, "learning_rate": 4.996908677492243e-06, "loss": 0.019, "step": 385, "video_reward_cumulative_accuracy": 0.7701298701298701 }, { "epoch": 0.11457405758385278, "grad_norm": 2.967977523803711, "learning_rate": 4.996778558625666e-06, "loss": 0.058, "step": 386, "video_reward_cumulative_accuracy": 0.7707253886010362 }, { "epoch": 0.11487088156723063, "grad_norm": 3.720794200897217, "learning_rate": 4.996645759224974e-06, "loss": 0.0383, "step": 387, "video_reward_cumulative_accuracy": 0.7713178294573644 }, { "epoch": 0.11516770555060848, "grad_norm": 4.410665035247803, "learning_rate": 4.9965102794327416e-06, "loss": 0.0816, "step": 388, "video_reward_cumulative_accuracy": 0.7719072164948454 }, { "epoch": 0.11546452953398635, "grad_norm": 4.823568344116211, "learning_rate": 4.996372119394418e-06, "loss": 0.0597, "step": 389, "video_reward_cumulative_accuracy": 0.7699228791773779 }, { "epoch": 0.1157613535173642, "grad_norm": 6.129620552062988, "learning_rate": 4.9962312792583325e-06, "loss": 0.1142, "step": 390, "video_reward_cumulative_accuracy": 0.7705128205128206 }, { "epoch": 0.11605817750074206, "grad_norm": 4.6021037101745605, "learning_rate": 4.9960877591756895e-06, "loss": 0.0491, "step": 391, "video_reward_cumulative_accuracy": 0.7710997442455243 }, { "epoch": 0.11635500148411991, "grad_norm": 6.3145341873168945, "learning_rate": 4.9959415593005734e-06, "loss": 0.1095, "step": 392, "video_reward_cumulative_accuracy": 0.7704081632653061 }, { "epoch": 0.11665182546749778, "grad_norm": 2.623091697692871, "learning_rate": 4.995792679789943e-06, "loss": 0.0661, "step": 393, "video_reward_cumulative_accuracy": 0.7709923664122137 }, { "epoch": 0.11694864945087563, "grad_norm": 4.27155065536499, "learning_rate": 4.995641120803634e-06, "loss": 0.0581, "step": 394, "video_reward_cumulative_accuracy": 0.7715736040609137 }, { "epoch": 0.11724547343425348, "grad_norm": 4.873302936553955, "learning_rate": 4.99548688250436e-06, "loss": 0.0637, "step": 395, "video_reward_cumulative_accuracy": 0.7708860759493671 }, { "epoch": 0.11754229741763135, "grad_norm": 1.3667075634002686, "learning_rate": 4.995329965057712e-06, "loss": 0.0398, "step": 396, "video_reward_cumulative_accuracy": 0.7714646464646465 }, { "epoch": 0.1178391214010092, "grad_norm": 2.0029561519622803, "learning_rate": 4.995170368632156e-06, "loss": 0.0376, "step": 397, "video_reward_cumulative_accuracy": 0.7720403022670025 }, { "epoch": 0.11813594538438706, "grad_norm": 2.7206361293792725, "learning_rate": 4.995008093399034e-06, "loss": 0.0364, "step": 398, "video_reward_cumulative_accuracy": 0.7726130653266332 }, { "epoch": 0.11843276936776491, "grad_norm": 3.3953866958618164, "learning_rate": 4.9948431395325626e-06, "loss": 0.0533, "step": 399, "video_reward_cumulative_accuracy": 0.7731829573934837 }, { "epoch": 0.11872959335114278, "grad_norm": 4.966310024261475, "learning_rate": 4.994675507209837e-06, "loss": 0.0953, "step": 400, "video_reward_cumulative_accuracy": 0.77375 }, { "epoch": 0.11872959335114278, "eval_runtime": 133.1422, "eval_samples_per_second": 5.926, "eval_steps_per_second": 0.744, "eval_test_set_accuracy": 0.7323232323232324, "step": 400 }, { "epoch": 0.11902641733452063, "grad_norm": 1.743256688117981, "learning_rate": 4.9945051966108285e-06, "loss": 0.0466, "step": 401, "video_reward_cumulative_accuracy": 0.7743142144638404 }, { "epoch": 0.11932324131789848, "grad_norm": 1.3353296518325806, "learning_rate": 4.99433220791838e-06, "loss": 0.0713, "step": 402, "video_reward_cumulative_accuracy": 0.7723880597014925 }, { "epoch": 0.11962006530127635, "grad_norm": 3.405177593231201, "learning_rate": 4.994156541318211e-06, "loss": 0.049, "step": 403, "video_reward_cumulative_accuracy": 0.771712158808933 }, { "epoch": 0.1199168892846542, "grad_norm": 1.6333727836608887, "learning_rate": 4.993978196998918e-06, "loss": 0.0639, "step": 404, "video_reward_cumulative_accuracy": 0.7722772277227723 }, { "epoch": 0.12021371326803205, "grad_norm": 2.6365771293640137, "learning_rate": 4.993797175151971e-06, "loss": 0.0343, "step": 405, "video_reward_cumulative_accuracy": 0.7728395061728395 }, { "epoch": 0.12051053725140991, "grad_norm": 2.222435712814331, "learning_rate": 4.9936134759717134e-06, "loss": 0.0686, "step": 406, "video_reward_cumulative_accuracy": 0.7733990147783252 }, { "epoch": 0.12080736123478777, "grad_norm": 7.1145405769348145, "learning_rate": 4.993427099655366e-06, "loss": 0.0705, "step": 407, "video_reward_cumulative_accuracy": 0.7727272727272727 }, { "epoch": 0.12110418521816563, "grad_norm": 4.418581008911133, "learning_rate": 4.993238046403021e-06, "loss": 0.0407, "step": 408, "video_reward_cumulative_accuracy": 0.7720588235294118 }, { "epoch": 0.12140100920154348, "grad_norm": 1.4404574632644653, "learning_rate": 4.993046316417643e-06, "loss": 0.0407, "step": 409, "video_reward_cumulative_accuracy": 0.7713936430317848 }, { "epoch": 0.12169783318492135, "grad_norm": 1.7548354864120483, "learning_rate": 4.992851909905077e-06, "loss": 0.0422, "step": 410, "video_reward_cumulative_accuracy": 0.7719512195121951 }, { "epoch": 0.1219946571682992, "grad_norm": 2.8836653232574463, "learning_rate": 4.992654827074034e-06, "loss": 0.0519, "step": 411, "video_reward_cumulative_accuracy": 0.7725060827250608 }, { "epoch": 0.12229148115167705, "grad_norm": 6.0396504402160645, "learning_rate": 4.992455068136104e-06, "loss": 0.0834, "step": 412, "video_reward_cumulative_accuracy": 0.7730582524271845 }, { "epoch": 0.1225883051350549, "grad_norm": 1.5045963525772095, "learning_rate": 4.992252633305745e-06, "loss": 0.0404, "step": 413, "video_reward_cumulative_accuracy": 0.7736077481840193 }, { "epoch": 0.12288512911843277, "grad_norm": 1.5633553266525269, "learning_rate": 4.992047522800292e-06, "loss": 0.0249, "step": 414, "video_reward_cumulative_accuracy": 0.7741545893719807 }, { "epoch": 0.12318195310181063, "grad_norm": 2.4161694049835205, "learning_rate": 4.991839736839951e-06, "loss": 0.047, "step": 415, "video_reward_cumulative_accuracy": 0.7746987951807229 }, { "epoch": 0.12347877708518848, "grad_norm": 4.570451259613037, "learning_rate": 4.9916292756478e-06, "loss": 0.0645, "step": 416, "video_reward_cumulative_accuracy": 0.7752403846153846 }, { "epoch": 0.12377560106856635, "grad_norm": 2.0078325271606445, "learning_rate": 4.991416139449791e-06, "loss": 0.053, "step": 417, "video_reward_cumulative_accuracy": 0.7757793764988009 }, { "epoch": 0.1240724250519442, "grad_norm": 2.8492980003356934, "learning_rate": 4.991200328474743e-06, "loss": 0.041, "step": 418, "video_reward_cumulative_accuracy": 0.7763157894736842 }, { "epoch": 0.12436924903532205, "grad_norm": 1.4428461790084839, "learning_rate": 4.990981842954353e-06, "loss": 0.0213, "step": 419, "video_reward_cumulative_accuracy": 0.7768496420047732 }, { "epoch": 0.1246660730186999, "grad_norm": 1.667288899421692, "learning_rate": 4.990760683123186e-06, "loss": 0.0402, "step": 420, "video_reward_cumulative_accuracy": 0.7773809523809524 }, { "epoch": 0.12496289700207777, "grad_norm": 5.119730472564697, "learning_rate": 4.990536849218678e-06, "loss": 0.0759, "step": 421, "video_reward_cumulative_accuracy": 0.7779097387173397 }, { "epoch": 0.12525972098545562, "grad_norm": 7.024728298187256, "learning_rate": 4.990310341481136e-06, "loss": 0.1402, "step": 422, "video_reward_cumulative_accuracy": 0.7784360189573459 }, { "epoch": 0.12555654496883348, "grad_norm": 3.2602407932281494, "learning_rate": 4.990081160153738e-06, "loss": 0.0567, "step": 423, "video_reward_cumulative_accuracy": 0.7789598108747045 }, { "epoch": 0.12585336895221133, "grad_norm": 3.0349199771881104, "learning_rate": 4.989849305482534e-06, "loss": 0.1118, "step": 424, "video_reward_cumulative_accuracy": 0.7783018867924528 }, { "epoch": 0.12615019293558918, "grad_norm": 4.9538774490356445, "learning_rate": 4.989614777716442e-06, "loss": 0.0844, "step": 425, "video_reward_cumulative_accuracy": 0.7776470588235294 }, { "epoch": 0.12644701691896706, "grad_norm": 4.11725378036499, "learning_rate": 4.989377577107248e-06, "loss": 0.0852, "step": 426, "video_reward_cumulative_accuracy": 0.7769953051643192 }, { "epoch": 0.12674384090234492, "grad_norm": 2.492112874984741, "learning_rate": 4.989137703909612e-06, "loss": 0.0574, "step": 427, "video_reward_cumulative_accuracy": 0.7775175644028103 }, { "epoch": 0.12704066488572277, "grad_norm": 3.3286123275756836, "learning_rate": 4.988895158381062e-06, "loss": 0.0402, "step": 428, "video_reward_cumulative_accuracy": 0.7780373831775701 }, { "epoch": 0.12733748886910062, "grad_norm": 5.785702228546143, "learning_rate": 4.988649940781992e-06, "loss": 0.0592, "step": 429, "video_reward_cumulative_accuracy": 0.7773892773892774 }, { "epoch": 0.12763431285247848, "grad_norm": 1.343465805053711, "learning_rate": 4.988402051375668e-06, "loss": 0.0254, "step": 430, "video_reward_cumulative_accuracy": 0.7779069767441861 }, { "epoch": 0.12793113683585633, "grad_norm": 2.176379680633545, "learning_rate": 4.988151490428223e-06, "loss": 0.1113, "step": 431, "video_reward_cumulative_accuracy": 0.7761020881670534 }, { "epoch": 0.12822796081923418, "grad_norm": 4.0742340087890625, "learning_rate": 4.987898258208659e-06, "loss": 0.0802, "step": 432, "video_reward_cumulative_accuracy": 0.7766203703703703 }, { "epoch": 0.12852478480261206, "grad_norm": 1.4879510402679443, "learning_rate": 4.987642354988845e-06, "loss": 0.0415, "step": 433, "video_reward_cumulative_accuracy": 0.7771362586605081 }, { "epoch": 0.12882160878598992, "grad_norm": 2.5588343143463135, "learning_rate": 4.987383781043517e-06, "loss": 0.0591, "step": 434, "video_reward_cumulative_accuracy": 0.7764976958525346 }, { "epoch": 0.12911843276936777, "grad_norm": 1.5112923383712769, "learning_rate": 4.987122536650282e-06, "loss": 0.0583, "step": 435, "video_reward_cumulative_accuracy": 0.7770114942528735 }, { "epoch": 0.12941525675274562, "grad_norm": 3.004580020904541, "learning_rate": 4.986858622089609e-06, "loss": 0.1139, "step": 436, "video_reward_cumulative_accuracy": 0.7775229357798165 }, { "epoch": 0.12971208073612348, "grad_norm": 4.673270225524902, "learning_rate": 4.986592037644836e-06, "loss": 0.1071, "step": 437, "video_reward_cumulative_accuracy": 0.7768878718535469 }, { "epoch": 0.13000890471950133, "grad_norm": 1.8337359428405762, "learning_rate": 4.986322783602167e-06, "loss": 0.0351, "step": 438, "video_reward_cumulative_accuracy": 0.7773972602739726 }, { "epoch": 0.13030572870287918, "grad_norm": 4.02969217300415, "learning_rate": 4.986050860250674e-06, "loss": 0.0719, "step": 439, "video_reward_cumulative_accuracy": 0.7767653758542141 }, { "epoch": 0.13060255268625706, "grad_norm": 9.999349594116211, "learning_rate": 4.985776267882291e-06, "loss": 0.1391, "step": 440, "video_reward_cumulative_accuracy": 0.7761363636363636 }, { "epoch": 0.13089937666963491, "grad_norm": 1.213397741317749, "learning_rate": 4.985499006791822e-06, "loss": 0.0421, "step": 441, "video_reward_cumulative_accuracy": 0.7766439909297053 }, { "epoch": 0.13119620065301277, "grad_norm": 1.5377864837646484, "learning_rate": 4.9852190772769304e-06, "loss": 0.0629, "step": 442, "video_reward_cumulative_accuracy": 0.7748868778280543 }, { "epoch": 0.13149302463639062, "grad_norm": 2.056608200073242, "learning_rate": 4.984936479638151e-06, "loss": 0.0557, "step": 443, "video_reward_cumulative_accuracy": 0.7731376975169301 }, { "epoch": 0.13178984861976847, "grad_norm": 2.519721746444702, "learning_rate": 4.9846512141788774e-06, "loss": 0.0692, "step": 444, "video_reward_cumulative_accuracy": 0.7725225225225225 }, { "epoch": 0.13208667260314633, "grad_norm": 1.5284984111785889, "learning_rate": 4.984363281205372e-06, "loss": 0.0448, "step": 445, "video_reward_cumulative_accuracy": 0.7730337078651686 }, { "epoch": 0.13238349658652418, "grad_norm": 1.4093446731567383, "learning_rate": 4.984072681026757e-06, "loss": 0.0707, "step": 446, "video_reward_cumulative_accuracy": 0.773542600896861 }, { "epoch": 0.13268032056990206, "grad_norm": 4.844381332397461, "learning_rate": 4.98377941395502e-06, "loss": 0.0652, "step": 447, "video_reward_cumulative_accuracy": 0.772930648769575 }, { "epoch": 0.1329771445532799, "grad_norm": 3.3423585891723633, "learning_rate": 4.983483480305012e-06, "loss": 0.0597, "step": 448, "video_reward_cumulative_accuracy": 0.7723214285714286 }, { "epoch": 0.13327396853665777, "grad_norm": 1.8888392448425293, "learning_rate": 4.983184880394447e-06, "loss": 0.0142, "step": 449, "video_reward_cumulative_accuracy": 0.7728285077951003 }, { "epoch": 0.13357079252003562, "grad_norm": 4.848865032196045, "learning_rate": 4.982883614543901e-06, "loss": 0.0525, "step": 450, "video_reward_cumulative_accuracy": 0.7733333333333333 }, { "epoch": 0.13386761650341347, "grad_norm": 3.70519757270813, "learning_rate": 4.982579683076811e-06, "loss": 0.0716, "step": 451, "video_reward_cumulative_accuracy": 0.7727272727272727 }, { "epoch": 0.13416444048679133, "grad_norm": 1.4521280527114868, "learning_rate": 4.982273086319479e-06, "loss": 0.0166, "step": 452, "video_reward_cumulative_accuracy": 0.7732300884955752 }, { "epoch": 0.13446126447016918, "grad_norm": 1.7118197679519653, "learning_rate": 4.981963824601064e-06, "loss": 0.0634, "step": 453, "video_reward_cumulative_accuracy": 0.7737306843267108 }, { "epoch": 0.13475808845354706, "grad_norm": 3.1727960109710693, "learning_rate": 4.98165189825359e-06, "loss": 0.0269, "step": 454, "video_reward_cumulative_accuracy": 0.7742290748898678 }, { "epoch": 0.1350549124369249, "grad_norm": 2.006000280380249, "learning_rate": 4.981337307611939e-06, "loss": 0.0942, "step": 455, "video_reward_cumulative_accuracy": 0.7747252747252747 }, { "epoch": 0.13535173642030277, "grad_norm": 2.203691244125366, "learning_rate": 4.981020053013855e-06, "loss": 0.0439, "step": 456, "video_reward_cumulative_accuracy": 0.7741228070175439 }, { "epoch": 0.13564856040368062, "grad_norm": 3.0224599838256836, "learning_rate": 4.9807001347999424e-06, "loss": 0.066, "step": 457, "video_reward_cumulative_accuracy": 0.774617067833698 }, { "epoch": 0.13594538438705847, "grad_norm": 1.738229513168335, "learning_rate": 4.980377553313665e-06, "loss": 0.0393, "step": 458, "video_reward_cumulative_accuracy": 0.7751091703056768 }, { "epoch": 0.13624220837043632, "grad_norm": 3.5705530643463135, "learning_rate": 4.980052308901343e-06, "loss": 0.0345, "step": 459, "video_reward_cumulative_accuracy": 0.7745098039215687 }, { "epoch": 0.13653903235381418, "grad_norm": 2.2670326232910156, "learning_rate": 4.9797244019121595e-06, "loss": 0.0286, "step": 460, "video_reward_cumulative_accuracy": 0.775 }, { "epoch": 0.13683585633719206, "grad_norm": 2.399627923965454, "learning_rate": 4.979393832698154e-06, "loss": 0.0963, "step": 461, "video_reward_cumulative_accuracy": 0.7754880694143167 }, { "epoch": 0.1371326803205699, "grad_norm": 4.614706039428711, "learning_rate": 4.979060601614225e-06, "loss": 0.1159, "step": 462, "video_reward_cumulative_accuracy": 0.7748917748917749 }, { "epoch": 0.13742950430394776, "grad_norm": 3.261317729949951, "learning_rate": 4.978724709018128e-06, "loss": 0.0661, "step": 463, "video_reward_cumulative_accuracy": 0.775377969762419 }, { "epoch": 0.13772632828732562, "grad_norm": 3.9476890563964844, "learning_rate": 4.978386155270477e-06, "loss": 0.0487, "step": 464, "video_reward_cumulative_accuracy": 0.7747844827586207 }, { "epoch": 0.13802315227070347, "grad_norm": 2.917313575744629, "learning_rate": 4.9780449407347405e-06, "loss": 0.0759, "step": 465, "video_reward_cumulative_accuracy": 0.7731182795698924 }, { "epoch": 0.13831997625408132, "grad_norm": 1.9969302415847778, "learning_rate": 4.977701065777247e-06, "loss": 0.0262, "step": 466, "video_reward_cumulative_accuracy": 0.7736051502145923 }, { "epoch": 0.13861680023745918, "grad_norm": 2.4382143020629883, "learning_rate": 4.97735453076718e-06, "loss": 0.0622, "step": 467, "video_reward_cumulative_accuracy": 0.7740899357601713 }, { "epoch": 0.13891362422083706, "grad_norm": 1.4531607627868652, "learning_rate": 4.977005336076578e-06, "loss": 0.0309, "step": 468, "video_reward_cumulative_accuracy": 0.7745726495726496 }, { "epoch": 0.1392104482042149, "grad_norm": 4.770167350769043, "learning_rate": 4.976653482080335e-06, "loss": 0.0523, "step": 469, "video_reward_cumulative_accuracy": 0.7750533049040512 }, { "epoch": 0.13950727218759276, "grad_norm": 1.4525412321090698, "learning_rate": 4.9762989691562006e-06, "loss": 0.0469, "step": 470, "video_reward_cumulative_accuracy": 0.774468085106383 }, { "epoch": 0.13980409617097062, "grad_norm": 7.346729755401611, "learning_rate": 4.975941797684778e-06, "loss": 0.104, "step": 471, "video_reward_cumulative_accuracy": 0.772823779193206 }, { "epoch": 0.14010092015434847, "grad_norm": 2.2593302726745605, "learning_rate": 4.975581968049527e-06, "loss": 0.0912, "step": 472, "video_reward_cumulative_accuracy": 0.7733050847457628 }, { "epoch": 0.14039774413772632, "grad_norm": 2.1568541526794434, "learning_rate": 4.9752194806367585e-06, "loss": 0.0645, "step": 473, "video_reward_cumulative_accuracy": 0.7716701902748414 }, { "epoch": 0.14069456812110417, "grad_norm": 2.715193033218384, "learning_rate": 4.974854335835639e-06, "loss": 0.0426, "step": 474, "video_reward_cumulative_accuracy": 0.7710970464135021 }, { "epoch": 0.14099139210448206, "grad_norm": 2.6449036598205566, "learning_rate": 4.974486534038185e-06, "loss": 0.0349, "step": 475, "video_reward_cumulative_accuracy": 0.771578947368421 }, { "epoch": 0.1412882160878599, "grad_norm": 2.4418740272521973, "learning_rate": 4.9741160756392705e-06, "loss": 0.0411, "step": 476, "video_reward_cumulative_accuracy": 0.7720588235294118 }, { "epoch": 0.14158504007123776, "grad_norm": 1.6195287704467773, "learning_rate": 4.973742961036615e-06, "loss": 0.0366, "step": 477, "video_reward_cumulative_accuracy": 0.7725366876310272 }, { "epoch": 0.14188186405461561, "grad_norm": 1.6140589714050293, "learning_rate": 4.973367190630796e-06, "loss": 0.0646, "step": 478, "video_reward_cumulative_accuracy": 0.7719665271966527 }, { "epoch": 0.14217868803799347, "grad_norm": 3.116804361343384, "learning_rate": 4.972988764825239e-06, "loss": 0.0293, "step": 479, "video_reward_cumulative_accuracy": 0.7724425887265136 }, { "epoch": 0.14247551202137132, "grad_norm": 1.9201480150222778, "learning_rate": 4.972607684026218e-06, "loss": 0.039, "step": 480, "video_reward_cumulative_accuracy": 0.771875 }, { "epoch": 0.14277233600474917, "grad_norm": 1.579925298690796, "learning_rate": 4.972223948642865e-06, "loss": 0.0468, "step": 481, "video_reward_cumulative_accuracy": 0.7713097713097713 }, { "epoch": 0.14306915998812705, "grad_norm": 2.824054718017578, "learning_rate": 4.971837559087153e-06, "loss": 0.0691, "step": 482, "video_reward_cumulative_accuracy": 0.770746887966805 }, { "epoch": 0.1433659839715049, "grad_norm": 3.3171064853668213, "learning_rate": 4.971448515773911e-06, "loss": 0.0773, "step": 483, "video_reward_cumulative_accuracy": 0.7701863354037267 }, { "epoch": 0.14366280795488276, "grad_norm": 4.4460577964782715, "learning_rate": 4.971056819120814e-06, "loss": 0.055, "step": 484, "video_reward_cumulative_accuracy": 0.7696280991735537 }, { "epoch": 0.1439596319382606, "grad_norm": 2.3328418731689453, "learning_rate": 4.970662469548386e-06, "loss": 0.0411, "step": 485, "video_reward_cumulative_accuracy": 0.7701030927835052 }, { "epoch": 0.14425645592163847, "grad_norm": 2.608328342437744, "learning_rate": 4.970265467480001e-06, "loss": 0.0615, "step": 486, "video_reward_cumulative_accuracy": 0.7695473251028807 }, { "epoch": 0.14455327990501632, "grad_norm": 1.6409941911697388, "learning_rate": 4.969865813341878e-06, "loss": 0.0431, "step": 487, "video_reward_cumulative_accuracy": 0.7700205338809035 }, { "epoch": 0.14485010388839417, "grad_norm": 6.2619805335998535, "learning_rate": 4.969463507563085e-06, "loss": 0.0885, "step": 488, "video_reward_cumulative_accuracy": 0.7704918032786885 }, { "epoch": 0.14514692787177205, "grad_norm": 2.390130043029785, "learning_rate": 4.969058550575535e-06, "loss": 0.0684, "step": 489, "video_reward_cumulative_accuracy": 0.7709611451942741 }, { "epoch": 0.1454437518551499, "grad_norm": 1.9959198236465454, "learning_rate": 4.968650942813991e-06, "loss": 0.0635, "step": 490, "video_reward_cumulative_accuracy": 0.7704081632653061 }, { "epoch": 0.14574057583852776, "grad_norm": 3.4746286869049072, "learning_rate": 4.968240684716058e-06, "loss": 0.0764, "step": 491, "video_reward_cumulative_accuracy": 0.769857433808554 }, { "epoch": 0.1460373998219056, "grad_norm": 2.226306438446045, "learning_rate": 4.967827776722187e-06, "loss": 0.0627, "step": 492, "video_reward_cumulative_accuracy": 0.7703252032520326 }, { "epoch": 0.14633422380528346, "grad_norm": 1.9936774969100952, "learning_rate": 4.967412219275677e-06, "loss": 0.026, "step": 493, "video_reward_cumulative_accuracy": 0.77079107505071 }, { "epoch": 0.14663104778866132, "grad_norm": 2.9451053142547607, "learning_rate": 4.966994012822668e-06, "loss": 0.037, "step": 494, "video_reward_cumulative_accuracy": 0.7692307692307693 }, { "epoch": 0.14692787177203917, "grad_norm": 2.2165896892547607, "learning_rate": 4.9665731578121445e-06, "loss": 0.0604, "step": 495, "video_reward_cumulative_accuracy": 0.7696969696969697 }, { "epoch": 0.14722469575541705, "grad_norm": 4.33952522277832, "learning_rate": 4.966149654695937e-06, "loss": 0.0512, "step": 496, "video_reward_cumulative_accuracy": 0.7691532258064516 }, { "epoch": 0.1475215197387949, "grad_norm": 1.7252819538116455, "learning_rate": 4.9657235039287165e-06, "loss": 0.0451, "step": 497, "video_reward_cumulative_accuracy": 0.7686116700201208 }, { "epoch": 0.14781834372217276, "grad_norm": 1.3271393775939941, "learning_rate": 4.965294705967997e-06, "loss": 0.0548, "step": 498, "video_reward_cumulative_accuracy": 0.7680722891566265 }, { "epoch": 0.1481151677055506, "grad_norm": 4.343282699584961, "learning_rate": 4.964863261274134e-06, "loss": 0.0571, "step": 499, "video_reward_cumulative_accuracy": 0.7675350701402806 }, { "epoch": 0.14841199168892846, "grad_norm": 1.385603666305542, "learning_rate": 4.964429170310327e-06, "loss": 0.0579, "step": 500, "video_reward_cumulative_accuracy": 0.767 }, { "epoch": 0.14870881567230632, "grad_norm": 1.3973246812820435, "learning_rate": 4.963992433542612e-06, "loss": 0.0573, "step": 501, "video_reward_cumulative_accuracy": 0.7674650698602794 }, { "epoch": 0.14900563965568417, "grad_norm": 1.4018256664276123, "learning_rate": 4.963553051439871e-06, "loss": 0.0543, "step": 502, "video_reward_cumulative_accuracy": 0.7679282868525896 }, { "epoch": 0.14930246363906205, "grad_norm": 1.3220703601837158, "learning_rate": 4.963111024473823e-06, "loss": 0.0535, "step": 503, "video_reward_cumulative_accuracy": 0.768389662027833 }, { "epoch": 0.1495992876224399, "grad_norm": 5.434345722198486, "learning_rate": 4.962666353119025e-06, "loss": 0.0945, "step": 504, "video_reward_cumulative_accuracy": 0.7688492063492064 }, { "epoch": 0.14989611160581776, "grad_norm": 1.8976709842681885, "learning_rate": 4.9622190378528775e-06, "loss": 0.0447, "step": 505, "video_reward_cumulative_accuracy": 0.7683168316831683 }, { "epoch": 0.1501929355891956, "grad_norm": 0.9916190505027771, "learning_rate": 4.961769079155615e-06, "loss": 0.0367, "step": 506, "video_reward_cumulative_accuracy": 0.7687747035573123 }, { "epoch": 0.15048975957257346, "grad_norm": 3.0541810989379883, "learning_rate": 4.961316477510312e-06, "loss": 0.0512, "step": 507, "video_reward_cumulative_accuracy": 0.7682445759368837 }, { "epoch": 0.15078658355595131, "grad_norm": 1.1684255599975586, "learning_rate": 4.960861233402881e-06, "loss": 0.0324, "step": 508, "video_reward_cumulative_accuracy": 0.7687007874015748 }, { "epoch": 0.15108340753932917, "grad_norm": 2.5912883281707764, "learning_rate": 4.960403347322069e-06, "loss": 0.0573, "step": 509, "video_reward_cumulative_accuracy": 0.768172888015717 }, { "epoch": 0.15138023152270705, "grad_norm": 2.920675039291382, "learning_rate": 4.959942819759464e-06, "loss": 0.0379, "step": 510, "video_reward_cumulative_accuracy": 0.7676470588235295 }, { "epoch": 0.1516770555060849, "grad_norm": 7.364986419677734, "learning_rate": 4.959479651209485e-06, "loss": 0.109, "step": 511, "video_reward_cumulative_accuracy": 0.7681017612524462 }, { "epoch": 0.15197387948946275, "grad_norm": 2.541637420654297, "learning_rate": 4.959013842169389e-06, "loss": 0.0187, "step": 512, "video_reward_cumulative_accuracy": 0.7685546875 }, { "epoch": 0.1522707034728406, "grad_norm": 1.8760055303573608, "learning_rate": 4.9585453931392665e-06, "loss": 0.0403, "step": 513, "video_reward_cumulative_accuracy": 0.7680311890838206 }, { "epoch": 0.15256752745621846, "grad_norm": 1.9295579195022583, "learning_rate": 4.958074304622045e-06, "loss": 0.0487, "step": 514, "video_reward_cumulative_accuracy": 0.7684824902723736 }, { "epoch": 0.1528643514395963, "grad_norm": 4.06351900100708, "learning_rate": 4.957600577123482e-06, "loss": 0.0608, "step": 515, "video_reward_cumulative_accuracy": 0.7679611650485437 }, { "epoch": 0.15316117542297417, "grad_norm": 2.601158618927002, "learning_rate": 4.957124211152169e-06, "loss": 0.0148, "step": 516, "video_reward_cumulative_accuracy": 0.7684108527131783 }, { "epoch": 0.15345799940635205, "grad_norm": 1.6226683855056763, "learning_rate": 4.9566452072195335e-06, "loss": 0.0445, "step": 517, "video_reward_cumulative_accuracy": 0.7688588007736944 }, { "epoch": 0.1537548233897299, "grad_norm": 1.285947322845459, "learning_rate": 4.956163565839831e-06, "loss": 0.0194, "step": 518, "video_reward_cumulative_accuracy": 0.7693050193050193 }, { "epoch": 0.15405164737310775, "grad_norm": 5.118712425231934, "learning_rate": 4.955679287530152e-06, "loss": 0.1169, "step": 519, "video_reward_cumulative_accuracy": 0.7687861271676301 }, { "epoch": 0.1543484713564856, "grad_norm": 1.981034755706787, "learning_rate": 4.955192372810414e-06, "loss": 0.0227, "step": 520, "video_reward_cumulative_accuracy": 0.7692307692307693 }, { "epoch": 0.15464529533986346, "grad_norm": 3.3112709522247314, "learning_rate": 4.954702822203369e-06, "loss": 0.0877, "step": 521, "video_reward_cumulative_accuracy": 0.7687140115163148 }, { "epoch": 0.1549421193232413, "grad_norm": 2.729583501815796, "learning_rate": 4.954210636234597e-06, "loss": 0.0301, "step": 522, "video_reward_cumulative_accuracy": 0.7691570881226054 }, { "epoch": 0.15523894330661916, "grad_norm": 9.558045387268066, "learning_rate": 4.953715815432505e-06, "loss": 0.0811, "step": 523, "video_reward_cumulative_accuracy": 0.7695984703632888 }, { "epoch": 0.15553576728999705, "grad_norm": 6.721735000610352, "learning_rate": 4.9532183603283345e-06, "loss": 0.0445, "step": 524, "video_reward_cumulative_accuracy": 0.7690839694656488 }, { "epoch": 0.1558325912733749, "grad_norm": 1.9270586967468262, "learning_rate": 4.952718271456151e-06, "loss": 0.0429, "step": 525, "video_reward_cumulative_accuracy": 0.7695238095238095 }, { "epoch": 0.15612941525675275, "grad_norm": 2.5546162128448486, "learning_rate": 4.952215549352846e-06, "loss": 0.0786, "step": 526, "video_reward_cumulative_accuracy": 0.7690114068441065 }, { "epoch": 0.1564262392401306, "grad_norm": 2.585820436477661, "learning_rate": 4.951710194558144e-06, "loss": 0.0416, "step": 527, "video_reward_cumulative_accuracy": 0.7694497153700189 }, { "epoch": 0.15672306322350846, "grad_norm": 2.7101705074310303, "learning_rate": 4.9512022076145895e-06, "loss": 0.0562, "step": 528, "video_reward_cumulative_accuracy": 0.7698863636363636 }, { "epoch": 0.1570198872068863, "grad_norm": 1.0189766883850098, "learning_rate": 4.9506915890675566e-06, "loss": 0.0526, "step": 529, "video_reward_cumulative_accuracy": 0.7703213610586012 }, { "epoch": 0.15731671119026416, "grad_norm": 6.756640911102295, "learning_rate": 4.9501783394652455e-06, "loss": 0.0875, "step": 530, "video_reward_cumulative_accuracy": 0.7707547169811321 }, { "epoch": 0.15761353517364202, "grad_norm": 1.9713293313980103, "learning_rate": 4.9496624593586775e-06, "loss": 0.076, "step": 531, "video_reward_cumulative_accuracy": 0.7702448210922788 }, { "epoch": 0.1579103591570199, "grad_norm": 2.242279529571533, "learning_rate": 4.949143949301701e-06, "loss": 0.0646, "step": 532, "video_reward_cumulative_accuracy": 0.7706766917293233 }, { "epoch": 0.15820718314039775, "grad_norm": 3.1193904876708984, "learning_rate": 4.9486228098509865e-06, "loss": 0.074, "step": 533, "video_reward_cumulative_accuracy": 0.7692307692307693 }, { "epoch": 0.1585040071237756, "grad_norm": 1.8932733535766602, "learning_rate": 4.9480990415660276e-06, "loss": 0.0565, "step": 534, "video_reward_cumulative_accuracy": 0.7696629213483146 }, { "epoch": 0.15880083110715346, "grad_norm": 2.4588887691497803, "learning_rate": 4.947572645009141e-06, "loss": 0.0547, "step": 535, "video_reward_cumulative_accuracy": 0.7691588785046729 }, { "epoch": 0.1590976550905313, "grad_norm": 4.820741176605225, "learning_rate": 4.947043620745464e-06, "loss": 0.0805, "step": 536, "video_reward_cumulative_accuracy": 0.769589552238806 }, { "epoch": 0.15939447907390916, "grad_norm": 1.0390130281448364, "learning_rate": 4.946511969342956e-06, "loss": 0.0558, "step": 537, "video_reward_cumulative_accuracy": 0.7700186219739292 }, { "epoch": 0.15969130305728702, "grad_norm": 2.8653810024261475, "learning_rate": 4.945977691372396e-06, "loss": 0.0489, "step": 538, "video_reward_cumulative_accuracy": 0.7704460966542751 }, { "epoch": 0.1599881270406649, "grad_norm": 3.0216479301452637, "learning_rate": 4.945440787407382e-06, "loss": 0.0597, "step": 539, "video_reward_cumulative_accuracy": 0.7708719851576994 }, { "epoch": 0.16028495102404275, "grad_norm": 1.2938923835754395, "learning_rate": 4.944901258024335e-06, "loss": 0.0615, "step": 540, "video_reward_cumulative_accuracy": 0.7703703703703704 }, { "epoch": 0.1605817750074206, "grad_norm": 1.7534013986587524, "learning_rate": 4.94435910380249e-06, "loss": 0.0746, "step": 541, "video_reward_cumulative_accuracy": 0.7689463955637708 }, { "epoch": 0.16087859899079845, "grad_norm": 2.365793228149414, "learning_rate": 4.943814325323904e-06, "loss": 0.0314, "step": 542, "video_reward_cumulative_accuracy": 0.7693726937269373 }, { "epoch": 0.1611754229741763, "grad_norm": 5.616259574890137, "learning_rate": 4.943266923173449e-06, "loss": 0.1097, "step": 543, "video_reward_cumulative_accuracy": 0.7697974217311234 }, { "epoch": 0.16147224695755416, "grad_norm": 1.3462814092636108, "learning_rate": 4.942716897938813e-06, "loss": 0.0486, "step": 544, "video_reward_cumulative_accuracy": 0.7702205882352942 }, { "epoch": 0.161769070940932, "grad_norm": 1.065239429473877, "learning_rate": 4.9421642502105025e-06, "loss": 0.0442, "step": 545, "video_reward_cumulative_accuracy": 0.7697247706422018 }, { "epoch": 0.1620658949243099, "grad_norm": 2.191693067550659, "learning_rate": 4.941608980581839e-06, "loss": 0.0351, "step": 546, "video_reward_cumulative_accuracy": 0.7692307692307693 }, { "epoch": 0.16236271890768775, "grad_norm": 2.6450510025024414, "learning_rate": 4.941051089648958e-06, "loss": 0.0778, "step": 547, "video_reward_cumulative_accuracy": 0.7687385740402194 }, { "epoch": 0.1626595428910656, "grad_norm": 4.044307231903076, "learning_rate": 4.940490578010808e-06, "loss": 0.068, "step": 548, "video_reward_cumulative_accuracy": 0.7691605839416058 }, { "epoch": 0.16295636687444345, "grad_norm": 2.968937873840332, "learning_rate": 4.9399274462691555e-06, "loss": 0.0773, "step": 549, "video_reward_cumulative_accuracy": 0.7695810564663024 }, { "epoch": 0.1632531908578213, "grad_norm": 1.9103548526763916, "learning_rate": 4.939361695028575e-06, "loss": 0.0691, "step": 550, "video_reward_cumulative_accuracy": 0.769090909090909 }, { "epoch": 0.16355001484119916, "grad_norm": 3.47516131401062, "learning_rate": 4.938793324896456e-06, "loss": 0.0912, "step": 551, "video_reward_cumulative_accuracy": 0.7686025408348457 }, { "epoch": 0.163846838824577, "grad_norm": 3.028697967529297, "learning_rate": 4.9382223364829995e-06, "loss": 0.0532, "step": 552, "video_reward_cumulative_accuracy": 0.7690217391304348 }, { "epoch": 0.1641436628079549, "grad_norm": 3.2655930519104004, "learning_rate": 4.937648730401215e-06, "loss": 0.036, "step": 553, "video_reward_cumulative_accuracy": 0.7694394213381555 }, { "epoch": 0.16444048679133275, "grad_norm": 1.8100935220718384, "learning_rate": 4.937072507266928e-06, "loss": 0.0492, "step": 554, "video_reward_cumulative_accuracy": 0.76985559566787 }, { "epoch": 0.1647373107747106, "grad_norm": 3.864176034927368, "learning_rate": 4.936493667698766e-06, "loss": 0.0589, "step": 555, "video_reward_cumulative_accuracy": 0.7702702702702703 }, { "epoch": 0.16503413475808845, "grad_norm": 2.1781342029571533, "learning_rate": 4.935912212318171e-06, "loss": 0.0196, "step": 556, "video_reward_cumulative_accuracy": 0.77068345323741 }, { "epoch": 0.1653309587414663, "grad_norm": 1.995492696762085, "learning_rate": 4.935328141749393e-06, "loss": 0.049, "step": 557, "video_reward_cumulative_accuracy": 0.770197486535009 }, { "epoch": 0.16562778272484416, "grad_norm": 2.0720860958099365, "learning_rate": 4.934741456619488e-06, "loss": 0.0585, "step": 558, "video_reward_cumulative_accuracy": 0.7697132616487455 }, { "epoch": 0.165924606708222, "grad_norm": 1.4079474210739136, "learning_rate": 4.934152157558317e-06, "loss": 0.0322, "step": 559, "video_reward_cumulative_accuracy": 0.7701252236135957 }, { "epoch": 0.1662214306915999, "grad_norm": 4.133894443511963, "learning_rate": 4.933560245198552e-06, "loss": 0.0604, "step": 560, "video_reward_cumulative_accuracy": 0.7705357142857143 }, { "epoch": 0.16651825467497774, "grad_norm": 3.3255507946014404, "learning_rate": 4.932965720175669e-06, "loss": 0.0559, "step": 561, "video_reward_cumulative_accuracy": 0.7709447415329769 }, { "epoch": 0.1668150786583556, "grad_norm": 3.746882677078247, "learning_rate": 4.9323685831279465e-06, "loss": 0.0728, "step": 562, "video_reward_cumulative_accuracy": 0.7713523131672598 }, { "epoch": 0.16711190264173345, "grad_norm": 2.2193782329559326, "learning_rate": 4.93176883469647e-06, "loss": 0.0512, "step": 563, "video_reward_cumulative_accuracy": 0.7708703374777975 }, { "epoch": 0.1674087266251113, "grad_norm": 0.7712289094924927, "learning_rate": 4.9311664755251265e-06, "loss": 0.0202, "step": 564, "video_reward_cumulative_accuracy": 0.7712765957446809 }, { "epoch": 0.16770555060848916, "grad_norm": 1.9561268091201782, "learning_rate": 4.93056150626061e-06, "loss": 0.045, "step": 565, "video_reward_cumulative_accuracy": 0.7716814159292036 }, { "epoch": 0.168002374591867, "grad_norm": 2.4627063274383545, "learning_rate": 4.92995392755241e-06, "loss": 0.0474, "step": 566, "video_reward_cumulative_accuracy": 0.7720848056537103 }, { "epoch": 0.1682991985752449, "grad_norm": 2.0036139488220215, "learning_rate": 4.929343740052823e-06, "loss": 0.0358, "step": 567, "video_reward_cumulative_accuracy": 0.7724867724867724 }, { "epoch": 0.16859602255862274, "grad_norm": 3.07709002494812, "learning_rate": 4.928730944416945e-06, "loss": 0.0407, "step": 568, "video_reward_cumulative_accuracy": 0.772887323943662 }, { "epoch": 0.1688928465420006, "grad_norm": 1.9337095022201538, "learning_rate": 4.928115541302672e-06, "loss": 0.0386, "step": 569, "video_reward_cumulative_accuracy": 0.7724077328646749 }, { "epoch": 0.16918967052537845, "grad_norm": 6.987502574920654, "learning_rate": 4.927497531370697e-06, "loss": 0.1114, "step": 570, "video_reward_cumulative_accuracy": 0.7728070175438596 }, { "epoch": 0.1694864945087563, "grad_norm": 3.6298933029174805, "learning_rate": 4.9268769152845146e-06, "loss": 0.0853, "step": 571, "video_reward_cumulative_accuracy": 0.7723292469352014 }, { "epoch": 0.16978331849213416, "grad_norm": 6.841943740844727, "learning_rate": 4.926253693710416e-06, "loss": 0.0701, "step": 572, "video_reward_cumulative_accuracy": 0.7727272727272727 }, { "epoch": 0.170080142475512, "grad_norm": 5.937003135681152, "learning_rate": 4.925627867317491e-06, "loss": 0.1064, "step": 573, "video_reward_cumulative_accuracy": 0.7731239092495636 }, { "epoch": 0.1703769664588899, "grad_norm": 2.3757357597351074, "learning_rate": 4.924999436777624e-06, "loss": 0.0466, "step": 574, "video_reward_cumulative_accuracy": 0.7735191637630662 }, { "epoch": 0.17067379044226774, "grad_norm": 1.4566744565963745, "learning_rate": 4.924368402765498e-06, "loss": 0.0228, "step": 575, "video_reward_cumulative_accuracy": 0.7739130434782608 }, { "epoch": 0.1709706144256456, "grad_norm": 2.156557083129883, "learning_rate": 4.923734765958587e-06, "loss": 0.0483, "step": 576, "video_reward_cumulative_accuracy": 0.7743055555555556 }, { "epoch": 0.17126743840902345, "grad_norm": 3.534210681915283, "learning_rate": 4.9230985270371625e-06, "loss": 0.0695, "step": 577, "video_reward_cumulative_accuracy": 0.7738301559792028 }, { "epoch": 0.1715642623924013, "grad_norm": 1.7886089086532593, "learning_rate": 4.9224596866842895e-06, "loss": 0.06, "step": 578, "video_reward_cumulative_accuracy": 0.7742214532871973 }, { "epoch": 0.17186108637577915, "grad_norm": 1.3056138753890991, "learning_rate": 4.921818245585824e-06, "loss": 0.0428, "step": 579, "video_reward_cumulative_accuracy": 0.7746113989637305 }, { "epoch": 0.172157910359157, "grad_norm": 2.9909825325012207, "learning_rate": 4.921174204430415e-06, "loss": 0.0486, "step": 580, "video_reward_cumulative_accuracy": 0.775 }, { "epoch": 0.1724547343425349, "grad_norm": 3.4431159496307373, "learning_rate": 4.920527563909505e-06, "loss": 0.0921, "step": 581, "video_reward_cumulative_accuracy": 0.774526678141136 }, { "epoch": 0.17275155832591274, "grad_norm": 1.9083445072174072, "learning_rate": 4.919878324717323e-06, "loss": 0.0655, "step": 582, "video_reward_cumulative_accuracy": 0.7749140893470791 }, { "epoch": 0.1730483823092906, "grad_norm": 5.427271366119385, "learning_rate": 4.919226487550892e-06, "loss": 0.0755, "step": 583, "video_reward_cumulative_accuracy": 0.774442538593482 }, { "epoch": 0.17334520629266845, "grad_norm": 2.5748579502105713, "learning_rate": 4.918572053110022e-06, "loss": 0.0445, "step": 584, "video_reward_cumulative_accuracy": 0.7748287671232876 }, { "epoch": 0.1736420302760463, "grad_norm": 1.3002210855484009, "learning_rate": 4.917915022097313e-06, "loss": 0.0644, "step": 585, "video_reward_cumulative_accuracy": 0.7752136752136752 }, { "epoch": 0.17393885425942415, "grad_norm": 0.7442671060562134, "learning_rate": 4.917255395218149e-06, "loss": 0.0408, "step": 586, "video_reward_cumulative_accuracy": 0.7747440273037542 }, { "epoch": 0.174235678242802, "grad_norm": 0.8181408643722534, "learning_rate": 4.9165931731807045e-06, "loss": 0.0564, "step": 587, "video_reward_cumulative_accuracy": 0.7751277683134583 }, { "epoch": 0.1745325022261799, "grad_norm": 1.601649522781372, "learning_rate": 4.915928356695941e-06, "loss": 0.0286, "step": 588, "video_reward_cumulative_accuracy": 0.7755102040816326 }, { "epoch": 0.17482932620955774, "grad_norm": 0.9357208013534546, "learning_rate": 4.915260946477601e-06, "loss": 0.0481, "step": 589, "video_reward_cumulative_accuracy": 0.7750424448217318 }, { "epoch": 0.1751261501929356, "grad_norm": 3.644911766052246, "learning_rate": 4.914590943242216e-06, "loss": 0.0626, "step": 590, "video_reward_cumulative_accuracy": 0.7745762711864407 }, { "epoch": 0.17542297417631345, "grad_norm": 6.013518810272217, "learning_rate": 4.913918347709098e-06, "loss": 0.0961, "step": 591, "video_reward_cumulative_accuracy": 0.7749576988155669 }, { "epoch": 0.1757197981596913, "grad_norm": 3.016268491744995, "learning_rate": 4.9132431606003444e-06, "loss": 0.0378, "step": 592, "video_reward_cumulative_accuracy": 0.7753378378378378 }, { "epoch": 0.17601662214306915, "grad_norm": 1.4820626974105835, "learning_rate": 4.912565382640834e-06, "loss": 0.0345, "step": 593, "video_reward_cumulative_accuracy": 0.7757166947723441 }, { "epoch": 0.176313446126447, "grad_norm": 5.260765075683594, "learning_rate": 4.911885014558227e-06, "loss": 0.0478, "step": 594, "video_reward_cumulative_accuracy": 0.7760942760942761 }, { "epoch": 0.17661027010982489, "grad_norm": 1.4641700983047485, "learning_rate": 4.911202057082966e-06, "loss": 0.0504, "step": 595, "video_reward_cumulative_accuracy": 0.7747899159663866 }, { "epoch": 0.17690709409320274, "grad_norm": 2.0738446712493896, "learning_rate": 4.91051651094827e-06, "loss": 0.0336, "step": 596, "video_reward_cumulative_accuracy": 0.7743288590604027 }, { "epoch": 0.1772039180765806, "grad_norm": 1.8221532106399536, "learning_rate": 4.90982837689014e-06, "loss": 0.0481, "step": 597, "video_reward_cumulative_accuracy": 0.7747068676716918 }, { "epoch": 0.17750074205995844, "grad_norm": 3.999908685684204, "learning_rate": 4.909137655647354e-06, "loss": 0.0827, "step": 598, "video_reward_cumulative_accuracy": 0.7750836120401338 }, { "epoch": 0.1777975660433363, "grad_norm": 3.1265053749084473, "learning_rate": 4.908444347961472e-06, "loss": 0.0808, "step": 599, "video_reward_cumulative_accuracy": 0.7754590984974958 }, { "epoch": 0.17809439002671415, "grad_norm": 4.669152736663818, "learning_rate": 4.907748454576822e-06, "loss": 0.09, "step": 600, "video_reward_cumulative_accuracy": 0.7758333333333334 }, { "epoch": 0.17809439002671415, "eval_runtime": 143.405, "eval_samples_per_second": 5.502, "eval_steps_per_second": 0.69, "eval_test_set_accuracy": 0.73989898989899, "step": 600 }, { "epoch": 0.178391214010092, "grad_norm": 1.6913496255874634, "learning_rate": 4.907049976240516e-06, "loss": 0.0538, "step": 601, "video_reward_cumulative_accuracy": 0.7762063227953411 }, { "epoch": 0.17868803799346988, "grad_norm": 10.412848472595215, "learning_rate": 4.9063489137024375e-06, "loss": 0.1035, "step": 602, "video_reward_cumulative_accuracy": 0.7757475083056479 }, { "epoch": 0.17898486197684774, "grad_norm": 0.9118223786354065, "learning_rate": 4.905645267715246e-06, "loss": 0.0125, "step": 603, "video_reward_cumulative_accuracy": 0.7761194029850746 }, { "epoch": 0.1792816859602256, "grad_norm": 2.315340518951416, "learning_rate": 4.904939039034373e-06, "loss": 0.0634, "step": 604, "video_reward_cumulative_accuracy": 0.7756622516556292 }, { "epoch": 0.17957850994360344, "grad_norm": 3.120900869369507, "learning_rate": 4.904230228418023e-06, "loss": 0.1249, "step": 605, "video_reward_cumulative_accuracy": 0.775206611570248 }, { "epoch": 0.1798753339269813, "grad_norm": 3.1384425163269043, "learning_rate": 4.903518836627174e-06, "loss": 0.0442, "step": 606, "video_reward_cumulative_accuracy": 0.7747524752475248 }, { "epoch": 0.18017215791035915, "grad_norm": 4.558505535125732, "learning_rate": 4.9028048644255745e-06, "loss": 0.071, "step": 607, "video_reward_cumulative_accuracy": 0.7742998352553542 }, { "epoch": 0.180468981893737, "grad_norm": 2.9076156616210938, "learning_rate": 4.9020883125797415e-06, "loss": 0.0323, "step": 608, "video_reward_cumulative_accuracy": 0.7738486842105263 }, { "epoch": 0.18076580587711488, "grad_norm": 4.778907775878906, "learning_rate": 4.9013691818589635e-06, "loss": 0.0686, "step": 609, "video_reward_cumulative_accuracy": 0.7742200328407225 }, { "epoch": 0.18106262986049274, "grad_norm": 1.0505435466766357, "learning_rate": 4.9006474730352974e-06, "loss": 0.0419, "step": 610, "video_reward_cumulative_accuracy": 0.7745901639344263 }, { "epoch": 0.1813594538438706, "grad_norm": 3.0237913131713867, "learning_rate": 4.8999231868835675e-06, "loss": 0.0503, "step": 611, "video_reward_cumulative_accuracy": 0.7741407528641571 }, { "epoch": 0.18165627782724844, "grad_norm": 1.5496163368225098, "learning_rate": 4.899196324181365e-06, "loss": 0.0545, "step": 612, "video_reward_cumulative_accuracy": 0.7736928104575164 }, { "epoch": 0.1819531018106263, "grad_norm": 4.970526695251465, "learning_rate": 4.898466885709049e-06, "loss": 0.0746, "step": 613, "video_reward_cumulative_accuracy": 0.7732463295269169 }, { "epoch": 0.18224992579400415, "grad_norm": 3.2845726013183594, "learning_rate": 4.897734872249742e-06, "loss": 0.0703, "step": 614, "video_reward_cumulative_accuracy": 0.7728013029315961 }, { "epoch": 0.182546749777382, "grad_norm": 1.6914643049240112, "learning_rate": 4.89700028458933e-06, "loss": 0.0178, "step": 615, "video_reward_cumulative_accuracy": 0.7731707317073171 }, { "epoch": 0.18284357376075988, "grad_norm": 1.8486802577972412, "learning_rate": 4.896263123516465e-06, "loss": 0.0277, "step": 616, "video_reward_cumulative_accuracy": 0.773538961038961 }, { "epoch": 0.18314039774413773, "grad_norm": 4.048538684844971, "learning_rate": 4.8955233898225605e-06, "loss": 0.0762, "step": 617, "video_reward_cumulative_accuracy": 0.773095623987034 }, { "epoch": 0.1834372217275156, "grad_norm": 3.5552480220794678, "learning_rate": 4.894781084301793e-06, "loss": 0.0318, "step": 618, "video_reward_cumulative_accuracy": 0.7726537216828478 }, { "epoch": 0.18373404571089344, "grad_norm": 4.219141006469727, "learning_rate": 4.8940362077511e-06, "loss": 0.0499, "step": 619, "video_reward_cumulative_accuracy": 0.7730210016155089 }, { "epoch": 0.1840308696942713, "grad_norm": 1.9816478490829468, "learning_rate": 4.893288760970178e-06, "loss": 0.0364, "step": 620, "video_reward_cumulative_accuracy": 0.7733870967741936 }, { "epoch": 0.18432769367764915, "grad_norm": 2.502150535583496, "learning_rate": 4.892538744761484e-06, "loss": 0.0592, "step": 621, "video_reward_cumulative_accuracy": 0.7737520128824477 }, { "epoch": 0.184624517661027, "grad_norm": 2.3041718006134033, "learning_rate": 4.891786159930234e-06, "loss": 0.0372, "step": 622, "video_reward_cumulative_accuracy": 0.7741157556270096 }, { "epoch": 0.18492134164440488, "grad_norm": 2.347670078277588, "learning_rate": 4.8910310072843996e-06, "loss": 0.0908, "step": 623, "video_reward_cumulative_accuracy": 0.7744783306581059 }, { "epoch": 0.18521816562778273, "grad_norm": 1.8450899124145508, "learning_rate": 4.89027328763471e-06, "loss": 0.0378, "step": 624, "video_reward_cumulative_accuracy": 0.7748397435897436 }, { "epoch": 0.18551498961116059, "grad_norm": 2.6531243324279785, "learning_rate": 4.889513001794652e-06, "loss": 0.0294, "step": 625, "video_reward_cumulative_accuracy": 0.7752 }, { "epoch": 0.18581181359453844, "grad_norm": 1.7452423572540283, "learning_rate": 4.888750150580466e-06, "loss": 0.0357, "step": 626, "video_reward_cumulative_accuracy": 0.7747603833865815 }, { "epoch": 0.1861086375779163, "grad_norm": 1.9167020320892334, "learning_rate": 4.887984734811146e-06, "loss": 0.0278, "step": 627, "video_reward_cumulative_accuracy": 0.7751196172248804 }, { "epoch": 0.18640546156129414, "grad_norm": 2.1981749534606934, "learning_rate": 4.887216755308442e-06, "loss": 0.0457, "step": 628, "video_reward_cumulative_accuracy": 0.7746815286624203 }, { "epoch": 0.186702285544672, "grad_norm": 2.631011962890625, "learning_rate": 4.886446212896853e-06, "loss": 0.0836, "step": 629, "video_reward_cumulative_accuracy": 0.7742448330683624 }, { "epoch": 0.18699910952804988, "grad_norm": 3.423548936843872, "learning_rate": 4.885673108403631e-06, "loss": 0.0552, "step": 630, "video_reward_cumulative_accuracy": 0.7746031746031746 }, { "epoch": 0.18729593351142773, "grad_norm": 0.9264172911643982, "learning_rate": 4.884897442658779e-06, "loss": 0.0143, "step": 631, "video_reward_cumulative_accuracy": 0.7749603803486529 }, { "epoch": 0.18759275749480558, "grad_norm": 1.8935270309448242, "learning_rate": 4.88411921649505e-06, "loss": 0.0241, "step": 632, "video_reward_cumulative_accuracy": 0.7745253164556962 }, { "epoch": 0.18788958147818344, "grad_norm": 1.9011247158050537, "learning_rate": 4.883338430747944e-06, "loss": 0.1005, "step": 633, "video_reward_cumulative_accuracy": 0.7748815165876777 }, { "epoch": 0.1881864054615613, "grad_norm": 2.4055683612823486, "learning_rate": 4.882555086255712e-06, "loss": 0.0326, "step": 634, "video_reward_cumulative_accuracy": 0.7752365930599369 }, { "epoch": 0.18848322944493914, "grad_norm": 3.9865567684173584, "learning_rate": 4.88176918385935e-06, "loss": 0.0432, "step": 635, "video_reward_cumulative_accuracy": 0.7748031496062993 }, { "epoch": 0.188780053428317, "grad_norm": 1.4653565883636475, "learning_rate": 4.8809807244025985e-06, "loss": 0.0275, "step": 636, "video_reward_cumulative_accuracy": 0.7751572327044025 }, { "epoch": 0.18907687741169488, "grad_norm": 1.477861762046814, "learning_rate": 4.880189708731947e-06, "loss": 0.0707, "step": 637, "video_reward_cumulative_accuracy": 0.7755102040816326 }, { "epoch": 0.18937370139507273, "grad_norm": 4.140712261199951, "learning_rate": 4.879396137696628e-06, "loss": 0.0873, "step": 638, "video_reward_cumulative_accuracy": 0.7750783699059561 }, { "epoch": 0.18967052537845058, "grad_norm": 2.715289354324341, "learning_rate": 4.878600012148617e-06, "loss": 0.0496, "step": 639, "video_reward_cumulative_accuracy": 0.7754303599374022 }, { "epoch": 0.18996734936182844, "grad_norm": 1.2748372554779053, "learning_rate": 4.87780133294263e-06, "loss": 0.0294, "step": 640, "video_reward_cumulative_accuracy": 0.77578125 }, { "epoch": 0.1902641733452063, "grad_norm": 1.9033632278442383, "learning_rate": 4.877000100936129e-06, "loss": 0.0344, "step": 641, "video_reward_cumulative_accuracy": 0.7761310452418096 }, { "epoch": 0.19056099732858414, "grad_norm": 3.2748398780822754, "learning_rate": 4.876196316989313e-06, "loss": 0.054, "step": 642, "video_reward_cumulative_accuracy": 0.7764797507788161 }, { "epoch": 0.190857821311962, "grad_norm": 2.8134162425994873, "learning_rate": 4.875389981965123e-06, "loss": 0.0845, "step": 643, "video_reward_cumulative_accuracy": 0.776049766718507 }, { "epoch": 0.19115464529533988, "grad_norm": 1.5256245136260986, "learning_rate": 4.874581096729238e-06, "loss": 0.0205, "step": 644, "video_reward_cumulative_accuracy": 0.7763975155279503 }, { "epoch": 0.19145146927871773, "grad_norm": 5.077547073364258, "learning_rate": 4.8737696621500715e-06, "loss": 0.0899, "step": 645, "video_reward_cumulative_accuracy": 0.7767441860465116 }, { "epoch": 0.19174829326209558, "grad_norm": 4.956404209136963, "learning_rate": 4.872955679098782e-06, "loss": 0.0849, "step": 646, "video_reward_cumulative_accuracy": 0.7770897832817337 }, { "epoch": 0.19204511724547343, "grad_norm": 7.943280220031738, "learning_rate": 4.872139148449257e-06, "loss": 0.1075, "step": 647, "video_reward_cumulative_accuracy": 0.7774343122102009 }, { "epoch": 0.1923419412288513, "grad_norm": 6.340520858764648, "learning_rate": 4.871320071078122e-06, "loss": 0.0777, "step": 648, "video_reward_cumulative_accuracy": 0.7777777777777778 }, { "epoch": 0.19263876521222914, "grad_norm": 2.82149338722229, "learning_rate": 4.870498447864735e-06, "loss": 0.0556, "step": 649, "video_reward_cumulative_accuracy": 0.7781201848998459 }, { "epoch": 0.192935589195607, "grad_norm": 5.320289134979248, "learning_rate": 4.86967427969119e-06, "loss": 0.0697, "step": 650, "video_reward_cumulative_accuracy": 0.7776923076923077 }, { "epoch": 0.19323241317898487, "grad_norm": 1.857016682624817, "learning_rate": 4.86884756744231e-06, "loss": 0.0323, "step": 651, "video_reward_cumulative_accuracy": 0.7780337941628265 }, { "epoch": 0.19352923716236273, "grad_norm": 2.1225006580352783, "learning_rate": 4.8680183120056516e-06, "loss": 0.0557, "step": 652, "video_reward_cumulative_accuracy": 0.7776073619631901 }, { "epoch": 0.19382606114574058, "grad_norm": 1.882553219795227, "learning_rate": 4.8671865142715e-06, "loss": 0.0494, "step": 653, "video_reward_cumulative_accuracy": 0.7771822358346095 }, { "epoch": 0.19412288512911843, "grad_norm": 3.701078414916992, "learning_rate": 4.866352175132873e-06, "loss": 0.1412, "step": 654, "video_reward_cumulative_accuracy": 0.7759938837920489 }, { "epoch": 0.1944197091124963, "grad_norm": 4.4002509117126465, "learning_rate": 4.865515295485511e-06, "loss": 0.0662, "step": 655, "video_reward_cumulative_accuracy": 0.7763358778625954 }, { "epoch": 0.19471653309587414, "grad_norm": 5.623415470123291, "learning_rate": 4.864675876227889e-06, "loss": 0.0927, "step": 656, "video_reward_cumulative_accuracy": 0.7751524390243902 }, { "epoch": 0.195013357079252, "grad_norm": 1.7767045497894287, "learning_rate": 4.863833918261204e-06, "loss": 0.0468, "step": 657, "video_reward_cumulative_accuracy": 0.7754946727549468 }, { "epoch": 0.19531018106262987, "grad_norm": 1.3329066038131714, "learning_rate": 4.862989422489379e-06, "loss": 0.0362, "step": 658, "video_reward_cumulative_accuracy": 0.7758358662613982 }, { "epoch": 0.19560700504600773, "grad_norm": 3.9193496704101562, "learning_rate": 4.862142389819063e-06, "loss": 0.0751, "step": 659, "video_reward_cumulative_accuracy": 0.776176024279211 }, { "epoch": 0.19590382902938558, "grad_norm": 2.8584847450256348, "learning_rate": 4.861292821159627e-06, "loss": 0.0569, "step": 660, "video_reward_cumulative_accuracy": 0.7765151515151515 }, { "epoch": 0.19620065301276343, "grad_norm": 1.3020362854003906, "learning_rate": 4.860440717423166e-06, "loss": 0.0746, "step": 661, "video_reward_cumulative_accuracy": 0.7768532526475038 }, { "epoch": 0.19649747699614128, "grad_norm": 5.554771423339844, "learning_rate": 4.8595860795244955e-06, "loss": 0.0509, "step": 662, "video_reward_cumulative_accuracy": 0.7764350453172205 }, { "epoch": 0.19679430097951914, "grad_norm": 1.001604676246643, "learning_rate": 4.858728908381153e-06, "loss": 0.0301, "step": 663, "video_reward_cumulative_accuracy": 0.7760180995475113 }, { "epoch": 0.197091124962897, "grad_norm": 1.391948938369751, "learning_rate": 4.857869204913394e-06, "loss": 0.0644, "step": 664, "video_reward_cumulative_accuracy": 0.776355421686747 }, { "epoch": 0.19738794894627487, "grad_norm": 1.2883930206298828, "learning_rate": 4.857006970044194e-06, "loss": 0.0282, "step": 665, "video_reward_cumulative_accuracy": 0.7766917293233083 }, { "epoch": 0.19768477292965272, "grad_norm": 1.8838915824890137, "learning_rate": 4.856142204699246e-06, "loss": 0.037, "step": 666, "video_reward_cumulative_accuracy": 0.777027027027027 }, { "epoch": 0.19798159691303058, "grad_norm": 8.990363121032715, "learning_rate": 4.855274909806959e-06, "loss": 0.1334, "step": 667, "video_reward_cumulative_accuracy": 0.7766116941529235 }, { "epoch": 0.19827842089640843, "grad_norm": 3.5435233116149902, "learning_rate": 4.85440508629846e-06, "loss": 0.0568, "step": 668, "video_reward_cumulative_accuracy": 0.7761976047904192 }, { "epoch": 0.19857524487978628, "grad_norm": 2.3105525970458984, "learning_rate": 4.853532735107587e-06, "loss": 0.0383, "step": 669, "video_reward_cumulative_accuracy": 0.7765321375186846 }, { "epoch": 0.19887206886316414, "grad_norm": 0.8967596292495728, "learning_rate": 4.852657857170894e-06, "loss": 0.0358, "step": 670, "video_reward_cumulative_accuracy": 0.7768656716417911 }, { "epoch": 0.199168892846542, "grad_norm": 1.6966391801834106, "learning_rate": 4.851780453427648e-06, "loss": 0.035, "step": 671, "video_reward_cumulative_accuracy": 0.7764530551415797 }, { "epoch": 0.19946571682991987, "grad_norm": 5.105749607086182, "learning_rate": 4.8509005248198265e-06, "loss": 0.0905, "step": 672, "video_reward_cumulative_accuracy": 0.7760416666666666 }, { "epoch": 0.19976254081329772, "grad_norm": 1.4758727550506592, "learning_rate": 4.8500180722921184e-06, "loss": 0.0379, "step": 673, "video_reward_cumulative_accuracy": 0.7763744427934621 }, { "epoch": 0.20005936479667558, "grad_norm": 2.2049713134765625, "learning_rate": 4.849133096791923e-06, "loss": 0.0313, "step": 674, "video_reward_cumulative_accuracy": 0.776706231454006 }, { "epoch": 0.20035618878005343, "grad_norm": 2.7456955909729004, "learning_rate": 4.848245599269346e-06, "loss": 0.0959, "step": 675, "video_reward_cumulative_accuracy": 0.7770370370370371 }, { "epoch": 0.20065301276343128, "grad_norm": 5.363068103790283, "learning_rate": 4.847355580677203e-06, "loss": 0.0997, "step": 676, "video_reward_cumulative_accuracy": 0.775887573964497 }, { "epoch": 0.20094983674680914, "grad_norm": 1.5729711055755615, "learning_rate": 4.846463041971014e-06, "loss": 0.0311, "step": 677, "video_reward_cumulative_accuracy": 0.7754800590841949 }, { "epoch": 0.201246660730187, "grad_norm": 7.414484024047852, "learning_rate": 4.845567984109009e-06, "loss": 0.1189, "step": 678, "video_reward_cumulative_accuracy": 0.7743362831858407 }, { "epoch": 0.20154348471356487, "grad_norm": 1.798652172088623, "learning_rate": 4.844670408052117e-06, "loss": 0.0357, "step": 679, "video_reward_cumulative_accuracy": 0.7746686303387335 }, { "epoch": 0.20184030869694272, "grad_norm": 4.525697231292725, "learning_rate": 4.843770314763973e-06, "loss": 0.0504, "step": 680, "video_reward_cumulative_accuracy": 0.774264705882353 }, { "epoch": 0.20213713268032057, "grad_norm": 2.7870752811431885, "learning_rate": 4.842867705210915e-06, "loss": 0.0954, "step": 681, "video_reward_cumulative_accuracy": 0.7738619676945668 }, { "epoch": 0.20243395666369843, "grad_norm": 2.8513903617858887, "learning_rate": 4.841962580361983e-06, "loss": 0.0483, "step": 682, "video_reward_cumulative_accuracy": 0.7741935483870968 }, { "epoch": 0.20273078064707628, "grad_norm": 2.141054630279541, "learning_rate": 4.841054941188914e-06, "loss": 0.0399, "step": 683, "video_reward_cumulative_accuracy": 0.773792093704246 }, { "epoch": 0.20302760463045413, "grad_norm": 1.3511686325073242, "learning_rate": 4.840144788666149e-06, "loss": 0.0312, "step": 684, "video_reward_cumulative_accuracy": 0.7741228070175439 }, { "epoch": 0.203324428613832, "grad_norm": 1.8301844596862793, "learning_rate": 4.839232123770824e-06, "loss": 0.0654, "step": 685, "video_reward_cumulative_accuracy": 0.7744525547445256 }, { "epoch": 0.20362125259720987, "grad_norm": 4.309445858001709, "learning_rate": 4.838316947482774e-06, "loss": 0.0561, "step": 686, "video_reward_cumulative_accuracy": 0.7747813411078717 }, { "epoch": 0.20391807658058772, "grad_norm": 4.205143928527832, "learning_rate": 4.837399260784529e-06, "loss": 0.073, "step": 687, "video_reward_cumulative_accuracy": 0.7743813682678311 }, { "epoch": 0.20421490056396557, "grad_norm": 2.1332249641418457, "learning_rate": 4.836479064661314e-06, "loss": 0.0535, "step": 688, "video_reward_cumulative_accuracy": 0.7747093023255814 }, { "epoch": 0.20451172454734343, "grad_norm": 2.2309157848358154, "learning_rate": 4.83555636010105e-06, "loss": 0.0286, "step": 689, "video_reward_cumulative_accuracy": 0.7750362844702468 }, { "epoch": 0.20480854853072128, "grad_norm": 1.6311012506484985, "learning_rate": 4.8346311480943495e-06, "loss": 0.0292, "step": 690, "video_reward_cumulative_accuracy": 0.7753623188405797 }, { "epoch": 0.20510537251409913, "grad_norm": 3.9914817810058594, "learning_rate": 4.833703429634519e-06, "loss": 0.0789, "step": 691, "video_reward_cumulative_accuracy": 0.7749638205499276 }, { "epoch": 0.20540219649747699, "grad_norm": 0.823984682559967, "learning_rate": 4.832773205717551e-06, "loss": 0.027, "step": 692, "video_reward_cumulative_accuracy": 0.7752890173410405 }, { "epoch": 0.20569902048085487, "grad_norm": 0.9912533760070801, "learning_rate": 4.831840477342134e-06, "loss": 0.0309, "step": 693, "video_reward_cumulative_accuracy": 0.7756132756132756 }, { "epoch": 0.20599584446423272, "grad_norm": 1.6996347904205322, "learning_rate": 4.830905245509641e-06, "loss": 0.0468, "step": 694, "video_reward_cumulative_accuracy": 0.7752161383285303 }, { "epoch": 0.20629266844761057, "grad_norm": 1.391541600227356, "learning_rate": 4.829967511224135e-06, "loss": 0.0389, "step": 695, "video_reward_cumulative_accuracy": 0.7755395683453238 }, { "epoch": 0.20658949243098843, "grad_norm": 2.34708833694458, "learning_rate": 4.829027275492364e-06, "loss": 0.0395, "step": 696, "video_reward_cumulative_accuracy": 0.7758620689655172 }, { "epoch": 0.20688631641436628, "grad_norm": 2.918024778366089, "learning_rate": 4.828084539323763e-06, "loss": 0.0451, "step": 697, "video_reward_cumulative_accuracy": 0.7754662840746055 }, { "epoch": 0.20718314039774413, "grad_norm": 2.345532178878784, "learning_rate": 4.82713930373045e-06, "loss": 0.0442, "step": 698, "video_reward_cumulative_accuracy": 0.7757879656160458 }, { "epoch": 0.20747996438112198, "grad_norm": 1.47147536277771, "learning_rate": 4.826191569727228e-06, "loss": 0.0284, "step": 699, "video_reward_cumulative_accuracy": 0.7761087267525035 }, { "epoch": 0.20777678836449986, "grad_norm": 1.816048264503479, "learning_rate": 4.82524133833158e-06, "loss": 0.0437, "step": 700, "video_reward_cumulative_accuracy": 0.7764285714285715 }, { "epoch": 0.20807361234787772, "grad_norm": 3.5193707942962646, "learning_rate": 4.824288610563673e-06, "loss": 0.0454, "step": 701, "video_reward_cumulative_accuracy": 0.7767475035663338 }, { "epoch": 0.20837043633125557, "grad_norm": 1.532949447631836, "learning_rate": 4.8233333874463535e-06, "loss": 0.036, "step": 702, "video_reward_cumulative_accuracy": 0.7770655270655271 }, { "epoch": 0.20866726031463342, "grad_norm": 1.1091006994247437, "learning_rate": 4.822375670005144e-06, "loss": 0.0265, "step": 703, "video_reward_cumulative_accuracy": 0.7773826458036984 }, { "epoch": 0.20896408429801128, "grad_norm": 2.238027572631836, "learning_rate": 4.821415459268249e-06, "loss": 0.0393, "step": 704, "video_reward_cumulative_accuracy": 0.7776988636363636 }, { "epoch": 0.20926090828138913, "grad_norm": 4.488368988037109, "learning_rate": 4.820452756266546e-06, "loss": 0.0896, "step": 705, "video_reward_cumulative_accuracy": 0.7780141843971631 }, { "epoch": 0.20955773226476698, "grad_norm": 2.5125250816345215, "learning_rate": 4.819487562033592e-06, "loss": 0.0354, "step": 706, "video_reward_cumulative_accuracy": 0.7776203966005666 }, { "epoch": 0.20985455624814486, "grad_norm": 3.1740994453430176, "learning_rate": 4.818519877605616e-06, "loss": 0.0392, "step": 707, "video_reward_cumulative_accuracy": 0.7779349363507779 }, { "epoch": 0.21015138023152272, "grad_norm": 0.8142343759536743, "learning_rate": 4.817549704021521e-06, "loss": 0.0256, "step": 708, "video_reward_cumulative_accuracy": 0.7782485875706214 }, { "epoch": 0.21044820421490057, "grad_norm": 2.2193245887756348, "learning_rate": 4.816577042322883e-06, "loss": 0.0586, "step": 709, "video_reward_cumulative_accuracy": 0.7785613540197461 }, { "epoch": 0.21074502819827842, "grad_norm": 3.6588878631591797, "learning_rate": 4.815601893553948e-06, "loss": 0.061, "step": 710, "video_reward_cumulative_accuracy": 0.778169014084507 }, { "epoch": 0.21104185218165628, "grad_norm": 3.278996229171753, "learning_rate": 4.8146242587616335e-06, "loss": 0.0306, "step": 711, "video_reward_cumulative_accuracy": 0.7784810126582279 }, { "epoch": 0.21133867616503413, "grad_norm": 4.987575054168701, "learning_rate": 4.813644138995524e-06, "loss": 0.0612, "step": 712, "video_reward_cumulative_accuracy": 0.7780898876404494 }, { "epoch": 0.21163550014841198, "grad_norm": 3.508737087249756, "learning_rate": 4.812661535307876e-06, "loss": 0.0883, "step": 713, "video_reward_cumulative_accuracy": 0.7776998597475456 }, { "epoch": 0.21193232413178986, "grad_norm": 1.5868617296218872, "learning_rate": 4.811676448753606e-06, "loss": 0.0478, "step": 714, "video_reward_cumulative_accuracy": 0.7780112044817927 }, { "epoch": 0.21222914811516772, "grad_norm": 1.609864592552185, "learning_rate": 4.810688880390303e-06, "loss": 0.0553, "step": 715, "video_reward_cumulative_accuracy": 0.7783216783216783 }, { "epoch": 0.21252597209854557, "grad_norm": 3.3906569480895996, "learning_rate": 4.809698831278217e-06, "loss": 0.077, "step": 716, "video_reward_cumulative_accuracy": 0.7779329608938548 }, { "epoch": 0.21282279608192342, "grad_norm": 1.417561411857605, "learning_rate": 4.808706302480261e-06, "loss": 0.038, "step": 717, "video_reward_cumulative_accuracy": 0.7782426778242678 }, { "epoch": 0.21311962006530127, "grad_norm": 1.8394412994384766, "learning_rate": 4.807711295062013e-06, "loss": 0.0496, "step": 718, "video_reward_cumulative_accuracy": 0.7785515320334262 }, { "epoch": 0.21341644404867913, "grad_norm": 2.1921329498291016, "learning_rate": 4.8067138100917065e-06, "loss": 0.0483, "step": 719, "video_reward_cumulative_accuracy": 0.778164116828929 }, { "epoch": 0.21371326803205698, "grad_norm": 3.041285753250122, "learning_rate": 4.805713848640242e-06, "loss": 0.0777, "step": 720, "video_reward_cumulative_accuracy": 0.7784722222222222 }, { "epoch": 0.21401009201543486, "grad_norm": 2.195009469985962, "learning_rate": 4.804711411781173e-06, "loss": 0.0761, "step": 721, "video_reward_cumulative_accuracy": 0.7787794729542302 }, { "epoch": 0.2143069159988127, "grad_norm": 1.3252123594284058, "learning_rate": 4.803706500590714e-06, "loss": 0.062, "step": 722, "video_reward_cumulative_accuracy": 0.7790858725761773 }, { "epoch": 0.21460373998219057, "grad_norm": 3.693161964416504, "learning_rate": 4.802699116147732e-06, "loss": 0.0576, "step": 723, "video_reward_cumulative_accuracy": 0.7786998616874136 }, { "epoch": 0.21490056396556842, "grad_norm": 1.6738680601119995, "learning_rate": 4.801689259533756e-06, "loss": 0.0488, "step": 724, "video_reward_cumulative_accuracy": 0.7790055248618785 }, { "epoch": 0.21519738794894627, "grad_norm": 2.5583913326263428, "learning_rate": 4.800676931832963e-06, "loss": 0.0596, "step": 725, "video_reward_cumulative_accuracy": 0.7793103448275862 }, { "epoch": 0.21549421193232413, "grad_norm": 2.4868931770324707, "learning_rate": 4.799662134132185e-06, "loss": 0.0386, "step": 726, "video_reward_cumulative_accuracy": 0.7789256198347108 }, { "epoch": 0.21579103591570198, "grad_norm": 1.2940788269042969, "learning_rate": 4.798644867520905e-06, "loss": 0.0506, "step": 727, "video_reward_cumulative_accuracy": 0.7792297111416782 }, { "epoch": 0.21608785989907986, "grad_norm": 1.6243387460708618, "learning_rate": 4.797625133091259e-06, "loss": 0.0387, "step": 728, "video_reward_cumulative_accuracy": 0.779532967032967 }, { "epoch": 0.2163846838824577, "grad_norm": 3.6060104370117188, "learning_rate": 4.796602931938031e-06, "loss": 0.072, "step": 729, "video_reward_cumulative_accuracy": 0.7791495198902606 }, { "epoch": 0.21668150786583557, "grad_norm": 2.368060827255249, "learning_rate": 4.795578265158652e-06, "loss": 0.0417, "step": 730, "video_reward_cumulative_accuracy": 0.7787671232876713 }, { "epoch": 0.21697833184921342, "grad_norm": 2.8776209354400635, "learning_rate": 4.794551133853202e-06, "loss": 0.0693, "step": 731, "video_reward_cumulative_accuracy": 0.7790697674418605 }, { "epoch": 0.21727515583259127, "grad_norm": 1.1618021726608276, "learning_rate": 4.7935215391244065e-06, "loss": 0.0558, "step": 732, "video_reward_cumulative_accuracy": 0.7786885245901639 }, { "epoch": 0.21757197981596912, "grad_norm": 4.455048561096191, "learning_rate": 4.792489482077633e-06, "loss": 0.0619, "step": 733, "video_reward_cumulative_accuracy": 0.7789904502046384 }, { "epoch": 0.21786880379934698, "grad_norm": 3.7576048374176025, "learning_rate": 4.791454963820898e-06, "loss": 0.0586, "step": 734, "video_reward_cumulative_accuracy": 0.779291553133515 }, { "epoch": 0.21816562778272486, "grad_norm": 2.280623435974121, "learning_rate": 4.790417985464855e-06, "loss": 0.0457, "step": 735, "video_reward_cumulative_accuracy": 0.7789115646258503 }, { "epoch": 0.2184624517661027, "grad_norm": 3.224135398864746, "learning_rate": 4.789378548122803e-06, "loss": 0.0378, "step": 736, "video_reward_cumulative_accuracy": 0.7792119565217391 }, { "epoch": 0.21875927574948056, "grad_norm": 1.9654239416122437, "learning_rate": 4.788336652910676e-06, "loss": 0.068, "step": 737, "video_reward_cumulative_accuracy": 0.7788331071913162 }, { "epoch": 0.21905609973285842, "grad_norm": 2.6083526611328125, "learning_rate": 4.787292300947053e-06, "loss": 0.0529, "step": 738, "video_reward_cumulative_accuracy": 0.7784552845528455 }, { "epoch": 0.21935292371623627, "grad_norm": 2.398172378540039, "learning_rate": 4.786245493353145e-06, "loss": 0.0541, "step": 739, "video_reward_cumulative_accuracy": 0.7780784844384303 }, { "epoch": 0.21964974769961412, "grad_norm": 0.9663519263267517, "learning_rate": 4.785196231252802e-06, "loss": 0.031, "step": 740, "video_reward_cumulative_accuracy": 0.7783783783783784 }, { "epoch": 0.21994657168299198, "grad_norm": 1.1138893365859985, "learning_rate": 4.784144515772509e-06, "loss": 0.0387, "step": 741, "video_reward_cumulative_accuracy": 0.7780026990553306 }, { "epoch": 0.22024339566636983, "grad_norm": 1.9808402061462402, "learning_rate": 4.783090348041384e-06, "loss": 0.0348, "step": 742, "video_reward_cumulative_accuracy": 0.7776280323450134 }, { "epoch": 0.2205402196497477, "grad_norm": 1.5914376974105835, "learning_rate": 4.782033729191179e-06, "loss": 0.0462, "step": 743, "video_reward_cumulative_accuracy": 0.7779273216689099 }, { "epoch": 0.22083704363312556, "grad_norm": 3.169459819793701, "learning_rate": 4.780974660356276e-06, "loss": 0.0653, "step": 744, "video_reward_cumulative_accuracy": 0.7782258064516129 }, { "epoch": 0.22113386761650342, "grad_norm": 2.7912189960479736, "learning_rate": 4.77991314267369e-06, "loss": 0.0405, "step": 745, "video_reward_cumulative_accuracy": 0.7778523489932886 }, { "epoch": 0.22143069159988127, "grad_norm": 2.121472120285034, "learning_rate": 4.778849177283061e-06, "loss": 0.0563, "step": 746, "video_reward_cumulative_accuracy": 0.7781501340482574 }, { "epoch": 0.22172751558325912, "grad_norm": 3.9767544269561768, "learning_rate": 4.777782765326661e-06, "loss": 0.0501, "step": 747, "video_reward_cumulative_accuracy": 0.7784471218206158 }, { "epoch": 0.22202433956663697, "grad_norm": 1.9390398263931274, "learning_rate": 4.776713907949386e-06, "loss": 0.065, "step": 748, "video_reward_cumulative_accuracy": 0.7787433155080213 }, { "epoch": 0.22232116355001483, "grad_norm": 4.238917827606201, "learning_rate": 4.775642606298758e-06, "loss": 0.0829, "step": 749, "video_reward_cumulative_accuracy": 0.7790387182910548 }, { "epoch": 0.2226179875333927, "grad_norm": 2.718532085418701, "learning_rate": 4.774568861524923e-06, "loss": 0.034, "step": 750, "video_reward_cumulative_accuracy": 0.7786666666666666 }, { "epoch": 0.22291481151677056, "grad_norm": 4.930084228515625, "learning_rate": 4.773492674780651e-06, "loss": 0.0583, "step": 751, "video_reward_cumulative_accuracy": 0.7782956058588548 }, { "epoch": 0.22321163550014841, "grad_norm": 4.565423965454102, "learning_rate": 4.772414047221333e-06, "loss": 0.0486, "step": 752, "video_reward_cumulative_accuracy": 0.7785904255319149 }, { "epoch": 0.22350845948352627, "grad_norm": 4.179710865020752, "learning_rate": 4.77133298000498e-06, "loss": 0.0614, "step": 753, "video_reward_cumulative_accuracy": 0.7782204515272244 }, { "epoch": 0.22380528346690412, "grad_norm": 5.0286383628845215, "learning_rate": 4.7702494742922215e-06, "loss": 0.0705, "step": 754, "video_reward_cumulative_accuracy": 0.7771883289124668 }, { "epoch": 0.22410210745028197, "grad_norm": 1.3847112655639648, "learning_rate": 4.769163531246308e-06, "loss": 0.0306, "step": 755, "video_reward_cumulative_accuracy": 0.7774834437086092 }, { "epoch": 0.22439893143365983, "grad_norm": 3.042525053024292, "learning_rate": 4.7680751520331035e-06, "loss": 0.0426, "step": 756, "video_reward_cumulative_accuracy": 0.7777777777777778 }, { "epoch": 0.2246957554170377, "grad_norm": 1.5790531635284424, "learning_rate": 4.766984337821089e-06, "loss": 0.0229, "step": 757, "video_reward_cumulative_accuracy": 0.7780713342140027 }, { "epoch": 0.22499257940041556, "grad_norm": 3.0712270736694336, "learning_rate": 4.76589108978136e-06, "loss": 0.0316, "step": 758, "video_reward_cumulative_accuracy": 0.7783641160949868 }, { "epoch": 0.2252894033837934, "grad_norm": 3.963674783706665, "learning_rate": 4.764795409087623e-06, "loss": 0.053, "step": 759, "video_reward_cumulative_accuracy": 0.7786561264822134 }, { "epoch": 0.22558622736717127, "grad_norm": 3.206650495529175, "learning_rate": 4.7636972969161984e-06, "loss": 0.0279, "step": 760, "video_reward_cumulative_accuracy": 0.7789473684210526 }, { "epoch": 0.22588305135054912, "grad_norm": 4.03346061706543, "learning_rate": 4.762596754446017e-06, "loss": 0.0558, "step": 761, "video_reward_cumulative_accuracy": 0.778580814717477 }, { "epoch": 0.22617987533392697, "grad_norm": 4.312930107116699, "learning_rate": 4.7614937828586176e-06, "loss": 0.1088, "step": 762, "video_reward_cumulative_accuracy": 0.7782152230971129 }, { "epoch": 0.22647669931730482, "grad_norm": 3.7724556922912598, "learning_rate": 4.760388383338145e-06, "loss": 0.0847, "step": 763, "video_reward_cumulative_accuracy": 0.7785058977719528 }, { "epoch": 0.2267735233006827, "grad_norm": 1.2291319370269775, "learning_rate": 4.759280557071357e-06, "loss": 0.0156, "step": 764, "video_reward_cumulative_accuracy": 0.7787958115183246 }, { "epoch": 0.22707034728406056, "grad_norm": 10.076594352722168, "learning_rate": 4.758170305247608e-06, "loss": 0.0863, "step": 765, "video_reward_cumulative_accuracy": 0.7790849673202614 }, { "epoch": 0.2273671712674384, "grad_norm": 2.8340132236480713, "learning_rate": 4.757057629058865e-06, "loss": 0.0329, "step": 766, "video_reward_cumulative_accuracy": 0.7787206266318538 }, { "epoch": 0.22766399525081626, "grad_norm": 2.3575525283813477, "learning_rate": 4.755942529699692e-06, "loss": 0.05, "step": 767, "video_reward_cumulative_accuracy": 0.7783572359843546 }, { "epoch": 0.22796081923419412, "grad_norm": 3.471806049346924, "learning_rate": 4.754825008367256e-06, "loss": 0.0655, "step": 768, "video_reward_cumulative_accuracy": 0.7779947916666666 }, { "epoch": 0.22825764321757197, "grad_norm": 3.795821189880371, "learning_rate": 4.753705066261326e-06, "loss": 0.0479, "step": 769, "video_reward_cumulative_accuracy": 0.7776332899869961 }, { "epoch": 0.22855446720094982, "grad_norm": 2.038712978363037, "learning_rate": 4.752582704584267e-06, "loss": 0.067, "step": 770, "video_reward_cumulative_accuracy": 0.7779220779220779 }, { "epoch": 0.2288512911843277, "grad_norm": 2.496264696121216, "learning_rate": 4.751457924541045e-06, "loss": 0.0409, "step": 771, "video_reward_cumulative_accuracy": 0.7782101167315175 }, { "epoch": 0.22914811516770556, "grad_norm": 1.8071130514144897, "learning_rate": 4.75033072733922e-06, "loss": 0.0145, "step": 772, "video_reward_cumulative_accuracy": 0.7784974093264249 }, { "epoch": 0.2294449391510834, "grad_norm": 3.4890074729919434, "learning_rate": 4.749201114188946e-06, "loss": 0.0712, "step": 773, "video_reward_cumulative_accuracy": 0.7781371280724451 }, { "epoch": 0.22974176313446126, "grad_norm": 2.5620384216308594, "learning_rate": 4.748069086302975e-06, "loss": 0.0741, "step": 774, "video_reward_cumulative_accuracy": 0.7777777777777778 }, { "epoch": 0.23003858711783912, "grad_norm": 1.5887774229049683, "learning_rate": 4.7469346448966455e-06, "loss": 0.0347, "step": 775, "video_reward_cumulative_accuracy": 0.7774193548387097 }, { "epoch": 0.23033541110121697, "grad_norm": 2.032930612564087, "learning_rate": 4.745797791187894e-06, "loss": 0.0537, "step": 776, "video_reward_cumulative_accuracy": 0.7777061855670103 }, { "epoch": 0.23063223508459482, "grad_norm": 3.915695905685425, "learning_rate": 4.744658526397241e-06, "loss": 0.0798, "step": 777, "video_reward_cumulative_accuracy": 0.7773487773487774 }, { "epoch": 0.2309290590679727, "grad_norm": 1.2229045629501343, "learning_rate": 4.743516851747798e-06, "loss": 0.0364, "step": 778, "video_reward_cumulative_accuracy": 0.7776349614395887 }, { "epoch": 0.23122588305135056, "grad_norm": 2.093339204788208, "learning_rate": 4.742372768465264e-06, "loss": 0.0472, "step": 779, "video_reward_cumulative_accuracy": 0.7772785622593068 }, { "epoch": 0.2315227070347284, "grad_norm": 2.220613479614258, "learning_rate": 4.7412262777779235e-06, "loss": 0.0444, "step": 780, "video_reward_cumulative_accuracy": 0.7769230769230769 }, { "epoch": 0.23181953101810626, "grad_norm": 4.584027290344238, "learning_rate": 4.740077380916646e-06, "loss": 0.0535, "step": 781, "video_reward_cumulative_accuracy": 0.7772087067861716 }, { "epoch": 0.23211635500148411, "grad_norm": 2.5561423301696777, "learning_rate": 4.738926079114883e-06, "loss": 0.0301, "step": 782, "video_reward_cumulative_accuracy": 0.7774936061381074 }, { "epoch": 0.23241317898486197, "grad_norm": 2.526623249053955, "learning_rate": 4.737772373608669e-06, "loss": 0.0315, "step": 783, "video_reward_cumulative_accuracy": 0.777139208173691 }, { "epoch": 0.23271000296823982, "grad_norm": 1.8118770122528076, "learning_rate": 4.736616265636619e-06, "loss": 0.0408, "step": 784, "video_reward_cumulative_accuracy": 0.7774234693877551 }, { "epoch": 0.2330068269516177, "grad_norm": 1.151275873184204, "learning_rate": 4.735457756439926e-06, "loss": 0.0169, "step": 785, "video_reward_cumulative_accuracy": 0.7777070063694268 }, { "epoch": 0.23330365093499555, "grad_norm": 5.073890209197998, "learning_rate": 4.734296847262364e-06, "loss": 0.0722, "step": 786, "video_reward_cumulative_accuracy": 0.7779898218829516 }, { "epoch": 0.2336004749183734, "grad_norm": 2.3573646545410156, "learning_rate": 4.733133539350281e-06, "loss": 0.0311, "step": 787, "video_reward_cumulative_accuracy": 0.7776365946632783 }, { "epoch": 0.23389729890175126, "grad_norm": 3.158076286315918, "learning_rate": 4.7319678339526e-06, "loss": 0.0754, "step": 788, "video_reward_cumulative_accuracy": 0.7779187817258884 }, { "epoch": 0.2341941228851291, "grad_norm": 2.9681179523468018, "learning_rate": 4.730799732320819e-06, "loss": 0.0412, "step": 789, "video_reward_cumulative_accuracy": 0.7782002534854245 }, { "epoch": 0.23449094686850697, "grad_norm": 2.718312978744507, "learning_rate": 4.729629235709009e-06, "loss": 0.0404, "step": 790, "video_reward_cumulative_accuracy": 0.7784810126582279 }, { "epoch": 0.23478777085188482, "grad_norm": 2.505716562271118, "learning_rate": 4.728456345373813e-06, "loss": 0.0526, "step": 791, "video_reward_cumulative_accuracy": 0.7787610619469026 }, { "epoch": 0.2350845948352627, "grad_norm": 3.465552806854248, "learning_rate": 4.7272810625744405e-06, "loss": 0.0543, "step": 792, "video_reward_cumulative_accuracy": 0.7790404040404041 }, { "epoch": 0.23538141881864055, "grad_norm": 2.564662218093872, "learning_rate": 4.726103388572672e-06, "loss": 0.0536, "step": 793, "video_reward_cumulative_accuracy": 0.7786885245901639 }, { "epoch": 0.2356782428020184, "grad_norm": 2.045988082885742, "learning_rate": 4.724923324632855e-06, "loss": 0.0451, "step": 794, "video_reward_cumulative_accuracy": 0.7789672544080605 }, { "epoch": 0.23597506678539626, "grad_norm": 1.944718837738037, "learning_rate": 4.7237408720219045e-06, "loss": 0.0348, "step": 795, "video_reward_cumulative_accuracy": 0.779245283018868 }, { "epoch": 0.2362718907687741, "grad_norm": 2.2963719367980957, "learning_rate": 4.722556032009295e-06, "loss": 0.0597, "step": 796, "video_reward_cumulative_accuracy": 0.7795226130653267 }, { "epoch": 0.23656871475215197, "grad_norm": 2.749324083328247, "learning_rate": 4.72136880586707e-06, "loss": 0.0344, "step": 797, "video_reward_cumulative_accuracy": 0.7797992471769134 }, { "epoch": 0.23686553873552982, "grad_norm": 3.161100387573242, "learning_rate": 4.7201791948698315e-06, "loss": 0.0518, "step": 798, "video_reward_cumulative_accuracy": 0.7800751879699248 }, { "epoch": 0.2371623627189077, "grad_norm": 8.226521492004395, "learning_rate": 4.71898720029474e-06, "loss": 0.1091, "step": 799, "video_reward_cumulative_accuracy": 0.77909887359199 }, { "epoch": 0.23745918670228555, "grad_norm": 4.398781776428223, "learning_rate": 4.717792823421521e-06, "loss": 0.0586, "step": 800, "video_reward_cumulative_accuracy": 0.77875 }, { "epoch": 0.23745918670228555, "eval_runtime": 128.8206, "eval_samples_per_second": 6.125, "eval_steps_per_second": 0.769, "eval_test_set_accuracy": 0.7487373737373737, "step": 800 }, { "epoch": 0.2377560106856634, "grad_norm": 3.7454681396484375, "learning_rate": 4.71659606553245e-06, "loss": 0.0363, "step": 801, "video_reward_cumulative_accuracy": 0.7790262172284644 }, { "epoch": 0.23805283466904126, "grad_norm": 1.430262804031372, "learning_rate": 4.7153969279123665e-06, "loss": 0.046, "step": 802, "video_reward_cumulative_accuracy": 0.7793017456359103 }, { "epoch": 0.2383496586524191, "grad_norm": 5.489625453948975, "learning_rate": 4.7141954118486585e-06, "loss": 0.0652, "step": 803, "video_reward_cumulative_accuracy": 0.7789539227895392 }, { "epoch": 0.23864648263579696, "grad_norm": 2.220259666442871, "learning_rate": 4.712991518631272e-06, "loss": 0.0209, "step": 804, "video_reward_cumulative_accuracy": 0.779228855721393 }, { "epoch": 0.23894330661917482, "grad_norm": 2.3900604248046875, "learning_rate": 4.711785249552701e-06, "loss": 0.0485, "step": 805, "video_reward_cumulative_accuracy": 0.7795031055900621 }, { "epoch": 0.2392401306025527, "grad_norm": 2.9659066200256348, "learning_rate": 4.710576605907995e-06, "loss": 0.0493, "step": 806, "video_reward_cumulative_accuracy": 0.7797766749379652 }, { "epoch": 0.23953695458593055, "grad_norm": 5.7252326011657715, "learning_rate": 4.709365588994749e-06, "loss": 0.0617, "step": 807, "video_reward_cumulative_accuracy": 0.7794299876084263 }, { "epoch": 0.2398337785693084, "grad_norm": 3.0332016944885254, "learning_rate": 4.708152200113106e-06, "loss": 0.0504, "step": 808, "video_reward_cumulative_accuracy": 0.7797029702970297 }, { "epoch": 0.24013060255268626, "grad_norm": 1.474946141242981, "learning_rate": 4.706936440565759e-06, "loss": 0.0367, "step": 809, "video_reward_cumulative_accuracy": 0.7799752781211372 }, { "epoch": 0.2404274265360641, "grad_norm": 5.749459743499756, "learning_rate": 4.705718311657943e-06, "loss": 0.0698, "step": 810, "video_reward_cumulative_accuracy": 0.7802469135802469 }, { "epoch": 0.24072425051944196, "grad_norm": 1.7116312980651855, "learning_rate": 4.704497814697436e-06, "loss": 0.0396, "step": 811, "video_reward_cumulative_accuracy": 0.7805178791615289 }, { "epoch": 0.24102107450281982, "grad_norm": 1.7955880165100098, "learning_rate": 4.703274950994563e-06, "loss": 0.0209, "step": 812, "video_reward_cumulative_accuracy": 0.7807881773399015 }, { "epoch": 0.2413178984861977, "grad_norm": 4.338817596435547, "learning_rate": 4.702049721862184e-06, "loss": 0.0471, "step": 813, "video_reward_cumulative_accuracy": 0.7810578105781057 }, { "epoch": 0.24161472246957555, "grad_norm": 1.308546781539917, "learning_rate": 4.700822128615703e-06, "loss": 0.028, "step": 814, "video_reward_cumulative_accuracy": 0.7813267813267813 }, { "epoch": 0.2419115464529534, "grad_norm": 5.44117546081543, "learning_rate": 4.699592172573061e-06, "loss": 0.08, "step": 815, "video_reward_cumulative_accuracy": 0.7815950920245399 }, { "epoch": 0.24220837043633126, "grad_norm": 3.996955394744873, "learning_rate": 4.698359855054733e-06, "loss": 0.0551, "step": 816, "video_reward_cumulative_accuracy": 0.7818627450980392 }, { "epoch": 0.2425051944197091, "grad_norm": 3.2032980918884277, "learning_rate": 4.6971251773837335e-06, "loss": 0.0969, "step": 817, "video_reward_cumulative_accuracy": 0.7821297429620563 }, { "epoch": 0.24280201840308696, "grad_norm": 3.8643088340759277, "learning_rate": 4.695888140885608e-06, "loss": 0.0489, "step": 818, "video_reward_cumulative_accuracy": 0.78239608801956 }, { "epoch": 0.24309884238646481, "grad_norm": 1.5098183155059814, "learning_rate": 4.6946487468884346e-06, "loss": 0.0363, "step": 819, "video_reward_cumulative_accuracy": 0.7826617826617827 }, { "epoch": 0.2433956663698427, "grad_norm": 2.6578338146209717, "learning_rate": 4.693406996722824e-06, "loss": 0.0805, "step": 820, "video_reward_cumulative_accuracy": 0.7829268292682927 }, { "epoch": 0.24369249035322055, "grad_norm": 2.7649166584014893, "learning_rate": 4.692162891721917e-06, "loss": 0.0424, "step": 821, "video_reward_cumulative_accuracy": 0.7825822168087698 }, { "epoch": 0.2439893143365984, "grad_norm": 1.1864911317825317, "learning_rate": 4.690916433221377e-06, "loss": 0.0167, "step": 822, "video_reward_cumulative_accuracy": 0.7828467153284672 }, { "epoch": 0.24428613831997625, "grad_norm": 1.7823535203933716, "learning_rate": 4.6896676225594016e-06, "loss": 0.0778, "step": 823, "video_reward_cumulative_accuracy": 0.7831105710814095 }, { "epoch": 0.2445829623033541, "grad_norm": 3.1167526245117188, "learning_rate": 4.68841646107671e-06, "loss": 0.0327, "step": 824, "video_reward_cumulative_accuracy": 0.783373786407767 }, { "epoch": 0.24487978628673196, "grad_norm": 3.2958545684814453, "learning_rate": 4.6871629501165435e-06, "loss": 0.024, "step": 825, "video_reward_cumulative_accuracy": 0.7836363636363637 }, { "epoch": 0.2451766102701098, "grad_norm": 3.2736194133758545, "learning_rate": 4.68590709102467e-06, "loss": 0.0416, "step": 826, "video_reward_cumulative_accuracy": 0.7832929782082324 }, { "epoch": 0.2454734342534877, "grad_norm": 2.1489417552948, "learning_rate": 4.684648885149374e-06, "loss": 0.0491, "step": 827, "video_reward_cumulative_accuracy": 0.7835550181378477 }, { "epoch": 0.24577025823686555, "grad_norm": 1.746704339981079, "learning_rate": 4.6833883338414635e-06, "loss": 0.0513, "step": 828, "video_reward_cumulative_accuracy": 0.7832125603864735 }, { "epoch": 0.2460670822202434, "grad_norm": 2.047314167022705, "learning_rate": 4.682125438454261e-06, "loss": 0.0399, "step": 829, "video_reward_cumulative_accuracy": 0.7828709288299156 }, { "epoch": 0.24636390620362125, "grad_norm": 1.1579736471176147, "learning_rate": 4.680860200343609e-06, "loss": 0.0214, "step": 830, "video_reward_cumulative_accuracy": 0.7831325301204819 }, { "epoch": 0.2466607301869991, "grad_norm": 3.0398266315460205, "learning_rate": 4.679592620867862e-06, "loss": 0.0607, "step": 831, "video_reward_cumulative_accuracy": 0.782791817087846 }, { "epoch": 0.24695755417037696, "grad_norm": 2.732314348220825, "learning_rate": 4.678322701387891e-06, "loss": 0.0403, "step": 832, "video_reward_cumulative_accuracy": 0.7824519230769231 }, { "epoch": 0.2472543781537548, "grad_norm": 2.1207661628723145, "learning_rate": 4.677050443267076e-06, "loss": 0.0301, "step": 833, "video_reward_cumulative_accuracy": 0.7827130852340937 }, { "epoch": 0.2475512021371327, "grad_norm": 1.3241153955459595, "learning_rate": 4.675775847871311e-06, "loss": 0.0499, "step": 834, "video_reward_cumulative_accuracy": 0.7823741007194245 }, { "epoch": 0.24784802612051055, "grad_norm": 1.7813966274261475, "learning_rate": 4.6744989165689975e-06, "loss": 0.0392, "step": 835, "video_reward_cumulative_accuracy": 0.7826347305389222 }, { "epoch": 0.2481448501038884, "grad_norm": 1.4840867519378662, "learning_rate": 4.673219650731045e-06, "loss": 0.0193, "step": 836, "video_reward_cumulative_accuracy": 0.7828947368421053 }, { "epoch": 0.24844167408726625, "grad_norm": 0.5827304720878601, "learning_rate": 4.67193805173087e-06, "loss": 0.0077, "step": 837, "video_reward_cumulative_accuracy": 0.7831541218637993 }, { "epoch": 0.2487384980706441, "grad_norm": 2.449009656906128, "learning_rate": 4.670654120944393e-06, "loss": 0.0828, "step": 838, "video_reward_cumulative_accuracy": 0.7828162291169452 }, { "epoch": 0.24903532205402196, "grad_norm": 2.1830556392669678, "learning_rate": 4.669367859750038e-06, "loss": 0.0239, "step": 839, "video_reward_cumulative_accuracy": 0.7830750893921334 }, { "epoch": 0.2493321460373998, "grad_norm": 2.4376471042633057, "learning_rate": 4.668079269528732e-06, "loss": 0.0652, "step": 840, "video_reward_cumulative_accuracy": 0.7833333333333333 }, { "epoch": 0.2496289700207777, "grad_norm": 8.17204761505127, "learning_rate": 4.666788351663902e-06, "loss": 0.1043, "step": 841, "video_reward_cumulative_accuracy": 0.7835909631391201 }, { "epoch": 0.24992579400415554, "grad_norm": 4.692628860473633, "learning_rate": 4.6654951075414715e-06, "loss": 0.1819, "step": 842, "video_reward_cumulative_accuracy": 0.7838479809976246 }, { "epoch": 0.25022261798753337, "grad_norm": 2.7783429622650146, "learning_rate": 4.664199538549865e-06, "loss": 0.0546, "step": 843, "video_reward_cumulative_accuracy": 0.7841043890865955 }, { "epoch": 0.25051944197091125, "grad_norm": 2.2595643997192383, "learning_rate": 4.662901646080002e-06, "loss": 0.0521, "step": 844, "video_reward_cumulative_accuracy": 0.7837677725118484 }, { "epoch": 0.25081626595428913, "grad_norm": 6.65918493270874, "learning_rate": 4.661601431525295e-06, "loss": 0.106, "step": 845, "video_reward_cumulative_accuracy": 0.7840236686390533 }, { "epoch": 0.25111308993766696, "grad_norm": 2.6098530292510986, "learning_rate": 4.660298896281653e-06, "loss": 0.0738, "step": 846, "video_reward_cumulative_accuracy": 0.7836879432624113 }, { "epoch": 0.25140991392104484, "grad_norm": 1.8904296159744263, "learning_rate": 4.658994041747471e-06, "loss": 0.0658, "step": 847, "video_reward_cumulative_accuracy": 0.7839433293978748 }, { "epoch": 0.25170673790442266, "grad_norm": 3.186429977416992, "learning_rate": 4.657686869323638e-06, "loss": 0.0371, "step": 848, "video_reward_cumulative_accuracy": 0.7841981132075472 }, { "epoch": 0.25200356188780054, "grad_norm": 1.2086502313613892, "learning_rate": 4.6563773804135305e-06, "loss": 0.038, "step": 849, "video_reward_cumulative_accuracy": 0.784452296819788 }, { "epoch": 0.25230038587117837, "grad_norm": 1.1845347881317139, "learning_rate": 4.655065576423013e-06, "loss": 0.0215, "step": 850, "video_reward_cumulative_accuracy": 0.7847058823529411 }, { "epoch": 0.25259720985455625, "grad_norm": 2.5198981761932373, "learning_rate": 4.6537514587604316e-06, "loss": 0.0581, "step": 851, "video_reward_cumulative_accuracy": 0.7849588719153937 }, { "epoch": 0.25289403383793413, "grad_norm": 4.27205753326416, "learning_rate": 4.652435028836622e-06, "loss": 0.0617, "step": 852, "video_reward_cumulative_accuracy": 0.7846244131455399 }, { "epoch": 0.25319085782131195, "grad_norm": 1.900675892829895, "learning_rate": 4.651116288064899e-06, "loss": 0.0384, "step": 853, "video_reward_cumulative_accuracy": 0.7848769050410317 }, { "epoch": 0.25348768180468984, "grad_norm": 1.898750901222229, "learning_rate": 4.649795237861058e-06, "loss": 0.0461, "step": 854, "video_reward_cumulative_accuracy": 0.7851288056206089 }, { "epoch": 0.25378450578806766, "grad_norm": 1.234446406364441, "learning_rate": 4.648471879643374e-06, "loss": 0.0797, "step": 855, "video_reward_cumulative_accuracy": 0.7847953216374269 }, { "epoch": 0.25408132977144554, "grad_norm": 2.0378189086914062, "learning_rate": 4.647146214832602e-06, "loss": 0.0835, "step": 856, "video_reward_cumulative_accuracy": 0.7844626168224299 }, { "epoch": 0.25437815375482337, "grad_norm": 1.5428322553634644, "learning_rate": 4.645818244851971e-06, "loss": 0.0477, "step": 857, "video_reward_cumulative_accuracy": 0.7841306884480747 }, { "epoch": 0.25467497773820125, "grad_norm": 1.234578251838684, "learning_rate": 4.644487971127186e-06, "loss": 0.0556, "step": 858, "video_reward_cumulative_accuracy": 0.7843822843822844 }, { "epoch": 0.2549718017215791, "grad_norm": 2.008531093597412, "learning_rate": 4.643155395086425e-06, "loss": 0.0508, "step": 859, "video_reward_cumulative_accuracy": 0.7846332945285215 }, { "epoch": 0.25526862570495695, "grad_norm": 2.030647039413452, "learning_rate": 4.6418205181603385e-06, "loss": 0.0239, "step": 860, "video_reward_cumulative_accuracy": 0.7848837209302325 }, { "epoch": 0.25556544968833483, "grad_norm": 3.389904022216797, "learning_rate": 4.640483341782044e-06, "loss": 0.0565, "step": 861, "video_reward_cumulative_accuracy": 0.7851335656213705 }, { "epoch": 0.25586227367171266, "grad_norm": 1.0547555685043335, "learning_rate": 4.639143867387132e-06, "loss": 0.0433, "step": 862, "video_reward_cumulative_accuracy": 0.7853828306264501 }, { "epoch": 0.25615909765509054, "grad_norm": 2.2876555919647217, "learning_rate": 4.6378020964136586e-06, "loss": 0.0344, "step": 863, "video_reward_cumulative_accuracy": 0.7856315179606026 }, { "epoch": 0.25645592163846836, "grad_norm": 1.4506580829620361, "learning_rate": 4.636458030302144e-06, "loss": 0.0418, "step": 864, "video_reward_cumulative_accuracy": 0.7858796296296297 }, { "epoch": 0.25675274562184625, "grad_norm": 5.08413028717041, "learning_rate": 4.635111670495574e-06, "loss": 0.049, "step": 865, "video_reward_cumulative_accuracy": 0.7855491329479769 }, { "epoch": 0.2570495696052241, "grad_norm": 4.657954692840576, "learning_rate": 4.6337630184393965e-06, "loss": 0.0434, "step": 866, "video_reward_cumulative_accuracy": 0.785796766743649 }, { "epoch": 0.25734639358860195, "grad_norm": 1.428345799446106, "learning_rate": 4.632412075581521e-06, "loss": 0.0475, "step": 867, "video_reward_cumulative_accuracy": 0.7860438292964245 }, { "epoch": 0.25764321757197983, "grad_norm": 0.920592188835144, "learning_rate": 4.6310588433723145e-06, "loss": 0.0175, "step": 868, "video_reward_cumulative_accuracy": 0.7862903225806451 }, { "epoch": 0.25794004155535766, "grad_norm": 3.902883291244507, "learning_rate": 4.629703323264605e-06, "loss": 0.0379, "step": 869, "video_reward_cumulative_accuracy": 0.786536248561565 }, { "epoch": 0.25823686553873554, "grad_norm": 2.918687582015991, "learning_rate": 4.6283455167136724e-06, "loss": 0.0538, "step": 870, "video_reward_cumulative_accuracy": 0.7867816091954023 }, { "epoch": 0.25853368952211336, "grad_norm": 2.900933027267456, "learning_rate": 4.626985425177256e-06, "loss": 0.0521, "step": 871, "video_reward_cumulative_accuracy": 0.7870264064293915 }, { "epoch": 0.25883051350549124, "grad_norm": 3.1551060676574707, "learning_rate": 4.625623050115545e-06, "loss": 0.0863, "step": 872, "video_reward_cumulative_accuracy": 0.786697247706422 }, { "epoch": 0.2591273374888691, "grad_norm": 1.1339958906173706, "learning_rate": 4.6242583929911825e-06, "loss": 0.0553, "step": 873, "video_reward_cumulative_accuracy": 0.786368843069874 }, { "epoch": 0.25942416147224695, "grad_norm": 1.0027540922164917, "learning_rate": 4.6228914552692585e-06, "loss": 0.0178, "step": 874, "video_reward_cumulative_accuracy": 0.7866132723112128 }, { "epoch": 0.25972098545562483, "grad_norm": 3.1570804119110107, "learning_rate": 4.621522238417314e-06, "loss": 0.1099, "step": 875, "video_reward_cumulative_accuracy": 0.7868571428571428 }, { "epoch": 0.26001780943900266, "grad_norm": 2.4433491230010986, "learning_rate": 4.620150743905338e-06, "loss": 0.0314, "step": 876, "video_reward_cumulative_accuracy": 0.7865296803652968 }, { "epoch": 0.26031463342238054, "grad_norm": 3.163719654083252, "learning_rate": 4.6187769732057595e-06, "loss": 0.0235, "step": 877, "video_reward_cumulative_accuracy": 0.7867730900798175 }, { "epoch": 0.26061145740575836, "grad_norm": 4.77492094039917, "learning_rate": 4.617400927793457e-06, "loss": 0.0781, "step": 878, "video_reward_cumulative_accuracy": 0.7864464692482915 }, { "epoch": 0.26090828138913624, "grad_norm": 1.8638689517974854, "learning_rate": 4.6160226091457495e-06, "loss": 0.0368, "step": 879, "video_reward_cumulative_accuracy": 0.7861205915813424 }, { "epoch": 0.2612051053725141, "grad_norm": 2.038536548614502, "learning_rate": 4.6146420187423935e-06, "loss": 0.0344, "step": 880, "video_reward_cumulative_accuracy": 0.7863636363636364 }, { "epoch": 0.26150192935589195, "grad_norm": 2.544743299484253, "learning_rate": 4.613259158065588e-06, "loss": 0.0741, "step": 881, "video_reward_cumulative_accuracy": 0.7866061293984109 }, { "epoch": 0.26179875333926983, "grad_norm": 4.100607395172119, "learning_rate": 4.611874028599969e-06, "loss": 0.0515, "step": 882, "video_reward_cumulative_accuracy": 0.7868480725623582 }, { "epoch": 0.26209557732264765, "grad_norm": 0.9536772966384888, "learning_rate": 4.610486631832606e-06, "loss": 0.0332, "step": 883, "video_reward_cumulative_accuracy": 0.7865232163080408 }, { "epoch": 0.26239240130602554, "grad_norm": 1.7179930210113525, "learning_rate": 4.609096969253005e-06, "loss": 0.0275, "step": 884, "video_reward_cumulative_accuracy": 0.7867647058823529 }, { "epoch": 0.26268922528940336, "grad_norm": 2.068021059036255, "learning_rate": 4.607705042353104e-06, "loss": 0.0526, "step": 885, "video_reward_cumulative_accuracy": 0.7870056497175141 }, { "epoch": 0.26298604927278124, "grad_norm": 3.365269899368286, "learning_rate": 4.60631085262727e-06, "loss": 0.0301, "step": 886, "video_reward_cumulative_accuracy": 0.7872460496613995 }, { "epoch": 0.2632828732561591, "grad_norm": 4.123976707458496, "learning_rate": 4.604914401572301e-06, "loss": 0.0437, "step": 887, "video_reward_cumulative_accuracy": 0.7874859075535513 }, { "epoch": 0.26357969723953695, "grad_norm": 2.0754764080047607, "learning_rate": 4.603515690687425e-06, "loss": 0.0825, "step": 888, "video_reward_cumulative_accuracy": 0.7877252252252253 }, { "epoch": 0.26387652122291483, "grad_norm": 2.7115345001220703, "learning_rate": 4.602114721474293e-06, "loss": 0.0596, "step": 889, "video_reward_cumulative_accuracy": 0.7879640044994376 }, { "epoch": 0.26417334520629265, "grad_norm": 1.4795091152191162, "learning_rate": 4.60071149543698e-06, "loss": 0.0438, "step": 890, "video_reward_cumulative_accuracy": 0.7882022471910113 }, { "epoch": 0.26447016918967053, "grad_norm": 4.9827985763549805, "learning_rate": 4.599306014081987e-06, "loss": 0.0523, "step": 891, "video_reward_cumulative_accuracy": 0.7878787878787878 }, { "epoch": 0.26476699317304836, "grad_norm": 3.740387439727783, "learning_rate": 4.597898278918233e-06, "loss": 0.0927, "step": 892, "video_reward_cumulative_accuracy": 0.7881165919282511 }, { "epoch": 0.26506381715642624, "grad_norm": 2.645918130874634, "learning_rate": 4.596488291457061e-06, "loss": 0.0482, "step": 893, "video_reward_cumulative_accuracy": 0.7883538633818589 }, { "epoch": 0.2653606411398041, "grad_norm": 3.115306854248047, "learning_rate": 4.595076053212226e-06, "loss": 0.0522, "step": 894, "video_reward_cumulative_accuracy": 0.7885906040268457 }, { "epoch": 0.26565746512318195, "grad_norm": 2.4514198303222656, "learning_rate": 4.593661565699905e-06, "loss": 0.0442, "step": 895, "video_reward_cumulative_accuracy": 0.788826815642458 }, { "epoch": 0.2659542891065598, "grad_norm": 1.658631443977356, "learning_rate": 4.592244830438688e-06, "loss": 0.0353, "step": 896, "video_reward_cumulative_accuracy": 0.7890625 }, { "epoch": 0.26625111308993765, "grad_norm": 4.696023464202881, "learning_rate": 4.590825848949576e-06, "loss": 0.0655, "step": 897, "video_reward_cumulative_accuracy": 0.7892976588628763 }, { "epoch": 0.26654793707331553, "grad_norm": 4.951619625091553, "learning_rate": 4.589404622755985e-06, "loss": 0.0759, "step": 898, "video_reward_cumulative_accuracy": 0.7895322939866369 }, { "epoch": 0.26684476105669336, "grad_norm": 4.583511829376221, "learning_rate": 4.587981153383738e-06, "loss": 0.1074, "step": 899, "video_reward_cumulative_accuracy": 0.789210233592881 }, { "epoch": 0.26714158504007124, "grad_norm": 1.0951406955718994, "learning_rate": 4.586555442361068e-06, "loss": 0.0384, "step": 900, "video_reward_cumulative_accuracy": 0.7894444444444444 }, { "epoch": 0.2674384090234491, "grad_norm": 2.3430957794189453, "learning_rate": 4.585127491218615e-06, "loss": 0.0268, "step": 901, "video_reward_cumulative_accuracy": 0.7896781354051055 }, { "epoch": 0.26773523300682694, "grad_norm": 3.9636011123657227, "learning_rate": 4.5836973014894225e-06, "loss": 0.0735, "step": 902, "video_reward_cumulative_accuracy": 0.7899113082039911 }, { "epoch": 0.2680320569902048, "grad_norm": 3.968548536300659, "learning_rate": 4.582264874708937e-06, "loss": 0.0568, "step": 903, "video_reward_cumulative_accuracy": 0.7901439645625692 }, { "epoch": 0.26832888097358265, "grad_norm": 1.8706952333450317, "learning_rate": 4.58083021241501e-06, "loss": 0.0886, "step": 904, "video_reward_cumulative_accuracy": 0.7898230088495575 }, { "epoch": 0.26862570495696053, "grad_norm": 2.0862226486206055, "learning_rate": 4.579393316147888e-06, "loss": 0.057, "step": 905, "video_reward_cumulative_accuracy": 0.7900552486187845 }, { "epoch": 0.26892252894033836, "grad_norm": 2.1430046558380127, "learning_rate": 4.577954187450221e-06, "loss": 0.051, "step": 906, "video_reward_cumulative_accuracy": 0.7902869757174393 }, { "epoch": 0.26921935292371624, "grad_norm": 1.2016264200210571, "learning_rate": 4.576512827867051e-06, "loss": 0.0298, "step": 907, "video_reward_cumulative_accuracy": 0.7905181918412348 }, { "epoch": 0.2695161769070941, "grad_norm": 2.2290544509887695, "learning_rate": 4.57506923894582e-06, "loss": 0.059, "step": 908, "video_reward_cumulative_accuracy": 0.7907488986784141 }, { "epoch": 0.26981300089047194, "grad_norm": 1.2380579710006714, "learning_rate": 4.573623422236359e-06, "loss": 0.0574, "step": 909, "video_reward_cumulative_accuracy": 0.7904290429042904 }, { "epoch": 0.2701098248738498, "grad_norm": 1.3100279569625854, "learning_rate": 4.572175379290892e-06, "loss": 0.0188, "step": 910, "video_reward_cumulative_accuracy": 0.7906593406593406 }, { "epoch": 0.27040664885722765, "grad_norm": 1.4202399253845215, "learning_rate": 4.570725111664035e-06, "loss": 0.0274, "step": 911, "video_reward_cumulative_accuracy": 0.7908891328210758 }, { "epoch": 0.27070347284060553, "grad_norm": 1.6007649898529053, "learning_rate": 4.569272620912791e-06, "loss": 0.0389, "step": 912, "video_reward_cumulative_accuracy": 0.7911184210526315 }, { "epoch": 0.27100029682398336, "grad_norm": 1.8392062187194824, "learning_rate": 4.56781790859655e-06, "loss": 0.07, "step": 913, "video_reward_cumulative_accuracy": 0.7913472070098576 }, { "epoch": 0.27129712080736124, "grad_norm": 2.425304651260376, "learning_rate": 4.566360976277086e-06, "loss": 0.0375, "step": 914, "video_reward_cumulative_accuracy": 0.7915754923413567 }, { "epoch": 0.2715939447907391, "grad_norm": 3.8037993907928467, "learning_rate": 4.564901825518558e-06, "loss": 0.066, "step": 915, "video_reward_cumulative_accuracy": 0.7918032786885246 }, { "epoch": 0.27189076877411694, "grad_norm": 2.4875757694244385, "learning_rate": 4.563440457887506e-06, "loss": 0.0269, "step": 916, "video_reward_cumulative_accuracy": 0.7920305676855895 }, { "epoch": 0.2721875927574948, "grad_norm": 2.975370168685913, "learning_rate": 4.561976874952849e-06, "loss": 0.0474, "step": 917, "video_reward_cumulative_accuracy": 0.792257360959651 }, { "epoch": 0.27248441674087265, "grad_norm": 1.9937337636947632, "learning_rate": 4.560511078285885e-06, "loss": 0.092, "step": 918, "video_reward_cumulative_accuracy": 0.7924836601307189 }, { "epoch": 0.27278124072425053, "grad_norm": 2.1573855876922607, "learning_rate": 4.559043069460291e-06, "loss": 0.0465, "step": 919, "video_reward_cumulative_accuracy": 0.7921653971708379 }, { "epoch": 0.27307806470762835, "grad_norm": 1.528318166732788, "learning_rate": 4.557572850052116e-06, "loss": 0.0668, "step": 920, "video_reward_cumulative_accuracy": 0.7918478260869565 }, { "epoch": 0.27337488869100623, "grad_norm": 3.7441587448120117, "learning_rate": 4.556100421639783e-06, "loss": 0.1027, "step": 921, "video_reward_cumulative_accuracy": 0.7920738327904452 }, { "epoch": 0.2736717126743841, "grad_norm": 1.5246275663375854, "learning_rate": 4.554625785804087e-06, "loss": 0.0302, "step": 922, "video_reward_cumulative_accuracy": 0.7922993492407809 }, { "epoch": 0.27396853665776194, "grad_norm": 3.7128987312316895, "learning_rate": 4.553148944128192e-06, "loss": 0.0519, "step": 923, "video_reward_cumulative_accuracy": 0.7925243770314193 }, { "epoch": 0.2742653606411398, "grad_norm": 5.355534076690674, "learning_rate": 4.551669898197631e-06, "loss": 0.0838, "step": 924, "video_reward_cumulative_accuracy": 0.7927489177489178 }, { "epoch": 0.27456218462451765, "grad_norm": 3.1454975605010986, "learning_rate": 4.550188649600306e-06, "loss": 0.0614, "step": 925, "video_reward_cumulative_accuracy": 0.792972972972973 }, { "epoch": 0.2748590086078955, "grad_norm": 6.770321846008301, "learning_rate": 4.548705199926478e-06, "loss": 0.0797, "step": 926, "video_reward_cumulative_accuracy": 0.7931965442764579 }, { "epoch": 0.27515583259127335, "grad_norm": 4.7147064208984375, "learning_rate": 4.547219550768774e-06, "loss": 0.0511, "step": 927, "video_reward_cumulative_accuracy": 0.7928802588996764 }, { "epoch": 0.27545265657465123, "grad_norm": 3.4447805881500244, "learning_rate": 4.545731703722185e-06, "loss": 0.0591, "step": 928, "video_reward_cumulative_accuracy": 0.7931034482758621 }, { "epoch": 0.2757494805580291, "grad_norm": 1.9188413619995117, "learning_rate": 4.544241660384057e-06, "loss": 0.0486, "step": 929, "video_reward_cumulative_accuracy": 0.7927879440258342 }, { "epoch": 0.27604630454140694, "grad_norm": 0.8871810436248779, "learning_rate": 4.542749422354098e-06, "loss": 0.0457, "step": 930, "video_reward_cumulative_accuracy": 0.7924731182795699 }, { "epoch": 0.2763431285247848, "grad_norm": 1.0391457080841064, "learning_rate": 4.54125499123437e-06, "loss": 0.0463, "step": 931, "video_reward_cumulative_accuracy": 0.7926960257787325 }, { "epoch": 0.27663995250816265, "grad_norm": 3.6447765827178955, "learning_rate": 4.539758368629288e-06, "loss": 0.0333, "step": 932, "video_reward_cumulative_accuracy": 0.7929184549356223 }, { "epoch": 0.2769367764915405, "grad_norm": 2.135462760925293, "learning_rate": 4.538259556145623e-06, "loss": 0.0349, "step": 933, "video_reward_cumulative_accuracy": 0.7931404072883173 }, { "epoch": 0.27723360047491835, "grad_norm": 1.0831674337387085, "learning_rate": 4.5367585553924965e-06, "loss": 0.0454, "step": 934, "video_reward_cumulative_accuracy": 0.7933618843683083 }, { "epoch": 0.27753042445829623, "grad_norm": 2.257012128829956, "learning_rate": 4.5352553679813775e-06, "loss": 0.0664, "step": 935, "video_reward_cumulative_accuracy": 0.793048128342246 }, { "epoch": 0.2778272484416741, "grad_norm": 1.1687437295913696, "learning_rate": 4.5337499955260825e-06, "loss": 0.0443, "step": 936, "video_reward_cumulative_accuracy": 0.7932692307692307 }, { "epoch": 0.27812407242505194, "grad_norm": 1.566916823387146, "learning_rate": 4.532242439642778e-06, "loss": 0.0663, "step": 937, "video_reward_cumulative_accuracy": 0.7934898612593383 }, { "epoch": 0.2784208964084298, "grad_norm": 2.265585422515869, "learning_rate": 4.530732701949968e-06, "loss": 0.0347, "step": 938, "video_reward_cumulative_accuracy": 0.7937100213219617 }, { "epoch": 0.27871772039180764, "grad_norm": 1.542738437652588, "learning_rate": 4.529220784068505e-06, "loss": 0.0337, "step": 939, "video_reward_cumulative_accuracy": 0.7933972310969116 }, { "epoch": 0.2790145443751855, "grad_norm": 1.868859887123108, "learning_rate": 4.527706687621578e-06, "loss": 0.0593, "step": 940, "video_reward_cumulative_accuracy": 0.7930851063829787 }, { "epoch": 0.27931136835856335, "grad_norm": 1.1284223794937134, "learning_rate": 4.526190414234718e-06, "loss": 0.0271, "step": 941, "video_reward_cumulative_accuracy": 0.7933049946865037 }, { "epoch": 0.27960819234194123, "grad_norm": 2.6170237064361572, "learning_rate": 4.524671965535791e-06, "loss": 0.0675, "step": 942, "video_reward_cumulative_accuracy": 0.7929936305732485 }, { "epoch": 0.2799050163253191, "grad_norm": 1.8915306329727173, "learning_rate": 4.523151343154999e-06, "loss": 0.0355, "step": 943, "video_reward_cumulative_accuracy": 0.7932131495227995 }, { "epoch": 0.28020184030869694, "grad_norm": 1.1092487573623657, "learning_rate": 4.521628548724877e-06, "loss": 0.0447, "step": 944, "video_reward_cumulative_accuracy": 0.7934322033898306 }, { "epoch": 0.2804986642920748, "grad_norm": 2.1226096153259277, "learning_rate": 4.5201035838802935e-06, "loss": 0.076, "step": 945, "video_reward_cumulative_accuracy": 0.7936507936507936 }, { "epoch": 0.28079548827545264, "grad_norm": 3.9487340450286865, "learning_rate": 4.518576450258446e-06, "loss": 0.102, "step": 946, "video_reward_cumulative_accuracy": 0.7938689217758985 }, { "epoch": 0.2810923122588305, "grad_norm": 2.5979459285736084, "learning_rate": 4.517047149498861e-06, "loss": 0.0372, "step": 947, "video_reward_cumulative_accuracy": 0.7940865892291447 }, { "epoch": 0.28138913624220835, "grad_norm": 5.942515850067139, "learning_rate": 4.51551568324339e-06, "loss": 0.1805, "step": 948, "video_reward_cumulative_accuracy": 0.7943037974683544 }, { "epoch": 0.28168596022558623, "grad_norm": 3.59495210647583, "learning_rate": 4.5139820531362125e-06, "loss": 0.0965, "step": 949, "video_reward_cumulative_accuracy": 0.7945205479452054 }, { "epoch": 0.2819827842089641, "grad_norm": 1.3361320495605469, "learning_rate": 4.512446260823828e-06, "loss": 0.0318, "step": 950, "video_reward_cumulative_accuracy": 0.7947368421052632 }, { "epoch": 0.28227960819234194, "grad_norm": 3.481994390487671, "learning_rate": 4.510908307955059e-06, "loss": 0.0588, "step": 951, "video_reward_cumulative_accuracy": 0.7944269190325972 }, { "epoch": 0.2825764321757198, "grad_norm": 0.8120396137237549, "learning_rate": 4.509368196181048e-06, "loss": 0.0275, "step": 952, "video_reward_cumulative_accuracy": 0.7946428571428571 }, { "epoch": 0.28287325615909764, "grad_norm": 6.15502405166626, "learning_rate": 4.507825927155253e-06, "loss": 0.0756, "step": 953, "video_reward_cumulative_accuracy": 0.7948583420776495 }, { "epoch": 0.2831700801424755, "grad_norm": 3.1125705242156982, "learning_rate": 4.506281502533451e-06, "loss": 0.0404, "step": 954, "video_reward_cumulative_accuracy": 0.7950733752620545 }, { "epoch": 0.28346690412585335, "grad_norm": 0.9939224123954773, "learning_rate": 4.50473492397373e-06, "loss": 0.0321, "step": 955, "video_reward_cumulative_accuracy": 0.7952879581151833 }, { "epoch": 0.28376372810923123, "grad_norm": 2.3135781288146973, "learning_rate": 4.503186193136493e-06, "loss": 0.0179, "step": 956, "video_reward_cumulative_accuracy": 0.7955020920502092 }, { "epoch": 0.2840605520926091, "grad_norm": 1.7014567852020264, "learning_rate": 4.501635311684453e-06, "loss": 0.095, "step": 957, "video_reward_cumulative_accuracy": 0.7951933124346917 }, { "epoch": 0.28435737607598693, "grad_norm": 1.145207166671753, "learning_rate": 4.500082281282632e-06, "loss": 0.044, "step": 958, "video_reward_cumulative_accuracy": 0.7954070981210856 }, { "epoch": 0.2846542000593648, "grad_norm": 1.2436047792434692, "learning_rate": 4.4985271035983584e-06, "loss": 0.0842, "step": 959, "video_reward_cumulative_accuracy": 0.7950990615224192 }, { "epoch": 0.28495102404274264, "grad_norm": 2.411715030670166, "learning_rate": 4.496969780301267e-06, "loss": 0.0421, "step": 960, "video_reward_cumulative_accuracy": 0.7953125 }, { "epoch": 0.2852478480261205, "grad_norm": 1.0124142169952393, "learning_rate": 4.495410313063295e-06, "loss": 0.037, "step": 961, "video_reward_cumulative_accuracy": 0.795525494276795 }, { "epoch": 0.28554467200949835, "grad_norm": 1.802912950515747, "learning_rate": 4.493848703558681e-06, "loss": 0.0552, "step": 962, "video_reward_cumulative_accuracy": 0.7957380457380457 }, { "epoch": 0.2858414959928762, "grad_norm": 2.056218147277832, "learning_rate": 4.492284953463967e-06, "loss": 0.0721, "step": 963, "video_reward_cumulative_accuracy": 0.7959501557632399 }, { "epoch": 0.2861383199762541, "grad_norm": 2.424288511276245, "learning_rate": 4.490719064457987e-06, "loss": 0.0269, "step": 964, "video_reward_cumulative_accuracy": 0.7956431535269709 }, { "epoch": 0.28643514395963193, "grad_norm": 1.5298640727996826, "learning_rate": 4.4891510382218775e-06, "loss": 0.0371, "step": 965, "video_reward_cumulative_accuracy": 0.7958549222797927 }, { "epoch": 0.2867319679430098, "grad_norm": 2.102123737335205, "learning_rate": 4.487580876439066e-06, "loss": 0.0662, "step": 966, "video_reward_cumulative_accuracy": 0.7960662525879917 }, { "epoch": 0.28702879192638764, "grad_norm": 1.2193660736083984, "learning_rate": 4.486008580795273e-06, "loss": 0.0652, "step": 967, "video_reward_cumulative_accuracy": 0.795760082730093 }, { "epoch": 0.2873256159097655, "grad_norm": 2.06874418258667, "learning_rate": 4.484434152978512e-06, "loss": 0.0292, "step": 968, "video_reward_cumulative_accuracy": 0.7954545454545454 }, { "epoch": 0.28762243989314334, "grad_norm": 1.5268133878707886, "learning_rate": 4.482857594679082e-06, "loss": 0.0429, "step": 969, "video_reward_cumulative_accuracy": 0.7951496388028896 }, { "epoch": 0.2879192638765212, "grad_norm": 2.622420072555542, "learning_rate": 4.4812789075895735e-06, "loss": 0.1016, "step": 970, "video_reward_cumulative_accuracy": 0.795360824742268 }, { "epoch": 0.2882160878598991, "grad_norm": 2.2772624492645264, "learning_rate": 4.479698093404858e-06, "loss": 0.0652, "step": 971, "video_reward_cumulative_accuracy": 0.7950566426364573 }, { "epoch": 0.28851291184327693, "grad_norm": 0.8947013020515442, "learning_rate": 4.478115153822096e-06, "loss": 0.0285, "step": 972, "video_reward_cumulative_accuracy": 0.7952674897119342 }, { "epoch": 0.2888097358266548, "grad_norm": 1.8154103755950928, "learning_rate": 4.476530090540724e-06, "loss": 0.0613, "step": 973, "video_reward_cumulative_accuracy": 0.7949640287769785 }, { "epoch": 0.28910655981003264, "grad_norm": 1.2615007162094116, "learning_rate": 4.474942905262462e-06, "loss": 0.0474, "step": 974, "video_reward_cumulative_accuracy": 0.7951745379876797 }, { "epoch": 0.2894033837934105, "grad_norm": 0.8353313207626343, "learning_rate": 4.473353599691308e-06, "loss": 0.0222, "step": 975, "video_reward_cumulative_accuracy": 0.7948717948717948 }, { "epoch": 0.28970020777678834, "grad_norm": 2.982597827911377, "learning_rate": 4.471762175533535e-06, "loss": 0.0546, "step": 976, "video_reward_cumulative_accuracy": 0.7945696721311475 }, { "epoch": 0.2899970317601662, "grad_norm": 2.2625701427459717, "learning_rate": 4.470168634497692e-06, "loss": 0.0405, "step": 977, "video_reward_cumulative_accuracy": 0.7947799385875128 }, { "epoch": 0.2902938557435441, "grad_norm": 1.974323034286499, "learning_rate": 4.4685729782946005e-06, "loss": 0.0385, "step": 978, "video_reward_cumulative_accuracy": 0.7949897750511248 }, { "epoch": 0.29059067972692193, "grad_norm": 4.570712566375732, "learning_rate": 4.46697520863735e-06, "loss": 0.0505, "step": 979, "video_reward_cumulative_accuracy": 0.7951991828396323 }, { "epoch": 0.2908875037102998, "grad_norm": 0.7615110874176025, "learning_rate": 4.465375327241305e-06, "loss": 0.0154, "step": 980, "video_reward_cumulative_accuracy": 0.7954081632653062 }, { "epoch": 0.29118432769367764, "grad_norm": 3.337805986404419, "learning_rate": 4.46377333582409e-06, "loss": 0.0636, "step": 981, "video_reward_cumulative_accuracy": 0.7956167176350663 }, { "epoch": 0.2914811516770555, "grad_norm": 1.5002707242965698, "learning_rate": 4.4621692361056005e-06, "loss": 0.0339, "step": 982, "video_reward_cumulative_accuracy": 0.7958248472505092 }, { "epoch": 0.29177797566043334, "grad_norm": 3.1056461334228516, "learning_rate": 4.460563029807991e-06, "loss": 0.0719, "step": 983, "video_reward_cumulative_accuracy": 0.7960325534079349 }, { "epoch": 0.2920747996438112, "grad_norm": 4.184977054595947, "learning_rate": 4.4589547186556825e-06, "loss": 0.0574, "step": 984, "video_reward_cumulative_accuracy": 0.796239837398374 }, { "epoch": 0.2923716236271891, "grad_norm": 0.9293310046195984, "learning_rate": 4.45734430437535e-06, "loss": 0.0066, "step": 985, "video_reward_cumulative_accuracy": 0.7964467005076142 }, { "epoch": 0.29266844761056693, "grad_norm": 1.263472557067871, "learning_rate": 4.455731788695933e-06, "loss": 0.0232, "step": 986, "video_reward_cumulative_accuracy": 0.7966531440162272 }, { "epoch": 0.2929652715939448, "grad_norm": 2.434809446334839, "learning_rate": 4.4541171733486224e-06, "loss": 0.06, "step": 987, "video_reward_cumulative_accuracy": 0.7958459979736575 }, { "epoch": 0.29326209557732263, "grad_norm": 4.916622161865234, "learning_rate": 4.452500460066863e-06, "loss": 0.0295, "step": 988, "video_reward_cumulative_accuracy": 0.7960526315789473 }, { "epoch": 0.2935589195607005, "grad_norm": 1.8236298561096191, "learning_rate": 4.450881650586354e-06, "loss": 0.0219, "step": 989, "video_reward_cumulative_accuracy": 0.7957532861476239 }, { "epoch": 0.29385574354407834, "grad_norm": 1.2961921691894531, "learning_rate": 4.449260746645046e-06, "loss": 0.0212, "step": 990, "video_reward_cumulative_accuracy": 0.795959595959596 }, { "epoch": 0.2941525675274562, "grad_norm": 0.6145333051681519, "learning_rate": 4.447637749983135e-06, "loss": 0.0062, "step": 991, "video_reward_cumulative_accuracy": 0.7961654894046418 }, { "epoch": 0.2944493915108341, "grad_norm": 3.7940635681152344, "learning_rate": 4.446012662343066e-06, "loss": 0.0816, "step": 992, "video_reward_cumulative_accuracy": 0.7953629032258065 }, { "epoch": 0.2947462154942119, "grad_norm": 2.3133246898651123, "learning_rate": 4.444385485469529e-06, "loss": 0.0319, "step": 993, "video_reward_cumulative_accuracy": 0.7955689828801611 }, { "epoch": 0.2950430394775898, "grad_norm": 2.988262891769409, "learning_rate": 4.442756221109456e-06, "loss": 0.0406, "step": 994, "video_reward_cumulative_accuracy": 0.795774647887324 }, { "epoch": 0.29533986346096763, "grad_norm": 0.47877877950668335, "learning_rate": 4.441124871012018e-06, "loss": 0.0096, "step": 995, "video_reward_cumulative_accuracy": 0.7959798994974875 }, { "epoch": 0.2956366874443455, "grad_norm": 3.234767198562622, "learning_rate": 4.439491436928631e-06, "loss": 0.0469, "step": 996, "video_reward_cumulative_accuracy": 0.7961847389558233 }, { "epoch": 0.29593351142772334, "grad_norm": 2.747255325317383, "learning_rate": 4.437855920612945e-06, "loss": 0.094, "step": 997, "video_reward_cumulative_accuracy": 0.7958876629889668 }, { "epoch": 0.2962303354111012, "grad_norm": 1.539255142211914, "learning_rate": 4.436218323820843e-06, "loss": 0.0229, "step": 998, "video_reward_cumulative_accuracy": 0.7960921843687375 }, { "epoch": 0.2965271593944791, "grad_norm": 4.402137279510498, "learning_rate": 4.4345786483104455e-06, "loss": 0.083, "step": 999, "video_reward_cumulative_accuracy": 0.7957957957957958 }, { "epoch": 0.2968239833778569, "grad_norm": 4.107393741607666, "learning_rate": 4.432936895842104e-06, "loss": 0.0391, "step": 1000, "video_reward_cumulative_accuracy": 0.796 }, { "epoch": 0.2968239833778569, "eval_runtime": 135.0462, "eval_samples_per_second": 5.842, "eval_steps_per_second": 0.733, "eval_test_set_accuracy": 0.7790404040404041, "step": 1000 }, { "epoch": 0.2971208073612348, "grad_norm": 2.150050640106201, "learning_rate": 4.431293068178397e-06, "loss": 0.1201, "step": 1001, "video_reward_cumulative_accuracy": 0.7957042957042957 }, { "epoch": 0.29741763134461263, "grad_norm": 1.469811201095581, "learning_rate": 4.429647167084135e-06, "loss": 0.022, "step": 1002, "video_reward_cumulative_accuracy": 0.7959081836327345 }, { "epoch": 0.2977144553279905, "grad_norm": 7.703973770141602, "learning_rate": 4.4279991943263525e-06, "loss": 0.0968, "step": 1003, "video_reward_cumulative_accuracy": 0.7961116650049851 }, { "epoch": 0.29801127931136834, "grad_norm": 4.6636271476745605, "learning_rate": 4.426349151674307e-06, "loss": 0.0969, "step": 1004, "video_reward_cumulative_accuracy": 0.795816733067729 }, { "epoch": 0.2983081032947462, "grad_norm": 4.039132118225098, "learning_rate": 4.424697040899481e-06, "loss": 0.0568, "step": 1005, "video_reward_cumulative_accuracy": 0.7955223880597015 }, { "epoch": 0.2986049272781241, "grad_norm": 3.1670384407043457, "learning_rate": 4.423042863775574e-06, "loss": 0.1071, "step": 1006, "video_reward_cumulative_accuracy": 0.7952286282306164 }, { "epoch": 0.2989017512615019, "grad_norm": 6.5014967918396, "learning_rate": 4.421386622078507e-06, "loss": 0.0683, "step": 1007, "video_reward_cumulative_accuracy": 0.7954319761668321 }, { "epoch": 0.2991985752448798, "grad_norm": 1.935164451599121, "learning_rate": 4.419728317586416e-06, "loss": 0.0532, "step": 1008, "video_reward_cumulative_accuracy": 0.7951388888888888 }, { "epoch": 0.29949539922825763, "grad_norm": 0.7051401138305664, "learning_rate": 4.418067952079651e-06, "loss": 0.0142, "step": 1009, "video_reward_cumulative_accuracy": 0.7953419226957383 }, { "epoch": 0.2997922232116355, "grad_norm": 3.0097496509552, "learning_rate": 4.416405527340776e-06, "loss": 0.0499, "step": 1010, "video_reward_cumulative_accuracy": 0.7955445544554456 }, { "epoch": 0.30008904719501334, "grad_norm": 3.7447476387023926, "learning_rate": 4.414741045154566e-06, "loss": 0.0378, "step": 1011, "video_reward_cumulative_accuracy": 0.7957467853610287 }, { "epoch": 0.3003858711783912, "grad_norm": 3.5587432384490967, "learning_rate": 4.4130745073080025e-06, "loss": 0.0991, "step": 1012, "video_reward_cumulative_accuracy": 0.7949604743083004 }, { "epoch": 0.3006826951617691, "grad_norm": 0.9364858269691467, "learning_rate": 4.411405915590278e-06, "loss": 0.0261, "step": 1013, "video_reward_cumulative_accuracy": 0.7951628825271471 }, { "epoch": 0.3009795191451469, "grad_norm": 0.5637649297714233, "learning_rate": 4.409735271792786e-06, "loss": 0.0121, "step": 1014, "video_reward_cumulative_accuracy": 0.7953648915187377 }, { "epoch": 0.3012763431285248, "grad_norm": 1.5942317247390747, "learning_rate": 4.408062577709124e-06, "loss": 0.0446, "step": 1015, "video_reward_cumulative_accuracy": 0.7955665024630542 }, { "epoch": 0.30157316711190263, "grad_norm": 2.7422399520874023, "learning_rate": 4.406387835135094e-06, "loss": 0.0223, "step": 1016, "video_reward_cumulative_accuracy": 0.7957677165354331 }, { "epoch": 0.3018699910952805, "grad_norm": 2.2551071643829346, "learning_rate": 4.404711045868694e-06, "loss": 0.0434, "step": 1017, "video_reward_cumulative_accuracy": 0.795968534906588 }, { "epoch": 0.30216681507865834, "grad_norm": 5.363723278045654, "learning_rate": 4.403032211710118e-06, "loss": 0.0916, "step": 1018, "video_reward_cumulative_accuracy": 0.7956777996070727 }, { "epoch": 0.3024636390620362, "grad_norm": 2.561952829360962, "learning_rate": 4.401351334461759e-06, "loss": 0.0415, "step": 1019, "video_reward_cumulative_accuracy": 0.7958783120706575 }, { "epoch": 0.3027604630454141, "grad_norm": 1.5561801195144653, "learning_rate": 4.3996684159282014e-06, "loss": 0.0834, "step": 1020, "video_reward_cumulative_accuracy": 0.796078431372549 }, { "epoch": 0.3030572870287919, "grad_norm": 2.4101288318634033, "learning_rate": 4.397983457916222e-06, "loss": 0.0574, "step": 1021, "video_reward_cumulative_accuracy": 0.7962781586679726 }, { "epoch": 0.3033541110121698, "grad_norm": 2.7204737663269043, "learning_rate": 4.3962964622347855e-06, "loss": 0.0439, "step": 1022, "video_reward_cumulative_accuracy": 0.7964774951076321 }, { "epoch": 0.30365093499554763, "grad_norm": 1.8821197748184204, "learning_rate": 4.3946074306950484e-06, "loss": 0.0284, "step": 1023, "video_reward_cumulative_accuracy": 0.7966764418377321 }, { "epoch": 0.3039477589789255, "grad_norm": 1.1801754236221313, "learning_rate": 4.392916365110347e-06, "loss": 0.0358, "step": 1024, "video_reward_cumulative_accuracy": 0.796875 }, { "epoch": 0.30424458296230333, "grad_norm": 1.7793229818344116, "learning_rate": 4.391223267296206e-06, "loss": 0.043, "step": 1025, "video_reward_cumulative_accuracy": 0.7970731707317074 }, { "epoch": 0.3045414069456812, "grad_norm": 1.5398008823394775, "learning_rate": 4.389528139070329e-06, "loss": 0.028, "step": 1026, "video_reward_cumulative_accuracy": 0.797270955165692 }, { "epoch": 0.3048382309290591, "grad_norm": 2.413785457611084, "learning_rate": 4.387830982252602e-06, "loss": 0.0461, "step": 1027, "video_reward_cumulative_accuracy": 0.7974683544303798 }, { "epoch": 0.3051350549124369, "grad_norm": 5.496139049530029, "learning_rate": 4.3861317986650875e-06, "loss": 0.0584, "step": 1028, "video_reward_cumulative_accuracy": 0.7976653696498055 }, { "epoch": 0.3054318788958148, "grad_norm": 2.5064406394958496, "learning_rate": 4.384430590132023e-06, "loss": 0.0621, "step": 1029, "video_reward_cumulative_accuracy": 0.7978620019436345 }, { "epoch": 0.3057287028791926, "grad_norm": 2.9505836963653564, "learning_rate": 4.382727358479821e-06, "loss": 0.0354, "step": 1030, "video_reward_cumulative_accuracy": 0.7980582524271844 }, { "epoch": 0.3060255268625705, "grad_norm": 5.6248297691345215, "learning_rate": 4.3810221055370664e-06, "loss": 0.0853, "step": 1031, "video_reward_cumulative_accuracy": 0.7982541222114452 }, { "epoch": 0.30632235084594833, "grad_norm": 2.6210145950317383, "learning_rate": 4.3793148331345136e-06, "loss": 0.0609, "step": 1032, "video_reward_cumulative_accuracy": 0.7984496124031008 }, { "epoch": 0.3066191748293262, "grad_norm": 3.5333571434020996, "learning_rate": 4.377605543105086e-06, "loss": 0.0319, "step": 1033, "video_reward_cumulative_accuracy": 0.7986447241045499 }, { "epoch": 0.3069159988127041, "grad_norm": 1.4399138689041138, "learning_rate": 4.375894237283872e-06, "loss": 0.0569, "step": 1034, "video_reward_cumulative_accuracy": 0.7988394584139265 }, { "epoch": 0.3072128227960819, "grad_norm": 1.2284201383590698, "learning_rate": 4.374180917508124e-06, "loss": 0.0193, "step": 1035, "video_reward_cumulative_accuracy": 0.7990338164251207 }, { "epoch": 0.3075096467794598, "grad_norm": 2.532024383544922, "learning_rate": 4.372465585617257e-06, "loss": 0.0401, "step": 1036, "video_reward_cumulative_accuracy": 0.7992277992277992 }, { "epoch": 0.3078064707628376, "grad_norm": 1.7466938495635986, "learning_rate": 4.370748243452846e-06, "loss": 0.038, "step": 1037, "video_reward_cumulative_accuracy": 0.7994214079074252 }, { "epoch": 0.3081032947462155, "grad_norm": 1.943616509437561, "learning_rate": 4.369028892858626e-06, "loss": 0.0164, "step": 1038, "video_reward_cumulative_accuracy": 0.7996146435452793 }, { "epoch": 0.30840011872959333, "grad_norm": 0.6063635349273682, "learning_rate": 4.367307535680485e-06, "loss": 0.0151, "step": 1039, "video_reward_cumulative_accuracy": 0.7998075072184793 }, { "epoch": 0.3086969427129712, "grad_norm": 6.530287742614746, "learning_rate": 4.3655841737664685e-06, "loss": 0.1203, "step": 1040, "video_reward_cumulative_accuracy": 0.7995192307692308 }, { "epoch": 0.3089937666963491, "grad_norm": 4.490469932556152, "learning_rate": 4.363858808966772e-06, "loss": 0.0926, "step": 1041, "video_reward_cumulative_accuracy": 0.7997118155619597 }, { "epoch": 0.3092905906797269, "grad_norm": 1.2573050260543823, "learning_rate": 4.362131443133742e-06, "loss": 0.0368, "step": 1042, "video_reward_cumulative_accuracy": 0.7994241842610365 }, { "epoch": 0.3095874146631048, "grad_norm": 3.369048595428467, "learning_rate": 4.3604020781218736e-06, "loss": 0.0502, "step": 1043, "video_reward_cumulative_accuracy": 0.7996164908916586 }, { "epoch": 0.3098842386464826, "grad_norm": 1.5536525249481201, "learning_rate": 4.358670715787808e-06, "loss": 0.0142, "step": 1044, "video_reward_cumulative_accuracy": 0.7998084291187739 }, { "epoch": 0.3101810626298605, "grad_norm": 2.2478020191192627, "learning_rate": 4.356937357990331e-06, "loss": 0.0408, "step": 1045, "video_reward_cumulative_accuracy": 0.8 }, { "epoch": 0.31047788661323833, "grad_norm": 4.114165782928467, "learning_rate": 4.3552020065903685e-06, "loss": 0.0481, "step": 1046, "video_reward_cumulative_accuracy": 0.7992351816443595 }, { "epoch": 0.3107747105966162, "grad_norm": 2.3197696208953857, "learning_rate": 4.353464663450991e-06, "loss": 0.0471, "step": 1047, "video_reward_cumulative_accuracy": 0.7994269340974212 }, { "epoch": 0.3110715345799941, "grad_norm": 1.8591456413269043, "learning_rate": 4.351725330437405e-06, "loss": 0.0179, "step": 1048, "video_reward_cumulative_accuracy": 0.799618320610687 }, { "epoch": 0.3113683585633719, "grad_norm": 1.4338923692703247, "learning_rate": 4.349984009416952e-06, "loss": 0.0159, "step": 1049, "video_reward_cumulative_accuracy": 0.7993326978074357 }, { "epoch": 0.3116651825467498, "grad_norm": 5.681126594543457, "learning_rate": 4.34824070225911e-06, "loss": 0.0691, "step": 1050, "video_reward_cumulative_accuracy": 0.799047619047619 }, { "epoch": 0.3119620065301276, "grad_norm": 1.271953821182251, "learning_rate": 4.346495410835487e-06, "loss": 0.0444, "step": 1051, "video_reward_cumulative_accuracy": 0.7992388201712655 }, { "epoch": 0.3122588305135055, "grad_norm": 2.048283100128174, "learning_rate": 4.344748137019825e-06, "loss": 0.0131, "step": 1052, "video_reward_cumulative_accuracy": 0.7989543726235742 }, { "epoch": 0.31255565449688333, "grad_norm": 1.9480700492858887, "learning_rate": 4.34299888268799e-06, "loss": 0.0607, "step": 1053, "video_reward_cumulative_accuracy": 0.798670465337132 }, { "epoch": 0.3128524784802612, "grad_norm": 2.398563861846924, "learning_rate": 4.341247649717978e-06, "loss": 0.055, "step": 1054, "video_reward_cumulative_accuracy": 0.7988614800759013 }, { "epoch": 0.31314930246363903, "grad_norm": 2.373682737350464, "learning_rate": 4.339494439989907e-06, "loss": 0.0684, "step": 1055, "video_reward_cumulative_accuracy": 0.7990521327014218 }, { "epoch": 0.3134461264470169, "grad_norm": 3.2019567489624023, "learning_rate": 4.3377392553860156e-06, "loss": 0.0426, "step": 1056, "video_reward_cumulative_accuracy": 0.7992424242424242 }, { "epoch": 0.3137429504303948, "grad_norm": 5.288125038146973, "learning_rate": 4.335982097790668e-06, "loss": 0.0768, "step": 1057, "video_reward_cumulative_accuracy": 0.7994323557237465 }, { "epoch": 0.3140397744137726, "grad_norm": 4.724822521209717, "learning_rate": 4.334222969090342e-06, "loss": 0.0782, "step": 1058, "video_reward_cumulative_accuracy": 0.7996219281663516 }, { "epoch": 0.3143365983971505, "grad_norm": 1.1170587539672852, "learning_rate": 4.332461871173633e-06, "loss": 0.0591, "step": 1059, "video_reward_cumulative_accuracy": 0.7998111425873465 }, { "epoch": 0.3146334223805283, "grad_norm": 3.1828134059906006, "learning_rate": 4.330698805931251e-06, "loss": 0.0642, "step": 1060, "video_reward_cumulative_accuracy": 0.8 }, { "epoch": 0.3149302463639062, "grad_norm": 3.1132760047912598, "learning_rate": 4.328933775256017e-06, "loss": 0.0925, "step": 1061, "video_reward_cumulative_accuracy": 0.8001885014137606 }, { "epoch": 0.31522707034728403, "grad_norm": 2.5283889770507812, "learning_rate": 4.327166781042864e-06, "loss": 0.0319, "step": 1062, "video_reward_cumulative_accuracy": 0.7999058380414312 }, { "epoch": 0.3155238943306619, "grad_norm": 2.8218538761138916, "learning_rate": 4.325397825188829e-06, "loss": 0.0391, "step": 1063, "video_reward_cumulative_accuracy": 0.8000940733772343 }, { "epoch": 0.3158207183140398, "grad_norm": 2.607076644897461, "learning_rate": 4.323626909593062e-06, "loss": 0.0454, "step": 1064, "video_reward_cumulative_accuracy": 0.8002819548872181 }, { "epoch": 0.3161175422974176, "grad_norm": 1.558544397354126, "learning_rate": 4.321854036156809e-06, "loss": 0.0189, "step": 1065, "video_reward_cumulative_accuracy": 0.8004694835680751 }, { "epoch": 0.3164143662807955, "grad_norm": 1.2349810600280762, "learning_rate": 4.320079206783423e-06, "loss": 0.0272, "step": 1066, "video_reward_cumulative_accuracy": 0.800656660412758 }, { "epoch": 0.3167111902641733, "grad_norm": 1.2629001140594482, "learning_rate": 4.318302423378357e-06, "loss": 0.0318, "step": 1067, "video_reward_cumulative_accuracy": 0.8003748828491096 }, { "epoch": 0.3170080142475512, "grad_norm": 2.740196466445923, "learning_rate": 4.3165236878491575e-06, "loss": 0.0518, "step": 1068, "video_reward_cumulative_accuracy": 0.800561797752809 }, { "epoch": 0.31730483823092903, "grad_norm": 3.2238783836364746, "learning_rate": 4.314743002105473e-06, "loss": 0.0403, "step": 1069, "video_reward_cumulative_accuracy": 0.8002806361085126 }, { "epoch": 0.3176016622143069, "grad_norm": 1.6323812007904053, "learning_rate": 4.31296036805904e-06, "loss": 0.0721, "step": 1070, "video_reward_cumulative_accuracy": 0.8004672897196262 }, { "epoch": 0.3178984861976848, "grad_norm": 1.8098689317703247, "learning_rate": 4.3111757876236905e-06, "loss": 0.0256, "step": 1071, "video_reward_cumulative_accuracy": 0.8006535947712419 }, { "epoch": 0.3181953101810626, "grad_norm": 1.6807585954666138, "learning_rate": 4.309389262715344e-06, "loss": 0.0353, "step": 1072, "video_reward_cumulative_accuracy": 0.800839552238806 }, { "epoch": 0.3184921341644405, "grad_norm": 4.06589937210083, "learning_rate": 4.307600795252008e-06, "loss": 0.0547, "step": 1073, "video_reward_cumulative_accuracy": 0.8005591798695247 }, { "epoch": 0.3187889581478183, "grad_norm": 2.8750007152557373, "learning_rate": 4.305810387153778e-06, "loss": 0.0552, "step": 1074, "video_reward_cumulative_accuracy": 0.8007448789571695 }, { "epoch": 0.3190857821311962, "grad_norm": 1.4461251497268677, "learning_rate": 4.30401804034283e-06, "loss": 0.0467, "step": 1075, "video_reward_cumulative_accuracy": 0.8009302325581396 }, { "epoch": 0.31938260611457403, "grad_norm": 7.60485315322876, "learning_rate": 4.30222375674342e-06, "loss": 0.0826, "step": 1076, "video_reward_cumulative_accuracy": 0.8011152416356877 }, { "epoch": 0.3196794300979519, "grad_norm": 5.275068283081055, "learning_rate": 4.3004275382818884e-06, "loss": 0.0536, "step": 1077, "video_reward_cumulative_accuracy": 0.8012999071494893 }, { "epoch": 0.3199762540813298, "grad_norm": 2.498542547225952, "learning_rate": 4.298629386886649e-06, "loss": 0.0574, "step": 1078, "video_reward_cumulative_accuracy": 0.8010204081632653 }, { "epoch": 0.3202730780647076, "grad_norm": 4.200756072998047, "learning_rate": 4.296829304488191e-06, "loss": 0.0782, "step": 1079, "video_reward_cumulative_accuracy": 0.8012048192771084 }, { "epoch": 0.3205699020480855, "grad_norm": 1.4311738014221191, "learning_rate": 4.29502729301908e-06, "loss": 0.0201, "step": 1080, "video_reward_cumulative_accuracy": 0.8013888888888889 }, { "epoch": 0.3208667260314633, "grad_norm": 0.836157500743866, "learning_rate": 4.293223354413948e-06, "loss": 0.0322, "step": 1081, "video_reward_cumulative_accuracy": 0.8015726179463459 }, { "epoch": 0.3211635500148412, "grad_norm": 1.57651686668396, "learning_rate": 4.2914174906094985e-06, "loss": 0.0481, "step": 1082, "video_reward_cumulative_accuracy": 0.8017560073937153 }, { "epoch": 0.32146037399821903, "grad_norm": 1.0639756917953491, "learning_rate": 4.289609703544501e-06, "loss": 0.0469, "step": 1083, "video_reward_cumulative_accuracy": 0.8019390581717452 }, { "epoch": 0.3217571979815969, "grad_norm": 2.068735361099243, "learning_rate": 4.2877999951597935e-06, "loss": 0.0474, "step": 1084, "video_reward_cumulative_accuracy": 0.8021217712177122 }, { "epoch": 0.3220540219649748, "grad_norm": 2.1171066761016846, "learning_rate": 4.28598836739827e-06, "loss": 0.0429, "step": 1085, "video_reward_cumulative_accuracy": 0.8018433179723502 }, { "epoch": 0.3223508459483526, "grad_norm": 3.831587076187134, "learning_rate": 4.28417482220489e-06, "loss": 0.0376, "step": 1086, "video_reward_cumulative_accuracy": 0.8015653775322283 }, { "epoch": 0.3226476699317305, "grad_norm": 6.328731536865234, "learning_rate": 4.282359361526671e-06, "loss": 0.0615, "step": 1087, "video_reward_cumulative_accuracy": 0.8012879484820608 }, { "epoch": 0.3229444939151083, "grad_norm": 1.401442289352417, "learning_rate": 4.2805419873126855e-06, "loss": 0.0197, "step": 1088, "video_reward_cumulative_accuracy": 0.8014705882352942 }, { "epoch": 0.3232413178984862, "grad_norm": 4.52972412109375, "learning_rate": 4.278722701514061e-06, "loss": 0.0569, "step": 1089, "video_reward_cumulative_accuracy": 0.8016528925619835 }, { "epoch": 0.323538141881864, "grad_norm": 2.7835745811462402, "learning_rate": 4.276901506083978e-06, "loss": 0.0962, "step": 1090, "video_reward_cumulative_accuracy": 0.8018348623853211 }, { "epoch": 0.3238349658652419, "grad_norm": 1.7582579851150513, "learning_rate": 4.275078402977666e-06, "loss": 0.0331, "step": 1091, "video_reward_cumulative_accuracy": 0.8020164986251146 }, { "epoch": 0.3241317898486198, "grad_norm": 1.52336847782135, "learning_rate": 4.273253394152404e-06, "loss": 0.059, "step": 1092, "video_reward_cumulative_accuracy": 0.8021978021978022 }, { "epoch": 0.3244286138319976, "grad_norm": 1.373092532157898, "learning_rate": 4.271426481567515e-06, "loss": 0.0319, "step": 1093, "video_reward_cumulative_accuracy": 0.8023787740164684 }, { "epoch": 0.3247254378153755, "grad_norm": 4.265668869018555, "learning_rate": 4.269597667184366e-06, "loss": 0.1015, "step": 1094, "video_reward_cumulative_accuracy": 0.8025594149908593 }, { "epoch": 0.3250222617987533, "grad_norm": 2.7200961112976074, "learning_rate": 4.267766952966369e-06, "loss": 0.0494, "step": 1095, "video_reward_cumulative_accuracy": 0.8027397260273973 }, { "epoch": 0.3253190857821312, "grad_norm": 2.589541435241699, "learning_rate": 4.2659343408789734e-06, "loss": 0.038, "step": 1096, "video_reward_cumulative_accuracy": 0.8029197080291971 }, { "epoch": 0.325615909765509, "grad_norm": 2.7991340160369873, "learning_rate": 4.264099832889665e-06, "loss": 0.0584, "step": 1097, "video_reward_cumulative_accuracy": 0.8030993618960802 }, { "epoch": 0.3259127337488869, "grad_norm": 2.104408025741577, "learning_rate": 4.262263430967966e-06, "loss": 0.0505, "step": 1098, "video_reward_cumulative_accuracy": 0.8032786885245902 }, { "epoch": 0.3262095577322648, "grad_norm": 1.6177819967269897, "learning_rate": 4.2604251370854325e-06, "loss": 0.0405, "step": 1099, "video_reward_cumulative_accuracy": 0.8030027297543221 }, { "epoch": 0.3265063817156426, "grad_norm": 4.011326789855957, "learning_rate": 4.2585849532156505e-06, "loss": 0.0463, "step": 1100, "video_reward_cumulative_accuracy": 0.8027272727272727 }, { "epoch": 0.3268032056990205, "grad_norm": 1.1863105297088623, "learning_rate": 4.256742881334238e-06, "loss": 0.0208, "step": 1101, "video_reward_cumulative_accuracy": 0.8029064486830154 }, { "epoch": 0.3271000296823983, "grad_norm": 2.122631072998047, "learning_rate": 4.254898923418838e-06, "loss": 0.0622, "step": 1102, "video_reward_cumulative_accuracy": 0.8026315789473685 }, { "epoch": 0.3273968536657762, "grad_norm": 2.541916608810425, "learning_rate": 4.253053081449116e-06, "loss": 0.0483, "step": 1103, "video_reward_cumulative_accuracy": 0.8028105167724388 }, { "epoch": 0.327693677649154, "grad_norm": 3.4067628383636475, "learning_rate": 4.251205357406764e-06, "loss": 0.0289, "step": 1104, "video_reward_cumulative_accuracy": 0.802536231884058 }, { "epoch": 0.3279905016325319, "grad_norm": 1.9281690120697021, "learning_rate": 4.249355753275492e-06, "loss": 0.0488, "step": 1105, "video_reward_cumulative_accuracy": 0.8022624434389141 }, { "epoch": 0.3282873256159098, "grad_norm": 4.4767231941223145, "learning_rate": 4.247504271041031e-06, "loss": 0.0644, "step": 1106, "video_reward_cumulative_accuracy": 0.8024412296564195 }, { "epoch": 0.3285841495992876, "grad_norm": 4.301503658294678, "learning_rate": 4.245650912691127e-06, "loss": 0.0987, "step": 1107, "video_reward_cumulative_accuracy": 0.8026196928635954 }, { "epoch": 0.3288809735826655, "grad_norm": 2.4027132987976074, "learning_rate": 4.243795680215538e-06, "loss": 0.0807, "step": 1108, "video_reward_cumulative_accuracy": 0.8023465703971119 }, { "epoch": 0.3291777975660433, "grad_norm": 2.302290678024292, "learning_rate": 4.241938575606038e-06, "loss": 0.0403, "step": 1109, "video_reward_cumulative_accuracy": 0.8025247971145176 }, { "epoch": 0.3294746215494212, "grad_norm": 4.239207744598389, "learning_rate": 4.240079600856408e-06, "loss": 0.062, "step": 1110, "video_reward_cumulative_accuracy": 0.8027027027027027 }, { "epoch": 0.329771445532799, "grad_norm": 3.736924171447754, "learning_rate": 4.238218757962439e-06, "loss": 0.0399, "step": 1111, "video_reward_cumulative_accuracy": 0.8028802880288028 }, { "epoch": 0.3300682695161769, "grad_norm": 1.4400713443756104, "learning_rate": 4.2363560489219255e-06, "loss": 0.0213, "step": 1112, "video_reward_cumulative_accuracy": 0.8030575539568345 }, { "epoch": 0.3303650934995548, "grad_norm": 1.8887395858764648, "learning_rate": 4.234491475734667e-06, "loss": 0.0368, "step": 1113, "video_reward_cumulative_accuracy": 0.8032345013477089 }, { "epoch": 0.3306619174829326, "grad_norm": 1.5093616247177124, "learning_rate": 4.232625040402463e-06, "loss": 0.0272, "step": 1114, "video_reward_cumulative_accuracy": 0.803411131059246 }, { "epoch": 0.3309587414663105, "grad_norm": 5.804702281951904, "learning_rate": 4.230756744929114e-06, "loss": 0.0515, "step": 1115, "video_reward_cumulative_accuracy": 0.8031390134529148 }, { "epoch": 0.3312555654496883, "grad_norm": 0.6705430150032043, "learning_rate": 4.228886591320415e-06, "loss": 0.0149, "step": 1116, "video_reward_cumulative_accuracy": 0.8033154121863799 }, { "epoch": 0.3315523894330662, "grad_norm": 5.313145637512207, "learning_rate": 4.227014581584159e-06, "loss": 0.0725, "step": 1117, "video_reward_cumulative_accuracy": 0.8034914950760967 }, { "epoch": 0.331849213416444, "grad_norm": 2.9840526580810547, "learning_rate": 4.2251407177301295e-06, "loss": 0.0556, "step": 1118, "video_reward_cumulative_accuracy": 0.8032200357781754 }, { "epoch": 0.3321460373998219, "grad_norm": 1.9225348234176636, "learning_rate": 4.2232650017701015e-06, "loss": 0.0517, "step": 1119, "video_reward_cumulative_accuracy": 0.8029490616621984 }, { "epoch": 0.3324428613831998, "grad_norm": 6.583012580871582, "learning_rate": 4.221387435717838e-06, "loss": 0.0913, "step": 1120, "video_reward_cumulative_accuracy": 0.803125 }, { "epoch": 0.3327396853665776, "grad_norm": 5.673295021057129, "learning_rate": 4.219508021589088e-06, "loss": 0.0629, "step": 1121, "video_reward_cumulative_accuracy": 0.8033006244424621 }, { "epoch": 0.3330365093499555, "grad_norm": 2.6932709217071533, "learning_rate": 4.217626761401585e-06, "loss": 0.035, "step": 1122, "video_reward_cumulative_accuracy": 0.803475935828877 }, { "epoch": 0.3333333333333333, "grad_norm": 1.6052873134613037, "learning_rate": 4.215743657175046e-06, "loss": 0.0277, "step": 1123, "video_reward_cumulative_accuracy": 0.8036509349955476 }, { "epoch": 0.3336301573167112, "grad_norm": 1.1534535884857178, "learning_rate": 4.213858710931163e-06, "loss": 0.0273, "step": 1124, "video_reward_cumulative_accuracy": 0.8033807829181495 }, { "epoch": 0.333926981300089, "grad_norm": 1.3987401723861694, "learning_rate": 4.2119719246936114e-06, "loss": 0.034, "step": 1125, "video_reward_cumulative_accuracy": 0.8035555555555556 }, { "epoch": 0.3342238052834669, "grad_norm": 2.05250883102417, "learning_rate": 4.210083300488038e-06, "loss": 0.0606, "step": 1126, "video_reward_cumulative_accuracy": 0.8037300177619894 }, { "epoch": 0.3345206292668448, "grad_norm": 2.67376971244812, "learning_rate": 4.208192840342066e-06, "loss": 0.0622, "step": 1127, "video_reward_cumulative_accuracy": 0.8039041703637977 }, { "epoch": 0.3348174532502226, "grad_norm": 1.0269320011138916, "learning_rate": 4.206300546285286e-06, "loss": 0.0466, "step": 1128, "video_reward_cumulative_accuracy": 0.8036347517730497 }, { "epoch": 0.3351142772336005, "grad_norm": 6.182705879211426, "learning_rate": 4.204406420349259e-06, "loss": 0.035, "step": 1129, "video_reward_cumulative_accuracy": 0.8038086802480071 }, { "epoch": 0.3354111012169783, "grad_norm": 3.8771231174468994, "learning_rate": 4.2025104645675145e-06, "loss": 0.0472, "step": 1130, "video_reward_cumulative_accuracy": 0.8039823008849557 }, { "epoch": 0.3357079252003562, "grad_norm": 4.503866195678711, "learning_rate": 4.200612680975545e-06, "loss": 0.0592, "step": 1131, "video_reward_cumulative_accuracy": 0.8037135278514589 }, { "epoch": 0.336004749183734, "grad_norm": 1.73423433303833, "learning_rate": 4.1987130716108046e-06, "loss": 0.035, "step": 1132, "video_reward_cumulative_accuracy": 0.8034452296819788 }, { "epoch": 0.3363015731671119, "grad_norm": 2.8510076999664307, "learning_rate": 4.196811638512708e-06, "loss": 0.0513, "step": 1133, "video_reward_cumulative_accuracy": 0.8031774051191527 }, { "epoch": 0.3365983971504898, "grad_norm": 2.853792190551758, "learning_rate": 4.194908383722629e-06, "loss": 0.0676, "step": 1134, "video_reward_cumulative_accuracy": 0.8029100529100529 }, { "epoch": 0.3368952211338676, "grad_norm": 3.5561459064483643, "learning_rate": 4.193003309283896e-06, "loss": 0.0834, "step": 1135, "video_reward_cumulative_accuracy": 0.8030837004405287 }, { "epoch": 0.3371920451172455, "grad_norm": 1.2049405574798584, "learning_rate": 4.191096417241792e-06, "loss": 0.0216, "step": 1136, "video_reward_cumulative_accuracy": 0.8032570422535211 }, { "epoch": 0.3374888691006233, "grad_norm": 1.6044228076934814, "learning_rate": 4.189187709643549e-06, "loss": 0.0478, "step": 1137, "video_reward_cumulative_accuracy": 0.8034300791556728 }, { "epoch": 0.3377856930840012, "grad_norm": 1.5150611400604248, "learning_rate": 4.1872771885383525e-06, "loss": 0.0535, "step": 1138, "video_reward_cumulative_accuracy": 0.8031634446397188 }, { "epoch": 0.338082517067379, "grad_norm": 3.9265263080596924, "learning_rate": 4.18536485597733e-06, "loss": 0.0596, "step": 1139, "video_reward_cumulative_accuracy": 0.8033362598770851 }, { "epoch": 0.3383793410507569, "grad_norm": 7.542102336883545, "learning_rate": 4.183450714013557e-06, "loss": 0.0811, "step": 1140, "video_reward_cumulative_accuracy": 0.8035087719298246 }, { "epoch": 0.3386761650341348, "grad_norm": 4.337952613830566, "learning_rate": 4.181534764702051e-06, "loss": 0.0957, "step": 1141, "video_reward_cumulative_accuracy": 0.8032427695004382 }, { "epoch": 0.3389729890175126, "grad_norm": 1.57660710811615, "learning_rate": 4.179617010099768e-06, "loss": 0.0462, "step": 1142, "video_reward_cumulative_accuracy": 0.803415061295972 }, { "epoch": 0.3392698130008905, "grad_norm": 1.877387285232544, "learning_rate": 4.177697452265605e-06, "loss": 0.0485, "step": 1143, "video_reward_cumulative_accuracy": 0.8035870516185477 }, { "epoch": 0.3395666369842683, "grad_norm": 3.2722318172454834, "learning_rate": 4.175776093260395e-06, "loss": 0.0395, "step": 1144, "video_reward_cumulative_accuracy": 0.8037587412587412 }, { "epoch": 0.3398634609676462, "grad_norm": 2.0722169876098633, "learning_rate": 4.1738529351469e-06, "loss": 0.0416, "step": 1145, "video_reward_cumulative_accuracy": 0.8039301310043668 }, { "epoch": 0.340160284951024, "grad_norm": 1.58327317237854, "learning_rate": 4.1719279799898205e-06, "loss": 0.0566, "step": 1146, "video_reward_cumulative_accuracy": 0.8041012216404887 }, { "epoch": 0.3404571089344019, "grad_norm": 4.316904544830322, "learning_rate": 4.17000122985578e-06, "loss": 0.0597, "step": 1147, "video_reward_cumulative_accuracy": 0.8038360941586749 }, { "epoch": 0.3407539329177798, "grad_norm": 5.312283039093018, "learning_rate": 4.168072686813332e-06, "loss": 0.0579, "step": 1148, "video_reward_cumulative_accuracy": 0.804006968641115 }, { "epoch": 0.3410507569011576, "grad_norm": 1.4211534261703491, "learning_rate": 4.166142352932957e-06, "loss": 0.032, "step": 1149, "video_reward_cumulative_accuracy": 0.804177545691906 }, { "epoch": 0.3413475808845355, "grad_norm": 1.4748672246932983, "learning_rate": 4.164210230287053e-06, "loss": 0.0391, "step": 1150, "video_reward_cumulative_accuracy": 0.8043478260869565 }, { "epoch": 0.3416444048679133, "grad_norm": 1.9210374355316162, "learning_rate": 4.162276320949943e-06, "loss": 0.0787, "step": 1151, "video_reward_cumulative_accuracy": 0.8036490008688097 }, { "epoch": 0.3419412288512912, "grad_norm": 3.2308928966522217, "learning_rate": 4.160340626997865e-06, "loss": 0.0535, "step": 1152, "video_reward_cumulative_accuracy": 0.8038194444444444 }, { "epoch": 0.342238052834669, "grad_norm": 1.825577735900879, "learning_rate": 4.158403150508975e-06, "loss": 0.0535, "step": 1153, "video_reward_cumulative_accuracy": 0.8035559410234172 }, { "epoch": 0.3425348768180469, "grad_norm": 1.2146353721618652, "learning_rate": 4.156463893563342e-06, "loss": 0.0576, "step": 1154, "video_reward_cumulative_accuracy": 0.8032928942807626 }, { "epoch": 0.3428317008014248, "grad_norm": 1.1705416440963745, "learning_rate": 4.154522858242947e-06, "loss": 0.0369, "step": 1155, "video_reward_cumulative_accuracy": 0.8034632034632034 }, { "epoch": 0.3431285247848026, "grad_norm": 1.2783030271530151, "learning_rate": 4.15258004663168e-06, "loss": 0.0363, "step": 1156, "video_reward_cumulative_accuracy": 0.8036332179930796 }, { "epoch": 0.3434253487681805, "grad_norm": 1.560981273651123, "learning_rate": 4.150635460815336e-06, "loss": 0.0454, "step": 1157, "video_reward_cumulative_accuracy": 0.8033707865168539 }, { "epoch": 0.3437221727515583, "grad_norm": 1.9000885486602783, "learning_rate": 4.148689102881619e-06, "loss": 0.0535, "step": 1158, "video_reward_cumulative_accuracy": 0.8035405872193437 }, { "epoch": 0.3440189967349362, "grad_norm": 1.363930344581604, "learning_rate": 4.146740974920131e-06, "loss": 0.0444, "step": 1159, "video_reward_cumulative_accuracy": 0.8037100949094047 }, { "epoch": 0.344315820718314, "grad_norm": 1.4986649751663208, "learning_rate": 4.144791079022379e-06, "loss": 0.0329, "step": 1160, "video_reward_cumulative_accuracy": 0.8038793103448276 }, { "epoch": 0.3446126447016919, "grad_norm": 2.1276040077209473, "learning_rate": 4.142839417281762e-06, "loss": 0.0409, "step": 1161, "video_reward_cumulative_accuracy": 0.8040482342807924 }, { "epoch": 0.3449094686850698, "grad_norm": 2.0634193420410156, "learning_rate": 4.140885991793582e-06, "loss": 0.0363, "step": 1162, "video_reward_cumulative_accuracy": 0.8042168674698795 }, { "epoch": 0.3452062926684476, "grad_norm": 4.047058582305908, "learning_rate": 4.138930804655028e-06, "loss": 0.0609, "step": 1163, "video_reward_cumulative_accuracy": 0.8039552880481513 }, { "epoch": 0.3455031166518255, "grad_norm": 1.8624390363693237, "learning_rate": 4.136973857965185e-06, "loss": 0.0345, "step": 1164, "video_reward_cumulative_accuracy": 0.8041237113402062 }, { "epoch": 0.3457999406352033, "grad_norm": 3.3220489025115967, "learning_rate": 4.135015153825024e-06, "loss": 0.0393, "step": 1165, "video_reward_cumulative_accuracy": 0.8042918454935623 }, { "epoch": 0.3460967646185812, "grad_norm": 2.244272470474243, "learning_rate": 4.133054694337404e-06, "loss": 0.1681, "step": 1166, "video_reward_cumulative_accuracy": 0.8040308747855918 }, { "epoch": 0.346393588601959, "grad_norm": 1.2074984312057495, "learning_rate": 4.1310924816070705e-06, "loss": 0.0117, "step": 1167, "video_reward_cumulative_accuracy": 0.8041988003427593 }, { "epoch": 0.3466904125853369, "grad_norm": 4.098258018493652, "learning_rate": 4.129128517740647e-06, "loss": 0.0522, "step": 1168, "video_reward_cumulative_accuracy": 0.8039383561643836 }, { "epoch": 0.3469872365687148, "grad_norm": 1.4286725521087646, "learning_rate": 4.12716280484664e-06, "loss": 0.0594, "step": 1169, "video_reward_cumulative_accuracy": 0.8032506415739948 }, { "epoch": 0.3472840605520926, "grad_norm": 3.419475555419922, "learning_rate": 4.125195345035433e-06, "loss": 0.0582, "step": 1170, "video_reward_cumulative_accuracy": 0.8034188034188035 }, { "epoch": 0.3475808845354705, "grad_norm": 1.2283254861831665, "learning_rate": 4.1232261404192865e-06, "loss": 0.0162, "step": 1171, "video_reward_cumulative_accuracy": 0.8035866780529461 }, { "epoch": 0.3478777085188483, "grad_norm": 1.2608306407928467, "learning_rate": 4.12125519311233e-06, "loss": 0.0137, "step": 1172, "video_reward_cumulative_accuracy": 0.8037542662116041 }, { "epoch": 0.3481745325022262, "grad_norm": 1.785900354385376, "learning_rate": 4.119282505230569e-06, "loss": 0.0385, "step": 1173, "video_reward_cumulative_accuracy": 0.8034953111679455 }, { "epoch": 0.348471356485604, "grad_norm": 2.0561554431915283, "learning_rate": 4.117308078891876e-06, "loss": 0.0453, "step": 1174, "video_reward_cumulative_accuracy": 0.8036626916524702 }, { "epoch": 0.3487681804689819, "grad_norm": 4.3540730476379395, "learning_rate": 4.115331916215987e-06, "loss": 0.0697, "step": 1175, "video_reward_cumulative_accuracy": 0.8038297872340425 }, { "epoch": 0.3490650044523598, "grad_norm": 2.7298593521118164, "learning_rate": 4.1133540193245056e-06, "loss": 0.0258, "step": 1176, "video_reward_cumulative_accuracy": 0.8039965986394558 }, { "epoch": 0.3493618284357376, "grad_norm": 1.9880746603012085, "learning_rate": 4.111374390340895e-06, "loss": 0.0545, "step": 1177, "video_reward_cumulative_accuracy": 0.8037383177570093 }, { "epoch": 0.3496586524191155, "grad_norm": 2.990823745727539, "learning_rate": 4.109393031390482e-06, "loss": 0.0543, "step": 1178, "video_reward_cumulative_accuracy": 0.8034804753820034 }, { "epoch": 0.3499554764024933, "grad_norm": 2.0915024280548096, "learning_rate": 4.107409944600444e-06, "loss": 0.0747, "step": 1179, "video_reward_cumulative_accuracy": 0.8036471586089907 }, { "epoch": 0.3502523003858712, "grad_norm": 1.47837495803833, "learning_rate": 4.105425132099821e-06, "loss": 0.0526, "step": 1180, "video_reward_cumulative_accuracy": 0.8038135593220339 }, { "epoch": 0.350549124369249, "grad_norm": 1.934434175491333, "learning_rate": 4.103438596019498e-06, "loss": 0.0385, "step": 1181, "video_reward_cumulative_accuracy": 0.8039796782387807 }, { "epoch": 0.3508459483526269, "grad_norm": 1.294867753982544, "learning_rate": 4.1014503384922164e-06, "loss": 0.0314, "step": 1182, "video_reward_cumulative_accuracy": 0.8041455160744501 }, { "epoch": 0.35114277233600477, "grad_norm": 2.0357749462127686, "learning_rate": 4.099460361652563e-06, "loss": 0.0253, "step": 1183, "video_reward_cumulative_accuracy": 0.8043110735418427 }, { "epoch": 0.3514395963193826, "grad_norm": 2.3916919231414795, "learning_rate": 4.097468667636971e-06, "loss": 0.0447, "step": 1184, "video_reward_cumulative_accuracy": 0.8040540540540541 }, { "epoch": 0.3517364203027605, "grad_norm": 2.0069797039031982, "learning_rate": 4.095475258583719e-06, "loss": 0.0179, "step": 1185, "video_reward_cumulative_accuracy": 0.8042194092827004 }, { "epoch": 0.3520332442861383, "grad_norm": 3.129631996154785, "learning_rate": 4.093480136632922e-06, "loss": 0.1022, "step": 1186, "video_reward_cumulative_accuracy": 0.8039629005059022 }, { "epoch": 0.3523300682695162, "grad_norm": 1.985904574394226, "learning_rate": 4.09148330392654e-06, "loss": 0.0333, "step": 1187, "video_reward_cumulative_accuracy": 0.8041280539174389 }, { "epoch": 0.352626892252894, "grad_norm": 2.5807793140411377, "learning_rate": 4.089484762608365e-06, "loss": 0.0602, "step": 1188, "video_reward_cumulative_accuracy": 0.8042929292929293 }, { "epoch": 0.3529237162362719, "grad_norm": 2.0883278846740723, "learning_rate": 4.0874845148240265e-06, "loss": 0.0711, "step": 1189, "video_reward_cumulative_accuracy": 0.804457527333894 }, { "epoch": 0.35322054021964977, "grad_norm": 1.2518675327301025, "learning_rate": 4.085482562720983e-06, "loss": 0.0395, "step": 1190, "video_reward_cumulative_accuracy": 0.8042016806722689 }, { "epoch": 0.3535173642030276, "grad_norm": 1.957599401473999, "learning_rate": 4.083478908448525e-06, "loss": 0.0636, "step": 1191, "video_reward_cumulative_accuracy": 0.8043660789252729 }, { "epoch": 0.3538141881864055, "grad_norm": 2.872051954269409, "learning_rate": 4.08147355415777e-06, "loss": 0.0783, "step": 1192, "video_reward_cumulative_accuracy": 0.8045302013422819 }, { "epoch": 0.3541110121697833, "grad_norm": 5.688748836517334, "learning_rate": 4.07946650200166e-06, "loss": 0.0614, "step": 1193, "video_reward_cumulative_accuracy": 0.8046940486169321 }, { "epoch": 0.3544078361531612, "grad_norm": 1.7336045503616333, "learning_rate": 4.0774577541349605e-06, "loss": 0.0619, "step": 1194, "video_reward_cumulative_accuracy": 0.8044388609715243 }, { "epoch": 0.354704660136539, "grad_norm": 2.06296968460083, "learning_rate": 4.075447312714258e-06, "loss": 0.0263, "step": 1195, "video_reward_cumulative_accuracy": 0.80418410041841 }, { "epoch": 0.3550014841199169, "grad_norm": 1.0817362070083618, "learning_rate": 4.073435179897956e-06, "loss": 0.0567, "step": 1196, "video_reward_cumulative_accuracy": 0.8035117056856187 }, { "epoch": 0.35529830810329477, "grad_norm": 1.0421109199523926, "learning_rate": 4.071421357846274e-06, "loss": 0.0549, "step": 1197, "video_reward_cumulative_accuracy": 0.8036758563074352 }, { "epoch": 0.3555951320866726, "grad_norm": 2.0795469284057617, "learning_rate": 4.0694058487212464e-06, "loss": 0.0329, "step": 1198, "video_reward_cumulative_accuracy": 0.8038397328881469 }, { "epoch": 0.3558919560700505, "grad_norm": 2.0918068885803223, "learning_rate": 4.067388654686717e-06, "loss": 0.0445, "step": 1199, "video_reward_cumulative_accuracy": 0.8035863219349458 }, { "epoch": 0.3561887800534283, "grad_norm": 1.405401349067688, "learning_rate": 4.065369777908339e-06, "loss": 0.0439, "step": 1200, "video_reward_cumulative_accuracy": 0.80375 }, { "epoch": 0.3561887800534283, "eval_runtime": 129.9202, "eval_samples_per_second": 6.073, "eval_steps_per_second": 0.762, "eval_test_set_accuracy": 0.7689393939393939, "step": 1200 }, { "epoch": 0.3564856040368062, "grad_norm": 1.2888494729995728, "learning_rate": 4.063349220553573e-06, "loss": 0.0477, "step": 1201, "video_reward_cumulative_accuracy": 0.8030807660283097 }, { "epoch": 0.356782428020184, "grad_norm": 2.8064706325531006, "learning_rate": 4.0613269847916845e-06, "loss": 0.0431, "step": 1202, "video_reward_cumulative_accuracy": 0.8032445923460898 }, { "epoch": 0.3570792520035619, "grad_norm": 1.3145371675491333, "learning_rate": 4.059303072793739e-06, "loss": 0.0215, "step": 1203, "video_reward_cumulative_accuracy": 0.8034081463009144 }, { "epoch": 0.35737607598693977, "grad_norm": 1.8004136085510254, "learning_rate": 4.057277486732601e-06, "loss": 0.1032, "step": 1204, "video_reward_cumulative_accuracy": 0.8035714285714286 }, { "epoch": 0.3576728999703176, "grad_norm": 0.9853895902633667, "learning_rate": 4.0552502287829365e-06, "loss": 0.0421, "step": 1205, "video_reward_cumulative_accuracy": 0.8033195020746888 }, { "epoch": 0.3579697239536955, "grad_norm": 2.2384772300720215, "learning_rate": 4.0532213011212025e-06, "loss": 0.0396, "step": 1206, "video_reward_cumulative_accuracy": 0.8034825870646766 }, { "epoch": 0.3582665479370733, "grad_norm": 1.6136014461517334, "learning_rate": 4.0511907059256485e-06, "loss": 0.0425, "step": 1207, "video_reward_cumulative_accuracy": 0.8036454018227009 }, { "epoch": 0.3585633719204512, "grad_norm": 0.9855162501335144, "learning_rate": 4.049158445376318e-06, "loss": 0.0325, "step": 1208, "video_reward_cumulative_accuracy": 0.8038079470198676 }, { "epoch": 0.358860195903829, "grad_norm": 2.7680773735046387, "learning_rate": 4.047124521655037e-06, "loss": 0.0559, "step": 1209, "video_reward_cumulative_accuracy": 0.803556658395368 }, { "epoch": 0.3591570198872069, "grad_norm": 1.1192249059677124, "learning_rate": 4.045088936945423e-06, "loss": 0.0224, "step": 1210, "video_reward_cumulative_accuracy": 0.8037190082644629 }, { "epoch": 0.35945384387058477, "grad_norm": 1.8444184064865112, "learning_rate": 4.043051693432871e-06, "loss": 0.0565, "step": 1211, "video_reward_cumulative_accuracy": 0.8038810900082577 }, { "epoch": 0.3597506678539626, "grad_norm": 2.4251158237457275, "learning_rate": 4.041012793304563e-06, "loss": 0.0407, "step": 1212, "video_reward_cumulative_accuracy": 0.804042904290429 }, { "epoch": 0.36004749183734047, "grad_norm": 1.5506036281585693, "learning_rate": 4.038972238749452e-06, "loss": 0.0247, "step": 1213, "video_reward_cumulative_accuracy": 0.8042044517724649 }, { "epoch": 0.3603443158207183, "grad_norm": 4.467177391052246, "learning_rate": 4.036930031958275e-06, "loss": 0.0588, "step": 1214, "video_reward_cumulative_accuracy": 0.8043657331136738 }, { "epoch": 0.3606411398040962, "grad_norm": 2.9094066619873047, "learning_rate": 4.034886175123537e-06, "loss": 0.0309, "step": 1215, "video_reward_cumulative_accuracy": 0.8045267489711934 }, { "epoch": 0.360937963787474, "grad_norm": 2.6900370121002197, "learning_rate": 4.032840670439517e-06, "loss": 0.0351, "step": 1216, "video_reward_cumulative_accuracy": 0.8046875 }, { "epoch": 0.3612347877708519, "grad_norm": 3.0466442108154297, "learning_rate": 4.030793520102264e-06, "loss": 0.0434, "step": 1217, "video_reward_cumulative_accuracy": 0.804847986852917 }, { "epoch": 0.36153161175422976, "grad_norm": 0.9686444401741028, "learning_rate": 4.028744726309592e-06, "loss": 0.0301, "step": 1218, "video_reward_cumulative_accuracy": 0.805008210180624 }, { "epoch": 0.3618284357376076, "grad_norm": 1.4730597734451294, "learning_rate": 4.02669429126108e-06, "loss": 0.0612, "step": 1219, "video_reward_cumulative_accuracy": 0.8051681706316653 }, { "epoch": 0.36212525972098547, "grad_norm": 4.798864841461182, "learning_rate": 4.024642217158068e-06, "loss": 0.0744, "step": 1220, "video_reward_cumulative_accuracy": 0.8045081967213115 }, { "epoch": 0.3624220837043633, "grad_norm": 2.4471278190612793, "learning_rate": 4.022588506203658e-06, "loss": 0.035, "step": 1221, "video_reward_cumulative_accuracy": 0.8046683046683046 }, { "epoch": 0.3627189076877412, "grad_norm": 7.197712421417236, "learning_rate": 4.020533160602708e-06, "loss": 0.0859, "step": 1222, "video_reward_cumulative_accuracy": 0.8048281505728314 }, { "epoch": 0.363015731671119, "grad_norm": 2.378415107727051, "learning_rate": 4.018476182561829e-06, "loss": 0.053, "step": 1223, "video_reward_cumulative_accuracy": 0.8049877350776778 }, { "epoch": 0.3633125556544969, "grad_norm": 1.3243201971054077, "learning_rate": 4.0164175742893894e-06, "loss": 0.0264, "step": 1224, "video_reward_cumulative_accuracy": 0.8051470588235294 }, { "epoch": 0.36360937963787476, "grad_norm": 4.309061050415039, "learning_rate": 4.014357337995504e-06, "loss": 0.0634, "step": 1225, "video_reward_cumulative_accuracy": 0.8048979591836735 }, { "epoch": 0.3639062036212526, "grad_norm": 2.373619318008423, "learning_rate": 4.012295475892036e-06, "loss": 0.0803, "step": 1226, "video_reward_cumulative_accuracy": 0.8050570962479608 }, { "epoch": 0.36420302760463047, "grad_norm": 6.605100154876709, "learning_rate": 4.0102319901925945e-06, "loss": 0.076, "step": 1227, "video_reward_cumulative_accuracy": 0.8048084759576202 }, { "epoch": 0.3644998515880083, "grad_norm": 1.552177906036377, "learning_rate": 4.008166883112532e-06, "loss": 0.0249, "step": 1228, "video_reward_cumulative_accuracy": 0.8049674267100977 }, { "epoch": 0.3647966755713862, "grad_norm": 1.2064961194992065, "learning_rate": 4.00610015686894e-06, "loss": 0.0212, "step": 1229, "video_reward_cumulative_accuracy": 0.8051261187957689 }, { "epoch": 0.365093499554764, "grad_norm": 1.4522373676300049, "learning_rate": 4.004031813680652e-06, "loss": 0.057, "step": 1230, "video_reward_cumulative_accuracy": 0.8052845528455285 }, { "epoch": 0.3653903235381419, "grad_norm": 3.753844738006592, "learning_rate": 4.0019618557682345e-06, "loss": 0.0332, "step": 1231, "video_reward_cumulative_accuracy": 0.8050365556458164 }, { "epoch": 0.36568714752151976, "grad_norm": 0.7311299443244934, "learning_rate": 3.999890285353988e-06, "loss": 0.0161, "step": 1232, "video_reward_cumulative_accuracy": 0.8051948051948052 }, { "epoch": 0.3659839715048976, "grad_norm": 1.8597936630249023, "learning_rate": 3.997817104661943e-06, "loss": 0.0308, "step": 1233, "video_reward_cumulative_accuracy": 0.805352798053528 }, { "epoch": 0.36628079548827547, "grad_norm": 1.923897624015808, "learning_rate": 3.995742315917862e-06, "loss": 0.0338, "step": 1234, "video_reward_cumulative_accuracy": 0.8055105348460292 }, { "epoch": 0.3665776194716533, "grad_norm": 1.4819157123565674, "learning_rate": 3.993665921349232e-06, "loss": 0.0398, "step": 1235, "video_reward_cumulative_accuracy": 0.805668016194332 }, { "epoch": 0.3668744434550312, "grad_norm": 3.7512669563293457, "learning_rate": 3.991587923185263e-06, "loss": 0.0493, "step": 1236, "video_reward_cumulative_accuracy": 0.8050161812297735 }, { "epoch": 0.367171267438409, "grad_norm": 0.6202178597450256, "learning_rate": 3.989508323656888e-06, "loss": 0.0137, "step": 1237, "video_reward_cumulative_accuracy": 0.8051738075990299 }, { "epoch": 0.3674680914217869, "grad_norm": 3.8399429321289062, "learning_rate": 3.987427124996759e-06, "loss": 0.0561, "step": 1238, "video_reward_cumulative_accuracy": 0.8053311793214862 }, { "epoch": 0.36776491540516476, "grad_norm": 1.2864596843719482, "learning_rate": 3.985344329439246e-06, "loss": 0.0431, "step": 1239, "video_reward_cumulative_accuracy": 0.8050847457627118 }, { "epoch": 0.3680617393885426, "grad_norm": 2.3478002548217773, "learning_rate": 3.983259939220431e-06, "loss": 0.03, "step": 1240, "video_reward_cumulative_accuracy": 0.805241935483871 }, { "epoch": 0.36835856337192047, "grad_norm": 4.141085624694824, "learning_rate": 3.9811739565781085e-06, "loss": 0.0648, "step": 1241, "video_reward_cumulative_accuracy": 0.8053988718775181 }, { "epoch": 0.3686553873552983, "grad_norm": 3.281418800354004, "learning_rate": 3.979086383751786e-06, "loss": 0.0348, "step": 1242, "video_reward_cumulative_accuracy": 0.8055555555555556 }, { "epoch": 0.3689522113386762, "grad_norm": 2.6300387382507324, "learning_rate": 3.976997222982671e-06, "loss": 0.0671, "step": 1243, "video_reward_cumulative_accuracy": 0.8053097345132744 }, { "epoch": 0.369249035322054, "grad_norm": 2.545103073120117, "learning_rate": 3.974906476513686e-06, "loss": 0.0264, "step": 1244, "video_reward_cumulative_accuracy": 0.805064308681672 }, { "epoch": 0.3695458593054319, "grad_norm": 3.0602078437805176, "learning_rate": 3.972814146589446e-06, "loss": 0.051, "step": 1245, "video_reward_cumulative_accuracy": 0.8052208835341366 }, { "epoch": 0.36984268328880976, "grad_norm": 3.791985273361206, "learning_rate": 3.970720235456272e-06, "loss": 0.0344, "step": 1246, "video_reward_cumulative_accuracy": 0.8053772070626003 }, { "epoch": 0.3701395072721876, "grad_norm": 2.767526388168335, "learning_rate": 3.96862474536218e-06, "loss": 0.0703, "step": 1247, "video_reward_cumulative_accuracy": 0.8051323175621492 }, { "epoch": 0.37043633125556547, "grad_norm": 3.0397701263427734, "learning_rate": 3.9665276785568825e-06, "loss": 0.0825, "step": 1248, "video_reward_cumulative_accuracy": 0.8048878205128205 }, { "epoch": 0.3707331552389433, "grad_norm": 1.6637073755264282, "learning_rate": 3.964429037291785e-06, "loss": 0.0129, "step": 1249, "video_reward_cumulative_accuracy": 0.8050440352281826 }, { "epoch": 0.37102997922232117, "grad_norm": 4.110602855682373, "learning_rate": 3.962328823819981e-06, "loss": 0.0622, "step": 1250, "video_reward_cumulative_accuracy": 0.8048 }, { "epoch": 0.371326803205699, "grad_norm": 2.5476889610290527, "learning_rate": 3.960227040396255e-06, "loss": 0.0384, "step": 1251, "video_reward_cumulative_accuracy": 0.8049560351718625 }, { "epoch": 0.3716236271890769, "grad_norm": 0.3595353364944458, "learning_rate": 3.958123689277074e-06, "loss": 0.0129, "step": 1252, "video_reward_cumulative_accuracy": 0.805111821086262 }, { "epoch": 0.37192045117245476, "grad_norm": 0.8653481602668762, "learning_rate": 3.956018772720591e-06, "loss": 0.0145, "step": 1253, "video_reward_cumulative_accuracy": 0.8052673583399841 }, { "epoch": 0.3722172751558326, "grad_norm": 4.666868209838867, "learning_rate": 3.953912292986637e-06, "loss": 0.0618, "step": 1254, "video_reward_cumulative_accuracy": 0.8054226475279107 }, { "epoch": 0.37251409913921046, "grad_norm": 1.4767639636993408, "learning_rate": 3.951804252336723e-06, "loss": 0.027, "step": 1255, "video_reward_cumulative_accuracy": 0.8055776892430279 }, { "epoch": 0.3728109231225883, "grad_norm": 1.8188785314559937, "learning_rate": 3.949694653034036e-06, "loss": 0.0648, "step": 1256, "video_reward_cumulative_accuracy": 0.8057324840764332 }, { "epoch": 0.37310774710596617, "grad_norm": 4.584212779998779, "learning_rate": 3.9475834973434345e-06, "loss": 0.0546, "step": 1257, "video_reward_cumulative_accuracy": 0.8058870326173428 }, { "epoch": 0.373404571089344, "grad_norm": 2.5021183490753174, "learning_rate": 3.94547078753145e-06, "loss": 0.0283, "step": 1258, "video_reward_cumulative_accuracy": 0.8060413354531002 }, { "epoch": 0.3737013950727219, "grad_norm": 0.826378345489502, "learning_rate": 3.94335652586628e-06, "loss": 0.0123, "step": 1259, "video_reward_cumulative_accuracy": 0.8061953931691819 }, { "epoch": 0.37399821905609976, "grad_norm": 2.355395555496216, "learning_rate": 3.941240714617791e-06, "loss": 0.0221, "step": 1260, "video_reward_cumulative_accuracy": 0.8063492063492064 }, { "epoch": 0.3742950430394776, "grad_norm": 0.9242327213287354, "learning_rate": 3.9391233560575116e-06, "loss": 0.011, "step": 1261, "video_reward_cumulative_accuracy": 0.8065027755749405 }, { "epoch": 0.37459186702285546, "grad_norm": 1.269935131072998, "learning_rate": 3.937004452458631e-06, "loss": 0.0226, "step": 1262, "video_reward_cumulative_accuracy": 0.8066561014263075 }, { "epoch": 0.3748886910062333, "grad_norm": 1.5934422016143799, "learning_rate": 3.9348840060959985e-06, "loss": 0.0288, "step": 1263, "video_reward_cumulative_accuracy": 0.8068091844813935 }, { "epoch": 0.37518551498961117, "grad_norm": 1.6049624681472778, "learning_rate": 3.932762019246119e-06, "loss": 0.0327, "step": 1264, "video_reward_cumulative_accuracy": 0.8065664556962026 }, { "epoch": 0.375482338972989, "grad_norm": 2.5892493724823, "learning_rate": 3.930638494187151e-06, "loss": 0.0368, "step": 1265, "video_reward_cumulative_accuracy": 0.8063241106719368 }, { "epoch": 0.3757791629563669, "grad_norm": 4.644944667816162, "learning_rate": 3.928513433198905e-06, "loss": 0.1756, "step": 1266, "video_reward_cumulative_accuracy": 0.8060821484992101 }, { "epoch": 0.37607598693974476, "grad_norm": 3.284151315689087, "learning_rate": 3.92638683856284e-06, "loss": 0.0922, "step": 1267, "video_reward_cumulative_accuracy": 0.8058405682715075 }, { "epoch": 0.3763728109231226, "grad_norm": 1.4588419198989868, "learning_rate": 3.924258712562061e-06, "loss": 0.0219, "step": 1268, "video_reward_cumulative_accuracy": 0.805993690851735 }, { "epoch": 0.37666963490650046, "grad_norm": 2.8274147510528564, "learning_rate": 3.9221290574813205e-06, "loss": 0.0222, "step": 1269, "video_reward_cumulative_accuracy": 0.806146572104019 }, { "epoch": 0.3769664588898783, "grad_norm": 2.812047243118286, "learning_rate": 3.919997875607008e-06, "loss": 0.0383, "step": 1270, "video_reward_cumulative_accuracy": 0.8062992125984252 }, { "epoch": 0.37726328287325617, "grad_norm": 5.075555324554443, "learning_rate": 3.917865169227154e-06, "loss": 0.0635, "step": 1271, "video_reward_cumulative_accuracy": 0.8060582218725413 }, { "epoch": 0.377560106856634, "grad_norm": 1.0230021476745605, "learning_rate": 3.915730940631426e-06, "loss": 0.0303, "step": 1272, "video_reward_cumulative_accuracy": 0.8058176100628931 }, { "epoch": 0.3778569308400119, "grad_norm": 1.6530554294586182, "learning_rate": 3.913595192111124e-06, "loss": 0.0386, "step": 1273, "video_reward_cumulative_accuracy": 0.8059701492537313 }, { "epoch": 0.37815375482338975, "grad_norm": 5.165441513061523, "learning_rate": 3.911457925959185e-06, "loss": 0.062, "step": 1274, "video_reward_cumulative_accuracy": 0.8057299843014128 }, { "epoch": 0.3784505788067676, "grad_norm": 1.2560231685638428, "learning_rate": 3.909319144470169e-06, "loss": 0.0234, "step": 1275, "video_reward_cumulative_accuracy": 0.8058823529411765 }, { "epoch": 0.37874740279014546, "grad_norm": 0.828478991985321, "learning_rate": 3.907178849940266e-06, "loss": 0.0111, "step": 1276, "video_reward_cumulative_accuracy": 0.8060344827586207 }, { "epoch": 0.3790442267735233, "grad_norm": 3.1586334705352783, "learning_rate": 3.90503704466729e-06, "loss": 0.0643, "step": 1277, "video_reward_cumulative_accuracy": 0.8061863743148003 }, { "epoch": 0.37934105075690117, "grad_norm": 3.0786163806915283, "learning_rate": 3.902893730950676e-06, "loss": 0.0786, "step": 1278, "video_reward_cumulative_accuracy": 0.8063380281690141 }, { "epoch": 0.379637874740279, "grad_norm": 2.47560453414917, "learning_rate": 3.900748911091481e-06, "loss": 0.0522, "step": 1279, "video_reward_cumulative_accuracy": 0.8060985144644254 }, { "epoch": 0.37993469872365687, "grad_norm": 2.573753595352173, "learning_rate": 3.898602587392377e-06, "loss": 0.0691, "step": 1280, "video_reward_cumulative_accuracy": 0.80625 }, { "epoch": 0.38023152270703475, "grad_norm": 6.1154656410217285, "learning_rate": 3.89645476215765e-06, "loss": 0.1061, "step": 1281, "video_reward_cumulative_accuracy": 0.8064012490241999 }, { "epoch": 0.3805283466904126, "grad_norm": 2.435875654220581, "learning_rate": 3.894305437693198e-06, "loss": 0.0309, "step": 1282, "video_reward_cumulative_accuracy": 0.8061622464898596 }, { "epoch": 0.38082517067379046, "grad_norm": 3.2194957733154297, "learning_rate": 3.892154616306531e-06, "loss": 0.0485, "step": 1283, "video_reward_cumulative_accuracy": 0.8063133281371785 }, { "epoch": 0.3811219946571683, "grad_norm": 2.315264940261841, "learning_rate": 3.890002300306764e-06, "loss": 0.0662, "step": 1284, "video_reward_cumulative_accuracy": 0.8064641744548287 }, { "epoch": 0.38141881864054616, "grad_norm": 2.33933687210083, "learning_rate": 3.887848492004618e-06, "loss": 0.0468, "step": 1285, "video_reward_cumulative_accuracy": 0.8066147859922179 }, { "epoch": 0.381715642623924, "grad_norm": 2.370605707168579, "learning_rate": 3.885693193712413e-06, "loss": 0.0385, "step": 1286, "video_reward_cumulative_accuracy": 0.8063763608087092 }, { "epoch": 0.38201246660730187, "grad_norm": 2.9528722763061523, "learning_rate": 3.883536407744073e-06, "loss": 0.0312, "step": 1287, "video_reward_cumulative_accuracy": 0.8061383061383062 }, { "epoch": 0.38230929059067975, "grad_norm": 1.1673752069473267, "learning_rate": 3.881378136415117e-06, "loss": 0.0343, "step": 1288, "video_reward_cumulative_accuracy": 0.8062888198757764 }, { "epoch": 0.3826061145740576, "grad_norm": 1.4453524351119995, "learning_rate": 3.8792183820426575e-06, "loss": 0.0593, "step": 1289, "video_reward_cumulative_accuracy": 0.8064391000775796 }, { "epoch": 0.38290293855743546, "grad_norm": 2.969148874282837, "learning_rate": 3.877057146945401e-06, "loss": 0.0384, "step": 1290, "video_reward_cumulative_accuracy": 0.8065891472868217 }, { "epoch": 0.3831997625408133, "grad_norm": 2.2823967933654785, "learning_rate": 3.874894433443643e-06, "loss": 0.0443, "step": 1291, "video_reward_cumulative_accuracy": 0.8067389620449265 }, { "epoch": 0.38349658652419116, "grad_norm": 1.4445525407791138, "learning_rate": 3.872730243859267e-06, "loss": 0.0504, "step": 1292, "video_reward_cumulative_accuracy": 0.8065015479876161 }, { "epoch": 0.383793410507569, "grad_norm": 1.5774403810501099, "learning_rate": 3.87056458051574e-06, "loss": 0.0322, "step": 1293, "video_reward_cumulative_accuracy": 0.8066511987625676 }, { "epoch": 0.38409023449094687, "grad_norm": 2.641799211502075, "learning_rate": 3.868397445738112e-06, "loss": 0.0246, "step": 1294, "video_reward_cumulative_accuracy": 0.8068006182380216 }, { "epoch": 0.38438705847432475, "grad_norm": 2.0484931468963623, "learning_rate": 3.866228841853012e-06, "loss": 0.0937, "step": 1295, "video_reward_cumulative_accuracy": 0.806949806949807 }, { "epoch": 0.3846838824577026, "grad_norm": 3.79331374168396, "learning_rate": 3.864058771188648e-06, "loss": 0.0471, "step": 1296, "video_reward_cumulative_accuracy": 0.8070987654320988 }, { "epoch": 0.38498070644108046, "grad_norm": 1.6458531618118286, "learning_rate": 3.861887236074801e-06, "loss": 0.0402, "step": 1297, "video_reward_cumulative_accuracy": 0.8072474942174248 }, { "epoch": 0.3852775304244583, "grad_norm": 2.398191213607788, "learning_rate": 3.859714238842823e-06, "loss": 0.0288, "step": 1298, "video_reward_cumulative_accuracy": 0.8073959938366718 }, { "epoch": 0.38557435440783616, "grad_norm": 1.3121765851974487, "learning_rate": 3.8575397818256396e-06, "loss": 0.0379, "step": 1299, "video_reward_cumulative_accuracy": 0.8075442648190916 }, { "epoch": 0.385871178391214, "grad_norm": 2.593432903289795, "learning_rate": 3.855363867357741e-06, "loss": 0.0366, "step": 1300, "video_reward_cumulative_accuracy": 0.8076923076923077 }, { "epoch": 0.38616800237459187, "grad_norm": 2.9913852214813232, "learning_rate": 3.853186497775181e-06, "loss": 0.0427, "step": 1301, "video_reward_cumulative_accuracy": 0.8078401229823213 }, { "epoch": 0.38646482635796975, "grad_norm": 1.1634633541107178, "learning_rate": 3.85100767541558e-06, "loss": 0.0457, "step": 1302, "video_reward_cumulative_accuracy": 0.8079877112135176 }, { "epoch": 0.3867616503413476, "grad_norm": 2.312039852142334, "learning_rate": 3.8488274026181125e-06, "loss": 0.0337, "step": 1303, "video_reward_cumulative_accuracy": 0.8077513430544896 }, { "epoch": 0.38705847432472545, "grad_norm": 0.9711390733718872, "learning_rate": 3.846645681723514e-06, "loss": 0.0367, "step": 1304, "video_reward_cumulative_accuracy": 0.807898773006135 }, { "epoch": 0.3873552983081033, "grad_norm": 0.5557654500007629, "learning_rate": 3.844462515074075e-06, "loss": 0.0175, "step": 1305, "video_reward_cumulative_accuracy": 0.8080459770114943 }, { "epoch": 0.38765212229148116, "grad_norm": 1.1500357389450073, "learning_rate": 3.842277905013634e-06, "loss": 0.0308, "step": 1306, "video_reward_cumulative_accuracy": 0.8081929555895865 }, { "epoch": 0.387948946274859, "grad_norm": 1.2327475547790527, "learning_rate": 3.840091853887585e-06, "loss": 0.0429, "step": 1307, "video_reward_cumulative_accuracy": 0.8083397092578424 }, { "epoch": 0.38824577025823687, "grad_norm": 0.717802107334137, "learning_rate": 3.837904364042864e-06, "loss": 0.0256, "step": 1308, "video_reward_cumulative_accuracy": 0.8084862385321101 }, { "epoch": 0.38854259424161475, "grad_norm": 1.6281253099441528, "learning_rate": 3.835715437827954e-06, "loss": 0.0191, "step": 1309, "video_reward_cumulative_accuracy": 0.8086325439266616 }, { "epoch": 0.3888394182249926, "grad_norm": 2.4564850330352783, "learning_rate": 3.83352507759288e-06, "loss": 0.0526, "step": 1310, "video_reward_cumulative_accuracy": 0.8087786259541985 }, { "epoch": 0.38913624220837045, "grad_norm": 1.5885370969772339, "learning_rate": 3.831333285689207e-06, "loss": 0.0145, "step": 1311, "video_reward_cumulative_accuracy": 0.8089244851258581 }, { "epoch": 0.3894330661917483, "grad_norm": 3.0064384937286377, "learning_rate": 3.829140064470035e-06, "loss": 0.0724, "step": 1312, "video_reward_cumulative_accuracy": 0.8090701219512195 }, { "epoch": 0.38972989017512616, "grad_norm": 0.9188132882118225, "learning_rate": 3.826945416290001e-06, "loss": 0.012, "step": 1313, "video_reward_cumulative_accuracy": 0.8092155369383092 }, { "epoch": 0.390026714158504, "grad_norm": 2.3891713619232178, "learning_rate": 3.824749343505271e-06, "loss": 0.0717, "step": 1314, "video_reward_cumulative_accuracy": 0.8089802130898022 }, { "epoch": 0.39032353814188187, "grad_norm": 2.077953577041626, "learning_rate": 3.822551848473545e-06, "loss": 0.0498, "step": 1315, "video_reward_cumulative_accuracy": 0.8091254752851711 }, { "epoch": 0.39062036212525975, "grad_norm": 1.5445294380187988, "learning_rate": 3.820352933554045e-06, "loss": 0.0276, "step": 1316, "video_reward_cumulative_accuracy": 0.8088905775075987 }, { "epoch": 0.39091718610863757, "grad_norm": 3.751812219619751, "learning_rate": 3.81815260110752e-06, "loss": 0.0526, "step": 1317, "video_reward_cumulative_accuracy": 0.8090356871678056 }, { "epoch": 0.39121401009201545, "grad_norm": 3.9450860023498535, "learning_rate": 3.815950853496242e-06, "loss": 0.0608, "step": 1318, "video_reward_cumulative_accuracy": 0.8091805766312595 }, { "epoch": 0.3915108340753933, "grad_norm": 2.4571402072906494, "learning_rate": 3.813747693083999e-06, "loss": 0.0406, "step": 1319, "video_reward_cumulative_accuracy": 0.809325246398787 }, { "epoch": 0.39180765805877116, "grad_norm": 0.831182062625885, "learning_rate": 3.8115431222360984e-06, "loss": 0.0075, "step": 1320, "video_reward_cumulative_accuracy": 0.809469696969697 }, { "epoch": 0.392104482042149, "grad_norm": 5.207632541656494, "learning_rate": 3.80933714331936e-06, "loss": 0.0581, "step": 1321, "video_reward_cumulative_accuracy": 0.8096139288417865 }, { "epoch": 0.39240130602552686, "grad_norm": 0.5821350812911987, "learning_rate": 3.807129758702117e-06, "loss": 0.0128, "step": 1322, "video_reward_cumulative_accuracy": 0.8097579425113465 }, { "epoch": 0.39269813000890474, "grad_norm": 4.091737747192383, "learning_rate": 3.804920970754211e-06, "loss": 0.032, "step": 1323, "video_reward_cumulative_accuracy": 0.809901738473167 }, { "epoch": 0.39299495399228257, "grad_norm": 1.5021380186080933, "learning_rate": 3.802710781846991e-06, "loss": 0.017, "step": 1324, "video_reward_cumulative_accuracy": 0.8100453172205438 }, { "epoch": 0.39329177797566045, "grad_norm": 3.842895984649658, "learning_rate": 3.8004991943533077e-06, "loss": 0.059, "step": 1325, "video_reward_cumulative_accuracy": 0.810188679245283 }, { "epoch": 0.3935886019590383, "grad_norm": 3.194486141204834, "learning_rate": 3.798286210647516e-06, "loss": 0.0299, "step": 1326, "video_reward_cumulative_accuracy": 0.8103318250377074 }, { "epoch": 0.39388542594241616, "grad_norm": 2.559457302093506, "learning_rate": 3.796071833105468e-06, "loss": 0.0648, "step": 1327, "video_reward_cumulative_accuracy": 0.8100979653353428 }, { "epoch": 0.394182249925794, "grad_norm": 4.017072677612305, "learning_rate": 3.793856064104514e-06, "loss": 0.057, "step": 1328, "video_reward_cumulative_accuracy": 0.8102409638554217 }, { "epoch": 0.39447907390917186, "grad_norm": 0.940937876701355, "learning_rate": 3.7916389060234964e-06, "loss": 0.0176, "step": 1329, "video_reward_cumulative_accuracy": 0.8103837471783296 }, { "epoch": 0.39477589789254974, "grad_norm": 3.047013759613037, "learning_rate": 3.78942036124275e-06, "loss": 0.0547, "step": 1330, "video_reward_cumulative_accuracy": 0.8105263157894737 }, { "epoch": 0.39507272187592757, "grad_norm": 0.7967216372489929, "learning_rate": 3.787200432144097e-06, "loss": 0.0175, "step": 1331, "video_reward_cumulative_accuracy": 0.8106686701728024 }, { "epoch": 0.39536954585930545, "grad_norm": 3.501380205154419, "learning_rate": 3.784979121110848e-06, "loss": 0.0465, "step": 1332, "video_reward_cumulative_accuracy": 0.8108108108108109 }, { "epoch": 0.3956663698426833, "grad_norm": 3.338715076446533, "learning_rate": 3.782756430527794e-06, "loss": 0.0628, "step": 1333, "video_reward_cumulative_accuracy": 0.8105776444111028 }, { "epoch": 0.39596319382606116, "grad_norm": 4.169296741485596, "learning_rate": 3.7805323627812108e-06, "loss": 0.0453, "step": 1334, "video_reward_cumulative_accuracy": 0.81071964017991 }, { "epoch": 0.396260017809439, "grad_norm": 2.169301986694336, "learning_rate": 3.778306920258852e-06, "loss": 0.0294, "step": 1335, "video_reward_cumulative_accuracy": 0.8104868913857678 }, { "epoch": 0.39655684179281686, "grad_norm": 2.8611955642700195, "learning_rate": 3.7760801053499435e-06, "loss": 0.0706, "step": 1336, "video_reward_cumulative_accuracy": 0.8106287425149701 }, { "epoch": 0.39685366577619474, "grad_norm": 3.386845827102661, "learning_rate": 3.7738519204451883e-06, "loss": 0.0497, "step": 1337, "video_reward_cumulative_accuracy": 0.8107703814510098 }, { "epoch": 0.39715048975957257, "grad_norm": 3.034348249435425, "learning_rate": 3.7716223679367604e-06, "loss": 0.0708, "step": 1338, "video_reward_cumulative_accuracy": 0.8109118086696562 }, { "epoch": 0.39744731374295045, "grad_norm": 0.7038185000419617, "learning_rate": 3.769391450218298e-06, "loss": 0.0199, "step": 1339, "video_reward_cumulative_accuracy": 0.8110530246452576 }, { "epoch": 0.3977441377263283, "grad_norm": 2.768979072570801, "learning_rate": 3.767159169684911e-06, "loss": 0.0546, "step": 1340, "video_reward_cumulative_accuracy": 0.8111940298507463 }, { "epoch": 0.39804096170970615, "grad_norm": 2.743908405303955, "learning_rate": 3.7649255287331676e-06, "loss": 0.0425, "step": 1341, "video_reward_cumulative_accuracy": 0.8113348247576435 }, { "epoch": 0.398337785693084, "grad_norm": 2.2306787967681885, "learning_rate": 3.762690529761097e-06, "loss": 0.0258, "step": 1342, "video_reward_cumulative_accuracy": 0.8114754098360656 }, { "epoch": 0.39863460967646186, "grad_norm": 1.8014007806777954, "learning_rate": 3.7604541751681904e-06, "loss": 0.0331, "step": 1343, "video_reward_cumulative_accuracy": 0.8116157855547282 }, { "epoch": 0.39893143365983974, "grad_norm": 2.2490646839141846, "learning_rate": 3.7582164673553888e-06, "loss": 0.0227, "step": 1344, "video_reward_cumulative_accuracy": 0.8117559523809523 }, { "epoch": 0.39922825764321757, "grad_norm": 3.8205676078796387, "learning_rate": 3.7559774087250906e-06, "loss": 0.0826, "step": 1345, "video_reward_cumulative_accuracy": 0.8118959107806691 }, { "epoch": 0.39952508162659545, "grad_norm": 4.000797271728516, "learning_rate": 3.753737001681142e-06, "loss": 0.0942, "step": 1346, "video_reward_cumulative_accuracy": 0.812035661218425 }, { "epoch": 0.39982190560997327, "grad_norm": 3.239428758621216, "learning_rate": 3.7514952486288365e-06, "loss": 0.0449, "step": 1347, "video_reward_cumulative_accuracy": 0.811804008908686 }, { "epoch": 0.40011872959335115, "grad_norm": 4.8066725730896, "learning_rate": 3.7492521519749146e-06, "loss": 0.0516, "step": 1348, "video_reward_cumulative_accuracy": 0.8119436201780416 }, { "epoch": 0.400415553576729, "grad_norm": 3.3413074016571045, "learning_rate": 3.7470077141275578e-06, "loss": 0.0927, "step": 1349, "video_reward_cumulative_accuracy": 0.8117123795404003 }, { "epoch": 0.40071237756010686, "grad_norm": 1.3113895654678345, "learning_rate": 3.744761937496389e-06, "loss": 0.0308, "step": 1350, "video_reward_cumulative_accuracy": 0.8118518518518518 }, { "epoch": 0.40100920154348474, "grad_norm": 1.7896422147750854, "learning_rate": 3.742514824492465e-06, "loss": 0.0702, "step": 1351, "video_reward_cumulative_accuracy": 0.8119911176905995 }, { "epoch": 0.40130602552686256, "grad_norm": 3.303739309310913, "learning_rate": 3.740266377528282e-06, "loss": 0.0711, "step": 1352, "video_reward_cumulative_accuracy": 0.8121301775147929 }, { "epoch": 0.40160284951024044, "grad_norm": 0.9642285108566284, "learning_rate": 3.738016599017766e-06, "loss": 0.0306, "step": 1353, "video_reward_cumulative_accuracy": 0.8122690317812269 }, { "epoch": 0.40189967349361827, "grad_norm": 1.783601999282837, "learning_rate": 3.735765491376271e-06, "loss": 0.041, "step": 1354, "video_reward_cumulative_accuracy": 0.8124076809453471 }, { "epoch": 0.40219649747699615, "grad_norm": 2.2338671684265137, "learning_rate": 3.733513057020581e-06, "loss": 0.0219, "step": 1355, "video_reward_cumulative_accuracy": 0.8125461254612546 }, { "epoch": 0.402493321460374, "grad_norm": 2.4188389778137207, "learning_rate": 3.731259298368902e-06, "loss": 0.042, "step": 1356, "video_reward_cumulative_accuracy": 0.8126843657817109 }, { "epoch": 0.40279014544375186, "grad_norm": 2.109005928039551, "learning_rate": 3.7290042178408625e-06, "loss": 0.0795, "step": 1357, "video_reward_cumulative_accuracy": 0.8124539425202653 }, { "epoch": 0.40308696942712974, "grad_norm": 2.0904476642608643, "learning_rate": 3.726747817857511e-06, "loss": 0.039, "step": 1358, "video_reward_cumulative_accuracy": 0.8125920471281296 }, { "epoch": 0.40338379341050756, "grad_norm": 3.161112070083618, "learning_rate": 3.7244901008413127e-06, "loss": 0.0728, "step": 1359, "video_reward_cumulative_accuracy": 0.8123620309050773 }, { "epoch": 0.40368061739388544, "grad_norm": 2.363586187362671, "learning_rate": 3.7222310692161434e-06, "loss": 0.0416, "step": 1360, "video_reward_cumulative_accuracy": 0.8125 }, { "epoch": 0.40397744137726327, "grad_norm": 1.192459225654602, "learning_rate": 3.7199707254072953e-06, "loss": 0.0394, "step": 1361, "video_reward_cumulative_accuracy": 0.8126377663482733 }, { "epoch": 0.40427426536064115, "grad_norm": 2.6609785556793213, "learning_rate": 3.7177090718414654e-06, "loss": 0.1119, "step": 1362, "video_reward_cumulative_accuracy": 0.8124082232011748 }, { "epoch": 0.404571089344019, "grad_norm": 3.815920352935791, "learning_rate": 3.7154461109467586e-06, "loss": 0.072, "step": 1363, "video_reward_cumulative_accuracy": 0.8125458547322084 }, { "epoch": 0.40486791332739686, "grad_norm": 2.569744110107422, "learning_rate": 3.713181845152684e-06, "loss": 0.025, "step": 1364, "video_reward_cumulative_accuracy": 0.8126832844574781 }, { "epoch": 0.40516473731077474, "grad_norm": 3.3158631324768066, "learning_rate": 3.710916276890149e-06, "loss": 0.0523, "step": 1365, "video_reward_cumulative_accuracy": 0.8128205128205128 }, { "epoch": 0.40546156129415256, "grad_norm": 3.6916356086730957, "learning_rate": 3.7086494085914632e-06, "loss": 0.0656, "step": 1366, "video_reward_cumulative_accuracy": 0.8129575402635432 }, { "epoch": 0.40575838527753044, "grad_norm": 1.8868242502212524, "learning_rate": 3.7063812426903273e-06, "loss": 0.031, "step": 1367, "video_reward_cumulative_accuracy": 0.8127286027798098 }, { "epoch": 0.40605520926090827, "grad_norm": 3.946322441101074, "learning_rate": 3.7041117816218396e-06, "loss": 0.068, "step": 1368, "video_reward_cumulative_accuracy": 0.8128654970760234 }, { "epoch": 0.40635203324428615, "grad_norm": 4.210629940032959, "learning_rate": 3.7018410278224852e-06, "loss": 0.0726, "step": 1369, "video_reward_cumulative_accuracy": 0.8122717311906501 }, { "epoch": 0.406648857227664, "grad_norm": 3.0957443714141846, "learning_rate": 3.69956898373014e-06, "loss": 0.0363, "step": 1370, "video_reward_cumulative_accuracy": 0.8124087591240876 }, { "epoch": 0.40694568121104185, "grad_norm": 1.1315875053405762, "learning_rate": 3.697295651784063e-06, "loss": 0.0365, "step": 1371, "video_reward_cumulative_accuracy": 0.812545587162655 }, { "epoch": 0.40724250519441973, "grad_norm": 4.435636043548584, "learning_rate": 3.695021034424897e-06, "loss": 0.0564, "step": 1372, "video_reward_cumulative_accuracy": 0.8126822157434402 }, { "epoch": 0.40753932917779756, "grad_norm": 1.2530689239501953, "learning_rate": 3.692745134094665e-06, "loss": 0.03, "step": 1373, "video_reward_cumulative_accuracy": 0.8128186453022578 }, { "epoch": 0.40783615316117544, "grad_norm": 1.9480600357055664, "learning_rate": 3.690467953236766e-06, "loss": 0.0375, "step": 1374, "video_reward_cumulative_accuracy": 0.8125909752547307 }, { "epoch": 0.40813297714455327, "grad_norm": 1.831527590751648, "learning_rate": 3.6881894942959752e-06, "loss": 0.0614, "step": 1375, "video_reward_cumulative_accuracy": 0.8127272727272727 }, { "epoch": 0.40842980112793115, "grad_norm": 2.9821012020111084, "learning_rate": 3.6859097597184395e-06, "loss": 0.0336, "step": 1376, "video_reward_cumulative_accuracy": 0.8128633720930233 }, { "epoch": 0.40872662511130897, "grad_norm": 1.6577091217041016, "learning_rate": 3.6836287519516745e-06, "loss": 0.0375, "step": 1377, "video_reward_cumulative_accuracy": 0.8126361655773421 }, { "epoch": 0.40902344909468685, "grad_norm": 3.604968309402466, "learning_rate": 3.681346473444565e-06, "loss": 0.0396, "step": 1378, "video_reward_cumulative_accuracy": 0.8127721335268505 }, { "epoch": 0.40932027307806473, "grad_norm": 4.176747798919678, "learning_rate": 3.6790629266473564e-06, "loss": 0.0433, "step": 1379, "video_reward_cumulative_accuracy": 0.8129079042784626 }, { "epoch": 0.40961709706144256, "grad_norm": 1.8375120162963867, "learning_rate": 3.676778114011659e-06, "loss": 0.0673, "step": 1380, "video_reward_cumulative_accuracy": 0.8130434782608695 }, { "epoch": 0.40991392104482044, "grad_norm": 1.4766967296600342, "learning_rate": 3.6744920379904407e-06, "loss": 0.0376, "step": 1381, "video_reward_cumulative_accuracy": 0.8131788559015206 }, { "epoch": 0.41021074502819826, "grad_norm": 1.3402959108352661, "learning_rate": 3.6722047010380265e-06, "loss": 0.0401, "step": 1382, "video_reward_cumulative_accuracy": 0.8133140376266281 }, { "epoch": 0.41050756901157615, "grad_norm": 2.443718671798706, "learning_rate": 3.669916105610094e-06, "loss": 0.0345, "step": 1383, "video_reward_cumulative_accuracy": 0.8134490238611713 }, { "epoch": 0.41080439299495397, "grad_norm": 2.9310362339019775, "learning_rate": 3.667626254163673e-06, "loss": 0.0351, "step": 1384, "video_reward_cumulative_accuracy": 0.8132225433526011 }, { "epoch": 0.41110121697833185, "grad_norm": 1.3766952753067017, "learning_rate": 3.665335149157141e-06, "loss": 0.0203, "step": 1385, "video_reward_cumulative_accuracy": 0.8133574007220217 }, { "epoch": 0.41139804096170973, "grad_norm": 2.6975274085998535, "learning_rate": 3.6630427930502215e-06, "loss": 0.0513, "step": 1386, "video_reward_cumulative_accuracy": 0.8131313131313131 }, { "epoch": 0.41169486494508756, "grad_norm": 2.027492046356201, "learning_rate": 3.6607491883039807e-06, "loss": 0.071, "step": 1387, "video_reward_cumulative_accuracy": 0.8132660418168709 }, { "epoch": 0.41199168892846544, "grad_norm": 1.6064057350158691, "learning_rate": 3.658454337380827e-06, "loss": 0.0425, "step": 1388, "video_reward_cumulative_accuracy": 0.8134005763688761 }, { "epoch": 0.41228851291184326, "grad_norm": 2.689882755279541, "learning_rate": 3.6561582427445053e-06, "loss": 0.0549, "step": 1389, "video_reward_cumulative_accuracy": 0.8135349172066235 }, { "epoch": 0.41258533689522114, "grad_norm": 2.3749701976776123, "learning_rate": 3.653860906860096e-06, "loss": 0.0514, "step": 1390, "video_reward_cumulative_accuracy": 0.8133093525179856 }, { "epoch": 0.41288216087859897, "grad_norm": 2.138916015625, "learning_rate": 3.651562332194012e-06, "loss": 0.0303, "step": 1391, "video_reward_cumulative_accuracy": 0.8134435657800144 }, { "epoch": 0.41317898486197685, "grad_norm": 3.9567198753356934, "learning_rate": 3.6492625212139964e-06, "loss": 0.0721, "step": 1392, "video_reward_cumulative_accuracy": 0.8135775862068966 }, { "epoch": 0.41347580884535473, "grad_norm": 1.3105418682098389, "learning_rate": 3.6469614763891193e-06, "loss": 0.0371, "step": 1393, "video_reward_cumulative_accuracy": 0.8137114142139268 }, { "epoch": 0.41377263282873256, "grad_norm": 1.6768875122070312, "learning_rate": 3.644659200189776e-06, "loss": 0.0369, "step": 1394, "video_reward_cumulative_accuracy": 0.8134863701578192 }, { "epoch": 0.41406945681211044, "grad_norm": 3.7469892501831055, "learning_rate": 3.6423556950876827e-06, "loss": 0.0554, "step": 1395, "video_reward_cumulative_accuracy": 0.8136200716845878 }, { "epoch": 0.41436628079548826, "grad_norm": 5.655117511749268, "learning_rate": 3.6400509635558766e-06, "loss": 0.0512, "step": 1396, "video_reward_cumulative_accuracy": 0.8133954154727794 }, { "epoch": 0.41466310477886614, "grad_norm": 3.703136682510376, "learning_rate": 3.6377450080687106e-06, "loss": 0.0283, "step": 1397, "video_reward_cumulative_accuracy": 0.813528990694345 }, { "epoch": 0.41495992876224397, "grad_norm": 3.908426284790039, "learning_rate": 3.635437831101851e-06, "loss": 0.0451, "step": 1398, "video_reward_cumulative_accuracy": 0.8136623748211731 }, { "epoch": 0.41525675274562185, "grad_norm": 1.4794903993606567, "learning_rate": 3.633129435132277e-06, "loss": 0.0248, "step": 1399, "video_reward_cumulative_accuracy": 0.813795568263045 }, { "epoch": 0.41555357672899973, "grad_norm": 1.4713983535766602, "learning_rate": 3.630819822638275e-06, "loss": 0.0451, "step": 1400, "video_reward_cumulative_accuracy": 0.8135714285714286 }, { "epoch": 0.41555357672899973, "eval_runtime": 130.0037, "eval_samples_per_second": 6.069, "eval_steps_per_second": 0.762, "eval_test_set_accuracy": 0.773989898989899, "step": 1400 }, { "epoch": 0.41585040071237755, "grad_norm": 2.494872570037842, "learning_rate": 3.6285089960994396e-06, "loss": 0.0338, "step": 1401, "video_reward_cumulative_accuracy": 0.8137044967880086 }, { "epoch": 0.41614722469575544, "grad_norm": 3.971022605895996, "learning_rate": 3.626196957996666e-06, "loss": 0.081, "step": 1402, "video_reward_cumulative_accuracy": 0.8138373751783167 }, { "epoch": 0.41644404867913326, "grad_norm": 2.763796806335449, "learning_rate": 3.6238837108121514e-06, "loss": 0.0512, "step": 1403, "video_reward_cumulative_accuracy": 0.8136136849607983 }, { "epoch": 0.41674087266251114, "grad_norm": 3.586524724960327, "learning_rate": 3.6215692570293924e-06, "loss": 0.0805, "step": 1404, "video_reward_cumulative_accuracy": 0.8137464387464387 }, { "epoch": 0.41703769664588897, "grad_norm": 2.187155246734619, "learning_rate": 3.619253599133178e-06, "loss": 0.0365, "step": 1405, "video_reward_cumulative_accuracy": 0.8135231316725978 }, { "epoch": 0.41733452062926685, "grad_norm": 3.1447842121124268, "learning_rate": 3.6169367396095935e-06, "loss": 0.0494, "step": 1406, "video_reward_cumulative_accuracy": 0.8133001422475107 }, { "epoch": 0.41763134461264473, "grad_norm": 1.2427921295166016, "learning_rate": 3.6146186809460114e-06, "loss": 0.0147, "step": 1407, "video_reward_cumulative_accuracy": 0.8134328358208955 }, { "epoch": 0.41792816859602255, "grad_norm": 1.1575847864151, "learning_rate": 3.612299425631093e-06, "loss": 0.0468, "step": 1408, "video_reward_cumulative_accuracy": 0.8132102272727273 }, { "epoch": 0.41822499257940043, "grad_norm": 0.6622688174247742, "learning_rate": 3.609978976154784e-06, "loss": 0.0116, "step": 1409, "video_reward_cumulative_accuracy": 0.8133427963094393 }, { "epoch": 0.41852181656277826, "grad_norm": 1.4234910011291504, "learning_rate": 3.6076573350083112e-06, "loss": 0.0449, "step": 1410, "video_reward_cumulative_accuracy": 0.8134751773049645 }, { "epoch": 0.41881864054615614, "grad_norm": 0.7136000990867615, "learning_rate": 3.605334504684183e-06, "loss": 0.0299, "step": 1411, "video_reward_cumulative_accuracy": 0.8132530120481928 }, { "epoch": 0.41911546452953397, "grad_norm": 2.231410503387451, "learning_rate": 3.6030104876761835e-06, "loss": 0.0417, "step": 1412, "video_reward_cumulative_accuracy": 0.8133852691218131 }, { "epoch": 0.41941228851291185, "grad_norm": 2.5650246143341064, "learning_rate": 3.600685286479369e-06, "loss": 0.0438, "step": 1413, "video_reward_cumulative_accuracy": 0.813517338995046 }, { "epoch": 0.4197091124962897, "grad_norm": 3.8068923950195312, "learning_rate": 3.59835890359007e-06, "loss": 0.0378, "step": 1414, "video_reward_cumulative_accuracy": 0.8136492220650636 }, { "epoch": 0.42000593647966755, "grad_norm": 2.6522464752197266, "learning_rate": 3.5960313415058833e-06, "loss": 0.0443, "step": 1415, "video_reward_cumulative_accuracy": 0.8134275618374558 }, { "epoch": 0.42030276046304543, "grad_norm": 2.89424729347229, "learning_rate": 3.5937026027256738e-06, "loss": 0.0213, "step": 1416, "video_reward_cumulative_accuracy": 0.8135593220338984 }, { "epoch": 0.42059958444642326, "grad_norm": 3.648902177810669, "learning_rate": 3.591372689749567e-06, "loss": 0.1216, "step": 1417, "video_reward_cumulative_accuracy": 0.8136908962597036 }, { "epoch": 0.42089640842980114, "grad_norm": 1.9664138555526733, "learning_rate": 3.5890416050789523e-06, "loss": 0.0361, "step": 1418, "video_reward_cumulative_accuracy": 0.8138222849083215 }, { "epoch": 0.42119323241317896, "grad_norm": 3.4354248046875, "learning_rate": 3.586709351216474e-06, "loss": 0.0391, "step": 1419, "video_reward_cumulative_accuracy": 0.8136011275546159 }, { "epoch": 0.42149005639655684, "grad_norm": 1.596327543258667, "learning_rate": 3.5843759306660344e-06, "loss": 0.0905, "step": 1420, "video_reward_cumulative_accuracy": 0.8133802816901409 }, { "epoch": 0.4217868803799347, "grad_norm": 3.7419114112854004, "learning_rate": 3.5820413459327863e-06, "loss": 0.0678, "step": 1421, "video_reward_cumulative_accuracy": 0.812807881773399 }, { "epoch": 0.42208370436331255, "grad_norm": 3.569519281387329, "learning_rate": 3.579705599523132e-06, "loss": 0.047, "step": 1422, "video_reward_cumulative_accuracy": 0.8129395218002813 }, { "epoch": 0.42238052834669043, "grad_norm": 1.55231511592865, "learning_rate": 3.5773686939447226e-06, "loss": 0.0314, "step": 1423, "video_reward_cumulative_accuracy": 0.8130709768095573 }, { "epoch": 0.42267735233006826, "grad_norm": 2.8076114654541016, "learning_rate": 3.575030631706454e-06, "loss": 0.0511, "step": 1424, "video_reward_cumulative_accuracy": 0.8128511235955056 }, { "epoch": 0.42297417631344614, "grad_norm": 1.3293800354003906, "learning_rate": 3.5726914153184624e-06, "loss": 0.0334, "step": 1425, "video_reward_cumulative_accuracy": 0.8129824561403509 }, { "epoch": 0.42327100029682396, "grad_norm": 2.4649341106414795, "learning_rate": 3.570351047292123e-06, "loss": 0.0397, "step": 1426, "video_reward_cumulative_accuracy": 0.8131136044880786 }, { "epoch": 0.42356782428020184, "grad_norm": 3.885298490524292, "learning_rate": 3.5680095301400497e-06, "loss": 0.0345, "step": 1427, "video_reward_cumulative_accuracy": 0.8132445690259286 }, { "epoch": 0.4238646482635797, "grad_norm": 2.974383592605591, "learning_rate": 3.565666866376086e-06, "loss": 0.0572, "step": 1428, "video_reward_cumulative_accuracy": 0.8130252100840336 }, { "epoch": 0.42416147224695755, "grad_norm": 1.509018898010254, "learning_rate": 3.5633230585153093e-06, "loss": 0.0386, "step": 1429, "video_reward_cumulative_accuracy": 0.8131560531840448 }, { "epoch": 0.42445829623033543, "grad_norm": 1.3774346113204956, "learning_rate": 3.5609781090740264e-06, "loss": 0.0231, "step": 1430, "video_reward_cumulative_accuracy": 0.8132867132867133 }, { "epoch": 0.42475512021371326, "grad_norm": 2.268357276916504, "learning_rate": 3.558632020569768e-06, "loss": 0.045, "step": 1431, "video_reward_cumulative_accuracy": 0.8134171907756813 }, { "epoch": 0.42505194419709114, "grad_norm": 2.4706010818481445, "learning_rate": 3.5562847955212863e-06, "loss": 0.0662, "step": 1432, "video_reward_cumulative_accuracy": 0.8131983240223464 }, { "epoch": 0.42534876818046896, "grad_norm": 1.930578589439392, "learning_rate": 3.553936436448556e-06, "loss": 0.0474, "step": 1433, "video_reward_cumulative_accuracy": 0.8133286810886252 }, { "epoch": 0.42564559216384684, "grad_norm": 2.0366480350494385, "learning_rate": 3.551586945872769e-06, "loss": 0.061, "step": 1434, "video_reward_cumulative_accuracy": 0.8131101813110181 }, { "epoch": 0.4259424161472247, "grad_norm": 2.4363696575164795, "learning_rate": 3.5492363263163305e-06, "loss": 0.0386, "step": 1435, "video_reward_cumulative_accuracy": 0.8132404181184669 }, { "epoch": 0.42623924013060255, "grad_norm": 1.8315794467926025, "learning_rate": 3.546884580302859e-06, "loss": 0.0375, "step": 1436, "video_reward_cumulative_accuracy": 0.8133704735376045 }, { "epoch": 0.42653606411398043, "grad_norm": 2.6749696731567383, "learning_rate": 3.544531710357183e-06, "loss": 0.1026, "step": 1437, "video_reward_cumulative_accuracy": 0.8131524008350731 }, { "epoch": 0.42683288809735825, "grad_norm": 1.775738000869751, "learning_rate": 3.5421777190053354e-06, "loss": 0.0308, "step": 1438, "video_reward_cumulative_accuracy": 0.8132823365785814 }, { "epoch": 0.42712971208073613, "grad_norm": 2.429361581802368, "learning_rate": 3.539822608774555e-06, "loss": 0.0293, "step": 1439, "video_reward_cumulative_accuracy": 0.8134120917303683 }, { "epoch": 0.42742653606411396, "grad_norm": 1.1443023681640625, "learning_rate": 3.537466382193282e-06, "loss": 0.0219, "step": 1440, "video_reward_cumulative_accuracy": 0.8135416666666667 }, { "epoch": 0.42772336004749184, "grad_norm": 2.1525886058807373, "learning_rate": 3.535109041791153e-06, "loss": 0.0612, "step": 1441, "video_reward_cumulative_accuracy": 0.8136710617626648 }, { "epoch": 0.4280201840308697, "grad_norm": 2.423384666442871, "learning_rate": 3.532750590099002e-06, "loss": 0.0459, "step": 1442, "video_reward_cumulative_accuracy": 0.8134535367545076 }, { "epoch": 0.42831700801424755, "grad_norm": 1.831072449684143, "learning_rate": 3.5303910296488565e-06, "loss": 0.0212, "step": 1443, "video_reward_cumulative_accuracy": 0.8135828135828136 }, { "epoch": 0.4286138319976254, "grad_norm": 3.769604444503784, "learning_rate": 3.528030362973933e-06, "loss": 0.0522, "step": 1444, "video_reward_cumulative_accuracy": 0.8133656509695291 }, { "epoch": 0.42891065598100325, "grad_norm": 2.6289186477661133, "learning_rate": 3.525668592608637e-06, "loss": 0.0766, "step": 1445, "video_reward_cumulative_accuracy": 0.8134948096885813 }, { "epoch": 0.42920747996438113, "grad_norm": 2.1730971336364746, "learning_rate": 3.523305721088558e-06, "loss": 0.0222, "step": 1446, "video_reward_cumulative_accuracy": 0.8136237897648686 }, { "epoch": 0.42950430394775896, "grad_norm": 1.4084819555282593, "learning_rate": 3.5209417509504668e-06, "loss": 0.0793, "step": 1447, "video_reward_cumulative_accuracy": 0.813752591568763 }, { "epoch": 0.42980112793113684, "grad_norm": 1.7031943798065186, "learning_rate": 3.518576684732316e-06, "loss": 0.0489, "step": 1448, "video_reward_cumulative_accuracy": 0.8138812154696132 }, { "epoch": 0.4300979519145147, "grad_norm": 2.925882339477539, "learning_rate": 3.5162105249732336e-06, "loss": 0.0379, "step": 1449, "video_reward_cumulative_accuracy": 0.8136645962732919 }, { "epoch": 0.43039477589789255, "grad_norm": 0.6623610854148865, "learning_rate": 3.5138432742135215e-06, "loss": 0.023, "step": 1450, "video_reward_cumulative_accuracy": 0.8137931034482758 }, { "epoch": 0.4306915998812704, "grad_norm": 1.5418504476547241, "learning_rate": 3.511474934994653e-06, "loss": 0.0315, "step": 1451, "video_reward_cumulative_accuracy": 0.8135768435561681 }, { "epoch": 0.43098842386464825, "grad_norm": 1.7840099334716797, "learning_rate": 3.509105509859271e-06, "loss": 0.0258, "step": 1452, "video_reward_cumulative_accuracy": 0.8137052341597796 }, { "epoch": 0.43128524784802613, "grad_norm": 3.910229444503784, "learning_rate": 3.5067350013511816e-06, "loss": 0.0932, "step": 1453, "video_reward_cumulative_accuracy": 0.8134893324156917 }, { "epoch": 0.43158207183140396, "grad_norm": 1.7669485807418823, "learning_rate": 3.5043634120153572e-06, "loss": 0.0423, "step": 1454, "video_reward_cumulative_accuracy": 0.813617606602476 }, { "epoch": 0.43187889581478184, "grad_norm": 0.9480779767036438, "learning_rate": 3.5019907443979297e-06, "loss": 0.0301, "step": 1455, "video_reward_cumulative_accuracy": 0.813745704467354 }, { "epoch": 0.4321757197981597, "grad_norm": 2.241234540939331, "learning_rate": 3.4996170010461862e-06, "loss": 0.0298, "step": 1456, "video_reward_cumulative_accuracy": 0.8138736263736264 }, { "epoch": 0.43247254378153754, "grad_norm": 2.01543927192688, "learning_rate": 3.497242184508571e-06, "loss": 0.0604, "step": 1457, "video_reward_cumulative_accuracy": 0.8140013726835964 }, { "epoch": 0.4327693677649154, "grad_norm": 1.9135305881500244, "learning_rate": 3.4948662973346816e-06, "loss": 0.0274, "step": 1458, "video_reward_cumulative_accuracy": 0.8141289437585734 }, { "epoch": 0.43306619174829325, "grad_norm": 1.8279916048049927, "learning_rate": 3.492489342075262e-06, "loss": 0.0504, "step": 1459, "video_reward_cumulative_accuracy": 0.8142563399588759 }, { "epoch": 0.43336301573167113, "grad_norm": 1.939100980758667, "learning_rate": 3.4901113212822057e-06, "loss": 0.0561, "step": 1460, "video_reward_cumulative_accuracy": 0.8143835616438356 }, { "epoch": 0.43365983971504896, "grad_norm": 1.3775911331176758, "learning_rate": 3.487732237508547e-06, "loss": 0.0456, "step": 1461, "video_reward_cumulative_accuracy": 0.8145106091718002 }, { "epoch": 0.43395666369842684, "grad_norm": 3.4904792308807373, "learning_rate": 3.485352093308465e-06, "loss": 0.0448, "step": 1462, "video_reward_cumulative_accuracy": 0.8146374829001368 }, { "epoch": 0.4342534876818047, "grad_norm": 1.0064564943313599, "learning_rate": 3.4829708912372746e-06, "loss": 0.0191, "step": 1463, "video_reward_cumulative_accuracy": 0.8147641831852358 }, { "epoch": 0.43455031166518254, "grad_norm": 2.0086288452148438, "learning_rate": 3.4805886338514277e-06, "loss": 0.0285, "step": 1464, "video_reward_cumulative_accuracy": 0.8145491803278688 }, { "epoch": 0.4348471356485604, "grad_norm": 2.064359664916992, "learning_rate": 3.4782053237085083e-06, "loss": 0.0519, "step": 1465, "video_reward_cumulative_accuracy": 0.8143344709897611 }, { "epoch": 0.43514395963193825, "grad_norm": 3.986988067626953, "learning_rate": 3.4758209633672313e-06, "loss": 0.081, "step": 1466, "video_reward_cumulative_accuracy": 0.8144611186903138 }, { "epoch": 0.43544078361531613, "grad_norm": 3.117048740386963, "learning_rate": 3.47343555538744e-06, "loss": 0.0216, "step": 1467, "video_reward_cumulative_accuracy": 0.8142467620995228 }, { "epoch": 0.43573760759869395, "grad_norm": 2.8893699645996094, "learning_rate": 3.4710491023300997e-06, "loss": 0.0428, "step": 1468, "video_reward_cumulative_accuracy": 0.8140326975476839 }, { "epoch": 0.43603443158207184, "grad_norm": 3.7498586177825928, "learning_rate": 3.468661606757301e-06, "loss": 0.0808, "step": 1469, "video_reward_cumulative_accuracy": 0.8138189244383934 }, { "epoch": 0.4363312555654497, "grad_norm": 1.7329144477844238, "learning_rate": 3.4662730712322514e-06, "loss": 0.0348, "step": 1470, "video_reward_cumulative_accuracy": 0.8136054421768707 }, { "epoch": 0.43662807954882754, "grad_norm": 5.551042079925537, "learning_rate": 3.4638834983192743e-06, "loss": 0.0883, "step": 1471, "video_reward_cumulative_accuracy": 0.8133922501699524 }, { "epoch": 0.4369249035322054, "grad_norm": 1.6959829330444336, "learning_rate": 3.4614928905838103e-06, "loss": 0.0174, "step": 1472, "video_reward_cumulative_accuracy": 0.8135190217391305 }, { "epoch": 0.43722172751558325, "grad_norm": 1.2532111406326294, "learning_rate": 3.4591012505924078e-06, "loss": 0.0122, "step": 1473, "video_reward_cumulative_accuracy": 0.8136456211812627 }, { "epoch": 0.43751855149896113, "grad_norm": 0.6892161965370178, "learning_rate": 3.4567085809127247e-06, "loss": 0.0065, "step": 1474, "video_reward_cumulative_accuracy": 0.8137720488466758 }, { "epoch": 0.43781537548233895, "grad_norm": 3.311598539352417, "learning_rate": 3.4543148841135243e-06, "loss": 0.0672, "step": 1475, "video_reward_cumulative_accuracy": 0.8135593220338984 }, { "epoch": 0.43811219946571683, "grad_norm": 1.2820119857788086, "learning_rate": 3.4519201627646713e-06, "loss": 0.0293, "step": 1476, "video_reward_cumulative_accuracy": 0.8133468834688347 }, { "epoch": 0.43840902344909466, "grad_norm": 1.7546663284301758, "learning_rate": 3.4495244194371337e-06, "loss": 0.0629, "step": 1477, "video_reward_cumulative_accuracy": 0.8134732566012187 }, { "epoch": 0.43870584743247254, "grad_norm": 3.1134088039398193, "learning_rate": 3.447127656702971e-06, "loss": 0.061, "step": 1478, "video_reward_cumulative_accuracy": 0.8135994587280109 }, { "epoch": 0.4390026714158504, "grad_norm": 3.5772175788879395, "learning_rate": 3.444729877135345e-06, "loss": 0.0409, "step": 1479, "video_reward_cumulative_accuracy": 0.8137254901960784 }, { "epoch": 0.43929949539922825, "grad_norm": 1.2507808208465576, "learning_rate": 3.4423310833085015e-06, "loss": 0.0542, "step": 1480, "video_reward_cumulative_accuracy": 0.8135135135135135 }, { "epoch": 0.4395963193826061, "grad_norm": 0.4887540936470032, "learning_rate": 3.4399312777977794e-06, "loss": 0.0112, "step": 1481, "video_reward_cumulative_accuracy": 0.8136394328156651 }, { "epoch": 0.43989314336598395, "grad_norm": 0.6733037233352661, "learning_rate": 3.437530463179604e-06, "loss": 0.0139, "step": 1482, "video_reward_cumulative_accuracy": 0.8137651821862348 }, { "epoch": 0.44018996734936183, "grad_norm": 0.46859636902809143, "learning_rate": 3.4351286420314807e-06, "loss": 0.0161, "step": 1483, "video_reward_cumulative_accuracy": 0.8138907619689818 }, { "epoch": 0.44048679133273966, "grad_norm": 1.171273946762085, "learning_rate": 3.4327258169319986e-06, "loss": 0.0282, "step": 1484, "video_reward_cumulative_accuracy": 0.8140161725067385 }, { "epoch": 0.44078361531611754, "grad_norm": 1.2421537637710571, "learning_rate": 3.4303219904608244e-06, "loss": 0.0144, "step": 1485, "video_reward_cumulative_accuracy": 0.8138047138047138 }, { "epoch": 0.4410804392994954, "grad_norm": 1.5912413597106934, "learning_rate": 3.427917165198698e-06, "loss": 0.0317, "step": 1486, "video_reward_cumulative_accuracy": 0.8135935397039031 }, { "epoch": 0.44137726328287324, "grad_norm": 2.529520034790039, "learning_rate": 3.425511343727434e-06, "loss": 0.04, "step": 1487, "video_reward_cumulative_accuracy": 0.8137188971082717 }, { "epoch": 0.4416740872662511, "grad_norm": 2.3800694942474365, "learning_rate": 3.4231045286299136e-06, "loss": 0.0557, "step": 1488, "video_reward_cumulative_accuracy": 0.8138440860215054 }, { "epoch": 0.44197091124962895, "grad_norm": 1.2274895906448364, "learning_rate": 3.4206967224900885e-06, "loss": 0.0194, "step": 1489, "video_reward_cumulative_accuracy": 0.8139691067830759 }, { "epoch": 0.44226773523300683, "grad_norm": 1.3150626420974731, "learning_rate": 3.41828792789297e-06, "loss": 0.0124, "step": 1490, "video_reward_cumulative_accuracy": 0.8140939597315436 }, { "epoch": 0.44256455921638466, "grad_norm": 1.3226598501205444, "learning_rate": 3.415878147424634e-06, "loss": 0.0129, "step": 1491, "video_reward_cumulative_accuracy": 0.8142186452045607 }, { "epoch": 0.44286138319976254, "grad_norm": 2.370067596435547, "learning_rate": 3.413467383672214e-06, "loss": 0.025, "step": 1492, "video_reward_cumulative_accuracy": 0.814343163538874 }, { "epoch": 0.4431582071831404, "grad_norm": 2.3997597694396973, "learning_rate": 3.411055639223898e-06, "loss": 0.0187, "step": 1493, "video_reward_cumulative_accuracy": 0.8144675150703282 }, { "epoch": 0.44345503116651824, "grad_norm": 1.877609133720398, "learning_rate": 3.4086429166689296e-06, "loss": 0.0527, "step": 1494, "video_reward_cumulative_accuracy": 0.8145917001338688 }, { "epoch": 0.4437518551498961, "grad_norm": 4.125174522399902, "learning_rate": 3.4062292185975987e-06, "loss": 0.0701, "step": 1495, "video_reward_cumulative_accuracy": 0.8147157190635451 }, { "epoch": 0.44404867913327395, "grad_norm": 2.3468966484069824, "learning_rate": 3.403814547601244e-06, "loss": 0.0234, "step": 1496, "video_reward_cumulative_accuracy": 0.8145053475935828 }, { "epoch": 0.44434550311665183, "grad_norm": 0.8067638874053955, "learning_rate": 3.4013989062722514e-06, "loss": 0.0121, "step": 1497, "video_reward_cumulative_accuracy": 0.814629258517034 }, { "epoch": 0.44464232710002966, "grad_norm": 1.2667425870895386, "learning_rate": 3.398982297204045e-06, "loss": 0.0718, "step": 1498, "video_reward_cumulative_accuracy": 0.8147530040053405 }, { "epoch": 0.44493915108340754, "grad_norm": 4.346756935119629, "learning_rate": 3.396564722991089e-06, "loss": 0.037, "step": 1499, "video_reward_cumulative_accuracy": 0.8148765843895931 }, { "epoch": 0.4452359750667854, "grad_norm": 2.6099324226379395, "learning_rate": 3.394146186228885e-06, "loss": 0.0565, "step": 1500, "video_reward_cumulative_accuracy": 0.815 }, { "epoch": 0.44553279905016324, "grad_norm": 2.9593958854675293, "learning_rate": 3.3917266895139654e-06, "loss": 0.0548, "step": 1501, "video_reward_cumulative_accuracy": 0.8147901399067289 }, { "epoch": 0.4458296230335411, "grad_norm": 1.9937952756881714, "learning_rate": 3.389306235443896e-06, "loss": 0.0267, "step": 1502, "video_reward_cumulative_accuracy": 0.8145805592543276 }, { "epoch": 0.44612644701691895, "grad_norm": 2.4806485176086426, "learning_rate": 3.3868848266172693e-06, "loss": 0.0222, "step": 1503, "video_reward_cumulative_accuracy": 0.8143712574850299 }, { "epoch": 0.44642327100029683, "grad_norm": 4.427879333496094, "learning_rate": 3.384462465633702e-06, "loss": 0.0498, "step": 1504, "video_reward_cumulative_accuracy": 0.8144946808510638 }, { "epoch": 0.44672009498367465, "grad_norm": 3.623908519744873, "learning_rate": 3.3820391550938337e-06, "loss": 0.0426, "step": 1505, "video_reward_cumulative_accuracy": 0.8142857142857143 }, { "epoch": 0.44701691896705253, "grad_norm": 3.808448076248169, "learning_rate": 3.3796148975993236e-06, "loss": 0.0388, "step": 1506, "video_reward_cumulative_accuracy": 0.8144090305444888 }, { "epoch": 0.4473137429504304, "grad_norm": 1.8826570510864258, "learning_rate": 3.3771896957528476e-06, "loss": 0.0414, "step": 1507, "video_reward_cumulative_accuracy": 0.814200398142004 }, { "epoch": 0.44761056693380824, "grad_norm": 2.2933990955352783, "learning_rate": 3.374763552158095e-06, "loss": 0.0396, "step": 1508, "video_reward_cumulative_accuracy": 0.8143236074270557 }, { "epoch": 0.4479073909171861, "grad_norm": 2.446279764175415, "learning_rate": 3.372336469419767e-06, "loss": 0.0834, "step": 1509, "video_reward_cumulative_accuracy": 0.8144466534128562 }, { "epoch": 0.44820421490056395, "grad_norm": 1.7556294202804565, "learning_rate": 3.3699084501435717e-06, "loss": 0.0329, "step": 1510, "video_reward_cumulative_accuracy": 0.8142384105960265 }, { "epoch": 0.4485010388839418, "grad_norm": 1.7487667798995972, "learning_rate": 3.3674794969362235e-06, "loss": 0.0511, "step": 1511, "video_reward_cumulative_accuracy": 0.814361350099272 }, { "epoch": 0.44879786286731965, "grad_norm": 2.5053646564483643, "learning_rate": 3.365049612405441e-06, "loss": 0.0534, "step": 1512, "video_reward_cumulative_accuracy": 0.814484126984127 }, { "epoch": 0.44909468685069753, "grad_norm": 1.7758805751800537, "learning_rate": 3.3626187991599384e-06, "loss": 0.0223, "step": 1513, "video_reward_cumulative_accuracy": 0.8146067415730337 }, { "epoch": 0.4493915108340754, "grad_norm": 3.1137726306915283, "learning_rate": 3.3601870598094317e-06, "loss": 0.0652, "step": 1514, "video_reward_cumulative_accuracy": 0.8147291941875826 }, { "epoch": 0.44968833481745324, "grad_norm": 1.9859057664871216, "learning_rate": 3.3577543969646287e-06, "loss": 0.0781, "step": 1515, "video_reward_cumulative_accuracy": 0.8148514851485148 }, { "epoch": 0.4499851588008311, "grad_norm": 2.5349478721618652, "learning_rate": 3.3553208132372284e-06, "loss": 0.056, "step": 1516, "video_reward_cumulative_accuracy": 0.8149736147757256 }, { "epoch": 0.45028198278420895, "grad_norm": 1.9917017221450806, "learning_rate": 3.35288631123992e-06, "loss": 0.0526, "step": 1517, "video_reward_cumulative_accuracy": 0.8147659854976929 }, { "epoch": 0.4505788067675868, "grad_norm": 2.3147571086883545, "learning_rate": 3.3504508935863776e-06, "loss": 0.0395, "step": 1518, "video_reward_cumulative_accuracy": 0.8148880105401844 }, { "epoch": 0.45087563075096465, "grad_norm": 3.1221325397491455, "learning_rate": 3.3480145628912574e-06, "loss": 0.0416, "step": 1519, "video_reward_cumulative_accuracy": 0.815009874917709 }, { "epoch": 0.45117245473434253, "grad_norm": 3.894681692123413, "learning_rate": 3.3455773217701977e-06, "loss": 0.0472, "step": 1520, "video_reward_cumulative_accuracy": 0.8151315789473684 }, { "epoch": 0.4514692787177204, "grad_norm": 2.071953773498535, "learning_rate": 3.343139172839813e-06, "loss": 0.0382, "step": 1521, "video_reward_cumulative_accuracy": 0.8152531229454306 }, { "epoch": 0.45176610270109824, "grad_norm": 1.4821226596832275, "learning_rate": 3.3407001187176934e-06, "loss": 0.0516, "step": 1522, "video_reward_cumulative_accuracy": 0.8153745072273325 }, { "epoch": 0.4520629266844761, "grad_norm": 2.996478796005249, "learning_rate": 3.3382601620224e-06, "loss": 0.0383, "step": 1523, "video_reward_cumulative_accuracy": 0.8154957321076822 }, { "epoch": 0.45235975066785394, "grad_norm": 0.9077578783035278, "learning_rate": 3.335819305373463e-06, "loss": 0.0113, "step": 1524, "video_reward_cumulative_accuracy": 0.8156167979002624 }, { "epoch": 0.4526565746512318, "grad_norm": 3.7463269233703613, "learning_rate": 3.333377551391379e-06, "loss": 0.065, "step": 1525, "video_reward_cumulative_accuracy": 0.8157377049180328 }, { "epoch": 0.45295339863460965, "grad_norm": 2.012558698654175, "learning_rate": 3.3309349026976074e-06, "loss": 0.0298, "step": 1526, "video_reward_cumulative_accuracy": 0.8158584534731324 }, { "epoch": 0.45325022261798753, "grad_norm": 2.4637279510498047, "learning_rate": 3.3284913619145697e-06, "loss": 0.0271, "step": 1527, "video_reward_cumulative_accuracy": 0.8159790438768828 }, { "epoch": 0.4535470466013654, "grad_norm": 1.1083879470825195, "learning_rate": 3.3260469316656435e-06, "loss": 0.0653, "step": 1528, "video_reward_cumulative_accuracy": 0.8157722513089005 }, { "epoch": 0.45384387058474324, "grad_norm": 0.8266122937202454, "learning_rate": 3.3236016145751616e-06, "loss": 0.0347, "step": 1529, "video_reward_cumulative_accuracy": 0.815892740353172 }, { "epoch": 0.4541406945681211, "grad_norm": 1.2026057243347168, "learning_rate": 3.32115541326841e-06, "loss": 0.035, "step": 1530, "video_reward_cumulative_accuracy": 0.8160130718954248 }, { "epoch": 0.45443751855149894, "grad_norm": 1.0709147453308105, "learning_rate": 3.3187083303716218e-06, "loss": 0.0366, "step": 1531, "video_reward_cumulative_accuracy": 0.8154800783801437 }, { "epoch": 0.4547343425348768, "grad_norm": 2.988943338394165, "learning_rate": 3.3162603685119795e-06, "loss": 0.075, "step": 1532, "video_reward_cumulative_accuracy": 0.8152741514360313 }, { "epoch": 0.45503116651825465, "grad_norm": 4.098608493804932, "learning_rate": 3.3138115303176073e-06, "loss": 0.0878, "step": 1533, "video_reward_cumulative_accuracy": 0.8153946510110893 }, { "epoch": 0.45532799050163253, "grad_norm": 1.185163140296936, "learning_rate": 3.31136181841757e-06, "loss": 0.0365, "step": 1534, "video_reward_cumulative_accuracy": 0.8155149934810951 }, { "epoch": 0.4556248144850104, "grad_norm": 1.2512524127960205, "learning_rate": 3.308911235441873e-06, "loss": 0.0201, "step": 1535, "video_reward_cumulative_accuracy": 0.8156351791530945 }, { "epoch": 0.45592163846838824, "grad_norm": 1.520028829574585, "learning_rate": 3.306459784021452e-06, "loss": 0.0322, "step": 1536, "video_reward_cumulative_accuracy": 0.8154296875 }, { "epoch": 0.4562184624517661, "grad_norm": 1.225994348526001, "learning_rate": 3.304007466788181e-06, "loss": 0.0554, "step": 1537, "video_reward_cumulative_accuracy": 0.815224463240078 }, { "epoch": 0.45651528643514394, "grad_norm": 2.5382115840911865, "learning_rate": 3.301554286374859e-06, "loss": 0.0308, "step": 1538, "video_reward_cumulative_accuracy": 0.8153446033810143 }, { "epoch": 0.4568121104185218, "grad_norm": 1.67540442943573, "learning_rate": 3.2991002454152133e-06, "loss": 0.0605, "step": 1539, "video_reward_cumulative_accuracy": 0.8151397011046134 }, { "epoch": 0.45710893440189965, "grad_norm": 0.42339617013931274, "learning_rate": 3.2966453465438954e-06, "loss": 0.011, "step": 1540, "video_reward_cumulative_accuracy": 0.8152597402597402 }, { "epoch": 0.4574057583852775, "grad_norm": 1.9776630401611328, "learning_rate": 3.294189592396477e-06, "loss": 0.0485, "step": 1541, "video_reward_cumulative_accuracy": 0.8153796236210253 }, { "epoch": 0.4577025823686554, "grad_norm": 3.960407018661499, "learning_rate": 3.29173298560945e-06, "loss": 0.0464, "step": 1542, "video_reward_cumulative_accuracy": 0.8154993514915694 }, { "epoch": 0.45799940635203323, "grad_norm": 0.9101660251617432, "learning_rate": 3.289275528820218e-06, "loss": 0.0242, "step": 1543, "video_reward_cumulative_accuracy": 0.8156189241736876 }, { "epoch": 0.4582962303354111, "grad_norm": 6.294965744018555, "learning_rate": 3.2868172246671005e-06, "loss": 0.0793, "step": 1544, "video_reward_cumulative_accuracy": 0.8157383419689119 }, { "epoch": 0.45859305431878894, "grad_norm": 2.3557205200195312, "learning_rate": 3.2843580757893266e-06, "loss": 0.0603, "step": 1545, "video_reward_cumulative_accuracy": 0.8158576051779936 }, { "epoch": 0.4588898783021668, "grad_norm": 3.9995932579040527, "learning_rate": 3.28189808482703e-06, "loss": 0.0902, "step": 1546, "video_reward_cumulative_accuracy": 0.815653298835705 }, { "epoch": 0.45918670228554465, "grad_norm": 1.1430221796035767, "learning_rate": 3.2794372544212495e-06, "loss": 0.0306, "step": 1547, "video_reward_cumulative_accuracy": 0.8157724628312863 }, { "epoch": 0.4594835262689225, "grad_norm": 4.331371784210205, "learning_rate": 3.2769755872139264e-06, "loss": 0.0725, "step": 1548, "video_reward_cumulative_accuracy": 0.8158914728682171 }, { "epoch": 0.4597803502523004, "grad_norm": 2.8517324924468994, "learning_rate": 3.274513085847899e-06, "loss": 0.0492, "step": 1549, "video_reward_cumulative_accuracy": 0.815687540348612 }, { "epoch": 0.46007717423567823, "grad_norm": 2.0116770267486572, "learning_rate": 3.272049752966901e-06, "loss": 0.0484, "step": 1550, "video_reward_cumulative_accuracy": 0.8158064516129032 }, { "epoch": 0.4603739982190561, "grad_norm": 2.2900784015655518, "learning_rate": 3.2695855912155605e-06, "loss": 0.0372, "step": 1551, "video_reward_cumulative_accuracy": 0.8156028368794326 }, { "epoch": 0.46067082220243394, "grad_norm": 2.2841272354125977, "learning_rate": 3.2671206032393926e-06, "loss": 0.0331, "step": 1552, "video_reward_cumulative_accuracy": 0.8150773195876289 }, { "epoch": 0.4609676461858118, "grad_norm": 1.335290789604187, "learning_rate": 3.264654791684803e-06, "loss": 0.0229, "step": 1553, "video_reward_cumulative_accuracy": 0.815196394075982 }, { "epoch": 0.46126447016918964, "grad_norm": 4.430738925933838, "learning_rate": 3.2621881591990784e-06, "loss": 0.0468, "step": 1554, "video_reward_cumulative_accuracy": 0.8153153153153153 }, { "epoch": 0.4615612941525675, "grad_norm": 5.130397319793701, "learning_rate": 3.2597207084303893e-06, "loss": 0.0863, "step": 1555, "video_reward_cumulative_accuracy": 0.8154340836012862 }, { "epoch": 0.4618581181359454, "grad_norm": 2.924949884414673, "learning_rate": 3.2572524420277825e-06, "loss": 0.0296, "step": 1556, "video_reward_cumulative_accuracy": 0.8152313624678663 }, { "epoch": 0.46215494211932323, "grad_norm": 1.930640697479248, "learning_rate": 3.2547833626411812e-06, "loss": 0.0361, "step": 1557, "video_reward_cumulative_accuracy": 0.8153500321130379 }, { "epoch": 0.4624517661027011, "grad_norm": 2.353710651397705, "learning_rate": 3.2523134729213833e-06, "loss": 0.0365, "step": 1558, "video_reward_cumulative_accuracy": 0.8151476251604621 }, { "epoch": 0.46274859008607894, "grad_norm": 2.0358400344848633, "learning_rate": 3.2498427755200546e-06, "loss": 0.0564, "step": 1559, "video_reward_cumulative_accuracy": 0.8152661962796665 }, { "epoch": 0.4630454140694568, "grad_norm": 2.5255966186523438, "learning_rate": 3.2473712730897282e-06, "loss": 0.0233, "step": 1560, "video_reward_cumulative_accuracy": 0.8153846153846154 }, { "epoch": 0.46334223805283464, "grad_norm": 0.2955355644226074, "learning_rate": 3.244898968283802e-06, "loss": 0.006, "step": 1561, "video_reward_cumulative_accuracy": 0.8155028827674567 }, { "epoch": 0.4636390620362125, "grad_norm": 1.5781440734863281, "learning_rate": 3.2424258637565347e-06, "loss": 0.0168, "step": 1562, "video_reward_cumulative_accuracy": 0.8156209987195903 }, { "epoch": 0.4639358860195904, "grad_norm": 1.639114260673523, "learning_rate": 3.239951962163045e-06, "loss": 0.0222, "step": 1563, "video_reward_cumulative_accuracy": 0.8154190658989123 }, { "epoch": 0.46423271000296823, "grad_norm": 0.8644540309906006, "learning_rate": 3.2374772661593055e-06, "loss": 0.0186, "step": 1564, "video_reward_cumulative_accuracy": 0.815537084398977 }, { "epoch": 0.4645295339863461, "grad_norm": 1.5920159816741943, "learning_rate": 3.235001778402143e-06, "loss": 0.0297, "step": 1565, "video_reward_cumulative_accuracy": 0.8156549520766773 }, { "epoch": 0.46482635796972394, "grad_norm": 2.091097354888916, "learning_rate": 3.232525501549234e-06, "loss": 0.0579, "step": 1566, "video_reward_cumulative_accuracy": 0.815772669220945 }, { "epoch": 0.4651231819531018, "grad_norm": 3.484055995941162, "learning_rate": 3.230048438259102e-06, "loss": 0.0441, "step": 1567, "video_reward_cumulative_accuracy": 0.8155711550733886 }, { "epoch": 0.46542000593647964, "grad_norm": 1.8032653331756592, "learning_rate": 3.227570591191114e-06, "loss": 0.0827, "step": 1568, "video_reward_cumulative_accuracy": 0.8156887755102041 }, { "epoch": 0.4657168299198575, "grad_norm": 2.3294694423675537, "learning_rate": 3.22509196300548e-06, "loss": 0.046, "step": 1569, "video_reward_cumulative_accuracy": 0.815806246016571 }, { "epoch": 0.4660136539032354, "grad_norm": 1.2451170682907104, "learning_rate": 3.222612556363249e-06, "loss": 0.0456, "step": 1570, "video_reward_cumulative_accuracy": 0.8159235668789809 }, { "epoch": 0.46631047788661323, "grad_norm": 2.33011531829834, "learning_rate": 3.2201323739263024e-06, "loss": 0.0337, "step": 1571, "video_reward_cumulative_accuracy": 0.8160407383831955 }, { "epoch": 0.4666073018699911, "grad_norm": 3.825652599334717, "learning_rate": 3.217651418357359e-06, "loss": 0.0352, "step": 1572, "video_reward_cumulative_accuracy": 0.8158396946564885 }, { "epoch": 0.46690412585336893, "grad_norm": 2.4924354553222656, "learning_rate": 3.2151696923199636e-06, "loss": 0.07, "step": 1573, "video_reward_cumulative_accuracy": 0.8156389065479974 }, { "epoch": 0.4672009498367468, "grad_norm": 3.4220986366271973, "learning_rate": 3.2126871984784907e-06, "loss": 0.079, "step": 1574, "video_reward_cumulative_accuracy": 0.8157560355781448 }, { "epoch": 0.46749777382012464, "grad_norm": 2.504857301712036, "learning_rate": 3.210203939498139e-06, "loss": 0.0278, "step": 1575, "video_reward_cumulative_accuracy": 0.8158730158730159 }, { "epoch": 0.4677945978035025, "grad_norm": 2.0332424640655518, "learning_rate": 3.207719918044927e-06, "loss": 0.0329, "step": 1576, "video_reward_cumulative_accuracy": 0.815989847715736 }, { "epoch": 0.4680914217868804, "grad_norm": 1.2692821025848389, "learning_rate": 3.205235136785693e-06, "loss": 0.0211, "step": 1577, "video_reward_cumulative_accuracy": 0.8161065313887127 }, { "epoch": 0.4683882457702582, "grad_norm": 1.8608229160308838, "learning_rate": 3.202749598388092e-06, "loss": 0.0441, "step": 1578, "video_reward_cumulative_accuracy": 0.8162230671736375 }, { "epoch": 0.4686850697536361, "grad_norm": 0.5689178109169006, "learning_rate": 3.200263305520589e-06, "loss": 0.0146, "step": 1579, "video_reward_cumulative_accuracy": 0.8163394553514883 }, { "epoch": 0.46898189373701393, "grad_norm": 1.689761996269226, "learning_rate": 3.197776260852462e-06, "loss": 0.0347, "step": 1580, "video_reward_cumulative_accuracy": 0.8161392405063291 }, { "epoch": 0.4692787177203918, "grad_norm": 2.7706823348999023, "learning_rate": 3.195288467053795e-06, "loss": 0.0643, "step": 1581, "video_reward_cumulative_accuracy": 0.8162555344718533 }, { "epoch": 0.46957554170376964, "grad_norm": 1.1557561159133911, "learning_rate": 3.1927999267954746e-06, "loss": 0.0424, "step": 1582, "video_reward_cumulative_accuracy": 0.8163716814159292 }, { "epoch": 0.4698723656871475, "grad_norm": 2.010145902633667, "learning_rate": 3.1903106427491914e-06, "loss": 0.0498, "step": 1583, "video_reward_cumulative_accuracy": 0.8164876816171825 }, { "epoch": 0.4701691896705254, "grad_norm": 3.788320302963257, "learning_rate": 3.1878206175874334e-06, "loss": 0.0303, "step": 1584, "video_reward_cumulative_accuracy": 0.8166035353535354 }, { "epoch": 0.4704660136539032, "grad_norm": 3.469613552093506, "learning_rate": 3.1853298539834836e-06, "loss": 0.0477, "step": 1585, "video_reward_cumulative_accuracy": 0.8167192429022082 }, { "epoch": 0.4707628376372811, "grad_norm": 5.445047378540039, "learning_rate": 3.1828383546114196e-06, "loss": 0.0825, "step": 1586, "video_reward_cumulative_accuracy": 0.8168348045397226 }, { "epoch": 0.47105966162065893, "grad_norm": 2.0806350708007812, "learning_rate": 3.180346122146106e-06, "loss": 0.0379, "step": 1587, "video_reward_cumulative_accuracy": 0.8169502205419029 }, { "epoch": 0.4713564856040368, "grad_norm": 1.694346308708191, "learning_rate": 3.177853159263199e-06, "loss": 0.0271, "step": 1588, "video_reward_cumulative_accuracy": 0.8167506297229219 }, { "epoch": 0.47165330958741464, "grad_norm": 2.332949161529541, "learning_rate": 3.1753594686391343e-06, "loss": 0.0367, "step": 1589, "video_reward_cumulative_accuracy": 0.8165512901195721 }, { "epoch": 0.4719501335707925, "grad_norm": 0.6793799996376038, "learning_rate": 3.1728650529511308e-06, "loss": 0.0154, "step": 1590, "video_reward_cumulative_accuracy": 0.8166666666666667 }, { "epoch": 0.4722469575541704, "grad_norm": 3.225214958190918, "learning_rate": 3.1703699148771866e-06, "loss": 0.0366, "step": 1591, "video_reward_cumulative_accuracy": 0.816781898177247 }, { "epoch": 0.4725437815375482, "grad_norm": 1.2964482307434082, "learning_rate": 3.167874057096073e-06, "loss": 0.0323, "step": 1592, "video_reward_cumulative_accuracy": 0.8168969849246231 }, { "epoch": 0.4728406055209261, "grad_norm": 1.0534178018569946, "learning_rate": 3.1653774822873378e-06, "loss": 0.0419, "step": 1593, "video_reward_cumulative_accuracy": 0.8166980539861896 }, { "epoch": 0.47313742950430393, "grad_norm": 3.556938886642456, "learning_rate": 3.162880193131296e-06, "loss": 0.04, "step": 1594, "video_reward_cumulative_accuracy": 0.8164993726474279 }, { "epoch": 0.4734342534876818, "grad_norm": 2.3933658599853516, "learning_rate": 3.1603821923090277e-06, "loss": 0.031, "step": 1595, "video_reward_cumulative_accuracy": 0.8166144200626959 }, { "epoch": 0.47373107747105964, "grad_norm": 2.3574721813201904, "learning_rate": 3.157883482502382e-06, "loss": 0.0415, "step": 1596, "video_reward_cumulative_accuracy": 0.8167293233082706 }, { "epoch": 0.4740279014544375, "grad_norm": 3.107463836669922, "learning_rate": 3.155384066393964e-06, "loss": 0.0538, "step": 1597, "video_reward_cumulative_accuracy": 0.8168440826549781 }, { "epoch": 0.4743247254378154, "grad_norm": 1.9954248666763306, "learning_rate": 3.1528839466671413e-06, "loss": 0.0545, "step": 1598, "video_reward_cumulative_accuracy": 0.8169586983729662 }, { "epoch": 0.4746215494211932, "grad_norm": 4.566195964813232, "learning_rate": 3.1503831260060346e-06, "loss": 0.0645, "step": 1599, "video_reward_cumulative_accuracy": 0.8170731707317073 }, { "epoch": 0.4749183734045711, "grad_norm": 1.2144123315811157, "learning_rate": 3.1478816070955164e-06, "loss": 0.0793, "step": 1600, "video_reward_cumulative_accuracy": 0.816875 }, { "epoch": 0.4749183734045711, "eval_runtime": 148.9352, "eval_samples_per_second": 5.298, "eval_steps_per_second": 0.665, "eval_test_set_accuracy": 0.7815656565656566, "step": 1600 }, { "epoch": 0.47521519738794893, "grad_norm": 1.2719308137893677, "learning_rate": 3.1453793926212125e-06, "loss": 0.0114, "step": 1601, "video_reward_cumulative_accuracy": 0.8169893816364772 }, { "epoch": 0.4755120213713268, "grad_norm": 1.4081724882125854, "learning_rate": 3.1428764852694905e-06, "loss": 0.0274, "step": 1602, "video_reward_cumulative_accuracy": 0.8167915106117354 }, { "epoch": 0.47580884535470463, "grad_norm": 2.737161874771118, "learning_rate": 3.1403728877274662e-06, "loss": 0.0492, "step": 1603, "video_reward_cumulative_accuracy": 0.8165938864628821 }, { "epoch": 0.4761056693380825, "grad_norm": 3.401216983795166, "learning_rate": 3.137868602682993e-06, "loss": 0.0512, "step": 1604, "video_reward_cumulative_accuracy": 0.816708229426434 }, { "epoch": 0.4764024933214604, "grad_norm": 3.201937437057495, "learning_rate": 3.1353636328246652e-06, "loss": 0.0277, "step": 1605, "video_reward_cumulative_accuracy": 0.8165109034267912 }, { "epoch": 0.4766993173048382, "grad_norm": 4.124434471130371, "learning_rate": 3.1328579808418103e-06, "loss": 0.043, "step": 1606, "video_reward_cumulative_accuracy": 0.8166251556662516 }, { "epoch": 0.4769961412882161, "grad_norm": 2.2116482257843018, "learning_rate": 3.1303516494244897e-06, "loss": 0.0553, "step": 1607, "video_reward_cumulative_accuracy": 0.8167392657125078 }, { "epoch": 0.4772929652715939, "grad_norm": 2.262800455093384, "learning_rate": 3.127844641263493e-06, "loss": 0.0599, "step": 1608, "video_reward_cumulative_accuracy": 0.8165422885572139 }, { "epoch": 0.4775897892549718, "grad_norm": 3.7271931171417236, "learning_rate": 3.1253369590503357e-06, "loss": 0.0817, "step": 1609, "video_reward_cumulative_accuracy": 0.8160348042262274 }, { "epoch": 0.47788661323834963, "grad_norm": 1.643730878829956, "learning_rate": 3.12282860547726e-06, "loss": 0.0279, "step": 1610, "video_reward_cumulative_accuracy": 0.8158385093167702 }, { "epoch": 0.4781834372217275, "grad_norm": 2.743765115737915, "learning_rate": 3.1203195832372256e-06, "loss": 0.0265, "step": 1611, "video_reward_cumulative_accuracy": 0.8159528243327127 }, { "epoch": 0.4784802612051054, "grad_norm": 3.5921216011047363, "learning_rate": 3.1178098950239118e-06, "loss": 0.0393, "step": 1612, "video_reward_cumulative_accuracy": 0.8160669975186104 }, { "epoch": 0.4787770851884832, "grad_norm": 1.2250019311904907, "learning_rate": 3.115299543531713e-06, "loss": 0.0227, "step": 1613, "video_reward_cumulative_accuracy": 0.8161810291382517 }, { "epoch": 0.4790739091718611, "grad_norm": 1.2587252855300903, "learning_rate": 3.1127885314557343e-06, "loss": 0.0438, "step": 1614, "video_reward_cumulative_accuracy": 0.8162949194547707 }, { "epoch": 0.4793707331552389, "grad_norm": 2.4114818572998047, "learning_rate": 3.110276861491791e-06, "loss": 0.0305, "step": 1615, "video_reward_cumulative_accuracy": 0.8164086687306501 }, { "epoch": 0.4796675571386168, "grad_norm": 0.43257880210876465, "learning_rate": 3.107764536336405e-06, "loss": 0.0134, "step": 1616, "video_reward_cumulative_accuracy": 0.8165222772277227 }, { "epoch": 0.47996438112199463, "grad_norm": 2.9027011394500732, "learning_rate": 3.1052515586868005e-06, "loss": 0.0472, "step": 1617, "video_reward_cumulative_accuracy": 0.8166357452071737 }, { "epoch": 0.4802612051053725, "grad_norm": 3.9004299640655518, "learning_rate": 3.102737931240904e-06, "loss": 0.0582, "step": 1618, "video_reward_cumulative_accuracy": 0.8167490729295427 }, { "epoch": 0.4805580290887504, "grad_norm": 1.6789156198501587, "learning_rate": 3.1002236566973383e-06, "loss": 0.044, "step": 1619, "video_reward_cumulative_accuracy": 0.8168622606547251 }, { "epoch": 0.4808548530721282, "grad_norm": 0.7176075577735901, "learning_rate": 3.09770873775542e-06, "loss": 0.0199, "step": 1620, "video_reward_cumulative_accuracy": 0.8169753086419753 }, { "epoch": 0.4811516770555061, "grad_norm": 3.140882968902588, "learning_rate": 3.09519317711516e-06, "loss": 0.0341, "step": 1621, "video_reward_cumulative_accuracy": 0.8170882171499074 }, { "epoch": 0.4814485010388839, "grad_norm": 4.167765140533447, "learning_rate": 3.092676977477256e-06, "loss": 0.069, "step": 1622, "video_reward_cumulative_accuracy": 0.8172009864364982 }, { "epoch": 0.4817453250222618, "grad_norm": 2.755486011505127, "learning_rate": 3.090160141543092e-06, "loss": 0.0635, "step": 1623, "video_reward_cumulative_accuracy": 0.8173136167590881 }, { "epoch": 0.48204214900563963, "grad_norm": 2.705613136291504, "learning_rate": 3.087642672014738e-06, "loss": 0.0451, "step": 1624, "video_reward_cumulative_accuracy": 0.8174261083743842 }, { "epoch": 0.4823389729890175, "grad_norm": 0.6087374091148376, "learning_rate": 3.085124571594939e-06, "loss": 0.0153, "step": 1625, "video_reward_cumulative_accuracy": 0.8175384615384615 }, { "epoch": 0.4826357969723954, "grad_norm": 4.505397319793701, "learning_rate": 3.0826058429871226e-06, "loss": 0.0659, "step": 1626, "video_reward_cumulative_accuracy": 0.817650676506765 }, { "epoch": 0.4829326209557732, "grad_norm": 1.5764565467834473, "learning_rate": 3.0800864888953863e-06, "loss": 0.0229, "step": 1627, "video_reward_cumulative_accuracy": 0.8177627535341119 }, { "epoch": 0.4832294449391511, "grad_norm": 1.974806785583496, "learning_rate": 3.077566512024503e-06, "loss": 0.0307, "step": 1628, "video_reward_cumulative_accuracy": 0.8178746928746928 }, { "epoch": 0.4835262689225289, "grad_norm": 2.2565455436706543, "learning_rate": 3.0750459150799116e-06, "loss": 0.0528, "step": 1629, "video_reward_cumulative_accuracy": 0.8173726212400245 }, { "epoch": 0.4838230929059068, "grad_norm": 2.1708600521087646, "learning_rate": 3.0725247007677172e-06, "loss": 0.0366, "step": 1630, "video_reward_cumulative_accuracy": 0.8171779141104294 }, { "epoch": 0.48411991688928463, "grad_norm": 0.7493535280227661, "learning_rate": 3.0700028717946895e-06, "loss": 0.0177, "step": 1631, "video_reward_cumulative_accuracy": 0.8172900061312078 }, { "epoch": 0.4844167408726625, "grad_norm": 0.8744514584541321, "learning_rate": 3.0674804308682547e-06, "loss": 0.0233, "step": 1632, "video_reward_cumulative_accuracy": 0.8174019607843137 }, { "epoch": 0.4847135648560404, "grad_norm": 2.157686710357666, "learning_rate": 3.0649573806965006e-06, "loss": 0.0401, "step": 1633, "video_reward_cumulative_accuracy": 0.8172075933864054 }, { "epoch": 0.4850103888394182, "grad_norm": 3.5518863201141357, "learning_rate": 3.0624337239881636e-06, "loss": 0.091, "step": 1634, "video_reward_cumulative_accuracy": 0.8170134638922889 }, { "epoch": 0.4853072128227961, "grad_norm": 2.3240246772766113, "learning_rate": 3.0599094634526348e-06, "loss": 0.0462, "step": 1635, "video_reward_cumulative_accuracy": 0.8168195718654434 }, { "epoch": 0.4856040368061739, "grad_norm": 1.841416358947754, "learning_rate": 3.057384601799955e-06, "loss": 0.0536, "step": 1636, "video_reward_cumulative_accuracy": 0.8166259168704156 }, { "epoch": 0.4859008607895518, "grad_norm": 1.3698524236679077, "learning_rate": 3.0548591417408054e-06, "loss": 0.0268, "step": 1637, "video_reward_cumulative_accuracy": 0.8167379352474038 }, { "epoch": 0.48619768477292963, "grad_norm": 8.311427116394043, "learning_rate": 3.0523330859865147e-06, "loss": 0.0834, "step": 1638, "video_reward_cumulative_accuracy": 0.8168498168498168 }, { "epoch": 0.4864945087563075, "grad_norm": 2.798309803009033, "learning_rate": 3.0498064372490475e-06, "loss": 0.0553, "step": 1639, "video_reward_cumulative_accuracy": 0.8169615619280048 }, { "epoch": 0.4867913327396854, "grad_norm": 1.4771977663040161, "learning_rate": 3.0472791982410065e-06, "loss": 0.0298, "step": 1640, "video_reward_cumulative_accuracy": 0.8170731707317073 }, { "epoch": 0.4870881567230632, "grad_norm": 2.279690742492676, "learning_rate": 3.0447513716756294e-06, "loss": 0.0534, "step": 1641, "video_reward_cumulative_accuracy": 0.8165752589884216 }, { "epoch": 0.4873849807064411, "grad_norm": 1.9072357416152954, "learning_rate": 3.0422229602667825e-06, "loss": 0.0476, "step": 1642, "video_reward_cumulative_accuracy": 0.8163824604141291 }, { "epoch": 0.4876818046898189, "grad_norm": 1.5728148221969604, "learning_rate": 3.0396939667289597e-06, "loss": 0.0234, "step": 1643, "video_reward_cumulative_accuracy": 0.8164942178940962 }, { "epoch": 0.4879786286731968, "grad_norm": 5.686618804931641, "learning_rate": 3.0371643937772834e-06, "loss": 0.0733, "step": 1644, "video_reward_cumulative_accuracy": 0.8166058394160584 }, { "epoch": 0.4882754526565746, "grad_norm": 2.5927038192749023, "learning_rate": 3.0346342441274935e-06, "loss": 0.0404, "step": 1645, "video_reward_cumulative_accuracy": 0.8164133738601824 }, { "epoch": 0.4885722766399525, "grad_norm": 3.340865135192871, "learning_rate": 3.0321035204959524e-06, "loss": 0.0583, "step": 1646, "video_reward_cumulative_accuracy": 0.8165249088699879 }, { "epoch": 0.4888691006233304, "grad_norm": 1.8082395792007446, "learning_rate": 3.029572225599637e-06, "loss": 0.1195, "step": 1647, "video_reward_cumulative_accuracy": 0.8166363084395871 }, { "epoch": 0.4891659246067082, "grad_norm": 2.37727427482605, "learning_rate": 3.0270403621561387e-06, "loss": 0.0565, "step": 1648, "video_reward_cumulative_accuracy": 0.816747572815534 }, { "epoch": 0.4894627485900861, "grad_norm": 2.865441083908081, "learning_rate": 3.024507932883659e-06, "loss": 0.0404, "step": 1649, "video_reward_cumulative_accuracy": 0.8168587022437841 }, { "epoch": 0.4897595725734639, "grad_norm": 1.271600604057312, "learning_rate": 3.0219749405010054e-06, "loss": 0.0444, "step": 1650, "video_reward_cumulative_accuracy": 0.816969696969697 }, { "epoch": 0.4900563965568418, "grad_norm": 0.5346439480781555, "learning_rate": 3.019441387727591e-06, "loss": 0.012, "step": 1651, "video_reward_cumulative_accuracy": 0.8170805572380375 }, { "epoch": 0.4903532205402196, "grad_norm": 1.969221830368042, "learning_rate": 3.016907277283431e-06, "loss": 0.0382, "step": 1652, "video_reward_cumulative_accuracy": 0.8171912832929782 }, { "epoch": 0.4906500445235975, "grad_norm": 1.0193865299224854, "learning_rate": 3.014372611889139e-06, "loss": 0.0287, "step": 1653, "video_reward_cumulative_accuracy": 0.8173018753781004 }, { "epoch": 0.4909468685069754, "grad_norm": 2.115161418914795, "learning_rate": 3.011837394265925e-06, "loss": 0.0778, "step": 1654, "video_reward_cumulative_accuracy": 0.8174123337363967 }, { "epoch": 0.4912436924903532, "grad_norm": 4.27302360534668, "learning_rate": 3.0093016271355894e-06, "loss": 0.0571, "step": 1655, "video_reward_cumulative_accuracy": 0.8172205438066465 }, { "epoch": 0.4915405164737311, "grad_norm": 3.0853607654571533, "learning_rate": 3.0067653132205267e-06, "loss": 0.0414, "step": 1656, "video_reward_cumulative_accuracy": 0.8170289855072463 }, { "epoch": 0.4918373404571089, "grad_norm": 1.4236749410629272, "learning_rate": 3.0042284552437138e-06, "loss": 0.0427, "step": 1657, "video_reward_cumulative_accuracy": 0.8168376584188292 }, { "epoch": 0.4921341644404868, "grad_norm": 3.9889345169067383, "learning_rate": 3.0016910559287147e-06, "loss": 0.0583, "step": 1658, "video_reward_cumulative_accuracy": 0.8166465621230398 }, { "epoch": 0.4924309884238646, "grad_norm": 3.3688066005706787, "learning_rate": 2.999153117999675e-06, "loss": 0.0509, "step": 1659, "video_reward_cumulative_accuracy": 0.8164556962025317 }, { "epoch": 0.4927278124072425, "grad_norm": 1.3700438737869263, "learning_rate": 2.996614644181316e-06, "loss": 0.0319, "step": 1660, "video_reward_cumulative_accuracy": 0.816566265060241 }, { "epoch": 0.4930246363906204, "grad_norm": 1.2768040895462036, "learning_rate": 2.9940756371989366e-06, "loss": 0.0296, "step": 1661, "video_reward_cumulative_accuracy": 0.8166767007826611 }, { "epoch": 0.4933214603739982, "grad_norm": 1.5340852737426758, "learning_rate": 2.9915360997784066e-06, "loss": 0.0312, "step": 1662, "video_reward_cumulative_accuracy": 0.8164861612515042 }, { "epoch": 0.4936182843573761, "grad_norm": 2.8251636028289795, "learning_rate": 2.9889960346461653e-06, "loss": 0.0578, "step": 1663, "video_reward_cumulative_accuracy": 0.8162958508719182 }, { "epoch": 0.4939151083407539, "grad_norm": 1.959545612335205, "learning_rate": 2.9864554445292205e-06, "loss": 0.0421, "step": 1664, "video_reward_cumulative_accuracy": 0.81640625 }, { "epoch": 0.4942119323241318, "grad_norm": 1.7720321416854858, "learning_rate": 2.9839143321551415e-06, "loss": 0.0273, "step": 1665, "video_reward_cumulative_accuracy": 0.8165165165165165 }, { "epoch": 0.4945087563075096, "grad_norm": 3.3178257942199707, "learning_rate": 2.9813727002520597e-06, "loss": 0.0837, "step": 1666, "video_reward_cumulative_accuracy": 0.8166266506602641 }, { "epoch": 0.4948055802908875, "grad_norm": 1.7163565158843994, "learning_rate": 2.9788305515486636e-06, "loss": 0.0305, "step": 1667, "video_reward_cumulative_accuracy": 0.8167366526694662 }, { "epoch": 0.4951024042742654, "grad_norm": 2.1592905521392822, "learning_rate": 2.9762878887741956e-06, "loss": 0.0309, "step": 1668, "video_reward_cumulative_accuracy": 0.8168465227817746 }, { "epoch": 0.4953992282576432, "grad_norm": 0.8975669741630554, "learning_rate": 2.973744714658452e-06, "loss": 0.0316, "step": 1669, "video_reward_cumulative_accuracy": 0.816956261234272 }, { "epoch": 0.4956960522410211, "grad_norm": 1.1875498294830322, "learning_rate": 2.9712010319317765e-06, "loss": 0.0149, "step": 1670, "video_reward_cumulative_accuracy": 0.8167664670658683 }, { "epoch": 0.4959928762243989, "grad_norm": 1.2564858198165894, "learning_rate": 2.968656843325059e-06, "loss": 0.0371, "step": 1671, "video_reward_cumulative_accuracy": 0.8168761220825853 }, { "epoch": 0.4962897002077768, "grad_norm": 3.0329394340515137, "learning_rate": 2.966112151569734e-06, "loss": 0.0305, "step": 1672, "video_reward_cumulative_accuracy": 0.8166866028708134 }, { "epoch": 0.4965865241911546, "grad_norm": 1.6530811786651611, "learning_rate": 2.963566959397774e-06, "loss": 0.0277, "step": 1673, "video_reward_cumulative_accuracy": 0.8164973102211596 }, { "epoch": 0.4968833481745325, "grad_norm": 4.417179107666016, "learning_rate": 2.9610212695416908e-06, "loss": 0.058, "step": 1674, "video_reward_cumulative_accuracy": 0.8166069295101553 }, { "epoch": 0.4971801721579104, "grad_norm": 3.6994571685791016, "learning_rate": 2.958475084734529e-06, "loss": 0.0651, "step": 1675, "video_reward_cumulative_accuracy": 0.8167164179104478 }, { "epoch": 0.4974769961412882, "grad_norm": 1.733941912651062, "learning_rate": 2.955928407709864e-06, "loss": 0.0476, "step": 1676, "video_reward_cumulative_accuracy": 0.8168257756563246 }, { "epoch": 0.4977738201246661, "grad_norm": 1.2677801847457886, "learning_rate": 2.953381241201804e-06, "loss": 0.0159, "step": 1677, "video_reward_cumulative_accuracy": 0.8169350029815146 }, { "epoch": 0.4980706441080439, "grad_norm": 1.808666467666626, "learning_rate": 2.9508335879449764e-06, "loss": 0.0442, "step": 1678, "video_reward_cumulative_accuracy": 0.8170441001191895 }, { "epoch": 0.4983674680914218, "grad_norm": 1.5872184038162231, "learning_rate": 2.9482854506745353e-06, "loss": 0.0499, "step": 1679, "video_reward_cumulative_accuracy": 0.8171530673019655 }, { "epoch": 0.4986642920747996, "grad_norm": 0.5910095572471619, "learning_rate": 2.945736832126153e-06, "loss": 0.0082, "step": 1680, "video_reward_cumulative_accuracy": 0.8172619047619047 }, { "epoch": 0.4989611160581775, "grad_norm": 3.666095018386841, "learning_rate": 2.9431877350360198e-06, "loss": 0.0307, "step": 1681, "video_reward_cumulative_accuracy": 0.8173706127305176 }, { "epoch": 0.4992579400415554, "grad_norm": 0.6903228163719177, "learning_rate": 2.9406381621408374e-06, "loss": 0.0174, "step": 1682, "video_reward_cumulative_accuracy": 0.8174791914387634 }, { "epoch": 0.4995547640249332, "grad_norm": 1.7546206712722778, "learning_rate": 2.9380881161778214e-06, "loss": 0.0198, "step": 1683, "video_reward_cumulative_accuracy": 0.8175876411170528 }, { "epoch": 0.4998515880083111, "grad_norm": 1.842877745628357, "learning_rate": 2.9355375998846923e-06, "loss": 0.0213, "step": 1684, "video_reward_cumulative_accuracy": 0.8176959619952494 }, { "epoch": 0.5001484119916889, "grad_norm": 2.9342010021209717, "learning_rate": 2.932986615999678e-06, "loss": 0.0577, "step": 1685, "video_reward_cumulative_accuracy": 0.8175074183976261 }, { "epoch": 0.5004452359750667, "grad_norm": 4.135309219360352, "learning_rate": 2.9304351672615067e-06, "loss": 0.0362, "step": 1686, "video_reward_cumulative_accuracy": 0.8173190984578885 }, { "epoch": 0.5007420599584447, "grad_norm": 3.366182804107666, "learning_rate": 2.9278832564094064e-06, "loss": 0.0511, "step": 1687, "video_reward_cumulative_accuracy": 0.8174273858921162 }, { "epoch": 0.5010388839418225, "grad_norm": 2.525951623916626, "learning_rate": 2.9253308861831e-06, "loss": 0.0335, "step": 1688, "video_reward_cumulative_accuracy": 0.8175355450236966 }, { "epoch": 0.5013357079252003, "grad_norm": 3.1717841625213623, "learning_rate": 2.9227780593228063e-06, "loss": 0.0498, "step": 1689, "video_reward_cumulative_accuracy": 0.8173475429248076 }, { "epoch": 0.5016325319085783, "grad_norm": 4.952225208282471, "learning_rate": 2.9202247785692323e-06, "loss": 0.0765, "step": 1690, "video_reward_cumulative_accuracy": 0.8168639053254438 }, { "epoch": 0.5019293558919561, "grad_norm": 2.3946175575256348, "learning_rate": 2.9176710466635718e-06, "loss": 0.0809, "step": 1691, "video_reward_cumulative_accuracy": 0.8166765227675932 }, { "epoch": 0.5022261798753339, "grad_norm": 1.2514123916625977, "learning_rate": 2.915116866347505e-06, "loss": 0.03, "step": 1692, "video_reward_cumulative_accuracy": 0.8167848699763594 }, { "epoch": 0.5025230038587117, "grad_norm": 1.9969528913497925, "learning_rate": 2.9125622403631913e-06, "loss": 0.017, "step": 1693, "video_reward_cumulative_accuracy": 0.8168930891907856 }, { "epoch": 0.5028198278420897, "grad_norm": 1.2726820707321167, "learning_rate": 2.9100071714532706e-06, "loss": 0.043, "step": 1694, "video_reward_cumulative_accuracy": 0.8170011806375442 }, { "epoch": 0.5031166518254675, "grad_norm": 1.732360601425171, "learning_rate": 2.907451662360857e-06, "loss": 0.0243, "step": 1695, "video_reward_cumulative_accuracy": 0.8171091445427728 }, { "epoch": 0.5034134758088453, "grad_norm": 0.7770466804504395, "learning_rate": 2.904895715829537e-06, "loss": 0.0144, "step": 1696, "video_reward_cumulative_accuracy": 0.8172169811320755 }, { "epoch": 0.5037102997922233, "grad_norm": 1.9971357583999634, "learning_rate": 2.902339334603369e-06, "loss": 0.0488, "step": 1697, "video_reward_cumulative_accuracy": 0.8173246906305245 }, { "epoch": 0.5040071237756011, "grad_norm": 2.280515670776367, "learning_rate": 2.8997825214268743e-06, "loss": 0.0486, "step": 1698, "video_reward_cumulative_accuracy": 0.8171378091872792 }, { "epoch": 0.5043039477589789, "grad_norm": 4.495890140533447, "learning_rate": 2.8972252790450413e-06, "loss": 0.082, "step": 1699, "video_reward_cumulative_accuracy": 0.8169511477339612 }, { "epoch": 0.5046007717423567, "grad_norm": 1.4922055006027222, "learning_rate": 2.8946676102033167e-06, "loss": 0.0249, "step": 1700, "video_reward_cumulative_accuracy": 0.8170588235294117 }, { "epoch": 0.5048975957257347, "grad_norm": 1.0637956857681274, "learning_rate": 2.892109517647607e-06, "loss": 0.0296, "step": 1701, "video_reward_cumulative_accuracy": 0.8168724279835391 }, { "epoch": 0.5051944197091125, "grad_norm": 1.7560207843780518, "learning_rate": 2.8895510041242737e-06, "loss": 0.0522, "step": 1702, "video_reward_cumulative_accuracy": 0.8169800235017627 }, { "epoch": 0.5054912436924903, "grad_norm": 0.3437102735042572, "learning_rate": 2.886992072380128e-06, "loss": 0.0042, "step": 1703, "video_reward_cumulative_accuracy": 0.8170874926600118 }, { "epoch": 0.5057880676758683, "grad_norm": 3.053436279296875, "learning_rate": 2.884432725162433e-06, "loss": 0.0345, "step": 1704, "video_reward_cumulative_accuracy": 0.8171948356807511 }, { "epoch": 0.5060848916592461, "grad_norm": 1.0694425106048584, "learning_rate": 2.8818729652188936e-06, "loss": 0.0315, "step": 1705, "video_reward_cumulative_accuracy": 0.817008797653959 }, { "epoch": 0.5063817156426239, "grad_norm": 3.757838249206543, "learning_rate": 2.879312795297663e-06, "loss": 0.0712, "step": 1706, "video_reward_cumulative_accuracy": 0.8171160609613131 }, { "epoch": 0.5066785396260017, "grad_norm": 2.922731637954712, "learning_rate": 2.8767522181473323e-06, "loss": 0.0432, "step": 1707, "video_reward_cumulative_accuracy": 0.81693028705331 }, { "epoch": 0.5069753636093797, "grad_norm": 2.4191317558288574, "learning_rate": 2.8741912365169276e-06, "loss": 0.0722, "step": 1708, "video_reward_cumulative_accuracy": 0.8167447306791569 }, { "epoch": 0.5072721875927575, "grad_norm": 1.132919192314148, "learning_rate": 2.8716298531559133e-06, "loss": 0.0104, "step": 1709, "video_reward_cumulative_accuracy": 0.8168519602106495 }, { "epoch": 0.5075690115761353, "grad_norm": 3.451291561126709, "learning_rate": 2.8690680708141814e-06, "loss": 0.1011, "step": 1710, "video_reward_cumulative_accuracy": 0.8169590643274853 }, { "epoch": 0.5078658355595133, "grad_norm": 2.044468879699707, "learning_rate": 2.866505892242055e-06, "loss": 0.0415, "step": 1711, "video_reward_cumulative_accuracy": 0.8170660432495617 }, { "epoch": 0.5081626595428911, "grad_norm": 1.3463438749313354, "learning_rate": 2.8639433201902807e-06, "loss": 0.0201, "step": 1712, "video_reward_cumulative_accuracy": 0.8171728971962616 }, { "epoch": 0.5084594835262689, "grad_norm": 1.1717925071716309, "learning_rate": 2.8613803574100284e-06, "loss": 0.0327, "step": 1713, "video_reward_cumulative_accuracy": 0.8172796263864565 }, { "epoch": 0.5087563075096467, "grad_norm": 2.5041465759277344, "learning_rate": 2.858817006652888e-06, "loss": 0.0391, "step": 1714, "video_reward_cumulative_accuracy": 0.8170945157526255 }, { "epoch": 0.5090531314930247, "grad_norm": 2.035304546356201, "learning_rate": 2.8562532706708655e-06, "loss": 0.017, "step": 1715, "video_reward_cumulative_accuracy": 0.817201166180758 }, { "epoch": 0.5093499554764025, "grad_norm": 2.6891417503356934, "learning_rate": 2.853689152216379e-06, "loss": 0.0498, "step": 1716, "video_reward_cumulative_accuracy": 0.8173076923076923 }, { "epoch": 0.5096467794597803, "grad_norm": 4.318187713623047, "learning_rate": 2.8511246540422597e-06, "loss": 0.0503, "step": 1717, "video_reward_cumulative_accuracy": 0.8174140943506115 }, { "epoch": 0.5099436034431583, "grad_norm": 5.709010124206543, "learning_rate": 2.848559778901745e-06, "loss": 0.062, "step": 1718, "video_reward_cumulative_accuracy": 0.8175203725261933 }, { "epoch": 0.5102404274265361, "grad_norm": 1.210877537727356, "learning_rate": 2.845994529548477e-06, "loss": 0.0096, "step": 1719, "video_reward_cumulative_accuracy": 0.8176265270506108 }, { "epoch": 0.5105372514099139, "grad_norm": 1.9540245532989502, "learning_rate": 2.8434289087365002e-06, "loss": 0.041, "step": 1720, "video_reward_cumulative_accuracy": 0.8177325581395349 }, { "epoch": 0.5108340753932917, "grad_norm": 2.498683452606201, "learning_rate": 2.8408629192202574e-06, "loss": 0.0548, "step": 1721, "video_reward_cumulative_accuracy": 0.8178384660081348 }, { "epoch": 0.5111308993766697, "grad_norm": 1.3648625612258911, "learning_rate": 2.8382965637545877e-06, "loss": 0.0349, "step": 1722, "video_reward_cumulative_accuracy": 0.8179442508710801 }, { "epoch": 0.5114277233600475, "grad_norm": 2.748464345932007, "learning_rate": 2.835729845094722e-06, "loss": 0.0527, "step": 1723, "video_reward_cumulative_accuracy": 0.818049912942542 }, { "epoch": 0.5117245473434253, "grad_norm": 1.2629681825637817, "learning_rate": 2.8331627659962824e-06, "loss": 0.0243, "step": 1724, "video_reward_cumulative_accuracy": 0.8181554524361949 }, { "epoch": 0.5120213713268033, "grad_norm": 2.213495969772339, "learning_rate": 2.8305953292152785e-06, "loss": 0.035, "step": 1725, "video_reward_cumulative_accuracy": 0.8182608695652174 }, { "epoch": 0.5123181953101811, "grad_norm": 2.6524288654327393, "learning_rate": 2.8280275375081e-06, "loss": 0.0457, "step": 1726, "video_reward_cumulative_accuracy": 0.8180764774044033 }, { "epoch": 0.5126150192935589, "grad_norm": 2.8354032039642334, "learning_rate": 2.8254593936315243e-06, "loss": 0.0827, "step": 1727, "video_reward_cumulative_accuracy": 0.8178922987840185 }, { "epoch": 0.5129118432769367, "grad_norm": 2.953294515609741, "learning_rate": 2.8228909003427003e-06, "loss": 0.0621, "step": 1728, "video_reward_cumulative_accuracy": 0.8179976851851852 }, { "epoch": 0.5132086672603147, "grad_norm": 3.763993263244629, "learning_rate": 2.820322060399156e-06, "loss": 0.0425, "step": 1729, "video_reward_cumulative_accuracy": 0.818102949681897 }, { "epoch": 0.5135054912436925, "grad_norm": 1.8136401176452637, "learning_rate": 2.817752876558789e-06, "loss": 0.0226, "step": 1730, "video_reward_cumulative_accuracy": 0.8182080924855492 }, { "epoch": 0.5138023152270703, "grad_norm": 0.8390701413154602, "learning_rate": 2.81518335157987e-06, "loss": 0.0311, "step": 1731, "video_reward_cumulative_accuracy": 0.818313113807048 }, { "epoch": 0.5140991392104483, "grad_norm": 1.5557044744491577, "learning_rate": 2.8126134882210313e-06, "loss": 0.0402, "step": 1732, "video_reward_cumulative_accuracy": 0.8181293302540416 }, { "epoch": 0.5143959631938261, "grad_norm": 2.67010235786438, "learning_rate": 2.8100432892412723e-06, "loss": 0.0301, "step": 1733, "video_reward_cumulative_accuracy": 0.8182342758222735 }, { "epoch": 0.5146927871772039, "grad_norm": 2.489520311355591, "learning_rate": 2.8074727573999495e-06, "loss": 0.0829, "step": 1734, "video_reward_cumulative_accuracy": 0.8183391003460208 }, { "epoch": 0.5149896111605817, "grad_norm": 2.488553524017334, "learning_rate": 2.8049018954567797e-06, "loss": 0.0745, "step": 1735, "video_reward_cumulative_accuracy": 0.8181556195965418 }, { "epoch": 0.5152864351439597, "grad_norm": 3.8210175037384033, "learning_rate": 2.802330706171831e-06, "loss": 0.049, "step": 1736, "video_reward_cumulative_accuracy": 0.8179723502304147 }, { "epoch": 0.5155832591273375, "grad_norm": 7.683956623077393, "learning_rate": 2.799759192305526e-06, "loss": 0.0934, "step": 1737, "video_reward_cumulative_accuracy": 0.8177892918825561 }, { "epoch": 0.5158800831107153, "grad_norm": 1.5301347970962524, "learning_rate": 2.7971873566186347e-06, "loss": 0.0483, "step": 1738, "video_reward_cumulative_accuracy": 0.8178941311852704 }, { "epoch": 0.5161769070940933, "grad_norm": 0.7959959506988525, "learning_rate": 2.7946152018722714e-06, "loss": 0.0287, "step": 1739, "video_reward_cumulative_accuracy": 0.8177113283496262 }, { "epoch": 0.5164737310774711, "grad_norm": 3.1695709228515625, "learning_rate": 2.7920427308278946e-06, "loss": 0.034, "step": 1740, "video_reward_cumulative_accuracy": 0.8178160919540229 }, { "epoch": 0.5167705550608489, "grad_norm": 4.1620917320251465, "learning_rate": 2.7894699462473008e-06, "loss": 0.039, "step": 1741, "video_reward_cumulative_accuracy": 0.8179207352096496 }, { "epoch": 0.5170673790442267, "grad_norm": 1.3403260707855225, "learning_rate": 2.7868968508926242e-06, "loss": 0.0228, "step": 1742, "video_reward_cumulative_accuracy": 0.8180252583237658 }, { "epoch": 0.5173642030276047, "grad_norm": 2.3487467765808105, "learning_rate": 2.784323447526333e-06, "loss": 0.0681, "step": 1743, "video_reward_cumulative_accuracy": 0.8181296615031555 }, { "epoch": 0.5176610270109825, "grad_norm": 1.9424347877502441, "learning_rate": 2.7817497389112247e-06, "loss": 0.0378, "step": 1744, "video_reward_cumulative_accuracy": 0.8182339449541285 }, { "epoch": 0.5179578509943603, "grad_norm": 2.208332061767578, "learning_rate": 2.779175727810426e-06, "loss": 0.0513, "step": 1745, "video_reward_cumulative_accuracy": 0.8177650429799427 }, { "epoch": 0.5182546749777382, "grad_norm": 4.050232887268066, "learning_rate": 2.7766014169873874e-06, "loss": 0.0422, "step": 1746, "video_reward_cumulative_accuracy": 0.8178694158075601 }, { "epoch": 0.5185514989611161, "grad_norm": 1.5156047344207764, "learning_rate": 2.7740268092058813e-06, "loss": 0.0397, "step": 1747, "video_reward_cumulative_accuracy": 0.8179736691471093 }, { "epoch": 0.5188483229444939, "grad_norm": 0.790824294090271, "learning_rate": 2.771451907229999e-06, "loss": 0.0114, "step": 1748, "video_reward_cumulative_accuracy": 0.8180778032036613 }, { "epoch": 0.5191451469278717, "grad_norm": 0.7043279409408569, "learning_rate": 2.7688767138241474e-06, "loss": 0.017, "step": 1749, "video_reward_cumulative_accuracy": 0.8181818181818182 }, { "epoch": 0.5194419709112497, "grad_norm": 2.117793560028076, "learning_rate": 2.7663012317530474e-06, "loss": 0.0303, "step": 1750, "video_reward_cumulative_accuracy": 0.818 }, { "epoch": 0.5197387948946275, "grad_norm": 2.9384334087371826, "learning_rate": 2.7637254637817284e-06, "loss": 0.066, "step": 1751, "video_reward_cumulative_accuracy": 0.817818389491719 }, { "epoch": 0.5200356188780053, "grad_norm": 5.217811584472656, "learning_rate": 2.7611494126755276e-06, "loss": 0.0593, "step": 1752, "video_reward_cumulative_accuracy": 0.817351598173516 }, { "epoch": 0.5203324428613832, "grad_norm": 4.0168890953063965, "learning_rate": 2.7585730812000855e-06, "loss": 0.0452, "step": 1753, "video_reward_cumulative_accuracy": 0.8171705647461495 }, { "epoch": 0.5206292668447611, "grad_norm": 2.7809596061706543, "learning_rate": 2.755996472121344e-06, "loss": 0.043, "step": 1754, "video_reward_cumulative_accuracy": 0.8169897377423033 }, { "epoch": 0.5209260908281389, "grad_norm": 2.7551541328430176, "learning_rate": 2.753419588205544e-06, "loss": 0.0537, "step": 1755, "video_reward_cumulative_accuracy": 0.8170940170940171 }, { "epoch": 0.5212229148115167, "grad_norm": 1.8034993410110474, "learning_rate": 2.750842432219219e-06, "loss": 0.0245, "step": 1756, "video_reward_cumulative_accuracy": 0.8171981776765376 }, { "epoch": 0.5215197387948947, "grad_norm": 2.4644577503204346, "learning_rate": 2.7482650069291976e-06, "loss": 0.0258, "step": 1757, "video_reward_cumulative_accuracy": 0.8170176437108708 }, { "epoch": 0.5218165627782725, "grad_norm": 3.7700116634368896, "learning_rate": 2.745687315102595e-06, "loss": 0.0812, "step": 1758, "video_reward_cumulative_accuracy": 0.8168373151308305 }, { "epoch": 0.5221133867616503, "grad_norm": 2.102640390396118, "learning_rate": 2.743109359506813e-06, "loss": 0.0272, "step": 1759, "video_reward_cumulative_accuracy": 0.816941444002274 }, { "epoch": 0.5224102107450282, "grad_norm": 3.0133395195007324, "learning_rate": 2.7405311429095384e-06, "loss": 0.0467, "step": 1760, "video_reward_cumulative_accuracy": 0.8170454545454545 }, { "epoch": 0.5227070347284061, "grad_norm": 4.504801273345947, "learning_rate": 2.7379526680787365e-06, "loss": 0.0576, "step": 1761, "video_reward_cumulative_accuracy": 0.8168654173764907 }, { "epoch": 0.5230038587117839, "grad_norm": 1.1885194778442383, "learning_rate": 2.7353739377826503e-06, "loss": 0.0166, "step": 1762, "video_reward_cumulative_accuracy": 0.8169693530079455 }, { "epoch": 0.5233006826951617, "grad_norm": 2.73569393157959, "learning_rate": 2.7327949547897977e-06, "loss": 0.0413, "step": 1763, "video_reward_cumulative_accuracy": 0.8170731707317073 }, { "epoch": 0.5235975066785397, "grad_norm": 4.12099027633667, "learning_rate": 2.7302157218689655e-06, "loss": 0.0539, "step": 1764, "video_reward_cumulative_accuracy": 0.8171768707482994 }, { "epoch": 0.5238943306619175, "grad_norm": 2.6001250743865967, "learning_rate": 2.7276362417892124e-06, "loss": 0.034, "step": 1765, "video_reward_cumulative_accuracy": 0.8172804532577904 }, { "epoch": 0.5241911546452953, "grad_norm": 2.1774399280548096, "learning_rate": 2.7250565173198596e-06, "loss": 0.0154, "step": 1766, "video_reward_cumulative_accuracy": 0.8173839184597962 }, { "epoch": 0.5244879786286732, "grad_norm": 3.843973159790039, "learning_rate": 2.722476551230491e-06, "loss": 0.0978, "step": 1767, "video_reward_cumulative_accuracy": 0.8174872665534805 }, { "epoch": 0.5247848026120511, "grad_norm": 1.590867280960083, "learning_rate": 2.7198963462909534e-06, "loss": 0.0226, "step": 1768, "video_reward_cumulative_accuracy": 0.8175904977375565 }, { "epoch": 0.5250816265954289, "grad_norm": 4.675069808959961, "learning_rate": 2.717315905271344e-06, "loss": 0.1181, "step": 1769, "video_reward_cumulative_accuracy": 0.8174109666478236 }, { "epoch": 0.5253784505788067, "grad_norm": 1.5898845195770264, "learning_rate": 2.714735230942019e-06, "loss": 0.0133, "step": 1770, "video_reward_cumulative_accuracy": 0.8175141242937853 }, { "epoch": 0.5256752745621847, "grad_norm": 2.406005382537842, "learning_rate": 2.712154326073581e-06, "loss": 0.042, "step": 1771, "video_reward_cumulative_accuracy": 0.8176171654432524 }, { "epoch": 0.5259720985455625, "grad_norm": 1.935448408126831, "learning_rate": 2.709573193436883e-06, "loss": 0.0373, "step": 1772, "video_reward_cumulative_accuracy": 0.8174379232505643 }, { "epoch": 0.5262689225289403, "grad_norm": 1.7007020711898804, "learning_rate": 2.7069918358030218e-06, "loss": 0.0311, "step": 1773, "video_reward_cumulative_accuracy": 0.817258883248731 }, { "epoch": 0.5265657465123182, "grad_norm": 5.44350528717041, "learning_rate": 2.7044102559433346e-06, "loss": 0.0672, "step": 1774, "video_reward_cumulative_accuracy": 0.8170800450958287 }, { "epoch": 0.5268625704956961, "grad_norm": 2.1259396076202393, "learning_rate": 2.701828456629398e-06, "loss": 0.0689, "step": 1775, "video_reward_cumulative_accuracy": 0.8169014084507042 }, { "epoch": 0.5271593944790739, "grad_norm": 0.9994587302207947, "learning_rate": 2.699246440633023e-06, "loss": 0.015, "step": 1776, "video_reward_cumulative_accuracy": 0.8170045045045045 }, { "epoch": 0.5274562184624517, "grad_norm": 2.5973446369171143, "learning_rate": 2.696664210726257e-06, "loss": 0.0958, "step": 1777, "video_reward_cumulative_accuracy": 0.8171074845244795 }, { "epoch": 0.5277530424458297, "grad_norm": 2.029411792755127, "learning_rate": 2.694081769681373e-06, "loss": 0.0194, "step": 1778, "video_reward_cumulative_accuracy": 0.8172103487064117 }, { "epoch": 0.5280498664292075, "grad_norm": 1.6319355964660645, "learning_rate": 2.6914991202708707e-06, "loss": 0.0408, "step": 1779, "video_reward_cumulative_accuracy": 0.8173130972456436 }, { "epoch": 0.5283466904125853, "grad_norm": 1.4673457145690918, "learning_rate": 2.6889162652674776e-06, "loss": 0.0176, "step": 1780, "video_reward_cumulative_accuracy": 0.8174157303370787 }, { "epoch": 0.5286435143959632, "grad_norm": 2.3411591053009033, "learning_rate": 2.6863332074441374e-06, "loss": 0.0476, "step": 1781, "video_reward_cumulative_accuracy": 0.8172375070185289 }, { "epoch": 0.5289403383793411, "grad_norm": 2.2734508514404297, "learning_rate": 2.6837499495740144e-06, "loss": 0.0589, "step": 1782, "video_reward_cumulative_accuracy": 0.8173400673400674 }, { "epoch": 0.5292371623627189, "grad_norm": 4.031787872314453, "learning_rate": 2.681166494430486e-06, "loss": 0.0424, "step": 1783, "video_reward_cumulative_accuracy": 0.8174425126191811 }, { "epoch": 0.5295339863460967, "grad_norm": 3.090514898300171, "learning_rate": 2.6785828447871415e-06, "loss": 0.0261, "step": 1784, "video_reward_cumulative_accuracy": 0.8175448430493274 }, { "epoch": 0.5298308103294747, "grad_norm": 0.7824661731719971, "learning_rate": 2.6759990034177814e-06, "loss": 0.0154, "step": 1785, "video_reward_cumulative_accuracy": 0.8176470588235294 }, { "epoch": 0.5301276343128525, "grad_norm": 1.5541051626205444, "learning_rate": 2.6734149730964085e-06, "loss": 0.0278, "step": 1786, "video_reward_cumulative_accuracy": 0.8177491601343785 }, { "epoch": 0.5304244582962303, "grad_norm": 1.8912038803100586, "learning_rate": 2.6708307565972307e-06, "loss": 0.0368, "step": 1787, "video_reward_cumulative_accuracy": 0.8178511471740347 }, { "epoch": 0.5307212822796082, "grad_norm": 0.8161097168922424, "learning_rate": 2.668246356694656e-06, "loss": 0.0143, "step": 1788, "video_reward_cumulative_accuracy": 0.8179530201342282 }, { "epoch": 0.5310181062629861, "grad_norm": 3.4581801891326904, "learning_rate": 2.6656617761632863e-06, "loss": 0.0676, "step": 1789, "video_reward_cumulative_accuracy": 0.8180547792062605 }, { "epoch": 0.5313149302463639, "grad_norm": 0.8631388545036316, "learning_rate": 2.6630770177779218e-06, "loss": 0.0121, "step": 1790, "video_reward_cumulative_accuracy": 0.8181564245810056 }, { "epoch": 0.5316117542297417, "grad_norm": 1.1447865962982178, "learning_rate": 2.660492084313551e-06, "loss": 0.0248, "step": 1791, "video_reward_cumulative_accuracy": 0.8182579564489112 }, { "epoch": 0.5319085782131197, "grad_norm": 2.4453208446502686, "learning_rate": 2.657906978545351e-06, "loss": 0.0264, "step": 1792, "video_reward_cumulative_accuracy": 0.8180803571428571 }, { "epoch": 0.5322054021964975, "grad_norm": 2.468322515487671, "learning_rate": 2.6553217032486832e-06, "loss": 0.0487, "step": 1793, "video_reward_cumulative_accuracy": 0.8181818181818182 }, { "epoch": 0.5325022261798753, "grad_norm": 0.8765484094619751, "learning_rate": 2.6527362611990915e-06, "loss": 0.0165, "step": 1794, "video_reward_cumulative_accuracy": 0.8182831661092531 }, { "epoch": 0.5327990501632532, "grad_norm": 2.160796880722046, "learning_rate": 2.6501506551722995e-06, "loss": 0.0564, "step": 1795, "video_reward_cumulative_accuracy": 0.8181058495821727 }, { "epoch": 0.5330958741466311, "grad_norm": 2.3637046813964844, "learning_rate": 2.6475648879442055e-06, "loss": 0.0582, "step": 1796, "video_reward_cumulative_accuracy": 0.8182071269487751 }, { "epoch": 0.5333926981300089, "grad_norm": 1.2972532510757446, "learning_rate": 2.6449789622908823e-06, "loss": 0.0191, "step": 1797, "video_reward_cumulative_accuracy": 0.8183082915971063 }, { "epoch": 0.5336895221133867, "grad_norm": 3.382450580596924, "learning_rate": 2.6423928809885716e-06, "loss": 0.0522, "step": 1798, "video_reward_cumulative_accuracy": 0.818131256952169 }, { "epoch": 0.5339863460967647, "grad_norm": 2.019676685333252, "learning_rate": 2.639806646813683e-06, "loss": 0.0699, "step": 1799, "video_reward_cumulative_accuracy": 0.8182323513062812 }, { "epoch": 0.5342831700801425, "grad_norm": 2.9581692218780518, "learning_rate": 2.6372202625427897e-06, "loss": 0.0817, "step": 1800, "video_reward_cumulative_accuracy": 0.8183333333333334 }, { "epoch": 0.5342831700801425, "eval_runtime": 131.6405, "eval_samples_per_second": 5.994, "eval_steps_per_second": 0.752, "eval_test_set_accuracy": 0.7765151515151515, "step": 1800 }, { "epoch": 0.5345799940635203, "grad_norm": 3.738384485244751, "learning_rate": 2.6346337309526265e-06, "loss": 0.0749, "step": 1801, "video_reward_cumulative_accuracy": 0.8181565796779567 }, { "epoch": 0.5348768180468982, "grad_norm": 1.4811760187149048, "learning_rate": 2.6320470548200848e-06, "loss": 0.0336, "step": 1802, "video_reward_cumulative_accuracy": 0.8182574916759157 }, { "epoch": 0.5351736420302761, "grad_norm": 1.9211323261260986, "learning_rate": 2.6294602369222145e-06, "loss": 0.0304, "step": 1803, "video_reward_cumulative_accuracy": 0.8183582917359956 }, { "epoch": 0.5354704660136539, "grad_norm": 0.7293126583099365, "learning_rate": 2.6268732800362147e-06, "loss": 0.0101, "step": 1804, "video_reward_cumulative_accuracy": 0.8184589800443459 }, { "epoch": 0.5357672899970317, "grad_norm": 2.35774827003479, "learning_rate": 2.624286186939435e-06, "loss": 0.0468, "step": 1805, "video_reward_cumulative_accuracy": 0.8182825484764543 }, { "epoch": 0.5360641139804097, "grad_norm": 3.0999953746795654, "learning_rate": 2.62169896040937e-06, "loss": 0.0384, "step": 1806, "video_reward_cumulative_accuracy": 0.8183831672203765 }, { "epoch": 0.5363609379637875, "grad_norm": 4.446628093719482, "learning_rate": 2.6191116032236598e-06, "loss": 0.0707, "step": 1807, "video_reward_cumulative_accuracy": 0.8184836745987825 }, { "epoch": 0.5366577619471653, "grad_norm": 2.9089622497558594, "learning_rate": 2.616524118160082e-06, "loss": 0.0499, "step": 1808, "video_reward_cumulative_accuracy": 0.8185840707964602 }, { "epoch": 0.5369545859305432, "grad_norm": 0.42697280645370483, "learning_rate": 2.613936507996554e-06, "loss": 0.0118, "step": 1809, "video_reward_cumulative_accuracy": 0.8186843559977889 }, { "epoch": 0.5372514099139211, "grad_norm": 2.4700889587402344, "learning_rate": 2.611348775511127e-06, "loss": 0.06, "step": 1810, "video_reward_cumulative_accuracy": 0.8187845303867404 }, { "epoch": 0.5375482338972989, "grad_norm": 1.9913703203201294, "learning_rate": 2.6087609234819822e-06, "loss": 0.0386, "step": 1811, "video_reward_cumulative_accuracy": 0.8188845941468802 }, { "epoch": 0.5378450578806767, "grad_norm": 2.4547126293182373, "learning_rate": 2.606172954687429e-06, "loss": 0.0696, "step": 1812, "video_reward_cumulative_accuracy": 0.8189845474613686 }, { "epoch": 0.5381418818640546, "grad_norm": 2.6594624519348145, "learning_rate": 2.603584871905905e-06, "loss": 0.065, "step": 1813, "video_reward_cumulative_accuracy": 0.8188086045228903 }, { "epoch": 0.5384387058474325, "grad_norm": 1.8410438299179077, "learning_rate": 2.600996677915967e-06, "loss": 0.0422, "step": 1814, "video_reward_cumulative_accuracy": 0.8189084895259096 }, { "epoch": 0.5387355298308103, "grad_norm": 1.4541661739349365, "learning_rate": 2.598408375496292e-06, "loss": 0.0367, "step": 1815, "video_reward_cumulative_accuracy": 0.8190082644628099 }, { "epoch": 0.5390323538141882, "grad_norm": 0.9136159420013428, "learning_rate": 2.5958199674256755e-06, "loss": 0.0166, "step": 1816, "video_reward_cumulative_accuracy": 0.8191079295154186 }, { "epoch": 0.5393291777975661, "grad_norm": 3.7377233505249023, "learning_rate": 2.5932314564830237e-06, "loss": 0.0694, "step": 1817, "video_reward_cumulative_accuracy": 0.8192074848651624 }, { "epoch": 0.5396260017809439, "grad_norm": 3.6235744953155518, "learning_rate": 2.5906428454473546e-06, "loss": 0.0713, "step": 1818, "video_reward_cumulative_accuracy": 0.819031903190319 }, { "epoch": 0.5399228257643217, "grad_norm": 3.179281711578369, "learning_rate": 2.588054137097793e-06, "loss": 0.0389, "step": 1819, "video_reward_cumulative_accuracy": 0.8191313908741067 }, { "epoch": 0.5402196497476996, "grad_norm": 2.8550491333007812, "learning_rate": 2.5854653342135687e-06, "loss": 0.0421, "step": 1820, "video_reward_cumulative_accuracy": 0.8192307692307692 }, { "epoch": 0.5405164737310775, "grad_norm": 4.346277713775635, "learning_rate": 2.5828764395740135e-06, "loss": 0.0625, "step": 1821, "video_reward_cumulative_accuracy": 0.8193300384404174 }, { "epoch": 0.5408132977144553, "grad_norm": 2.5140342712402344, "learning_rate": 2.5802874559585567e-06, "loss": 0.0429, "step": 1822, "video_reward_cumulative_accuracy": 0.8194291986827662 }, { "epoch": 0.5411101216978332, "grad_norm": 6.247716903686523, "learning_rate": 2.5776983861467237e-06, "loss": 0.0708, "step": 1823, "video_reward_cumulative_accuracy": 0.8195282501371366 }, { "epoch": 0.5414069456812111, "grad_norm": 3.389479875564575, "learning_rate": 2.575109232918131e-06, "loss": 0.0473, "step": 1824, "video_reward_cumulative_accuracy": 0.8196271929824561 }, { "epoch": 0.5417037696645889, "grad_norm": 2.464885711669922, "learning_rate": 2.5725199990524874e-06, "loss": 0.0297, "step": 1825, "video_reward_cumulative_accuracy": 0.8197260273972603 }, { "epoch": 0.5420005936479667, "grad_norm": 1.1972076892852783, "learning_rate": 2.569930687329586e-06, "loss": 0.0302, "step": 1826, "video_reward_cumulative_accuracy": 0.8198247535596933 }, { "epoch": 0.5422974176313446, "grad_norm": 1.1544277667999268, "learning_rate": 2.567341300529305e-06, "loss": 0.0154, "step": 1827, "video_reward_cumulative_accuracy": 0.8199233716475096 }, { "epoch": 0.5425942416147225, "grad_norm": 1.3733747005462646, "learning_rate": 2.5647518414316015e-06, "loss": 0.0441, "step": 1828, "video_reward_cumulative_accuracy": 0.8200218818380745 }, { "epoch": 0.5428910655981003, "grad_norm": 2.372936964035034, "learning_rate": 2.562162312816511e-06, "loss": 0.0401, "step": 1829, "video_reward_cumulative_accuracy": 0.8201202843083653 }, { "epoch": 0.5431878895814782, "grad_norm": 2.063004970550537, "learning_rate": 2.559572717464145e-06, "loss": 0.0189, "step": 1830, "video_reward_cumulative_accuracy": 0.8199453551912569 }, { "epoch": 0.5434847135648561, "grad_norm": 2.8820412158966064, "learning_rate": 2.556983058154685e-06, "loss": 0.0546, "step": 1831, "video_reward_cumulative_accuracy": 0.8200436919716002 }, { "epoch": 0.5437815375482339, "grad_norm": 1.9526046514511108, "learning_rate": 2.5543933376683805e-06, "loss": 0.0197, "step": 1832, "video_reward_cumulative_accuracy": 0.82014192139738 }, { "epoch": 0.5440783615316117, "grad_norm": 2.945932149887085, "learning_rate": 2.5518035587855492e-06, "loss": 0.06, "step": 1833, "video_reward_cumulative_accuracy": 0.8202400436442989 }, { "epoch": 0.5443751855149896, "grad_norm": 1.299325942993164, "learning_rate": 2.5492137242865706e-06, "loss": 0.0312, "step": 1834, "video_reward_cumulative_accuracy": 0.8203380588876772 }, { "epoch": 0.5446720094983675, "grad_norm": 1.5175635814666748, "learning_rate": 2.5466238369518808e-06, "loss": 0.0433, "step": 1835, "video_reward_cumulative_accuracy": 0.8204359673024523 }, { "epoch": 0.5449688334817453, "grad_norm": 3.6290009021759033, "learning_rate": 2.544033899561978e-06, "loss": 0.0684, "step": 1836, "video_reward_cumulative_accuracy": 0.8205337690631809 }, { "epoch": 0.5452656574651232, "grad_norm": 2.739287853240967, "learning_rate": 2.5414439148974096e-06, "loss": 0.0701, "step": 1837, "video_reward_cumulative_accuracy": 0.8206314643440392 }, { "epoch": 0.5455624814485011, "grad_norm": 2.4160091876983643, "learning_rate": 2.5388538857387756e-06, "loss": 0.0424, "step": 1838, "video_reward_cumulative_accuracy": 0.8204570184983678 }, { "epoch": 0.5458593054318789, "grad_norm": 2.7636618614196777, "learning_rate": 2.5362638148667256e-06, "loss": 0.0486, "step": 1839, "video_reward_cumulative_accuracy": 0.8205546492659054 }, { "epoch": 0.5461561294152567, "grad_norm": 1.2934203147888184, "learning_rate": 2.5336737050619497e-06, "loss": 0.0313, "step": 1840, "video_reward_cumulative_accuracy": 0.8206521739130435 }, { "epoch": 0.5464529533986346, "grad_norm": 1.7498849630355835, "learning_rate": 2.5310835591051837e-06, "loss": 0.0527, "step": 1841, "video_reward_cumulative_accuracy": 0.8207495926127105 }, { "epoch": 0.5467497773820125, "grad_norm": 2.3050217628479004, "learning_rate": 2.528493379777199e-06, "loss": 0.0268, "step": 1842, "video_reward_cumulative_accuracy": 0.8208469055374593 }, { "epoch": 0.5470466013653903, "grad_norm": 1.1211016178131104, "learning_rate": 2.5259031698588065e-06, "loss": 0.0133, "step": 1843, "video_reward_cumulative_accuracy": 0.8209441128594682 }, { "epoch": 0.5473434253487682, "grad_norm": 2.8683953285217285, "learning_rate": 2.523312932130847e-06, "loss": 0.0691, "step": 1844, "video_reward_cumulative_accuracy": 0.8210412147505423 }, { "epoch": 0.5476402493321461, "grad_norm": 2.4542722702026367, "learning_rate": 2.5207226693741914e-06, "loss": 0.0429, "step": 1845, "video_reward_cumulative_accuracy": 0.8208672086720867 }, { "epoch": 0.5479370733155239, "grad_norm": 3.8734562397003174, "learning_rate": 2.5181323843697403e-06, "loss": 0.0458, "step": 1846, "video_reward_cumulative_accuracy": 0.820964247020585 }, { "epoch": 0.5482338972989017, "grad_norm": 2.5339853763580322, "learning_rate": 2.5155420798984137e-06, "loss": 0.0254, "step": 1847, "video_reward_cumulative_accuracy": 0.821061180292366 }, { "epoch": 0.5485307212822796, "grad_norm": 1.4046697616577148, "learning_rate": 2.512951758741156e-06, "loss": 0.0352, "step": 1848, "video_reward_cumulative_accuracy": 0.8211580086580087 }, { "epoch": 0.5488275452656575, "grad_norm": 0.8442341089248657, "learning_rate": 2.510361423678929e-06, "loss": 0.024, "step": 1849, "video_reward_cumulative_accuracy": 0.8209843158464034 }, { "epoch": 0.5491243692490353, "grad_norm": 1.8290486335754395, "learning_rate": 2.5077710774927067e-06, "loss": 0.0147, "step": 1850, "video_reward_cumulative_accuracy": 0.8210810810810811 }, { "epoch": 0.5494211932324132, "grad_norm": 1.443580150604248, "learning_rate": 2.5051807229634796e-06, "loss": 0.0335, "step": 1851, "video_reward_cumulative_accuracy": 0.8211777417612102 }, { "epoch": 0.549718017215791, "grad_norm": 0.5750948190689087, "learning_rate": 2.5025903628722427e-06, "loss": 0.0131, "step": 1852, "video_reward_cumulative_accuracy": 0.8212742980561555 }, { "epoch": 0.5500148411991689, "grad_norm": 0.9644943475723267, "learning_rate": 2.5e-06, "loss": 0.0187, "step": 1853, "video_reward_cumulative_accuracy": 0.8213707501349163 }, { "epoch": 0.5503116651825467, "grad_norm": 2.259012460708618, "learning_rate": 2.4974096371277577e-06, "loss": 0.0195, "step": 1854, "video_reward_cumulative_accuracy": 0.8214670981661273 }, { "epoch": 0.5506084891659246, "grad_norm": 4.043064117431641, "learning_rate": 2.4948192770365217e-06, "loss": 0.0473, "step": 1855, "video_reward_cumulative_accuracy": 0.8215633423180593 }, { "epoch": 0.5509053131493025, "grad_norm": 2.6557042598724365, "learning_rate": 2.4922289225072937e-06, "loss": 0.0347, "step": 1856, "video_reward_cumulative_accuracy": 0.8213900862068966 }, { "epoch": 0.5512021371326803, "grad_norm": 3.153779983520508, "learning_rate": 2.4896385763210725e-06, "loss": 0.0883, "step": 1857, "video_reward_cumulative_accuracy": 0.821486268174475 }, { "epoch": 0.5514989611160582, "grad_norm": 2.566129446029663, "learning_rate": 2.4870482412588444e-06, "loss": 0.0617, "step": 1858, "video_reward_cumulative_accuracy": 0.8215823466092572 }, { "epoch": 0.551795785099436, "grad_norm": 1.7508662939071655, "learning_rate": 2.484457920101587e-06, "loss": 0.0305, "step": 1859, "video_reward_cumulative_accuracy": 0.8216783216783217 }, { "epoch": 0.5520926090828139, "grad_norm": 2.019207715988159, "learning_rate": 2.4818676156302605e-06, "loss": 0.0521, "step": 1860, "video_reward_cumulative_accuracy": 0.8217741935483871 }, { "epoch": 0.5523894330661917, "grad_norm": 0.6667674779891968, "learning_rate": 2.4792773306258085e-06, "loss": 0.011, "step": 1861, "video_reward_cumulative_accuracy": 0.8218699623858141 }, { "epoch": 0.5526862570495696, "grad_norm": 1.5519354343414307, "learning_rate": 2.4766870678691538e-06, "loss": 0.0503, "step": 1862, "video_reward_cumulative_accuracy": 0.8219656283566058 }, { "epoch": 0.5529830810329475, "grad_norm": 3.2907981872558594, "learning_rate": 2.474096830141194e-06, "loss": 0.0484, "step": 1863, "video_reward_cumulative_accuracy": 0.8217928073000537 }, { "epoch": 0.5532799050163253, "grad_norm": 1.4493234157562256, "learning_rate": 2.4715066202228017e-06, "loss": 0.0132, "step": 1864, "video_reward_cumulative_accuracy": 0.8218884120171673 }, { "epoch": 0.5535767289997032, "grad_norm": 0.8373463749885559, "learning_rate": 2.4689164408948176e-06, "loss": 0.0103, "step": 1865, "video_reward_cumulative_accuracy": 0.8219839142091153 }, { "epoch": 0.553873552983081, "grad_norm": 1.3251855373382568, "learning_rate": 2.4663262949380508e-06, "loss": 0.0186, "step": 1866, "video_reward_cumulative_accuracy": 0.8220793140407289 }, { "epoch": 0.5541703769664589, "grad_norm": 2.757843255996704, "learning_rate": 2.4637361851332752e-06, "loss": 0.0747, "step": 1867, "video_reward_cumulative_accuracy": 0.8221746116764863 }, { "epoch": 0.5544672009498367, "grad_norm": 3.380035877227783, "learning_rate": 2.4611461142612243e-06, "loss": 0.0661, "step": 1868, "video_reward_cumulative_accuracy": 0.8222698072805139 }, { "epoch": 0.5547640249332146, "grad_norm": 1.7979135513305664, "learning_rate": 2.4585560851025917e-06, "loss": 0.0395, "step": 1869, "video_reward_cumulative_accuracy": 0.8223649010165864 }, { "epoch": 0.5550608489165925, "grad_norm": 1.7438832521438599, "learning_rate": 2.455966100438023e-06, "loss": 0.0389, "step": 1870, "video_reward_cumulative_accuracy": 0.8224598930481284 }, { "epoch": 0.5553576728999703, "grad_norm": 2.5413877964019775, "learning_rate": 2.4533761630481205e-06, "loss": 0.0202, "step": 1871, "video_reward_cumulative_accuracy": 0.8225547835382149 }, { "epoch": 0.5556544968833482, "grad_norm": 2.7013587951660156, "learning_rate": 2.45078627571343e-06, "loss": 0.0537, "step": 1872, "video_reward_cumulative_accuracy": 0.8226495726495726 }, { "epoch": 0.555951320866726, "grad_norm": 2.5262835025787354, "learning_rate": 2.4481964412144508e-06, "loss": 0.0388, "step": 1873, "video_reward_cumulative_accuracy": 0.8227442605445809 }, { "epoch": 0.5562481448501039, "grad_norm": 1.8535455465316772, "learning_rate": 2.4456066623316203e-06, "loss": 0.0714, "step": 1874, "video_reward_cumulative_accuracy": 0.8228388473852721 }, { "epoch": 0.5565449688334817, "grad_norm": 2.635227680206299, "learning_rate": 2.4430169418453157e-06, "loss": 0.0385, "step": 1875, "video_reward_cumulative_accuracy": 0.8229333333333333 }, { "epoch": 0.5568417928168596, "grad_norm": 1.0287766456604004, "learning_rate": 2.4404272825358564e-06, "loss": 0.0146, "step": 1876, "video_reward_cumulative_accuracy": 0.8230277185501066 }, { "epoch": 0.5571386168002375, "grad_norm": 0.640048086643219, "learning_rate": 2.4378376871834896e-06, "loss": 0.0152, "step": 1877, "video_reward_cumulative_accuracy": 0.8231220031965903 }, { "epoch": 0.5574354407836153, "grad_norm": 1.0190867185592651, "learning_rate": 2.435248158568399e-06, "loss": 0.0137, "step": 1878, "video_reward_cumulative_accuracy": 0.8232161874334398 }, { "epoch": 0.5577322647669932, "grad_norm": 0.6473628282546997, "learning_rate": 2.4326586994706964e-06, "loss": 0.0099, "step": 1879, "video_reward_cumulative_accuracy": 0.8233102714209686 }, { "epoch": 0.558029088750371, "grad_norm": 1.6340287923812866, "learning_rate": 2.430069312670414e-06, "loss": 0.0222, "step": 1880, "video_reward_cumulative_accuracy": 0.823404255319149 }, { "epoch": 0.5583259127337489, "grad_norm": 2.285609245300293, "learning_rate": 2.4274800009475134e-06, "loss": 0.06, "step": 1881, "video_reward_cumulative_accuracy": 0.823498139287613 }, { "epoch": 0.5586227367171267, "grad_norm": 2.4804632663726807, "learning_rate": 2.42489076708187e-06, "loss": 0.0291, "step": 1882, "video_reward_cumulative_accuracy": 0.8235919234856536 }, { "epoch": 0.5589195607005046, "grad_norm": 2.7268261909484863, "learning_rate": 2.422301613853278e-06, "loss": 0.0443, "step": 1883, "video_reward_cumulative_accuracy": 0.8236856080722251 }, { "epoch": 0.5592163846838825, "grad_norm": 3.1525275707244873, "learning_rate": 2.419712544041444e-06, "loss": 0.0388, "step": 1884, "video_reward_cumulative_accuracy": 0.8237791932059448 }, { "epoch": 0.5595132086672603, "grad_norm": 1.7583348751068115, "learning_rate": 2.4171235604259865e-06, "loss": 0.0213, "step": 1885, "video_reward_cumulative_accuracy": 0.8238726790450929 }, { "epoch": 0.5598100326506382, "grad_norm": 4.774654388427734, "learning_rate": 2.4145346657864318e-06, "loss": 0.0528, "step": 1886, "video_reward_cumulative_accuracy": 0.823966065747614 }, { "epoch": 0.560106856634016, "grad_norm": 2.143681526184082, "learning_rate": 2.4119458629022077e-06, "loss": 0.0185, "step": 1887, "video_reward_cumulative_accuracy": 0.8240593534711181 }, { "epoch": 0.5604036806173939, "grad_norm": 1.6760286092758179, "learning_rate": 2.4093571545526466e-06, "loss": 0.0488, "step": 1888, "video_reward_cumulative_accuracy": 0.8241525423728814 }, { "epoch": 0.5607005046007717, "grad_norm": 0.8435209393501282, "learning_rate": 2.406768543516977e-06, "loss": 0.0108, "step": 1889, "video_reward_cumulative_accuracy": 0.8242456326098465 }, { "epoch": 0.5609973285841496, "grad_norm": 3.637840747833252, "learning_rate": 2.404180032574325e-06, "loss": 0.085, "step": 1890, "video_reward_cumulative_accuracy": 0.8243386243386244 }, { "epoch": 0.5612941525675275, "grad_norm": 0.5115882158279419, "learning_rate": 2.4015916245037086e-06, "loss": 0.0259, "step": 1891, "video_reward_cumulative_accuracy": 0.8244315177154945 }, { "epoch": 0.5615909765509053, "grad_norm": 1.5231218338012695, "learning_rate": 2.3990033220840344e-06, "loss": 0.0462, "step": 1892, "video_reward_cumulative_accuracy": 0.8242600422832981 }, { "epoch": 0.5618878005342832, "grad_norm": 2.774663209915161, "learning_rate": 2.3964151280940963e-06, "loss": 0.0213, "step": 1893, "video_reward_cumulative_accuracy": 0.8243528790279979 }, { "epoch": 0.562184624517661, "grad_norm": 7.451237201690674, "learning_rate": 2.3938270453125717e-06, "loss": 0.0661, "step": 1894, "video_reward_cumulative_accuracy": 0.8244456177402323 }, { "epoch": 0.5624814485010389, "grad_norm": 3.062457323074341, "learning_rate": 2.3912390765180195e-06, "loss": 0.018, "step": 1895, "video_reward_cumulative_accuracy": 0.8242744063324539 }, { "epoch": 0.5627782724844167, "grad_norm": 5.17927885055542, "learning_rate": 2.3886512244888737e-06, "loss": 0.071, "step": 1896, "video_reward_cumulative_accuracy": 0.8238396624472574 }, { "epoch": 0.5630750964677946, "grad_norm": 2.72430682182312, "learning_rate": 2.386063492003446e-06, "loss": 0.0373, "step": 1897, "video_reward_cumulative_accuracy": 0.8236689509752241 }, { "epoch": 0.5633719204511725, "grad_norm": 1.81307053565979, "learning_rate": 2.3834758818399185e-06, "loss": 0.0787, "step": 1898, "video_reward_cumulative_accuracy": 0.8232349841938883 }, { "epoch": 0.5636687444345503, "grad_norm": 2.822479248046875, "learning_rate": 2.3808883967763415e-06, "loss": 0.0702, "step": 1899, "video_reward_cumulative_accuracy": 0.8230647709320695 }, { "epoch": 0.5639655684179282, "grad_norm": 3.3593389987945557, "learning_rate": 2.378301039590631e-06, "loss": 0.0254, "step": 1900, "video_reward_cumulative_accuracy": 0.8231578947368421 }, { "epoch": 0.564262392401306, "grad_norm": 4.469653129577637, "learning_rate": 2.3757138130605662e-06, "loss": 0.0387, "step": 1901, "video_reward_cumulative_accuracy": 0.823250920568122 }, { "epoch": 0.5645592163846839, "grad_norm": 1.8031344413757324, "learning_rate": 2.3731267199637857e-06, "loss": 0.0324, "step": 1902, "video_reward_cumulative_accuracy": 0.8233438485804416 }, { "epoch": 0.5648560403680617, "grad_norm": 2.311720132827759, "learning_rate": 2.370539763077786e-06, "loss": 0.0386, "step": 1903, "video_reward_cumulative_accuracy": 0.8234366789280084 }, { "epoch": 0.5651528643514396, "grad_norm": 2.129380464553833, "learning_rate": 2.3679529451799156e-06, "loss": 0.0571, "step": 1904, "video_reward_cumulative_accuracy": 0.823266806722689 }, { "epoch": 0.5654496883348175, "grad_norm": 1.209018349647522, "learning_rate": 2.3653662690473747e-06, "loss": 0.0215, "step": 1905, "video_reward_cumulative_accuracy": 0.8233595800524934 }, { "epoch": 0.5657465123181953, "grad_norm": 2.74529767036438, "learning_rate": 2.3627797374572107e-06, "loss": 0.0363, "step": 1906, "video_reward_cumulative_accuracy": 0.8234522560335782 }, { "epoch": 0.5660433363015732, "grad_norm": 2.243732452392578, "learning_rate": 2.3601933531863182e-06, "loss": 0.0252, "step": 1907, "video_reward_cumulative_accuracy": 0.8235448348190876 }, { "epoch": 0.566340160284951, "grad_norm": 2.2682273387908936, "learning_rate": 2.357607119011429e-06, "loss": 0.0386, "step": 1908, "video_reward_cumulative_accuracy": 0.8236373165618449 }, { "epoch": 0.5666369842683289, "grad_norm": 1.7562425136566162, "learning_rate": 2.355021037709118e-06, "loss": 0.0565, "step": 1909, "video_reward_cumulative_accuracy": 0.8237297014143531 }, { "epoch": 0.5669338082517067, "grad_norm": 1.4634464979171753, "learning_rate": 2.352435112055795e-06, "loss": 0.0267, "step": 1910, "video_reward_cumulative_accuracy": 0.8238219895287958 }, { "epoch": 0.5672306322350846, "grad_norm": 1.8789516687393188, "learning_rate": 2.3498493448277013e-06, "loss": 0.0227, "step": 1911, "video_reward_cumulative_accuracy": 0.8239141810570382 }, { "epoch": 0.5675274562184625, "grad_norm": 2.6937637329101562, "learning_rate": 2.3472637388009094e-06, "loss": 0.0524, "step": 1912, "video_reward_cumulative_accuracy": 0.8240062761506276 }, { "epoch": 0.5678242802018403, "grad_norm": 1.9026590585708618, "learning_rate": 2.3446782967513176e-06, "loss": 0.04, "step": 1913, "video_reward_cumulative_accuracy": 0.8240982749607946 }, { "epoch": 0.5681211041852182, "grad_norm": 2.191349744796753, "learning_rate": 2.3420930214546496e-06, "loss": 0.0194, "step": 1914, "video_reward_cumulative_accuracy": 0.8241901776384535 }, { "epoch": 0.568417928168596, "grad_norm": 0.43110188841819763, "learning_rate": 2.3395079156864493e-06, "loss": 0.0067, "step": 1915, "video_reward_cumulative_accuracy": 0.8242819843342036 }, { "epoch": 0.5687147521519739, "grad_norm": 2.1239190101623535, "learning_rate": 2.3369229822220782e-06, "loss": 0.0494, "step": 1916, "video_reward_cumulative_accuracy": 0.8243736951983298 }, { "epoch": 0.5690115761353517, "grad_norm": 2.330325126647949, "learning_rate": 2.3343382238367145e-06, "loss": 0.0496, "step": 1917, "video_reward_cumulative_accuracy": 0.8244653103808033 }, { "epoch": 0.5693084001187296, "grad_norm": 2.316889524459839, "learning_rate": 2.331753643305345e-06, "loss": 0.0605, "step": 1918, "video_reward_cumulative_accuracy": 0.82429614181439 }, { "epoch": 0.5696052241021075, "grad_norm": 2.9871466159820557, "learning_rate": 2.3291692434027705e-06, "loss": 0.0446, "step": 1919, "video_reward_cumulative_accuracy": 0.8243877019280875 }, { "epoch": 0.5699020480854853, "grad_norm": 2.9490387439727783, "learning_rate": 2.326585026903592e-06, "loss": 0.03, "step": 1920, "video_reward_cumulative_accuracy": 0.8244791666666667 }, { "epoch": 0.5701988720688632, "grad_norm": 2.3850908279418945, "learning_rate": 2.324000996582219e-06, "loss": 0.0334, "step": 1921, "video_reward_cumulative_accuracy": 0.8243102550754815 }, { "epoch": 0.570495696052241, "grad_norm": 1.3517158031463623, "learning_rate": 2.3214171552128594e-06, "loss": 0.0335, "step": 1922, "video_reward_cumulative_accuracy": 0.8244016649323621 }, { "epoch": 0.5707925200356189, "grad_norm": 2.155529022216797, "learning_rate": 2.3188335055695145e-06, "loss": 0.0204, "step": 1923, "video_reward_cumulative_accuracy": 0.8244929797191888 }, { "epoch": 0.5710893440189967, "grad_norm": 2.1936349868774414, "learning_rate": 2.316250050425987e-06, "loss": 0.0398, "step": 1924, "video_reward_cumulative_accuracy": 0.8243243243243243 }, { "epoch": 0.5713861680023746, "grad_norm": 3.196364164352417, "learning_rate": 2.3136667925558635e-06, "loss": 0.0479, "step": 1925, "video_reward_cumulative_accuracy": 0.8244155844155844 }, { "epoch": 0.5716829919857525, "grad_norm": 1.847775936126709, "learning_rate": 2.311083734732523e-06, "loss": 0.0229, "step": 1926, "video_reward_cumulative_accuracy": 0.8245067497403946 }, { "epoch": 0.5719798159691303, "grad_norm": 0.9182643890380859, "learning_rate": 2.30850087972913e-06, "loss": 0.0187, "step": 1927, "video_reward_cumulative_accuracy": 0.8245978204462896 }, { "epoch": 0.5722766399525082, "grad_norm": 1.595245599746704, "learning_rate": 2.3059182303186276e-06, "loss": 0.0361, "step": 1928, "video_reward_cumulative_accuracy": 0.8246887966804979 }, { "epoch": 0.572573463935886, "grad_norm": 1.5071243047714233, "learning_rate": 2.303335789273744e-06, "loss": 0.028, "step": 1929, "video_reward_cumulative_accuracy": 0.8247796785899429 }, { "epoch": 0.5728702879192639, "grad_norm": 1.241547703742981, "learning_rate": 2.3007535593669773e-06, "loss": 0.0086, "step": 1930, "video_reward_cumulative_accuracy": 0.8248704663212435 }, { "epoch": 0.5731671119026417, "grad_norm": 1.961064100265503, "learning_rate": 2.2981715433706037e-06, "loss": 0.0262, "step": 1931, "video_reward_cumulative_accuracy": 0.824702226825479 }, { "epoch": 0.5734639358860196, "grad_norm": 0.9366904497146606, "learning_rate": 2.2955897440566667e-06, "loss": 0.0133, "step": 1932, "video_reward_cumulative_accuracy": 0.8247929606625258 }, { "epoch": 0.5737607598693975, "grad_norm": 1.7824345827102661, "learning_rate": 2.2930081641969782e-06, "loss": 0.0164, "step": 1933, "video_reward_cumulative_accuracy": 0.8248836006207967 }, { "epoch": 0.5740575838527753, "grad_norm": 2.103070020675659, "learning_rate": 2.2904268065631174e-06, "loss": 0.0494, "step": 1934, "video_reward_cumulative_accuracy": 0.8249741468459152 }, { "epoch": 0.5743544078361532, "grad_norm": 1.738095760345459, "learning_rate": 2.2878456739264197e-06, "loss": 0.0459, "step": 1935, "video_reward_cumulative_accuracy": 0.8250645994832041 }, { "epoch": 0.574651231819531, "grad_norm": 2.6681344509124756, "learning_rate": 2.2852647690579823e-06, "loss": 0.1033, "step": 1936, "video_reward_cumulative_accuracy": 0.824896694214876 }, { "epoch": 0.5749480558029089, "grad_norm": 2.3271491527557373, "learning_rate": 2.2826840947286566e-06, "loss": 0.0543, "step": 1937, "video_reward_cumulative_accuracy": 0.8249870934434693 }, { "epoch": 0.5752448797862867, "grad_norm": 3.099133014678955, "learning_rate": 2.2801036537090475e-06, "loss": 0.0862, "step": 1938, "video_reward_cumulative_accuracy": 0.8248194014447885 }, { "epoch": 0.5755417037696646, "grad_norm": 2.7922987937927246, "learning_rate": 2.2775234487695093e-06, "loss": 0.0294, "step": 1939, "video_reward_cumulative_accuracy": 0.8249097472924187 }, { "epoch": 0.5758385277530425, "grad_norm": 2.1268813610076904, "learning_rate": 2.2749434826801416e-06, "loss": 0.0671, "step": 1940, "video_reward_cumulative_accuracy": 0.825 }, { "epoch": 0.5761353517364203, "grad_norm": 3.367703676223755, "learning_rate": 2.272363758210789e-06, "loss": 0.0779, "step": 1941, "video_reward_cumulative_accuracy": 0.8250901597114889 }, { "epoch": 0.5764321757197982, "grad_norm": 4.295740127563477, "learning_rate": 2.2697842781310354e-06, "loss": 0.0447, "step": 1942, "video_reward_cumulative_accuracy": 0.8251802265705458 }, { "epoch": 0.576728999703176, "grad_norm": 6.525967597961426, "learning_rate": 2.2672050452102036e-06, "loss": 0.0787, "step": 1943, "video_reward_cumulative_accuracy": 0.8252702007205353 }, { "epoch": 0.5770258236865539, "grad_norm": 3.3833274841308594, "learning_rate": 2.26462606221735e-06, "loss": 0.0647, "step": 1944, "video_reward_cumulative_accuracy": 0.8253600823045267 }, { "epoch": 0.5773226476699317, "grad_norm": 1.5874844789505005, "learning_rate": 2.262047331921264e-06, "loss": 0.0274, "step": 1945, "video_reward_cumulative_accuracy": 0.8254498714652956 }, { "epoch": 0.5776194716533096, "grad_norm": 1.2541552782058716, "learning_rate": 2.259468857090462e-06, "loss": 0.0108, "step": 1946, "video_reward_cumulative_accuracy": 0.8255395683453237 }, { "epoch": 0.5779162956366874, "grad_norm": 1.4068396091461182, "learning_rate": 2.2568906404931878e-06, "loss": 0.0234, "step": 1947, "video_reward_cumulative_accuracy": 0.8256291730868002 }, { "epoch": 0.5782131196200653, "grad_norm": 1.276092290878296, "learning_rate": 2.254312684897406e-06, "loss": 0.0289, "step": 1948, "video_reward_cumulative_accuracy": 0.8257186858316222 }, { "epoch": 0.5785099436034432, "grad_norm": 0.7035172581672668, "learning_rate": 2.2517349930708032e-06, "loss": 0.0199, "step": 1949, "video_reward_cumulative_accuracy": 0.8258081067213956 }, { "epoch": 0.578806767586821, "grad_norm": 1.7570953369140625, "learning_rate": 2.2491575677807813e-06, "loss": 0.0319, "step": 1950, "video_reward_cumulative_accuracy": 0.8258974358974359 }, { "epoch": 0.5791035915701989, "grad_norm": 2.2827887535095215, "learning_rate": 2.2465804117944568e-06, "loss": 0.0531, "step": 1951, "video_reward_cumulative_accuracy": 0.8259866735007688 }, { "epoch": 0.5794004155535767, "grad_norm": 0.8664276003837585, "learning_rate": 2.244003527878656e-06, "loss": 0.0131, "step": 1952, "video_reward_cumulative_accuracy": 0.8260758196721312 }, { "epoch": 0.5796972395369546, "grad_norm": 0.4808574914932251, "learning_rate": 2.2414269187999153e-06, "loss": 0.0139, "step": 1953, "video_reward_cumulative_accuracy": 0.8261648745519713 }, { "epoch": 0.5799940635203324, "grad_norm": 3.057589054107666, "learning_rate": 2.2388505873244728e-06, "loss": 0.0282, "step": 1954, "video_reward_cumulative_accuracy": 0.8259979529170931 }, { "epoch": 0.5802908875037103, "grad_norm": 1.7676241397857666, "learning_rate": 2.2362745362182724e-06, "loss": 0.0718, "step": 1955, "video_reward_cumulative_accuracy": 0.8258312020460358 }, { "epoch": 0.5805877114870882, "grad_norm": 1.0364630222320557, "learning_rate": 2.2336987682469534e-06, "loss": 0.017, "step": 1956, "video_reward_cumulative_accuracy": 0.825920245398773 }, { "epoch": 0.580884535470466, "grad_norm": 1.8541654348373413, "learning_rate": 2.2311232861758526e-06, "loss": 0.0202, "step": 1957, "video_reward_cumulative_accuracy": 0.8260091977516607 }, { "epoch": 0.5811813594538439, "grad_norm": 1.1879740953445435, "learning_rate": 2.228548092770002e-06, "loss": 0.0549, "step": 1958, "video_reward_cumulative_accuracy": 0.8260980592441267 }, { "epoch": 0.5814781834372217, "grad_norm": 1.5490862131118774, "learning_rate": 2.2259731907941195e-06, "loss": 0.0214, "step": 1959, "video_reward_cumulative_accuracy": 0.8261868300153139 }, { "epoch": 0.5817750074205996, "grad_norm": 2.618912696838379, "learning_rate": 2.223398583012614e-06, "loss": 0.0376, "step": 1960, "video_reward_cumulative_accuracy": 0.8262755102040816 }, { "epoch": 0.5820718314039774, "grad_norm": 1.805620551109314, "learning_rate": 2.2208242721895744e-06, "loss": 0.0346, "step": 1961, "video_reward_cumulative_accuracy": 0.8263640999490056 }, { "epoch": 0.5823686553873553, "grad_norm": 4.237667083740234, "learning_rate": 2.2182502610887757e-06, "loss": 0.0546, "step": 1962, "video_reward_cumulative_accuracy": 0.8264525993883792 }, { "epoch": 0.5826654793707332, "grad_norm": 0.6842634677886963, "learning_rate": 2.215676552473668e-06, "loss": 0.005, "step": 1963, "video_reward_cumulative_accuracy": 0.826541008660214 }, { "epoch": 0.582962303354111, "grad_norm": 2.4333693981170654, "learning_rate": 2.213103149107376e-06, "loss": 0.0525, "step": 1964, "video_reward_cumulative_accuracy": 0.8263747454175153 }, { "epoch": 0.5832591273374889, "grad_norm": 2.396888256072998, "learning_rate": 2.210530053752701e-06, "loss": 0.0805, "step": 1965, "video_reward_cumulative_accuracy": 0.8264631043256997 }, { "epoch": 0.5835559513208667, "grad_norm": 2.7673487663269043, "learning_rate": 2.2079572691721063e-06, "loss": 0.0213, "step": 1966, "video_reward_cumulative_accuracy": 0.8262970498474059 }, { "epoch": 0.5838527753042446, "grad_norm": 1.4659632444381714, "learning_rate": 2.20538479812773e-06, "loss": 0.023, "step": 1967, "video_reward_cumulative_accuracy": 0.8263853584138282 }, { "epoch": 0.5841495992876224, "grad_norm": 2.001253366470337, "learning_rate": 2.2028126433813657e-06, "loss": 0.0304, "step": 1968, "video_reward_cumulative_accuracy": 0.8264735772357723 }, { "epoch": 0.5844464232710003, "grad_norm": 2.858795166015625, "learning_rate": 2.200240807694474e-06, "loss": 0.0827, "step": 1969, "video_reward_cumulative_accuracy": 0.8263077704418487 }, { "epoch": 0.5847432472543782, "grad_norm": 1.0106351375579834, "learning_rate": 2.19766929382817e-06, "loss": 0.035, "step": 1970, "video_reward_cumulative_accuracy": 0.8263959390862944 }, { "epoch": 0.585040071237756, "grad_norm": 1.513059377670288, "learning_rate": 2.195098104543221e-06, "loss": 0.019, "step": 1971, "video_reward_cumulative_accuracy": 0.8264840182648402 }, { "epoch": 0.5853368952211339, "grad_norm": 1.704352855682373, "learning_rate": 2.1925272426000514e-06, "loss": 0.0135, "step": 1972, "video_reward_cumulative_accuracy": 0.8265720081135902 }, { "epoch": 0.5856337192045117, "grad_norm": 3.4350345134735107, "learning_rate": 2.189956710758729e-06, "loss": 0.0736, "step": 1973, "video_reward_cumulative_accuracy": 0.8264064875823619 }, { "epoch": 0.5859305431878896, "grad_norm": 0.5892782807350159, "learning_rate": 2.1873865117789682e-06, "loss": 0.0074, "step": 1974, "video_reward_cumulative_accuracy": 0.8264944275582573 }, { "epoch": 0.5862273671712674, "grad_norm": 0.523690938949585, "learning_rate": 2.184816648420131e-06, "loss": 0.0095, "step": 1975, "video_reward_cumulative_accuracy": 0.8265822784810126 }, { "epoch": 0.5865241911546453, "grad_norm": 3.0676653385162354, "learning_rate": 2.1822471234412106e-06, "loss": 0.0585, "step": 1976, "video_reward_cumulative_accuracy": 0.8266700404858299 }, { "epoch": 0.5868210151380232, "grad_norm": 1.3661112785339355, "learning_rate": 2.1796779396008456e-06, "loss": 0.0415, "step": 1977, "video_reward_cumulative_accuracy": 0.8265048052604957 }, { "epoch": 0.587117839121401, "grad_norm": 2.5101301670074463, "learning_rate": 2.177109099657301e-06, "loss": 0.0227, "step": 1978, "video_reward_cumulative_accuracy": 0.826592517694641 }, { "epoch": 0.5874146631047789, "grad_norm": 1.466486930847168, "learning_rate": 2.174540606368477e-06, "loss": 0.054, "step": 1979, "video_reward_cumulative_accuracy": 0.8266801414855988 }, { "epoch": 0.5877114870881567, "grad_norm": 1.7742908000946045, "learning_rate": 2.1719724624919004e-06, "loss": 0.0465, "step": 1980, "video_reward_cumulative_accuracy": 0.8267676767676768 }, { "epoch": 0.5880083110715346, "grad_norm": 5.307931423187256, "learning_rate": 2.169404670784722e-06, "loss": 0.07, "step": 1981, "video_reward_cumulative_accuracy": 0.8268551236749117 }, { "epoch": 0.5883051350549124, "grad_norm": 2.4654183387756348, "learning_rate": 2.1668372340037184e-06, "loss": 0.0548, "step": 1982, "video_reward_cumulative_accuracy": 0.8269424823410696 }, { "epoch": 0.5886019590382903, "grad_norm": 3.1083977222442627, "learning_rate": 2.164270154905279e-06, "loss": 0.0294, "step": 1983, "video_reward_cumulative_accuracy": 0.827029752899647 }, { "epoch": 0.5888987830216682, "grad_norm": 2.782560348510742, "learning_rate": 2.1617034362454136e-06, "loss": 0.0229, "step": 1984, "video_reward_cumulative_accuracy": 0.827116935483871 }, { "epoch": 0.589195607005046, "grad_norm": 3.644629955291748, "learning_rate": 2.1591370807797434e-06, "loss": 0.0401, "step": 1985, "video_reward_cumulative_accuracy": 0.8272040302267003 }, { "epoch": 0.5894924309884239, "grad_norm": 3.797820806503296, "learning_rate": 2.1565710912635006e-06, "loss": 0.0408, "step": 1986, "video_reward_cumulative_accuracy": 0.8272910372608258 }, { "epoch": 0.5897892549718017, "grad_norm": 2.0654091835021973, "learning_rate": 2.154005470451524e-06, "loss": 0.0144, "step": 1987, "video_reward_cumulative_accuracy": 0.8273779567186713 }, { "epoch": 0.5900860789551796, "grad_norm": 2.2619528770446777, "learning_rate": 2.1514402210982558e-06, "loss": 0.0629, "step": 1988, "video_reward_cumulative_accuracy": 0.8272132796780685 }, { "epoch": 0.5903829029385574, "grad_norm": 3.204058885574341, "learning_rate": 2.148875345957741e-06, "loss": 0.0629, "step": 1989, "video_reward_cumulative_accuracy": 0.8273001508295625 }, { "epoch": 0.5906797269219353, "grad_norm": 2.4990057945251465, "learning_rate": 2.1463108477836217e-06, "loss": 0.0596, "step": 1990, "video_reward_cumulative_accuracy": 0.8273869346733669 }, { "epoch": 0.5909765509053132, "grad_norm": 1.8199396133422852, "learning_rate": 2.1437467293291357e-06, "loss": 0.0306, "step": 1991, "video_reward_cumulative_accuracy": 0.8274736313410347 }, { "epoch": 0.591273374888691, "grad_norm": 1.290522813796997, "learning_rate": 2.1411829933471124e-06, "loss": 0.0254, "step": 1992, "video_reward_cumulative_accuracy": 0.8273092369477911 }, { "epoch": 0.5915701988720689, "grad_norm": 0.8122701048851013, "learning_rate": 2.138619642589972e-06, "loss": 0.0428, "step": 1993, "video_reward_cumulative_accuracy": 0.8273958855995986 }, { "epoch": 0.5918670228554467, "grad_norm": 1.304606318473816, "learning_rate": 2.13605667980972e-06, "loss": 0.0408, "step": 1994, "video_reward_cumulative_accuracy": 0.827482447342026 }, { "epoch": 0.5921638468388246, "grad_norm": 0.6447356343269348, "learning_rate": 2.1334941077579457e-06, "loss": 0.0113, "step": 1995, "video_reward_cumulative_accuracy": 0.8275689223057644 }, { "epoch": 0.5924606708222024, "grad_norm": 1.6157499551773071, "learning_rate": 2.1309319291858194e-06, "loss": 0.0341, "step": 1996, "video_reward_cumulative_accuracy": 0.8276553106212425 }, { "epoch": 0.5927574948055803, "grad_norm": 1.3407137393951416, "learning_rate": 2.1283701468440875e-06, "loss": 0.0139, "step": 1997, "video_reward_cumulative_accuracy": 0.827741612418628 }, { "epoch": 0.5930543187889582, "grad_norm": 2.7344970703125, "learning_rate": 2.1258087634830724e-06, "loss": 0.0588, "step": 1998, "video_reward_cumulative_accuracy": 0.8275775775775776 }, { "epoch": 0.593351142772336, "grad_norm": 2.2847628593444824, "learning_rate": 2.1232477818526685e-06, "loss": 0.0199, "step": 1999, "video_reward_cumulative_accuracy": 0.8276638319159579 }, { "epoch": 0.5936479667557139, "grad_norm": 1.1096928119659424, "learning_rate": 2.120687204702337e-06, "loss": 0.0169, "step": 2000, "video_reward_cumulative_accuracy": 0.82775 }, { "epoch": 0.5936479667557139, "eval_runtime": 130.6244, "eval_samples_per_second": 6.04, "eval_steps_per_second": 0.758, "eval_test_set_accuracy": 0.8106060606060606, "step": 2000 }, { "epoch": 0.5939447907390917, "grad_norm": 0.6623875498771667, "learning_rate": 2.118127034781107e-06, "loss": 0.017, "step": 2001, "video_reward_cumulative_accuracy": 0.8278360819590205 }, { "epoch": 0.5942416147224696, "grad_norm": 1.5880417823791504, "learning_rate": 2.1155672748375684e-06, "loss": 0.0249, "step": 2002, "video_reward_cumulative_accuracy": 0.827922077922078 }, { "epoch": 0.5945384387058474, "grad_norm": 0.9980058670043945, "learning_rate": 2.1130079276198727e-06, "loss": 0.0209, "step": 2003, "video_reward_cumulative_accuracy": 0.828007988017973 }, { "epoch": 0.5948352626892253, "grad_norm": 1.9351149797439575, "learning_rate": 2.1104489958757267e-06, "loss": 0.028, "step": 2004, "video_reward_cumulative_accuracy": 0.8278443113772455 }, { "epoch": 0.5951320866726032, "grad_norm": 1.2848634719848633, "learning_rate": 2.107890482352393e-06, "loss": 0.0193, "step": 2005, "video_reward_cumulative_accuracy": 0.827930174563591 }, { "epoch": 0.595428910655981, "grad_norm": 2.291006565093994, "learning_rate": 2.105332389796684e-06, "loss": 0.0395, "step": 2006, "video_reward_cumulative_accuracy": 0.8280159521435693 }, { "epoch": 0.5957257346393589, "grad_norm": 1.295036792755127, "learning_rate": 2.1027747209549596e-06, "loss": 0.0176, "step": 2007, "video_reward_cumulative_accuracy": 0.828101644245142 }, { "epoch": 0.5960225586227367, "grad_norm": 1.5013102293014526, "learning_rate": 2.1002174785731265e-06, "loss": 0.0294, "step": 2008, "video_reward_cumulative_accuracy": 0.828187250996016 }, { "epoch": 0.5963193826061146, "grad_norm": 1.3362897634506226, "learning_rate": 2.097660665396632e-06, "loss": 0.0279, "step": 2009, "video_reward_cumulative_accuracy": 0.8282727725236436 }, { "epoch": 0.5966162065894924, "grad_norm": 2.3554601669311523, "learning_rate": 2.0951042841704628e-06, "loss": 0.0542, "step": 2010, "video_reward_cumulative_accuracy": 0.8278606965174129 }, { "epoch": 0.5969130305728703, "grad_norm": 2.2153587341308594, "learning_rate": 2.0925483376391437e-06, "loss": 0.0155, "step": 2011, "video_reward_cumulative_accuracy": 0.8279462953754351 }, { "epoch": 0.5972098545562482, "grad_norm": 2.6395490169525146, "learning_rate": 2.08999282854673e-06, "loss": 0.0452, "step": 2012, "video_reward_cumulative_accuracy": 0.8280318091451292 }, { "epoch": 0.597506678539626, "grad_norm": 2.8601341247558594, "learning_rate": 2.08743775963681e-06, "loss": 0.0375, "step": 2013, "video_reward_cumulative_accuracy": 0.8281172379533035 }, { "epoch": 0.5978035025230038, "grad_norm": 1.6802117824554443, "learning_rate": 2.0848831336524956e-06, "loss": 0.0371, "step": 2014, "video_reward_cumulative_accuracy": 0.8282025819265144 }, { "epoch": 0.5981003265063817, "grad_norm": 2.9565298557281494, "learning_rate": 2.0823289533364295e-06, "loss": 0.0639, "step": 2015, "video_reward_cumulative_accuracy": 0.828287841191067 }, { "epoch": 0.5983971504897596, "grad_norm": 3.7333922386169434, "learning_rate": 2.0797752214307685e-06, "loss": 0.0546, "step": 2016, "video_reward_cumulative_accuracy": 0.8283730158730159 }, { "epoch": 0.5986939744731374, "grad_norm": 1.6850907802581787, "learning_rate": 2.077221940677194e-06, "loss": 0.03, "step": 2017, "video_reward_cumulative_accuracy": 0.8284581060981656 }, { "epoch": 0.5989907984565153, "grad_norm": 0.4525964856147766, "learning_rate": 2.0746691138169013e-06, "loss": 0.0098, "step": 2018, "video_reward_cumulative_accuracy": 0.8285431119920713 }, { "epoch": 0.5992876224398932, "grad_norm": 1.5810643434524536, "learning_rate": 2.0721167435905945e-06, "loss": 0.0129, "step": 2019, "video_reward_cumulative_accuracy": 0.8283803863298663 }, { "epoch": 0.599584446423271, "grad_norm": 3.38840389251709, "learning_rate": 2.069564832738495e-06, "loss": 0.0289, "step": 2020, "video_reward_cumulative_accuracy": 0.8284653465346534 }, { "epoch": 0.5998812704066488, "grad_norm": 2.073776960372925, "learning_rate": 2.067013384000323e-06, "loss": 0.0237, "step": 2021, "video_reward_cumulative_accuracy": 0.8285502226620485 }, { "epoch": 0.6001780943900267, "grad_norm": 3.2106335163116455, "learning_rate": 2.0644624001153073e-06, "loss": 0.0942, "step": 2022, "video_reward_cumulative_accuracy": 0.8283877349159249 }, { "epoch": 0.6004749183734046, "grad_norm": 2.6965174674987793, "learning_rate": 2.06191188382218e-06, "loss": 0.0288, "step": 2023, "video_reward_cumulative_accuracy": 0.828225407810183 }, { "epoch": 0.6007717423567824, "grad_norm": 0.7816161513328552, "learning_rate": 2.0593618378591625e-06, "loss": 0.0159, "step": 2024, "video_reward_cumulative_accuracy": 0.8283102766798419 }, { "epoch": 0.6010685663401603, "grad_norm": 2.3955113887786865, "learning_rate": 2.0568122649639815e-06, "loss": 0.0364, "step": 2025, "video_reward_cumulative_accuracy": 0.828395061728395 }, { "epoch": 0.6013653903235382, "grad_norm": 2.5972840785980225, "learning_rate": 2.0542631678738478e-06, "loss": 0.0567, "step": 2026, "video_reward_cumulative_accuracy": 0.8284797630799605 }, { "epoch": 0.601662214306916, "grad_norm": 2.920619249343872, "learning_rate": 2.051714549325466e-06, "loss": 0.0282, "step": 2027, "video_reward_cumulative_accuracy": 0.8285643808584114 }, { "epoch": 0.6019590382902938, "grad_norm": 2.846386432647705, "learning_rate": 2.049166412055025e-06, "loss": 0.0385, "step": 2028, "video_reward_cumulative_accuracy": 0.8286489151873767 }, { "epoch": 0.6022558622736717, "grad_norm": 1.5138801336288452, "learning_rate": 2.046618758798197e-06, "loss": 0.0298, "step": 2029, "video_reward_cumulative_accuracy": 0.8287333661902415 }, { "epoch": 0.6025526862570496, "grad_norm": 3.822578191757202, "learning_rate": 2.0440715922901362e-06, "loss": 0.0489, "step": 2030, "video_reward_cumulative_accuracy": 0.8288177339901478 }, { "epoch": 0.6028495102404274, "grad_norm": 4.538577556610107, "learning_rate": 2.041524915265472e-06, "loss": 0.056, "step": 2031, "video_reward_cumulative_accuracy": 0.828902018709995 }, { "epoch": 0.6031463342238053, "grad_norm": 2.920750141143799, "learning_rate": 2.0389787304583105e-06, "loss": 0.0301, "step": 2032, "video_reward_cumulative_accuracy": 0.828986220472441 }, { "epoch": 0.6034431582071832, "grad_norm": 2.3497962951660156, "learning_rate": 2.0364330406022265e-06, "loss": 0.0964, "step": 2033, "video_reward_cumulative_accuracy": 0.8288243974422036 }, { "epoch": 0.603739982190561, "grad_norm": 3.301518440246582, "learning_rate": 2.033887848430267e-06, "loss": 0.057, "step": 2034, "video_reward_cumulative_accuracy": 0.8289085545722714 }, { "epoch": 0.6040368061739388, "grad_norm": 2.4687845706939697, "learning_rate": 2.031343156674942e-06, "loss": 0.1124, "step": 2035, "video_reward_cumulative_accuracy": 0.8287469287469288 }, { "epoch": 0.6043336301573167, "grad_norm": 4.89956521987915, "learning_rate": 2.0287989680682247e-06, "loss": 0.0747, "step": 2036, "video_reward_cumulative_accuracy": 0.8288310412573674 }, { "epoch": 0.6046304541406946, "grad_norm": 3.3470458984375, "learning_rate": 2.026255285341549e-06, "loss": 0.0345, "step": 2037, "video_reward_cumulative_accuracy": 0.8289150711831125 }, { "epoch": 0.6049272781240724, "grad_norm": 1.8060578107833862, "learning_rate": 2.023712111225805e-06, "loss": 0.0265, "step": 2038, "video_reward_cumulative_accuracy": 0.8289990186457311 }, { "epoch": 0.6052241021074503, "grad_norm": 4.508866310119629, "learning_rate": 2.0211694484513376e-06, "loss": 0.0483, "step": 2039, "video_reward_cumulative_accuracy": 0.8288376655223149 }, { "epoch": 0.6055209260908282, "grad_norm": 1.380003571510315, "learning_rate": 2.0186272997479407e-06, "loss": 0.0247, "step": 2040, "video_reward_cumulative_accuracy": 0.828921568627451 }, { "epoch": 0.605817750074206, "grad_norm": 2.6239430904388428, "learning_rate": 2.016085667844859e-06, "loss": 0.0568, "step": 2041, "video_reward_cumulative_accuracy": 0.8290053895149436 }, { "epoch": 0.6061145740575838, "grad_norm": 3.0193066596984863, "learning_rate": 2.0135445554707803e-06, "loss": 0.0539, "step": 2042, "video_reward_cumulative_accuracy": 0.8290891283055828 }, { "epoch": 0.6064113980409617, "grad_norm": 2.1342217922210693, "learning_rate": 2.011003965353835e-06, "loss": 0.0406, "step": 2043, "video_reward_cumulative_accuracy": 0.8291727851199217 }, { "epoch": 0.6067082220243396, "grad_norm": 2.42179536819458, "learning_rate": 2.008463900221595e-06, "loss": 0.0604, "step": 2044, "video_reward_cumulative_accuracy": 0.8292563600782779 }, { "epoch": 0.6070050460077174, "grad_norm": 3.034480333328247, "learning_rate": 2.0059243628010643e-06, "loss": 0.0389, "step": 2045, "video_reward_cumulative_accuracy": 0.8290953545232274 }, { "epoch": 0.6073018699910953, "grad_norm": 1.7863432168960571, "learning_rate": 2.0033853558186845e-06, "loss": 0.0353, "step": 2046, "video_reward_cumulative_accuracy": 0.8291788856304986 }, { "epoch": 0.6075986939744732, "grad_norm": 1.3945716619491577, "learning_rate": 2.0008468820003257e-06, "loss": 0.0178, "step": 2047, "video_reward_cumulative_accuracy": 0.8290180752320468 }, { "epoch": 0.607895517957851, "grad_norm": 1.2314127683639526, "learning_rate": 1.9983089440712853e-06, "loss": 0.0172, "step": 2048, "video_reward_cumulative_accuracy": 0.8291015625 }, { "epoch": 0.6081923419412288, "grad_norm": 2.0700814723968506, "learning_rate": 1.995771544756287e-06, "loss": 0.0232, "step": 2049, "video_reward_cumulative_accuracy": 0.8289409468033186 }, { "epoch": 0.6084891659246067, "grad_norm": 2.8075079917907715, "learning_rate": 1.993234686779474e-06, "loss": 0.034, "step": 2050, "video_reward_cumulative_accuracy": 0.8290243902439024 }, { "epoch": 0.6087859899079846, "grad_norm": 0.5491511225700378, "learning_rate": 1.990698372864411e-06, "loss": 0.0198, "step": 2051, "video_reward_cumulative_accuracy": 0.8291077523159435 }, { "epoch": 0.6090828138913624, "grad_norm": 0.6947237253189087, "learning_rate": 1.9881626057340757e-06, "loss": 0.019, "step": 2052, "video_reward_cumulative_accuracy": 0.8291910331384016 }, { "epoch": 0.6093796378747403, "grad_norm": 2.1727752685546875, "learning_rate": 1.9856273881108613e-06, "loss": 0.0855, "step": 2053, "video_reward_cumulative_accuracy": 0.8292742328300049 }, { "epoch": 0.6096764618581182, "grad_norm": 1.6275689601898193, "learning_rate": 1.9830927227165697e-06, "loss": 0.0308, "step": 2054, "video_reward_cumulative_accuracy": 0.8293573515092503 }, { "epoch": 0.609973285841496, "grad_norm": 2.2714242935180664, "learning_rate": 1.9805586122724095e-06, "loss": 0.0478, "step": 2055, "video_reward_cumulative_accuracy": 0.8294403892944039 }, { "epoch": 0.6102701098248738, "grad_norm": 1.9769221544265747, "learning_rate": 1.978025059498996e-06, "loss": 0.0581, "step": 2056, "video_reward_cumulative_accuracy": 0.829523346303502 }, { "epoch": 0.6105669338082517, "grad_norm": 0.7704261541366577, "learning_rate": 1.9754920671163418e-06, "loss": 0.0099, "step": 2057, "video_reward_cumulative_accuracy": 0.829606222654351 }, { "epoch": 0.6108637577916296, "grad_norm": 2.209693193435669, "learning_rate": 1.972959637843861e-06, "loss": 0.0349, "step": 2058, "video_reward_cumulative_accuracy": 0.8296890184645287 }, { "epoch": 0.6111605817750074, "grad_norm": 0.5300698280334473, "learning_rate": 1.9704277744003632e-06, "loss": 0.0127, "step": 2059, "video_reward_cumulative_accuracy": 0.8297717338513841 }, { "epoch": 0.6114574057583853, "grad_norm": 2.17396879196167, "learning_rate": 1.967896479504048e-06, "loss": 0.0377, "step": 2060, "video_reward_cumulative_accuracy": 0.8298543689320388 }, { "epoch": 0.6117542297417632, "grad_norm": 2.26611328125, "learning_rate": 1.9653657558725077e-06, "loss": 0.0517, "step": 2061, "video_reward_cumulative_accuracy": 0.8296943231441049 }, { "epoch": 0.612051053725141, "grad_norm": 1.2621009349822998, "learning_rate": 1.962835606222717e-06, "loss": 0.0343, "step": 2062, "video_reward_cumulative_accuracy": 0.8297769156159069 }, { "epoch": 0.6123478777085188, "grad_norm": 1.5711784362792969, "learning_rate": 1.9603060332710415e-06, "loss": 0.0368, "step": 2063, "video_reward_cumulative_accuracy": 0.8298594280174503 }, { "epoch": 0.6126447016918967, "grad_norm": 2.2621207237243652, "learning_rate": 1.9577770397332184e-06, "loss": 0.0516, "step": 2064, "video_reward_cumulative_accuracy": 0.8296996124031008 }, { "epoch": 0.6129415256752746, "grad_norm": 2.1944663524627686, "learning_rate": 1.955248628324371e-06, "loss": 0.0442, "step": 2065, "video_reward_cumulative_accuracy": 0.8295399515738499 }, { "epoch": 0.6132383496586524, "grad_norm": 2.246171236038208, "learning_rate": 1.9527208017589944e-06, "loss": 0.0516, "step": 2066, "video_reward_cumulative_accuracy": 0.829622458857696 }, { "epoch": 0.6135351736420303, "grad_norm": 1.9881268739700317, "learning_rate": 1.950193562750953e-06, "loss": 0.034, "step": 2067, "video_reward_cumulative_accuracy": 0.8297048863086599 }, { "epoch": 0.6138319976254082, "grad_norm": 0.437326580286026, "learning_rate": 1.947666914013487e-06, "loss": 0.0082, "step": 2068, "video_reward_cumulative_accuracy": 0.8297872340425532 }, { "epoch": 0.614128821608786, "grad_norm": 2.9777653217315674, "learning_rate": 1.945140858259195e-06, "loss": 0.0259, "step": 2069, "video_reward_cumulative_accuracy": 0.8298695021749638 }, { "epoch": 0.6144256455921638, "grad_norm": 1.040705919265747, "learning_rate": 1.9426153982000455e-06, "loss": 0.0141, "step": 2070, "video_reward_cumulative_accuracy": 0.8299516908212561 }, { "epoch": 0.6147224695755417, "grad_norm": 1.5954067707061768, "learning_rate": 1.9400905365473656e-06, "loss": 0.033, "step": 2071, "video_reward_cumulative_accuracy": 0.8297923708353453 }, { "epoch": 0.6150192935589196, "grad_norm": 4.421472549438477, "learning_rate": 1.937566276011837e-06, "loss": 0.0657, "step": 2072, "video_reward_cumulative_accuracy": 0.8298745173745173 }, { "epoch": 0.6153161175422974, "grad_norm": 0.8702653050422668, "learning_rate": 1.935042619303501e-06, "loss": 0.0172, "step": 2073, "video_reward_cumulative_accuracy": 0.8299565846599132 }, { "epoch": 0.6156129415256753, "grad_norm": 1.1119422912597656, "learning_rate": 1.9325195691317457e-06, "loss": 0.0224, "step": 2074, "video_reward_cumulative_accuracy": 0.8300385728061717 }, { "epoch": 0.6159097655090532, "grad_norm": 0.5069667100906372, "learning_rate": 1.929997128205312e-06, "loss": 0.008, "step": 2075, "video_reward_cumulative_accuracy": 0.8301204819277108 }, { "epoch": 0.616206589492431, "grad_norm": 3.037325143814087, "learning_rate": 1.927475299232283e-06, "loss": 0.0442, "step": 2076, "video_reward_cumulative_accuracy": 0.8299614643545279 }, { "epoch": 0.6165034134758088, "grad_norm": 1.0800257921218872, "learning_rate": 1.924954084920089e-06, "loss": 0.0153, "step": 2077, "video_reward_cumulative_accuracy": 0.8300433317284545 }, { "epoch": 0.6168002374591867, "grad_norm": 2.045151710510254, "learning_rate": 1.922433487975498e-06, "loss": 0.0427, "step": 2078, "video_reward_cumulative_accuracy": 0.8298845043310876 }, { "epoch": 0.6170970614425646, "grad_norm": 2.991358518600464, "learning_rate": 1.919913511104614e-06, "loss": 0.0411, "step": 2079, "video_reward_cumulative_accuracy": 0.8297258297258298 }, { "epoch": 0.6173938854259424, "grad_norm": 1.7816275358200073, "learning_rate": 1.9173941570128786e-06, "loss": 0.0149, "step": 2080, "video_reward_cumulative_accuracy": 0.8298076923076924 }, { "epoch": 0.6176907094093202, "grad_norm": 1.7904562950134277, "learning_rate": 1.9148754284050616e-06, "loss": 0.0377, "step": 2081, "video_reward_cumulative_accuracy": 0.829889476213359 }, { "epoch": 0.6179875333926982, "grad_norm": 3.2209877967834473, "learning_rate": 1.9123573279852632e-06, "loss": 0.0225, "step": 2082, "video_reward_cumulative_accuracy": 0.829971181556196 }, { "epoch": 0.618284357376076, "grad_norm": 0.7754690051078796, "learning_rate": 1.9098398584569085e-06, "loss": 0.0197, "step": 2083, "video_reward_cumulative_accuracy": 0.8300528084493519 }, { "epoch": 0.6185811813594538, "grad_norm": 1.7929531335830688, "learning_rate": 1.9073230225227451e-06, "loss": 0.039, "step": 2084, "video_reward_cumulative_accuracy": 0.82989443378119 }, { "epoch": 0.6188780053428317, "grad_norm": 1.5716784000396729, "learning_rate": 1.9048068228848412e-06, "loss": 0.0188, "step": 2085, "video_reward_cumulative_accuracy": 0.8299760191846522 }, { "epoch": 0.6191748293262096, "grad_norm": 2.760315179824829, "learning_rate": 1.9022912622445808e-06, "loss": 0.0232, "step": 2086, "video_reward_cumulative_accuracy": 0.8300575263662512 }, { "epoch": 0.6194716533095874, "grad_norm": 7.752729892730713, "learning_rate": 1.8997763433026631e-06, "loss": 0.0878, "step": 2087, "video_reward_cumulative_accuracy": 0.8298993770963105 }, { "epoch": 0.6197684772929652, "grad_norm": 2.39508056640625, "learning_rate": 1.8972620687590964e-06, "loss": 0.0234, "step": 2088, "video_reward_cumulative_accuracy": 0.8299808429118773 }, { "epoch": 0.6200653012763432, "grad_norm": 3.0315792560577393, "learning_rate": 1.8947484413131996e-06, "loss": 0.0888, "step": 2089, "video_reward_cumulative_accuracy": 0.8300622307324078 }, { "epoch": 0.620362125259721, "grad_norm": 0.3297179341316223, "learning_rate": 1.892235463663596e-06, "loss": 0.0049, "step": 2090, "video_reward_cumulative_accuracy": 0.8301435406698564 }, { "epoch": 0.6206589492430988, "grad_norm": 0.5340771675109863, "learning_rate": 1.8897231385082096e-06, "loss": 0.0084, "step": 2091, "video_reward_cumulative_accuracy": 0.8302247728359636 }, { "epoch": 0.6209557732264767, "grad_norm": 2.871993064880371, "learning_rate": 1.8872114685442665e-06, "loss": 0.0711, "step": 2092, "video_reward_cumulative_accuracy": 0.8303059273422562 }, { "epoch": 0.6212525972098546, "grad_norm": 1.4503148794174194, "learning_rate": 1.8847004564682878e-06, "loss": 0.0436, "step": 2093, "video_reward_cumulative_accuracy": 0.8303870043000477 }, { "epoch": 0.6215494211932324, "grad_norm": 3.7459793090820312, "learning_rate": 1.8821901049760882e-06, "loss": 0.0717, "step": 2094, "video_reward_cumulative_accuracy": 0.8304680038204394 }, { "epoch": 0.6218462451766102, "grad_norm": 2.9643142223358154, "learning_rate": 1.879680416762775e-06, "loss": 0.0366, "step": 2095, "video_reward_cumulative_accuracy": 0.830310262529833 }, { "epoch": 0.6221430691599882, "grad_norm": 1.8001595735549927, "learning_rate": 1.8771713945227404e-06, "loss": 0.0272, "step": 2096, "video_reward_cumulative_accuracy": 0.8303912213740458 }, { "epoch": 0.622439893143366, "grad_norm": 3.3852388858795166, "learning_rate": 1.8746630409496647e-06, "loss": 0.0394, "step": 2097, "video_reward_cumulative_accuracy": 0.8304721030042919 }, { "epoch": 0.6227367171267438, "grad_norm": 2.9447176456451416, "learning_rate": 1.872155358736508e-06, "loss": 0.0385, "step": 2098, "video_reward_cumulative_accuracy": 0.8305529075309819 }, { "epoch": 0.6230335411101217, "grad_norm": 2.6223206520080566, "learning_rate": 1.8696483505755114e-06, "loss": 0.0507, "step": 2099, "video_reward_cumulative_accuracy": 0.8303954263935207 }, { "epoch": 0.6233303650934996, "grad_norm": 1.8533953428268433, "learning_rate": 1.8671420191581901e-06, "loss": 0.0151, "step": 2100, "video_reward_cumulative_accuracy": 0.8304761904761905 }, { "epoch": 0.6236271890768774, "grad_norm": 2.846639633178711, "learning_rate": 1.8646363671753354e-06, "loss": 0.0545, "step": 2101, "video_reward_cumulative_accuracy": 0.8305568776772966 }, { "epoch": 0.6239240130602552, "grad_norm": 2.0987446308135986, "learning_rate": 1.8621313973170074e-06, "loss": 0.0465, "step": 2102, "video_reward_cumulative_accuracy": 0.8306374881065651 }, { "epoch": 0.6242208370436332, "grad_norm": 2.0528335571289062, "learning_rate": 1.8596271122725346e-06, "loss": 0.0421, "step": 2103, "video_reward_cumulative_accuracy": 0.8304802662862577 }, { "epoch": 0.624517661027011, "grad_norm": 1.371273159980774, "learning_rate": 1.8571235147305106e-06, "loss": 0.0196, "step": 2104, "video_reward_cumulative_accuracy": 0.8305608365019012 }, { "epoch": 0.6248144850103888, "grad_norm": 1.087815761566162, "learning_rate": 1.8546206073787882e-06, "loss": 0.0236, "step": 2105, "video_reward_cumulative_accuracy": 0.8306413301662707 }, { "epoch": 0.6251113089937667, "grad_norm": 0.9951589703559875, "learning_rate": 1.8521183929044834e-06, "loss": 0.0072, "step": 2106, "video_reward_cumulative_accuracy": 0.8307217473884141 }, { "epoch": 0.6254081329771446, "grad_norm": 1.7407307624816895, "learning_rate": 1.8496168739939662e-06, "loss": 0.0414, "step": 2107, "video_reward_cumulative_accuracy": 0.8305647840531561 }, { "epoch": 0.6257049569605224, "grad_norm": 0.9107375741004944, "learning_rate": 1.8471160533328591e-06, "loss": 0.0086, "step": 2108, "video_reward_cumulative_accuracy": 0.8306451612903226 }, { "epoch": 0.6260017809439002, "grad_norm": 1.3645784854888916, "learning_rate": 1.844615933606037e-06, "loss": 0.0305, "step": 2109, "video_reward_cumulative_accuracy": 0.8307254623044097 }, { "epoch": 0.6262986049272781, "grad_norm": 0.9860436320304871, "learning_rate": 1.8421165174976191e-06, "loss": 0.0254, "step": 2110, "video_reward_cumulative_accuracy": 0.8308056872037914 }, { "epoch": 0.626595428910656, "grad_norm": 1.1706582307815552, "learning_rate": 1.8396178076909735e-06, "loss": 0.0133, "step": 2111, "video_reward_cumulative_accuracy": 0.8306489815253434 }, { "epoch": 0.6268922528940338, "grad_norm": 2.995161533355713, "learning_rate": 1.8371198068687051e-06, "loss": 0.0276, "step": 2112, "video_reward_cumulative_accuracy": 0.8307291666666666 }, { "epoch": 0.6271890768774117, "grad_norm": 3.4480783939361572, "learning_rate": 1.8346225177126622e-06, "loss": 0.0283, "step": 2113, "video_reward_cumulative_accuracy": 0.8308092759110269 }, { "epoch": 0.6274859008607896, "grad_norm": 4.289238929748535, "learning_rate": 1.8321259429039276e-06, "loss": 0.0667, "step": 2114, "video_reward_cumulative_accuracy": 0.8306527909176916 }, { "epoch": 0.6277827248441674, "grad_norm": 4.29648494720459, "learning_rate": 1.829630085122814e-06, "loss": 0.0629, "step": 2115, "video_reward_cumulative_accuracy": 0.8304964539007093 }, { "epoch": 0.6280795488275452, "grad_norm": 1.3143110275268555, "learning_rate": 1.8271349470488703e-06, "loss": 0.0287, "step": 2116, "video_reward_cumulative_accuracy": 0.8305765595463138 }, { "epoch": 0.6283763728109231, "grad_norm": 2.627892255783081, "learning_rate": 1.8246405313608668e-06, "loss": 0.055, "step": 2117, "video_reward_cumulative_accuracy": 0.8306565895134624 }, { "epoch": 0.628673196794301, "grad_norm": 4.371956825256348, "learning_rate": 1.8221468407368009e-06, "loss": 0.0693, "step": 2118, "video_reward_cumulative_accuracy": 0.8307365439093485 }, { "epoch": 0.6289700207776788, "grad_norm": 1.8598741292953491, "learning_rate": 1.8196538778538941e-06, "loss": 0.0516, "step": 2119, "video_reward_cumulative_accuracy": 0.8308164228409627 }, { "epoch": 0.6292668447610567, "grad_norm": 1.7819162607192993, "learning_rate": 1.8171616453885806e-06, "loss": 0.029, "step": 2120, "video_reward_cumulative_accuracy": 0.8308962264150943 }, { "epoch": 0.6295636687444346, "grad_norm": 1.686316967010498, "learning_rate": 1.8146701460165172e-06, "loss": 0.0179, "step": 2121, "video_reward_cumulative_accuracy": 0.830975954738331 }, { "epoch": 0.6298604927278124, "grad_norm": 3.108572006225586, "learning_rate": 1.8121793824125677e-06, "loss": 0.0561, "step": 2122, "video_reward_cumulative_accuracy": 0.8310556079170593 }, { "epoch": 0.6301573167111902, "grad_norm": 1.9357826709747314, "learning_rate": 1.80968935725081e-06, "loss": 0.0569, "step": 2123, "video_reward_cumulative_accuracy": 0.8311351860574658 }, { "epoch": 0.6304541406945681, "grad_norm": 5.203329563140869, "learning_rate": 1.8072000732045265e-06, "loss": 0.0402, "step": 2124, "video_reward_cumulative_accuracy": 0.8312146892655368 }, { "epoch": 0.630750964677946, "grad_norm": 1.325329065322876, "learning_rate": 1.804711532946206e-06, "loss": 0.0609, "step": 2125, "video_reward_cumulative_accuracy": 0.8312941176470589 }, { "epoch": 0.6310477886613238, "grad_norm": 3.6725287437438965, "learning_rate": 1.8022237391475389e-06, "loss": 0.1029, "step": 2126, "video_reward_cumulative_accuracy": 0.83137347130762 }, { "epoch": 0.6313446126447017, "grad_norm": 1.4435230493545532, "learning_rate": 1.7997366944794116e-06, "loss": 0.0419, "step": 2127, "video_reward_cumulative_accuracy": 0.8314527503526093 }, { "epoch": 0.6316414366280796, "grad_norm": 1.3416098356246948, "learning_rate": 1.7972504016119092e-06, "loss": 0.0154, "step": 2128, "video_reward_cumulative_accuracy": 0.8315319548872181 }, { "epoch": 0.6319382606114574, "grad_norm": 2.8020572662353516, "learning_rate": 1.7947648632143075e-06, "loss": 0.0215, "step": 2129, "video_reward_cumulative_accuracy": 0.8316110850164397 }, { "epoch": 0.6322350845948352, "grad_norm": 0.8301799893379211, "learning_rate": 1.7922800819550737e-06, "loss": 0.0204, "step": 2130, "video_reward_cumulative_accuracy": 0.8316901408450704 }, { "epoch": 0.6325319085782131, "grad_norm": 0.9084307551383972, "learning_rate": 1.7897960605018623e-06, "loss": 0.008, "step": 2131, "video_reward_cumulative_accuracy": 0.83176912247771 }, { "epoch": 0.632828732561591, "grad_norm": 1.8782029151916504, "learning_rate": 1.78731280152151e-06, "loss": 0.0184, "step": 2132, "video_reward_cumulative_accuracy": 0.8318480300187617 }, { "epoch": 0.6331255565449688, "grad_norm": 1.1400971412658691, "learning_rate": 1.7848303076800378e-06, "loss": 0.0157, "step": 2133, "video_reward_cumulative_accuracy": 0.8319268635724332 }, { "epoch": 0.6334223805283467, "grad_norm": 1.0256128311157227, "learning_rate": 1.7823485816426422e-06, "loss": 0.0287, "step": 2134, "video_reward_cumulative_accuracy": 0.8317713214620431 }, { "epoch": 0.6337192045117246, "grad_norm": 3.9153847694396973, "learning_rate": 1.7798676260736986e-06, "loss": 0.0366, "step": 2135, "video_reward_cumulative_accuracy": 0.831615925058548 }, { "epoch": 0.6340160284951024, "grad_norm": 3.1156997680664062, "learning_rate": 1.7773874436367521e-06, "loss": 0.0212, "step": 2136, "video_reward_cumulative_accuracy": 0.8316947565543071 }, { "epoch": 0.6343128524784802, "grad_norm": 1.2179923057556152, "learning_rate": 1.77490803699452e-06, "loss": 0.03, "step": 2137, "video_reward_cumulative_accuracy": 0.8317735142723444 }, { "epoch": 0.6346096764618581, "grad_norm": 1.7540775537490845, "learning_rate": 1.7724294088088867e-06, "loss": 0.0142, "step": 2138, "video_reward_cumulative_accuracy": 0.8318521983161834 }, { "epoch": 0.634906500445236, "grad_norm": 0.7738943696022034, "learning_rate": 1.769951561740899e-06, "loss": 0.0245, "step": 2139, "video_reward_cumulative_accuracy": 0.8319308087891538 }, { "epoch": 0.6352033244286138, "grad_norm": 1.5957173109054565, "learning_rate": 1.7674744984507668e-06, "loss": 0.028, "step": 2140, "video_reward_cumulative_accuracy": 0.8320093457943926 }, { "epoch": 0.6355001484119916, "grad_norm": 4.45468282699585, "learning_rate": 1.7649982215978573e-06, "loss": 0.0393, "step": 2141, "video_reward_cumulative_accuracy": 0.8320878094348435 }, { "epoch": 0.6357969723953696, "grad_norm": 2.190316677093506, "learning_rate": 1.7625227338406946e-06, "loss": 0.0443, "step": 2142, "video_reward_cumulative_accuracy": 0.8321661998132587 }, { "epoch": 0.6360937963787474, "grad_norm": 1.6457760334014893, "learning_rate": 1.7600480378369555e-06, "loss": 0.0134, "step": 2143, "video_reward_cumulative_accuracy": 0.8322445170321978 }, { "epoch": 0.6363906203621252, "grad_norm": 2.3591673374176025, "learning_rate": 1.7575741362434655e-06, "loss": 0.0413, "step": 2144, "video_reward_cumulative_accuracy": 0.831856343283582 }, { "epoch": 0.6366874443455031, "grad_norm": 2.5493083000183105, "learning_rate": 1.7551010317161987e-06, "loss": 0.0269, "step": 2145, "video_reward_cumulative_accuracy": 0.8319347319347319 }, { "epoch": 0.636984268328881, "grad_norm": 1.5170857906341553, "learning_rate": 1.7526287269102724e-06, "loss": 0.0253, "step": 2146, "video_reward_cumulative_accuracy": 0.8320130475302889 }, { "epoch": 0.6372810923122588, "grad_norm": 1.9697927236557007, "learning_rate": 1.750157224479946e-06, "loss": 0.0429, "step": 2147, "video_reward_cumulative_accuracy": 0.8320912901723335 }, { "epoch": 0.6375779162956366, "grad_norm": 1.7776871919631958, "learning_rate": 1.7476865270786169e-06, "loss": 0.0185, "step": 2148, "video_reward_cumulative_accuracy": 0.832169459962756 }, { "epoch": 0.6378747402790146, "grad_norm": 1.8674986362457275, "learning_rate": 1.7452166373588185e-06, "loss": 0.0131, "step": 2149, "video_reward_cumulative_accuracy": 0.8322475570032574 }, { "epoch": 0.6381715642623924, "grad_norm": 4.821762561798096, "learning_rate": 1.7427475579722186e-06, "loss": 0.1067, "step": 2150, "video_reward_cumulative_accuracy": 0.8323255813953488 }, { "epoch": 0.6384683882457702, "grad_norm": 1.7846288681030273, "learning_rate": 1.7402792915696115e-06, "loss": 0.0251, "step": 2151, "video_reward_cumulative_accuracy": 0.8324035332403533 }, { "epoch": 0.6387652122291481, "grad_norm": 3.2803213596343994, "learning_rate": 1.7378118408009227e-06, "loss": 0.045, "step": 2152, "video_reward_cumulative_accuracy": 0.8322490706319703 }, { "epoch": 0.639062036212526, "grad_norm": 3.1670420169830322, "learning_rate": 1.7353452083151975e-06, "loss": 0.0336, "step": 2153, "video_reward_cumulative_accuracy": 0.8323269856014863 }, { "epoch": 0.6393588601959038, "grad_norm": 0.46488067507743835, "learning_rate": 1.7328793967606072e-06, "loss": 0.006, "step": 2154, "video_reward_cumulative_accuracy": 0.8324048282265553 }, { "epoch": 0.6396556841792816, "grad_norm": 2.008270740509033, "learning_rate": 1.7304144087844405e-06, "loss": 0.0287, "step": 2155, "video_reward_cumulative_accuracy": 0.8324825986078886 }, { "epoch": 0.6399525081626596, "grad_norm": 4.751814365386963, "learning_rate": 1.7279502470330994e-06, "loss": 0.1412, "step": 2156, "video_reward_cumulative_accuracy": 0.8325602968460112 }, { "epoch": 0.6402493321460374, "grad_norm": 0.5087360143661499, "learning_rate": 1.7254869141521026e-06, "loss": 0.0166, "step": 2157, "video_reward_cumulative_accuracy": 0.832637923041261 }, { "epoch": 0.6405461561294152, "grad_norm": 2.7528774738311768, "learning_rate": 1.723024412786074e-06, "loss": 0.1185, "step": 2158, "video_reward_cumulative_accuracy": 0.8322520852641334 }, { "epoch": 0.6408429801127931, "grad_norm": 2.7754266262054443, "learning_rate": 1.7205627455787515e-06, "loss": 0.0346, "step": 2159, "video_reward_cumulative_accuracy": 0.8323297823066235 }, { "epoch": 0.641139804096171, "grad_norm": 2.181504726409912, "learning_rate": 1.7181019151729709e-06, "loss": 0.054, "step": 2160, "video_reward_cumulative_accuracy": 0.8324074074074074 }, { "epoch": 0.6414366280795488, "grad_norm": 1.7188265323638916, "learning_rate": 1.7156419242106736e-06, "loss": 0.0377, "step": 2161, "video_reward_cumulative_accuracy": 0.8324849606663581 }, { "epoch": 0.6417334520629266, "grad_norm": 3.2530951499938965, "learning_rate": 1.7131827753329e-06, "loss": 0.0481, "step": 2162, "video_reward_cumulative_accuracy": 0.8323311748381128 }, { "epoch": 0.6420302760463046, "grad_norm": 0.4359917938709259, "learning_rate": 1.710724471179782e-06, "loss": 0.0043, "step": 2163, "video_reward_cumulative_accuracy": 0.8324086916319926 }, { "epoch": 0.6423271000296824, "grad_norm": 2.456068754196167, "learning_rate": 1.7082670143905516e-06, "loss": 0.0374, "step": 2164, "video_reward_cumulative_accuracy": 0.8324861367837338 }, { "epoch": 0.6426239240130602, "grad_norm": 1.3380742073059082, "learning_rate": 1.7058104076035237e-06, "loss": 0.0174, "step": 2165, "video_reward_cumulative_accuracy": 0.8325635103926097 }, { "epoch": 0.6429207479964381, "grad_norm": 2.8565738201141357, "learning_rate": 1.7033546534561046e-06, "loss": 0.0186, "step": 2166, "video_reward_cumulative_accuracy": 0.832409972299169 }, { "epoch": 0.643217571979816, "grad_norm": 3.0799217224121094, "learning_rate": 1.7008997545847878e-06, "loss": 0.066, "step": 2167, "video_reward_cumulative_accuracy": 0.8322565759113982 }, { "epoch": 0.6435143959631938, "grad_norm": 1.7654461860656738, "learning_rate": 1.6984457136251415e-06, "loss": 0.0339, "step": 2168, "video_reward_cumulative_accuracy": 0.8323339483394834 }, { "epoch": 0.6438112199465716, "grad_norm": 4.95688533782959, "learning_rate": 1.69599253321182e-06, "loss": 0.0901, "step": 2169, "video_reward_cumulative_accuracy": 0.8321807284462887 }, { "epoch": 0.6441080439299496, "grad_norm": 1.675530195236206, "learning_rate": 1.6935402159785482e-06, "loss": 0.0691, "step": 2170, "video_reward_cumulative_accuracy": 0.8320276497695852 }, { "epoch": 0.6444048679133274, "grad_norm": 2.567598342895508, "learning_rate": 1.6910887645581288e-06, "loss": 0.0519, "step": 2171, "video_reward_cumulative_accuracy": 0.8321050207277753 }, { "epoch": 0.6447016918967052, "grad_norm": 2.3529388904571533, "learning_rate": 1.6886381815824304e-06, "loss": 0.0239, "step": 2172, "video_reward_cumulative_accuracy": 0.8321823204419889 }, { "epoch": 0.6449985158800831, "grad_norm": 3.154841899871826, "learning_rate": 1.6861884696823935e-06, "loss": 0.0572, "step": 2173, "video_reward_cumulative_accuracy": 0.8322595490105844 }, { "epoch": 0.645295339863461, "grad_norm": 2.80783748626709, "learning_rate": 1.6837396314880216e-06, "loss": 0.0874, "step": 2174, "video_reward_cumulative_accuracy": 0.8323367065317387 }, { "epoch": 0.6455921638468388, "grad_norm": 1.9993150234222412, "learning_rate": 1.681291669628379e-06, "loss": 0.0253, "step": 2175, "video_reward_cumulative_accuracy": 0.8324137931034483 }, { "epoch": 0.6458889878302166, "grad_norm": 2.1698851585388184, "learning_rate": 1.6788445867315918e-06, "loss": 0.0296, "step": 2176, "video_reward_cumulative_accuracy": 0.8324908088235294 }, { "epoch": 0.6461858118135946, "grad_norm": 3.3127455711364746, "learning_rate": 1.6763983854248395e-06, "loss": 0.0626, "step": 2177, "video_reward_cumulative_accuracy": 0.8325677537896188 }, { "epoch": 0.6464826357969724, "grad_norm": 2.9646496772766113, "learning_rate": 1.6739530683343574e-06, "loss": 0.0443, "step": 2178, "video_reward_cumulative_accuracy": 0.8321854912764004 }, { "epoch": 0.6467794597803502, "grad_norm": 3.9705727100372314, "learning_rate": 1.6715086380854311e-06, "loss": 0.0516, "step": 2179, "video_reward_cumulative_accuracy": 0.8322625057365765 }, { "epoch": 0.647076283763728, "grad_norm": 1.3406922817230225, "learning_rate": 1.669065097302393e-06, "loss": 0.0174, "step": 2180, "video_reward_cumulative_accuracy": 0.8323394495412844 }, { "epoch": 0.647373107747106, "grad_norm": 0.7483333349227905, "learning_rate": 1.666622448608622e-06, "loss": 0.0206, "step": 2181, "video_reward_cumulative_accuracy": 0.832416322787712 }, { "epoch": 0.6476699317304838, "grad_norm": 1.2349746227264404, "learning_rate": 1.6641806946265377e-06, "loss": 0.0168, "step": 2182, "video_reward_cumulative_accuracy": 0.8324931255728689 }, { "epoch": 0.6479667557138616, "grad_norm": 1.7513976097106934, "learning_rate": 1.6617398379776008e-06, "loss": 0.0558, "step": 2183, "video_reward_cumulative_accuracy": 0.8323408153916628 }, { "epoch": 0.6482635796972396, "grad_norm": 0.5160261988639832, "learning_rate": 1.6592998812823072e-06, "loss": 0.0109, "step": 2184, "video_reward_cumulative_accuracy": 0.8324175824175825 }, { "epoch": 0.6485604036806174, "grad_norm": 5.037443161010742, "learning_rate": 1.6568608271601873e-06, "loss": 0.0496, "step": 2185, "video_reward_cumulative_accuracy": 0.8324942791762013 }, { "epoch": 0.6488572276639952, "grad_norm": 2.4052493572235107, "learning_rate": 1.6544226782298033e-06, "loss": 0.0281, "step": 2186, "video_reward_cumulative_accuracy": 0.8325709057639524 }, { "epoch": 0.649154051647373, "grad_norm": 2.0196239948272705, "learning_rate": 1.6519854371087434e-06, "loss": 0.0943, "step": 2187, "video_reward_cumulative_accuracy": 0.8324188385916781 }, { "epoch": 0.649450875630751, "grad_norm": 1.2571297883987427, "learning_rate": 1.6495491064136239e-06, "loss": 0.0215, "step": 2188, "video_reward_cumulative_accuracy": 0.8324954296160878 }, { "epoch": 0.6497476996141288, "grad_norm": 1.217132806777954, "learning_rate": 1.6471136887600805e-06, "loss": 0.017, "step": 2189, "video_reward_cumulative_accuracy": 0.832571950662403 }, { "epoch": 0.6500445235975066, "grad_norm": 2.48207426071167, "learning_rate": 1.6446791867627718e-06, "loss": 0.0539, "step": 2190, "video_reward_cumulative_accuracy": 0.832648401826484 }, { "epoch": 0.6503413475808846, "grad_norm": 4.670328617095947, "learning_rate": 1.642245603035372e-06, "loss": 0.0683, "step": 2191, "video_reward_cumulative_accuracy": 0.8327247832040164 }, { "epoch": 0.6506381715642624, "grad_norm": 0.8069223761558533, "learning_rate": 1.6398129401905687e-06, "loss": 0.032, "step": 2192, "video_reward_cumulative_accuracy": 0.8328010948905109 }, { "epoch": 0.6509349955476402, "grad_norm": 3.4688570499420166, "learning_rate": 1.6373812008400623e-06, "loss": 0.0296, "step": 2193, "video_reward_cumulative_accuracy": 0.8326493388052896 }, { "epoch": 0.651231819531018, "grad_norm": 1.3955700397491455, "learning_rate": 1.6349503875945599e-06, "loss": 0.041, "step": 2194, "video_reward_cumulative_accuracy": 0.8327256153144941 }, { "epoch": 0.651528643514396, "grad_norm": 0.7285477519035339, "learning_rate": 1.632520503063777e-06, "loss": 0.0208, "step": 2195, "video_reward_cumulative_accuracy": 0.8328018223234624 }, { "epoch": 0.6518254674977738, "grad_norm": 1.377070426940918, "learning_rate": 1.630091549856429e-06, "loss": 0.0357, "step": 2196, "video_reward_cumulative_accuracy": 0.8328779599271403 }, { "epoch": 0.6521222914811516, "grad_norm": 1.5354235172271729, "learning_rate": 1.6276635305802336e-06, "loss": 0.0356, "step": 2197, "video_reward_cumulative_accuracy": 0.8329540282203004 }, { "epoch": 0.6524191154645296, "grad_norm": 0.5859040021896362, "learning_rate": 1.6252364478419057e-06, "loss": 0.0181, "step": 2198, "video_reward_cumulative_accuracy": 0.8330300272975433 }, { "epoch": 0.6527159394479074, "grad_norm": 0.6209362745285034, "learning_rate": 1.622810304247153e-06, "loss": 0.0127, "step": 2199, "video_reward_cumulative_accuracy": 0.833105957253297 }, { "epoch": 0.6530127634312852, "grad_norm": 1.3316088914871216, "learning_rate": 1.6203851024006779e-06, "loss": 0.0219, "step": 2200, "video_reward_cumulative_accuracy": 0.8331818181818181 }, { "epoch": 0.6530127634312852, "eval_runtime": 132.7968, "eval_samples_per_second": 5.941, "eval_steps_per_second": 0.745, "eval_test_set_accuracy": 0.7878787878787878, "step": 2200 }, { "epoch": 0.653309587414663, "grad_norm": 1.4014110565185547, "learning_rate": 1.6179608449061671e-06, "loss": 0.0302, "step": 2201, "video_reward_cumulative_accuracy": 0.8330304407087687 }, { "epoch": 0.653606411398041, "grad_norm": 1.8992540836334229, "learning_rate": 1.6155375343662986e-06, "loss": 0.0529, "step": 2202, "video_reward_cumulative_accuracy": 0.8331062670299727 }, { "epoch": 0.6539032353814188, "grad_norm": 0.6547995209693909, "learning_rate": 1.6131151733827314e-06, "loss": 0.0144, "step": 2203, "video_reward_cumulative_accuracy": 0.833182024512029 }, { "epoch": 0.6542000593647966, "grad_norm": 4.633205890655518, "learning_rate": 1.6106937645561042e-06, "loss": 0.0385, "step": 2204, "video_reward_cumulative_accuracy": 0.8332577132486388 }, { "epoch": 0.6544968833481746, "grad_norm": 0.8929312229156494, "learning_rate": 1.6082733104860354e-06, "loss": 0.0146, "step": 2205, "video_reward_cumulative_accuracy": 0.8333333333333334 }, { "epoch": 0.6547937073315524, "grad_norm": 1.7991684675216675, "learning_rate": 1.6058538137711155e-06, "loss": 0.0463, "step": 2206, "video_reward_cumulative_accuracy": 0.8334088848594742 }, { "epoch": 0.6550905313149302, "grad_norm": 1.2529010772705078, "learning_rate": 1.603435277008912e-06, "loss": 0.0193, "step": 2207, "video_reward_cumulative_accuracy": 0.8334843679202537 }, { "epoch": 0.655387355298308, "grad_norm": 2.0492186546325684, "learning_rate": 1.6010177027959556e-06, "loss": 0.0362, "step": 2208, "video_reward_cumulative_accuracy": 0.8335597826086957 }, { "epoch": 0.655684179281686, "grad_norm": 3.027116060256958, "learning_rate": 1.598601093727749e-06, "loss": 0.0604, "step": 2209, "video_reward_cumulative_accuracy": 0.833635129017655 }, { "epoch": 0.6559810032650638, "grad_norm": 2.721015691757202, "learning_rate": 1.5961854523987569e-06, "loss": 0.0486, "step": 2210, "video_reward_cumulative_accuracy": 0.833710407239819 }, { "epoch": 0.6562778272484416, "grad_norm": 0.9648367166519165, "learning_rate": 1.5937707814024024e-06, "loss": 0.0094, "step": 2211, "video_reward_cumulative_accuracy": 0.833785617367707 }, { "epoch": 0.6565746512318196, "grad_norm": 1.7013683319091797, "learning_rate": 1.5913570833310721e-06, "loss": 0.0201, "step": 2212, "video_reward_cumulative_accuracy": 0.8338607594936709 }, { "epoch": 0.6568714752151974, "grad_norm": 1.8337125778198242, "learning_rate": 1.5889443607761023e-06, "loss": 0.0281, "step": 2213, "video_reward_cumulative_accuracy": 0.8339358337098961 }, { "epoch": 0.6571682991985752, "grad_norm": 3.3444020748138428, "learning_rate": 1.5865326163277862e-06, "loss": 0.0687, "step": 2214, "video_reward_cumulative_accuracy": 0.8337850045167118 }, { "epoch": 0.657465123181953, "grad_norm": 1.9684284925460815, "learning_rate": 1.584121852575367e-06, "loss": 0.0323, "step": 2215, "video_reward_cumulative_accuracy": 0.8338600451467268 }, { "epoch": 0.657761947165331, "grad_norm": 5.16708517074585, "learning_rate": 1.5817120721070302e-06, "loss": 0.055, "step": 2216, "video_reward_cumulative_accuracy": 0.8339350180505415 }, { "epoch": 0.6580587711487088, "grad_norm": 1.9357199668884277, "learning_rate": 1.579303277509913e-06, "loss": 0.0191, "step": 2217, "video_reward_cumulative_accuracy": 0.8340099233198015 }, { "epoch": 0.6583555951320866, "grad_norm": 1.5035184621810913, "learning_rate": 1.5768954713700868e-06, "loss": 0.0106, "step": 2218, "video_reward_cumulative_accuracy": 0.8340847610459874 }, { "epoch": 0.6586524191154646, "grad_norm": 2.6335108280181885, "learning_rate": 1.574488656272567e-06, "loss": 0.0271, "step": 2219, "video_reward_cumulative_accuracy": 0.8341595313204146 }, { "epoch": 0.6589492430988424, "grad_norm": 2.186413049697876, "learning_rate": 1.5720828348013022e-06, "loss": 0.031, "step": 2220, "video_reward_cumulative_accuracy": 0.8342342342342343 }, { "epoch": 0.6592460670822202, "grad_norm": 0.6132137775421143, "learning_rate": 1.5696780095391762e-06, "loss": 0.0234, "step": 2221, "video_reward_cumulative_accuracy": 0.8343088698784331 }, { "epoch": 0.659542891065598, "grad_norm": 2.6216671466827393, "learning_rate": 1.5672741830680022e-06, "loss": 0.0649, "step": 2222, "video_reward_cumulative_accuracy": 0.8343834383438344 }, { "epoch": 0.659839715048976, "grad_norm": 1.9002763032913208, "learning_rate": 1.5648713579685201e-06, "loss": 0.036, "step": 2223, "video_reward_cumulative_accuracy": 0.8342330184435448 }, { "epoch": 0.6601365390323538, "grad_norm": 4.694214820861816, "learning_rate": 1.5624695368203975e-06, "loss": 0.0338, "step": 2224, "video_reward_cumulative_accuracy": 0.8343075539568345 }, { "epoch": 0.6604333630157316, "grad_norm": 2.101926326751709, "learning_rate": 1.560068722202221e-06, "loss": 0.0163, "step": 2225, "video_reward_cumulative_accuracy": 0.8343820224719101 }, { "epoch": 0.6607301869991096, "grad_norm": 2.703768491744995, "learning_rate": 1.557668916691499e-06, "loss": 0.0342, "step": 2226, "video_reward_cumulative_accuracy": 0.8344564240790656 }, { "epoch": 0.6610270109824874, "grad_norm": 1.3713093996047974, "learning_rate": 1.555270122864656e-06, "loss": 0.0089, "step": 2227, "video_reward_cumulative_accuracy": 0.8345307588684329 }, { "epoch": 0.6613238349658652, "grad_norm": 1.0959382057189941, "learning_rate": 1.5528723432970288e-06, "loss": 0.0285, "step": 2228, "video_reward_cumulative_accuracy": 0.8346050269299821 }, { "epoch": 0.661620658949243, "grad_norm": 2.450881242752075, "learning_rate": 1.5504755805628677e-06, "loss": 0.026, "step": 2229, "video_reward_cumulative_accuracy": 0.8346792283535217 }, { "epoch": 0.661917482932621, "grad_norm": 3.218003749847412, "learning_rate": 1.548079837235329e-06, "loss": 0.0422, "step": 2230, "video_reward_cumulative_accuracy": 0.8345291479820628 }, { "epoch": 0.6622143069159988, "grad_norm": 3.611631393432617, "learning_rate": 1.545685115886477e-06, "loss": 0.1359, "step": 2231, "video_reward_cumulative_accuracy": 0.834603316898252 }, { "epoch": 0.6625111308993766, "grad_norm": 3.1366591453552246, "learning_rate": 1.5432914190872757e-06, "loss": 0.1005, "step": 2232, "video_reward_cumulative_accuracy": 0.8346774193548387 }, { "epoch": 0.6628079548827546, "grad_norm": 1.874556064605713, "learning_rate": 1.5408987494075924e-06, "loss": 0.0187, "step": 2233, "video_reward_cumulative_accuracy": 0.8345275414240931 }, { "epoch": 0.6631047788661324, "grad_norm": 2.389155864715576, "learning_rate": 1.53850710941619e-06, "loss": 0.0388, "step": 2234, "video_reward_cumulative_accuracy": 0.8346016114592659 }, { "epoch": 0.6634016028495102, "grad_norm": 2.063230037689209, "learning_rate": 1.5361165016807261e-06, "loss": 0.0311, "step": 2235, "video_reward_cumulative_accuracy": 0.834675615212528 }, { "epoch": 0.663698426832888, "grad_norm": 2.24172043800354, "learning_rate": 1.5337269287677497e-06, "loss": 0.0262, "step": 2236, "video_reward_cumulative_accuracy": 0.8347495527728086 }, { "epoch": 0.663995250816266, "grad_norm": 2.5830023288726807, "learning_rate": 1.5313383932426996e-06, "loss": 0.0807, "step": 2237, "video_reward_cumulative_accuracy": 0.834823424228878 }, { "epoch": 0.6642920747996438, "grad_norm": 2.467978000640869, "learning_rate": 1.5289508976699007e-06, "loss": 0.0606, "step": 2238, "video_reward_cumulative_accuracy": 0.8348972296693477 }, { "epoch": 0.6645888987830216, "grad_norm": 2.111464500427246, "learning_rate": 1.5265644446125606e-06, "loss": 0.0398, "step": 2239, "video_reward_cumulative_accuracy": 0.8349709691826709 }, { "epoch": 0.6648857227663996, "grad_norm": 2.0798873901367188, "learning_rate": 1.5241790366327685e-06, "loss": 0.0338, "step": 2240, "video_reward_cumulative_accuracy": 0.8350446428571429 }, { "epoch": 0.6651825467497774, "grad_norm": 0.6554487943649292, "learning_rate": 1.5217946762914924e-06, "loss": 0.0063, "step": 2241, "video_reward_cumulative_accuracy": 0.8351182507809014 }, { "epoch": 0.6654793707331552, "grad_norm": 0.8652382493019104, "learning_rate": 1.5194113661485727e-06, "loss": 0.0122, "step": 2242, "video_reward_cumulative_accuracy": 0.8351917930419268 }, { "epoch": 0.665776194716533, "grad_norm": 1.6087634563446045, "learning_rate": 1.5170291087627258e-06, "loss": 0.0285, "step": 2243, "video_reward_cumulative_accuracy": 0.8352652697280428 }, { "epoch": 0.666073018699911, "grad_norm": 2.6194751262664795, "learning_rate": 1.5146479066915355e-06, "loss": 0.0324, "step": 2244, "video_reward_cumulative_accuracy": 0.8351158645276292 }, { "epoch": 0.6663698426832888, "grad_norm": 1.7327265739440918, "learning_rate": 1.5122677624914528e-06, "loss": 0.0704, "step": 2245, "video_reward_cumulative_accuracy": 0.834966592427617 }, { "epoch": 0.6666666666666666, "grad_norm": 0.7121922373771667, "learning_rate": 1.5098886787177951e-06, "loss": 0.0064, "step": 2246, "video_reward_cumulative_accuracy": 0.835040071237756 }, { "epoch": 0.6669634906500446, "grad_norm": 2.6367523670196533, "learning_rate": 1.507510657924738e-06, "loss": 0.0327, "step": 2247, "video_reward_cumulative_accuracy": 0.835113484646195 }, { "epoch": 0.6672603146334224, "grad_norm": 4.55868673324585, "learning_rate": 1.5051337026653195e-06, "loss": 0.048, "step": 2248, "video_reward_cumulative_accuracy": 0.8351868327402135 }, { "epoch": 0.6675571386168002, "grad_norm": 3.684610605239868, "learning_rate": 1.502757815491429e-06, "loss": 0.0326, "step": 2249, "video_reward_cumulative_accuracy": 0.8352601156069365 }, { "epoch": 0.667853962600178, "grad_norm": 3.4382266998291016, "learning_rate": 1.5003829989538154e-06, "loss": 0.0588, "step": 2250, "video_reward_cumulative_accuracy": 0.8351111111111111 }, { "epoch": 0.668150786583556, "grad_norm": 2.8827478885650635, "learning_rate": 1.4980092556020713e-06, "loss": 0.041, "step": 2251, "video_reward_cumulative_accuracy": 0.8351843625055531 }, { "epoch": 0.6684476105669338, "grad_norm": 0.7824203968048096, "learning_rate": 1.495636587984643e-06, "loss": 0.0075, "step": 2252, "video_reward_cumulative_accuracy": 0.8352575488454707 }, { "epoch": 0.6687444345503116, "grad_norm": 1.5358774662017822, "learning_rate": 1.4932649986488192e-06, "loss": 0.0158, "step": 2253, "video_reward_cumulative_accuracy": 0.8353306702174877 }, { "epoch": 0.6690412585336896, "grad_norm": 0.3571893572807312, "learning_rate": 1.4908944901407296e-06, "loss": 0.0068, "step": 2254, "video_reward_cumulative_accuracy": 0.8354037267080745 }, { "epoch": 0.6693380825170674, "grad_norm": 5.035340785980225, "learning_rate": 1.488525065005348e-06, "loss": 0.0604, "step": 2255, "video_reward_cumulative_accuracy": 0.8352549889135255 }, { "epoch": 0.6696349065004452, "grad_norm": 4.127224922180176, "learning_rate": 1.4861567257864795e-06, "loss": 0.0443, "step": 2256, "video_reward_cumulative_accuracy": 0.8353280141843972 }, { "epoch": 0.669931730483823, "grad_norm": 1.52211594581604, "learning_rate": 1.4837894750267664e-06, "loss": 0.0399, "step": 2257, "video_reward_cumulative_accuracy": 0.835400974745237 }, { "epoch": 0.670228554467201, "grad_norm": 2.7589707374572754, "learning_rate": 1.481423315267685e-06, "loss": 0.0633, "step": 2258, "video_reward_cumulative_accuracy": 0.8354738706820195 }, { "epoch": 0.6705253784505788, "grad_norm": 1.0256385803222656, "learning_rate": 1.479058249049533e-06, "loss": 0.0133, "step": 2259, "video_reward_cumulative_accuracy": 0.8355467020805666 }, { "epoch": 0.6708222024339566, "grad_norm": 1.3333526849746704, "learning_rate": 1.4766942789114432e-06, "loss": 0.0201, "step": 2260, "video_reward_cumulative_accuracy": 0.8353982300884956 }, { "epoch": 0.6711190264173346, "grad_norm": 0.8754663467407227, "learning_rate": 1.4743314073913636e-06, "loss": 0.0151, "step": 2261, "video_reward_cumulative_accuracy": 0.8354710305174702 }, { "epoch": 0.6714158504007124, "grad_norm": 3.1899867057800293, "learning_rate": 1.4719696370260678e-06, "loss": 0.0595, "step": 2262, "video_reward_cumulative_accuracy": 0.8353227232537578 }, { "epoch": 0.6717126743840902, "grad_norm": 0.2183128446340561, "learning_rate": 1.469608970351144e-06, "loss": 0.004, "step": 2263, "video_reward_cumulative_accuracy": 0.8353954927087937 }, { "epoch": 0.672009498367468, "grad_norm": 2.6553597450256348, "learning_rate": 1.4672494099009981e-06, "loss": 0.0242, "step": 2264, "video_reward_cumulative_accuracy": 0.8354681978798587 }, { "epoch": 0.672306322350846, "grad_norm": 1.7975043058395386, "learning_rate": 1.4648909582088482e-06, "loss": 0.0358, "step": 2265, "video_reward_cumulative_accuracy": 0.8355408388520972 }, { "epoch": 0.6726031463342238, "grad_norm": 2.4716291427612305, "learning_rate": 1.4625336178067189e-06, "loss": 0.0977, "step": 2266, "video_reward_cumulative_accuracy": 0.8351721094439541 }, { "epoch": 0.6728999703176016, "grad_norm": 1.4434208869934082, "learning_rate": 1.4601773912254457e-06, "loss": 0.0303, "step": 2267, "video_reward_cumulative_accuracy": 0.8352448169386855 }, { "epoch": 0.6731967943009796, "grad_norm": 0.3788398504257202, "learning_rate": 1.4578222809946655e-06, "loss": 0.0153, "step": 2268, "video_reward_cumulative_accuracy": 0.8353174603174603 }, { "epoch": 0.6734936182843574, "grad_norm": 2.074415683746338, "learning_rate": 1.4554682896428179e-06, "loss": 0.0558, "step": 2269, "video_reward_cumulative_accuracy": 0.8351696782723667 }, { "epoch": 0.6737904422677352, "grad_norm": 6.2886962890625, "learning_rate": 1.4531154196971414e-06, "loss": 0.0606, "step": 2270, "video_reward_cumulative_accuracy": 0.8352422907488987 }, { "epoch": 0.674087266251113, "grad_norm": 3.100346803665161, "learning_rate": 1.45076367368367e-06, "loss": 0.027, "step": 2271, "video_reward_cumulative_accuracy": 0.8350946719506825 }, { "epoch": 0.674384090234491, "grad_norm": 2.0418272018432617, "learning_rate": 1.4484130541272323e-06, "loss": 0.0412, "step": 2272, "video_reward_cumulative_accuracy": 0.8351672535211268 }, { "epoch": 0.6746809142178688, "grad_norm": 1.8124505281448364, "learning_rate": 1.4460635635514448e-06, "loss": 0.0292, "step": 2273, "video_reward_cumulative_accuracy": 0.8352397712274527 }, { "epoch": 0.6749777382012466, "grad_norm": 3.0479700565338135, "learning_rate": 1.443715204478715e-06, "loss": 0.0704, "step": 2274, "video_reward_cumulative_accuracy": 0.8353122251539138 }, { "epoch": 0.6752745621846246, "grad_norm": 1.432740330696106, "learning_rate": 1.4413679794302325e-06, "loss": 0.0208, "step": 2275, "video_reward_cumulative_accuracy": 0.8353846153846154 }, { "epoch": 0.6755713861680024, "grad_norm": 1.0187022686004639, "learning_rate": 1.4390218909259731e-06, "loss": 0.0187, "step": 2276, "video_reward_cumulative_accuracy": 0.835456942003515 }, { "epoch": 0.6758682101513802, "grad_norm": 3.2267208099365234, "learning_rate": 1.436676941484691e-06, "loss": 0.0574, "step": 2277, "video_reward_cumulative_accuracy": 0.8353096179183136 }, { "epoch": 0.676165034134758, "grad_norm": 1.9733569622039795, "learning_rate": 1.4343331336239151e-06, "loss": 0.031, "step": 2278, "video_reward_cumulative_accuracy": 0.8353819139596137 }, { "epoch": 0.676461858118136, "grad_norm": 1.0220005512237549, "learning_rate": 1.4319904698599524e-06, "loss": 0.0171, "step": 2279, "video_reward_cumulative_accuracy": 0.8354541465555068 }, { "epoch": 0.6767586821015138, "grad_norm": 0.9054021239280701, "learning_rate": 1.4296489527078777e-06, "loss": 0.0106, "step": 2280, "video_reward_cumulative_accuracy": 0.8355263157894737 }, { "epoch": 0.6770555060848916, "grad_norm": 1.9927374124526978, "learning_rate": 1.4273085846815374e-06, "loss": 0.0309, "step": 2281, "video_reward_cumulative_accuracy": 0.8355984217448488 }, { "epoch": 0.6773523300682696, "grad_norm": 1.0571626424789429, "learning_rate": 1.4249693682935462e-06, "loss": 0.0116, "step": 2282, "video_reward_cumulative_accuracy": 0.8356704645048203 }, { "epoch": 0.6776491540516474, "grad_norm": 2.752652883529663, "learning_rate": 1.4226313060552774e-06, "loss": 0.0646, "step": 2283, "video_reward_cumulative_accuracy": 0.835742444152431 }, { "epoch": 0.6779459780350252, "grad_norm": 1.6572239398956299, "learning_rate": 1.4202944004768694e-06, "loss": 0.0332, "step": 2284, "video_reward_cumulative_accuracy": 0.8358143607705779 }, { "epoch": 0.678242802018403, "grad_norm": 2.04713773727417, "learning_rate": 1.4179586540672152e-06, "loss": 0.0624, "step": 2285, "video_reward_cumulative_accuracy": 0.8358862144420132 }, { "epoch": 0.678539626001781, "grad_norm": 0.3329226076602936, "learning_rate": 1.4156240693339663e-06, "loss": 0.0062, "step": 2286, "video_reward_cumulative_accuracy": 0.8359580052493438 }, { "epoch": 0.6788364499851588, "grad_norm": 2.5659382343292236, "learning_rate": 1.4132906487835263e-06, "loss": 0.0549, "step": 2287, "video_reward_cumulative_accuracy": 0.8360297332750328 }, { "epoch": 0.6791332739685366, "grad_norm": 2.2131476402282715, "learning_rate": 1.4109583949210481e-06, "loss": 0.0267, "step": 2288, "video_reward_cumulative_accuracy": 0.8361013986013986 }, { "epoch": 0.6794300979519146, "grad_norm": 0.8246326446533203, "learning_rate": 1.408627310250434e-06, "loss": 0.0205, "step": 2289, "video_reward_cumulative_accuracy": 0.836173001310616 }, { "epoch": 0.6797269219352924, "grad_norm": 1.7747602462768555, "learning_rate": 1.4062973972743277e-06, "loss": 0.0255, "step": 2290, "video_reward_cumulative_accuracy": 0.8362445414847162 }, { "epoch": 0.6800237459186702, "grad_norm": 2.40305495262146, "learning_rate": 1.4039686584941176e-06, "loss": 0.0255, "step": 2291, "video_reward_cumulative_accuracy": 0.8360977738978612 }, { "epoch": 0.680320569902048, "grad_norm": 1.9818377494812012, "learning_rate": 1.4016410964099308e-06, "loss": 0.0512, "step": 2292, "video_reward_cumulative_accuracy": 0.8361692844677138 }, { "epoch": 0.680617393885426, "grad_norm": 2.252643585205078, "learning_rate": 1.3993147135206311e-06, "loss": 0.0324, "step": 2293, "video_reward_cumulative_accuracy": 0.8362407326646315 }, { "epoch": 0.6809142178688038, "grad_norm": 2.190392017364502, "learning_rate": 1.3969895123238177e-06, "loss": 0.0207, "step": 2294, "video_reward_cumulative_accuracy": 0.8360941586748039 }, { "epoch": 0.6812110418521816, "grad_norm": 2.94880747795105, "learning_rate": 1.3946654953158176e-06, "loss": 0.0357, "step": 2295, "video_reward_cumulative_accuracy": 0.8359477124183007 }, { "epoch": 0.6815078658355596, "grad_norm": 1.3520139455795288, "learning_rate": 1.3923426649916894e-06, "loss": 0.0097, "step": 2296, "video_reward_cumulative_accuracy": 0.8360191637630662 }, { "epoch": 0.6818046898189374, "grad_norm": 2.1631641387939453, "learning_rate": 1.3900210238452169e-06, "loss": 0.0406, "step": 2297, "video_reward_cumulative_accuracy": 0.8360905528950805 }, { "epoch": 0.6821015138023152, "grad_norm": 3.2488834857940674, "learning_rate": 1.3877005743689087e-06, "loss": 0.055, "step": 2298, "video_reward_cumulative_accuracy": 0.8361618798955613 }, { "epoch": 0.682398337785693, "grad_norm": 3.1709814071655273, "learning_rate": 1.3853813190539899e-06, "loss": 0.0387, "step": 2299, "video_reward_cumulative_accuracy": 0.8362331448455851 }, { "epoch": 0.682695161769071, "grad_norm": 5.1996307373046875, "learning_rate": 1.3830632603904075e-06, "loss": 0.0409, "step": 2300, "video_reward_cumulative_accuracy": 0.836304347826087 }, { "epoch": 0.6829919857524488, "grad_norm": 1.6898528337478638, "learning_rate": 1.3807464008668225e-06, "loss": 0.0352, "step": 2301, "video_reward_cumulative_accuracy": 0.8363754889178618 }, { "epoch": 0.6832888097358266, "grad_norm": 2.885385513305664, "learning_rate": 1.3784307429706084e-06, "loss": 0.0583, "step": 2302, "video_reward_cumulative_accuracy": 0.8364465682015638 }, { "epoch": 0.6835856337192046, "grad_norm": 0.6500884890556335, "learning_rate": 1.3761162891878496e-06, "loss": 0.0198, "step": 2303, "video_reward_cumulative_accuracy": 0.8363004776378636 }, { "epoch": 0.6838824577025824, "grad_norm": 2.2626075744628906, "learning_rate": 1.3738030420033349e-06, "loss": 0.0381, "step": 2304, "video_reward_cumulative_accuracy": 0.8363715277777778 }, { "epoch": 0.6841792816859602, "grad_norm": 2.9060208797454834, "learning_rate": 1.3714910039005608e-06, "loss": 0.0655, "step": 2305, "video_reward_cumulative_accuracy": 0.8364425162689805 }, { "epoch": 0.684476105669338, "grad_norm": 0.6458223462104797, "learning_rate": 1.3691801773617247e-06, "loss": 0.0079, "step": 2306, "video_reward_cumulative_accuracy": 0.8365134431916739 }, { "epoch": 0.684772929652716, "grad_norm": 0.7338875532150269, "learning_rate": 1.3668705648677227e-06, "loss": 0.0113, "step": 2307, "video_reward_cumulative_accuracy": 0.8365843086259211 }, { "epoch": 0.6850697536360938, "grad_norm": 2.2893717288970947, "learning_rate": 1.3645621688981497e-06, "loss": 0.0306, "step": 2308, "video_reward_cumulative_accuracy": 0.8366551126516465 }, { "epoch": 0.6853665776194716, "grad_norm": 0.5800947546958923, "learning_rate": 1.3622549919312902e-06, "loss": 0.0128, "step": 2309, "video_reward_cumulative_accuracy": 0.8367258553486357 }, { "epoch": 0.6856634016028496, "grad_norm": 5.060973167419434, "learning_rate": 1.3599490364441236e-06, "loss": 0.0676, "step": 2310, "video_reward_cumulative_accuracy": 0.8367965367965368 }, { "epoch": 0.6859602255862274, "grad_norm": 0.9198673367500305, "learning_rate": 1.3576443049123175e-06, "loss": 0.0215, "step": 2311, "video_reward_cumulative_accuracy": 0.8368671570748594 }, { "epoch": 0.6862570495696052, "grad_norm": 1.2726801633834839, "learning_rate": 1.3553407998102243e-06, "loss": 0.0265, "step": 2312, "video_reward_cumulative_accuracy": 0.8369377162629758 }, { "epoch": 0.686553873552983, "grad_norm": 0.5441017746925354, "learning_rate": 1.3530385236108817e-06, "loss": 0.0066, "step": 2313, "video_reward_cumulative_accuracy": 0.8370082144401211 }, { "epoch": 0.686850697536361, "grad_norm": 5.122513771057129, "learning_rate": 1.3507374787860045e-06, "loss": 0.0473, "step": 2314, "video_reward_cumulative_accuracy": 0.8370786516853933 }, { "epoch": 0.6871475215197388, "grad_norm": 3.541785478591919, "learning_rate": 1.3484376678059885e-06, "loss": 0.0438, "step": 2315, "video_reward_cumulative_accuracy": 0.8371490280777538 }, { "epoch": 0.6874443455031166, "grad_norm": 1.8629510402679443, "learning_rate": 1.3461390931399044e-06, "loss": 0.05, "step": 2316, "video_reward_cumulative_accuracy": 0.8372193436960277 }, { "epoch": 0.6877411694864946, "grad_norm": 2.798170328140259, "learning_rate": 1.3438417572554947e-06, "loss": 0.0348, "step": 2317, "video_reward_cumulative_accuracy": 0.8372895986189037 }, { "epoch": 0.6880379934698724, "grad_norm": 1.2799954414367676, "learning_rate": 1.3415456626191737e-06, "loss": 0.0092, "step": 2318, "video_reward_cumulative_accuracy": 0.8373597929249353 }, { "epoch": 0.6883348174532502, "grad_norm": 2.5710394382476807, "learning_rate": 1.33925081169602e-06, "loss": 0.0219, "step": 2319, "video_reward_cumulative_accuracy": 0.8374299266925399 }, { "epoch": 0.688631641436628, "grad_norm": 2.2735490798950195, "learning_rate": 1.3369572069497802e-06, "loss": 0.0518, "step": 2320, "video_reward_cumulative_accuracy": 0.8375 }, { "epoch": 0.688928465420006, "grad_norm": 2.3334121704101562, "learning_rate": 1.3346648508428595e-06, "loss": 0.0304, "step": 2321, "video_reward_cumulative_accuracy": 0.8375700129254632 }, { "epoch": 0.6892252894033838, "grad_norm": 2.004912853240967, "learning_rate": 1.3323737458363278e-06, "loss": 0.0474, "step": 2322, "video_reward_cumulative_accuracy": 0.8376399655469423 }, { "epoch": 0.6895221133867616, "grad_norm": 3.0432021617889404, "learning_rate": 1.3300838943899064e-06, "loss": 0.0394, "step": 2323, "video_reward_cumulative_accuracy": 0.837709857942316 }, { "epoch": 0.6898189373701396, "grad_norm": 0.710309624671936, "learning_rate": 1.327795298961974e-06, "loss": 0.0132, "step": 2324, "video_reward_cumulative_accuracy": 0.8377796901893287 }, { "epoch": 0.6901157613535174, "grad_norm": 1.954226016998291, "learning_rate": 1.3255079620095602e-06, "loss": 0.0161, "step": 2325, "video_reward_cumulative_accuracy": 0.8378494623655914 }, { "epoch": 0.6904125853368952, "grad_norm": 2.4498023986816406, "learning_rate": 1.323221885988341e-06, "loss": 0.0582, "step": 2326, "video_reward_cumulative_accuracy": 0.8377042132416165 }, { "epoch": 0.690709409320273, "grad_norm": 0.965714693069458, "learning_rate": 1.3209370733526444e-06, "loss": 0.013, "step": 2327, "video_reward_cumulative_accuracy": 0.8377739578856898 }, { "epoch": 0.691006233303651, "grad_norm": 1.6976726055145264, "learning_rate": 1.3186535265554363e-06, "loss": 0.0317, "step": 2328, "video_reward_cumulative_accuracy": 0.8378436426116839 }, { "epoch": 0.6913030572870288, "grad_norm": 2.1996965408325195, "learning_rate": 1.3163712480483255e-06, "loss": 0.0466, "step": 2329, "video_reward_cumulative_accuracy": 0.8379132674967797 }, { "epoch": 0.6915998812704066, "grad_norm": 5.554974555969238, "learning_rate": 1.3140902402815616e-06, "loss": 0.038, "step": 2330, "video_reward_cumulative_accuracy": 0.8379828326180258 }, { "epoch": 0.6918967052537845, "grad_norm": 2.487802028656006, "learning_rate": 1.3118105057040245e-06, "loss": 0.0667, "step": 2331, "video_reward_cumulative_accuracy": 0.838052338052338 }, { "epoch": 0.6921935292371624, "grad_norm": 2.990607976913452, "learning_rate": 1.3095320467632344e-06, "loss": 0.0428, "step": 2332, "video_reward_cumulative_accuracy": 0.8381217838765008 }, { "epoch": 0.6924903532205402, "grad_norm": 1.0717166662216187, "learning_rate": 1.3072548659053353e-06, "loss": 0.0196, "step": 2333, "video_reward_cumulative_accuracy": 0.8381911701671667 }, { "epoch": 0.692787177203918, "grad_norm": 2.585355043411255, "learning_rate": 1.3049789655751039e-06, "loss": 0.0557, "step": 2334, "video_reward_cumulative_accuracy": 0.8382604970008569 }, { "epoch": 0.693084001187296, "grad_norm": 1.229258418083191, "learning_rate": 1.3027043482159378e-06, "loss": 0.0284, "step": 2335, "video_reward_cumulative_accuracy": 0.8383297644539615 }, { "epoch": 0.6933808251706738, "grad_norm": 1.2362085580825806, "learning_rate": 1.3004310162698598e-06, "loss": 0.025, "step": 2336, "video_reward_cumulative_accuracy": 0.8383989726027398 }, { "epoch": 0.6936776491540516, "grad_norm": 2.241290807723999, "learning_rate": 1.298158972177515e-06, "loss": 0.0327, "step": 2337, "video_reward_cumulative_accuracy": 0.8382541720154044 }, { "epoch": 0.6939744731374295, "grad_norm": 2.196943998336792, "learning_rate": 1.2958882183781612e-06, "loss": 0.0565, "step": 2338, "video_reward_cumulative_accuracy": 0.838109495295124 }, { "epoch": 0.6942712971208074, "grad_norm": 1.276810884475708, "learning_rate": 1.2936187573096737e-06, "loss": 0.0237, "step": 2339, "video_reward_cumulative_accuracy": 0.837964942283027 }, { "epoch": 0.6945681211041852, "grad_norm": 2.0675230026245117, "learning_rate": 1.2913505914085384e-06, "loss": 0.0823, "step": 2340, "video_reward_cumulative_accuracy": 0.8378205128205128 }, { "epoch": 0.694864945087563, "grad_norm": 1.5794941186904907, "learning_rate": 1.2890837231098513e-06, "loss": 0.0305, "step": 2341, "video_reward_cumulative_accuracy": 0.8378897906877403 }, { "epoch": 0.695161769070941, "grad_norm": 1.307797908782959, "learning_rate": 1.2868181548473168e-06, "loss": 0.0242, "step": 2342, "video_reward_cumulative_accuracy": 0.8379590093936806 }, { "epoch": 0.6954585930543188, "grad_norm": 1.9544386863708496, "learning_rate": 1.2845538890532416e-06, "loss": 0.0246, "step": 2343, "video_reward_cumulative_accuracy": 0.8380281690140845 }, { "epoch": 0.6957554170376966, "grad_norm": 2.473175048828125, "learning_rate": 1.2822909281585359e-06, "loss": 0.0998, "step": 2344, "video_reward_cumulative_accuracy": 0.8380972696245734 }, { "epoch": 0.6960522410210745, "grad_norm": 2.0167837142944336, "learning_rate": 1.280029274592706e-06, "loss": 0.0363, "step": 2345, "video_reward_cumulative_accuracy": 0.8381663113006397 }, { "epoch": 0.6963490650044524, "grad_norm": 0.8653875589370728, "learning_rate": 1.2777689307838572e-06, "loss": 0.0146, "step": 2346, "video_reward_cumulative_accuracy": 0.8382352941176471 }, { "epoch": 0.6966458889878302, "grad_norm": 2.326350688934326, "learning_rate": 1.2755098991586884e-06, "loss": 0.0474, "step": 2347, "video_reward_cumulative_accuracy": 0.8383042181508309 }, { "epoch": 0.696942712971208, "grad_norm": 3.8789632320404053, "learning_rate": 1.273252182142489e-06, "loss": 0.0332, "step": 2348, "video_reward_cumulative_accuracy": 0.8383730834752982 }, { "epoch": 0.697239536954586, "grad_norm": 1.4527256488800049, "learning_rate": 1.2709957821591384e-06, "loss": 0.0275, "step": 2349, "video_reward_cumulative_accuracy": 0.8384418901660281 }, { "epoch": 0.6975363609379638, "grad_norm": 2.494121551513672, "learning_rate": 1.2687407016310992e-06, "loss": 0.0569, "step": 2350, "video_reward_cumulative_accuracy": 0.8382978723404255 }, { "epoch": 0.6978331849213416, "grad_norm": 0.8342524170875549, "learning_rate": 1.2664869429794197e-06, "loss": 0.0104, "step": 2351, "video_reward_cumulative_accuracy": 0.8383666524883029 }, { "epoch": 0.6981300089047195, "grad_norm": 0.7167454361915588, "learning_rate": 1.2642345086237294e-06, "loss": 0.0146, "step": 2352, "video_reward_cumulative_accuracy": 0.8384353741496599 }, { "epoch": 0.6984268328880974, "grad_norm": 3.59609317779541, "learning_rate": 1.261983400982234e-06, "loss": 0.0414, "step": 2353, "video_reward_cumulative_accuracy": 0.838504037399065 }, { "epoch": 0.6987236568714752, "grad_norm": 2.267436981201172, "learning_rate": 1.2597336224717183e-06, "loss": 0.0416, "step": 2354, "video_reward_cumulative_accuracy": 0.8383602378929482 }, { "epoch": 0.699020480854853, "grad_norm": 2.883449077606201, "learning_rate": 1.257485175507535e-06, "loss": 0.038, "step": 2355, "video_reward_cumulative_accuracy": 0.8384288747346073 }, { "epoch": 0.699317304838231, "grad_norm": 0.7676532864570618, "learning_rate": 1.255238062503612e-06, "loss": 0.0109, "step": 2356, "video_reward_cumulative_accuracy": 0.8384974533106961 }, { "epoch": 0.6996141288216088, "grad_norm": 0.7922206521034241, "learning_rate": 1.2529922858724422e-06, "loss": 0.0178, "step": 2357, "video_reward_cumulative_accuracy": 0.8385659736953754 }, { "epoch": 0.6999109528049866, "grad_norm": 1.5223472118377686, "learning_rate": 1.2507478480250862e-06, "loss": 0.0353, "step": 2358, "video_reward_cumulative_accuracy": 0.8386344359626803 }, { "epoch": 0.7002077767883645, "grad_norm": 2.4586706161499023, "learning_rate": 1.2485047513711643e-06, "loss": 0.0442, "step": 2359, "video_reward_cumulative_accuracy": 0.8387028401865197 }, { "epoch": 0.7005046007717424, "grad_norm": 3.2379069328308105, "learning_rate": 1.2462629983188586e-06, "loss": 0.0436, "step": 2360, "video_reward_cumulative_accuracy": 0.8387711864406779 }, { "epoch": 0.7008014247551202, "grad_norm": 1.5388333797454834, "learning_rate": 1.2440225912749096e-06, "loss": 0.0233, "step": 2361, "video_reward_cumulative_accuracy": 0.838839474798814 }, { "epoch": 0.701098248738498, "grad_norm": 1.7202107906341553, "learning_rate": 1.241783532644611e-06, "loss": 0.0366, "step": 2362, "video_reward_cumulative_accuracy": 0.8389077053344624 }, { "epoch": 0.701395072721876, "grad_norm": 2.6380770206451416, "learning_rate": 1.2395458248318107e-06, "loss": 0.0371, "step": 2363, "video_reward_cumulative_accuracy": 0.8387642826914938 }, { "epoch": 0.7016918967052538, "grad_norm": 1.3252801895141602, "learning_rate": 1.2373094702389031e-06, "loss": 0.015, "step": 2364, "video_reward_cumulative_accuracy": 0.8388324873096447 }, { "epoch": 0.7019887206886316, "grad_norm": 2.323251724243164, "learning_rate": 1.2350744712668332e-06, "loss": 0.0574, "step": 2365, "video_reward_cumulative_accuracy": 0.8386892177589852 }, { "epoch": 0.7022855446720095, "grad_norm": 2.107527017593384, "learning_rate": 1.2328408303150892e-06, "loss": 0.0387, "step": 2366, "video_reward_cumulative_accuracy": 0.8387573964497042 }, { "epoch": 0.7025823686553874, "grad_norm": 2.007369041442871, "learning_rate": 1.2306085497817016e-06, "loss": 0.0187, "step": 2367, "video_reward_cumulative_accuracy": 0.8388255175327418 }, { "epoch": 0.7028791926387652, "grad_norm": 2.632237195968628, "learning_rate": 1.2283776320632409e-06, "loss": 0.0215, "step": 2368, "video_reward_cumulative_accuracy": 0.838893581081081 }, { "epoch": 0.703176016622143, "grad_norm": 2.760110378265381, "learning_rate": 1.2261480795548123e-06, "loss": 0.0942, "step": 2369, "video_reward_cumulative_accuracy": 0.8389615871675813 }, { "epoch": 0.703472840605521, "grad_norm": 1.2644137144088745, "learning_rate": 1.223919894650058e-06, "loss": 0.0105, "step": 2370, "video_reward_cumulative_accuracy": 0.8390295358649789 }, { "epoch": 0.7037696645888988, "grad_norm": 1.7368650436401367, "learning_rate": 1.2216930797411486e-06, "loss": 0.0209, "step": 2371, "video_reward_cumulative_accuracy": 0.8390974272458878 }, { "epoch": 0.7040664885722766, "grad_norm": 1.736447811126709, "learning_rate": 1.2194676372187886e-06, "loss": 0.0279, "step": 2372, "video_reward_cumulative_accuracy": 0.8389544688026982 }, { "epoch": 0.7043633125556545, "grad_norm": 2.79758620262146, "learning_rate": 1.2172435694722064e-06, "loss": 0.0739, "step": 2373, "video_reward_cumulative_accuracy": 0.838811630847029 }, { "epoch": 0.7046601365390324, "grad_norm": 0.7806415557861328, "learning_rate": 1.2150208788891533e-06, "loss": 0.0124, "step": 2374, "video_reward_cumulative_accuracy": 0.8388795282224094 }, { "epoch": 0.7049569605224102, "grad_norm": 1.3390443325042725, "learning_rate": 1.2127995678559042e-06, "loss": 0.015, "step": 2375, "video_reward_cumulative_accuracy": 0.8389473684210527 }, { "epoch": 0.705253784505788, "grad_norm": 2.772890567779541, "learning_rate": 1.2105796387572514e-06, "loss": 0.0162, "step": 2376, "video_reward_cumulative_accuracy": 0.8390151515151515 }, { "epoch": 0.705550608489166, "grad_norm": 1.042292594909668, "learning_rate": 1.2083610939765031e-06, "loss": 0.0178, "step": 2377, "video_reward_cumulative_accuracy": 0.8390828775767775 }, { "epoch": 0.7058474324725438, "grad_norm": 2.8607852458953857, "learning_rate": 1.2061439358954862e-06, "loss": 0.0205, "step": 2378, "video_reward_cumulative_accuracy": 0.8391505466778806 }, { "epoch": 0.7061442564559216, "grad_norm": 1.2580045461654663, "learning_rate": 1.203928166894532e-06, "loss": 0.0118, "step": 2379, "video_reward_cumulative_accuracy": 0.8392181588902901 }, { "epoch": 0.7064410804392995, "grad_norm": 2.3520500659942627, "learning_rate": 1.2017137893524851e-06, "loss": 0.0244, "step": 2380, "video_reward_cumulative_accuracy": 0.8392857142857143 }, { "epoch": 0.7067379044226774, "grad_norm": 0.5174872875213623, "learning_rate": 1.1995008056466933e-06, "loss": 0.0071, "step": 2381, "video_reward_cumulative_accuracy": 0.8393532129357413 }, { "epoch": 0.7070347284060552, "grad_norm": 1.8015003204345703, "learning_rate": 1.19728921815301e-06, "loss": 0.019, "step": 2382, "video_reward_cumulative_accuracy": 0.8394206549118388 }, { "epoch": 0.707331552389433, "grad_norm": 1.3630971908569336, "learning_rate": 1.1950790292457893e-06, "loss": 0.0423, "step": 2383, "video_reward_cumulative_accuracy": 0.8394880402853546 }, { "epoch": 0.707628376372811, "grad_norm": 3.022733449935913, "learning_rate": 1.1928702412978833e-06, "loss": 0.066, "step": 2384, "video_reward_cumulative_accuracy": 0.8395553691275168 }, { "epoch": 0.7079252003561888, "grad_norm": 1.3637797832489014, "learning_rate": 1.1906628566806414e-06, "loss": 0.0447, "step": 2385, "video_reward_cumulative_accuracy": 0.8394129979035639 }, { "epoch": 0.7082220243395666, "grad_norm": 0.23082542419433594, "learning_rate": 1.188456877763903e-06, "loss": 0.0028, "step": 2386, "video_reward_cumulative_accuracy": 0.8394803017602682 }, { "epoch": 0.7085188483229445, "grad_norm": 3.9995596408843994, "learning_rate": 1.1862523069160017e-06, "loss": 0.0492, "step": 2387, "video_reward_cumulative_accuracy": 0.8395475492249685 }, { "epoch": 0.7088156723063224, "grad_norm": 2.762430429458618, "learning_rate": 1.1840491465037584e-06, "loss": 0.0473, "step": 2388, "video_reward_cumulative_accuracy": 0.8396147403685092 }, { "epoch": 0.7091124962897002, "grad_norm": 1.9774202108383179, "learning_rate": 1.1818473988924797e-06, "loss": 0.0493, "step": 2389, "video_reward_cumulative_accuracy": 0.8396818752616158 }, { "epoch": 0.709409320273078, "grad_norm": 1.2784936428070068, "learning_rate": 1.179647066445956e-06, "loss": 0.0216, "step": 2390, "video_reward_cumulative_accuracy": 0.8397489539748954 }, { "epoch": 0.709706144256456, "grad_norm": 3.470581531524658, "learning_rate": 1.177448151526456e-06, "loss": 0.0223, "step": 2391, "video_reward_cumulative_accuracy": 0.8398159765788373 }, { "epoch": 0.7100029682398338, "grad_norm": 2.1586861610412598, "learning_rate": 1.1752506564947294e-06, "loss": 0.0198, "step": 2392, "video_reward_cumulative_accuracy": 0.8398829431438127 }, { "epoch": 0.7102997922232116, "grad_norm": 2.96563982963562, "learning_rate": 1.1730545837099999e-06, "loss": 0.071, "step": 2393, "video_reward_cumulative_accuracy": 0.8399498537400752 }, { "epoch": 0.7105966162065895, "grad_norm": 1.8292112350463867, "learning_rate": 1.1708599355299662e-06, "loss": 0.0207, "step": 2394, "video_reward_cumulative_accuracy": 0.8400167084377611 }, { "epoch": 0.7108934401899674, "grad_norm": 4.007872581481934, "learning_rate": 1.168666714310794e-06, "loss": 0.0726, "step": 2395, "video_reward_cumulative_accuracy": 0.8400835073068893 }, { "epoch": 0.7111902641733452, "grad_norm": 3.1843671798706055, "learning_rate": 1.1664749224071203e-06, "loss": 0.0496, "step": 2396, "video_reward_cumulative_accuracy": 0.8401502504173622 }, { "epoch": 0.711487088156723, "grad_norm": 3.3738343715667725, "learning_rate": 1.1642845621720463e-06, "loss": 0.0419, "step": 2397, "video_reward_cumulative_accuracy": 0.8402169378389653 }, { "epoch": 0.711783912140101, "grad_norm": 1.7037007808685303, "learning_rate": 1.1620956359571364e-06, "loss": 0.0587, "step": 2398, "video_reward_cumulative_accuracy": 0.8402835696413679 }, { "epoch": 0.7120807361234788, "grad_norm": 2.8686163425445557, "learning_rate": 1.1599081461124161e-06, "loss": 0.0247, "step": 2399, "video_reward_cumulative_accuracy": 0.8403501458941226 }, { "epoch": 0.7123775601068566, "grad_norm": 0.6228769421577454, "learning_rate": 1.1577220949863663e-06, "loss": 0.0058, "step": 2400, "video_reward_cumulative_accuracy": 0.8404166666666667 }, { "epoch": 0.7123775601068566, "eval_runtime": 130.8618, "eval_samples_per_second": 6.029, "eval_steps_per_second": 0.757, "eval_test_set_accuracy": 0.8131313131313131, "step": 2400 }, { "epoch": 0.7126743840902345, "grad_norm": 1.7672585248947144, "learning_rate": 1.155537484925926e-06, "loss": 0.0586, "step": 2401, "video_reward_cumulative_accuracy": 0.8404831320283215 }, { "epoch": 0.7129712080736124, "grad_norm": 2.9688684940338135, "learning_rate": 1.153354318276486e-06, "loss": 0.0505, "step": 2402, "video_reward_cumulative_accuracy": 0.8405495420482931 }, { "epoch": 0.7132680320569902, "grad_norm": 2.505089044570923, "learning_rate": 1.1511725973818879e-06, "loss": 0.0257, "step": 2403, "video_reward_cumulative_accuracy": 0.8406158967956721 }, { "epoch": 0.713564856040368, "grad_norm": 0.5991393327713013, "learning_rate": 1.1489923245844214e-06, "loss": 0.0085, "step": 2404, "video_reward_cumulative_accuracy": 0.8406821963394343 }, { "epoch": 0.713861680023746, "grad_norm": 1.6461894512176514, "learning_rate": 1.1468135022248195e-06, "loss": 0.0169, "step": 2405, "video_reward_cumulative_accuracy": 0.8407484407484408 }, { "epoch": 0.7141585040071238, "grad_norm": 3.0698747634887695, "learning_rate": 1.14463613264226e-06, "loss": 0.0623, "step": 2406, "video_reward_cumulative_accuracy": 0.8408146300914381 }, { "epoch": 0.7144553279905016, "grad_norm": 2.5160300731658936, "learning_rate": 1.1424602181743609e-06, "loss": 0.066, "step": 2407, "video_reward_cumulative_accuracy": 0.8406730369754881 }, { "epoch": 0.7147521519738795, "grad_norm": 1.7137246131896973, "learning_rate": 1.1402857611571772e-06, "loss": 0.0148, "step": 2408, "video_reward_cumulative_accuracy": 0.8407392026578073 }, { "epoch": 0.7150489759572574, "grad_norm": 1.8538068532943726, "learning_rate": 1.1381127639252005e-06, "loss": 0.0292, "step": 2409, "video_reward_cumulative_accuracy": 0.8408053134080531 }, { "epoch": 0.7153457999406352, "grad_norm": 4.450146675109863, "learning_rate": 1.1359412288113526e-06, "loss": 0.031, "step": 2410, "video_reward_cumulative_accuracy": 0.8404564315352697 }, { "epoch": 0.715642623924013, "grad_norm": 1.273587703704834, "learning_rate": 1.133771158146988e-06, "loss": 0.0124, "step": 2411, "video_reward_cumulative_accuracy": 0.8405226047283285 }, { "epoch": 0.715939447907391, "grad_norm": 2.9038286209106445, "learning_rate": 1.131602554261888e-06, "loss": 0.0557, "step": 2412, "video_reward_cumulative_accuracy": 0.8405887230514096 }, { "epoch": 0.7162362718907688, "grad_norm": 1.5233856439590454, "learning_rate": 1.1294354194842597e-06, "loss": 0.0135, "step": 2413, "video_reward_cumulative_accuracy": 0.8406547865727311 }, { "epoch": 0.7165330958741466, "grad_norm": 0.3505679666996002, "learning_rate": 1.1272697561407334e-06, "loss": 0.004, "step": 2414, "video_reward_cumulative_accuracy": 0.8407207953603977 }, { "epoch": 0.7168299198575245, "grad_norm": 3.6674511432647705, "learning_rate": 1.125105566556357e-06, "loss": 0.0877, "step": 2415, "video_reward_cumulative_accuracy": 0.8405797101449275 }, { "epoch": 0.7171267438409024, "grad_norm": 0.8542490601539612, "learning_rate": 1.1229428530546002e-06, "loss": 0.0147, "step": 2416, "video_reward_cumulative_accuracy": 0.8406456953642384 }, { "epoch": 0.7174235678242802, "grad_norm": 2.0247857570648193, "learning_rate": 1.1207816179573427e-06, "loss": 0.0484, "step": 2417, "video_reward_cumulative_accuracy": 0.8407116259826231 }, { "epoch": 0.717720391807658, "grad_norm": 1.7398759126663208, "learning_rate": 1.1186218635848838e-06, "loss": 0.0595, "step": 2418, "video_reward_cumulative_accuracy": 0.8407775020678246 }, { "epoch": 0.718017215791036, "grad_norm": 1.2904212474822998, "learning_rate": 1.1164635922559273e-06, "loss": 0.07, "step": 2419, "video_reward_cumulative_accuracy": 0.8408433236874742 }, { "epoch": 0.7183140397744138, "grad_norm": 3.129547595977783, "learning_rate": 1.114306806287587e-06, "loss": 0.0425, "step": 2420, "video_reward_cumulative_accuracy": 0.8409090909090909 }, { "epoch": 0.7186108637577916, "grad_norm": 3.4823479652404785, "learning_rate": 1.1121515079953834e-06, "loss": 0.0369, "step": 2421, "video_reward_cumulative_accuracy": 0.8409748038000826 }, { "epoch": 0.7189076877411695, "grad_norm": 2.5278398990631104, "learning_rate": 1.1099976996932357e-06, "loss": 0.0397, "step": 2422, "video_reward_cumulative_accuracy": 0.8410404624277457 }, { "epoch": 0.7192045117245474, "grad_norm": 1.669782280921936, "learning_rate": 1.1078453836934697e-06, "loss": 0.0514, "step": 2423, "video_reward_cumulative_accuracy": 0.8406933553446141 }, { "epoch": 0.7195013357079252, "grad_norm": 1.4766911268234253, "learning_rate": 1.1056945623068023e-06, "loss": 0.026, "step": 2424, "video_reward_cumulative_accuracy": 0.8407590759075908 }, { "epoch": 0.719798159691303, "grad_norm": 2.538355588912964, "learning_rate": 1.1035452378423512e-06, "loss": 0.0246, "step": 2425, "video_reward_cumulative_accuracy": 0.8408247422680413 }, { "epoch": 0.7200949836746809, "grad_norm": 2.352414131164551, "learning_rate": 1.1013974126076243e-06, "loss": 0.0457, "step": 2426, "video_reward_cumulative_accuracy": 0.8408903544929925 }, { "epoch": 0.7203918076580588, "grad_norm": 1.7153733968734741, "learning_rate": 1.0992510889085187e-06, "loss": 0.0419, "step": 2427, "video_reward_cumulative_accuracy": 0.8409559126493613 }, { "epoch": 0.7206886316414366, "grad_norm": 4.937928199768066, "learning_rate": 1.0971062690493242e-06, "loss": 0.045, "step": 2428, "video_reward_cumulative_accuracy": 0.8410214168039539 }, { "epoch": 0.7209854556248145, "grad_norm": 1.5569862127304077, "learning_rate": 1.0949629553327106e-06, "loss": 0.0175, "step": 2429, "video_reward_cumulative_accuracy": 0.8410868670234665 }, { "epoch": 0.7212822796081924, "grad_norm": 0.807574987411499, "learning_rate": 1.0928211500597355e-06, "loss": 0.0236, "step": 2430, "video_reward_cumulative_accuracy": 0.8411522633744856 }, { "epoch": 0.7215791035915702, "grad_norm": 1.268849492073059, "learning_rate": 1.0906808555298323e-06, "loss": 0.0497, "step": 2431, "video_reward_cumulative_accuracy": 0.8410119292472233 }, { "epoch": 0.721875927574948, "grad_norm": 3.1053664684295654, "learning_rate": 1.088542074040816e-06, "loss": 0.0356, "step": 2432, "video_reward_cumulative_accuracy": 0.841077302631579 }, { "epoch": 0.7221727515583259, "grad_norm": 2.7275187969207764, "learning_rate": 1.0864048078888758e-06, "loss": 0.0367, "step": 2433, "video_reward_cumulative_accuracy": 0.8411426222770243 }, { "epoch": 0.7224695755417038, "grad_norm": 3.4648077487945557, "learning_rate": 1.084269059368575e-06, "loss": 0.041, "step": 2434, "video_reward_cumulative_accuracy": 0.8412078882497945 }, { "epoch": 0.7227663995250816, "grad_norm": 1.614362359046936, "learning_rate": 1.0821348307728478e-06, "loss": 0.0259, "step": 2435, "video_reward_cumulative_accuracy": 0.8412731006160165 }, { "epoch": 0.7230632235084595, "grad_norm": 3.655402898788452, "learning_rate": 1.0800021243929931e-06, "loss": 0.0496, "step": 2436, "video_reward_cumulative_accuracy": 0.8411330049261084 }, { "epoch": 0.7233600474918374, "grad_norm": 2.584467649459839, "learning_rate": 1.0778709425186801e-06, "loss": 0.0659, "step": 2437, "video_reward_cumulative_accuracy": 0.8409930242100944 }, { "epoch": 0.7236568714752152, "grad_norm": 3.22719669342041, "learning_rate": 1.0757412874379386e-06, "loss": 0.0443, "step": 2438, "video_reward_cumulative_accuracy": 0.8408531583264971 }, { "epoch": 0.723953695458593, "grad_norm": 0.4424673318862915, "learning_rate": 1.0736131614371602e-06, "loss": 0.0074, "step": 2439, "video_reward_cumulative_accuracy": 0.8409184091840919 }, { "epoch": 0.7242505194419709, "grad_norm": 0.7209199070930481, "learning_rate": 1.0714865668010962e-06, "loss": 0.03, "step": 2440, "video_reward_cumulative_accuracy": 0.840983606557377 }, { "epoch": 0.7245473434253488, "grad_norm": 2.2565574645996094, "learning_rate": 1.0693615058128502e-06, "loss": 0.0298, "step": 2441, "video_reward_cumulative_accuracy": 0.8410487505120852 }, { "epoch": 0.7248441674087266, "grad_norm": 2.199859857559204, "learning_rate": 1.0672379807538818e-06, "loss": 0.0788, "step": 2442, "video_reward_cumulative_accuracy": 0.8411138411138411 }, { "epoch": 0.7251409913921045, "grad_norm": 1.1875553131103516, "learning_rate": 1.0651159939040017e-06, "loss": 0.0301, "step": 2443, "video_reward_cumulative_accuracy": 0.8409742120343839 }, { "epoch": 0.7254378153754824, "grad_norm": 2.6030995845794678, "learning_rate": 1.0629955475413691e-06, "loss": 0.0283, "step": 2444, "video_reward_cumulative_accuracy": 0.8410392798690671 }, { "epoch": 0.7257346393588602, "grad_norm": 2.654780864715576, "learning_rate": 1.0608766439424895e-06, "loss": 0.0215, "step": 2445, "video_reward_cumulative_accuracy": 0.8411042944785276 }, { "epoch": 0.726031463342238, "grad_norm": 1.8764584064483643, "learning_rate": 1.0587592853822096e-06, "loss": 0.038, "step": 2446, "video_reward_cumulative_accuracy": 0.8411692559280458 }, { "epoch": 0.7263282873256159, "grad_norm": 2.492081642150879, "learning_rate": 1.0566434741337204e-06, "loss": 0.0329, "step": 2447, "video_reward_cumulative_accuracy": 0.8412341642827953 }, { "epoch": 0.7266251113089938, "grad_norm": 2.2990763187408447, "learning_rate": 1.0545292124685506e-06, "loss": 0.0288, "step": 2448, "video_reward_cumulative_accuracy": 0.8412990196078431 }, { "epoch": 0.7269219352923716, "grad_norm": 1.3020623922348022, "learning_rate": 1.0524165026565655e-06, "loss": 0.0178, "step": 2449, "video_reward_cumulative_accuracy": 0.8413638219681503 }, { "epoch": 0.7272187592757495, "grad_norm": 0.8552646040916443, "learning_rate": 1.0503053469659647e-06, "loss": 0.0229, "step": 2450, "video_reward_cumulative_accuracy": 0.8414285714285714 }, { "epoch": 0.7275155832591274, "grad_norm": 2.16868257522583, "learning_rate": 1.0481957476632773e-06, "loss": 0.0564, "step": 2451, "video_reward_cumulative_accuracy": 0.8412892696858425 }, { "epoch": 0.7278124072425052, "grad_norm": 0.4563276767730713, "learning_rate": 1.0460877070133634e-06, "loss": 0.012, "step": 2452, "video_reward_cumulative_accuracy": 0.8413539967373572 }, { "epoch": 0.728109231225883, "grad_norm": 3.3756730556488037, "learning_rate": 1.0439812272794096e-06, "loss": 0.0633, "step": 2453, "video_reward_cumulative_accuracy": 0.8414186710150836 }, { "epoch": 0.7284060552092609, "grad_norm": 5.538758754730225, "learning_rate": 1.0418763107229271e-06, "loss": 0.0483, "step": 2454, "video_reward_cumulative_accuracy": 0.841483292583537 }, { "epoch": 0.7287028791926388, "grad_norm": 1.5109965801239014, "learning_rate": 1.0397729596037463e-06, "loss": 0.0185, "step": 2455, "video_reward_cumulative_accuracy": 0.8415478615071283 }, { "epoch": 0.7289997031760166, "grad_norm": 2.8143222332000732, "learning_rate": 1.0376711761800196e-06, "loss": 0.0553, "step": 2456, "video_reward_cumulative_accuracy": 0.8416123778501629 }, { "epoch": 0.7292965271593945, "grad_norm": 1.900389313697815, "learning_rate": 1.0355709627082155e-06, "loss": 0.0637, "step": 2457, "video_reward_cumulative_accuracy": 0.8414733414733415 }, { "epoch": 0.7295933511427724, "grad_norm": 2.070751905441284, "learning_rate": 1.0334723214431175e-06, "loss": 0.0273, "step": 2458, "video_reward_cumulative_accuracy": 0.8415378356387306 }, { "epoch": 0.7298901751261502, "grad_norm": 3.6097755432128906, "learning_rate": 1.031375254637821e-06, "loss": 0.0496, "step": 2459, "video_reward_cumulative_accuracy": 0.8413989426596177 }, { "epoch": 0.730186999109528, "grad_norm": 2.63653302192688, "learning_rate": 1.0292797645437288e-06, "loss": 0.0594, "step": 2460, "video_reward_cumulative_accuracy": 0.8412601626016261 }, { "epoch": 0.7304838230929059, "grad_norm": 3.204415798187256, "learning_rate": 1.0271858534105547e-06, "loss": 0.0269, "step": 2461, "video_reward_cumulative_accuracy": 0.8411214953271028 }, { "epoch": 0.7307806470762838, "grad_norm": 1.9313973188400269, "learning_rate": 1.0250935234863147e-06, "loss": 0.0336, "step": 2462, "video_reward_cumulative_accuracy": 0.8411860276198213 }, { "epoch": 0.7310774710596616, "grad_norm": 1.1721389293670654, "learning_rate": 1.0230027770173282e-06, "loss": 0.0178, "step": 2463, "video_reward_cumulative_accuracy": 0.8412505075111653 }, { "epoch": 0.7313742950430395, "grad_norm": 2.0116591453552246, "learning_rate": 1.0209136162482155e-06, "loss": 0.026, "step": 2464, "video_reward_cumulative_accuracy": 0.841314935064935 }, { "epoch": 0.7316711190264173, "grad_norm": 2.902461290359497, "learning_rate": 1.0188260434218919e-06, "loss": 0.0487, "step": 2465, "video_reward_cumulative_accuracy": 0.8411764705882353 }, { "epoch": 0.7319679430097952, "grad_norm": 0.5362818241119385, "learning_rate": 1.0167400607795708e-06, "loss": 0.0077, "step": 2466, "video_reward_cumulative_accuracy": 0.8412408759124088 }, { "epoch": 0.732264766993173, "grad_norm": 1.2604457139968872, "learning_rate": 1.0146556705607544e-06, "loss": 0.0254, "step": 2467, "video_reward_cumulative_accuracy": 0.841305229023105 }, { "epoch": 0.7325615909765509, "grad_norm": 0.365590900182724, "learning_rate": 1.012572875003241e-06, "loss": 0.0057, "step": 2468, "video_reward_cumulative_accuracy": 0.8413695299837926 }, { "epoch": 0.7328584149599288, "grad_norm": 0.8860239386558533, "learning_rate": 1.0104916763431133e-06, "loss": 0.0209, "step": 2469, "video_reward_cumulative_accuracy": 0.8414337788578372 }, { "epoch": 0.7331552389433066, "grad_norm": 1.316934585571289, "learning_rate": 1.0084120768147385e-06, "loss": 0.0112, "step": 2470, "video_reward_cumulative_accuracy": 0.841497975708502 }, { "epoch": 0.7334520629266845, "grad_norm": 1.1632755994796753, "learning_rate": 1.00633407865077e-06, "loss": 0.0526, "step": 2471, "video_reward_cumulative_accuracy": 0.8413597733711048 }, { "epoch": 0.7337488869100623, "grad_norm": 0.5752823352813721, "learning_rate": 1.0042576840821394e-06, "loss": 0.0115, "step": 2472, "video_reward_cumulative_accuracy": 0.8414239482200647 }, { "epoch": 0.7340457108934402, "grad_norm": 2.8331027030944824, "learning_rate": 1.0021828953380572e-06, "loss": 0.0209, "step": 2473, "video_reward_cumulative_accuracy": 0.8414880711686211 }, { "epoch": 0.734342534876818, "grad_norm": 3.890144109725952, "learning_rate": 1.0001097146460134e-06, "loss": 0.0889, "step": 2474, "video_reward_cumulative_accuracy": 0.8413500404203719 }, { "epoch": 0.7346393588601959, "grad_norm": 1.1951637268066406, "learning_rate": 9.980381442317661e-07, "loss": 0.0127, "step": 2475, "video_reward_cumulative_accuracy": 0.8414141414141414 }, { "epoch": 0.7349361828435738, "grad_norm": 1.1834206581115723, "learning_rate": 9.959681863193489e-07, "loss": 0.0174, "step": 2476, "video_reward_cumulative_accuracy": 0.8414781906300485 }, { "epoch": 0.7352330068269516, "grad_norm": 0.8492854833602905, "learning_rate": 9.938998431310604e-07, "loss": 0.0202, "step": 2477, "video_reward_cumulative_accuracy": 0.8415421881308034 }, { "epoch": 0.7355298308103295, "grad_norm": 1.2624728679656982, "learning_rate": 9.918331168874693e-07, "loss": 0.0083, "step": 2478, "video_reward_cumulative_accuracy": 0.8416061339790153 }, { "epoch": 0.7358266547937073, "grad_norm": 2.332775831222534, "learning_rate": 9.897680098074063e-07, "loss": 0.0323, "step": 2479, "video_reward_cumulative_accuracy": 0.8416700282371924 }, { "epoch": 0.7361234787770852, "grad_norm": 2.9723246097564697, "learning_rate": 9.877045241079647e-07, "loss": 0.0763, "step": 2480, "video_reward_cumulative_accuracy": 0.8415322580645161 }, { "epoch": 0.736420302760463, "grad_norm": 1.4844021797180176, "learning_rate": 9.85642662004497e-07, "loss": 0.0088, "step": 2481, "video_reward_cumulative_accuracy": 0.841596130592503 }, { "epoch": 0.7367171267438409, "grad_norm": 0.8543074727058411, "learning_rate": 9.835824257106112e-07, "loss": 0.019, "step": 2482, "video_reward_cumulative_accuracy": 0.8416599516518937 }, { "epoch": 0.7370139507272188, "grad_norm": 1.366849422454834, "learning_rate": 9.815238174381711e-07, "loss": 0.0092, "step": 2483, "video_reward_cumulative_accuracy": 0.8417237213048732 }, { "epoch": 0.7373107747105966, "grad_norm": 0.7628781199455261, "learning_rate": 9.794668393972932e-07, "loss": 0.0266, "step": 2484, "video_reward_cumulative_accuracy": 0.8417874396135265 }, { "epoch": 0.7376075986939745, "grad_norm": 3.3191943168640137, "learning_rate": 9.774114937963425e-07, "loss": 0.0279, "step": 2485, "video_reward_cumulative_accuracy": 0.8418511066398391 }, { "epoch": 0.7379044226773523, "grad_norm": 0.4921557903289795, "learning_rate": 9.753577828419331e-07, "loss": 0.0094, "step": 2486, "video_reward_cumulative_accuracy": 0.8419147224456959 }, { "epoch": 0.7382012466607302, "grad_norm": 5.423759937286377, "learning_rate": 9.73305708738921e-07, "loss": 0.0882, "step": 2487, "video_reward_cumulative_accuracy": 0.8415761962203459 }, { "epoch": 0.738498070644108, "grad_norm": 2.609516143798828, "learning_rate": 9.712552736904085e-07, "loss": 0.02, "step": 2488, "video_reward_cumulative_accuracy": 0.8416398713826366 }, { "epoch": 0.7387948946274859, "grad_norm": 2.0234172344207764, "learning_rate": 9.69206479897736e-07, "loss": 0.0207, "step": 2489, "video_reward_cumulative_accuracy": 0.8417034953796706 }, { "epoch": 0.7390917186108638, "grad_norm": 2.902467966079712, "learning_rate": 9.671593295604836e-07, "loss": 0.0226, "step": 2490, "video_reward_cumulative_accuracy": 0.8417670682730923 }, { "epoch": 0.7393885425942416, "grad_norm": 1.8308614492416382, "learning_rate": 9.65113824876464e-07, "loss": 0.0383, "step": 2491, "video_reward_cumulative_accuracy": 0.841830590124448 }, { "epoch": 0.7396853665776195, "grad_norm": 0.8829131722450256, "learning_rate": 9.63069968041726e-07, "loss": 0.0106, "step": 2492, "video_reward_cumulative_accuracy": 0.8418940609951846 }, { "epoch": 0.7399821905609973, "grad_norm": 1.45628821849823, "learning_rate": 9.610277612505483e-07, "loss": 0.0142, "step": 2493, "video_reward_cumulative_accuracy": 0.8419574809466506 }, { "epoch": 0.7402790145443752, "grad_norm": 2.1599154472351074, "learning_rate": 9.58987206695438e-07, "loss": 0.0342, "step": 2494, "video_reward_cumulative_accuracy": 0.8420208500400962 }, { "epoch": 0.740575838527753, "grad_norm": 3.008652687072754, "learning_rate": 9.569483065671294e-07, "loss": 0.0649, "step": 2495, "video_reward_cumulative_accuracy": 0.8420841683366733 }, { "epoch": 0.7408726625111309, "grad_norm": 1.7300156354904175, "learning_rate": 9.549110630545783e-07, "loss": 0.025, "step": 2496, "video_reward_cumulative_accuracy": 0.8419471153846154 }, { "epoch": 0.7411694864945088, "grad_norm": 1.144446611404419, "learning_rate": 9.528754783449634e-07, "loss": 0.0149, "step": 2497, "video_reward_cumulative_accuracy": 0.842010412494994 }, { "epoch": 0.7414663104778866, "grad_norm": 0.29633480310440063, "learning_rate": 9.508415546236829e-07, "loss": 0.004, "step": 2498, "video_reward_cumulative_accuracy": 0.8420736589271417 }, { "epoch": 0.7417631344612645, "grad_norm": 2.104227304458618, "learning_rate": 9.488092940743516e-07, "loss": 0.0251, "step": 2499, "video_reward_cumulative_accuracy": 0.8421368547418968 }, { "epoch": 0.7420599584446423, "grad_norm": 2.1143417358398438, "learning_rate": 9.467786988787989e-07, "loss": 0.0524, "step": 2500, "video_reward_cumulative_accuracy": 0.8422 }, { "epoch": 0.7423567824280202, "grad_norm": 3.5316734313964844, "learning_rate": 9.447497712170642e-07, "loss": 0.0873, "step": 2501, "video_reward_cumulative_accuracy": 0.8422630947620952 }, { "epoch": 0.742653606411398, "grad_norm": 1.2431542873382568, "learning_rate": 9.427225132673992e-07, "loss": 0.0786, "step": 2502, "video_reward_cumulative_accuracy": 0.842326139088729 }, { "epoch": 0.7429504303947759, "grad_norm": 0.4430326819419861, "learning_rate": 9.406969272062619e-07, "loss": 0.0069, "step": 2503, "video_reward_cumulative_accuracy": 0.8423891330403516 }, { "epoch": 0.7432472543781538, "grad_norm": 2.5912365913391113, "learning_rate": 9.386730152083156e-07, "loss": 0.0423, "step": 2504, "video_reward_cumulative_accuracy": 0.8422523961661342 }, { "epoch": 0.7435440783615316, "grad_norm": 1.016870379447937, "learning_rate": 9.366507794464275e-07, "loss": 0.0096, "step": 2505, "video_reward_cumulative_accuracy": 0.8423153692614771 }, { "epoch": 0.7438409023449095, "grad_norm": 0.46760135889053345, "learning_rate": 9.346302220916619e-07, "loss": 0.004, "step": 2506, "video_reward_cumulative_accuracy": 0.8423782920989625 }, { "epoch": 0.7441377263282873, "grad_norm": 2.0241291522979736, "learning_rate": 9.326113453132848e-07, "loss": 0.051, "step": 2507, "video_reward_cumulative_accuracy": 0.8424411647387315 }, { "epoch": 0.7444345503116652, "grad_norm": 1.800049901008606, "learning_rate": 9.305941512787542e-07, "loss": 0.0245, "step": 2508, "video_reward_cumulative_accuracy": 0.8425039872408293 }, { "epoch": 0.744731374295043, "grad_norm": 0.5981684923171997, "learning_rate": 9.28578642153726e-07, "loss": 0.0119, "step": 2509, "video_reward_cumulative_accuracy": 0.8425667596652052 }, { "epoch": 0.7450281982784209, "grad_norm": 2.190657615661621, "learning_rate": 9.265648201020447e-07, "loss": 0.0158, "step": 2510, "video_reward_cumulative_accuracy": 0.8426294820717132 }, { "epoch": 0.7453250222617988, "grad_norm": 3.271556854248047, "learning_rate": 9.245526872857424e-07, "loss": 0.0234, "step": 2511, "video_reward_cumulative_accuracy": 0.8426921545201115 }, { "epoch": 0.7456218462451766, "grad_norm": 1.8033744096755981, "learning_rate": 9.225422458650404e-07, "loss": 0.0207, "step": 2512, "video_reward_cumulative_accuracy": 0.8427547770700637 }, { "epoch": 0.7459186702285545, "grad_norm": 1.6773443222045898, "learning_rate": 9.205334979983402e-07, "loss": 0.0244, "step": 2513, "video_reward_cumulative_accuracy": 0.8428173497811381 }, { "epoch": 0.7462154942119323, "grad_norm": 1.4466123580932617, "learning_rate": 9.185264458422313e-07, "loss": 0.0266, "step": 2514, "video_reward_cumulative_accuracy": 0.8428798727128083 }, { "epoch": 0.7465123181953102, "grad_norm": 0.8426374793052673, "learning_rate": 9.165210915514758e-07, "loss": 0.0225, "step": 2515, "video_reward_cumulative_accuracy": 0.8429423459244533 }, { "epoch": 0.746809142178688, "grad_norm": 1.8962173461914062, "learning_rate": 9.145174372790178e-07, "loss": 0.0292, "step": 2516, "video_reward_cumulative_accuracy": 0.8430047694753577 }, { "epoch": 0.7471059661620659, "grad_norm": 2.5202438831329346, "learning_rate": 9.125154851759749e-07, "loss": 0.0394, "step": 2517, "video_reward_cumulative_accuracy": 0.843067143424712 }, { "epoch": 0.7474027901454438, "grad_norm": 1.6238250732421875, "learning_rate": 9.105152373916346e-07, "loss": 0.069, "step": 2518, "video_reward_cumulative_accuracy": 0.8431294678316124 }, { "epoch": 0.7476996141288216, "grad_norm": 1.0544601678848267, "learning_rate": 9.085166960734604e-07, "loss": 0.0164, "step": 2519, "video_reward_cumulative_accuracy": 0.8431917427550615 }, { "epoch": 0.7479964381121995, "grad_norm": 2.0650112628936768, "learning_rate": 9.06519863367078e-07, "loss": 0.0742, "step": 2520, "video_reward_cumulative_accuracy": 0.8432539682539683 }, { "epoch": 0.7482932620955773, "grad_norm": 1.7210766077041626, "learning_rate": 9.045247414162817e-07, "loss": 0.0288, "step": 2521, "video_reward_cumulative_accuracy": 0.843316144387148 }, { "epoch": 0.7485900860789552, "grad_norm": 2.06630539894104, "learning_rate": 9.025313323630297e-07, "loss": 0.0206, "step": 2522, "video_reward_cumulative_accuracy": 0.8431800158604282 }, { "epoch": 0.748886910062333, "grad_norm": 2.9529051780700684, "learning_rate": 9.005396383474371e-07, "loss": 0.0575, "step": 2523, "video_reward_cumulative_accuracy": 0.8428458184700753 }, { "epoch": 0.7491837340457109, "grad_norm": 1.8153632879257202, "learning_rate": 8.985496615077849e-07, "loss": 0.0322, "step": 2524, "video_reward_cumulative_accuracy": 0.8429080824088748 }, { "epoch": 0.7494805580290888, "grad_norm": 2.185244560241699, "learning_rate": 8.965614039805029e-07, "loss": 0.0312, "step": 2525, "video_reward_cumulative_accuracy": 0.8429702970297029 }, { "epoch": 0.7497773820124666, "grad_norm": 0.35279035568237305, "learning_rate": 8.945748679001808e-07, "loss": 0.0041, "step": 2526, "video_reward_cumulative_accuracy": 0.8430324623911323 }, { "epoch": 0.7500742059958445, "grad_norm": 1.9734653234481812, "learning_rate": 8.925900553995564e-07, "loss": 0.0319, "step": 2527, "video_reward_cumulative_accuracy": 0.8430945785516423 }, { "epoch": 0.7503710299792223, "grad_norm": 1.5152698755264282, "learning_rate": 8.906069686095189e-07, "loss": 0.0241, "step": 2528, "video_reward_cumulative_accuracy": 0.8431566455696202 }, { "epoch": 0.7506678539626002, "grad_norm": 0.6552301049232483, "learning_rate": 8.886256096591048e-07, "loss": 0.0108, "step": 2529, "video_reward_cumulative_accuracy": 0.843218663503361 }, { "epoch": 0.750964677945978, "grad_norm": 3.156954050064087, "learning_rate": 8.866459806754948e-07, "loss": 0.0368, "step": 2530, "video_reward_cumulative_accuracy": 0.8432806324110672 }, { "epoch": 0.7512615019293559, "grad_norm": 1.5369415283203125, "learning_rate": 8.84668083784014e-07, "loss": 0.0243, "step": 2531, "video_reward_cumulative_accuracy": 0.8433425523508494 }, { "epoch": 0.7515583259127337, "grad_norm": 2.561065673828125, "learning_rate": 8.82691921108125e-07, "loss": 0.0483, "step": 2532, "video_reward_cumulative_accuracy": 0.8434044233807267 }, { "epoch": 0.7518551498961116, "grad_norm": 3.3253390789031982, "learning_rate": 8.807174947694308e-07, "loss": 0.0232, "step": 2533, "video_reward_cumulative_accuracy": 0.8434662455586261 }, { "epoch": 0.7521519738794895, "grad_norm": 2.2705724239349365, "learning_rate": 8.787448068876697e-07, "loss": 0.0315, "step": 2534, "video_reward_cumulative_accuracy": 0.8435280189423836 }, { "epoch": 0.7524487978628673, "grad_norm": 4.962090015411377, "learning_rate": 8.76773859580714e-07, "loss": 0.0608, "step": 2535, "video_reward_cumulative_accuracy": 0.8435897435897436 }, { "epoch": 0.7527456218462452, "grad_norm": 1.0470224618911743, "learning_rate": 8.748046549645675e-07, "loss": 0.0121, "step": 2536, "video_reward_cumulative_accuracy": 0.8436514195583596 }, { "epoch": 0.753042445829623, "grad_norm": 1.1866475343704224, "learning_rate": 8.72837195153361e-07, "loss": 0.0349, "step": 2537, "video_reward_cumulative_accuracy": 0.8437130469057943 }, { "epoch": 0.7533392698130009, "grad_norm": 1.4927583932876587, "learning_rate": 8.70871482259354e-07, "loss": 0.0145, "step": 2538, "video_reward_cumulative_accuracy": 0.8435776201733649 }, { "epoch": 0.7536360937963787, "grad_norm": 1.2378430366516113, "learning_rate": 8.689075183929304e-07, "loss": 0.015, "step": 2539, "video_reward_cumulative_accuracy": 0.8436392280425364 }, { "epoch": 0.7539329177797566, "grad_norm": 2.65450382232666, "learning_rate": 8.669453056625959e-07, "loss": 0.0736, "step": 2540, "video_reward_cumulative_accuracy": 0.8437007874015748 }, { "epoch": 0.7542297417631345, "grad_norm": 2.1863057613372803, "learning_rate": 8.649848461749772e-07, "loss": 0.0301, "step": 2541, "video_reward_cumulative_accuracy": 0.8437622983077528 }, { "epoch": 0.7545265657465123, "grad_norm": 1.4505633115768433, "learning_rate": 8.630261420348162e-07, "loss": 0.0232, "step": 2542, "video_reward_cumulative_accuracy": 0.8438237608182534 }, { "epoch": 0.7548233897298902, "grad_norm": 0.5751045942306519, "learning_rate": 8.610691953449727e-07, "loss": 0.0088, "step": 2543, "video_reward_cumulative_accuracy": 0.8438851749901691 }, { "epoch": 0.755120213713268, "grad_norm": 1.236505150794983, "learning_rate": 8.591140082064189e-07, "loss": 0.018, "step": 2544, "video_reward_cumulative_accuracy": 0.8439465408805031 }, { "epoch": 0.7554170376966459, "grad_norm": 1.4471156597137451, "learning_rate": 8.571605827182381e-07, "loss": 0.0264, "step": 2545, "video_reward_cumulative_accuracy": 0.844007858546169 }, { "epoch": 0.7557138616800237, "grad_norm": 1.338175892829895, "learning_rate": 8.552089209776226e-07, "loss": 0.0155, "step": 2546, "video_reward_cumulative_accuracy": 0.8440691280439906 }, { "epoch": 0.7560106856634016, "grad_norm": 1.3986073732376099, "learning_rate": 8.532590250798695e-07, "loss": 0.0298, "step": 2547, "video_reward_cumulative_accuracy": 0.8439340400471143 }, { "epoch": 0.7563075096467795, "grad_norm": 3.191577911376953, "learning_rate": 8.513108971183817e-07, "loss": 0.0398, "step": 2548, "video_reward_cumulative_accuracy": 0.8439952904238619 }, { "epoch": 0.7566043336301573, "grad_norm": 1.3186044692993164, "learning_rate": 8.493645391846642e-07, "loss": 0.0241, "step": 2549, "video_reward_cumulative_accuracy": 0.8440564927422519 }, { "epoch": 0.7569011576135352, "grad_norm": 3.0393431186676025, "learning_rate": 8.474199533683214e-07, "loss": 0.0545, "step": 2550, "video_reward_cumulative_accuracy": 0.8441176470588235 }, { "epoch": 0.757197981596913, "grad_norm": 2.7258641719818115, "learning_rate": 8.454771417570537e-07, "loss": 0.0403, "step": 2551, "video_reward_cumulative_accuracy": 0.8441787534300275 }, { "epoch": 0.7574948055802909, "grad_norm": 1.2162940502166748, "learning_rate": 8.435361064366585e-07, "loss": 0.0148, "step": 2552, "video_reward_cumulative_accuracy": 0.8442398119122257 }, { "epoch": 0.7577916295636687, "grad_norm": 0.32078516483306885, "learning_rate": 8.415968494910253e-07, "loss": 0.0036, "step": 2553, "video_reward_cumulative_accuracy": 0.8443008225616921 }, { "epoch": 0.7580884535470466, "grad_norm": 1.9234449863433838, "learning_rate": 8.396593730021355e-07, "loss": 0.0207, "step": 2554, "video_reward_cumulative_accuracy": 0.8441660140955364 }, { "epoch": 0.7583852775304245, "grad_norm": 4.319919586181641, "learning_rate": 8.377236790500584e-07, "loss": 0.0487, "step": 2555, "video_reward_cumulative_accuracy": 0.8442270058708415 }, { "epoch": 0.7586821015138023, "grad_norm": 1.7034393548965454, "learning_rate": 8.357897697129477e-07, "loss": 0.0126, "step": 2556, "video_reward_cumulative_accuracy": 0.8442879499217527 }, { "epoch": 0.7589789254971802, "grad_norm": 1.2667155265808105, "learning_rate": 8.338576470670437e-07, "loss": 0.0073, "step": 2557, "video_reward_cumulative_accuracy": 0.8443488463042628 }, { "epoch": 0.759275749480558, "grad_norm": 2.665156126022339, "learning_rate": 8.319273131866675e-07, "loss": 0.0465, "step": 2558, "video_reward_cumulative_accuracy": 0.8444096950742768 }, { "epoch": 0.7595725734639359, "grad_norm": 2.237888813018799, "learning_rate": 8.299987701442203e-07, "loss": 0.0296, "step": 2559, "video_reward_cumulative_accuracy": 0.8444704962876124 }, { "epoch": 0.7598693974473137, "grad_norm": 2.12754225730896, "learning_rate": 8.280720200101805e-07, "loss": 0.015, "step": 2560, "video_reward_cumulative_accuracy": 0.84453125 }, { "epoch": 0.7601662214306916, "grad_norm": 2.578101873397827, "learning_rate": 8.261470648530998e-07, "loss": 0.0259, "step": 2561, "video_reward_cumulative_accuracy": 0.8445919562670832 }, { "epoch": 0.7604630454140695, "grad_norm": 2.2189407348632812, "learning_rate": 8.242239067396063e-07, "loss": 0.0262, "step": 2562, "video_reward_cumulative_accuracy": 0.8446526151444185 }, { "epoch": 0.7607598693974473, "grad_norm": 1.3384156227111816, "learning_rate": 8.223025477343944e-07, "loss": 0.0228, "step": 2563, "video_reward_cumulative_accuracy": 0.8445181428014046 }, { "epoch": 0.7610566933808252, "grad_norm": 1.1982171535491943, "learning_rate": 8.203829899002316e-07, "loss": 0.0104, "step": 2564, "video_reward_cumulative_accuracy": 0.8445787831513261 }, { "epoch": 0.761353517364203, "grad_norm": 1.45271897315979, "learning_rate": 8.1846523529795e-07, "loss": 0.0164, "step": 2565, "video_reward_cumulative_accuracy": 0.8446393762183236 }, { "epoch": 0.7616503413475809, "grad_norm": 1.5878190994262695, "learning_rate": 8.165492859864435e-07, "loss": 0.0238, "step": 2566, "video_reward_cumulative_accuracy": 0.8445050662509743 }, { "epoch": 0.7619471653309587, "grad_norm": 2.7507424354553223, "learning_rate": 8.146351440226711e-07, "loss": 0.0443, "step": 2567, "video_reward_cumulative_accuracy": 0.8445656408258668 }, { "epoch": 0.7622439893143366, "grad_norm": 2.033236503601074, "learning_rate": 8.127228114616484e-07, "loss": 0.0223, "step": 2568, "video_reward_cumulative_accuracy": 0.844626168224299 }, { "epoch": 0.7625408132977145, "grad_norm": 0.4424172043800354, "learning_rate": 8.108122903564502e-07, "loss": 0.0082, "step": 2569, "video_reward_cumulative_accuracy": 0.8446866485013624 }, { "epoch": 0.7628376372810923, "grad_norm": 4.878910541534424, "learning_rate": 8.089035827582087e-07, "loss": 0.0613, "step": 2570, "video_reward_cumulative_accuracy": 0.8447470817120623 }, { "epoch": 0.7631344612644702, "grad_norm": 0.8770198822021484, "learning_rate": 8.069966907161042e-07, "loss": 0.0104, "step": 2571, "video_reward_cumulative_accuracy": 0.8448074679113186 }, { "epoch": 0.763431285247848, "grad_norm": 1.469495415687561, "learning_rate": 8.05091616277372e-07, "loss": 0.0102, "step": 2572, "video_reward_cumulative_accuracy": 0.8448678071539658 }, { "epoch": 0.7637281092312259, "grad_norm": 1.270909070968628, "learning_rate": 8.031883614872929e-07, "loss": 0.0271, "step": 2573, "video_reward_cumulative_accuracy": 0.8449280994947532 }, { "epoch": 0.7640249332146037, "grad_norm": 3.988424301147461, "learning_rate": 8.012869283891967e-07, "loss": 0.0606, "step": 2574, "video_reward_cumulative_accuracy": 0.844988344988345 }, { "epoch": 0.7643217571979816, "grad_norm": 2.989523410797119, "learning_rate": 7.993873190244558e-07, "loss": 0.0208, "step": 2575, "video_reward_cumulative_accuracy": 0.8448543689320388 }, { "epoch": 0.7646185811813595, "grad_norm": 0.9885690212249756, "learning_rate": 7.974895354324857e-07, "loss": 0.0226, "step": 2576, "video_reward_cumulative_accuracy": 0.8449145962732919 }, { "epoch": 0.7649154051647373, "grad_norm": 2.960150718688965, "learning_rate": 7.955935796507419e-07, "loss": 0.049, "step": 2577, "video_reward_cumulative_accuracy": 0.8449747768723321 }, { "epoch": 0.7652122291481152, "grad_norm": 3.4820029735565186, "learning_rate": 7.936994537147155e-07, "loss": 0.0499, "step": 2578, "video_reward_cumulative_accuracy": 0.8446470131885182 }, { "epoch": 0.765509053131493, "grad_norm": 2.2043135166168213, "learning_rate": 7.91807159657935e-07, "loss": 0.0192, "step": 2579, "video_reward_cumulative_accuracy": 0.8447072508724311 }, { "epoch": 0.7658058771148709, "grad_norm": 2.675544023513794, "learning_rate": 7.89916699511962e-07, "loss": 0.0514, "step": 2580, "video_reward_cumulative_accuracy": 0.8447674418604652 }, { "epoch": 0.7661027010982487, "grad_norm": 2.820864200592041, "learning_rate": 7.880280753063891e-07, "loss": 0.0133, "step": 2581, "video_reward_cumulative_accuracy": 0.8448275862068966 }, { "epoch": 0.7663995250816266, "grad_norm": 3.3666510581970215, "learning_rate": 7.86141289068838e-07, "loss": 0.0393, "step": 2582, "video_reward_cumulative_accuracy": 0.8448876839659178 }, { "epoch": 0.7666963490650045, "grad_norm": 3.5256621837615967, "learning_rate": 7.842563428249555e-07, "loss": 0.0183, "step": 2583, "video_reward_cumulative_accuracy": 0.8449477351916377 }, { "epoch": 0.7669931730483823, "grad_norm": 0.805884063243866, "learning_rate": 7.823732385984154e-07, "loss": 0.0258, "step": 2584, "video_reward_cumulative_accuracy": 0.8450077399380805 }, { "epoch": 0.7672899970317602, "grad_norm": 2.2179319858551025, "learning_rate": 7.804919784109124e-07, "loss": 0.0197, "step": 2585, "video_reward_cumulative_accuracy": 0.8450676982591876 }, { "epoch": 0.767586821015138, "grad_norm": 2.208432674407959, "learning_rate": 7.786125642821632e-07, "loss": 0.0248, "step": 2586, "video_reward_cumulative_accuracy": 0.8451276102088167 }, { "epoch": 0.7678836449985159, "grad_norm": 1.296506643295288, "learning_rate": 7.767349982298992e-07, "loss": 0.0078, "step": 2587, "video_reward_cumulative_accuracy": 0.8451874758407422 }, { "epoch": 0.7681804689818937, "grad_norm": 2.701901435852051, "learning_rate": 7.748592822698708e-07, "loss": 0.02, "step": 2588, "video_reward_cumulative_accuracy": 0.8450540958268934 }, { "epoch": 0.7684772929652716, "grad_norm": 4.324398517608643, "learning_rate": 7.729854184158411e-07, "loss": 0.033, "step": 2589, "video_reward_cumulative_accuracy": 0.8451139436075705 }, { "epoch": 0.7687741169486495, "grad_norm": 2.3187663555145264, "learning_rate": 7.711134086795852e-07, "loss": 0.0229, "step": 2590, "video_reward_cumulative_accuracy": 0.8451737451737452 }, { "epoch": 0.7690709409320273, "grad_norm": 3.8253369331359863, "learning_rate": 7.692432550708873e-07, "loss": 0.0558, "step": 2591, "video_reward_cumulative_accuracy": 0.8450405248938634 }, { "epoch": 0.7693677649154052, "grad_norm": 2.7652409076690674, "learning_rate": 7.673749595975378e-07, "loss": 0.0173, "step": 2592, "video_reward_cumulative_accuracy": 0.8451003086419753 }, { "epoch": 0.769664588898783, "grad_norm": 4.698938846588135, "learning_rate": 7.655085242653337e-07, "loss": 0.0321, "step": 2593, "video_reward_cumulative_accuracy": 0.8449672194369456 }, { "epoch": 0.7699614128821609, "grad_norm": 3.0443601608276367, "learning_rate": 7.636439510780747e-07, "loss": 0.0465, "step": 2594, "video_reward_cumulative_accuracy": 0.8450269853508096 }, { "epoch": 0.7702582368655387, "grad_norm": 3.421412706375122, "learning_rate": 7.617812420375611e-07, "loss": 0.0844, "step": 2595, "video_reward_cumulative_accuracy": 0.8450867052023121 }, { "epoch": 0.7705550608489166, "grad_norm": 1.4718027114868164, "learning_rate": 7.599203991435924e-07, "loss": 0.0188, "step": 2596, "video_reward_cumulative_accuracy": 0.8451463790446841 }, { "epoch": 0.7708518848322945, "grad_norm": 2.6113193035125732, "learning_rate": 7.580614243939627e-07, "loss": 0.0317, "step": 2597, "video_reward_cumulative_accuracy": 0.8452060069310743 }, { "epoch": 0.7711487088156723, "grad_norm": 3.532366991043091, "learning_rate": 7.562043197844626e-07, "loss": 0.0406, "step": 2598, "video_reward_cumulative_accuracy": 0.8452655889145496 }, { "epoch": 0.7714455327990501, "grad_norm": 1.3378589153289795, "learning_rate": 7.543490873088738e-07, "loss": 0.0172, "step": 2599, "video_reward_cumulative_accuracy": 0.8453251250480954 }, { "epoch": 0.771742356782428, "grad_norm": 6.215091228485107, "learning_rate": 7.52495728958969e-07, "loss": 0.0305, "step": 2600, "video_reward_cumulative_accuracy": 0.8453846153846154 }, { "epoch": 0.771742356782428, "eval_runtime": 132.3938, "eval_samples_per_second": 5.959, "eval_steps_per_second": 0.748, "eval_test_set_accuracy": 0.821969696969697, "step": 2600 }, { "epoch": 0.7720391807658059, "grad_norm": 2.3559024333953857, "learning_rate": 7.506442467245084e-07, "loss": 0.0387, "step": 2601, "video_reward_cumulative_accuracy": 0.845444059976932 }, { "epoch": 0.7723360047491837, "grad_norm": 3.2431516647338867, "learning_rate": 7.487946425932372e-07, "loss": 0.0431, "step": 2602, "video_reward_cumulative_accuracy": 0.8453112990007686 }, { "epoch": 0.7726328287325616, "grad_norm": 3.5290749073028564, "learning_rate": 7.469469185508854e-07, "loss": 0.0461, "step": 2603, "video_reward_cumulative_accuracy": 0.8453707260852862 }, { "epoch": 0.7729296527159395, "grad_norm": 4.477185249328613, "learning_rate": 7.451010765811628e-07, "loss": 0.0644, "step": 2604, "video_reward_cumulative_accuracy": 0.8452380952380952 }, { "epoch": 0.7732264766993173, "grad_norm": 1.7302701473236084, "learning_rate": 7.432571186657614e-07, "loss": 0.0116, "step": 2605, "video_reward_cumulative_accuracy": 0.8452975047984644 }, { "epoch": 0.7735233006826951, "grad_norm": 1.9252644777297974, "learning_rate": 7.414150467843498e-07, "loss": 0.027, "step": 2606, "video_reward_cumulative_accuracy": 0.8453568687643899 }, { "epoch": 0.773820124666073, "grad_norm": 3.0413153171539307, "learning_rate": 7.395748629145685e-07, "loss": 0.088, "step": 2607, "video_reward_cumulative_accuracy": 0.8454161871883391 }, { "epoch": 0.7741169486494509, "grad_norm": 2.653510093688965, "learning_rate": 7.37736569032036e-07, "loss": 0.0589, "step": 2608, "video_reward_cumulative_accuracy": 0.8452837423312883 }, { "epoch": 0.7744137726328287, "grad_norm": 2.2542154788970947, "learning_rate": 7.359001671103361e-07, "loss": 0.0261, "step": 2609, "video_reward_cumulative_accuracy": 0.8453430433116137 }, { "epoch": 0.7747105966162066, "grad_norm": 1.7998321056365967, "learning_rate": 7.340656591210279e-07, "loss": 0.0194, "step": 2610, "video_reward_cumulative_accuracy": 0.8454022988505747 }, { "epoch": 0.7750074205995845, "grad_norm": 2.0335445404052734, "learning_rate": 7.322330470336314e-07, "loss": 0.0157, "step": 2611, "video_reward_cumulative_accuracy": 0.845461509000383 }, { "epoch": 0.7753042445829623, "grad_norm": 3.780298948287964, "learning_rate": 7.304023328156345e-07, "loss": 0.0582, "step": 2612, "video_reward_cumulative_accuracy": 0.84552067381317 }, { "epoch": 0.7756010685663401, "grad_norm": 2.430021286010742, "learning_rate": 7.285735184324872e-07, "loss": 0.0478, "step": 2613, "video_reward_cumulative_accuracy": 0.8455797933409873 }, { "epoch": 0.775897892549718, "grad_norm": 2.428281784057617, "learning_rate": 7.267466058475969e-07, "loss": 0.0268, "step": 2614, "video_reward_cumulative_accuracy": 0.8456388676358072 }, { "epoch": 0.7761947165330959, "grad_norm": 3.825349807739258, "learning_rate": 7.249215970223347e-07, "loss": 0.033, "step": 2615, "video_reward_cumulative_accuracy": 0.8455066921606118 }, { "epoch": 0.7764915405164737, "grad_norm": 3.4468047618865967, "learning_rate": 7.230984939160227e-07, "loss": 0.0454, "step": 2616, "video_reward_cumulative_accuracy": 0.845565749235474 }, { "epoch": 0.7767883644998516, "grad_norm": 2.420203447341919, "learning_rate": 7.2127729848594e-07, "loss": 0.0323, "step": 2617, "video_reward_cumulative_accuracy": 0.8456247611769201 }, { "epoch": 0.7770851884832295, "grad_norm": 0.8875495195388794, "learning_rate": 7.194580126873155e-07, "loss": 0.0189, "step": 2618, "video_reward_cumulative_accuracy": 0.8456837280366692 }, { "epoch": 0.7773820124666073, "grad_norm": 0.7100759148597717, "learning_rate": 7.176406384733289e-07, "loss": 0.0083, "step": 2619, "video_reward_cumulative_accuracy": 0.8457426498663612 }, { "epoch": 0.7776788364499851, "grad_norm": 2.6363914012908936, "learning_rate": 7.158251777951103e-07, "loss": 0.0294, "step": 2620, "video_reward_cumulative_accuracy": 0.8456106870229008 }, { "epoch": 0.777975660433363, "grad_norm": 3.5254852771759033, "learning_rate": 7.140116326017304e-07, "loss": 0.0296, "step": 2621, "video_reward_cumulative_accuracy": 0.8456695917588707 }, { "epoch": 0.7782724844167409, "grad_norm": 2.1141459941864014, "learning_rate": 7.122000048402078e-07, "loss": 0.0356, "step": 2622, "video_reward_cumulative_accuracy": 0.8457284515636918 }, { "epoch": 0.7785693084001187, "grad_norm": 1.737390398979187, "learning_rate": 7.10390296455499e-07, "loss": 0.0434, "step": 2623, "video_reward_cumulative_accuracy": 0.8457872664887534 }, { "epoch": 0.7788661323834966, "grad_norm": 1.159108281135559, "learning_rate": 7.085825093905025e-07, "loss": 0.0135, "step": 2624, "video_reward_cumulative_accuracy": 0.8458460365853658 }, { "epoch": 0.7791629563668745, "grad_norm": 5.79228401184082, "learning_rate": 7.06776645586053e-07, "loss": 0.0594, "step": 2625, "video_reward_cumulative_accuracy": 0.8459047619047619 }, { "epoch": 0.7794597803502523, "grad_norm": 1.6138088703155518, "learning_rate": 7.049727069809206e-07, "loss": 0.0134, "step": 2626, "video_reward_cumulative_accuracy": 0.845963442498096 }, { "epoch": 0.7797566043336301, "grad_norm": 4.311069011688232, "learning_rate": 7.031706955118095e-07, "loss": 0.0717, "step": 2627, "video_reward_cumulative_accuracy": 0.8460220784164446 }, { "epoch": 0.780053428317008, "grad_norm": 1.8174479007720947, "learning_rate": 7.013706131133522e-07, "loss": 0.0319, "step": 2628, "video_reward_cumulative_accuracy": 0.8460806697108066 }, { "epoch": 0.7803502523003859, "grad_norm": 1.904004693031311, "learning_rate": 6.995724617181124e-07, "loss": 0.0252, "step": 2629, "video_reward_cumulative_accuracy": 0.8461392164321034 }, { "epoch": 0.7806470762837637, "grad_norm": 2.597764015197754, "learning_rate": 6.977762432565805e-07, "loss": 0.0188, "step": 2630, "video_reward_cumulative_accuracy": 0.8461977186311787 }, { "epoch": 0.7809439002671416, "grad_norm": 2.199734926223755, "learning_rate": 6.95981959657171e-07, "loss": 0.0377, "step": 2631, "video_reward_cumulative_accuracy": 0.8462561763587989 }, { "epoch": 0.7812407242505195, "grad_norm": 0.7549028396606445, "learning_rate": 6.941896128462227e-07, "loss": 0.018, "step": 2632, "video_reward_cumulative_accuracy": 0.8463145896656535 }, { "epoch": 0.7815375482338973, "grad_norm": 2.8155016899108887, "learning_rate": 6.923992047479921e-07, "loss": 0.0286, "step": 2633, "video_reward_cumulative_accuracy": 0.8463729586023547 }, { "epoch": 0.7818343722172751, "grad_norm": 2.58642578125, "learning_rate": 6.906107372846568e-07, "loss": 0.0239, "step": 2634, "video_reward_cumulative_accuracy": 0.8462414578587699 }, { "epoch": 0.782131196200653, "grad_norm": 2.169006109237671, "learning_rate": 6.888242123763103e-07, "loss": 0.0208, "step": 2635, "video_reward_cumulative_accuracy": 0.8462998102466793 }, { "epoch": 0.7824280201840309, "grad_norm": 2.2981460094451904, "learning_rate": 6.870396319409602e-07, "loss": 0.0529, "step": 2636, "video_reward_cumulative_accuracy": 0.8463581183611533 }, { "epoch": 0.7827248441674087, "grad_norm": 1.9309078454971313, "learning_rate": 6.852569978945281e-07, "loss": 0.0217, "step": 2637, "video_reward_cumulative_accuracy": 0.8464163822525598 }, { "epoch": 0.7830216681507866, "grad_norm": 0.8763541579246521, "learning_rate": 6.834763121508428e-07, "loss": 0.0146, "step": 2638, "video_reward_cumulative_accuracy": 0.8464746019711903 }, { "epoch": 0.7833184921341645, "grad_norm": 3.7167813777923584, "learning_rate": 6.816975766216441e-07, "loss": 0.0478, "step": 2639, "video_reward_cumulative_accuracy": 0.8465327775672603 }, { "epoch": 0.7836153161175423, "grad_norm": 4.534986972808838, "learning_rate": 6.799207932165772e-07, "loss": 0.045, "step": 2640, "video_reward_cumulative_accuracy": 0.8464015151515152 }, { "epoch": 0.7839121401009201, "grad_norm": 1.1529324054718018, "learning_rate": 6.781459638431923e-07, "loss": 0.0177, "step": 2641, "video_reward_cumulative_accuracy": 0.8464596743657705 }, { "epoch": 0.784208964084298, "grad_norm": 1.5981630086898804, "learning_rate": 6.763730904069393e-07, "loss": 0.0107, "step": 2642, "video_reward_cumulative_accuracy": 0.8465177895533686 }, { "epoch": 0.7845057880676759, "grad_norm": 2.033008575439453, "learning_rate": 6.746021748111709e-07, "loss": 0.0226, "step": 2643, "video_reward_cumulative_accuracy": 0.846575860764283 }, { "epoch": 0.7848026120510537, "grad_norm": 0.6989650726318359, "learning_rate": 6.728332189571368e-07, "loss": 0.007, "step": 2644, "video_reward_cumulative_accuracy": 0.8466338880484114 }, { "epoch": 0.7850994360344316, "grad_norm": 1.581715703010559, "learning_rate": 6.710662247439831e-07, "loss": 0.0276, "step": 2645, "video_reward_cumulative_accuracy": 0.8466918714555766 }, { "epoch": 0.7853962600178095, "grad_norm": 1.8581857681274414, "learning_rate": 6.693011940687499e-07, "loss": 0.0321, "step": 2646, "video_reward_cumulative_accuracy": 0.8465608465608465 }, { "epoch": 0.7856930840011873, "grad_norm": 1.9918158054351807, "learning_rate": 6.675381288263675e-07, "loss": 0.0228, "step": 2647, "video_reward_cumulative_accuracy": 0.8466188137514167 }, { "epoch": 0.7859899079845651, "grad_norm": 2.124476194381714, "learning_rate": 6.657770309096584e-07, "loss": 0.011, "step": 2648, "video_reward_cumulative_accuracy": 0.8466767371601208 }, { "epoch": 0.786286731967943, "grad_norm": 2.0029237270355225, "learning_rate": 6.640179022093324e-07, "loss": 0.0261, "step": 2649, "video_reward_cumulative_accuracy": 0.846734616836542 }, { "epoch": 0.7865835559513209, "grad_norm": 1.5002411603927612, "learning_rate": 6.622607446139844e-07, "loss": 0.0173, "step": 2650, "video_reward_cumulative_accuracy": 0.8467924528301887 }, { "epoch": 0.7868803799346987, "grad_norm": 2.7715702056884766, "learning_rate": 6.605055600100945e-07, "loss": 0.0615, "step": 2651, "video_reward_cumulative_accuracy": 0.8466616371180686 }, { "epoch": 0.7871772039180766, "grad_norm": 2.444265604019165, "learning_rate": 6.587523502820226e-07, "loss": 0.0486, "step": 2652, "video_reward_cumulative_accuracy": 0.8467194570135747 }, { "epoch": 0.7874740279014545, "grad_norm": 1.8649511337280273, "learning_rate": 6.570011173120108e-07, "loss": 0.043, "step": 2653, "video_reward_cumulative_accuracy": 0.8467772333207689 }, { "epoch": 0.7877708518848323, "grad_norm": 0.8448922634124756, "learning_rate": 6.552518629801752e-07, "loss": 0.0105, "step": 2654, "video_reward_cumulative_accuracy": 0.8468349660889224 }, { "epoch": 0.7880676758682101, "grad_norm": 2.1769087314605713, "learning_rate": 6.535045891645125e-07, "loss": 0.0765, "step": 2655, "video_reward_cumulative_accuracy": 0.8467043314500942 }, { "epoch": 0.788364499851588, "grad_norm": 3.1270945072174072, "learning_rate": 6.517592977408909e-07, "loss": 0.045, "step": 2656, "video_reward_cumulative_accuracy": 0.8467620481927711 }, { "epoch": 0.7886613238349659, "grad_norm": 2.3830227851867676, "learning_rate": 6.500159905830484e-07, "loss": 0.0181, "step": 2657, "video_reward_cumulative_accuracy": 0.8468197214904027 }, { "epoch": 0.7889581478183437, "grad_norm": 2.3455231189727783, "learning_rate": 6.48274669562596e-07, "loss": 0.0461, "step": 2658, "video_reward_cumulative_accuracy": 0.8466892400300978 }, { "epoch": 0.7892549718017215, "grad_norm": 1.0843396186828613, "learning_rate": 6.465353365490093e-07, "loss": 0.0176, "step": 2659, "video_reward_cumulative_accuracy": 0.8467468973298232 }, { "epoch": 0.7895517957850995, "grad_norm": 1.672133207321167, "learning_rate": 6.447979934096313e-07, "loss": 0.0171, "step": 2660, "video_reward_cumulative_accuracy": 0.8468045112781954 }, { "epoch": 0.7898486197684773, "grad_norm": 2.30859112739563, "learning_rate": 6.430626420096703e-07, "loss": 0.0343, "step": 2661, "video_reward_cumulative_accuracy": 0.8468620819240887 }, { "epoch": 0.7901454437518551, "grad_norm": 0.6453092098236084, "learning_rate": 6.413292842121927e-07, "loss": 0.0085, "step": 2662, "video_reward_cumulative_accuracy": 0.8469196093163035 }, { "epoch": 0.790442267735233, "grad_norm": 2.6562001705169678, "learning_rate": 6.395979218781276e-07, "loss": 0.0279, "step": 2663, "video_reward_cumulative_accuracy": 0.8469770935035674 }, { "epoch": 0.7907390917186109, "grad_norm": 0.6483455300331116, "learning_rate": 6.37868556866259e-07, "loss": 0.013, "step": 2664, "video_reward_cumulative_accuracy": 0.8470345345345346 }, { "epoch": 0.7910359157019887, "grad_norm": 2.082960605621338, "learning_rate": 6.361411910332288e-07, "loss": 0.027, "step": 2665, "video_reward_cumulative_accuracy": 0.8470919324577861 }, { "epoch": 0.7913327396853665, "grad_norm": 2.668041944503784, "learning_rate": 6.34415826233532e-07, "loss": 0.0209, "step": 2666, "video_reward_cumulative_accuracy": 0.8471492873218305 }, { "epoch": 0.7916295636687445, "grad_norm": 1.8569388389587402, "learning_rate": 6.326924643195151e-07, "loss": 0.027, "step": 2667, "video_reward_cumulative_accuracy": 0.8470191226096738 }, { "epoch": 0.7919263876521223, "grad_norm": 2.4845709800720215, "learning_rate": 6.309711071413752e-07, "loss": 0.0211, "step": 2668, "video_reward_cumulative_accuracy": 0.8470764617691154 }, { "epoch": 0.7922232116355001, "grad_norm": 3.088460922241211, "learning_rate": 6.292517565471548e-07, "loss": 0.0389, "step": 2669, "video_reward_cumulative_accuracy": 0.8471337579617835 }, { "epoch": 0.792520035618878, "grad_norm": 1.7615797519683838, "learning_rate": 6.275344143827442e-07, "loss": 0.0808, "step": 2670, "video_reward_cumulative_accuracy": 0.8471910112359551 }, { "epoch": 0.7928168596022559, "grad_norm": 6.1424360275268555, "learning_rate": 6.258190824918772e-07, "loss": 0.0601, "step": 2671, "video_reward_cumulative_accuracy": 0.8472482216398353 }, { "epoch": 0.7931136835856337, "grad_norm": 3.102992534637451, "learning_rate": 6.241057627161287e-07, "loss": 0.0441, "step": 2672, "video_reward_cumulative_accuracy": 0.8473053892215568 }, { "epoch": 0.7934105075690115, "grad_norm": 0.6833072900772095, "learning_rate": 6.223944568949147e-07, "loss": 0.005, "step": 2673, "video_reward_cumulative_accuracy": 0.8473625140291807 }, { "epoch": 0.7937073315523895, "grad_norm": 3.3606889247894287, "learning_rate": 6.206851668654867e-07, "loss": 0.0639, "step": 2674, "video_reward_cumulative_accuracy": 0.8474195961106956 }, { "epoch": 0.7940041555357673, "grad_norm": 2.6462979316711426, "learning_rate": 6.189778944629343e-07, "loss": 0.0401, "step": 2675, "video_reward_cumulative_accuracy": 0.8474766355140186 }, { "epoch": 0.7943009795191451, "grad_norm": 2.362200975418091, "learning_rate": 6.172726415201796e-07, "loss": 0.0143, "step": 2676, "video_reward_cumulative_accuracy": 0.8475336322869955 }, { "epoch": 0.794597803502523, "grad_norm": 3.2839114665985107, "learning_rate": 6.155694098679785e-07, "loss": 0.0293, "step": 2677, "video_reward_cumulative_accuracy": 0.8475905864774 }, { "epoch": 0.7948946274859009, "grad_norm": 3.1356849670410156, "learning_rate": 6.138682013349137e-07, "loss": 0.0445, "step": 2678, "video_reward_cumulative_accuracy": 0.8472740851381628 }, { "epoch": 0.7951914514692787, "grad_norm": 3.2600576877593994, "learning_rate": 6.121690177473983e-07, "loss": 0.0273, "step": 2679, "video_reward_cumulative_accuracy": 0.847331093691676 }, { "epoch": 0.7954882754526565, "grad_norm": 2.7981717586517334, "learning_rate": 6.104718609296709e-07, "loss": 0.0729, "step": 2680, "video_reward_cumulative_accuracy": 0.8473880597014926 }, { "epoch": 0.7957850994360345, "grad_norm": 0.5687323212623596, "learning_rate": 6.087767327037944e-07, "loss": 0.0221, "step": 2681, "video_reward_cumulative_accuracy": 0.8474449832152182 }, { "epoch": 0.7960819234194123, "grad_norm": 2.2972726821899414, "learning_rate": 6.070836348896536e-07, "loss": 0.0153, "step": 2682, "video_reward_cumulative_accuracy": 0.8475018642803878 }, { "epoch": 0.7963787474027901, "grad_norm": 0.8837341666221619, "learning_rate": 6.053925693049523e-07, "loss": 0.0174, "step": 2683, "video_reward_cumulative_accuracy": 0.8475587029444651 }, { "epoch": 0.796675571386168, "grad_norm": 3.468062400817871, "learning_rate": 6.037035377652143e-07, "loss": 0.0426, "step": 2684, "video_reward_cumulative_accuracy": 0.8476154992548435 }, { "epoch": 0.7969723953695459, "grad_norm": 2.4997260570526123, "learning_rate": 6.020165420837786e-07, "loss": 0.0243, "step": 2685, "video_reward_cumulative_accuracy": 0.8476722532588454 }, { "epoch": 0.7972692193529237, "grad_norm": 2.9322338104248047, "learning_rate": 6.003315840717991e-07, "loss": 0.0768, "step": 2686, "video_reward_cumulative_accuracy": 0.847728965003723 }, { "epoch": 0.7975660433363015, "grad_norm": 2.4431042671203613, "learning_rate": 5.986486655382423e-07, "loss": 0.0188, "step": 2687, "video_reward_cumulative_accuracy": 0.847785634536658 }, { "epoch": 0.7978628673196795, "grad_norm": 1.0281823873519897, "learning_rate": 5.96967788289883e-07, "loss": 0.0198, "step": 2688, "video_reward_cumulative_accuracy": 0.8478422619047619 }, { "epoch": 0.7981596913030573, "grad_norm": 3.702180862426758, "learning_rate": 5.95288954131307e-07, "loss": 0.0398, "step": 2689, "video_reward_cumulative_accuracy": 0.847712904425437 }, { "epoch": 0.7984565152864351, "grad_norm": 3.876319646835327, "learning_rate": 5.93612164864906e-07, "loss": 0.0481, "step": 2690, "video_reward_cumulative_accuracy": 0.8477695167286246 }, { "epoch": 0.798753339269813, "grad_norm": 1.6658989191055298, "learning_rate": 5.919374222908753e-07, "loss": 0.0348, "step": 2691, "video_reward_cumulative_accuracy": 0.8478260869565217 }, { "epoch": 0.7990501632531909, "grad_norm": 1.9231022596359253, "learning_rate": 5.902647282072149e-07, "loss": 0.0492, "step": 2692, "video_reward_cumulative_accuracy": 0.8478826151560178 }, { "epoch": 0.7993469872365687, "grad_norm": 1.0569583177566528, "learning_rate": 5.885940844097226e-07, "loss": 0.0217, "step": 2693, "video_reward_cumulative_accuracy": 0.8475677682881545 }, { "epoch": 0.7996438112199465, "grad_norm": 0.5432111024856567, "learning_rate": 5.869254926919976e-07, "loss": 0.011, "step": 2694, "video_reward_cumulative_accuracy": 0.8476243504083147 }, { "epoch": 0.7999406352033245, "grad_norm": 1.2595707178115845, "learning_rate": 5.852589548454346e-07, "loss": 0.0299, "step": 2695, "video_reward_cumulative_accuracy": 0.8476808905380334 }, { "epoch": 0.8002374591867023, "grad_norm": 1.7451565265655518, "learning_rate": 5.835944726592241e-07, "loss": 0.0576, "step": 2696, "video_reward_cumulative_accuracy": 0.8477373887240356 }, { "epoch": 0.8005342831700801, "grad_norm": 2.6311933994293213, "learning_rate": 5.8193204792035e-07, "loss": 0.0461, "step": 2697, "video_reward_cumulative_accuracy": 0.8477938450129774 }, { "epoch": 0.800831107153458, "grad_norm": 1.7497179508209229, "learning_rate": 5.802716824135849e-07, "loss": 0.0388, "step": 2698, "video_reward_cumulative_accuracy": 0.8478502594514455 }, { "epoch": 0.8011279311368359, "grad_norm": 0.3311484754085541, "learning_rate": 5.786133779214939e-07, "loss": 0.0024, "step": 2699, "video_reward_cumulative_accuracy": 0.8479066320859577 }, { "epoch": 0.8014247551202137, "grad_norm": 2.232048749923706, "learning_rate": 5.769571362244258e-07, "loss": 0.0189, "step": 2700, "video_reward_cumulative_accuracy": 0.8479629629629629 }, { "epoch": 0.8017215791035915, "grad_norm": 1.2579126358032227, "learning_rate": 5.753029591005197e-07, "loss": 0.0104, "step": 2701, "video_reward_cumulative_accuracy": 0.8480192521288412 }, { "epoch": 0.8020184030869695, "grad_norm": 1.1070455312728882, "learning_rate": 5.736508483256931e-07, "loss": 0.0259, "step": 2702, "video_reward_cumulative_accuracy": 0.8478904515173945 }, { "epoch": 0.8023152270703473, "grad_norm": 1.2897884845733643, "learning_rate": 5.720008056736476e-07, "loss": 0.0082, "step": 2703, "video_reward_cumulative_accuracy": 0.8479467258601554 }, { "epoch": 0.8026120510537251, "grad_norm": 2.983745813369751, "learning_rate": 5.703528329158653e-07, "loss": 0.037, "step": 2704, "video_reward_cumulative_accuracy": 0.8478180473372781 }, { "epoch": 0.802908875037103, "grad_norm": 3.134145975112915, "learning_rate": 5.687069318216027e-07, "loss": 0.0506, "step": 2705, "video_reward_cumulative_accuracy": 0.8476894639556377 }, { "epoch": 0.8032056990204809, "grad_norm": 1.839669108390808, "learning_rate": 5.670631041578969e-07, "loss": 0.0386, "step": 2706, "video_reward_cumulative_accuracy": 0.8477457501847746 }, { "epoch": 0.8035025230038587, "grad_norm": 1.0478723049163818, "learning_rate": 5.654213516895549e-07, "loss": 0.0117, "step": 2707, "video_reward_cumulative_accuracy": 0.8478019948282232 }, { "epoch": 0.8037993469872365, "grad_norm": 2.9105279445648193, "learning_rate": 5.637816761791573e-07, "loss": 0.0334, "step": 2708, "video_reward_cumulative_accuracy": 0.8478581979320532 }, { "epoch": 0.8040961709706145, "grad_norm": 0.682174026966095, "learning_rate": 5.621440793870564e-07, "loss": 0.0109, "step": 2709, "video_reward_cumulative_accuracy": 0.8479143595422666 }, { "epoch": 0.8043929949539923, "grad_norm": 1.712660312652588, "learning_rate": 5.605085630713686e-07, "loss": 0.0253, "step": 2710, "video_reward_cumulative_accuracy": 0.847970479704797 }, { "epoch": 0.8046898189373701, "grad_norm": 1.6642423868179321, "learning_rate": 5.588751289879823e-07, "loss": 0.0174, "step": 2711, "video_reward_cumulative_accuracy": 0.8480265584655109 }, { "epoch": 0.804986642920748, "grad_norm": 0.8261018395423889, "learning_rate": 5.572437788905455e-07, "loss": 0.0173, "step": 2712, "video_reward_cumulative_accuracy": 0.8480825958702065 }, { "epoch": 0.8052834669041259, "grad_norm": 1.6964601278305054, "learning_rate": 5.556145145304722e-07, "loss": 0.0515, "step": 2713, "video_reward_cumulative_accuracy": 0.8481385919646148 }, { "epoch": 0.8055802908875037, "grad_norm": 1.0702115297317505, "learning_rate": 5.53987337656935e-07, "loss": 0.015, "step": 2714, "video_reward_cumulative_accuracy": 0.8481945467943994 }, { "epoch": 0.8058771148708815, "grad_norm": 3.9482622146606445, "learning_rate": 5.523622500168651e-07, "loss": 0.0711, "step": 2715, "video_reward_cumulative_accuracy": 0.8482504604051565 }, { "epoch": 0.8061739388542595, "grad_norm": 1.610306739807129, "learning_rate": 5.507392533549549e-07, "loss": 0.0158, "step": 2716, "video_reward_cumulative_accuracy": 0.8483063328424153 }, { "epoch": 0.8064707628376373, "grad_norm": 2.1032094955444336, "learning_rate": 5.491183494136462e-07, "loss": 0.0246, "step": 2717, "video_reward_cumulative_accuracy": 0.8483621641516378 }, { "epoch": 0.8067675868210151, "grad_norm": 2.930027484893799, "learning_rate": 5.474995399331385e-07, "loss": 0.0331, "step": 2718, "video_reward_cumulative_accuracy": 0.8484179543782193 }, { "epoch": 0.807064410804393, "grad_norm": 3.511140823364258, "learning_rate": 5.458828266513788e-07, "loss": 0.0481, "step": 2719, "video_reward_cumulative_accuracy": 0.8482898124310408 }, { "epoch": 0.8073612347877709, "grad_norm": 1.0248184204101562, "learning_rate": 5.442682113040674e-07, "loss": 0.0169, "step": 2720, "video_reward_cumulative_accuracy": 0.8481617647058823 }, { "epoch": 0.8076580587711487, "grad_norm": 0.7856757044792175, "learning_rate": 5.426556956246495e-07, "loss": 0.0092, "step": 2721, "video_reward_cumulative_accuracy": 0.8482175670709298 }, { "epoch": 0.8079548827545265, "grad_norm": 5.376564979553223, "learning_rate": 5.410452813443182e-07, "loss": 0.0535, "step": 2722, "video_reward_cumulative_accuracy": 0.8482733284349743 }, { "epoch": 0.8082517067379045, "grad_norm": 4.981514930725098, "learning_rate": 5.394369701920096e-07, "loss": 0.0552, "step": 2723, "video_reward_cumulative_accuracy": 0.8483290488431876 }, { "epoch": 0.8085485307212823, "grad_norm": 0.30428871512413025, "learning_rate": 5.378307638944008e-07, "loss": 0.0053, "step": 2724, "video_reward_cumulative_accuracy": 0.8483847283406755 }, { "epoch": 0.8088453547046601, "grad_norm": 0.45453497767448425, "learning_rate": 5.362266641759103e-07, "loss": 0.0064, "step": 2725, "video_reward_cumulative_accuracy": 0.848440366972477 }, { "epoch": 0.809142178688038, "grad_norm": 0.8478443026542664, "learning_rate": 5.346246727586954e-07, "loss": 0.0151, "step": 2726, "video_reward_cumulative_accuracy": 0.8484959647835657 }, { "epoch": 0.8094390026714159, "grad_norm": 1.4090654850006104, "learning_rate": 5.330247913626494e-07, "loss": 0.0297, "step": 2727, "video_reward_cumulative_accuracy": 0.8485515218188485 }, { "epoch": 0.8097358266547937, "grad_norm": 1.971437931060791, "learning_rate": 5.314270217054004e-07, "loss": 0.0297, "step": 2728, "video_reward_cumulative_accuracy": 0.8486070381231672 }, { "epoch": 0.8100326506381715, "grad_norm": 1.2329754829406738, "learning_rate": 5.298313655023083e-07, "loss": 0.0111, "step": 2729, "video_reward_cumulative_accuracy": 0.8486625137412972 }, { "epoch": 0.8103294746215495, "grad_norm": 3.727600574493408, "learning_rate": 5.282378244664655e-07, "loss": 0.0434, "step": 2730, "video_reward_cumulative_accuracy": 0.8487179487179487 }, { "epoch": 0.8106262986049273, "grad_norm": 2.156374454498291, "learning_rate": 5.266464003086927e-07, "loss": 0.0274, "step": 2731, "video_reward_cumulative_accuracy": 0.8485902599780301 }, { "epoch": 0.8109231225883051, "grad_norm": 4.032077789306641, "learning_rate": 5.250570947375383e-07, "loss": 0.0297, "step": 2732, "video_reward_cumulative_accuracy": 0.8484626647144948 }, { "epoch": 0.811219946571683, "grad_norm": 2.540410280227661, "learning_rate": 5.234699094592771e-07, "loss": 0.0311, "step": 2733, "video_reward_cumulative_accuracy": 0.8485181119648738 }, { "epoch": 0.8115167705550609, "grad_norm": 1.1667871475219727, "learning_rate": 5.21884846177905e-07, "loss": 0.0121, "step": 2734, "video_reward_cumulative_accuracy": 0.8485735186539868 }, { "epoch": 0.8118135945384387, "grad_norm": 4.324409484863281, "learning_rate": 5.203019065951417e-07, "loss": 0.0714, "step": 2735, "video_reward_cumulative_accuracy": 0.8486288848263254 }, { "epoch": 0.8121104185218165, "grad_norm": 1.3035205602645874, "learning_rate": 5.187210924104269e-07, "loss": 0.0293, "step": 2736, "video_reward_cumulative_accuracy": 0.8486842105263158 }, { "epoch": 0.8124072425051945, "grad_norm": 1.0958853960037231, "learning_rate": 5.171424053209184e-07, "loss": 0.0078, "step": 2737, "video_reward_cumulative_accuracy": 0.8487394957983193 }, { "epoch": 0.8127040664885723, "grad_norm": 1.4998290538787842, "learning_rate": 5.155658470214889e-07, "loss": 0.0165, "step": 2738, "video_reward_cumulative_accuracy": 0.8487947406866326 }, { "epoch": 0.8130008904719501, "grad_norm": 3.2503786087036133, "learning_rate": 5.139914192047271e-07, "loss": 0.0309, "step": 2739, "video_reward_cumulative_accuracy": 0.8486673968601679 }, { "epoch": 0.813297714455328, "grad_norm": 3.0941522121429443, "learning_rate": 5.124191235609344e-07, "loss": 0.0287, "step": 2740, "video_reward_cumulative_accuracy": 0.8485401459854015 }, { "epoch": 0.8135945384387059, "grad_norm": 1.8579998016357422, "learning_rate": 5.108489617781226e-07, "loss": 0.0179, "step": 2741, "video_reward_cumulative_accuracy": 0.848595403137541 }, { "epoch": 0.8138913624220837, "grad_norm": 0.7049815654754639, "learning_rate": 5.092809355420137e-07, "loss": 0.0053, "step": 2742, "video_reward_cumulative_accuracy": 0.8486506199854121 }, { "epoch": 0.8141881864054615, "grad_norm": 1.6860101222991943, "learning_rate": 5.077150465360342e-07, "loss": 0.0217, "step": 2743, "video_reward_cumulative_accuracy": 0.8487057965730952 }, { "epoch": 0.8144850103888395, "grad_norm": 2.035961389541626, "learning_rate": 5.06151296441319e-07, "loss": 0.0333, "step": 2744, "video_reward_cumulative_accuracy": 0.8487609329446064 }, { "epoch": 0.8147818343722173, "grad_norm": 3.6466519832611084, "learning_rate": 5.045896869367056e-07, "loss": 0.0462, "step": 2745, "video_reward_cumulative_accuracy": 0.8486338797814208 }, { "epoch": 0.8150786583555951, "grad_norm": 2.12888503074646, "learning_rate": 5.030302196987333e-07, "loss": 0.0177, "step": 2746, "video_reward_cumulative_accuracy": 0.8486890021849963 }, { "epoch": 0.815375482338973, "grad_norm": 0.45268264412879944, "learning_rate": 5.014728964016422e-07, "loss": 0.0052, "step": 2747, "video_reward_cumulative_accuracy": 0.8487440844557699 }, { "epoch": 0.8156723063223509, "grad_norm": 3.4227840900421143, "learning_rate": 4.999177187173685e-07, "loss": 0.1001, "step": 2748, "video_reward_cumulative_accuracy": 0.8487991266375546 }, { "epoch": 0.8159691303057287, "grad_norm": 1.468361735343933, "learning_rate": 4.983646883155479e-07, "loss": 0.0192, "step": 2749, "video_reward_cumulative_accuracy": 0.8488541287740997 }, { "epoch": 0.8162659542891065, "grad_norm": 3.7149956226348877, "learning_rate": 4.968138068635076e-07, "loss": 0.0808, "step": 2750, "video_reward_cumulative_accuracy": 0.8487272727272728 }, { "epoch": 0.8165627782724845, "grad_norm": 1.8487035036087036, "learning_rate": 4.952650760262706e-07, "loss": 0.0174, "step": 2751, "video_reward_cumulative_accuracy": 0.8487822609960014 }, { "epoch": 0.8168596022558623, "grad_norm": 3.4142134189605713, "learning_rate": 4.937184974665504e-07, "loss": 0.0341, "step": 2752, "video_reward_cumulative_accuracy": 0.8488372093023255 }, { "epoch": 0.8171564262392401, "grad_norm": 2.0607471466064453, "learning_rate": 4.921740728447474e-07, "loss": 0.033, "step": 2753, "video_reward_cumulative_accuracy": 0.8487104976389394 }, { "epoch": 0.8174532502226179, "grad_norm": 0.7195733189582825, "learning_rate": 4.906318038189531e-07, "loss": 0.0067, "step": 2754, "video_reward_cumulative_accuracy": 0.8487654320987654 }, { "epoch": 0.8177500742059959, "grad_norm": 0.9425991177558899, "learning_rate": 4.890916920449415e-07, "loss": 0.0147, "step": 2755, "video_reward_cumulative_accuracy": 0.8488203266787658 }, { "epoch": 0.8180468981893737, "grad_norm": 6.290672302246094, "learning_rate": 4.87553739176172e-07, "loss": 0.0799, "step": 2756, "video_reward_cumulative_accuracy": 0.8488751814223512 }, { "epoch": 0.8183437221727515, "grad_norm": 3.862820863723755, "learning_rate": 4.860179468637882e-07, "loss": 0.0691, "step": 2757, "video_reward_cumulative_accuracy": 0.848929996372869 }, { "epoch": 0.8186405461561295, "grad_norm": 1.6942017078399658, "learning_rate": 4.844843167566104e-07, "loss": 0.0183, "step": 2758, "video_reward_cumulative_accuracy": 0.8489847715736041 }, { "epoch": 0.8189373701395073, "grad_norm": 4.168092727661133, "learning_rate": 4.829528505011405e-07, "loss": 0.091, "step": 2759, "video_reward_cumulative_accuracy": 0.8488582819862269 }, { "epoch": 0.8192341941228851, "grad_norm": 0.7059163451194763, "learning_rate": 4.81423549741555e-07, "loss": 0.0061, "step": 2760, "video_reward_cumulative_accuracy": 0.8489130434782609 }, { "epoch": 0.8195310181062629, "grad_norm": 1.3192722797393799, "learning_rate": 4.798964161197075e-07, "loss": 0.0222, "step": 2761, "video_reward_cumulative_accuracy": 0.8489677653024267 }, { "epoch": 0.8198278420896409, "grad_norm": 0.7440000772476196, "learning_rate": 4.78371451275124e-07, "loss": 0.0124, "step": 2762, "video_reward_cumulative_accuracy": 0.8490224475018103 }, { "epoch": 0.8201246660730187, "grad_norm": 3.4163613319396973, "learning_rate": 4.768486568450018e-07, "loss": 0.0335, "step": 2763, "video_reward_cumulative_accuracy": 0.8490770901194354 }, { "epoch": 0.8204214900563965, "grad_norm": 3.9325835704803467, "learning_rate": 4.7532803446420997e-07, "loss": 0.038, "step": 2764, "video_reward_cumulative_accuracy": 0.8491316931982634 }, { "epoch": 0.8207183140397745, "grad_norm": 2.8216254711151123, "learning_rate": 4.7380958576528247e-07, "loss": 0.0307, "step": 2765, "video_reward_cumulative_accuracy": 0.849005424954792 }, { "epoch": 0.8210151380231523, "grad_norm": 0.6501257419586182, "learning_rate": 4.722933123784221e-07, "loss": 0.0089, "step": 2766, "video_reward_cumulative_accuracy": 0.849060014461316 }, { "epoch": 0.8213119620065301, "grad_norm": 5.257883071899414, "learning_rate": 4.707792159314956e-07, "loss": 0.0805, "step": 2767, "video_reward_cumulative_accuracy": 0.8487531622696061 }, { "epoch": 0.8216087859899079, "grad_norm": 2.2783634662628174, "learning_rate": 4.6926729805003234e-07, "loss": 0.0209, "step": 2768, "video_reward_cumulative_accuracy": 0.848807803468208 }, { "epoch": 0.8219056099732859, "grad_norm": 2.1635773181915283, "learning_rate": 4.677575603572235e-07, "loss": 0.0443, "step": 2769, "video_reward_cumulative_accuracy": 0.8488624052004333 }, { "epoch": 0.8222024339566637, "grad_norm": 0.6879743933677673, "learning_rate": 4.6625000447391795e-07, "loss": 0.0059, "step": 2770, "video_reward_cumulative_accuracy": 0.8489169675090252 }, { "epoch": 0.8224992579400415, "grad_norm": 3.005458116531372, "learning_rate": 4.647446320186236e-07, "loss": 0.1152, "step": 2771, "video_reward_cumulative_accuracy": 0.8489714904366654 }, { "epoch": 0.8227960819234195, "grad_norm": 3.669316291809082, "learning_rate": 4.6324144460750427e-07, "loss": 0.032, "step": 2772, "video_reward_cumulative_accuracy": 0.849025974025974 }, { "epoch": 0.8230929059067973, "grad_norm": 2.6024012565612793, "learning_rate": 4.6174044385437765e-07, "loss": 0.0662, "step": 2773, "video_reward_cumulative_accuracy": 0.8489001081860801 }, { "epoch": 0.8233897298901751, "grad_norm": 1.4586189985275269, "learning_rate": 4.602416313707131e-07, "loss": 0.0176, "step": 2774, "video_reward_cumulative_accuracy": 0.8489545782263879 }, { "epoch": 0.8236865538735529, "grad_norm": 1.6324635744094849, "learning_rate": 4.5874500876563144e-07, "loss": 0.0279, "step": 2775, "video_reward_cumulative_accuracy": 0.849009009009009 }, { "epoch": 0.8239833778569309, "grad_norm": 0.6080179810523987, "learning_rate": 4.572505776459024e-07, "loss": 0.0066, "step": 2776, "video_reward_cumulative_accuracy": 0.8490634005763689 }, { "epoch": 0.8242802018403087, "grad_norm": 2.1260578632354736, "learning_rate": 4.557583396159429e-07, "loss": 0.0368, "step": 2777, "video_reward_cumulative_accuracy": 0.8489377025567159 }, { "epoch": 0.8245770258236865, "grad_norm": 3.0380239486694336, "learning_rate": 4.542682962778161e-07, "loss": 0.0527, "step": 2778, "video_reward_cumulative_accuracy": 0.8489920806335494 }, { "epoch": 0.8248738498070645, "grad_norm": 1.556333065032959, "learning_rate": 4.5278044923122654e-07, "loss": 0.0195, "step": 2779, "video_reward_cumulative_accuracy": 0.8488664987405542 }, { "epoch": 0.8251706737904423, "grad_norm": 1.5159677267074585, "learning_rate": 4.512948000735234e-07, "loss": 0.0095, "step": 2780, "video_reward_cumulative_accuracy": 0.8489208633093526 }, { "epoch": 0.8254674977738201, "grad_norm": 0.4575834274291992, "learning_rate": 4.498113503996948e-07, "loss": 0.0059, "step": 2781, "video_reward_cumulative_accuracy": 0.8489751887810141 }, { "epoch": 0.8257643217571979, "grad_norm": 1.370509147644043, "learning_rate": 4.4833010180236836e-07, "loss": 0.0194, "step": 2782, "video_reward_cumulative_accuracy": 0.8490294751976994 }, { "epoch": 0.8260611457405759, "grad_norm": 1.5616401433944702, "learning_rate": 4.4685105587180895e-07, "loss": 0.0262, "step": 2783, "video_reward_cumulative_accuracy": 0.8490837226015092 }, { "epoch": 0.8263579697239537, "grad_norm": 0.9546812772750854, "learning_rate": 4.453742141959141e-07, "loss": 0.0074, "step": 2784, "video_reward_cumulative_accuracy": 0.8491379310344828 }, { "epoch": 0.8266547937073315, "grad_norm": 1.1593352556228638, "learning_rate": 4.4389957836021765e-07, "loss": 0.0387, "step": 2785, "video_reward_cumulative_accuracy": 0.8491921005385996 }, { "epoch": 0.8269516176907095, "grad_norm": 1.6280272006988525, "learning_rate": 4.424271499478844e-07, "loss": 0.0173, "step": 2786, "video_reward_cumulative_accuracy": 0.8492462311557789 }, { "epoch": 0.8272484416740873, "grad_norm": 3.1349103450775146, "learning_rate": 4.409569305397088e-07, "loss": 0.0256, "step": 2787, "video_reward_cumulative_accuracy": 0.8493003229278795 }, { "epoch": 0.8275452656574651, "grad_norm": 1.798938274383545, "learning_rate": 4.394889217141152e-07, "loss": 0.0205, "step": 2788, "video_reward_cumulative_accuracy": 0.8493543758967002 }, { "epoch": 0.8278420896408429, "grad_norm": 1.2877916097640991, "learning_rate": 4.38023125047152e-07, "loss": 0.0248, "step": 2789, "video_reward_cumulative_accuracy": 0.84940839010398 }, { "epoch": 0.8281389136242209, "grad_norm": 1.9247994422912598, "learning_rate": 4.365595421124949e-07, "loss": 0.037, "step": 2790, "video_reward_cumulative_accuracy": 0.8494623655913979 }, { "epoch": 0.8284357376075987, "grad_norm": 1.2538220882415771, "learning_rate": 4.35098174481442e-07, "loss": 0.041, "step": 2791, "video_reward_cumulative_accuracy": 0.8495163024005733 }, { "epoch": 0.8287325615909765, "grad_norm": 0.8949556946754456, "learning_rate": 4.336390237229138e-07, "loss": 0.0087, "step": 2792, "video_reward_cumulative_accuracy": 0.8495702005730659 }, { "epoch": 0.8290293855743545, "grad_norm": 2.420616626739502, "learning_rate": 4.321820914034502e-07, "loss": 0.0267, "step": 2793, "video_reward_cumulative_accuracy": 0.849624060150376 }, { "epoch": 0.8293262095577323, "grad_norm": 2.788456916809082, "learning_rate": 4.307273790872091e-07, "loss": 0.0342, "step": 2794, "video_reward_cumulative_accuracy": 0.8496778811739442 }, { "epoch": 0.8296230335411101, "grad_norm": 1.2072490453720093, "learning_rate": 4.292748883359657e-07, "loss": 0.0171, "step": 2795, "video_reward_cumulative_accuracy": 0.8497316636851521 }, { "epoch": 0.8299198575244879, "grad_norm": 2.1791484355926514, "learning_rate": 4.278246207091083e-07, "loss": 0.0144, "step": 2796, "video_reward_cumulative_accuracy": 0.8497854077253219 }, { "epoch": 0.8302166815078659, "grad_norm": 1.2288272380828857, "learning_rate": 4.263765777636425e-07, "loss": 0.0077, "step": 2797, "video_reward_cumulative_accuracy": 0.8498391133357168 }, { "epoch": 0.8305135054912437, "grad_norm": 0.8437438011169434, "learning_rate": 4.2493076105418114e-07, "loss": 0.0226, "step": 2798, "video_reward_cumulative_accuracy": 0.8498927805575411 }, { "epoch": 0.8308103294746215, "grad_norm": 1.1709120273590088, "learning_rate": 4.2348717213294923e-07, "loss": 0.0083, "step": 2799, "video_reward_cumulative_accuracy": 0.84994640943194 }, { "epoch": 0.8311071534579995, "grad_norm": 1.575054407119751, "learning_rate": 4.2204581254978034e-07, "loss": 0.0187, "step": 2800, "video_reward_cumulative_accuracy": 0.85 }, { "epoch": 0.8311071534579995, "eval_runtime": 131.0431, "eval_samples_per_second": 6.021, "eval_steps_per_second": 0.755, "eval_test_set_accuracy": 0.8194444444444444, "step": 2800 }, { "epoch": 0.8314039774413773, "grad_norm": 1.4664020538330078, "learning_rate": 4.2060668385211196e-07, "loss": 0.0194, "step": 2801, "video_reward_cumulative_accuracy": 0.850053552302749 }, { "epoch": 0.8317008014247551, "grad_norm": 3.3089983463287354, "learning_rate": 4.1916978758499095e-07, "loss": 0.0237, "step": 2802, "video_reward_cumulative_accuracy": 0.8501070663811563 }, { "epoch": 0.8319976254081329, "grad_norm": 1.9900091886520386, "learning_rate": 4.1773512529106305e-07, "loss": 0.039, "step": 2803, "video_reward_cumulative_accuracy": 0.8499821619693186 }, { "epoch": 0.8322944493915109, "grad_norm": 1.6034806966781616, "learning_rate": 4.163026985105778e-07, "loss": 0.0162, "step": 2804, "video_reward_cumulative_accuracy": 0.8500356633380884 }, { "epoch": 0.8325912733748887, "grad_norm": 4.051023960113525, "learning_rate": 4.1487250878138567e-07, "loss": 0.0284, "step": 2805, "video_reward_cumulative_accuracy": 0.8500891265597148 }, { "epoch": 0.8328880973582665, "grad_norm": 1.4414976835250854, "learning_rate": 4.134445576389315e-07, "loss": 0.0089, "step": 2806, "video_reward_cumulative_accuracy": 0.8501425516749822 }, { "epoch": 0.8331849213416445, "grad_norm": 2.868297815322876, "learning_rate": 4.1201884661626253e-07, "loss": 0.051, "step": 2807, "video_reward_cumulative_accuracy": 0.8501959387246171 }, { "epoch": 0.8334817453250223, "grad_norm": 1.3796963691711426, "learning_rate": 4.105953772440158e-07, "loss": 0.0085, "step": 2808, "video_reward_cumulative_accuracy": 0.8502492877492878 }, { "epoch": 0.8337785693084001, "grad_norm": 5.215771198272705, "learning_rate": 4.091741510504249e-07, "loss": 0.0674, "step": 2809, "video_reward_cumulative_accuracy": 0.8503025987896048 }, { "epoch": 0.8340753932917779, "grad_norm": 1.1552796363830566, "learning_rate": 4.0775516956131327e-07, "loss": 0.0084, "step": 2810, "video_reward_cumulative_accuracy": 0.850355871886121 }, { "epoch": 0.8343722172751559, "grad_norm": 4.928788661956787, "learning_rate": 4.0633843430009563e-07, "loss": 0.0486, "step": 2811, "video_reward_cumulative_accuracy": 0.8502312344361437 }, { "epoch": 0.8346690412585337, "grad_norm": 0.6429322957992554, "learning_rate": 4.049239467877747e-07, "loss": 0.0093, "step": 2812, "video_reward_cumulative_accuracy": 0.8502844950213371 }, { "epoch": 0.8349658652419115, "grad_norm": 4.207029819488525, "learning_rate": 4.0351170854294017e-07, "loss": 0.0345, "step": 2813, "video_reward_cumulative_accuracy": 0.8503377177390686 }, { "epoch": 0.8352626892252895, "grad_norm": 1.94601309299469, "learning_rate": 4.0210172108176767e-07, "loss": 0.036, "step": 2814, "video_reward_cumulative_accuracy": 0.8503909026297086 }, { "epoch": 0.8355595132086673, "grad_norm": 0.2354949563741684, "learning_rate": 4.0069398591801423e-07, "loss": 0.003, "step": 2815, "video_reward_cumulative_accuracy": 0.8504440497335701 }, { "epoch": 0.8358563371920451, "grad_norm": 2.0666418075561523, "learning_rate": 3.9928850456302073e-07, "loss": 0.045, "step": 2816, "video_reward_cumulative_accuracy": 0.8504971590909091 }, { "epoch": 0.8361531611754229, "grad_norm": 0.41040000319480896, "learning_rate": 3.9788527852570813e-07, "loss": 0.0077, "step": 2817, "video_reward_cumulative_accuracy": 0.850550230741924 }, { "epoch": 0.8364499851588009, "grad_norm": 4.305946350097656, "learning_rate": 3.964843093125753e-07, "loss": 0.0499, "step": 2818, "video_reward_cumulative_accuracy": 0.8506032647267565 }, { "epoch": 0.8367468091421787, "grad_norm": 1.3210865259170532, "learning_rate": 3.950855984276994e-07, "loss": 0.0141, "step": 2819, "video_reward_cumulative_accuracy": 0.8506562610854913 }, { "epoch": 0.8370436331255565, "grad_norm": 1.932438850402832, "learning_rate": 3.936891473727314e-07, "loss": 0.0332, "step": 2820, "video_reward_cumulative_accuracy": 0.8507092198581561 }, { "epoch": 0.8373404571089345, "grad_norm": 1.8300782442092896, "learning_rate": 3.9229495764689734e-07, "loss": 0.0406, "step": 2821, "video_reward_cumulative_accuracy": 0.8507621410847217 }, { "epoch": 0.8376372810923123, "grad_norm": 0.9342535734176636, "learning_rate": 3.9090303074699546e-07, "loss": 0.0265, "step": 2822, "video_reward_cumulative_accuracy": 0.8506378454996456 }, { "epoch": 0.8379341050756901, "grad_norm": 2.6911323070526123, "learning_rate": 3.89513368167394e-07, "loss": 0.0273, "step": 2823, "video_reward_cumulative_accuracy": 0.8506907545164718 }, { "epoch": 0.8382309290590679, "grad_norm": 1.113362431526184, "learning_rate": 3.881259714000318e-07, "loss": 0.0098, "step": 2824, "video_reward_cumulative_accuracy": 0.8507436260623229 }, { "epoch": 0.8385277530424459, "grad_norm": 2.1424875259399414, "learning_rate": 3.8674084193441235e-07, "loss": 0.0223, "step": 2825, "video_reward_cumulative_accuracy": 0.8507964601769912 }, { "epoch": 0.8388245770258237, "grad_norm": 1.2196357250213623, "learning_rate": 3.8535798125760695e-07, "loss": 0.0245, "step": 2826, "video_reward_cumulative_accuracy": 0.8508492569002123 }, { "epoch": 0.8391214010092015, "grad_norm": 1.4248687028884888, "learning_rate": 3.839773908542513e-07, "loss": 0.0097, "step": 2827, "video_reward_cumulative_accuracy": 0.8509020162716661 }, { "epoch": 0.8394182249925795, "grad_norm": 3.166527032852173, "learning_rate": 3.8259907220654286e-07, "loss": 0.0677, "step": 2828, "video_reward_cumulative_accuracy": 0.8509547383309759 }, { "epoch": 0.8397150489759573, "grad_norm": 1.9461520910263062, "learning_rate": 3.81223026794241e-07, "loss": 0.0228, "step": 2829, "video_reward_cumulative_accuracy": 0.8510074231177094 }, { "epoch": 0.8400118729593351, "grad_norm": 4.6057353019714355, "learning_rate": 3.798492560946632e-07, "loss": 0.092, "step": 2830, "video_reward_cumulative_accuracy": 0.8508833922261484 }, { "epoch": 0.8403086969427129, "grad_norm": 1.7150532007217407, "learning_rate": 3.7847776158268594e-07, "loss": 0.0519, "step": 2831, "video_reward_cumulative_accuracy": 0.8507594489579654 }, { "epoch": 0.8406055209260909, "grad_norm": 4.691923141479492, "learning_rate": 3.771085447307418e-07, "loss": 0.0646, "step": 2832, "video_reward_cumulative_accuracy": 0.850635593220339 }, { "epoch": 0.8409023449094687, "grad_norm": 1.0979514122009277, "learning_rate": 3.757416070088185e-07, "loss": 0.0091, "step": 2833, "video_reward_cumulative_accuracy": 0.8506883162725026 }, { "epoch": 0.8411991688928465, "grad_norm": 0.6728662848472595, "learning_rate": 3.7437694988445517e-07, "loss": 0.0061, "step": 2834, "video_reward_cumulative_accuracy": 0.8507410021171489 }, { "epoch": 0.8414959928762245, "grad_norm": 3.404707193374634, "learning_rate": 3.730145748227443e-07, "loss": 0.0272, "step": 2835, "video_reward_cumulative_accuracy": 0.8507936507936508 }, { "epoch": 0.8417928168596023, "grad_norm": 1.896315097808838, "learning_rate": 3.716544832863275e-07, "loss": 0.0159, "step": 2836, "video_reward_cumulative_accuracy": 0.8506699576868829 }, { "epoch": 0.8420896408429801, "grad_norm": 1.7696223258972168, "learning_rate": 3.702966767353958e-07, "loss": 0.052, "step": 2837, "video_reward_cumulative_accuracy": 0.8505463517800493 }, { "epoch": 0.8423864648263579, "grad_norm": 2.6428275108337402, "learning_rate": 3.6894115662768596e-07, "loss": 0.0275, "step": 2838, "video_reward_cumulative_accuracy": 0.8504228329809725 }, { "epoch": 0.8426832888097359, "grad_norm": 1.312769889831543, "learning_rate": 3.675879244184799e-07, "loss": 0.0219, "step": 2839, "video_reward_cumulative_accuracy": 0.850475519549137 }, { "epoch": 0.8429801127931137, "grad_norm": 2.8779304027557373, "learning_rate": 3.66236981560604e-07, "loss": 0.0236, "step": 2840, "video_reward_cumulative_accuracy": 0.8503521126760564 }, { "epoch": 0.8432769367764915, "grad_norm": 3.4229025840759277, "learning_rate": 3.6488832950442644e-07, "loss": 0.0779, "step": 2841, "video_reward_cumulative_accuracy": 0.8502287926786343 }, { "epoch": 0.8435737607598695, "grad_norm": 1.8767544031143188, "learning_rate": 3.635419696978565e-07, "loss": 0.0558, "step": 2842, "video_reward_cumulative_accuracy": 0.8501055594651654 }, { "epoch": 0.8438705847432473, "grad_norm": 0.48670053482055664, "learning_rate": 3.621979035863421e-07, "loss": 0.0078, "step": 2843, "video_reward_cumulative_accuracy": 0.8501582835033415 }, { "epoch": 0.8441674087266251, "grad_norm": 0.923923134803772, "learning_rate": 3.6085613261286816e-07, "loss": 0.0089, "step": 2844, "video_reward_cumulative_accuracy": 0.8502109704641351 }, { "epoch": 0.8444642327100029, "grad_norm": 2.631030797958374, "learning_rate": 3.5951665821795686e-07, "loss": 0.0444, "step": 2845, "video_reward_cumulative_accuracy": 0.8500878734622144 }, { "epoch": 0.8447610566933809, "grad_norm": 2.941474437713623, "learning_rate": 3.5817948183966224e-07, "loss": 0.0307, "step": 2846, "video_reward_cumulative_accuracy": 0.8501405481377372 }, { "epoch": 0.8450578806767587, "grad_norm": 1.852303385734558, "learning_rate": 3.5684460491357457e-07, "loss": 0.0394, "step": 2847, "video_reward_cumulative_accuracy": 0.8501931858096242 }, { "epoch": 0.8453547046601365, "grad_norm": 2.0034775733947754, "learning_rate": 3.5551202887281423e-07, "loss": 0.0212, "step": 2848, "video_reward_cumulative_accuracy": 0.8502457865168539 }, { "epoch": 0.8456515286435144, "grad_norm": 1.9412490129470825, "learning_rate": 3.541817551480292e-07, "loss": 0.037, "step": 2849, "video_reward_cumulative_accuracy": 0.8502983502983503 }, { "epoch": 0.8459483526268923, "grad_norm": 1.5758916139602661, "learning_rate": 3.528537851673988e-07, "loss": 0.0143, "step": 2850, "video_reward_cumulative_accuracy": 0.8501754385964913 }, { "epoch": 0.8462451766102701, "grad_norm": 2.0527806282043457, "learning_rate": 3.5152812035662674e-07, "loss": 0.0376, "step": 2851, "video_reward_cumulative_accuracy": 0.8500526131182041 }, { "epoch": 0.8465420005936479, "grad_norm": 1.3531450033187866, "learning_rate": 3.502047621389426e-07, "loss": 0.0074, "step": 2852, "video_reward_cumulative_accuracy": 0.8501051893408135 }, { "epoch": 0.8468388245770259, "grad_norm": 0.5980221033096313, "learning_rate": 3.488837119351018e-07, "loss": 0.0049, "step": 2853, "video_reward_cumulative_accuracy": 0.8501577287066246 }, { "epoch": 0.8471356485604037, "grad_norm": 1.4160398244857788, "learning_rate": 3.4756497116337826e-07, "loss": 0.0129, "step": 2854, "video_reward_cumulative_accuracy": 0.8502102312543798 }, { "epoch": 0.8474324725437815, "grad_norm": 1.262010097503662, "learning_rate": 3.4624854123956916e-07, "loss": 0.0192, "step": 2855, "video_reward_cumulative_accuracy": 0.850262697022767 }, { "epoch": 0.8477292965271594, "grad_norm": 0.3952392041683197, "learning_rate": 3.449344235769886e-07, "loss": 0.0078, "step": 2856, "video_reward_cumulative_accuracy": 0.8503151260504201 }, { "epoch": 0.8480261205105373, "grad_norm": 2.0614068508148193, "learning_rate": 3.4362261958647e-07, "loss": 0.0281, "step": 2857, "video_reward_cumulative_accuracy": 0.8503675183759188 }, { "epoch": 0.8483229444939151, "grad_norm": 3.423657178878784, "learning_rate": 3.423131306763627e-07, "loss": 0.0277, "step": 2858, "video_reward_cumulative_accuracy": 0.8504198740377886 }, { "epoch": 0.8486197684772929, "grad_norm": 0.3777397871017456, "learning_rate": 3.4100595825252965e-07, "loss": 0.005, "step": 2859, "video_reward_cumulative_accuracy": 0.8504721930745016 }, { "epoch": 0.8489165924606709, "grad_norm": 0.7437081336975098, "learning_rate": 3.3970110371834814e-07, "loss": 0.0104, "step": 2860, "video_reward_cumulative_accuracy": 0.8505244755244755 }, { "epoch": 0.8492134164440487, "grad_norm": 1.1904757022857666, "learning_rate": 3.3839856847470485e-07, "loss": 0.0124, "step": 2861, "video_reward_cumulative_accuracy": 0.8505767214260748 }, { "epoch": 0.8495102404274265, "grad_norm": 3.3361258506774902, "learning_rate": 3.3709835391999846e-07, "loss": 0.0594, "step": 2862, "video_reward_cumulative_accuracy": 0.8504542278127184 }, { "epoch": 0.8498070644108044, "grad_norm": 5.005155086517334, "learning_rate": 3.3580046145013534e-07, "loss": 0.0646, "step": 2863, "video_reward_cumulative_accuracy": 0.8503318197694726 }, { "epoch": 0.8501038883941823, "grad_norm": 1.4068641662597656, "learning_rate": 3.3450489245852926e-07, "loss": 0.0191, "step": 2864, "video_reward_cumulative_accuracy": 0.8502094972067039 }, { "epoch": 0.8504007123775601, "grad_norm": 1.33848237991333, "learning_rate": 3.3321164833609976e-07, "loss": 0.0272, "step": 2865, "video_reward_cumulative_accuracy": 0.8502617801047121 }, { "epoch": 0.8506975363609379, "grad_norm": 1.8083772659301758, "learning_rate": 3.319207304712688e-07, "loss": 0.0599, "step": 2866, "video_reward_cumulative_accuracy": 0.8503140265177949 }, { "epoch": 0.8509943603443159, "grad_norm": 3.3464255332946777, "learning_rate": 3.306321402499627e-07, "loss": 0.0327, "step": 2867, "video_reward_cumulative_accuracy": 0.8503662364841298 }, { "epoch": 0.8512911843276937, "grad_norm": 3.124976873397827, "learning_rate": 3.2934587905560756e-07, "loss": 0.0613, "step": 2868, "video_reward_cumulative_accuracy": 0.850418410041841 }, { "epoch": 0.8515880083110715, "grad_norm": 4.9768171310424805, "learning_rate": 3.2806194826913107e-07, "loss": 0.0197, "step": 2869, "video_reward_cumulative_accuracy": 0.8504705472289996 }, { "epoch": 0.8518848322944494, "grad_norm": 3.0971407890319824, "learning_rate": 3.267803492689556e-07, "loss": 0.0494, "step": 2870, "video_reward_cumulative_accuracy": 0.8505226480836237 }, { "epoch": 0.8521816562778273, "grad_norm": 0.6683565974235535, "learning_rate": 3.2550108343100293e-07, "loss": 0.0072, "step": 2871, "video_reward_cumulative_accuracy": 0.8505747126436781 }, { "epoch": 0.8524784802612051, "grad_norm": 0.6179012060165405, "learning_rate": 3.242241521286893e-07, "loss": 0.0036, "step": 2872, "video_reward_cumulative_accuracy": 0.8506267409470752 }, { "epoch": 0.8527753042445829, "grad_norm": 2.777172327041626, "learning_rate": 3.2294955673292437e-07, "loss": 0.023, "step": 2873, "video_reward_cumulative_accuracy": 0.8506787330316742 }, { "epoch": 0.8530721282279609, "grad_norm": 3.996541976928711, "learning_rate": 3.2167729861211026e-07, "loss": 0.0392, "step": 2874, "video_reward_cumulative_accuracy": 0.8505567153792624 }, { "epoch": 0.8533689522113387, "grad_norm": 1.6711208820343018, "learning_rate": 3.2040737913213853e-07, "loss": 0.0176, "step": 2875, "video_reward_cumulative_accuracy": 0.8506086956521739 }, { "epoch": 0.8536657761947165, "grad_norm": 0.736202597618103, "learning_rate": 3.1913979965639166e-07, "loss": 0.0054, "step": 2876, "video_reward_cumulative_accuracy": 0.8506606397774688 }, { "epoch": 0.8539626001780944, "grad_norm": 1.9681017398834229, "learning_rate": 3.178745615457393e-07, "loss": 0.0178, "step": 2877, "video_reward_cumulative_accuracy": 0.8507125477928398 }, { "epoch": 0.8542594241614723, "grad_norm": 4.273847579956055, "learning_rate": 3.1661166615853723e-07, "loss": 0.0847, "step": 2878, "video_reward_cumulative_accuracy": 0.8505906879777624 }, { "epoch": 0.8545562481448501, "grad_norm": 2.556535243988037, "learning_rate": 3.153511148506269e-07, "loss": 0.0457, "step": 2879, "video_reward_cumulative_accuracy": 0.8506425842306357 }, { "epoch": 0.8548530721282279, "grad_norm": 3.7858211994171143, "learning_rate": 3.140929089753311e-07, "loss": 0.0359, "step": 2880, "video_reward_cumulative_accuracy": 0.8506944444444444 }, { "epoch": 0.8551498961116059, "grad_norm": 3.342463254928589, "learning_rate": 3.128370498834571e-07, "loss": 0.0899, "step": 2881, "video_reward_cumulative_accuracy": 0.8505727178063173 }, { "epoch": 0.8554467200949837, "grad_norm": 3.1436612606048584, "learning_rate": 3.1158353892329075e-07, "loss": 0.0864, "step": 2882, "video_reward_cumulative_accuracy": 0.8506245662734212 }, { "epoch": 0.8557435440783615, "grad_norm": 0.46163490414619446, "learning_rate": 3.1033237744059805e-07, "loss": 0.0036, "step": 2883, "video_reward_cumulative_accuracy": 0.8506763787721123 }, { "epoch": 0.8560403680617394, "grad_norm": 1.36380934715271, "learning_rate": 3.090835667786232e-07, "loss": 0.0211, "step": 2884, "video_reward_cumulative_accuracy": 0.8507281553398058 }, { "epoch": 0.8563371920451173, "grad_norm": 5.0028977394104, "learning_rate": 3.078371082780843e-07, "loss": 0.0455, "step": 2885, "video_reward_cumulative_accuracy": 0.8506065857885615 }, { "epoch": 0.8566340160284951, "grad_norm": 3.535642147064209, "learning_rate": 3.065930032771763e-07, "loss": 0.0395, "step": 2886, "video_reward_cumulative_accuracy": 0.8506583506583506 }, { "epoch": 0.8569308400118729, "grad_norm": 0.9238957166671753, "learning_rate": 3.053512531115654e-07, "loss": 0.0094, "step": 2887, "video_reward_cumulative_accuracy": 0.8507100796674749 }, { "epoch": 0.8572276639952509, "grad_norm": 1.2704198360443115, "learning_rate": 3.041118591143924e-07, "loss": 0.0114, "step": 2888, "video_reward_cumulative_accuracy": 0.8507617728531855 }, { "epoch": 0.8575244879786287, "grad_norm": 5.546009063720703, "learning_rate": 3.0287482261626727e-07, "loss": 0.1227, "step": 2889, "video_reward_cumulative_accuracy": 0.8506403599861544 }, { "epoch": 0.8578213119620065, "grad_norm": 0.7070528268814087, "learning_rate": 3.016401449452674e-07, "loss": 0.011, "step": 2890, "video_reward_cumulative_accuracy": 0.8506920415224913 }, { "epoch": 0.8581181359453844, "grad_norm": 2.1009104251861572, "learning_rate": 3.0040782742694037e-07, "loss": 0.0506, "step": 2891, "video_reward_cumulative_accuracy": 0.8507436873054306 }, { "epoch": 0.8584149599287623, "grad_norm": 1.1483750343322754, "learning_rate": 2.991778713842969e-07, "loss": 0.025, "step": 2892, "video_reward_cumulative_accuracy": 0.8507952973720608 }, { "epoch": 0.8587117839121401, "grad_norm": 0.6957268714904785, "learning_rate": 2.979502781378163e-07, "loss": 0.0083, "step": 2893, "video_reward_cumulative_accuracy": 0.8508468717594193 }, { "epoch": 0.8590086078955179, "grad_norm": 1.5722553730010986, "learning_rate": 2.967250490054377e-07, "loss": 0.044, "step": 2894, "video_reward_cumulative_accuracy": 0.850898410504492 }, { "epoch": 0.8593054318788959, "grad_norm": 1.9897538423538208, "learning_rate": 2.955021853025639e-07, "loss": 0.0717, "step": 2895, "video_reward_cumulative_accuracy": 0.8509499136442141 }, { "epoch": 0.8596022558622737, "grad_norm": 1.8997362852096558, "learning_rate": 2.942816883420582e-07, "loss": 0.0146, "step": 2896, "video_reward_cumulative_accuracy": 0.8510013812154696 }, { "epoch": 0.8598990798456515, "grad_norm": 1.694429636001587, "learning_rate": 2.9306355943424097e-07, "loss": 0.0154, "step": 2897, "video_reward_cumulative_accuracy": 0.8510528132550915 }, { "epoch": 0.8601959038290294, "grad_norm": 2.0843210220336914, "learning_rate": 2.91847799886894e-07, "loss": 0.0171, "step": 2898, "video_reward_cumulative_accuracy": 0.851104209799862 }, { "epoch": 0.8604927278124073, "grad_norm": 3.370720386505127, "learning_rate": 2.9063441100525167e-07, "loss": 0.0258, "step": 2899, "video_reward_cumulative_accuracy": 0.8509830976198689 }, { "epoch": 0.8607895517957851, "grad_norm": 1.6068284511566162, "learning_rate": 2.8942339409200523e-07, "loss": 0.0278, "step": 2900, "video_reward_cumulative_accuracy": 0.8510344827586207 }, { "epoch": 0.8610863757791629, "grad_norm": 1.0114235877990723, "learning_rate": 2.88214750447299e-07, "loss": 0.0117, "step": 2901, "video_reward_cumulative_accuracy": 0.8510858324715616 }, { "epoch": 0.8613831997625409, "grad_norm": 1.6765480041503906, "learning_rate": 2.8700848136872823e-07, "loss": 0.0457, "step": 2902, "video_reward_cumulative_accuracy": 0.8511371467953136 }, { "epoch": 0.8616800237459187, "grad_norm": 4.060369968414307, "learning_rate": 2.858045881513416e-07, "loss": 0.0417, "step": 2903, "video_reward_cumulative_accuracy": 0.8510161901481227 }, { "epoch": 0.8619768477292965, "grad_norm": 2.511009693145752, "learning_rate": 2.846030720876339e-07, "loss": 0.0231, "step": 2904, "video_reward_cumulative_accuracy": 0.8510674931129476 }, { "epoch": 0.8622736717126744, "grad_norm": 1.7390601634979248, "learning_rate": 2.834039344675504e-07, "loss": 0.0318, "step": 2905, "video_reward_cumulative_accuracy": 0.851118760757315 }, { "epoch": 0.8625704956960523, "grad_norm": 2.4160759449005127, "learning_rate": 2.8220717657848037e-07, "loss": 0.0218, "step": 2906, "video_reward_cumulative_accuracy": 0.8511699931176876 }, { "epoch": 0.8628673196794301, "grad_norm": 2.386310338973999, "learning_rate": 2.8101279970526e-07, "loss": 0.0381, "step": 2907, "video_reward_cumulative_accuracy": 0.8512211902304782 }, { "epoch": 0.8631641436628079, "grad_norm": 1.4243983030319214, "learning_rate": 2.798208051301693e-07, "loss": 0.0189, "step": 2908, "video_reward_cumulative_accuracy": 0.8512723521320495 }, { "epoch": 0.8634609676461859, "grad_norm": 2.750420331954956, "learning_rate": 2.786311941329298e-07, "loss": 0.0442, "step": 2909, "video_reward_cumulative_accuracy": 0.8511515984874527 }, { "epoch": 0.8637577916295637, "grad_norm": 1.8915764093399048, "learning_rate": 2.774439679907051e-07, "loss": 0.0549, "step": 2910, "video_reward_cumulative_accuracy": 0.8512027491408934 }, { "epoch": 0.8640546156129415, "grad_norm": 2.3890373706817627, "learning_rate": 2.762591279780963e-07, "loss": 0.0192, "step": 2911, "video_reward_cumulative_accuracy": 0.8512538646513226 }, { "epoch": 0.8643514395963194, "grad_norm": 2.8765714168548584, "learning_rate": 2.7507667536714496e-07, "loss": 0.0201, "step": 2912, "video_reward_cumulative_accuracy": 0.851304945054945 }, { "epoch": 0.8646482635796973, "grad_norm": 0.4579935073852539, "learning_rate": 2.738966114273287e-07, "loss": 0.0051, "step": 2913, "video_reward_cumulative_accuracy": 0.8513559903879162 }, { "epoch": 0.8649450875630751, "grad_norm": 1.4378376007080078, "learning_rate": 2.727189374255604e-07, "loss": 0.0094, "step": 2914, "video_reward_cumulative_accuracy": 0.8514070006863418 }, { "epoch": 0.8652419115464529, "grad_norm": 1.244295358657837, "learning_rate": 2.715436546261882e-07, "loss": 0.0155, "step": 2915, "video_reward_cumulative_accuracy": 0.8514579759862779 }, { "epoch": 0.8655387355298308, "grad_norm": 2.9890527725219727, "learning_rate": 2.703707642909914e-07, "loss": 0.0832, "step": 2916, "video_reward_cumulative_accuracy": 0.8513374485596708 }, { "epoch": 0.8658355595132087, "grad_norm": 0.4092223644256592, "learning_rate": 2.6920026767918163e-07, "loss": 0.0084, "step": 2917, "video_reward_cumulative_accuracy": 0.8513884127528283 }, { "epoch": 0.8661323834965865, "grad_norm": 3.3004024028778076, "learning_rate": 2.680321660474011e-07, "loss": 0.054, "step": 2918, "video_reward_cumulative_accuracy": 0.8514393420150789 }, { "epoch": 0.8664292074799644, "grad_norm": 2.0161333084106445, "learning_rate": 2.6686646064971983e-07, "loss": 0.0137, "step": 2919, "video_reward_cumulative_accuracy": 0.8514902363823227 }, { "epoch": 0.8667260314633423, "grad_norm": 1.7455112934112549, "learning_rate": 2.6570315273763663e-07, "loss": 0.0391, "step": 2920, "video_reward_cumulative_accuracy": 0.8515410958904109 }, { "epoch": 0.8670228554467201, "grad_norm": 1.8997581005096436, "learning_rate": 2.6454224356007417e-07, "loss": 0.0372, "step": 2921, "video_reward_cumulative_accuracy": 0.8515919205751455 }, { "epoch": 0.8673196794300979, "grad_norm": 3.565519094467163, "learning_rate": 2.633837343633816e-07, "loss": 0.0423, "step": 2922, "video_reward_cumulative_accuracy": 0.8516427104722792 }, { "epoch": 0.8676165034134758, "grad_norm": 2.546748399734497, "learning_rate": 2.622276263913312e-07, "loss": 0.0663, "step": 2923, "video_reward_cumulative_accuracy": 0.8515224084844338 }, { "epoch": 0.8679133273968537, "grad_norm": 0.6215806603431702, "learning_rate": 2.6107392088511706e-07, "loss": 0.0154, "step": 2924, "video_reward_cumulative_accuracy": 0.8515731874145007 }, { "epoch": 0.8682101513802315, "grad_norm": 2.480520009994507, "learning_rate": 2.5992261908335454e-07, "loss": 0.0223, "step": 2925, "video_reward_cumulative_accuracy": 0.8516239316239316 }, { "epoch": 0.8685069753636094, "grad_norm": 2.851381301879883, "learning_rate": 2.587737222220765e-07, "loss": 0.0466, "step": 2926, "video_reward_cumulative_accuracy": 0.8515037593984962 }, { "epoch": 0.8688037993469873, "grad_norm": 1.1734604835510254, "learning_rate": 2.576272315347361e-07, "loss": 0.0104, "step": 2927, "video_reward_cumulative_accuracy": 0.8515544926545952 }, { "epoch": 0.8691006233303651, "grad_norm": 2.432842493057251, "learning_rate": 2.5648314825220233e-07, "loss": 0.0306, "step": 2928, "video_reward_cumulative_accuracy": 0.8516051912568307 }, { "epoch": 0.8693974473137429, "grad_norm": 1.94566011428833, "learning_rate": 2.5534147360276014e-07, "loss": 0.0697, "step": 2929, "video_reward_cumulative_accuracy": 0.8514851485148515 }, { "epoch": 0.8696942712971208, "grad_norm": 3.096381902694702, "learning_rate": 2.542022088121068e-07, "loss": 0.0651, "step": 2930, "video_reward_cumulative_accuracy": 0.8515358361774744 }, { "epoch": 0.8699910952804987, "grad_norm": 1.368943452835083, "learning_rate": 2.530653551033546e-07, "loss": 0.0131, "step": 2931, "video_reward_cumulative_accuracy": 0.8515864892528148 }, { "epoch": 0.8702879192638765, "grad_norm": 4.917116165161133, "learning_rate": 2.519309136970258e-07, "loss": 0.0782, "step": 2932, "video_reward_cumulative_accuracy": 0.8516371077762619 }, { "epoch": 0.8705847432472544, "grad_norm": 3.473444700241089, "learning_rate": 2.507988858110538e-07, "loss": 0.0313, "step": 2933, "video_reward_cumulative_accuracy": 0.8516876917831572 }, { "epoch": 0.8708815672306323, "grad_norm": 0.6671600937843323, "learning_rate": 2.4966927266078077e-07, "loss": 0.008, "step": 2934, "video_reward_cumulative_accuracy": 0.8517382413087935 }, { "epoch": 0.8711783912140101, "grad_norm": 0.5415387749671936, "learning_rate": 2.4854207545895515e-07, "loss": 0.0091, "step": 2935, "video_reward_cumulative_accuracy": 0.8517887563884157 }, { "epoch": 0.8714752151973879, "grad_norm": 1.7057019472122192, "learning_rate": 2.474172954157328e-07, "loss": 0.0371, "step": 2936, "video_reward_cumulative_accuracy": 0.8518392370572208 }, { "epoch": 0.8717720391807658, "grad_norm": 1.5639770030975342, "learning_rate": 2.462949337386744e-07, "loss": 0.0343, "step": 2937, "video_reward_cumulative_accuracy": 0.8518896833503575 }, { "epoch": 0.8720688631641437, "grad_norm": 2.864452362060547, "learning_rate": 2.4517499163274395e-07, "loss": 0.0471, "step": 2938, "video_reward_cumulative_accuracy": 0.8519400953029271 }, { "epoch": 0.8723656871475215, "grad_norm": 2.2662036418914795, "learning_rate": 2.4405747030030903e-07, "loss": 0.0436, "step": 2939, "video_reward_cumulative_accuracy": 0.851990472949983 }, { "epoch": 0.8726625111308994, "grad_norm": 0.40300530195236206, "learning_rate": 2.4294237094113576e-07, "loss": 0.0072, "step": 2940, "video_reward_cumulative_accuracy": 0.8520408163265306 }, { "epoch": 0.8729593351142773, "grad_norm": 5.474096298217773, "learning_rate": 2.418296947523927e-07, "loss": 0.0834, "step": 2941, "video_reward_cumulative_accuracy": 0.8520911254675281 }, { "epoch": 0.8732561590976551, "grad_norm": 2.9174013137817383, "learning_rate": 2.4071944292864414e-07, "loss": 0.0285, "step": 2942, "video_reward_cumulative_accuracy": 0.8521414004078858 }, { "epoch": 0.8735529830810329, "grad_norm": 2.5676088333129883, "learning_rate": 2.3961161666185476e-07, "loss": 0.027, "step": 2943, "video_reward_cumulative_accuracy": 0.8521916411824668 }, { "epoch": 0.8738498070644108, "grad_norm": 2.0752131938934326, "learning_rate": 2.385062171413838e-07, "loss": 0.0428, "step": 2944, "video_reward_cumulative_accuracy": 0.8522418478260869 }, { "epoch": 0.8741466310477887, "grad_norm": 1.9137473106384277, "learning_rate": 2.3740324555398346e-07, "loss": 0.0525, "step": 2945, "video_reward_cumulative_accuracy": 0.8522920203735145 }, { "epoch": 0.8744434550311665, "grad_norm": 2.5203845500946045, "learning_rate": 2.3630270308380221e-07, "loss": 0.0348, "step": 2946, "video_reward_cumulative_accuracy": 0.8521724372029871 }, { "epoch": 0.8747402790145444, "grad_norm": 1.3323040008544922, "learning_rate": 2.352045909123779e-07, "loss": 0.0112, "step": 2947, "video_reward_cumulative_accuracy": 0.8522225992534781 }, { "epoch": 0.8750371029979223, "grad_norm": 0.23800607025623322, "learning_rate": 2.3410891021864058e-07, "loss": 0.002, "step": 2948, "video_reward_cumulative_accuracy": 0.8522727272727273 }, { "epoch": 0.8753339269813001, "grad_norm": 1.832039475440979, "learning_rate": 2.3301566217891148e-07, "loss": 0.0263, "step": 2949, "video_reward_cumulative_accuracy": 0.8523228212953543 }, { "epoch": 0.8756307509646779, "grad_norm": 2.428018569946289, "learning_rate": 2.31924847966897e-07, "loss": 0.0579, "step": 2950, "video_reward_cumulative_accuracy": 0.8520338983050847 }, { "epoch": 0.8759275749480558, "grad_norm": 1.4113751649856567, "learning_rate": 2.3083646875369293e-07, "loss": 0.0297, "step": 2951, "video_reward_cumulative_accuracy": 0.8520840393087089 }, { "epoch": 0.8762243989314337, "grad_norm": 2.2765519618988037, "learning_rate": 2.2975052570777896e-07, "loss": 0.0271, "step": 2952, "video_reward_cumulative_accuracy": 0.8519647696476965 }, { "epoch": 0.8765212229148115, "grad_norm": 1.4904448986053467, "learning_rate": 2.2866701999502083e-07, "loss": 0.0244, "step": 2953, "video_reward_cumulative_accuracy": 0.8520149001015916 }, { "epoch": 0.8768180468981893, "grad_norm": 2.649986505508423, "learning_rate": 2.275859527786675e-07, "loss": 0.0505, "step": 2954, "video_reward_cumulative_accuracy": 0.8518957345971564 }, { "epoch": 0.8771148708815673, "grad_norm": 2.690307140350342, "learning_rate": 2.2650732521934891e-07, "loss": 0.0813, "step": 2955, "video_reward_cumulative_accuracy": 0.8519458544839256 }, { "epoch": 0.8774116948649451, "grad_norm": 1.8081152439117432, "learning_rate": 2.2543113847507735e-07, "loss": 0.0534, "step": 2956, "video_reward_cumulative_accuracy": 0.8519959404600812 }, { "epoch": 0.8777085188483229, "grad_norm": 1.8062191009521484, "learning_rate": 2.2435739370124277e-07, "loss": 0.0196, "step": 2957, "video_reward_cumulative_accuracy": 0.852045992560027 }, { "epoch": 0.8780053428317008, "grad_norm": 1.1374989748001099, "learning_rate": 2.2328609205061442e-07, "loss": 0.0244, "step": 2958, "video_reward_cumulative_accuracy": 0.8520960108181204 }, { "epoch": 0.8783021668150787, "grad_norm": 0.8241883516311646, "learning_rate": 2.2221723467333922e-07, "loss": 0.022, "step": 2959, "video_reward_cumulative_accuracy": 0.8521459952686719 }, { "epoch": 0.8785989907984565, "grad_norm": 2.666593551635742, "learning_rate": 2.21150822716939e-07, "loss": 0.0145, "step": 2960, "video_reward_cumulative_accuracy": 0.8521959459459459 }, { "epoch": 0.8788958147818343, "grad_norm": 1.3375494480133057, "learning_rate": 2.2008685732631096e-07, "loss": 0.0177, "step": 2961, "video_reward_cumulative_accuracy": 0.8522458628841607 }, { "epoch": 0.8791926387652123, "grad_norm": 3.6023035049438477, "learning_rate": 2.1902533964372448e-07, "loss": 0.0534, "step": 2962, "video_reward_cumulative_accuracy": 0.8521269412559082 }, { "epoch": 0.8794894627485901, "grad_norm": 3.426396608352661, "learning_rate": 2.1796627080882205e-07, "loss": 0.0531, "step": 2963, "video_reward_cumulative_accuracy": 0.8521768477894026 }, { "epoch": 0.8797862867319679, "grad_norm": 2.153428077697754, "learning_rate": 2.1690965195861668e-07, "loss": 0.0237, "step": 2964, "video_reward_cumulative_accuracy": 0.8522267206477733 }, { "epoch": 0.8800831107153458, "grad_norm": 1.4342325925827026, "learning_rate": 2.1585548422749236e-07, "loss": 0.0163, "step": 2965, "video_reward_cumulative_accuracy": 0.8522765598650928 }, { "epoch": 0.8803799346987237, "grad_norm": 1.447503924369812, "learning_rate": 2.1480376874719876e-07, "loss": 0.0233, "step": 2966, "video_reward_cumulative_accuracy": 0.8523263654753878 }, { "epoch": 0.8806767586821015, "grad_norm": 2.423767328262329, "learning_rate": 2.1375450664685577e-07, "loss": 0.0742, "step": 2967, "video_reward_cumulative_accuracy": 0.8523761375126391 }, { "epoch": 0.8809735826654793, "grad_norm": 2.4350879192352295, "learning_rate": 2.1270769905294752e-07, "loss": 0.0423, "step": 2968, "video_reward_cumulative_accuracy": 0.8524258760107817 }, { "epoch": 0.8812704066488573, "grad_norm": 4.910870552062988, "learning_rate": 2.1166334708932367e-07, "loss": 0.051, "step": 2969, "video_reward_cumulative_accuracy": 0.8524755810037049 }, { "epoch": 0.8815672306322351, "grad_norm": 3.4264378547668457, "learning_rate": 2.1062145187719818e-07, "loss": 0.0618, "step": 2970, "video_reward_cumulative_accuracy": 0.8525252525252526 }, { "epoch": 0.8818640546156129, "grad_norm": 0.8235085010528564, "learning_rate": 2.0958201453514515e-07, "loss": 0.0156, "step": 2971, "video_reward_cumulative_accuracy": 0.8525748906092225 }, { "epoch": 0.8821608785989908, "grad_norm": 3.6554598808288574, "learning_rate": 2.0854503617910278e-07, "loss": 0.0275, "step": 2972, "video_reward_cumulative_accuracy": 0.8526244952893675 }, { "epoch": 0.8824577025823687, "grad_norm": 1.463263750076294, "learning_rate": 2.0751051792236714e-07, "loss": 0.022, "step": 2973, "video_reward_cumulative_accuracy": 0.8526740665993946 }, { "epoch": 0.8827545265657465, "grad_norm": 2.6287763118743896, "learning_rate": 2.0647846087559459e-07, "loss": 0.0397, "step": 2974, "video_reward_cumulative_accuracy": 0.8527236045729657 }, { "epoch": 0.8830513505491243, "grad_norm": 1.2779161930084229, "learning_rate": 2.0544886614679848e-07, "loss": 0.0286, "step": 2975, "video_reward_cumulative_accuracy": 0.8526050420168068 }, { "epoch": 0.8833481745325023, "grad_norm": 2.465494394302368, "learning_rate": 2.0442173484134826e-07, "loss": 0.0588, "step": 2976, "video_reward_cumulative_accuracy": 0.8526545698924731 }, { "epoch": 0.8836449985158801, "grad_norm": 1.980695128440857, "learning_rate": 2.033970680619693e-07, "loss": 0.0563, "step": 2977, "video_reward_cumulative_accuracy": 0.8527040644944575 }, { "epoch": 0.8839418224992579, "grad_norm": 1.9491227865219116, "learning_rate": 2.023748669087408e-07, "loss": 0.0332, "step": 2978, "video_reward_cumulative_accuracy": 0.8525856279382136 }, { "epoch": 0.8842386464826358, "grad_norm": 2.006883144378662, "learning_rate": 2.0135513247909493e-07, "loss": 0.0141, "step": 2979, "video_reward_cumulative_accuracy": 0.8526351124538436 }, { "epoch": 0.8845354704660137, "grad_norm": 2.424649715423584, "learning_rate": 2.0033786586781624e-07, "loss": 0.0493, "step": 2980, "video_reward_cumulative_accuracy": 0.85251677852349 }, { "epoch": 0.8848322944493915, "grad_norm": 2.046546697616577, "learning_rate": 1.9932306816703773e-07, "loss": 0.084, "step": 2981, "video_reward_cumulative_accuracy": 0.8525662529352567 }, { "epoch": 0.8851291184327693, "grad_norm": 1.7148088216781616, "learning_rate": 1.9831074046624488e-07, "loss": 0.0261, "step": 2982, "video_reward_cumulative_accuracy": 0.85261569416499 }, { "epoch": 0.8854259424161472, "grad_norm": 1.020740032196045, "learning_rate": 1.9730088385226774e-07, "loss": 0.01, "step": 2983, "video_reward_cumulative_accuracy": 0.852665102246061 }, { "epoch": 0.8857227663995251, "grad_norm": 1.4954924583435059, "learning_rate": 1.9629349940928715e-07, "loss": 0.0195, "step": 2984, "video_reward_cumulative_accuracy": 0.8527144772117963 }, { "epoch": 0.8860195903829029, "grad_norm": 0.6353304982185364, "learning_rate": 1.952885882188277e-07, "loss": 0.023, "step": 2985, "video_reward_cumulative_accuracy": 0.8527638190954774 }, { "epoch": 0.8863164143662808, "grad_norm": 1.529733657836914, "learning_rate": 1.9428615135975855e-07, "loss": 0.018, "step": 2986, "video_reward_cumulative_accuracy": 0.8528131279303416 }, { "epoch": 0.8866132383496587, "grad_norm": 2.5808000564575195, "learning_rate": 1.9328618990829384e-07, "loss": 0.0279, "step": 2987, "video_reward_cumulative_accuracy": 0.8528624037495816 }, { "epoch": 0.8869100623330365, "grad_norm": 1.363269329071045, "learning_rate": 1.9228870493798763e-07, "loss": 0.0136, "step": 2988, "video_reward_cumulative_accuracy": 0.8529116465863453 }, { "epoch": 0.8872068863164143, "grad_norm": 0.9487797021865845, "learning_rate": 1.912936975197388e-07, "loss": 0.0137, "step": 2989, "video_reward_cumulative_accuracy": 0.852960856473737 }, { "epoch": 0.8875037102997922, "grad_norm": 0.6261467337608337, "learning_rate": 1.9030116872178317e-07, "loss": 0.0065, "step": 2990, "video_reward_cumulative_accuracy": 0.8530100334448161 }, { "epoch": 0.8878005342831701, "grad_norm": 1.148743987083435, "learning_rate": 1.8931111960969694e-07, "loss": 0.0226, "step": 2991, "video_reward_cumulative_accuracy": 0.8530591775325977 }, { "epoch": 0.8880973582665479, "grad_norm": 2.52793025970459, "learning_rate": 1.8832355124639463e-07, "loss": 0.0447, "step": 2992, "video_reward_cumulative_accuracy": 0.852774064171123 }, { "epoch": 0.8883941822499258, "grad_norm": 1.2899773120880127, "learning_rate": 1.873384646921253e-07, "loss": 0.0289, "step": 2993, "video_reward_cumulative_accuracy": 0.8528232542599399 }, { "epoch": 0.8886910062333037, "grad_norm": 1.8950523138046265, "learning_rate": 1.8635586100447633e-07, "loss": 0.0585, "step": 2994, "video_reward_cumulative_accuracy": 0.852872411489646 }, { "epoch": 0.8889878302166815, "grad_norm": 2.048049211502075, "learning_rate": 1.8537574123836748e-07, "loss": 0.0715, "step": 2995, "video_reward_cumulative_accuracy": 0.8529215358931552 }, { "epoch": 0.8892846542000593, "grad_norm": 1.89472496509552, "learning_rate": 1.843981064460529e-07, "loss": 0.0298, "step": 2996, "video_reward_cumulative_accuracy": 0.8529706275033377 }, { "epoch": 0.8895814781834372, "grad_norm": 2.2065141201019287, "learning_rate": 1.8342295767711794e-07, "loss": 0.0269, "step": 2997, "video_reward_cumulative_accuracy": 0.8530196863530197 }, { "epoch": 0.8898783021668151, "grad_norm": 1.0499368906021118, "learning_rate": 1.8245029597847907e-07, "loss": 0.006, "step": 2998, "video_reward_cumulative_accuracy": 0.8530687124749833 }, { "epoch": 0.8901751261501929, "grad_norm": 3.8206775188446045, "learning_rate": 1.8148012239438434e-07, "loss": 0.033, "step": 2999, "video_reward_cumulative_accuracy": 0.8531177059019673 }, { "epoch": 0.8904719501335708, "grad_norm": 3.880343198776245, "learning_rate": 1.8051243796640805e-07, "loss": 0.0258, "step": 3000, "video_reward_cumulative_accuracy": 0.8531666666666666 }, { "epoch": 0.8904719501335708, "eval_runtime": 132.9639, "eval_samples_per_second": 5.934, "eval_steps_per_second": 0.745, "eval_test_set_accuracy": 0.8169191919191919, "step": 3000 }, { "epoch": 0.8907687741169487, "grad_norm": 1.0296834707260132, "learning_rate": 1.7954724373345445e-07, "loss": 0.0095, "step": 3001, "video_reward_cumulative_accuracy": 0.8532155948017327 }, { "epoch": 0.8910655981003265, "grad_norm": 1.028411626815796, "learning_rate": 1.7858454073175185e-07, "loss": 0.0088, "step": 3002, "video_reward_cumulative_accuracy": 0.8532644903397735 }, { "epoch": 0.8913624220837043, "grad_norm": 3.756903886795044, "learning_rate": 1.776243299948563e-07, "loss": 0.0585, "step": 3003, "video_reward_cumulative_accuracy": 0.8533133533133533 }, { "epoch": 0.8916592460670822, "grad_norm": 2.9036705493927, "learning_rate": 1.7666661255364704e-07, "loss": 0.0412, "step": 3004, "video_reward_cumulative_accuracy": 0.8533621837549934 }, { "epoch": 0.8919560700504601, "grad_norm": 2.294867992401123, "learning_rate": 1.7571138943632688e-07, "loss": 0.0249, "step": 3005, "video_reward_cumulative_accuracy": 0.8534109816971713 }, { "epoch": 0.8922528940338379, "grad_norm": 3.16927170753479, "learning_rate": 1.7475866166842048e-07, "loss": 0.0349, "step": 3006, "video_reward_cumulative_accuracy": 0.853459747172322 }, { "epoch": 0.8925497180172158, "grad_norm": 1.4870845079421997, "learning_rate": 1.73808430272773e-07, "loss": 0.0282, "step": 3007, "video_reward_cumulative_accuracy": 0.8535084802128368 }, { "epoch": 0.8928465420005937, "grad_norm": 2.188786029815674, "learning_rate": 1.728606962695506e-07, "loss": 0.0509, "step": 3008, "video_reward_cumulative_accuracy": 0.8535571808510638 }, { "epoch": 0.8931433659839715, "grad_norm": 3.7729499340057373, "learning_rate": 1.7191546067623772e-07, "loss": 0.0906, "step": 3009, "video_reward_cumulative_accuracy": 0.8534396809571286 }, { "epoch": 0.8934401899673493, "grad_norm": 3.361833095550537, "learning_rate": 1.7097272450763646e-07, "loss": 0.0877, "step": 3010, "video_reward_cumulative_accuracy": 0.8533222591362126 }, { "epoch": 0.8937370139507272, "grad_norm": 2.5203890800476074, "learning_rate": 1.7003248877586558e-07, "loss": 0.0243, "step": 3011, "video_reward_cumulative_accuracy": 0.8533709730986383 }, { "epoch": 0.8940338379341051, "grad_norm": 1.8145414590835571, "learning_rate": 1.6909475449035929e-07, "loss": 0.0231, "step": 3012, "video_reward_cumulative_accuracy": 0.8534196547144755 }, { "epoch": 0.8943306619174829, "grad_norm": 3.811539888381958, "learning_rate": 1.6815952265786638e-07, "loss": 0.0447, "step": 3013, "video_reward_cumulative_accuracy": 0.8533023564553601 }, { "epoch": 0.8946274859008608, "grad_norm": 3.9783248901367188, "learning_rate": 1.6722679428244903e-07, "loss": 0.0442, "step": 3014, "video_reward_cumulative_accuracy": 0.8533510285335103 }, { "epoch": 0.8949243098842387, "grad_norm": 0.7299622893333435, "learning_rate": 1.6629657036548175e-07, "loss": 0.01, "step": 3015, "video_reward_cumulative_accuracy": 0.8533996683250414 }, { "epoch": 0.8952211338676165, "grad_norm": 3.1430046558380127, "learning_rate": 1.6536885190565066e-07, "loss": 0.0237, "step": 3016, "video_reward_cumulative_accuracy": 0.8532824933687002 }, { "epoch": 0.8955179578509943, "grad_norm": 1.1799105405807495, "learning_rate": 1.6444363989895046e-07, "loss": 0.0112, "step": 3017, "video_reward_cumulative_accuracy": 0.8533311236327478 }, { "epoch": 0.8958147818343722, "grad_norm": 2.950824022293091, "learning_rate": 1.6352093533868658e-07, "loss": 0.0471, "step": 3018, "video_reward_cumulative_accuracy": 0.8532140490390987 }, { "epoch": 0.8961116058177501, "grad_norm": 2.92531156539917, "learning_rate": 1.6260073921547215e-07, "loss": 0.0304, "step": 3019, "video_reward_cumulative_accuracy": 0.8532626697581981 }, { "epoch": 0.8964084298011279, "grad_norm": 2.9492204189300537, "learning_rate": 1.616830525172272e-07, "loss": 0.0292, "step": 3020, "video_reward_cumulative_accuracy": 0.8533112582781457 }, { "epoch": 0.8967052537845058, "grad_norm": 1.7324609756469727, "learning_rate": 1.6076787622917673e-07, "loss": 0.0184, "step": 3021, "video_reward_cumulative_accuracy": 0.8533598146309169 }, { "epoch": 0.8970020777678837, "grad_norm": 2.3193140029907227, "learning_rate": 1.5985521133385168e-07, "loss": 0.0551, "step": 3022, "video_reward_cumulative_accuracy": 0.8534083388484447 }, { "epoch": 0.8972989017512615, "grad_norm": 3.8793790340423584, "learning_rate": 1.5894505881108635e-07, "loss": 0.0541, "step": 3023, "video_reward_cumulative_accuracy": 0.8531260337413166 }, { "epoch": 0.8975957257346393, "grad_norm": 1.8072460889816284, "learning_rate": 1.58037419638018e-07, "loss": 0.0564, "step": 3024, "video_reward_cumulative_accuracy": 0.8531746031746031 }, { "epoch": 0.8978925497180172, "grad_norm": 3.4717845916748047, "learning_rate": 1.5713229478908577e-07, "loss": 0.0491, "step": 3025, "video_reward_cumulative_accuracy": 0.8532231404958678 }, { "epoch": 0.8981893737013951, "grad_norm": 0.3415747880935669, "learning_rate": 1.562296852360279e-07, "loss": 0.0053, "step": 3026, "video_reward_cumulative_accuracy": 0.8532716457369465 }, { "epoch": 0.8984861976847729, "grad_norm": 1.9290162324905396, "learning_rate": 1.5532959194788395e-07, "loss": 0.035, "step": 3027, "video_reward_cumulative_accuracy": 0.8529897588371325 }, { "epoch": 0.8987830216681508, "grad_norm": 3.8427798748016357, "learning_rate": 1.5443201589099149e-07, "loss": 0.0378, "step": 3028, "video_reward_cumulative_accuracy": 0.8530383091149274 }, { "epoch": 0.8990798456515287, "grad_norm": 1.119760513305664, "learning_rate": 1.5353695802898556e-07, "loss": 0.0185, "step": 3029, "video_reward_cumulative_accuracy": 0.8530868273357544 }, { "epoch": 0.8993766696349065, "grad_norm": 1.9450827836990356, "learning_rate": 1.52644419322798e-07, "loss": 0.0364, "step": 3030, "video_reward_cumulative_accuracy": 0.8531353135313532 }, { "epoch": 0.8996734936182843, "grad_norm": 1.9632068872451782, "learning_rate": 1.5175440073065485e-07, "loss": 0.0607, "step": 3031, "video_reward_cumulative_accuracy": 0.8531837677334213 }, { "epoch": 0.8999703176016622, "grad_norm": 0.8110266327857971, "learning_rate": 1.508669032080781e-07, "loss": 0.0176, "step": 3032, "video_reward_cumulative_accuracy": 0.8532321899736148 }, { "epoch": 0.9002671415850401, "grad_norm": 1.5525119304656982, "learning_rate": 1.499819277078818e-07, "loss": 0.0092, "step": 3033, "video_reward_cumulative_accuracy": 0.8532805802835477 }, { "epoch": 0.9005639655684179, "grad_norm": 1.3226412534713745, "learning_rate": 1.4909947518017387e-07, "loss": 0.0238, "step": 3034, "video_reward_cumulative_accuracy": 0.8533289386947923 }, { "epoch": 0.9008607895517958, "grad_norm": 0.9626790881156921, "learning_rate": 1.4821954657235266e-07, "loss": 0.012, "step": 3035, "video_reward_cumulative_accuracy": 0.8533772652388797 }, { "epoch": 0.9011576135351737, "grad_norm": 0.8461338877677917, "learning_rate": 1.4734214282910664e-07, "loss": 0.0223, "step": 3036, "video_reward_cumulative_accuracy": 0.8532608695652174 }, { "epoch": 0.9014544375185515, "grad_norm": 4.338505268096924, "learning_rate": 1.4646726489241397e-07, "loss": 0.0458, "step": 3037, "video_reward_cumulative_accuracy": 0.8531445505432993 }, { "epoch": 0.9017512615019293, "grad_norm": 1.510209560394287, "learning_rate": 1.4559491370154083e-07, "loss": 0.0465, "step": 3038, "video_reward_cumulative_accuracy": 0.8531928900592495 }, { "epoch": 0.9020480854853072, "grad_norm": 2.6341168880462646, "learning_rate": 1.4472509019304053e-07, "loss": 0.0466, "step": 3039, "video_reward_cumulative_accuracy": 0.8532411977624218 }, { "epoch": 0.9023449094686851, "grad_norm": 2.911071300506592, "learning_rate": 1.4385779530075444e-07, "loss": 0.0256, "step": 3040, "video_reward_cumulative_accuracy": 0.8532894736842105 }, { "epoch": 0.9026417334520629, "grad_norm": 2.444882869720459, "learning_rate": 1.4299302995580634e-07, "loss": 0.027, "step": 3041, "video_reward_cumulative_accuracy": 0.8533377178559685 }, { "epoch": 0.9029385574354408, "grad_norm": 4.529541969299316, "learning_rate": 1.4213079508660688e-07, "loss": 0.051, "step": 3042, "video_reward_cumulative_accuracy": 0.8532215647600263 }, { "epoch": 0.9032353814188187, "grad_norm": 3.1979563236236572, "learning_rate": 1.412710916188481e-07, "loss": 0.0291, "step": 3043, "video_reward_cumulative_accuracy": 0.8532697995399277 }, { "epoch": 0.9035322054021965, "grad_norm": 3.461008071899414, "learning_rate": 1.4041392047550506e-07, "loss": 0.0434, "step": 3044, "video_reward_cumulative_accuracy": 0.8533180026281209 }, { "epoch": 0.9038290293855743, "grad_norm": 3.109524726867676, "learning_rate": 1.3955928257683465e-07, "loss": 0.0798, "step": 3045, "video_reward_cumulative_accuracy": 0.8533661740558293 }, { "epoch": 0.9041258533689522, "grad_norm": 3.351715326309204, "learning_rate": 1.3870717884037321e-07, "loss": 0.0346, "step": 3046, "video_reward_cumulative_accuracy": 0.853414313854235 }, { "epoch": 0.9044226773523301, "grad_norm": 1.3447494506835938, "learning_rate": 1.3785761018093757e-07, "loss": 0.0162, "step": 3047, "video_reward_cumulative_accuracy": 0.8534624220544799 }, { "epoch": 0.9047195013357079, "grad_norm": 4.213351249694824, "learning_rate": 1.3701057751062146e-07, "loss": 0.0712, "step": 3048, "video_reward_cumulative_accuracy": 0.853510498687664 }, { "epoch": 0.9050163253190858, "grad_norm": 2.7610223293304443, "learning_rate": 1.3616608173879636e-07, "loss": 0.0292, "step": 3049, "video_reward_cumulative_accuracy": 0.8535585437848475 }, { "epoch": 0.9053131493024636, "grad_norm": 0.9333323240280151, "learning_rate": 1.3532412377211119e-07, "loss": 0.0066, "step": 3050, "video_reward_cumulative_accuracy": 0.8536065573770492 }, { "epoch": 0.9056099732858415, "grad_norm": 2.758444309234619, "learning_rate": 1.3448470451448896e-07, "loss": 0.0446, "step": 3051, "video_reward_cumulative_accuracy": 0.8536545394952475 }, { "epoch": 0.9059067972692193, "grad_norm": 1.3979527950286865, "learning_rate": 1.3364782486712857e-07, "loss": 0.016, "step": 3052, "video_reward_cumulative_accuracy": 0.8535386631716907 }, { "epoch": 0.9062036212525972, "grad_norm": 2.014826774597168, "learning_rate": 1.3281348572850045e-07, "loss": 0.0658, "step": 3053, "video_reward_cumulative_accuracy": 0.853422862757943 }, { "epoch": 0.9065004452359751, "grad_norm": 1.4265682697296143, "learning_rate": 1.3198168799434947e-07, "loss": 0.0116, "step": 3054, "video_reward_cumulative_accuracy": 0.8534708578912901 }, { "epoch": 0.9067972692193529, "grad_norm": 1.420961618423462, "learning_rate": 1.3115243255769072e-07, "loss": 0.0359, "step": 3055, "video_reward_cumulative_accuracy": 0.853518821603928 }, { "epoch": 0.9070940932027308, "grad_norm": 1.8929790258407593, "learning_rate": 1.3032572030881097e-07, "loss": 0.0078, "step": 3056, "video_reward_cumulative_accuracy": 0.8535667539267016 }, { "epoch": 0.9073909171861086, "grad_norm": 3.999789237976074, "learning_rate": 1.295015521352652e-07, "loss": 0.0465, "step": 3057, "video_reward_cumulative_accuracy": 0.8536146548904154 }, { "epoch": 0.9076877411694865, "grad_norm": 1.7095073461532593, "learning_rate": 1.2867992892187846e-07, "loss": 0.0234, "step": 3058, "video_reward_cumulative_accuracy": 0.8534990189666448 }, { "epoch": 0.9079845651528643, "grad_norm": 1.0274499654769897, "learning_rate": 1.2786085155074318e-07, "loss": 0.0247, "step": 3059, "video_reward_cumulative_accuracy": 0.8533834586466166 }, { "epoch": 0.9082813891362422, "grad_norm": 3.9192311763763428, "learning_rate": 1.2704432090121815e-07, "loss": 0.0385, "step": 3060, "video_reward_cumulative_accuracy": 0.8534313725490196 }, { "epoch": 0.9085782131196201, "grad_norm": 0.8952836394309998, "learning_rate": 1.2623033784992855e-07, "loss": 0.0113, "step": 3061, "video_reward_cumulative_accuracy": 0.8534792551453774 }, { "epoch": 0.9088750371029979, "grad_norm": 0.603430449962616, "learning_rate": 1.2541890327076368e-07, "loss": 0.0136, "step": 3062, "video_reward_cumulative_accuracy": 0.8535271064663619 }, { "epoch": 0.9091718610863758, "grad_norm": 1.0425788164138794, "learning_rate": 1.246100180348775e-07, "loss": 0.0135, "step": 3063, "video_reward_cumulative_accuracy": 0.8535749265426053 }, { "epoch": 0.9094686850697536, "grad_norm": 0.720369279384613, "learning_rate": 1.2380368301068728e-07, "loss": 0.0103, "step": 3064, "video_reward_cumulative_accuracy": 0.8536227154046997 }, { "epoch": 0.9097655090531315, "grad_norm": 1.9932835102081299, "learning_rate": 1.2299989906387116e-07, "loss": 0.0127, "step": 3065, "video_reward_cumulative_accuracy": 0.8536704730831974 }, { "epoch": 0.9100623330365093, "grad_norm": 1.871851921081543, "learning_rate": 1.221986670573702e-07, "loss": 0.0446, "step": 3066, "video_reward_cumulative_accuracy": 0.8537181996086106 }, { "epoch": 0.9103591570198872, "grad_norm": 3.508359909057617, "learning_rate": 1.2139998785138386e-07, "loss": 0.0373, "step": 3067, "video_reward_cumulative_accuracy": 0.8537658950114118 }, { "epoch": 0.9106559810032651, "grad_norm": 1.359955072402954, "learning_rate": 1.20603862303372e-07, "loss": 0.0153, "step": 3068, "video_reward_cumulative_accuracy": 0.8538135593220338 }, { "epoch": 0.9109528049866429, "grad_norm": 0.9213302135467529, "learning_rate": 1.1981029126805293e-07, "loss": 0.0142, "step": 3069, "video_reward_cumulative_accuracy": 0.85386119257087 }, { "epoch": 0.9112496289700208, "grad_norm": 0.6175004839897156, "learning_rate": 1.1901927559740178e-07, "loss": 0.0092, "step": 3070, "video_reward_cumulative_accuracy": 0.8539087947882736 }, { "epoch": 0.9115464529533986, "grad_norm": 1.5162551403045654, "learning_rate": 1.1823081614065146e-07, "loss": 0.0549, "step": 3071, "video_reward_cumulative_accuracy": 0.8537935525887334 }, { "epoch": 0.9118432769367765, "grad_norm": 0.9067859053611755, "learning_rate": 1.1744491374428845e-07, "loss": 0.0102, "step": 3072, "video_reward_cumulative_accuracy": 0.8538411458333334 }, { "epoch": 0.9121401009201543, "grad_norm": 1.3547265529632568, "learning_rate": 1.1666156925205619e-07, "loss": 0.048, "step": 3073, "video_reward_cumulative_accuracy": 0.8538887081028311 }, { "epoch": 0.9124369249035322, "grad_norm": 2.882599353790283, "learning_rate": 1.158807835049508e-07, "loss": 0.0496, "step": 3074, "video_reward_cumulative_accuracy": 0.8537735849056604 }, { "epoch": 0.9127337488869101, "grad_norm": 2.568582773208618, "learning_rate": 1.151025573412215e-07, "loss": 0.0347, "step": 3075, "video_reward_cumulative_accuracy": 0.8536585365853658 }, { "epoch": 0.9130305728702879, "grad_norm": 1.8911688327789307, "learning_rate": 1.1432689159636995e-07, "loss": 0.0527, "step": 3076, "video_reward_cumulative_accuracy": 0.85370611183355 }, { "epoch": 0.9133273968536658, "grad_norm": 1.7498602867126465, "learning_rate": 1.1355378710314779e-07, "loss": 0.0607, "step": 3077, "video_reward_cumulative_accuracy": 0.853753656158596 }, { "epoch": 0.9136242208370436, "grad_norm": 2.1445226669311523, "learning_rate": 1.1278324469155888e-07, "loss": 0.0548, "step": 3078, "video_reward_cumulative_accuracy": 0.8538011695906432 }, { "epoch": 0.9139210448204215, "grad_norm": 2.0121655464172363, "learning_rate": 1.120152651888537e-07, "loss": 0.0366, "step": 3079, "video_reward_cumulative_accuracy": 0.8538486521597921 }, { "epoch": 0.9142178688037993, "grad_norm": 2.319936990737915, "learning_rate": 1.1124984941953465e-07, "loss": 0.0509, "step": 3080, "video_reward_cumulative_accuracy": 0.8538961038961039 }, { "epoch": 0.9145146927871772, "grad_norm": 3.709453582763672, "learning_rate": 1.1048699820534831e-07, "loss": 0.0419, "step": 3081, "video_reward_cumulative_accuracy": 0.8539435248296008 }, { "epoch": 0.914811516770555, "grad_norm": 1.580238938331604, "learning_rate": 1.0972671236529037e-07, "loss": 0.0369, "step": 3082, "video_reward_cumulative_accuracy": 0.853990914990266 }, { "epoch": 0.9151083407539329, "grad_norm": 1.9665991067886353, "learning_rate": 1.0896899271560152e-07, "loss": 0.0545, "step": 3083, "video_reward_cumulative_accuracy": 0.8540382744080441 }, { "epoch": 0.9154051647373108, "grad_norm": 1.6465821266174316, "learning_rate": 1.0821384006976631e-07, "loss": 0.0158, "step": 3084, "video_reward_cumulative_accuracy": 0.8540856031128404 }, { "epoch": 0.9157019887206886, "grad_norm": 3.7942326068878174, "learning_rate": 1.074612552385157e-07, "loss": 0.0206, "step": 3085, "video_reward_cumulative_accuracy": 0.8541329011345219 }, { "epoch": 0.9159988127040665, "grad_norm": 0.7322973012924194, "learning_rate": 1.0671123902982166e-07, "loss": 0.015, "step": 3086, "video_reward_cumulative_accuracy": 0.8541801685029164 }, { "epoch": 0.9162956366874443, "grad_norm": 1.750848412513733, "learning_rate": 1.0596379224889986e-07, "loss": 0.0467, "step": 3087, "video_reward_cumulative_accuracy": 0.8540654356980888 }, { "epoch": 0.9165924606708222, "grad_norm": 3.0623202323913574, "learning_rate": 1.0521891569820698e-07, "loss": 0.05, "step": 3088, "video_reward_cumulative_accuracy": 0.8541126943005182 }, { "epoch": 0.9168892846542, "grad_norm": 1.9967219829559326, "learning_rate": 1.0447661017743971e-07, "loss": 0.0225, "step": 3089, "video_reward_cumulative_accuracy": 0.8539980576238265 }, { "epoch": 0.9171861086375779, "grad_norm": 1.6754310131072998, "learning_rate": 1.0373687648353586e-07, "loss": 0.016, "step": 3090, "video_reward_cumulative_accuracy": 0.8538834951456311 }, { "epoch": 0.9174829326209558, "grad_norm": 3.090818166732788, "learning_rate": 1.02999715410671e-07, "loss": 0.0264, "step": 3091, "video_reward_cumulative_accuracy": 0.8539307667421546 }, { "epoch": 0.9177797566043336, "grad_norm": 0.48801377415657043, "learning_rate": 1.0226512775025899e-07, "loss": 0.0097, "step": 3092, "video_reward_cumulative_accuracy": 0.8539780077619664 }, { "epoch": 0.9180765805877115, "grad_norm": 0.5541077852249146, "learning_rate": 1.015331142909512e-07, "loss": 0.0132, "step": 3093, "video_reward_cumulative_accuracy": 0.8538635628839315 }, { "epoch": 0.9183734045710893, "grad_norm": 6.074105739593506, "learning_rate": 1.0080367581863425e-07, "loss": 0.0787, "step": 3094, "video_reward_cumulative_accuracy": 0.8537491919844861 }, { "epoch": 0.9186702285544672, "grad_norm": 1.0763957500457764, "learning_rate": 1.0007681311643258e-07, "loss": 0.0124, "step": 3095, "video_reward_cumulative_accuracy": 0.8537964458804523 }, { "epoch": 0.918967052537845, "grad_norm": 1.8836250305175781, "learning_rate": 9.935252696470305e-08, "loss": 0.027, "step": 3096, "video_reward_cumulative_accuracy": 0.853843669250646 }, { "epoch": 0.9192638765212229, "grad_norm": 0.8730959296226501, "learning_rate": 9.863081814103725e-08, "loss": 0.0128, "step": 3097, "video_reward_cumulative_accuracy": 0.8538908621246367 }, { "epoch": 0.9195607005046008, "grad_norm": 1.2798937559127808, "learning_rate": 9.791168742025958e-08, "loss": 0.0128, "step": 3098, "video_reward_cumulative_accuracy": 0.8539380245319561 }, { "epoch": 0.9198575244879786, "grad_norm": 1.4854799509048462, "learning_rate": 9.719513557442661e-08, "loss": 0.0283, "step": 3099, "video_reward_cumulative_accuracy": 0.8539851565020975 }, { "epoch": 0.9201543484713565, "grad_norm": 1.6215635538101196, "learning_rate": 9.648116337282631e-08, "loss": 0.0177, "step": 3100, "video_reward_cumulative_accuracy": 0.8540322580645161 }, { "epoch": 0.9204511724547343, "grad_norm": 5.3873209953308105, "learning_rate": 9.57697715819772e-08, "loss": 0.0611, "step": 3101, "video_reward_cumulative_accuracy": 0.8540793292486295 }, { "epoch": 0.9207479964381122, "grad_norm": 1.7452203035354614, "learning_rate": 9.506096096562806e-08, "loss": 0.0156, "step": 3102, "video_reward_cumulative_accuracy": 0.8541263700838169 }, { "epoch": 0.92104482042149, "grad_norm": 1.4887933731079102, "learning_rate": 9.435473228475462e-08, "loss": 0.0424, "step": 3103, "video_reward_cumulative_accuracy": 0.8541733805994199 }, { "epoch": 0.9213416444048679, "grad_norm": 2.0373613834381104, "learning_rate": 9.365108629756259e-08, "loss": 0.0173, "step": 3104, "video_reward_cumulative_accuracy": 0.8542203608247423 }, { "epoch": 0.9216384683882458, "grad_norm": 4.531852722167969, "learning_rate": 9.295002375948436e-08, "loss": 0.0994, "step": 3105, "video_reward_cumulative_accuracy": 0.8541062801932368 }, { "epoch": 0.9219352923716236, "grad_norm": 0.6652273535728455, "learning_rate": 9.225154542317843e-08, "loss": 0.0133, "step": 3106, "video_reward_cumulative_accuracy": 0.8541532517707663 }, { "epoch": 0.9222321163550015, "grad_norm": 0.861679196357727, "learning_rate": 9.15556520385294e-08, "loss": 0.0291, "step": 3107, "video_reward_cumulative_accuracy": 0.854200193112327 }, { "epoch": 0.9225289403383793, "grad_norm": 2.65281343460083, "learning_rate": 9.086234435264574e-08, "loss": 0.0827, "step": 3108, "video_reward_cumulative_accuracy": 0.8540862290862291 }, { "epoch": 0.9228257643217572, "grad_norm": 1.5000420808792114, "learning_rate": 9.017162310986067e-08, "loss": 0.0087, "step": 3109, "video_reward_cumulative_accuracy": 0.8541331617883564 }, { "epoch": 0.923122588305135, "grad_norm": 0.5715723633766174, "learning_rate": 8.94834890517307e-08, "loss": 0.0047, "step": 3110, "video_reward_cumulative_accuracy": 0.8541800643086817 }, { "epoch": 0.9234194122885129, "grad_norm": 2.6026268005371094, "learning_rate": 8.879794291703464e-08, "loss": 0.0407, "step": 3111, "video_reward_cumulative_accuracy": 0.8542269366763099 }, { "epoch": 0.9237162362718908, "grad_norm": 1.6955796480178833, "learning_rate": 8.811498544177316e-08, "loss": 0.0212, "step": 3112, "video_reward_cumulative_accuracy": 0.8542737789203085 }, { "epoch": 0.9240130602552686, "grad_norm": 2.5438053607940674, "learning_rate": 8.743461735916642e-08, "loss": 0.039, "step": 3113, "video_reward_cumulative_accuracy": 0.8543205910697077 }, { "epoch": 0.9243098842386465, "grad_norm": 2.37009859085083, "learning_rate": 8.675683939965595e-08, "loss": 0.0194, "step": 3114, "video_reward_cumulative_accuracy": 0.8543673731535003 }, { "epoch": 0.9246067082220243, "grad_norm": 3.996877908706665, "learning_rate": 8.608165229090248e-08, "loss": 0.0388, "step": 3115, "video_reward_cumulative_accuracy": 0.8540930979133227 }, { "epoch": 0.9249035322054022, "grad_norm": 1.1192494630813599, "learning_rate": 8.540905675778504e-08, "loss": 0.0274, "step": 3116, "video_reward_cumulative_accuracy": 0.8541399229781772 }, { "epoch": 0.92520035618878, "grad_norm": 3.1228842735290527, "learning_rate": 8.473905352239936e-08, "loss": 0.0702, "step": 3117, "video_reward_cumulative_accuracy": 0.8541867179980751 }, { "epoch": 0.9254971801721579, "grad_norm": 2.0726311206817627, "learning_rate": 8.407164330405976e-08, "loss": 0.0352, "step": 3118, "video_reward_cumulative_accuracy": 0.8542334830019244 }, { "epoch": 0.9257940041555358, "grad_norm": 2.6938111782073975, "learning_rate": 8.34068268192953e-08, "loss": 0.0524, "step": 3119, "video_reward_cumulative_accuracy": 0.8542802180185957 }, { "epoch": 0.9260908281389136, "grad_norm": 2.6164710521698, "learning_rate": 8.27446047818517e-08, "loss": 0.0353, "step": 3120, "video_reward_cumulative_accuracy": 0.854326923076923 }, { "epoch": 0.9263876521222915, "grad_norm": 2.0380940437316895, "learning_rate": 8.208497790268833e-08, "loss": 0.0319, "step": 3121, "video_reward_cumulative_accuracy": 0.8543735982057034 }, { "epoch": 0.9266844761056693, "grad_norm": 1.6423115730285645, "learning_rate": 8.142794688997812e-08, "loss": 0.014, "step": 3122, "video_reward_cumulative_accuracy": 0.8544202434336964 }, { "epoch": 0.9269813000890472, "grad_norm": 2.5781912803649902, "learning_rate": 8.077351244910825e-08, "loss": 0.0401, "step": 3123, "video_reward_cumulative_accuracy": 0.8543067563240474 }, { "epoch": 0.927278124072425, "grad_norm": 2.5099053382873535, "learning_rate": 8.012167528267723e-08, "loss": 0.0258, "step": 3124, "video_reward_cumulative_accuracy": 0.8541933418693982 }, { "epoch": 0.9275749480558029, "grad_norm": 1.2233213186264038, "learning_rate": 7.947243609049581e-08, "loss": 0.0303, "step": 3125, "video_reward_cumulative_accuracy": 0.85408 }, { "epoch": 0.9278717720391808, "grad_norm": 2.0083823204040527, "learning_rate": 7.882579556958536e-08, "loss": 0.0336, "step": 3126, "video_reward_cumulative_accuracy": 0.8541266794625719 }, { "epoch": 0.9281685960225586, "grad_norm": 2.4612114429473877, "learning_rate": 7.818175441417692e-08, "loss": 0.0207, "step": 3127, "video_reward_cumulative_accuracy": 0.8541733290693956 }, { "epoch": 0.9284654200059365, "grad_norm": 2.7933738231658936, "learning_rate": 7.754031331571127e-08, "loss": 0.0375, "step": 3128, "video_reward_cumulative_accuracy": 0.8542199488491049 }, { "epoch": 0.9287622439893143, "grad_norm": 3.2718217372894287, "learning_rate": 7.690147296283757e-08, "loss": 0.0352, "step": 3129, "video_reward_cumulative_accuracy": 0.8541067433684884 }, { "epoch": 0.9290590679726922, "grad_norm": 0.4795750081539154, "learning_rate": 7.626523404141328e-08, "loss": 0.0169, "step": 3130, "video_reward_cumulative_accuracy": 0.8539936102236422 }, { "epoch": 0.92935589195607, "grad_norm": 2.361750602722168, "learning_rate": 7.563159723450259e-08, "loss": 0.0193, "step": 3131, "video_reward_cumulative_accuracy": 0.8540402427339508 }, { "epoch": 0.9296527159394479, "grad_norm": 1.1606436967849731, "learning_rate": 7.500056322237576e-08, "loss": 0.0291, "step": 3132, "video_reward_cumulative_accuracy": 0.8540868454661558 }, { "epoch": 0.9299495399228258, "grad_norm": 2.2710154056549072, "learning_rate": 7.437213268250948e-08, "loss": 0.0486, "step": 3133, "video_reward_cumulative_accuracy": 0.8539738270028726 }, { "epoch": 0.9302463639062036, "grad_norm": 1.7540185451507568, "learning_rate": 7.374630628958462e-08, "loss": 0.0182, "step": 3134, "video_reward_cumulative_accuracy": 0.8540204211869815 }, { "epoch": 0.9305431878895815, "grad_norm": 0.9131705164909363, "learning_rate": 7.312308471548624e-08, "loss": 0.0137, "step": 3135, "video_reward_cumulative_accuracy": 0.854066985645933 }, { "epoch": 0.9308400118729593, "grad_norm": 1.74937105178833, "learning_rate": 7.250246862930411e-08, "loss": 0.0159, "step": 3136, "video_reward_cumulative_accuracy": 0.8541135204081632 }, { "epoch": 0.9311368358563372, "grad_norm": 0.4030010402202606, "learning_rate": 7.188445869732913e-08, "loss": 0.0111, "step": 3137, "video_reward_cumulative_accuracy": 0.854160025502072 }, { "epoch": 0.931433659839715, "grad_norm": 4.948916435241699, "learning_rate": 7.12690555830553e-08, "loss": 0.065, "step": 3138, "video_reward_cumulative_accuracy": 0.8542065009560229 }, { "epoch": 0.9317304838230929, "grad_norm": 3.3688721656799316, "learning_rate": 7.065625994717717e-08, "loss": 0.0579, "step": 3139, "video_reward_cumulative_accuracy": 0.8542529467983434 }, { "epoch": 0.9320273078064708, "grad_norm": 3.2429494857788086, "learning_rate": 7.004607244759071e-08, "loss": 0.0308, "step": 3140, "video_reward_cumulative_accuracy": 0.8542993630573248 }, { "epoch": 0.9323241317898486, "grad_norm": 2.271911144256592, "learning_rate": 6.943849373939132e-08, "loss": 0.0129, "step": 3141, "video_reward_cumulative_accuracy": 0.8543457497612226 }, { "epoch": 0.9326209557732265, "grad_norm": 3.0307109355926514, "learning_rate": 6.883352447487363e-08, "loss": 0.03, "step": 3142, "video_reward_cumulative_accuracy": 0.8543921069382558 }, { "epoch": 0.9329177797566043, "grad_norm": 3.5836822986602783, "learning_rate": 6.823116530353113e-08, "loss": 0.0431, "step": 3143, "video_reward_cumulative_accuracy": 0.8544384346166083 }, { "epoch": 0.9332146037399822, "grad_norm": 2.957613706588745, "learning_rate": 6.763141687205432e-08, "loss": 0.0648, "step": 3144, "video_reward_cumulative_accuracy": 0.8544847328244275 }, { "epoch": 0.93351142772336, "grad_norm": 1.7011269330978394, "learning_rate": 6.703427982433202e-08, "loss": 0.0163, "step": 3145, "video_reward_cumulative_accuracy": 0.8545310015898251 }, { "epoch": 0.9338082517067379, "grad_norm": 0.9397748112678528, "learning_rate": 6.64397548014481e-08, "loss": 0.0156, "step": 3146, "video_reward_cumulative_accuracy": 0.8545772409408773 }, { "epoch": 0.9341050756901158, "grad_norm": 0.7569989562034607, "learning_rate": 6.584784244168335e-08, "loss": 0.0134, "step": 3147, "video_reward_cumulative_accuracy": 0.8546234509056244 }, { "epoch": 0.9344018996734936, "grad_norm": 2.637448310852051, "learning_rate": 6.525854338051335e-08, "loss": 0.0423, "step": 3148, "video_reward_cumulative_accuracy": 0.8546696315120712 }, { "epoch": 0.9346987236568715, "grad_norm": 2.607063055038452, "learning_rate": 6.467185825060728e-08, "loss": 0.0455, "step": 3149, "video_reward_cumulative_accuracy": 0.8547157827881867 }, { "epoch": 0.9349955476402493, "grad_norm": 1.5537196397781372, "learning_rate": 6.408778768182883e-08, "loss": 0.0456, "step": 3150, "video_reward_cumulative_accuracy": 0.8546031746031746 }, { "epoch": 0.9352923716236272, "grad_norm": 2.6270835399627686, "learning_rate": 6.350633230123443e-08, "loss": 0.0361, "step": 3151, "video_reward_cumulative_accuracy": 0.8544906378927325 }, { "epoch": 0.935589195607005, "grad_norm": 2.062340259552002, "learning_rate": 6.292749273307308e-08, "loss": 0.0188, "step": 3152, "video_reward_cumulative_accuracy": 0.8545368020304569 }, { "epoch": 0.9358860195903829, "grad_norm": 1.0614312887191772, "learning_rate": 6.23512695987849e-08, "loss": 0.0088, "step": 3153, "video_reward_cumulative_accuracy": 0.8545829368855059 }, { "epoch": 0.9361828435737608, "grad_norm": 1.1237103939056396, "learning_rate": 6.177766351700115e-08, "loss": 0.0129, "step": 3154, "video_reward_cumulative_accuracy": 0.8546290424857323 }, { "epoch": 0.9364796675571386, "grad_norm": 3.2817189693450928, "learning_rate": 6.120667510354422e-08, "loss": 0.0625, "step": 3155, "video_reward_cumulative_accuracy": 0.8546751188589541 }, { "epoch": 0.9367764915405165, "grad_norm": 2.6169803142547607, "learning_rate": 6.063830497142514e-08, "loss": 0.0305, "step": 3156, "video_reward_cumulative_accuracy": 0.8547211660329531 }, { "epoch": 0.9370733155238943, "grad_norm": 2.9755945205688477, "learning_rate": 6.007255373084498e-08, "loss": 0.0336, "step": 3157, "video_reward_cumulative_accuracy": 0.8547671840354767 }, { "epoch": 0.9373701395072722, "grad_norm": 0.9379553198814392, "learning_rate": 5.950942198919202e-08, "loss": 0.0127, "step": 3158, "video_reward_cumulative_accuracy": 0.8548131728942369 }, { "epoch": 0.93766696349065, "grad_norm": 1.1103352308273315, "learning_rate": 5.8948910351042943e-08, "loss": 0.0217, "step": 3159, "video_reward_cumulative_accuracy": 0.8548591326369104 }, { "epoch": 0.9379637874740279, "grad_norm": 2.294884443283081, "learning_rate": 5.839101941816166e-08, "loss": 0.0284, "step": 3160, "video_reward_cumulative_accuracy": 0.854746835443038 }, { "epoch": 0.9382606114574058, "grad_norm": 1.6113910675048828, "learning_rate": 5.783574978949796e-08, "loss": 0.0242, "step": 3161, "video_reward_cumulative_accuracy": 0.8547927870926922 }, { "epoch": 0.9385574354407836, "grad_norm": 1.6590948104858398, "learning_rate": 5.728310206118776e-08, "loss": 0.0327, "step": 3162, "video_reward_cumulative_accuracy": 0.8548387096774194 }, { "epoch": 0.9388542594241615, "grad_norm": 0.5105149149894714, "learning_rate": 5.6733076826552267e-08, "loss": 0.0084, "step": 3163, "video_reward_cumulative_accuracy": 0.8548846032247865 }, { "epoch": 0.9391510834075393, "grad_norm": 2.3923985958099365, "learning_rate": 5.618567467609637e-08, "loss": 0.0233, "step": 3164, "video_reward_cumulative_accuracy": 0.8549304677623262 }, { "epoch": 0.9394479073909172, "grad_norm": 2.449561834335327, "learning_rate": 5.564089619751023e-08, "loss": 0.0187, "step": 3165, "video_reward_cumulative_accuracy": 0.8549763033175355 }, { "epoch": 0.939744731374295, "grad_norm": 0.8511427044868469, "learning_rate": 5.509874197566573e-08, "loss": 0.015, "step": 3166, "video_reward_cumulative_accuracy": 0.8550221099178774 }, { "epoch": 0.9400415553576729, "grad_norm": 2.5197205543518066, "learning_rate": 5.455921259261837e-08, "loss": 0.0333, "step": 3167, "video_reward_cumulative_accuracy": 0.8550678875907799 }, { "epoch": 0.9403383793410508, "grad_norm": 2.1884961128234863, "learning_rate": 5.40223086276051e-08, "loss": 0.0673, "step": 3168, "video_reward_cumulative_accuracy": 0.8551136363636364 }, { "epoch": 0.9406352033244286, "grad_norm": 2.423147678375244, "learning_rate": 5.348803065704483e-08, "loss": 0.0129, "step": 3169, "video_reward_cumulative_accuracy": 0.8550015777847901 }, { "epoch": 0.9409320273078065, "grad_norm": 2.657482624053955, "learning_rate": 5.2956379254536226e-08, "loss": 0.058, "step": 3170, "video_reward_cumulative_accuracy": 0.8550473186119874 }, { "epoch": 0.9412288512911843, "grad_norm": 3.016252040863037, "learning_rate": 5.2427354990859106e-08, "loss": 0.028, "step": 3171, "video_reward_cumulative_accuracy": 0.8550930305897193 }, { "epoch": 0.9415256752745622, "grad_norm": 2.315845012664795, "learning_rate": 5.190095843397275e-08, "loss": 0.0678, "step": 3172, "video_reward_cumulative_accuracy": 0.8551387137452712 }, { "epoch": 0.94182249925794, "grad_norm": 0.9423714876174927, "learning_rate": 5.137719014901399e-08, "loss": 0.0241, "step": 3173, "video_reward_cumulative_accuracy": 0.8551843681058935 }, { "epoch": 0.9421193232413179, "grad_norm": 1.8504040241241455, "learning_rate": 5.0856050698299684e-08, "loss": 0.0228, "step": 3174, "video_reward_cumulative_accuracy": 0.855072463768116 }, { "epoch": 0.9424161472246958, "grad_norm": 1.4434008598327637, "learning_rate": 5.0337540641322846e-08, "loss": 0.0293, "step": 3175, "video_reward_cumulative_accuracy": 0.8551181102362204 }, { "epoch": 0.9427129712080736, "grad_norm": 2.862295627593994, "learning_rate": 4.9821660534755125e-08, "loss": 0.0318, "step": 3176, "video_reward_cumulative_accuracy": 0.8550062972292192 }, { "epoch": 0.9430097951914514, "grad_norm": 1.7346522808074951, "learning_rate": 4.930841093244349e-08, "loss": 0.0158, "step": 3177, "video_reward_cumulative_accuracy": 0.8550519357884797 }, { "epoch": 0.9433066191748293, "grad_norm": 3.134310722351074, "learning_rate": 4.8797792385411325e-08, "loss": 0.0237, "step": 3178, "video_reward_cumulative_accuracy": 0.85509754562618 }, { "epoch": 0.9436034431582072, "grad_norm": 1.7474472522735596, "learning_rate": 4.828980544185735e-08, "loss": 0.0242, "step": 3179, "video_reward_cumulative_accuracy": 0.8551431267694244 }, { "epoch": 0.943900267141585, "grad_norm": 4.279950141906738, "learning_rate": 4.77844506471542e-08, "loss": 0.0626, "step": 3180, "video_reward_cumulative_accuracy": 0.855188679245283 }, { "epoch": 0.9441970911249629, "grad_norm": 2.3081438541412354, "learning_rate": 4.728172854385038e-08, "loss": 0.0226, "step": 3181, "video_reward_cumulative_accuracy": 0.8552342030807922 }, { "epoch": 0.9444939151083408, "grad_norm": 1.8738723993301392, "learning_rate": 4.678163967166582e-08, "loss": 0.0248, "step": 3182, "video_reward_cumulative_accuracy": 0.8552796983029541 }, { "epoch": 0.9447907390917186, "grad_norm": 3.566354990005493, "learning_rate": 4.628418456749495e-08, "loss": 0.0372, "step": 3183, "video_reward_cumulative_accuracy": 0.8551680804272699 }, { "epoch": 0.9450875630750964, "grad_norm": 4.133174419403076, "learning_rate": 4.5789363765404436e-08, "loss": 0.0472, "step": 3184, "video_reward_cumulative_accuracy": 0.855213567839196 }, { "epoch": 0.9453843870584743, "grad_norm": 2.822946310043335, "learning_rate": 4.529717779663129e-08, "loss": 0.0269, "step": 3185, "video_reward_cumulative_accuracy": 0.8551020408163266 }, { "epoch": 0.9456812110418522, "grad_norm": 4.163548946380615, "learning_rate": 4.4807627189586425e-08, "loss": 0.0885, "step": 3186, "video_reward_cumulative_accuracy": 0.8551475204017577 }, { "epoch": 0.94597803502523, "grad_norm": 2.597421169281006, "learning_rate": 4.432071246984859e-08, "loss": 0.0321, "step": 3187, "video_reward_cumulative_accuracy": 0.8551929714465014 }, { "epoch": 0.9462748590086079, "grad_norm": 3.691058397293091, "learning_rate": 4.383643416016908e-08, "loss": 0.0655, "step": 3188, "video_reward_cumulative_accuracy": 0.855081555834379 }, { "epoch": 0.9465716829919858, "grad_norm": 2.430384874343872, "learning_rate": 4.3354792780467004e-08, "loss": 0.026, "step": 3189, "video_reward_cumulative_accuracy": 0.8549702100972092 }, { "epoch": 0.9468685069753636, "grad_norm": 2.202420234680176, "learning_rate": 4.287578884783122e-08, "loss": 0.0367, "step": 3190, "video_reward_cumulative_accuracy": 0.8550156739811913 }, { "epoch": 0.9471653309587414, "grad_norm": 2.3409032821655273, "learning_rate": 4.2399422876518995e-08, "loss": 0.0338, "step": 3191, "video_reward_cumulative_accuracy": 0.8550611093701034 }, { "epoch": 0.9474621549421193, "grad_norm": 1.7721819877624512, "learning_rate": 4.192569537795538e-08, "loss": 0.0257, "step": 3192, "video_reward_cumulative_accuracy": 0.8551065162907269 }, { "epoch": 0.9477589789254972, "grad_norm": 1.6074609756469727, "learning_rate": 4.145460686073327e-08, "loss": 0.0371, "step": 3193, "video_reward_cumulative_accuracy": 0.8549953022236142 }, { "epoch": 0.948055802908875, "grad_norm": 2.8421127796173096, "learning_rate": 4.098615783061144e-08, "loss": 0.1067, "step": 3194, "video_reward_cumulative_accuracy": 0.8550407013149656 }, { "epoch": 0.9483526268922529, "grad_norm": 0.6036382913589478, "learning_rate": 4.0520348790515084e-08, "loss": 0.0188, "step": 3195, "video_reward_cumulative_accuracy": 0.8550860719874804 }, { "epoch": 0.9486494508756308, "grad_norm": 2.0517990589141846, "learning_rate": 4.005718024053612e-08, "loss": 0.024, "step": 3196, "video_reward_cumulative_accuracy": 0.8549749687108886 }, { "epoch": 0.9489462748590086, "grad_norm": 1.565876841545105, "learning_rate": 3.959665267793067e-08, "loss": 0.0138, "step": 3197, "video_reward_cumulative_accuracy": 0.8550203315608382 }, { "epoch": 0.9492430988423864, "grad_norm": 2.432234525680542, "learning_rate": 3.91387665971199e-08, "loss": 0.0496, "step": 3198, "video_reward_cumulative_accuracy": 0.8550656660412758 }, { "epoch": 0.9495399228257643, "grad_norm": 0.7143085598945618, "learning_rate": 3.868352248968865e-08, "loss": 0.0061, "step": 3199, "video_reward_cumulative_accuracy": 0.8551109721788058 }, { "epoch": 0.9498367468091422, "grad_norm": 1.3272526264190674, "learning_rate": 3.823092084438568e-08, "loss": 0.0249, "step": 3200, "video_reward_cumulative_accuracy": 0.85515625 }, { "epoch": 0.9498367468091422, "eval_runtime": 136.0857, "eval_samples_per_second": 5.798, "eval_steps_per_second": 0.727, "eval_test_set_accuracy": 0.8320707070707071, "step": 3200 }, { "epoch": 0.95013357079252, "grad_norm": 1.0475165843963623, "learning_rate": 3.778096214712285e-08, "loss": 0.0085, "step": 3201, "video_reward_cumulative_accuracy": 0.8552014995313965 }, { "epoch": 0.9504303947758979, "grad_norm": 2.317171335220337, "learning_rate": 3.733364688097485e-08, "loss": 0.0783, "step": 3202, "video_reward_cumulative_accuracy": 0.8552467207995003 }, { "epoch": 0.9507272187592758, "grad_norm": 2.8476126194000244, "learning_rate": 3.6888975526177815e-08, "loss": 0.0534, "step": 3203, "video_reward_cumulative_accuracy": 0.8552919138307836 }, { "epoch": 0.9510240427426536, "grad_norm": 3.3761157989501953, "learning_rate": 3.6446948560129314e-08, "loss": 0.0177, "step": 3204, "video_reward_cumulative_accuracy": 0.8553370786516854 }, { "epoch": 0.9513208667260314, "grad_norm": 1.30337655544281, "learning_rate": 3.600756645738834e-08, "loss": 0.0228, "step": 3205, "video_reward_cumulative_accuracy": 0.8553822152886116 }, { "epoch": 0.9516176907094093, "grad_norm": 4.232937812805176, "learning_rate": 3.557082968967423e-08, "loss": 0.0748, "step": 3206, "video_reward_cumulative_accuracy": 0.8552713661883967 }, { "epoch": 0.9519145146927872, "grad_norm": 3.1819849014282227, "learning_rate": 3.5136738725866646e-08, "loss": 0.0287, "step": 3207, "video_reward_cumulative_accuracy": 0.8553164951668226 }, { "epoch": 0.952211338676165, "grad_norm": 0.5942104458808899, "learning_rate": 3.47052940320039e-08, "loss": 0.0064, "step": 3208, "video_reward_cumulative_accuracy": 0.8553615960099751 }, { "epoch": 0.9525081626595429, "grad_norm": 2.6708426475524902, "learning_rate": 3.4276496071284084e-08, "loss": 0.0283, "step": 3209, "video_reward_cumulative_accuracy": 0.855406668744157 }, { "epoch": 0.9528049866429208, "grad_norm": 0.9444103240966797, "learning_rate": 3.385034530406311e-08, "loss": 0.0048, "step": 3210, "video_reward_cumulative_accuracy": 0.8554517133956386 }, { "epoch": 0.9531018106262986, "grad_norm": 1.708574652671814, "learning_rate": 3.34268421878553e-08, "loss": 0.0249, "step": 3211, "video_reward_cumulative_accuracy": 0.8554967299906571 }, { "epoch": 0.9533986346096764, "grad_norm": 1.0022554397583008, "learning_rate": 3.300598717733278e-08, "loss": 0.0133, "step": 3212, "video_reward_cumulative_accuracy": 0.8555417185554172 }, { "epoch": 0.9536954585930543, "grad_norm": 1.676685094833374, "learning_rate": 3.258778072432356e-08, "loss": 0.0193, "step": 3213, "video_reward_cumulative_accuracy": 0.8555866791160909 }, { "epoch": 0.9539922825764322, "grad_norm": 2.026139259338379, "learning_rate": 3.217222327781322e-08, "loss": 0.0297, "step": 3214, "video_reward_cumulative_accuracy": 0.8556316116988176 }, { "epoch": 0.95428910655981, "grad_norm": 0.3578716516494751, "learning_rate": 3.175931528394294e-08, "loss": 0.009, "step": 3215, "video_reward_cumulative_accuracy": 0.8556765163297045 }, { "epoch": 0.9545859305431879, "grad_norm": 1.398017406463623, "learning_rate": 3.134905718600978e-08, "loss": 0.0171, "step": 3216, "video_reward_cumulative_accuracy": 0.8557213930348259 }, { "epoch": 0.9548827545265658, "grad_norm": 2.376380205154419, "learning_rate": 3.094144942446531e-08, "loss": 0.0243, "step": 3217, "video_reward_cumulative_accuracy": 0.8557662418402238 }, { "epoch": 0.9551795785099436, "grad_norm": 2.7739336490631104, "learning_rate": 3.053649243691587e-08, "loss": 0.0437, "step": 3218, "video_reward_cumulative_accuracy": 0.8556556867619639 }, { "epoch": 0.9554764024933214, "grad_norm": 2.0257623195648193, "learning_rate": 3.013418665812257e-08, "loss": 0.0569, "step": 3219, "video_reward_cumulative_accuracy": 0.8557005281143212 }, { "epoch": 0.9557732264766993, "grad_norm": 2.442366600036621, "learning_rate": 2.973453251999936e-08, "loss": 0.0582, "step": 3220, "video_reward_cumulative_accuracy": 0.8557453416149068 }, { "epoch": 0.9560700504600772, "grad_norm": 1.158838152885437, "learning_rate": 2.933753045161386e-08, "loss": 0.0107, "step": 3221, "video_reward_cumulative_accuracy": 0.8557901272896616 }, { "epoch": 0.956366874443455, "grad_norm": 1.9528629779815674, "learning_rate": 2.8943180879186517e-08, "loss": 0.0212, "step": 3222, "video_reward_cumulative_accuracy": 0.8558348851644941 }, { "epoch": 0.9566636984268329, "grad_norm": 2.6688528060913086, "learning_rate": 2.85514842260895e-08, "loss": 0.0283, "step": 3223, "video_reward_cumulative_accuracy": 0.8557244802978592 }, { "epoch": 0.9569605224102108, "grad_norm": 0.49328845739364624, "learning_rate": 2.8162440912847532e-08, "loss": 0.0061, "step": 3224, "video_reward_cumulative_accuracy": 0.8557692307692307 }, { "epoch": 0.9572573463935886, "grad_norm": 4.010133266448975, "learning_rate": 2.7776051357135957e-08, "loss": 0.0418, "step": 3225, "video_reward_cumulative_accuracy": 0.8558139534883721 }, { "epoch": 0.9575541703769664, "grad_norm": 1.154459834098816, "learning_rate": 2.7392315973781835e-08, "loss": 0.0174, "step": 3226, "video_reward_cumulative_accuracy": 0.8558586484810912 }, { "epoch": 0.9578509943603443, "grad_norm": 1.608305811882019, "learning_rate": 2.7011235174762284e-08, "loss": 0.0243, "step": 3227, "video_reward_cumulative_accuracy": 0.8559033157731639 }, { "epoch": 0.9581478183437222, "grad_norm": 0.8377227783203125, "learning_rate": 2.6632809369204205e-08, "loss": 0.0106, "step": 3228, "video_reward_cumulative_accuracy": 0.8559479553903345 }, { "epoch": 0.9584446423271, "grad_norm": 2.1696717739105225, "learning_rate": 2.6257038963385106e-08, "loss": 0.0256, "step": 3229, "video_reward_cumulative_accuracy": 0.8559925673583153 }, { "epoch": 0.9587414663104779, "grad_norm": 1.0289219617843628, "learning_rate": 2.588392436073034e-08, "loss": 0.0131, "step": 3230, "video_reward_cumulative_accuracy": 0.8558823529411764 }, { "epoch": 0.9590382902938558, "grad_norm": 0.3926885426044464, "learning_rate": 2.5513465961814475e-08, "loss": 0.0062, "step": 3231, "video_reward_cumulative_accuracy": 0.8559269575982668 }, { "epoch": 0.9593351142772336, "grad_norm": 1.4411845207214355, "learning_rate": 2.5145664164361593e-08, "loss": 0.0107, "step": 3232, "video_reward_cumulative_accuracy": 0.8559715346534653 }, { "epoch": 0.9596319382606114, "grad_norm": 0.6198068857192993, "learning_rate": 2.4780519363241663e-08, "loss": 0.0051, "step": 3233, "video_reward_cumulative_accuracy": 0.8560160841323848 }, { "epoch": 0.9599287622439893, "grad_norm": 1.5043619871139526, "learning_rate": 2.4418031950473597e-08, "loss": 0.0278, "step": 3234, "video_reward_cumulative_accuracy": 0.8560606060606061 }, { "epoch": 0.9602255862273672, "grad_norm": 2.1934850215911865, "learning_rate": 2.405820231522249e-08, "loss": 0.0246, "step": 3235, "video_reward_cumulative_accuracy": 0.855950540958269 }, { "epoch": 0.960522410210745, "grad_norm": 3.164400100708008, "learning_rate": 2.3701030843800433e-08, "loss": 0.025, "step": 3236, "video_reward_cumulative_accuracy": 0.8559950556242274 }, { "epoch": 0.9608192341941229, "grad_norm": 0.8490694761276245, "learning_rate": 2.334651791966569e-08, "loss": 0.0187, "step": 3237, "video_reward_cumulative_accuracy": 0.8560395427865307 }, { "epoch": 0.9611160581775008, "grad_norm": 1.7095571756362915, "learning_rate": 2.2994663923422422e-08, "loss": 0.0225, "step": 3238, "video_reward_cumulative_accuracy": 0.8560840024706609 }, { "epoch": 0.9614128821608786, "grad_norm": 1.3501557111740112, "learning_rate": 2.2645469232820127e-08, "loss": 0.0204, "step": 3239, "video_reward_cumulative_accuracy": 0.8561284347020686 }, { "epoch": 0.9617097061442564, "grad_norm": 0.568364143371582, "learning_rate": 2.229893422275281e-08, "loss": 0.0066, "step": 3240, "video_reward_cumulative_accuracy": 0.8561728395061728 }, { "epoch": 0.9620065301276343, "grad_norm": 1.0702372789382935, "learning_rate": 2.1955059265259815e-08, "loss": 0.0162, "step": 3241, "video_reward_cumulative_accuracy": 0.8562172169083616 }, { "epoch": 0.9623033541110122, "grad_norm": 4.149465560913086, "learning_rate": 2.161384472952416e-08, "loss": 0.0673, "step": 3242, "video_reward_cumulative_accuracy": 0.8562615669339914 }, { "epoch": 0.96260017809439, "grad_norm": 2.0406556129455566, "learning_rate": 2.1275290981872532e-08, "loss": 0.0408, "step": 3243, "video_reward_cumulative_accuracy": 0.8563058896083873 }, { "epoch": 0.9628970020777678, "grad_norm": 1.5414153337478638, "learning_rate": 2.0939398385775578e-08, "loss": 0.0101, "step": 3244, "video_reward_cumulative_accuracy": 0.8563501849568435 }, { "epoch": 0.9631938260611458, "grad_norm": 3.055986166000366, "learning_rate": 2.0606167301846503e-08, "loss": 0.0537, "step": 3245, "video_reward_cumulative_accuracy": 0.8563944530046225 }, { "epoch": 0.9634906500445236, "grad_norm": 1.648927927017212, "learning_rate": 2.0275598087841075e-08, "loss": 0.0365, "step": 3246, "video_reward_cumulative_accuracy": 0.8564386937769563 }, { "epoch": 0.9637874740279014, "grad_norm": 3.5667824745178223, "learning_rate": 1.994769109865735e-08, "loss": 0.0396, "step": 3247, "video_reward_cumulative_accuracy": 0.8564829072990453 }, { "epoch": 0.9640842980112793, "grad_norm": 1.4304312467575073, "learning_rate": 1.962244668633595e-08, "loss": 0.0193, "step": 3248, "video_reward_cumulative_accuracy": 0.8565270935960592 }, { "epoch": 0.9643811219946572, "grad_norm": 4.079548358917236, "learning_rate": 1.9299865200057556e-08, "loss": 0.0538, "step": 3249, "video_reward_cumulative_accuracy": 0.8564173591874423 }, { "epoch": 0.964677945978035, "grad_norm": 1.535839557647705, "learning_rate": 1.8979946986145137e-08, "loss": 0.0123, "step": 3250, "video_reward_cumulative_accuracy": 0.8564615384615385 }, { "epoch": 0.9649747699614128, "grad_norm": 2.697875738143921, "learning_rate": 1.8662692388061733e-08, "loss": 0.03, "step": 3251, "video_reward_cumulative_accuracy": 0.8565056905567517 }, { "epoch": 0.9652715939447908, "grad_norm": 2.8388845920562744, "learning_rate": 1.8348101746410994e-08, "loss": 0.0312, "step": 3252, "video_reward_cumulative_accuracy": 0.856549815498155 }, { "epoch": 0.9655684179281686, "grad_norm": 3.0264766216278076, "learning_rate": 1.803617539893665e-08, "loss": 0.0642, "step": 3253, "video_reward_cumulative_accuracy": 0.85659391331079 }, { "epoch": 0.9658652419115464, "grad_norm": 1.1789016723632812, "learning_rate": 1.772691368052165e-08, "loss": 0.0103, "step": 3254, "video_reward_cumulative_accuracy": 0.8566379840196681 }, { "epoch": 0.9661620658949243, "grad_norm": 2.1506567001342773, "learning_rate": 1.742031692318874e-08, "loss": 0.0225, "step": 3255, "video_reward_cumulative_accuracy": 0.8566820276497696 }, { "epoch": 0.9664588898783022, "grad_norm": 1.4436240196228027, "learning_rate": 1.711638545609906e-08, "loss": 0.0403, "step": 3256, "video_reward_cumulative_accuracy": 0.8565724815724816 }, { "epoch": 0.96675571386168, "grad_norm": 0.8938013911247253, "learning_rate": 1.681511960555271e-08, "loss": 0.0166, "step": 3257, "video_reward_cumulative_accuracy": 0.8566165182683451 }, { "epoch": 0.9670525378450578, "grad_norm": 2.3233988285064697, "learning_rate": 1.651651969498791e-08, "loss": 0.0305, "step": 3258, "video_reward_cumulative_accuracy": 0.8566605279312461 }, { "epoch": 0.9673493618284358, "grad_norm": 0.6429560780525208, "learning_rate": 1.6220586044980448e-08, "loss": 0.0046, "step": 3259, "video_reward_cumulative_accuracy": 0.8567045105860693 }, { "epoch": 0.9676461858118136, "grad_norm": 1.2214471101760864, "learning_rate": 1.592731897324368e-08, "loss": 0.0175, "step": 3260, "video_reward_cumulative_accuracy": 0.8567484662576688 }, { "epoch": 0.9679430097951914, "grad_norm": 2.7271018028259277, "learning_rate": 1.5636718794628523e-08, "loss": 0.0793, "step": 3261, "video_reward_cumulative_accuracy": 0.8567923949708678 }, { "epoch": 0.9682398337785693, "grad_norm": 2.33453106880188, "learning_rate": 1.5348785821122648e-08, "loss": 0.0387, "step": 3262, "video_reward_cumulative_accuracy": 0.8568362967504598 }, { "epoch": 0.9685366577619472, "grad_norm": 0.8599418997764587, "learning_rate": 1.5063520361849604e-08, "loss": 0.0055, "step": 3263, "video_reward_cumulative_accuracy": 0.8568801716212074 }, { "epoch": 0.968833481745325, "grad_norm": 0.911806583404541, "learning_rate": 1.4780922723069968e-08, "loss": 0.0228, "step": 3264, "video_reward_cumulative_accuracy": 0.8569240196078431 }, { "epoch": 0.9691303057287028, "grad_norm": 1.4930540323257446, "learning_rate": 1.4500993208179382e-08, "loss": 0.0299, "step": 3265, "video_reward_cumulative_accuracy": 0.8569678407350689 }, { "epoch": 0.9694271297120808, "grad_norm": 2.40919828414917, "learning_rate": 1.4223732117709387e-08, "loss": 0.064, "step": 3266, "video_reward_cumulative_accuracy": 0.8570116350275566 }, { "epoch": 0.9697239536954586, "grad_norm": 1.3808552026748657, "learning_rate": 1.3949139749326601e-08, "loss": 0.0271, "step": 3267, "video_reward_cumulative_accuracy": 0.8570554025099479 }, { "epoch": 0.9700207776788364, "grad_norm": 1.2729172706604004, "learning_rate": 1.367721639783326e-08, "loss": 0.0252, "step": 3268, "video_reward_cumulative_accuracy": 0.8570991432068543 }, { "epoch": 0.9703176016622143, "grad_norm": 5.71325159072876, "learning_rate": 1.3407962355164728e-08, "loss": 0.0391, "step": 3269, "video_reward_cumulative_accuracy": 0.8571428571428571 }, { "epoch": 0.9706144256455922, "grad_norm": 0.38675758242607117, "learning_rate": 1.3141377910391718e-08, "loss": 0.0065, "step": 3270, "video_reward_cumulative_accuracy": 0.8571865443425076 }, { "epoch": 0.97091124962897, "grad_norm": 1.710315227508545, "learning_rate": 1.2877463349718067e-08, "loss": 0.0198, "step": 3271, "video_reward_cumulative_accuracy": 0.8572302048303271 }, { "epoch": 0.9712080736123478, "grad_norm": 1.825723648071289, "learning_rate": 1.2616218956482407e-08, "loss": 0.0223, "step": 3272, "video_reward_cumulative_accuracy": 0.8572738386308069 }, { "epoch": 0.9715048975957258, "grad_norm": 0.4373869299888611, "learning_rate": 1.2357645011155106e-08, "loss": 0.0071, "step": 3273, "video_reward_cumulative_accuracy": 0.8573174457684082 }, { "epoch": 0.9718017215791036, "grad_norm": 2.230316400527954, "learning_rate": 1.2101741791341049e-08, "loss": 0.02, "step": 3274, "video_reward_cumulative_accuracy": 0.8573610262675626 }, { "epoch": 0.9720985455624814, "grad_norm": 3.0452535152435303, "learning_rate": 1.1848509571777133e-08, "loss": 0.0309, "step": 3275, "video_reward_cumulative_accuracy": 0.8574045801526717 }, { "epoch": 0.9723953695458593, "grad_norm": 1.0447007417678833, "learning_rate": 1.1597948624332278e-08, "loss": 0.0155, "step": 3276, "video_reward_cumulative_accuracy": 0.8574481074481074 }, { "epoch": 0.9726921935292372, "grad_norm": 3.0469367504119873, "learning_rate": 1.1350059218008248e-08, "loss": 0.0523, "step": 3277, "video_reward_cumulative_accuracy": 0.8574916081782118 }, { "epoch": 0.972989017512615, "grad_norm": 1.8677681684494019, "learning_rate": 1.1104841618938545e-08, "loss": 0.0189, "step": 3278, "video_reward_cumulative_accuracy": 0.8575350823672971 }, { "epoch": 0.9732858414959928, "grad_norm": 0.8737713694572449, "learning_rate": 1.0862296090387859e-08, "loss": 0.0115, "step": 3279, "video_reward_cumulative_accuracy": 0.8575785300396462 }, { "epoch": 0.9735826654793708, "grad_norm": 1.362595796585083, "learning_rate": 1.0622422892752338e-08, "loss": 0.0202, "step": 3280, "video_reward_cumulative_accuracy": 0.8576219512195122 }, { "epoch": 0.9738794894627486, "grad_norm": 4.023233413696289, "learning_rate": 1.0385222283559037e-08, "loss": 0.071, "step": 3281, "video_reward_cumulative_accuracy": 0.8576653459311185 }, { "epoch": 0.9741763134461264, "grad_norm": 4.000472545623779, "learning_rate": 1.0150694517466198e-08, "loss": 0.0249, "step": 3282, "video_reward_cumulative_accuracy": 0.8577087141986593 }, { "epoch": 0.9744731374295043, "grad_norm": 0.7309911251068115, "learning_rate": 9.918839846261852e-09, "loss": 0.0066, "step": 3283, "video_reward_cumulative_accuracy": 0.8577520560462991 }, { "epoch": 0.9747699614128822, "grad_norm": 1.770171880722046, "learning_rate": 9.689658518864664e-09, "loss": 0.0367, "step": 3284, "video_reward_cumulative_accuracy": 0.8576431181485993 }, { "epoch": 0.97506678539626, "grad_norm": 2.3775479793548584, "learning_rate": 9.463150781322816e-09, "loss": 0.019, "step": 3285, "video_reward_cumulative_accuracy": 0.8576864535768646 }, { "epoch": 0.9753636093796378, "grad_norm": 3.345357894897461, "learning_rate": 9.239316876814564e-09, "loss": 0.0785, "step": 3286, "video_reward_cumulative_accuracy": 0.8577297626293365 }, { "epoch": 0.9756604333630158, "grad_norm": 1.9170372486114502, "learning_rate": 9.018157045647124e-09, "loss": 0.0236, "step": 3287, "video_reward_cumulative_accuracy": 0.8577730453300882 }, { "epoch": 0.9759572573463936, "grad_norm": 1.4506341218948364, "learning_rate": 8.799671525257236e-09, "loss": 0.0337, "step": 3288, "video_reward_cumulative_accuracy": 0.8576642335766423 }, { "epoch": 0.9762540813297714, "grad_norm": 3.2569446563720703, "learning_rate": 8.583860550210043e-09, "loss": 0.0416, "step": 3289, "video_reward_cumulative_accuracy": 0.857707509881423 }, { "epoch": 0.9765509053131493, "grad_norm": 2.6264233589172363, "learning_rate": 8.370724352199933e-09, "loss": 0.0574, "step": 3290, "video_reward_cumulative_accuracy": 0.8577507598784194 }, { "epoch": 0.9768477292965272, "grad_norm": 0.6166547536849976, "learning_rate": 8.160263160049143e-09, "loss": 0.0137, "step": 3291, "video_reward_cumulative_accuracy": 0.8577939835916135 }, { "epoch": 0.977144553279905, "grad_norm": 2.100522041320801, "learning_rate": 7.952477199708042e-09, "loss": 0.0219, "step": 3292, "video_reward_cumulative_accuracy": 0.8578371810449574 }, { "epoch": 0.9774413772632828, "grad_norm": 1.396987795829773, "learning_rate": 7.747366694255409e-09, "loss": 0.0129, "step": 3293, "video_reward_cumulative_accuracy": 0.8578803522623747 }, { "epoch": 0.9777382012466608, "grad_norm": 0.7728352546691895, "learning_rate": 7.544931863896765e-09, "loss": 0.0088, "step": 3294, "video_reward_cumulative_accuracy": 0.8579234972677595 }, { "epoch": 0.9780350252300386, "grad_norm": 0.9085843563079834, "learning_rate": 7.345172925966038e-09, "loss": 0.0196, "step": 3295, "video_reward_cumulative_accuracy": 0.8579666160849773 }, { "epoch": 0.9783318492134164, "grad_norm": 1.8223828077316284, "learning_rate": 7.148090094923343e-09, "loss": 0.0362, "step": 3296, "video_reward_cumulative_accuracy": 0.8580097087378641 }, { "epoch": 0.9786286731967943, "grad_norm": 5.303034782409668, "learning_rate": 6.953683582356652e-09, "loss": 0.099, "step": 3297, "video_reward_cumulative_accuracy": 0.8579011222323324 }, { "epoch": 0.9789254971801722, "grad_norm": 2.5379104614257812, "learning_rate": 6.76195359698012e-09, "loss": 0.0362, "step": 3298, "video_reward_cumulative_accuracy": 0.8577926015767131 }, { "epoch": 0.97922232116355, "grad_norm": 2.565481185913086, "learning_rate": 6.57290034463437e-09, "loss": 0.0284, "step": 3299, "video_reward_cumulative_accuracy": 0.8578357077902394 }, { "epoch": 0.9795191451469278, "grad_norm": 2.1413698196411133, "learning_rate": 6.386524028286489e-09, "loss": 0.0321, "step": 3300, "video_reward_cumulative_accuracy": 0.8578787878787879 }, { "epoch": 0.9798159691303058, "grad_norm": 2.323068857192993, "learning_rate": 6.202824848029476e-09, "loss": 0.0302, "step": 3301, "video_reward_cumulative_accuracy": 0.8579218418661012 }, { "epoch": 0.9801127931136836, "grad_norm": 1.3499592542648315, "learning_rate": 6.021803001082238e-09, "loss": 0.0093, "step": 3302, "video_reward_cumulative_accuracy": 0.8579648697758934 }, { "epoch": 0.9804096170970614, "grad_norm": 0.8288122415542603, "learning_rate": 5.843458681789594e-09, "loss": 0.0155, "step": 3303, "video_reward_cumulative_accuracy": 0.8580078716318499 }, { "epoch": 0.9807064410804393, "grad_norm": 3.430920124053955, "learning_rate": 5.66779208162116e-09, "loss": 0.0318, "step": 3304, "video_reward_cumulative_accuracy": 0.8580508474576272 }, { "epoch": 0.9810032650638172, "grad_norm": 3.632520914077759, "learning_rate": 5.4948033891721875e-09, "loss": 0.0411, "step": 3305, "video_reward_cumulative_accuracy": 0.8580937972768532 }, { "epoch": 0.981300089047195, "grad_norm": 2.451488494873047, "learning_rate": 5.3244927901627274e-09, "loss": 0.0354, "step": 3306, "video_reward_cumulative_accuracy": 0.8581367211131277 }, { "epoch": 0.9815969130305728, "grad_norm": 1.2364314794540405, "learning_rate": 5.1568604674376295e-09, "loss": 0.0326, "step": 3307, "video_reward_cumulative_accuracy": 0.8581796189900212 }, { "epoch": 0.9818937370139508, "grad_norm": 2.0246939659118652, "learning_rate": 4.991906600966823e-09, "loss": 0.0493, "step": 3308, "video_reward_cumulative_accuracy": 0.8580713422007256 }, { "epoch": 0.9821905609973286, "grad_norm": 3.2378106117248535, "learning_rate": 4.829631367844201e-09, "loss": 0.0415, "step": 3309, "video_reward_cumulative_accuracy": 0.858114233907525 }, { "epoch": 0.9824873849807064, "grad_norm": 3.8841092586517334, "learning_rate": 4.670034942287904e-09, "loss": 0.0413, "step": 3310, "video_reward_cumulative_accuracy": 0.8581570996978852 }, { "epoch": 0.9827842089640842, "grad_norm": 0.6869316101074219, "learning_rate": 4.51311749564004e-09, "loss": 0.0038, "step": 3311, "video_reward_cumulative_accuracy": 0.8581999395952884 }, { "epoch": 0.9830810329474622, "grad_norm": 1.650780439376831, "learning_rate": 4.358879196366961e-09, "loss": 0.012, "step": 3312, "video_reward_cumulative_accuracy": 0.8582427536231884 }, { "epoch": 0.98337785693084, "grad_norm": 1.0909397602081299, "learning_rate": 4.207320210058153e-09, "loss": 0.007, "step": 3313, "video_reward_cumulative_accuracy": 0.8582855418050106 }, { "epoch": 0.9836746809142178, "grad_norm": 0.3308217227458954, "learning_rate": 4.058440699427346e-09, "loss": 0.0066, "step": 3314, "video_reward_cumulative_accuracy": 0.8583283041641521 }, { "epoch": 0.9839715048975958, "grad_norm": 1.89927077293396, "learning_rate": 3.9122408243105755e-09, "loss": 0.0213, "step": 3315, "video_reward_cumulative_accuracy": 0.8583710407239818 }, { "epoch": 0.9842683288809736, "grad_norm": 2.1102702617645264, "learning_rate": 3.768720741668119e-09, "loss": 0.0126, "step": 3316, "video_reward_cumulative_accuracy": 0.8584137515078407 }, { "epoch": 0.9845651528643514, "grad_norm": 2.2954890727996826, "learning_rate": 3.6278806055825566e-09, "loss": 0.0284, "step": 3317, "video_reward_cumulative_accuracy": 0.8584564365390414 }, { "epoch": 0.9848619768477292, "grad_norm": 2.33181095123291, "learning_rate": 3.489720567259325e-09, "loss": 0.0763, "step": 3318, "video_reward_cumulative_accuracy": 0.858499095840868 }, { "epoch": 0.9851588008311072, "grad_norm": 4.192658424377441, "learning_rate": 3.3542407750264404e-09, "loss": 0.0394, "step": 3319, "video_reward_cumulative_accuracy": 0.8585417294365773 }, { "epoch": 0.985455624814485, "grad_norm": 1.9729253053665161, "learning_rate": 3.2214413743353323e-09, "loss": 0.0377, "step": 3320, "video_reward_cumulative_accuracy": 0.858433734939759 }, { "epoch": 0.9857524487978628, "grad_norm": 0.8455713987350464, "learning_rate": 3.0913225077580653e-09, "loss": 0.0084, "step": 3321, "video_reward_cumulative_accuracy": 0.8584763625414031 }, { "epoch": 0.9860492727812408, "grad_norm": 1.2491599321365356, "learning_rate": 2.9638843149906725e-09, "loss": 0.0166, "step": 3322, "video_reward_cumulative_accuracy": 0.8585189644792294 }, { "epoch": 0.9863460967646186, "grad_norm": 2.424283981323242, "learning_rate": 2.839126932850378e-09, "loss": 0.0389, "step": 3323, "video_reward_cumulative_accuracy": 0.8585615407764069 }, { "epoch": 0.9866429207479964, "grad_norm": 1.9569655656814575, "learning_rate": 2.7170504952755972e-09, "loss": 0.0246, "step": 3324, "video_reward_cumulative_accuracy": 0.858453670276775 }, { "epoch": 0.9869397447313742, "grad_norm": 2.326342821121216, "learning_rate": 2.5976551333281586e-09, "loss": 0.0267, "step": 3325, "video_reward_cumulative_accuracy": 0.8584962406015038 }, { "epoch": 0.9872365687147522, "grad_norm": 3.129794120788574, "learning_rate": 2.4809409751899718e-09, "loss": 0.086, "step": 3326, "video_reward_cumulative_accuracy": 0.8582381238725195 }, { "epoch": 0.98753339269813, "grad_norm": 1.876774787902832, "learning_rate": 2.3669081461652476e-09, "loss": 0.0505, "step": 3327, "video_reward_cumulative_accuracy": 0.8581304478509167 }, { "epoch": 0.9878302166815078, "grad_norm": 0.5072069764137268, "learning_rate": 2.2555567686791124e-09, "loss": 0.0061, "step": 3328, "video_reward_cumulative_accuracy": 0.8581730769230769 }, { "epoch": 0.9881270406648858, "grad_norm": 1.15468168258667, "learning_rate": 2.1468869622781608e-09, "loss": 0.046, "step": 3329, "video_reward_cumulative_accuracy": 0.8582156803844998 }, { "epoch": 0.9884238646482636, "grad_norm": 1.5895243883132935, "learning_rate": 2.040898843630179e-09, "loss": 0.0176, "step": 3330, "video_reward_cumulative_accuracy": 0.8582582582582583 }, { "epoch": 0.9887206886316414, "grad_norm": 2.2743732929229736, "learning_rate": 1.9375925265235907e-09, "loss": 0.0536, "step": 3331, "video_reward_cumulative_accuracy": 0.8583008105673972 }, { "epoch": 0.9890175126150192, "grad_norm": 0.7520642280578613, "learning_rate": 1.8369681218677327e-09, "loss": 0.0133, "step": 3332, "video_reward_cumulative_accuracy": 0.8581932773109243 }, { "epoch": 0.9893143365983972, "grad_norm": 1.7850524187088013, "learning_rate": 1.739025737692579e-09, "loss": 0.0138, "step": 3333, "video_reward_cumulative_accuracy": 0.858085808580858 }, { "epoch": 0.989611160581775, "grad_norm": 0.7477326989173889, "learning_rate": 1.64376547914874e-09, "loss": 0.0129, "step": 3334, "video_reward_cumulative_accuracy": 0.8579784043191362 }, { "epoch": 0.9899079845651528, "grad_norm": 2.771977663040161, "learning_rate": 1.551187448507463e-09, "loss": 0.0591, "step": 3335, "video_reward_cumulative_accuracy": 0.8578710644677661 }, { "epoch": 0.9902048085485308, "grad_norm": 5.376701831817627, "learning_rate": 1.4612917451603536e-09, "loss": 0.0271, "step": 3336, "video_reward_cumulative_accuracy": 0.8579136690647482 }, { "epoch": 0.9905016325319086, "grad_norm": 1.8040037155151367, "learning_rate": 1.3740784656190998e-09, "loss": 0.0504, "step": 3337, "video_reward_cumulative_accuracy": 0.8578064129457597 }, { "epoch": 0.9907984565152864, "grad_norm": 1.739055871963501, "learning_rate": 1.2895477035154703e-09, "loss": 0.0359, "step": 3338, "video_reward_cumulative_accuracy": 0.8578490113840623 }, { "epoch": 0.9910952804986642, "grad_norm": 2.0168232917785645, "learning_rate": 1.2076995496015931e-09, "loss": 0.04, "step": 3339, "video_reward_cumulative_accuracy": 0.8578915843066787 }, { "epoch": 0.9913921044820422, "grad_norm": 1.7344579696655273, "learning_rate": 1.1285340917494004e-09, "loss": 0.0256, "step": 3340, "video_reward_cumulative_accuracy": 0.857934131736527 }, { "epoch": 0.99168892846542, "grad_norm": 1.7917364835739136, "learning_rate": 1.0520514149506278e-09, "loss": 0.021, "step": 3341, "video_reward_cumulative_accuracy": 0.857976653696498 }, { "epoch": 0.9919857524487978, "grad_norm": 1.345489740371704, "learning_rate": 9.782516013168154e-10, "loss": 0.0126, "step": 3342, "video_reward_cumulative_accuracy": 0.8580191502094554 }, { "epoch": 0.9922825764321758, "grad_norm": 0.9671461582183838, "learning_rate": 9.071347300793065e-10, "loss": 0.0065, "step": 3343, "video_reward_cumulative_accuracy": 0.8580616212982352 }, { "epoch": 0.9925794004155536, "grad_norm": 1.9350098371505737, "learning_rate": 8.387008775889716e-10, "loss": 0.0142, "step": 3344, "video_reward_cumulative_accuracy": 0.8581040669856459 }, { "epoch": 0.9928762243989314, "grad_norm": 1.1504771709442139, "learning_rate": 7.729501173162068e-10, "loss": 0.009, "step": 3345, "video_reward_cumulative_accuracy": 0.8581464872944694 }, { "epoch": 0.9931730483823092, "grad_norm": 1.2408806085586548, "learning_rate": 7.098825198509351e-10, "loss": 0.0086, "step": 3346, "video_reward_cumulative_accuracy": 0.8581888822474597 }, { "epoch": 0.9934698723656872, "grad_norm": 3.347973108291626, "learning_rate": 6.494981529020505e-10, "loss": 0.0431, "step": 3347, "video_reward_cumulative_accuracy": 0.8582312518673438 }, { "epoch": 0.993766696349065, "grad_norm": 1.9659161567687988, "learning_rate": 5.91797081298251e-10, "loss": 0.0124, "step": 3348, "video_reward_cumulative_accuracy": 0.8581242532855436 }, { "epoch": 0.9940635203324428, "grad_norm": 4.217858791351318, "learning_rate": 5.367793669874832e-10, "loss": 0.0681, "step": 3349, "video_reward_cumulative_accuracy": 0.8581666169005673 }, { "epoch": 0.9943603443158208, "grad_norm": 2.9570226669311523, "learning_rate": 4.844450690358327e-10, "loss": 0.0385, "step": 3350, "video_reward_cumulative_accuracy": 0.8582089552238806 }, { "epoch": 0.9946571682991986, "grad_norm": 0.8007720708847046, "learning_rate": 4.347942436300212e-10, "loss": 0.0163, "step": 3351, "video_reward_cumulative_accuracy": 0.8582512682781259 }, { "epoch": 0.9949539922825764, "grad_norm": 1.299566626548767, "learning_rate": 3.8782694407463184e-10, "loss": 0.0321, "step": 3352, "video_reward_cumulative_accuracy": 0.8582935560859188 }, { "epoch": 0.9952508162659542, "grad_norm": 2.04730486869812, "learning_rate": 3.435432207937739e-10, "loss": 0.0355, "step": 3353, "video_reward_cumulative_accuracy": 0.8583358186698479 }, { "epoch": 0.9955476402493322, "grad_norm": 2.532416343688965, "learning_rate": 3.019431213299728e-10, "loss": 0.0196, "step": 3354, "video_reward_cumulative_accuracy": 0.8583780560524746 }, { "epoch": 0.99584446423271, "grad_norm": 1.875337839126587, "learning_rate": 2.6302669034555807e-10, "loss": 0.0339, "step": 3355, "video_reward_cumulative_accuracy": 0.8584202682563339 }, { "epoch": 0.9961412882160878, "grad_norm": 2.1054089069366455, "learning_rate": 2.2679396962071999e-10, "loss": 0.0239, "step": 3356, "video_reward_cumulative_accuracy": 0.8584624553039333 }, { "epoch": 0.9964381121994658, "grad_norm": 2.2614123821258545, "learning_rate": 1.9324499805489783e-10, "loss": 0.0458, "step": 3357, "video_reward_cumulative_accuracy": 0.858504617217754 }, { "epoch": 0.9967349361828436, "grad_norm": 1.6584018468856812, "learning_rate": 1.6237981166622451e-10, "loss": 0.0176, "step": 3358, "video_reward_cumulative_accuracy": 0.8585467540202502 }, { "epoch": 0.9970317601662214, "grad_norm": 2.8550844192504883, "learning_rate": 1.341984435912491e-10, "loss": 0.0352, "step": 3359, "video_reward_cumulative_accuracy": 0.8585888657338494 }, { "epoch": 0.9973285841495992, "grad_norm": 3.7079176902770996, "learning_rate": 1.0870092408576949e-10, "loss": 0.0719, "step": 3360, "video_reward_cumulative_accuracy": 0.8584821428571429 }, { "epoch": 0.9976254081329772, "grad_norm": 1.3505829572677612, "learning_rate": 8.588728052344453e-11, "loss": 0.0224, "step": 3361, "video_reward_cumulative_accuracy": 0.8585242487354954 }, { "epoch": 0.997922232116355, "grad_norm": 1.7025647163391113, "learning_rate": 6.575753739718193e-11, "loss": 0.0293, "step": 3362, "video_reward_cumulative_accuracy": 0.8585663295657346 }, { "epoch": 0.9982190560997328, "grad_norm": 2.158022403717041, "learning_rate": 4.8311716318028e-11, "loss": 0.0603, "step": 3363, "video_reward_cumulative_accuracy": 0.8586083853702052 }, { "epoch": 0.9985158800831108, "grad_norm": 2.8020150661468506, "learning_rate": 3.354983601600026e-11, "loss": 0.0205, "step": 3364, "video_reward_cumulative_accuracy": 0.8586504161712247 }, { "epoch": 0.9988127040664886, "grad_norm": 3.6079211235046387, "learning_rate": 2.1471912339532386e-11, "loss": 0.0442, "step": 3365, "video_reward_cumulative_accuracy": 0.8585438335809806 }, { "epoch": 0.9991095280498664, "grad_norm": 1.7951223850250244, "learning_rate": 1.2077958254919087e-11, "loss": 0.0145, "step": 3366, "video_reward_cumulative_accuracy": 0.8585858585858586 }, { "epoch": 0.9994063520332442, "grad_norm": 2.862751007080078, "learning_rate": 5.367983847981428e-12, "loss": 0.0244, "step": 3367, "video_reward_cumulative_accuracy": 0.8584793584793585 }, { "epoch": 0.9997031760166222, "grad_norm": 1.4409502744674683, "learning_rate": 1.3419963221239506e-12, "loss": 0.0109, "step": 3368, "video_reward_cumulative_accuracy": 0.858521377672209 }, { "epoch": 1.0, "grad_norm": 1.1117860078811646, "learning_rate": 0.0, "loss": 0.0134, "step": 3369, "video_reward_cumulative_accuracy": 0.8585633719204512 } ], "logging_steps": 1, "max_steps": 3369, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6663849150126752e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }