diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,27114 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 3369, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029682398337785694, + "grad_norm": 16.833446502685547, + "learning_rate": 1.483679525222552e-08, + "loss": 0.1938, + "step": 1, + "video_reward_cumulative_accuracy": 0.5 + }, + { + "epoch": 0.0005936479667557139, + "grad_norm": 24.454694747924805, + "learning_rate": 2.967359050445104e-08, + "loss": 0.3887, + "step": 2, + "video_reward_cumulative_accuracy": 0.5 + }, + { + "epoch": 0.0008904719501335708, + "grad_norm": 21.62911605834961, + "learning_rate": 4.451038575667656e-08, + "loss": 0.2016, + "step": 3, + "video_reward_cumulative_accuracy": 0.5 + }, + { + "epoch": 0.0011872959335114278, + "grad_norm": 18.787561416625977, + "learning_rate": 5.934718100890208e-08, + "loss": 0.1834, + "step": 4, + "video_reward_cumulative_accuracy": 0.625 + }, + { + "epoch": 0.0014841199168892847, + "grad_norm": 25.317777633666992, + "learning_rate": 7.418397626112761e-08, + "loss": 0.3673, + "step": 5, + "video_reward_cumulative_accuracy": 0.7 + }, + { + "epoch": 0.0017809439002671415, + "grad_norm": 12.732484817504883, + "learning_rate": 8.902077151335312e-08, + "loss": 0.1034, + "step": 6, + "video_reward_cumulative_accuracy": 0.5833333333333334 + }, + { + "epoch": 0.0020777678836449986, + "grad_norm": 20.14723777770996, + "learning_rate": 1.0385756676557864e-07, + "loss": 0.2094, + "step": 7, + "video_reward_cumulative_accuracy": 0.5714285714285714 + }, + { + "epoch": 0.0023745918670228555, + "grad_norm": 33.73411178588867, + "learning_rate": 1.1869436201780416e-07, + "loss": 0.3328, + "step": 8, + "video_reward_cumulative_accuracy": 0.5625 + }, + { + "epoch": 0.0026714158504007124, + "grad_norm": 21.074481964111328, + "learning_rate": 1.3353115727002968e-07, + "loss": 0.1414, + "step": 9, + "video_reward_cumulative_accuracy": 0.5555555555555556 + }, + { + "epoch": 0.0029682398337785693, + "grad_norm": 23.00200080871582, + "learning_rate": 1.4836795252225522e-07, + "loss": 0.4463, + "step": 10, + "video_reward_cumulative_accuracy": 0.55 + }, + { + "epoch": 0.003265063817156426, + "grad_norm": 23.951406478881836, + "learning_rate": 1.6320474777448073e-07, + "loss": 0.3231, + "step": 11, + "video_reward_cumulative_accuracy": 0.5909090909090909 + }, + { + "epoch": 0.003561887800534283, + "grad_norm": 25.632526397705078, + "learning_rate": 1.7804154302670624e-07, + "loss": 0.2746, + "step": 12, + "video_reward_cumulative_accuracy": 0.625 + }, + { + "epoch": 0.00385871178391214, + "grad_norm": 17.966325759887695, + "learning_rate": 1.9287833827893176e-07, + "loss": 0.224, + "step": 13, + "video_reward_cumulative_accuracy": 0.6538461538461539 + }, + { + "epoch": 0.004155535767289997, + "grad_norm": 27.712692260742188, + "learning_rate": 2.0771513353115727e-07, + "loss": 0.1764, + "step": 14, + "video_reward_cumulative_accuracy": 0.6071428571428571 + }, + { + "epoch": 0.004452359750667854, + "grad_norm": 24.923372268676758, + "learning_rate": 2.225519287833828e-07, + "loss": 0.2788, + "step": 15, + "video_reward_cumulative_accuracy": 0.6333333333333333 + }, + { + "epoch": 0.004749183734045711, + "grad_norm": 13.765485763549805, + "learning_rate": 2.3738872403560833e-07, + "loss": 0.1745, + "step": 16, + "video_reward_cumulative_accuracy": 0.65625 + }, + { + "epoch": 0.0050460077174235675, + "grad_norm": 17.97304344177246, + "learning_rate": 2.5222551928783384e-07, + "loss": 0.2362, + "step": 17, + "video_reward_cumulative_accuracy": 0.6470588235294118 + }, + { + "epoch": 0.005342831700801425, + "grad_norm": 10.485559463500977, + "learning_rate": 2.6706231454005935e-07, + "loss": 0.1295, + "step": 18, + "video_reward_cumulative_accuracy": 0.6666666666666666 + }, + { + "epoch": 0.005639655684179281, + "grad_norm": 15.357043266296387, + "learning_rate": 2.8189910979228487e-07, + "loss": 0.2338, + "step": 19, + "video_reward_cumulative_accuracy": 0.6842105263157895 + }, + { + "epoch": 0.005936479667557139, + "grad_norm": 15.526144027709961, + "learning_rate": 2.9673590504451043e-07, + "loss": 0.2443, + "step": 20, + "video_reward_cumulative_accuracy": 0.7 + }, + { + "epoch": 0.006233303650934996, + "grad_norm": 19.776208877563477, + "learning_rate": 3.1157270029673595e-07, + "loss": 0.1632, + "step": 21, + "video_reward_cumulative_accuracy": 0.6904761904761905 + }, + { + "epoch": 0.006530127634312852, + "grad_norm": 31.176301956176758, + "learning_rate": 3.2640949554896146e-07, + "loss": 0.3122, + "step": 22, + "video_reward_cumulative_accuracy": 0.6590909090909091 + }, + { + "epoch": 0.00682695161769071, + "grad_norm": 12.820406913757324, + "learning_rate": 3.41246290801187e-07, + "loss": 0.1422, + "step": 23, + "video_reward_cumulative_accuracy": 0.6521739130434783 + }, + { + "epoch": 0.007123775601068566, + "grad_norm": 12.706981658935547, + "learning_rate": 3.560830860534125e-07, + "loss": 0.169, + "step": 24, + "video_reward_cumulative_accuracy": 0.6458333333333334 + }, + { + "epoch": 0.0074205995844464235, + "grad_norm": 16.90671157836914, + "learning_rate": 3.70919881305638e-07, + "loss": 0.1716, + "step": 25, + "video_reward_cumulative_accuracy": 0.64 + }, + { + "epoch": 0.00771742356782428, + "grad_norm": 12.562002182006836, + "learning_rate": 3.857566765578635e-07, + "loss": 0.1565, + "step": 26, + "video_reward_cumulative_accuracy": 0.6538461538461539 + }, + { + "epoch": 0.008014247551202136, + "grad_norm": 10.192179679870605, + "learning_rate": 4.005934718100891e-07, + "loss": 0.1055, + "step": 27, + "video_reward_cumulative_accuracy": 0.6481481481481481 + }, + { + "epoch": 0.008311071534579995, + "grad_norm": 15.415340423583984, + "learning_rate": 4.1543026706231454e-07, + "loss": 0.2231, + "step": 28, + "video_reward_cumulative_accuracy": 0.6428571428571429 + }, + { + "epoch": 0.008607895517957851, + "grad_norm": 21.16411781311035, + "learning_rate": 4.302670623145401e-07, + "loss": 0.3278, + "step": 29, + "video_reward_cumulative_accuracy": 0.6379310344827587 + }, + { + "epoch": 0.008904719501335707, + "grad_norm": 21.195640563964844, + "learning_rate": 4.451038575667656e-07, + "loss": 0.2429, + "step": 30, + "video_reward_cumulative_accuracy": 0.6333333333333333 + }, + { + "epoch": 0.009201543484713566, + "grad_norm": 10.36475658416748, + "learning_rate": 4.5994065281899114e-07, + "loss": 0.125, + "step": 31, + "video_reward_cumulative_accuracy": 0.6290322580645161 + }, + { + "epoch": 0.009498367468091422, + "grad_norm": 14.530949592590332, + "learning_rate": 4.7477744807121665e-07, + "loss": 0.2392, + "step": 32, + "video_reward_cumulative_accuracy": 0.640625 + }, + { + "epoch": 0.009795191451469279, + "grad_norm": 17.089160919189453, + "learning_rate": 4.896142433234421e-07, + "loss": 0.2253, + "step": 33, + "video_reward_cumulative_accuracy": 0.6363636363636364 + }, + { + "epoch": 0.010092015434847135, + "grad_norm": 14.45820426940918, + "learning_rate": 5.044510385756677e-07, + "loss": 0.2298, + "step": 34, + "video_reward_cumulative_accuracy": 0.6323529411764706 + }, + { + "epoch": 0.010388839418224993, + "grad_norm": 12.284329414367676, + "learning_rate": 5.192878338278932e-07, + "loss": 0.0988, + "step": 35, + "video_reward_cumulative_accuracy": 0.6428571428571429 + }, + { + "epoch": 0.01068566340160285, + "grad_norm": 14.295129776000977, + "learning_rate": 5.341246290801187e-07, + "loss": 0.2106, + "step": 36, + "video_reward_cumulative_accuracy": 0.6388888888888888 + }, + { + "epoch": 0.010982487384980706, + "grad_norm": 7.684972286224365, + "learning_rate": 5.489614243323443e-07, + "loss": 0.0939, + "step": 37, + "video_reward_cumulative_accuracy": 0.6486486486486487 + }, + { + "epoch": 0.011279311368358563, + "grad_norm": 9.849855422973633, + "learning_rate": 5.637982195845697e-07, + "loss": 0.1303, + "step": 38, + "video_reward_cumulative_accuracy": 0.6578947368421053 + }, + { + "epoch": 0.01157613535173642, + "grad_norm": 10.725906372070312, + "learning_rate": 5.786350148367953e-07, + "loss": 0.1924, + "step": 39, + "video_reward_cumulative_accuracy": 0.6538461538461539 + }, + { + "epoch": 0.011872959335114277, + "grad_norm": 9.24576473236084, + "learning_rate": 5.934718100890209e-07, + "loss": 0.108, + "step": 40, + "video_reward_cumulative_accuracy": 0.6375 + }, + { + "epoch": 0.012169783318492134, + "grad_norm": 6.677659511566162, + "learning_rate": 6.083086053412463e-07, + "loss": 0.0879, + "step": 41, + "video_reward_cumulative_accuracy": 0.6463414634146342 + }, + { + "epoch": 0.012466607301869992, + "grad_norm": 7.723426342010498, + "learning_rate": 6.231454005934719e-07, + "loss": 0.073, + "step": 42, + "video_reward_cumulative_accuracy": 0.6428571428571429 + }, + { + "epoch": 0.012763431285247848, + "grad_norm": 10.541435241699219, + "learning_rate": 6.379821958456974e-07, + "loss": 0.1862, + "step": 43, + "video_reward_cumulative_accuracy": 0.6395348837209303 + }, + { + "epoch": 0.013060255268625705, + "grad_norm": 7.638758659362793, + "learning_rate": 6.528189910979229e-07, + "loss": 0.1184, + "step": 44, + "video_reward_cumulative_accuracy": 0.6477272727272727 + }, + { + "epoch": 0.013357079252003561, + "grad_norm": 10.385604858398438, + "learning_rate": 6.676557863501485e-07, + "loss": 0.1647, + "step": 45, + "video_reward_cumulative_accuracy": 0.6555555555555556 + }, + { + "epoch": 0.01365390323538142, + "grad_norm": 6.365200996398926, + "learning_rate": 6.82492581602374e-07, + "loss": 0.0985, + "step": 46, + "video_reward_cumulative_accuracy": 0.6413043478260869 + }, + { + "epoch": 0.013950727218759276, + "grad_norm": 7.856001853942871, + "learning_rate": 6.973293768545995e-07, + "loss": 0.1468, + "step": 47, + "video_reward_cumulative_accuracy": 0.6382978723404256 + }, + { + "epoch": 0.014247551202137132, + "grad_norm": 6.851215839385986, + "learning_rate": 7.12166172106825e-07, + "loss": 0.0869, + "step": 48, + "video_reward_cumulative_accuracy": 0.6354166666666666 + }, + { + "epoch": 0.014544375185514989, + "grad_norm": 7.089118003845215, + "learning_rate": 7.270029673590504e-07, + "loss": 0.1313, + "step": 49, + "video_reward_cumulative_accuracy": 0.6326530612244898 + }, + { + "epoch": 0.014841199168892847, + "grad_norm": 3.806837558746338, + "learning_rate": 7.41839762611276e-07, + "loss": 0.0955, + "step": 50, + "video_reward_cumulative_accuracy": 0.63 + }, + { + "epoch": 0.015138023152270703, + "grad_norm": 3.002065420150757, + "learning_rate": 7.566765578635016e-07, + "loss": 0.0829, + "step": 51, + "video_reward_cumulative_accuracy": 0.6372549019607843 + }, + { + "epoch": 0.01543484713564856, + "grad_norm": 8.006941795349121, + "learning_rate": 7.71513353115727e-07, + "loss": 0.1208, + "step": 52, + "video_reward_cumulative_accuracy": 0.6442307692307693 + }, + { + "epoch": 0.015731671119026416, + "grad_norm": 4.04028844833374, + "learning_rate": 7.863501483679525e-07, + "loss": 0.0905, + "step": 53, + "video_reward_cumulative_accuracy": 0.6415094339622641 + }, + { + "epoch": 0.016028495102404273, + "grad_norm": 11.572624206542969, + "learning_rate": 8.011869436201782e-07, + "loss": 0.1112, + "step": 54, + "video_reward_cumulative_accuracy": 0.6296296296296297 + }, + { + "epoch": 0.016325319085782133, + "grad_norm": 2.590923309326172, + "learning_rate": 8.160237388724036e-07, + "loss": 0.0886, + "step": 55, + "video_reward_cumulative_accuracy": 0.6363636363636364 + }, + { + "epoch": 0.01662214306915999, + "grad_norm": 6.2428717613220215, + "learning_rate": 8.308605341246291e-07, + "loss": 0.1035, + "step": 56, + "video_reward_cumulative_accuracy": 0.6339285714285714 + }, + { + "epoch": 0.016918967052537846, + "grad_norm": 5.258223056793213, + "learning_rate": 8.456973293768548e-07, + "loss": 0.0816, + "step": 57, + "video_reward_cumulative_accuracy": 0.6403508771929824 + }, + { + "epoch": 0.017215791035915702, + "grad_norm": 2.0548617839813232, + "learning_rate": 8.605341246290802e-07, + "loss": 0.083, + "step": 58, + "video_reward_cumulative_accuracy": 0.6379310344827587 + }, + { + "epoch": 0.01751261501929356, + "grad_norm": 6.927907466888428, + "learning_rate": 8.753709198813057e-07, + "loss": 0.1002, + "step": 59, + "video_reward_cumulative_accuracy": 0.635593220338983 + }, + { + "epoch": 0.017809439002671415, + "grad_norm": 6.766244411468506, + "learning_rate": 8.902077151335312e-07, + "loss": 0.096, + "step": 60, + "video_reward_cumulative_accuracy": 0.6416666666666667 + }, + { + "epoch": 0.01810626298604927, + "grad_norm": 3.500997304916382, + "learning_rate": 9.050445103857568e-07, + "loss": 0.0837, + "step": 61, + "video_reward_cumulative_accuracy": 0.6475409836065574 + }, + { + "epoch": 0.01840308696942713, + "grad_norm": 2.852778196334839, + "learning_rate": 9.198813056379823e-07, + "loss": 0.0913, + "step": 62, + "video_reward_cumulative_accuracy": 0.6451612903225806 + }, + { + "epoch": 0.018699910952804988, + "grad_norm": 10.269067764282227, + "learning_rate": 9.347181008902077e-07, + "loss": 0.0754, + "step": 63, + "video_reward_cumulative_accuracy": 0.6507936507936508 + }, + { + "epoch": 0.018996734936182844, + "grad_norm": 3.8665616512298584, + "learning_rate": 9.495548961424333e-07, + "loss": 0.0886, + "step": 64, + "video_reward_cumulative_accuracy": 0.65625 + }, + { + "epoch": 0.0192935589195607, + "grad_norm": 2.6777122020721436, + "learning_rate": 9.643916913946588e-07, + "loss": 0.0877, + "step": 65, + "video_reward_cumulative_accuracy": 0.6615384615384615 + }, + { + "epoch": 0.019590382902938557, + "grad_norm": 1.6913548707962036, + "learning_rate": 9.792284866468842e-07, + "loss": 0.0684, + "step": 66, + "video_reward_cumulative_accuracy": 0.6666666666666666 + }, + { + "epoch": 0.019887206886316414, + "grad_norm": 2.17547345161438, + "learning_rate": 9.9406528189911e-07, + "loss": 0.0903, + "step": 67, + "video_reward_cumulative_accuracy": 0.6716417910447762 + }, + { + "epoch": 0.02018403086969427, + "grad_norm": 9.660552978515625, + "learning_rate": 1.0089020771513354e-06, + "loss": 0.1137, + "step": 68, + "video_reward_cumulative_accuracy": 0.6617647058823529 + }, + { + "epoch": 0.020480854853072127, + "grad_norm": 6.081414222717285, + "learning_rate": 1.0237388724035608e-06, + "loss": 0.0836, + "step": 69, + "video_reward_cumulative_accuracy": 0.6594202898550725 + }, + { + "epoch": 0.020777678836449986, + "grad_norm": 5.362737655639648, + "learning_rate": 1.0385756676557865e-06, + "loss": 0.0828, + "step": 70, + "video_reward_cumulative_accuracy": 0.6642857142857143 + }, + { + "epoch": 0.021074502819827843, + "grad_norm": 3.4765052795410156, + "learning_rate": 1.053412462908012e-06, + "loss": 0.0678, + "step": 71, + "video_reward_cumulative_accuracy": 0.6619718309859155 + }, + { + "epoch": 0.0213713268032057, + "grad_norm": 3.868277072906494, + "learning_rate": 1.0682492581602374e-06, + "loss": 0.0704, + "step": 72, + "video_reward_cumulative_accuracy": 0.6666666666666666 + }, + { + "epoch": 0.021668150786583556, + "grad_norm": 2.056610107421875, + "learning_rate": 1.083086053412463e-06, + "loss": 0.0699, + "step": 73, + "video_reward_cumulative_accuracy": 0.6712328767123288 + }, + { + "epoch": 0.021964974769961412, + "grad_norm": 4.672041893005371, + "learning_rate": 1.0979228486646885e-06, + "loss": 0.0789, + "step": 74, + "video_reward_cumulative_accuracy": 0.668918918918919 + }, + { + "epoch": 0.02226179875333927, + "grad_norm": 10.368182182312012, + "learning_rate": 1.112759643916914e-06, + "loss": 0.0785, + "step": 75, + "video_reward_cumulative_accuracy": 0.6666666666666666 + }, + { + "epoch": 0.022558622736717125, + "grad_norm": 5.704588890075684, + "learning_rate": 1.1275964391691395e-06, + "loss": 0.0781, + "step": 76, + "video_reward_cumulative_accuracy": 0.6710526315789473 + }, + { + "epoch": 0.022855446720094985, + "grad_norm": 4.419633388519287, + "learning_rate": 1.1424332344213651e-06, + "loss": 0.0859, + "step": 77, + "video_reward_cumulative_accuracy": 0.6623376623376623 + }, + { + "epoch": 0.02315227070347284, + "grad_norm": 8.239113807678223, + "learning_rate": 1.1572700296735906e-06, + "loss": 0.0967, + "step": 78, + "video_reward_cumulative_accuracy": 0.6538461538461539 + }, + { + "epoch": 0.023449094686850698, + "grad_norm": 2.648416519165039, + "learning_rate": 1.172106824925816e-06, + "loss": 0.0906, + "step": 79, + "video_reward_cumulative_accuracy": 0.6582278481012658 + }, + { + "epoch": 0.023745918670228554, + "grad_norm": 6.028584003448486, + "learning_rate": 1.1869436201780417e-06, + "loss": 0.0789, + "step": 80, + "video_reward_cumulative_accuracy": 0.65625 + }, + { + "epoch": 0.02404274265360641, + "grad_norm": 4.417842864990234, + "learning_rate": 1.2017804154302672e-06, + "loss": 0.0756, + "step": 81, + "video_reward_cumulative_accuracy": 0.654320987654321 + }, + { + "epoch": 0.024339566636984267, + "grad_norm": 5.482295513153076, + "learning_rate": 1.2166172106824927e-06, + "loss": 0.0518, + "step": 82, + "video_reward_cumulative_accuracy": 0.6585365853658537 + }, + { + "epoch": 0.024636390620362124, + "grad_norm": 3.1841440200805664, + "learning_rate": 1.2314540059347183e-06, + "loss": 0.0881, + "step": 83, + "video_reward_cumulative_accuracy": 0.6566265060240963 + }, + { + "epoch": 0.024933214603739984, + "grad_norm": 5.477931976318359, + "learning_rate": 1.2462908011869438e-06, + "loss": 0.0712, + "step": 84, + "video_reward_cumulative_accuracy": 0.6547619047619048 + }, + { + "epoch": 0.02523003858711784, + "grad_norm": 2.8534862995147705, + "learning_rate": 1.2611275964391693e-06, + "loss": 0.0796, + "step": 85, + "video_reward_cumulative_accuracy": 0.6588235294117647 + }, + { + "epoch": 0.025526862570495697, + "grad_norm": 6.181415557861328, + "learning_rate": 1.2759643916913947e-06, + "loss": 0.0667, + "step": 86, + "video_reward_cumulative_accuracy": 0.6569767441860465 + }, + { + "epoch": 0.025823686553873553, + "grad_norm": 4.147750377655029, + "learning_rate": 1.2908011869436202e-06, + "loss": 0.0738, + "step": 87, + "video_reward_cumulative_accuracy": 0.6609195402298851 + }, + { + "epoch": 0.02612051053725141, + "grad_norm": 4.225668907165527, + "learning_rate": 1.3056379821958458e-06, + "loss": 0.1034, + "step": 88, + "video_reward_cumulative_accuracy": 0.6647727272727273 + }, + { + "epoch": 0.026417334520629266, + "grad_norm": 2.278589963912964, + "learning_rate": 1.3204747774480713e-06, + "loss": 0.0505, + "step": 89, + "video_reward_cumulative_accuracy": 0.6685393258426966 + }, + { + "epoch": 0.026714158504007122, + "grad_norm": 6.009743690490723, + "learning_rate": 1.335311572700297e-06, + "loss": 0.0665, + "step": 90, + "video_reward_cumulative_accuracy": 0.6722222222222223 + }, + { + "epoch": 0.027010982487384982, + "grad_norm": 3.622457265853882, + "learning_rate": 1.3501483679525224e-06, + "loss": 0.0803, + "step": 91, + "video_reward_cumulative_accuracy": 0.6703296703296703 + }, + { + "epoch": 0.02730780647076284, + "grad_norm": 2.3656132221221924, + "learning_rate": 1.364985163204748e-06, + "loss": 0.0679, + "step": 92, + "video_reward_cumulative_accuracy": 0.6739130434782609 + }, + { + "epoch": 0.027604630454140695, + "grad_norm": 5.818204879760742, + "learning_rate": 1.3798219584569734e-06, + "loss": 0.0931, + "step": 93, + "video_reward_cumulative_accuracy": 0.6720430107526881 + }, + { + "epoch": 0.027901454437518552, + "grad_norm": 3.5401792526245117, + "learning_rate": 1.394658753709199e-06, + "loss": 0.0723, + "step": 94, + "video_reward_cumulative_accuracy": 0.675531914893617 + }, + { + "epoch": 0.028198278420896408, + "grad_norm": 6.389036178588867, + "learning_rate": 1.4094955489614245e-06, + "loss": 0.0554, + "step": 95, + "video_reward_cumulative_accuracy": 0.6789473684210526 + }, + { + "epoch": 0.028495102404274265, + "grad_norm": 3.825948476791382, + "learning_rate": 1.42433234421365e-06, + "loss": 0.0705, + "step": 96, + "video_reward_cumulative_accuracy": 0.6822916666666666 + }, + { + "epoch": 0.02879192638765212, + "grad_norm": 3.565723180770874, + "learning_rate": 1.4391691394658754e-06, + "loss": 0.0737, + "step": 97, + "video_reward_cumulative_accuracy": 0.6855670103092784 + }, + { + "epoch": 0.029088750371029978, + "grad_norm": 2.599555253982544, + "learning_rate": 1.4540059347181009e-06, + "loss": 0.0468, + "step": 98, + "video_reward_cumulative_accuracy": 0.6887755102040817 + }, + { + "epoch": 0.029385574354407838, + "grad_norm": 2.7549595832824707, + "learning_rate": 1.4688427299703265e-06, + "loss": 0.0644, + "step": 99, + "video_reward_cumulative_accuracy": 0.6919191919191919 + }, + { + "epoch": 0.029682398337785694, + "grad_norm": 5.881991386413574, + "learning_rate": 1.483679525222552e-06, + "loss": 0.0955, + "step": 100, + "video_reward_cumulative_accuracy": 0.685 + }, + { + "epoch": 0.02997922232116355, + "grad_norm": 2.9013118743896484, + "learning_rate": 1.4985163204747777e-06, + "loss": 0.0603, + "step": 101, + "video_reward_cumulative_accuracy": 0.6881188118811881 + }, + { + "epoch": 0.030276046304541407, + "grad_norm": 3.3732762336730957, + "learning_rate": 1.5133531157270031e-06, + "loss": 0.0615, + "step": 102, + "video_reward_cumulative_accuracy": 0.6911764705882353 + }, + { + "epoch": 0.030572870287919263, + "grad_norm": 4.168172359466553, + "learning_rate": 1.5281899109792286e-06, + "loss": 0.1154, + "step": 103, + "video_reward_cumulative_accuracy": 0.6893203883495146 + }, + { + "epoch": 0.03086969427129712, + "grad_norm": 3.426560163497925, + "learning_rate": 1.543026706231454e-06, + "loss": 0.0481, + "step": 104, + "video_reward_cumulative_accuracy": 0.6923076923076923 + }, + { + "epoch": 0.031166518254674976, + "grad_norm": 8.7980375289917, + "learning_rate": 1.5578635014836795e-06, + "loss": 0.116, + "step": 105, + "video_reward_cumulative_accuracy": 0.6952380952380952 + }, + { + "epoch": 0.03146334223805283, + "grad_norm": 3.171637773513794, + "learning_rate": 1.572700296735905e-06, + "loss": 0.0592, + "step": 106, + "video_reward_cumulative_accuracy": 0.6933962264150944 + }, + { + "epoch": 0.03176016622143069, + "grad_norm": 2.1238746643066406, + "learning_rate": 1.5875370919881309e-06, + "loss": 0.0498, + "step": 107, + "video_reward_cumulative_accuracy": 0.6962616822429907 + }, + { + "epoch": 0.032056990204808546, + "grad_norm": 3.829890727996826, + "learning_rate": 1.6023738872403563e-06, + "loss": 0.0897, + "step": 108, + "video_reward_cumulative_accuracy": 0.6990740740740741 + }, + { + "epoch": 0.032353814188186406, + "grad_norm": 3.1230406761169434, + "learning_rate": 1.6172106824925818e-06, + "loss": 0.0446, + "step": 109, + "video_reward_cumulative_accuracy": 0.7018348623853211 + }, + { + "epoch": 0.032650638171564265, + "grad_norm": 5.97599983215332, + "learning_rate": 1.6320474777448073e-06, + "loss": 0.1308, + "step": 110, + "video_reward_cumulative_accuracy": 0.7045454545454546 + }, + { + "epoch": 0.03294746215494212, + "grad_norm": 2.300419330596924, + "learning_rate": 1.6468842729970327e-06, + "loss": 0.0632, + "step": 111, + "video_reward_cumulative_accuracy": 0.7072072072072072 + }, + { + "epoch": 0.03324428613831998, + "grad_norm": 2.3834316730499268, + "learning_rate": 1.6617210682492582e-06, + "loss": 0.0585, + "step": 112, + "video_reward_cumulative_accuracy": 0.7098214285714286 + }, + { + "epoch": 0.03354111012169783, + "grad_norm": 2.218229293823242, + "learning_rate": 1.6765578635014836e-06, + "loss": 0.0574, + "step": 113, + "video_reward_cumulative_accuracy": 0.7079646017699115 + }, + { + "epoch": 0.03383793410507569, + "grad_norm": 6.411158084869385, + "learning_rate": 1.6913946587537095e-06, + "loss": 0.0608, + "step": 114, + "video_reward_cumulative_accuracy": 0.706140350877193 + }, + { + "epoch": 0.034134758088453544, + "grad_norm": 3.1801657676696777, + "learning_rate": 1.706231454005935e-06, + "loss": 0.0894, + "step": 115, + "video_reward_cumulative_accuracy": 0.7043478260869566 + }, + { + "epoch": 0.034431582071831404, + "grad_norm": 6.5272536277771, + "learning_rate": 1.7210682492581604e-06, + "loss": 0.0919, + "step": 116, + "video_reward_cumulative_accuracy": 0.7025862068965517 + }, + { + "epoch": 0.034728406055209264, + "grad_norm": 4.742123603820801, + "learning_rate": 1.735905044510386e-06, + "loss": 0.0324, + "step": 117, + "video_reward_cumulative_accuracy": 0.7051282051282052 + }, + { + "epoch": 0.03502523003858712, + "grad_norm": 3.718932628631592, + "learning_rate": 1.7507418397626114e-06, + "loss": 0.0611, + "step": 118, + "video_reward_cumulative_accuracy": 0.7033898305084746 + }, + { + "epoch": 0.03532205402196498, + "grad_norm": 4.320330619812012, + "learning_rate": 1.7655786350148368e-06, + "loss": 0.054, + "step": 119, + "video_reward_cumulative_accuracy": 0.7058823529411765 + }, + { + "epoch": 0.03561887800534283, + "grad_norm": 4.672208786010742, + "learning_rate": 1.7804154302670625e-06, + "loss": 0.0525, + "step": 120, + "video_reward_cumulative_accuracy": 0.7083333333333334 + }, + { + "epoch": 0.03591570198872069, + "grad_norm": 2.6272125244140625, + "learning_rate": 1.7952522255192882e-06, + "loss": 0.0652, + "step": 121, + "video_reward_cumulative_accuracy": 0.7107438016528925 + }, + { + "epoch": 0.03621252597209854, + "grad_norm": 1.9446464776992798, + "learning_rate": 1.8100890207715136e-06, + "loss": 0.043, + "step": 122, + "video_reward_cumulative_accuracy": 0.7131147540983607 + }, + { + "epoch": 0.0365093499554764, + "grad_norm": 4.1938910484313965, + "learning_rate": 1.824925816023739e-06, + "loss": 0.1023, + "step": 123, + "video_reward_cumulative_accuracy": 0.7154471544715447 + }, + { + "epoch": 0.03680617393885426, + "grad_norm": 3.999626874923706, + "learning_rate": 1.8397626112759646e-06, + "loss": 0.062, + "step": 124, + "video_reward_cumulative_accuracy": 0.7137096774193549 + }, + { + "epoch": 0.037102997922232116, + "grad_norm": 2.076876640319824, + "learning_rate": 1.85459940652819e-06, + "loss": 0.0479, + "step": 125, + "video_reward_cumulative_accuracy": 0.716 + }, + { + "epoch": 0.037399821905609976, + "grad_norm": 6.920149326324463, + "learning_rate": 1.8694362017804155e-06, + "loss": 0.0538, + "step": 126, + "video_reward_cumulative_accuracy": 0.7182539682539683 + }, + { + "epoch": 0.03769664588898783, + "grad_norm": 2.615006923675537, + "learning_rate": 1.8842729970326411e-06, + "loss": 0.0352, + "step": 127, + "video_reward_cumulative_accuracy": 0.7204724409448819 + }, + { + "epoch": 0.03799346987236569, + "grad_norm": 2.167612075805664, + "learning_rate": 1.8991097922848666e-06, + "loss": 0.0397, + "step": 128, + "video_reward_cumulative_accuracy": 0.72265625 + }, + { + "epoch": 0.03829029385574354, + "grad_norm": 3.5347766876220703, + "learning_rate": 1.9139465875370923e-06, + "loss": 0.0699, + "step": 129, + "video_reward_cumulative_accuracy": 0.7209302325581395 + }, + { + "epoch": 0.0385871178391214, + "grad_norm": 6.354689121246338, + "learning_rate": 1.9287833827893175e-06, + "loss": 0.0554, + "step": 130, + "video_reward_cumulative_accuracy": 0.7192307692307692 + }, + { + "epoch": 0.03888394182249926, + "grad_norm": 2.4313793182373047, + "learning_rate": 1.943620178041543e-06, + "loss": 0.0529, + "step": 131, + "video_reward_cumulative_accuracy": 0.7175572519083969 + }, + { + "epoch": 0.039180765805877114, + "grad_norm": 5.276021480560303, + "learning_rate": 1.9584569732937684e-06, + "loss": 0.0958, + "step": 132, + "video_reward_cumulative_accuracy": 0.7159090909090909 + }, + { + "epoch": 0.039477589789254974, + "grad_norm": 6.521552562713623, + "learning_rate": 1.9732937685459945e-06, + "loss": 0.0698, + "step": 133, + "video_reward_cumulative_accuracy": 0.7142857142857143 + }, + { + "epoch": 0.03977441377263283, + "grad_norm": 4.296199798583984, + "learning_rate": 1.98813056379822e-06, + "loss": 0.0632, + "step": 134, + "video_reward_cumulative_accuracy": 0.7089552238805971 + }, + { + "epoch": 0.04007123775601069, + "grad_norm": 5.775026321411133, + "learning_rate": 2.0029673590504455e-06, + "loss": 0.1013, + "step": 135, + "video_reward_cumulative_accuracy": 0.7111111111111111 + }, + { + "epoch": 0.04036806173938854, + "grad_norm": 2.675187349319458, + "learning_rate": 2.0178041543026707e-06, + "loss": 0.0515, + "step": 136, + "video_reward_cumulative_accuracy": 0.7132352941176471 + }, + { + "epoch": 0.0406648857227664, + "grad_norm": 6.260149955749512, + "learning_rate": 2.0326409495548964e-06, + "loss": 0.0883, + "step": 137, + "video_reward_cumulative_accuracy": 0.7153284671532847 + }, + { + "epoch": 0.04096170970614425, + "grad_norm": 11.186830520629883, + "learning_rate": 2.0474777448071216e-06, + "loss": 0.0918, + "step": 138, + "video_reward_cumulative_accuracy": 0.717391304347826 + }, + { + "epoch": 0.04125853368952211, + "grad_norm": 6.043707847595215, + "learning_rate": 2.0623145400593473e-06, + "loss": 0.085, + "step": 139, + "video_reward_cumulative_accuracy": 0.7158273381294964 + }, + { + "epoch": 0.04155535767289997, + "grad_norm": 9.900096893310547, + "learning_rate": 2.077151335311573e-06, + "loss": 0.0977, + "step": 140, + "video_reward_cumulative_accuracy": 0.7178571428571429 + }, + { + "epoch": 0.041852181656277826, + "grad_norm": 2.8422248363494873, + "learning_rate": 2.0919881305637987e-06, + "loss": 0.0399, + "step": 141, + "video_reward_cumulative_accuracy": 0.7163120567375887 + }, + { + "epoch": 0.042149005639655686, + "grad_norm": 2.6769790649414062, + "learning_rate": 2.106824925816024e-06, + "loss": 0.0355, + "step": 142, + "video_reward_cumulative_accuracy": 0.7183098591549296 + }, + { + "epoch": 0.04244582962303354, + "grad_norm": 3.1609225273132324, + "learning_rate": 2.1216617210682496e-06, + "loss": 0.0677, + "step": 143, + "video_reward_cumulative_accuracy": 0.7132867132867133 + }, + { + "epoch": 0.0427426536064114, + "grad_norm": 2.4717013835906982, + "learning_rate": 2.136498516320475e-06, + "loss": 0.0365, + "step": 144, + "video_reward_cumulative_accuracy": 0.7118055555555556 + }, + { + "epoch": 0.04303947758978925, + "grad_norm": 8.373668670654297, + "learning_rate": 2.1513353115727005e-06, + "loss": 0.1185, + "step": 145, + "video_reward_cumulative_accuracy": 0.7103448275862069 + }, + { + "epoch": 0.04333630157316711, + "grad_norm": 2.4484148025512695, + "learning_rate": 2.166172106824926e-06, + "loss": 0.0479, + "step": 146, + "video_reward_cumulative_accuracy": 0.7123287671232876 + }, + { + "epoch": 0.04363312555654497, + "grad_norm": 4.690200328826904, + "learning_rate": 2.1810089020771514e-06, + "loss": 0.0706, + "step": 147, + "video_reward_cumulative_accuracy": 0.7142857142857143 + }, + { + "epoch": 0.043929949539922825, + "grad_norm": 5.8376898765563965, + "learning_rate": 2.195845697329377e-06, + "loss": 0.0849, + "step": 148, + "video_reward_cumulative_accuracy": 0.7162162162162162 + }, + { + "epoch": 0.044226773523300685, + "grad_norm": 2.741074562072754, + "learning_rate": 2.2106824925816028e-06, + "loss": 0.0741, + "step": 149, + "video_reward_cumulative_accuracy": 0.7114093959731543 + }, + { + "epoch": 0.04452359750667854, + "grad_norm": 5.629610061645508, + "learning_rate": 2.225519287833828e-06, + "loss": 0.0525, + "step": 150, + "video_reward_cumulative_accuracy": 0.71 + }, + { + "epoch": 0.0448204214900564, + "grad_norm": 6.277879238128662, + "learning_rate": 2.2403560830860537e-06, + "loss": 0.0803, + "step": 151, + "video_reward_cumulative_accuracy": 0.7086092715231788 + }, + { + "epoch": 0.04511724547343425, + "grad_norm": 2.9074411392211914, + "learning_rate": 2.255192878338279e-06, + "loss": 0.0607, + "step": 152, + "video_reward_cumulative_accuracy": 0.7105263157894737 + }, + { + "epoch": 0.04541406945681211, + "grad_norm": 8.067234992980957, + "learning_rate": 2.2700296735905046e-06, + "loss": 0.0987, + "step": 153, + "video_reward_cumulative_accuracy": 0.7091503267973857 + }, + { + "epoch": 0.04571089344018997, + "grad_norm": 3.610557794570923, + "learning_rate": 2.2848664688427303e-06, + "loss": 0.0549, + "step": 154, + "video_reward_cumulative_accuracy": 0.7077922077922078 + }, + { + "epoch": 0.04600771742356782, + "grad_norm": 3.5607757568359375, + "learning_rate": 2.2997032640949555e-06, + "loss": 0.0864, + "step": 155, + "video_reward_cumulative_accuracy": 0.7064516129032258 + }, + { + "epoch": 0.04630454140694568, + "grad_norm": 4.017952919006348, + "learning_rate": 2.314540059347181e-06, + "loss": 0.0522, + "step": 156, + "video_reward_cumulative_accuracy": 0.7083333333333334 + }, + { + "epoch": 0.046601365390323536, + "grad_norm": 4.041268825531006, + "learning_rate": 2.3293768545994065e-06, + "loss": 0.0456, + "step": 157, + "video_reward_cumulative_accuracy": 0.7101910828025477 + }, + { + "epoch": 0.046898189373701396, + "grad_norm": 2.6721463203430176, + "learning_rate": 2.344213649851632e-06, + "loss": 0.0379, + "step": 158, + "video_reward_cumulative_accuracy": 0.7120253164556962 + }, + { + "epoch": 0.04719501335707925, + "grad_norm": 5.769506931304932, + "learning_rate": 2.359050445103858e-06, + "loss": 0.0505, + "step": 159, + "video_reward_cumulative_accuracy": 0.710691823899371 + }, + { + "epoch": 0.04749183734045711, + "grad_norm": 2.384072780609131, + "learning_rate": 2.3738872403560835e-06, + "loss": 0.0713, + "step": 160, + "video_reward_cumulative_accuracy": 0.70625 + }, + { + "epoch": 0.04778866132383497, + "grad_norm": 4.968862533569336, + "learning_rate": 2.3887240356083087e-06, + "loss": 0.0545, + "step": 161, + "video_reward_cumulative_accuracy": 0.7080745341614907 + }, + { + "epoch": 0.04808548530721282, + "grad_norm": 2.6680426597595215, + "learning_rate": 2.4035608308605344e-06, + "loss": 0.0545, + "step": 162, + "video_reward_cumulative_accuracy": 0.7098765432098766 + }, + { + "epoch": 0.04838230929059068, + "grad_norm": 5.463686943054199, + "learning_rate": 2.4183976261127596e-06, + "loss": 0.0798, + "step": 163, + "video_reward_cumulative_accuracy": 0.7085889570552147 + }, + { + "epoch": 0.048679133273968535, + "grad_norm": 4.160032749176025, + "learning_rate": 2.4332344213649853e-06, + "loss": 0.0885, + "step": 164, + "video_reward_cumulative_accuracy": 0.7042682926829268 + }, + { + "epoch": 0.048975957257346395, + "grad_norm": 8.451370239257812, + "learning_rate": 2.4480712166172106e-06, + "loss": 0.0836, + "step": 165, + "video_reward_cumulative_accuracy": 0.706060606060606 + }, + { + "epoch": 0.04927278124072425, + "grad_norm": 2.2728710174560547, + "learning_rate": 2.4629080118694367e-06, + "loss": 0.039, + "step": 166, + "video_reward_cumulative_accuracy": 0.7078313253012049 + }, + { + "epoch": 0.04956960522410211, + "grad_norm": 6.519056797027588, + "learning_rate": 2.477744807121662e-06, + "loss": 0.0594, + "step": 167, + "video_reward_cumulative_accuracy": 0.7065868263473054 + }, + { + "epoch": 0.04986642920747997, + "grad_norm": 11.774324417114258, + "learning_rate": 2.4925816023738876e-06, + "loss": 0.1125, + "step": 168, + "video_reward_cumulative_accuracy": 0.7083333333333334 + }, + { + "epoch": 0.05016325319085782, + "grad_norm": 1.901655673980713, + "learning_rate": 2.507418397626113e-06, + "loss": 0.0589, + "step": 169, + "video_reward_cumulative_accuracy": 0.7100591715976331 + }, + { + "epoch": 0.05046007717423568, + "grad_norm": 1.9185417890548706, + "learning_rate": 2.5222551928783385e-06, + "loss": 0.041, + "step": 170, + "video_reward_cumulative_accuracy": 0.711764705882353 + }, + { + "epoch": 0.05075690115761353, + "grad_norm": 1.7406995296478271, + "learning_rate": 2.5370919881305638e-06, + "loss": 0.0625, + "step": 171, + "video_reward_cumulative_accuracy": 0.7105263157894737 + }, + { + "epoch": 0.05105372514099139, + "grad_norm": 2.75722074508667, + "learning_rate": 2.5519287833827894e-06, + "loss": 0.036, + "step": 172, + "video_reward_cumulative_accuracy": 0.7122093023255814 + }, + { + "epoch": 0.051350549124369246, + "grad_norm": 4.977455139160156, + "learning_rate": 2.5667655786350147e-06, + "loss": 0.0586, + "step": 173, + "video_reward_cumulative_accuracy": 0.7109826589595376 + }, + { + "epoch": 0.051647373107747106, + "grad_norm": 3.5028724670410156, + "learning_rate": 2.5816023738872403e-06, + "loss": 0.0368, + "step": 174, + "video_reward_cumulative_accuracy": 0.7126436781609196 + }, + { + "epoch": 0.051944197091124966, + "grad_norm": 4.553066730499268, + "learning_rate": 2.5964391691394664e-06, + "loss": 0.042, + "step": 175, + "video_reward_cumulative_accuracy": 0.7142857142857143 + }, + { + "epoch": 0.05224102107450282, + "grad_norm": 5.41193962097168, + "learning_rate": 2.6112759643916917e-06, + "loss": 0.0806, + "step": 176, + "video_reward_cumulative_accuracy": 0.7159090909090909 + }, + { + "epoch": 0.05253784505788068, + "grad_norm": 2.606978178024292, + "learning_rate": 2.6261127596439174e-06, + "loss": 0.0336, + "step": 177, + "video_reward_cumulative_accuracy": 0.7175141242937854 + }, + { + "epoch": 0.05283466904125853, + "grad_norm": 3.2309417724609375, + "learning_rate": 2.6409495548961426e-06, + "loss": 0.0476, + "step": 178, + "video_reward_cumulative_accuracy": 0.7191011235955056 + }, + { + "epoch": 0.05313149302463639, + "grad_norm": 4.6940178871154785, + "learning_rate": 2.6557863501483683e-06, + "loss": 0.0644, + "step": 179, + "video_reward_cumulative_accuracy": 0.7178770949720671 + }, + { + "epoch": 0.053428317008014245, + "grad_norm": 4.964528560638428, + "learning_rate": 2.670623145400594e-06, + "loss": 0.0668, + "step": 180, + "video_reward_cumulative_accuracy": 0.7166666666666667 + }, + { + "epoch": 0.053725140991392105, + "grad_norm": 1.1753082275390625, + "learning_rate": 2.685459940652819e-06, + "loss": 0.0107, + "step": 181, + "video_reward_cumulative_accuracy": 0.7182320441988951 + }, + { + "epoch": 0.054021964974769965, + "grad_norm": 3.13619065284729, + "learning_rate": 2.700296735905045e-06, + "loss": 0.0513, + "step": 182, + "video_reward_cumulative_accuracy": 0.717032967032967 + }, + { + "epoch": 0.05431878895814782, + "grad_norm": 8.615299224853516, + "learning_rate": 2.71513353115727e-06, + "loss": 0.1667, + "step": 183, + "video_reward_cumulative_accuracy": 0.7158469945355191 + }, + { + "epoch": 0.05461561294152568, + "grad_norm": 8.474091529846191, + "learning_rate": 2.729970326409496e-06, + "loss": 0.0565, + "step": 184, + "video_reward_cumulative_accuracy": 0.717391304347826 + }, + { + "epoch": 0.05491243692490353, + "grad_norm": 3.5511362552642822, + "learning_rate": 2.744807121661721e-06, + "loss": 0.0296, + "step": 185, + "video_reward_cumulative_accuracy": 0.7189189189189189 + }, + { + "epoch": 0.05520926090828139, + "grad_norm": 3.176490306854248, + "learning_rate": 2.7596439169139467e-06, + "loss": 0.0326, + "step": 186, + "video_reward_cumulative_accuracy": 0.7204301075268817 + }, + { + "epoch": 0.055506084891659244, + "grad_norm": 4.008922100067139, + "learning_rate": 2.774480712166172e-06, + "loss": 0.0519, + "step": 187, + "video_reward_cumulative_accuracy": 0.7192513368983957 + }, + { + "epoch": 0.055802908875037104, + "grad_norm": 6.8299560546875, + "learning_rate": 2.789317507418398e-06, + "loss": 0.1015, + "step": 188, + "video_reward_cumulative_accuracy": 0.7154255319148937 + }, + { + "epoch": 0.05609973285841496, + "grad_norm": 2.7319183349609375, + "learning_rate": 2.8041543026706237e-06, + "loss": 0.0582, + "step": 189, + "video_reward_cumulative_accuracy": 0.716931216931217 + }, + { + "epoch": 0.056396556841792816, + "grad_norm": 7.440029621124268, + "learning_rate": 2.818991097922849e-06, + "loss": 0.0758, + "step": 190, + "video_reward_cumulative_accuracy": 0.7157894736842105 + }, + { + "epoch": 0.056693380825170676, + "grad_norm": 8.07271957397461, + "learning_rate": 2.8338278931750747e-06, + "loss": 0.0519, + "step": 191, + "video_reward_cumulative_accuracy": 0.7172774869109948 + }, + { + "epoch": 0.05699020480854853, + "grad_norm": 4.536227703094482, + "learning_rate": 2.8486646884273e-06, + "loss": 0.0574, + "step": 192, + "video_reward_cumulative_accuracy": 0.71875 + }, + { + "epoch": 0.05728702879192639, + "grad_norm": 2.7957005500793457, + "learning_rate": 2.8635014836795256e-06, + "loss": 0.0303, + "step": 193, + "video_reward_cumulative_accuracy": 0.7176165803108808 + }, + { + "epoch": 0.05758385277530424, + "grad_norm": 2.4321742057800293, + "learning_rate": 2.878338278931751e-06, + "loss": 0.0494, + "step": 194, + "video_reward_cumulative_accuracy": 0.7190721649484536 + }, + { + "epoch": 0.0578806767586821, + "grad_norm": 3.885902166366577, + "learning_rate": 2.8931750741839765e-06, + "loss": 0.0772, + "step": 195, + "video_reward_cumulative_accuracy": 0.7205128205128205 + }, + { + "epoch": 0.058177500742059955, + "grad_norm": 6.06294584274292, + "learning_rate": 2.9080118694362018e-06, + "loss": 0.0446, + "step": 196, + "video_reward_cumulative_accuracy": 0.7219387755102041 + }, + { + "epoch": 0.058474324725437815, + "grad_norm": 2.9600000381469727, + "learning_rate": 2.9228486646884274e-06, + "loss": 0.0723, + "step": 197, + "video_reward_cumulative_accuracy": 0.7233502538071066 + }, + { + "epoch": 0.058771148708815675, + "grad_norm": 5.309525012969971, + "learning_rate": 2.937685459940653e-06, + "loss": 0.0678, + "step": 198, + "video_reward_cumulative_accuracy": 0.7247474747474747 + }, + { + "epoch": 0.05906797269219353, + "grad_norm": 6.021256446838379, + "learning_rate": 2.9525222551928783e-06, + "loss": 0.1086, + "step": 199, + "video_reward_cumulative_accuracy": 0.7236180904522613 + }, + { + "epoch": 0.05936479667557139, + "grad_norm": 4.920889377593994, + "learning_rate": 2.967359050445104e-06, + "loss": 0.0766, + "step": 200, + "video_reward_cumulative_accuracy": 0.7225 + }, + { + "epoch": 0.05936479667557139, + "eval_runtime": 129.5769, + "eval_samples_per_second": 6.089, + "eval_steps_per_second": 0.764, + "eval_test_set_accuracy": 0.696969696969697, + "step": 200 + }, + { + "epoch": 0.05966162065894924, + "grad_norm": 8.54822063446045, + "learning_rate": 2.9821958456973297e-06, + "loss": 0.0915, + "step": 201, + "video_reward_cumulative_accuracy": 0.7213930348258707 + }, + { + "epoch": 0.0599584446423271, + "grad_norm": 3.513885974884033, + "learning_rate": 2.9970326409495554e-06, + "loss": 0.0686, + "step": 202, + "video_reward_cumulative_accuracy": 0.7227722772277227 + }, + { + "epoch": 0.060255268625704954, + "grad_norm": 4.311375141143799, + "learning_rate": 3.011869436201781e-06, + "loss": 0.0586, + "step": 203, + "video_reward_cumulative_accuracy": 0.7192118226600985 + }, + { + "epoch": 0.060552092609082814, + "grad_norm": 4.355630397796631, + "learning_rate": 3.0267062314540063e-06, + "loss": 0.0683, + "step": 204, + "video_reward_cumulative_accuracy": 0.7181372549019608 + }, + { + "epoch": 0.060848916592460674, + "grad_norm": 5.103359699249268, + "learning_rate": 3.041543026706232e-06, + "loss": 0.0869, + "step": 205, + "video_reward_cumulative_accuracy": 0.7195121951219512 + }, + { + "epoch": 0.06114574057583853, + "grad_norm": 4.328181266784668, + "learning_rate": 3.056379821958457e-06, + "loss": 0.0628, + "step": 206, + "video_reward_cumulative_accuracy": 0.720873786407767 + }, + { + "epoch": 0.06144256455921639, + "grad_norm": 2.0452539920806885, + "learning_rate": 3.071216617210683e-06, + "loss": 0.0691, + "step": 207, + "video_reward_cumulative_accuracy": 0.7222222222222222 + }, + { + "epoch": 0.06173938854259424, + "grad_norm": 2.371507167816162, + "learning_rate": 3.086053412462908e-06, + "loss": 0.0485, + "step": 208, + "video_reward_cumulative_accuracy": 0.7235576923076923 + }, + { + "epoch": 0.0620362125259721, + "grad_norm": 4.104339122772217, + "learning_rate": 3.100890207715134e-06, + "loss": 0.0686, + "step": 209, + "video_reward_cumulative_accuracy": 0.7248803827751196 + }, + { + "epoch": 0.06233303650934995, + "grad_norm": 8.020886421203613, + "learning_rate": 3.115727002967359e-06, + "loss": 0.0653, + "step": 210, + "video_reward_cumulative_accuracy": 0.7238095238095238 + }, + { + "epoch": 0.06262986049272781, + "grad_norm": 2.7191717624664307, + "learning_rate": 3.1305637982195847e-06, + "loss": 0.0474, + "step": 211, + "video_reward_cumulative_accuracy": 0.7251184834123223 + }, + { + "epoch": 0.06292668447610567, + "grad_norm": 2.388265609741211, + "learning_rate": 3.14540059347181e-06, + "loss": 0.0625, + "step": 212, + "video_reward_cumulative_accuracy": 0.7264150943396226 + }, + { + "epoch": 0.06322350845948353, + "grad_norm": 2.529482841491699, + "learning_rate": 3.1602373887240356e-06, + "loss": 0.054, + "step": 213, + "video_reward_cumulative_accuracy": 0.7253521126760564 + }, + { + "epoch": 0.06352033244286139, + "grad_norm": 5.697103977203369, + "learning_rate": 3.1750741839762617e-06, + "loss": 0.0667, + "step": 214, + "video_reward_cumulative_accuracy": 0.7242990654205608 + }, + { + "epoch": 0.06381715642623924, + "grad_norm": 2.470099449157715, + "learning_rate": 3.189910979228487e-06, + "loss": 0.0767, + "step": 215, + "video_reward_cumulative_accuracy": 0.7255813953488373 + }, + { + "epoch": 0.06411398040961709, + "grad_norm": 2.413121223449707, + "learning_rate": 3.2047477744807127e-06, + "loss": 0.0707, + "step": 216, + "video_reward_cumulative_accuracy": 0.7222222222222222 + }, + { + "epoch": 0.06441080439299496, + "grad_norm": 4.319202899932861, + "learning_rate": 3.219584569732938e-06, + "loss": 0.0661, + "step": 217, + "video_reward_cumulative_accuracy": 0.7235023041474654 + }, + { + "epoch": 0.06470762837637281, + "grad_norm": 10.807517051696777, + "learning_rate": 3.2344213649851636e-06, + "loss": 0.1928, + "step": 218, + "video_reward_cumulative_accuracy": 0.7201834862385321 + }, + { + "epoch": 0.06500445235975066, + "grad_norm": 3.2382359504699707, + "learning_rate": 3.2492581602373893e-06, + "loss": 0.0677, + "step": 219, + "video_reward_cumulative_accuracy": 0.7214611872146118 + }, + { + "epoch": 0.06530127634312853, + "grad_norm": 3.427091121673584, + "learning_rate": 3.2640949554896145e-06, + "loss": 0.0519, + "step": 220, + "video_reward_cumulative_accuracy": 0.7204545454545455 + }, + { + "epoch": 0.06559810032650638, + "grad_norm": 5.226832389831543, + "learning_rate": 3.27893175074184e-06, + "loss": 0.0667, + "step": 221, + "video_reward_cumulative_accuracy": 0.7194570135746606 + }, + { + "epoch": 0.06589492430988424, + "grad_norm": 4.402886390686035, + "learning_rate": 3.2937685459940654e-06, + "loss": 0.0532, + "step": 222, + "video_reward_cumulative_accuracy": 0.7207207207207207 + }, + { + "epoch": 0.06619174829326209, + "grad_norm": 5.255795001983643, + "learning_rate": 3.308605341246291e-06, + "loss": 0.0755, + "step": 223, + "video_reward_cumulative_accuracy": 0.7219730941704036 + }, + { + "epoch": 0.06648857227663996, + "grad_norm": 8.409960746765137, + "learning_rate": 3.3234421364985163e-06, + "loss": 0.1155, + "step": 224, + "video_reward_cumulative_accuracy": 0.7232142857142857 + }, + { + "epoch": 0.06678539626001781, + "grad_norm": 8.938908576965332, + "learning_rate": 3.338278931750742e-06, + "loss": 0.1178, + "step": 225, + "video_reward_cumulative_accuracy": 0.7222222222222222 + }, + { + "epoch": 0.06708222024339566, + "grad_norm": 8.283513069152832, + "learning_rate": 3.3531157270029673e-06, + "loss": 0.1187, + "step": 226, + "video_reward_cumulative_accuracy": 0.7234513274336283 + }, + { + "epoch": 0.06737904422677353, + "grad_norm": 6.216405391693115, + "learning_rate": 3.3679525222551934e-06, + "loss": 0.0735, + "step": 227, + "video_reward_cumulative_accuracy": 0.7224669603524229 + }, + { + "epoch": 0.06767586821015138, + "grad_norm": 4.862206935882568, + "learning_rate": 3.382789317507419e-06, + "loss": 0.0792, + "step": 228, + "video_reward_cumulative_accuracy": 0.7236842105263158 + }, + { + "epoch": 0.06797269219352924, + "grad_norm": 2.139225959777832, + "learning_rate": 3.3976261127596443e-06, + "loss": 0.0682, + "step": 229, + "video_reward_cumulative_accuracy": 0.7248908296943232 + }, + { + "epoch": 0.06826951617690709, + "grad_norm": 3.5411124229431152, + "learning_rate": 3.41246290801187e-06, + "loss": 0.0719, + "step": 230, + "video_reward_cumulative_accuracy": 0.7260869565217392 + }, + { + "epoch": 0.06856634016028496, + "grad_norm": 3.569051742553711, + "learning_rate": 3.427299703264095e-06, + "loss": 0.0443, + "step": 231, + "video_reward_cumulative_accuracy": 0.7272727272727273 + }, + { + "epoch": 0.06886316414366281, + "grad_norm": 5.477877616882324, + "learning_rate": 3.442136498516321e-06, + "loss": 0.0875, + "step": 232, + "video_reward_cumulative_accuracy": 0.7262931034482759 + }, + { + "epoch": 0.06915998812704066, + "grad_norm": 4.547797203063965, + "learning_rate": 3.456973293768546e-06, + "loss": 0.0537, + "step": 233, + "video_reward_cumulative_accuracy": 0.7274678111587983 + }, + { + "epoch": 0.06945681211041853, + "grad_norm": 3.6796634197235107, + "learning_rate": 3.471810089020772e-06, + "loss": 0.0563, + "step": 234, + "video_reward_cumulative_accuracy": 0.7264957264957265 + }, + { + "epoch": 0.06975363609379638, + "grad_norm": 1.6680197715759277, + "learning_rate": 3.486646884272997e-06, + "loss": 0.0447, + "step": 235, + "video_reward_cumulative_accuracy": 0.7276595744680852 + }, + { + "epoch": 0.07005046007717423, + "grad_norm": 3.814924478530884, + "learning_rate": 3.5014836795252227e-06, + "loss": 0.0532, + "step": 236, + "video_reward_cumulative_accuracy": 0.7266949152542372 + }, + { + "epoch": 0.07034728406055209, + "grad_norm": 2.366469383239746, + "learning_rate": 3.5163204747774484e-06, + "loss": 0.0393, + "step": 237, + "video_reward_cumulative_accuracy": 0.7278481012658228 + }, + { + "epoch": 0.07064410804392995, + "grad_norm": 5.3031206130981445, + "learning_rate": 3.5311572700296736e-06, + "loss": 0.0637, + "step": 238, + "video_reward_cumulative_accuracy": 0.7289915966386554 + }, + { + "epoch": 0.07094093202730781, + "grad_norm": 2.591233491897583, + "learning_rate": 3.5459940652818993e-06, + "loss": 0.0779, + "step": 239, + "video_reward_cumulative_accuracy": 0.7301255230125523 + }, + { + "epoch": 0.07123775601068566, + "grad_norm": 5.400726795196533, + "learning_rate": 3.560830860534125e-06, + "loss": 0.0881, + "step": 240, + "video_reward_cumulative_accuracy": 0.73125 + }, + { + "epoch": 0.07153457999406353, + "grad_norm": 6.161452293395996, + "learning_rate": 3.5756676557863507e-06, + "loss": 0.1075, + "step": 241, + "video_reward_cumulative_accuracy": 0.7323651452282157 + }, + { + "epoch": 0.07183140397744138, + "grad_norm": 4.78709077835083, + "learning_rate": 3.5905044510385763e-06, + "loss": 0.0733, + "step": 242, + "video_reward_cumulative_accuracy": 0.7334710743801653 + }, + { + "epoch": 0.07212822796081923, + "grad_norm": 2.570233106613159, + "learning_rate": 3.6053412462908016e-06, + "loss": 0.0624, + "step": 243, + "video_reward_cumulative_accuracy": 0.7345679012345679 + }, + { + "epoch": 0.07242505194419709, + "grad_norm": 2.175311803817749, + "learning_rate": 3.6201780415430273e-06, + "loss": 0.0712, + "step": 244, + "video_reward_cumulative_accuracy": 0.735655737704918 + }, + { + "epoch": 0.07272187592757495, + "grad_norm": 2.6615543365478516, + "learning_rate": 3.6350148367952525e-06, + "loss": 0.0627, + "step": 245, + "video_reward_cumulative_accuracy": 0.736734693877551 + }, + { + "epoch": 0.0730186999109528, + "grad_norm": 3.832801580429077, + "learning_rate": 3.649851632047478e-06, + "loss": 0.0511, + "step": 246, + "video_reward_cumulative_accuracy": 0.7378048780487805 + }, + { + "epoch": 0.07331552389433066, + "grad_norm": 2.9405055046081543, + "learning_rate": 3.6646884272997034e-06, + "loss": 0.0773, + "step": 247, + "video_reward_cumulative_accuracy": 0.7388663967611336 + }, + { + "epoch": 0.07361234787770853, + "grad_norm": 1.5752394199371338, + "learning_rate": 3.679525222551929e-06, + "loss": 0.0362, + "step": 248, + "video_reward_cumulative_accuracy": 0.7399193548387096 + }, + { + "epoch": 0.07390917186108638, + "grad_norm": 2.3977644443511963, + "learning_rate": 3.6943620178041544e-06, + "loss": 0.0669, + "step": 249, + "video_reward_cumulative_accuracy": 0.7409638554216867 + }, + { + "epoch": 0.07420599584446423, + "grad_norm": 2.2234885692596436, + "learning_rate": 3.70919881305638e-06, + "loss": 0.0717, + "step": 250, + "video_reward_cumulative_accuracy": 0.74 + }, + { + "epoch": 0.07450281982784208, + "grad_norm": 2.77751088142395, + "learning_rate": 3.7240356083086053e-06, + "loss": 0.0726, + "step": 251, + "video_reward_cumulative_accuracy": 0.7410358565737052 + }, + { + "epoch": 0.07479964381121995, + "grad_norm": 2.137138843536377, + "learning_rate": 3.738872403560831e-06, + "loss": 0.046, + "step": 252, + "video_reward_cumulative_accuracy": 0.7420634920634921 + }, + { + "epoch": 0.0750964677945978, + "grad_norm": 3.0533056259155273, + "learning_rate": 3.753709198813057e-06, + "loss": 0.038, + "step": 253, + "video_reward_cumulative_accuracy": 0.7430830039525692 + }, + { + "epoch": 0.07539329177797566, + "grad_norm": 1.6132036447525024, + "learning_rate": 3.7685459940652823e-06, + "loss": 0.0406, + "step": 254, + "video_reward_cumulative_accuracy": 0.7440944881889764 + }, + { + "epoch": 0.07569011576135352, + "grad_norm": 1.4988843202590942, + "learning_rate": 3.783382789317508e-06, + "loss": 0.039, + "step": 255, + "video_reward_cumulative_accuracy": 0.7450980392156863 + }, + { + "epoch": 0.07598693974473138, + "grad_norm": 3.1757826805114746, + "learning_rate": 3.7982195845697332e-06, + "loss": 0.0607, + "step": 256, + "video_reward_cumulative_accuracy": 0.74609375 + }, + { + "epoch": 0.07628376372810923, + "grad_norm": 5.67103385925293, + "learning_rate": 3.813056379821959e-06, + "loss": 0.0528, + "step": 257, + "video_reward_cumulative_accuracy": 0.7470817120622568 + }, + { + "epoch": 0.07658058771148708, + "grad_norm": 2.0683236122131348, + "learning_rate": 3.8278931750741846e-06, + "loss": 0.0275, + "step": 258, + "video_reward_cumulative_accuracy": 0.748062015503876 + }, + { + "epoch": 0.07687741169486495, + "grad_norm": 4.611932277679443, + "learning_rate": 3.84272997032641e-06, + "loss": 0.0939, + "step": 259, + "video_reward_cumulative_accuracy": 0.747104247104247 + }, + { + "epoch": 0.0771742356782428, + "grad_norm": 4.704992294311523, + "learning_rate": 3.857566765578635e-06, + "loss": 0.1056, + "step": 260, + "video_reward_cumulative_accuracy": 0.7480769230769231 + }, + { + "epoch": 0.07747105966162066, + "grad_norm": 6.33126974105835, + "learning_rate": 3.872403560830861e-06, + "loss": 0.1326, + "step": 261, + "video_reward_cumulative_accuracy": 0.7490421455938697 + }, + { + "epoch": 0.07776788364499852, + "grad_norm": 2.4778928756713867, + "learning_rate": 3.887240356083086e-06, + "loss": 0.0467, + "step": 262, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.07806470762837638, + "grad_norm": 11.440229415893555, + "learning_rate": 3.902077151335312e-06, + "loss": 0.1549, + "step": 263, + "video_reward_cumulative_accuracy": 0.7490494296577946 + }, + { + "epoch": 0.07836153161175423, + "grad_norm": 4.551571369171143, + "learning_rate": 3.916913946587537e-06, + "loss": 0.0389, + "step": 264, + "video_reward_cumulative_accuracy": 0.7481060606060606 + }, + { + "epoch": 0.07865835559513208, + "grad_norm": 4.240347385406494, + "learning_rate": 3.931750741839763e-06, + "loss": 0.0743, + "step": 265, + "video_reward_cumulative_accuracy": 0.7490566037735849 + }, + { + "epoch": 0.07895517957850995, + "grad_norm": 7.178493976593018, + "learning_rate": 3.946587537091989e-06, + "loss": 0.083, + "step": 266, + "video_reward_cumulative_accuracy": 0.7481203007518797 + }, + { + "epoch": 0.0792520035618878, + "grad_norm": 8.066522598266602, + "learning_rate": 3.961424332344214e-06, + "loss": 0.0835, + "step": 267, + "video_reward_cumulative_accuracy": 0.7471910112359551 + }, + { + "epoch": 0.07954882754526565, + "grad_norm": 6.96315860748291, + "learning_rate": 3.97626112759644e-06, + "loss": 0.0599, + "step": 268, + "video_reward_cumulative_accuracy": 0.7481343283582089 + }, + { + "epoch": 0.07984565152864351, + "grad_norm": 2.7894434928894043, + "learning_rate": 3.991097922848665e-06, + "loss": 0.0418, + "step": 269, + "video_reward_cumulative_accuracy": 0.7490706319702602 + }, + { + "epoch": 0.08014247551202137, + "grad_norm": 2.1485440731048584, + "learning_rate": 4.005934718100891e-06, + "loss": 0.058, + "step": 270, + "video_reward_cumulative_accuracy": 0.7481481481481481 + }, + { + "epoch": 0.08043929949539923, + "grad_norm": 3.671163320541382, + "learning_rate": 4.020771513353116e-06, + "loss": 0.0447, + "step": 271, + "video_reward_cumulative_accuracy": 0.7490774907749077 + }, + { + "epoch": 0.08073612347877708, + "grad_norm": 2.1427624225616455, + "learning_rate": 4.0356083086053414e-06, + "loss": 0.0365, + "step": 272, + "video_reward_cumulative_accuracy": 0.7481617647058824 + }, + { + "epoch": 0.08103294746215495, + "grad_norm": 2.3186142444610596, + "learning_rate": 4.050445103857567e-06, + "loss": 0.0578, + "step": 273, + "video_reward_cumulative_accuracy": 0.7472527472527473 + }, + { + "epoch": 0.0813297714455328, + "grad_norm": 7.067409515380859, + "learning_rate": 4.065281899109793e-06, + "loss": 0.1045, + "step": 274, + "video_reward_cumulative_accuracy": 0.7463503649635036 + }, + { + "epoch": 0.08162659542891065, + "grad_norm": 0.9794759154319763, + "learning_rate": 4.080118694362018e-06, + "loss": 0.0146, + "step": 275, + "video_reward_cumulative_accuracy": 0.7472727272727273 + }, + { + "epoch": 0.0819234194122885, + "grad_norm": 5.27300500869751, + "learning_rate": 4.094955489614243e-06, + "loss": 0.0394, + "step": 276, + "video_reward_cumulative_accuracy": 0.7481884057971014 + }, + { + "epoch": 0.08222024339566637, + "grad_norm": 4.100281238555908, + "learning_rate": 4.109792284866469e-06, + "loss": 0.0616, + "step": 277, + "video_reward_cumulative_accuracy": 0.7490974729241877 + }, + { + "epoch": 0.08251706737904423, + "grad_norm": 4.657914638519287, + "learning_rate": 4.124629080118695e-06, + "loss": 0.0861, + "step": 278, + "video_reward_cumulative_accuracy": 0.7482014388489209 + }, + { + "epoch": 0.08281389136242208, + "grad_norm": 4.093924045562744, + "learning_rate": 4.139465875370921e-06, + "loss": 0.048, + "step": 279, + "video_reward_cumulative_accuracy": 0.7491039426523297 + }, + { + "epoch": 0.08311071534579995, + "grad_norm": 7.382143020629883, + "learning_rate": 4.154302670623146e-06, + "loss": 0.0895, + "step": 280, + "video_reward_cumulative_accuracy": 0.7464285714285714 + }, + { + "epoch": 0.0834075393291778, + "grad_norm": 11.922904968261719, + "learning_rate": 4.169139465875371e-06, + "loss": 0.1006, + "step": 281, + "video_reward_cumulative_accuracy": 0.7473309608540926 + }, + { + "epoch": 0.08370436331255565, + "grad_norm": 3.6098527908325195, + "learning_rate": 4.183976261127597e-06, + "loss": 0.0253, + "step": 282, + "video_reward_cumulative_accuracy": 0.74822695035461 + }, + { + "epoch": 0.0840011872959335, + "grad_norm": 4.491762161254883, + "learning_rate": 4.1988130563798226e-06, + "loss": 0.0394, + "step": 283, + "video_reward_cumulative_accuracy": 0.7473498233215548 + }, + { + "epoch": 0.08429801127931137, + "grad_norm": 9.078802108764648, + "learning_rate": 4.213649851632048e-06, + "loss": 0.1382, + "step": 284, + "video_reward_cumulative_accuracy": 0.7464788732394366 + }, + { + "epoch": 0.08459483526268922, + "grad_norm": 4.624719619750977, + "learning_rate": 4.228486646884273e-06, + "loss": 0.0321, + "step": 285, + "video_reward_cumulative_accuracy": 0.7473684210526316 + }, + { + "epoch": 0.08489165924606708, + "grad_norm": 4.962191104888916, + "learning_rate": 4.243323442136499e-06, + "loss": 0.1042, + "step": 286, + "video_reward_cumulative_accuracy": 0.7482517482517482 + }, + { + "epoch": 0.08518848322944494, + "grad_norm": 3.402569055557251, + "learning_rate": 4.258160237388724e-06, + "loss": 0.0473, + "step": 287, + "video_reward_cumulative_accuracy": 0.7491289198606271 + }, + { + "epoch": 0.0854853072128228, + "grad_norm": 5.989389419555664, + "learning_rate": 4.27299703264095e-06, + "loss": 0.1256, + "step": 288, + "video_reward_cumulative_accuracy": 0.7482638888888888 + }, + { + "epoch": 0.08578213119620065, + "grad_norm": 7.128279685974121, + "learning_rate": 4.287833827893175e-06, + "loss": 0.0807, + "step": 289, + "video_reward_cumulative_accuracy": 0.7491349480968859 + }, + { + "epoch": 0.0860789551795785, + "grad_norm": 3.677250623703003, + "learning_rate": 4.302670623145401e-06, + "loss": 0.0731, + "step": 290, + "video_reward_cumulative_accuracy": 0.7482758620689656 + }, + { + "epoch": 0.08637577916295637, + "grad_norm": 3.578486680984497, + "learning_rate": 4.317507418397626e-06, + "loss": 0.0695, + "step": 291, + "video_reward_cumulative_accuracy": 0.7491408934707904 + }, + { + "epoch": 0.08667260314633422, + "grad_norm": 2.7287142276763916, + "learning_rate": 4.332344213649852e-06, + "loss": 0.0301, + "step": 292, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.08696942712971208, + "grad_norm": 2.3789162635803223, + "learning_rate": 4.347181008902078e-06, + "loss": 0.0378, + "step": 293, + "video_reward_cumulative_accuracy": 0.7491467576791809 + }, + { + "epoch": 0.08726625111308994, + "grad_norm": 1.2172014713287354, + "learning_rate": 4.362017804154303e-06, + "loss": 0.0288, + "step": 294, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.0875630750964678, + "grad_norm": 4.625653266906738, + "learning_rate": 4.376854599406529e-06, + "loss": 0.096, + "step": 295, + "video_reward_cumulative_accuracy": 0.7491525423728813 + }, + { + "epoch": 0.08785989907984565, + "grad_norm": 3.8820786476135254, + "learning_rate": 4.391691394658754e-06, + "loss": 0.0471, + "step": 296, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.0881567230632235, + "grad_norm": 5.01461935043335, + "learning_rate": 4.4065281899109794e-06, + "loss": 0.0686, + "step": 297, + "video_reward_cumulative_accuracy": 0.7508417508417509 + }, + { + "epoch": 0.08845354704660137, + "grad_norm": 8.346698760986328, + "learning_rate": 4.4213649851632055e-06, + "loss": 0.1304, + "step": 298, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.08875037102997922, + "grad_norm": 2.789457082748413, + "learning_rate": 4.436201780415431e-06, + "loss": 0.0618, + "step": 299, + "video_reward_cumulative_accuracy": 0.7508361204013378 + }, + { + "epoch": 0.08904719501335707, + "grad_norm": 4.110484600067139, + "learning_rate": 4.451038575667656e-06, + "loss": 0.091, + "step": 300, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.08934401899673494, + "grad_norm": 1.834205150604248, + "learning_rate": 4.465875370919881e-06, + "loss": 0.0305, + "step": 301, + "video_reward_cumulative_accuracy": 0.7508305647840532 + }, + { + "epoch": 0.0896408429801128, + "grad_norm": 3.296640157699585, + "learning_rate": 4.480712166172107e-06, + "loss": 0.0954, + "step": 302, + "video_reward_cumulative_accuracy": 0.75 + }, + { + "epoch": 0.08993766696349065, + "grad_norm": 1.8095667362213135, + "learning_rate": 4.495548961424333e-06, + "loss": 0.0251, + "step": 303, + "video_reward_cumulative_accuracy": 0.7508250825082509 + }, + { + "epoch": 0.0902344909468685, + "grad_norm": 4.808000564575195, + "learning_rate": 4.510385756676558e-06, + "loss": 0.0786, + "step": 304, + "video_reward_cumulative_accuracy": 0.7516447368421053 + }, + { + "epoch": 0.09053131493024637, + "grad_norm": 4.050952911376953, + "learning_rate": 4.525222551928784e-06, + "loss": 0.0847, + "step": 305, + "video_reward_cumulative_accuracy": 0.7524590163934426 + }, + { + "epoch": 0.09082813891362422, + "grad_norm": 6.732780933380127, + "learning_rate": 4.540059347181009e-06, + "loss": 0.1121, + "step": 306, + "video_reward_cumulative_accuracy": 0.7532679738562091 + }, + { + "epoch": 0.09112496289700207, + "grad_norm": 5.494693279266357, + "learning_rate": 4.554896142433235e-06, + "loss": 0.0543, + "step": 307, + "video_reward_cumulative_accuracy": 0.754071661237785 + }, + { + "epoch": 0.09142178688037994, + "grad_norm": 2.1636104583740234, + "learning_rate": 4.5697329376854606e-06, + "loss": 0.0511, + "step": 308, + "video_reward_cumulative_accuracy": 0.7532467532467533 + }, + { + "epoch": 0.0917186108637578, + "grad_norm": 6.281263828277588, + "learning_rate": 4.584569732937686e-06, + "loss": 0.0951, + "step": 309, + "video_reward_cumulative_accuracy": 0.7540453074433657 + }, + { + "epoch": 0.09201543484713565, + "grad_norm": 3.543318033218384, + "learning_rate": 4.599406528189911e-06, + "loss": 0.0731, + "step": 310, + "video_reward_cumulative_accuracy": 0.7532258064516129 + }, + { + "epoch": 0.0923122588305135, + "grad_norm": 2.5978658199310303, + "learning_rate": 4.614243323442137e-06, + "loss": 0.0714, + "step": 311, + "video_reward_cumulative_accuracy": 0.7540192926045016 + }, + { + "epoch": 0.09260908281389137, + "grad_norm": 2.553865909576416, + "learning_rate": 4.629080118694362e-06, + "loss": 0.0742, + "step": 312, + "video_reward_cumulative_accuracy": 0.7548076923076923 + }, + { + "epoch": 0.09290590679726922, + "grad_norm": 1.7062216997146606, + "learning_rate": 4.643916913946588e-06, + "loss": 0.0572, + "step": 313, + "video_reward_cumulative_accuracy": 0.7539936102236422 + }, + { + "epoch": 0.09320273078064707, + "grad_norm": 1.2176084518432617, + "learning_rate": 4.658753709198813e-06, + "loss": 0.049, + "step": 314, + "video_reward_cumulative_accuracy": 0.7547770700636943 + }, + { + "epoch": 0.09349955476402494, + "grad_norm": 1.2772020101547241, + "learning_rate": 4.673590504451039e-06, + "loss": 0.0449, + "step": 315, + "video_reward_cumulative_accuracy": 0.753968253968254 + }, + { + "epoch": 0.09379637874740279, + "grad_norm": 1.6296868324279785, + "learning_rate": 4.688427299703264e-06, + "loss": 0.043, + "step": 316, + "video_reward_cumulative_accuracy": 0.754746835443038 + }, + { + "epoch": 0.09409320273078065, + "grad_norm": 2.408234119415283, + "learning_rate": 4.7032640949554895e-06, + "loss": 0.0572, + "step": 317, + "video_reward_cumulative_accuracy": 0.7555205047318612 + }, + { + "epoch": 0.0943900267141585, + "grad_norm": 1.397177815437317, + "learning_rate": 4.718100890207716e-06, + "loss": 0.0276, + "step": 318, + "video_reward_cumulative_accuracy": 0.7562893081761006 + }, + { + "epoch": 0.09468685069753636, + "grad_norm": 2.097848415374756, + "learning_rate": 4.732937685459941e-06, + "loss": 0.0501, + "step": 319, + "video_reward_cumulative_accuracy": 0.7570532915360502 + }, + { + "epoch": 0.09498367468091422, + "grad_norm": 2.369933605194092, + "learning_rate": 4.747774480712167e-06, + "loss": 0.0377, + "step": 320, + "video_reward_cumulative_accuracy": 0.7578125 + }, + { + "epoch": 0.09528049866429207, + "grad_norm": 2.974731922149658, + "learning_rate": 4.762611275964392e-06, + "loss": 0.0609, + "step": 321, + "video_reward_cumulative_accuracy": 0.7570093457943925 + }, + { + "epoch": 0.09557732264766994, + "grad_norm": 2.996340036392212, + "learning_rate": 4.7774480712166174e-06, + "loss": 0.0795, + "step": 322, + "video_reward_cumulative_accuracy": 0.7577639751552795 + }, + { + "epoch": 0.09587414663104779, + "grad_norm": 2.6763088703155518, + "learning_rate": 4.7922848664688435e-06, + "loss": 0.067, + "step": 323, + "video_reward_cumulative_accuracy": 0.7585139318885449 + }, + { + "epoch": 0.09617097061442564, + "grad_norm": 3.8970932960510254, + "learning_rate": 4.807121661721069e-06, + "loss": 0.0637, + "step": 324, + "video_reward_cumulative_accuracy": 0.7592592592592593 + }, + { + "epoch": 0.0964677945978035, + "grad_norm": 3.0511136054992676, + "learning_rate": 4.821958456973294e-06, + "loss": 0.0545, + "step": 325, + "video_reward_cumulative_accuracy": 0.76 + }, + { + "epoch": 0.09676461858118136, + "grad_norm": 2.455991744995117, + "learning_rate": 4.836795252225519e-06, + "loss": 0.052, + "step": 326, + "video_reward_cumulative_accuracy": 0.75920245398773 + }, + { + "epoch": 0.09706144256455922, + "grad_norm": 5.562345504760742, + "learning_rate": 4.851632047477745e-06, + "loss": 0.0677, + "step": 327, + "video_reward_cumulative_accuracy": 0.7584097859327217 + }, + { + "epoch": 0.09735826654793707, + "grad_norm": 3.8642868995666504, + "learning_rate": 4.866468842729971e-06, + "loss": 0.0758, + "step": 328, + "video_reward_cumulative_accuracy": 0.7576219512195121 + }, + { + "epoch": 0.09765509053131494, + "grad_norm": 5.224849224090576, + "learning_rate": 4.881305637982196e-06, + "loss": 0.0496, + "step": 329, + "video_reward_cumulative_accuracy": 0.7583586626139818 + }, + { + "epoch": 0.09795191451469279, + "grad_norm": 2.613288402557373, + "learning_rate": 4.896142433234421e-06, + "loss": 0.0323, + "step": 330, + "video_reward_cumulative_accuracy": 0.759090909090909 + }, + { + "epoch": 0.09824873849807064, + "grad_norm": 1.3308014869689941, + "learning_rate": 4.910979228486647e-06, + "loss": 0.0467, + "step": 331, + "video_reward_cumulative_accuracy": 0.7598187311178247 + }, + { + "epoch": 0.0985455624814485, + "grad_norm": 2.2056570053100586, + "learning_rate": 4.925816023738873e-06, + "loss": 0.0739, + "step": 332, + "video_reward_cumulative_accuracy": 0.7605421686746988 + }, + { + "epoch": 0.09884238646482636, + "grad_norm": 1.1088576316833496, + "learning_rate": 4.9406528189910986e-06, + "loss": 0.0498, + "step": 333, + "video_reward_cumulative_accuracy": 0.7597597597597597 + }, + { + "epoch": 0.09913921044820422, + "grad_norm": 1.563938856124878, + "learning_rate": 4.955489614243324e-06, + "loss": 0.0737, + "step": 334, + "video_reward_cumulative_accuracy": 0.7604790419161677 + }, + { + "epoch": 0.09943603443158207, + "grad_norm": 1.9494915008544922, + "learning_rate": 4.970326409495549e-06, + "loss": 0.0488, + "step": 335, + "video_reward_cumulative_accuracy": 0.7611940298507462 + }, + { + "epoch": 0.09973285841495994, + "grad_norm": 4.285027980804443, + "learning_rate": 4.985163204747775e-06, + "loss": 0.0958, + "step": 336, + "video_reward_cumulative_accuracy": 0.7619047619047619 + }, + { + "epoch": 0.10002968239833779, + "grad_norm": 1.7529289722442627, + "learning_rate": 5e-06, + "loss": 0.0616, + "step": 337, + "video_reward_cumulative_accuracy": 0.7611275964391692 + }, + { + "epoch": 0.10032650638171564, + "grad_norm": 2.010082960128784, + "learning_rate": 4.999998658003678e-06, + "loss": 0.0854, + "step": 338, + "video_reward_cumulative_accuracy": 0.7618343195266272 + }, + { + "epoch": 0.1006233303650935, + "grad_norm": 2.320046901702881, + "learning_rate": 4.9999946320161525e-06, + "loss": 0.0489, + "step": 339, + "video_reward_cumulative_accuracy": 0.7625368731563422 + }, + { + "epoch": 0.10092015434847136, + "grad_norm": 3.510748863220215, + "learning_rate": 4.999987922041746e-06, + "loss": 0.1, + "step": 340, + "video_reward_cumulative_accuracy": 0.7602941176470588 + }, + { + "epoch": 0.10121697833184921, + "grad_norm": 4.800668239593506, + "learning_rate": 4.999978528087661e-06, + "loss": 0.0532, + "step": 341, + "video_reward_cumulative_accuracy": 0.7609970674486803 + }, + { + "epoch": 0.10151380231522707, + "grad_norm": 3.035673141479492, + "learning_rate": 4.999966450163984e-06, + "loss": 0.0732, + "step": 342, + "video_reward_cumulative_accuracy": 0.7616959064327485 + }, + { + "epoch": 0.10181062629860493, + "grad_norm": 1.4098176956176758, + "learning_rate": 4.999951688283682e-06, + "loss": 0.0352, + "step": 343, + "video_reward_cumulative_accuracy": 0.7623906705539358 + }, + { + "epoch": 0.10210745028198279, + "grad_norm": 2.0432138442993164, + "learning_rate": 4.999934242462603e-06, + "loss": 0.0546, + "step": 344, + "video_reward_cumulative_accuracy": 0.7630813953488372 + }, + { + "epoch": 0.10240427426536064, + "grad_norm": 3.4673523902893066, + "learning_rate": 4.999914112719477e-06, + "loss": 0.0746, + "step": 345, + "video_reward_cumulative_accuracy": 0.763768115942029 + }, + { + "epoch": 0.10270109824873849, + "grad_norm": 1.0772874355316162, + "learning_rate": 4.9998912990759146e-06, + "loss": 0.049, + "step": 346, + "video_reward_cumulative_accuracy": 0.7644508670520231 + }, + { + "epoch": 0.10299792223211636, + "grad_norm": 2.790788412094116, + "learning_rate": 4.999865801556409e-06, + "loss": 0.0442, + "step": 347, + "video_reward_cumulative_accuracy": 0.7651296829971181 + }, + { + "epoch": 0.10329474621549421, + "grad_norm": 3.4424827098846436, + "learning_rate": 4.999837620188334e-06, + "loss": 0.0384, + "step": 348, + "video_reward_cumulative_accuracy": 0.7658045977011494 + }, + { + "epoch": 0.10359157019887207, + "grad_norm": 2.485424280166626, + "learning_rate": 4.999806755001946e-06, + "loss": 0.0333, + "step": 349, + "video_reward_cumulative_accuracy": 0.7664756446991404 + }, + { + "epoch": 0.10388839418224993, + "grad_norm": 1.6122627258300781, + "learning_rate": 4.999773206030379e-06, + "loss": 0.0689, + "step": 350, + "video_reward_cumulative_accuracy": 0.7671428571428571 + }, + { + "epoch": 0.10418521816562779, + "grad_norm": 3.075976848602295, + "learning_rate": 4.999736973309655e-06, + "loss": 0.0376, + "step": 351, + "video_reward_cumulative_accuracy": 0.7663817663817664 + }, + { + "epoch": 0.10448204214900564, + "grad_norm": 3.550297260284424, + "learning_rate": 4.99969805687867e-06, + "loss": 0.048, + "step": 352, + "video_reward_cumulative_accuracy": 0.7670454545454546 + }, + { + "epoch": 0.10477886613238349, + "grad_norm": 6.603730201721191, + "learning_rate": 4.999656456779207e-06, + "loss": 0.0667, + "step": 353, + "video_reward_cumulative_accuracy": 0.7662889518413598 + }, + { + "epoch": 0.10507569011576136, + "grad_norm": 1.7593069076538086, + "learning_rate": 4.999612173055926e-06, + "loss": 0.0376, + "step": 354, + "video_reward_cumulative_accuracy": 0.7669491525423728 + }, + { + "epoch": 0.10537251409913921, + "grad_norm": 1.126291275024414, + "learning_rate": 4.99956520575637e-06, + "loss": 0.0315, + "step": 355, + "video_reward_cumulative_accuracy": 0.7676056338028169 + }, + { + "epoch": 0.10566933808251706, + "grad_norm": 5.531168460845947, + "learning_rate": 4.999515554930965e-06, + "loss": 0.0701, + "step": 356, + "video_reward_cumulative_accuracy": 0.7682584269662921 + }, + { + "epoch": 0.10596616206589493, + "grad_norm": 1.5562381744384766, + "learning_rate": 4.999463220633013e-06, + "loss": 0.0385, + "step": 357, + "video_reward_cumulative_accuracy": 0.7675070028011205 + }, + { + "epoch": 0.10626298604927278, + "grad_norm": 11.684016227722168, + "learning_rate": 4.999408202918702e-06, + "loss": 0.1559, + "step": 358, + "video_reward_cumulative_accuracy": 0.7667597765363129 + }, + { + "epoch": 0.10655981003265064, + "grad_norm": 3.8814454078674316, + "learning_rate": 4.999350501847098e-06, + "loss": 0.0488, + "step": 359, + "video_reward_cumulative_accuracy": 0.7674094707520891 + }, + { + "epoch": 0.10685663401602849, + "grad_norm": 8.800138473510742, + "learning_rate": 4.999290117480149e-06, + "loss": 0.1598, + "step": 360, + "video_reward_cumulative_accuracy": 0.7680555555555556 + }, + { + "epoch": 0.10715345799940636, + "grad_norm": 4.154999256134033, + "learning_rate": 4.999227049882684e-06, + "loss": 0.0748, + "step": 361, + "video_reward_cumulative_accuracy": 0.7686980609418282 + }, + { + "epoch": 0.10745028198278421, + "grad_norm": 4.171562671661377, + "learning_rate": 4.999161299122411e-06, + "loss": 0.0392, + "step": 362, + "video_reward_cumulative_accuracy": 0.7693370165745856 + }, + { + "epoch": 0.10774710596616206, + "grad_norm": 3.6571145057678223, + "learning_rate": 4.9990928652699214e-06, + "loss": 0.0314, + "step": 363, + "video_reward_cumulative_accuracy": 0.7699724517906336 + }, + { + "epoch": 0.10804392994953993, + "grad_norm": 12.863832473754883, + "learning_rate": 4.999021748398684e-06, + "loss": 0.1173, + "step": 364, + "video_reward_cumulative_accuracy": 0.7678571428571429 + }, + { + "epoch": 0.10834075393291778, + "grad_norm": 4.573801517486572, + "learning_rate": 4.99894794858505e-06, + "loss": 0.0748, + "step": 365, + "video_reward_cumulative_accuracy": 0.7684931506849315 + }, + { + "epoch": 0.10863757791629564, + "grad_norm": 7.55157470703125, + "learning_rate": 4.998871465908251e-06, + "loss": 0.1099, + "step": 366, + "video_reward_cumulative_accuracy": 0.76775956284153 + }, + { + "epoch": 0.10893440189967349, + "grad_norm": 2.933627128601074, + "learning_rate": 4.998792300450399e-06, + "loss": 0.0819, + "step": 367, + "video_reward_cumulative_accuracy": 0.7683923705722071 + }, + { + "epoch": 0.10923122588305136, + "grad_norm": 6.3735575675964355, + "learning_rate": 4.998710452296485e-06, + "loss": 0.0566, + "step": 368, + "video_reward_cumulative_accuracy": 0.7690217391304348 + }, + { + "epoch": 0.10952804986642921, + "grad_norm": 1.9880731105804443, + "learning_rate": 4.9986259215343814e-06, + "loss": 0.0467, + "step": 369, + "video_reward_cumulative_accuracy": 0.7696476964769647 + }, + { + "epoch": 0.10982487384980706, + "grad_norm": 5.522644996643066, + "learning_rate": 4.99853870825484e-06, + "loss": 0.0861, + "step": 370, + "video_reward_cumulative_accuracy": 0.768918918918919 + }, + { + "epoch": 0.11012169783318491, + "grad_norm": 4.956112861633301, + "learning_rate": 4.998448812551493e-06, + "loss": 0.0501, + "step": 371, + "video_reward_cumulative_accuracy": 0.7695417789757413 + }, + { + "epoch": 0.11041852181656278, + "grad_norm": 4.254426956176758, + "learning_rate": 4.998356234520851e-06, + "loss": 0.0427, + "step": 372, + "video_reward_cumulative_accuracy": 0.7701612903225806 + }, + { + "epoch": 0.11071534579994063, + "grad_norm": 3.666555166244507, + "learning_rate": 4.998260974262308e-06, + "loss": 0.079, + "step": 373, + "video_reward_cumulative_accuracy": 0.7694369973190348 + }, + { + "epoch": 0.11101216978331849, + "grad_norm": 3.9171857833862305, + "learning_rate": 4.998163031878133e-06, + "loss": 0.0739, + "step": 374, + "video_reward_cumulative_accuracy": 0.7687165775401069 + }, + { + "epoch": 0.11130899376669635, + "grad_norm": 4.026345252990723, + "learning_rate": 4.998062407473477e-06, + "loss": 0.0506, + "step": 375, + "video_reward_cumulative_accuracy": 0.7693333333333333 + }, + { + "epoch": 0.11160581775007421, + "grad_norm": 1.3979490995407104, + "learning_rate": 4.99795910115637e-06, + "loss": 0.0479, + "step": 376, + "video_reward_cumulative_accuracy": 0.7686170212765957 + }, + { + "epoch": 0.11190264173345206, + "grad_norm": 1.5546613931655884, + "learning_rate": 4.997853113037722e-06, + "loss": 0.0323, + "step": 377, + "video_reward_cumulative_accuracy": 0.7692307692307693 + }, + { + "epoch": 0.11219946571682991, + "grad_norm": 2.0145232677459717, + "learning_rate": 4.997744443231321e-06, + "loss": 0.0471, + "step": 378, + "video_reward_cumulative_accuracy": 0.7698412698412699 + }, + { + "epoch": 0.11249628970020778, + "grad_norm": 3.335103750228882, + "learning_rate": 4.9976330918538356e-06, + "loss": 0.0416, + "step": 379, + "video_reward_cumulative_accuracy": 0.7704485488126649 + }, + { + "epoch": 0.11279311368358563, + "grad_norm": 3.545008897781372, + "learning_rate": 4.99751905902481e-06, + "loss": 0.039, + "step": 380, + "video_reward_cumulative_accuracy": 0.7710526315789473 + }, + { + "epoch": 0.11308993766696349, + "grad_norm": 6.1864190101623535, + "learning_rate": 4.997402344866672e-06, + "loss": 0.0831, + "step": 381, + "video_reward_cumulative_accuracy": 0.7703412073490814 + }, + { + "epoch": 0.11338676165034135, + "grad_norm": 1.5058016777038574, + "learning_rate": 4.997282949504725e-06, + "loss": 0.0472, + "step": 382, + "video_reward_cumulative_accuracy": 0.7709424083769634 + }, + { + "epoch": 0.1136835856337192, + "grad_norm": 5.477189064025879, + "learning_rate": 4.99716087306715e-06, + "loss": 0.0518, + "step": 383, + "video_reward_cumulative_accuracy": 0.7702349869451697 + }, + { + "epoch": 0.11398040961709706, + "grad_norm": 2.9288504123687744, + "learning_rate": 4.99703611568501e-06, + "loss": 0.0564, + "step": 384, + "video_reward_cumulative_accuracy": 0.76953125 + }, + { + "epoch": 0.11427723360047491, + "grad_norm": 3.441972017288208, + "learning_rate": 4.996908677492243e-06, + "loss": 0.019, + "step": 385, + "video_reward_cumulative_accuracy": 0.7701298701298701 + }, + { + "epoch": 0.11457405758385278, + "grad_norm": 2.967977523803711, + "learning_rate": 4.996778558625666e-06, + "loss": 0.058, + "step": 386, + "video_reward_cumulative_accuracy": 0.7707253886010362 + }, + { + "epoch": 0.11487088156723063, + "grad_norm": 3.720794200897217, + "learning_rate": 4.996645759224974e-06, + "loss": 0.0383, + "step": 387, + "video_reward_cumulative_accuracy": 0.7713178294573644 + }, + { + "epoch": 0.11516770555060848, + "grad_norm": 4.410665035247803, + "learning_rate": 4.9965102794327416e-06, + "loss": 0.0816, + "step": 388, + "video_reward_cumulative_accuracy": 0.7719072164948454 + }, + { + "epoch": 0.11546452953398635, + "grad_norm": 4.823568344116211, + "learning_rate": 4.996372119394418e-06, + "loss": 0.0597, + "step": 389, + "video_reward_cumulative_accuracy": 0.7699228791773779 + }, + { + "epoch": 0.1157613535173642, + "grad_norm": 6.129620552062988, + "learning_rate": 4.9962312792583325e-06, + "loss": 0.1142, + "step": 390, + "video_reward_cumulative_accuracy": 0.7705128205128206 + }, + { + "epoch": 0.11605817750074206, + "grad_norm": 4.6021037101745605, + "learning_rate": 4.9960877591756895e-06, + "loss": 0.0491, + "step": 391, + "video_reward_cumulative_accuracy": 0.7710997442455243 + }, + { + "epoch": 0.11635500148411991, + "grad_norm": 6.3145341873168945, + "learning_rate": 4.9959415593005734e-06, + "loss": 0.1095, + "step": 392, + "video_reward_cumulative_accuracy": 0.7704081632653061 + }, + { + "epoch": 0.11665182546749778, + "grad_norm": 2.623091697692871, + "learning_rate": 4.995792679789943e-06, + "loss": 0.0661, + "step": 393, + "video_reward_cumulative_accuracy": 0.7709923664122137 + }, + { + "epoch": 0.11694864945087563, + "grad_norm": 4.27155065536499, + "learning_rate": 4.995641120803634e-06, + "loss": 0.0581, + "step": 394, + "video_reward_cumulative_accuracy": 0.7715736040609137 + }, + { + "epoch": 0.11724547343425348, + "grad_norm": 4.873302936553955, + "learning_rate": 4.99548688250436e-06, + "loss": 0.0637, + "step": 395, + "video_reward_cumulative_accuracy": 0.7708860759493671 + }, + { + "epoch": 0.11754229741763135, + "grad_norm": 1.3667075634002686, + "learning_rate": 4.995329965057712e-06, + "loss": 0.0398, + "step": 396, + "video_reward_cumulative_accuracy": 0.7714646464646465 + }, + { + "epoch": 0.1178391214010092, + "grad_norm": 2.0029561519622803, + "learning_rate": 4.995170368632156e-06, + "loss": 0.0376, + "step": 397, + "video_reward_cumulative_accuracy": 0.7720403022670025 + }, + { + "epoch": 0.11813594538438706, + "grad_norm": 2.7206361293792725, + "learning_rate": 4.995008093399034e-06, + "loss": 0.0364, + "step": 398, + "video_reward_cumulative_accuracy": 0.7726130653266332 + }, + { + "epoch": 0.11843276936776491, + "grad_norm": 3.3953866958618164, + "learning_rate": 4.9948431395325626e-06, + "loss": 0.0533, + "step": 399, + "video_reward_cumulative_accuracy": 0.7731829573934837 + }, + { + "epoch": 0.11872959335114278, + "grad_norm": 4.966310024261475, + "learning_rate": 4.994675507209837e-06, + "loss": 0.0953, + "step": 400, + "video_reward_cumulative_accuracy": 0.77375 + }, + { + "epoch": 0.11872959335114278, + "eval_runtime": 133.1422, + "eval_samples_per_second": 5.926, + "eval_steps_per_second": 0.744, + "eval_test_set_accuracy": 0.7323232323232324, + "step": 400 + }, + { + "epoch": 0.11902641733452063, + "grad_norm": 1.743256688117981, + "learning_rate": 4.9945051966108285e-06, + "loss": 0.0466, + "step": 401, + "video_reward_cumulative_accuracy": 0.7743142144638404 + }, + { + "epoch": 0.11932324131789848, + "grad_norm": 1.3353296518325806, + "learning_rate": 4.99433220791838e-06, + "loss": 0.0713, + "step": 402, + "video_reward_cumulative_accuracy": 0.7723880597014925 + }, + { + "epoch": 0.11962006530127635, + "grad_norm": 3.405177593231201, + "learning_rate": 4.994156541318211e-06, + "loss": 0.049, + "step": 403, + "video_reward_cumulative_accuracy": 0.771712158808933 + }, + { + "epoch": 0.1199168892846542, + "grad_norm": 1.6333727836608887, + "learning_rate": 4.993978196998918e-06, + "loss": 0.0639, + "step": 404, + "video_reward_cumulative_accuracy": 0.7722772277227723 + }, + { + "epoch": 0.12021371326803205, + "grad_norm": 2.6365771293640137, + "learning_rate": 4.993797175151971e-06, + "loss": 0.0343, + "step": 405, + "video_reward_cumulative_accuracy": 0.7728395061728395 + }, + { + "epoch": 0.12051053725140991, + "grad_norm": 2.222435712814331, + "learning_rate": 4.9936134759717134e-06, + "loss": 0.0686, + "step": 406, + "video_reward_cumulative_accuracy": 0.7733990147783252 + }, + { + "epoch": 0.12080736123478777, + "grad_norm": 7.1145405769348145, + "learning_rate": 4.993427099655366e-06, + "loss": 0.0705, + "step": 407, + "video_reward_cumulative_accuracy": 0.7727272727272727 + }, + { + "epoch": 0.12110418521816563, + "grad_norm": 4.418581008911133, + "learning_rate": 4.993238046403021e-06, + "loss": 0.0407, + "step": 408, + "video_reward_cumulative_accuracy": 0.7720588235294118 + }, + { + "epoch": 0.12140100920154348, + "grad_norm": 1.4404574632644653, + "learning_rate": 4.993046316417643e-06, + "loss": 0.0407, + "step": 409, + "video_reward_cumulative_accuracy": 0.7713936430317848 + }, + { + "epoch": 0.12169783318492135, + "grad_norm": 1.7548354864120483, + "learning_rate": 4.992851909905077e-06, + "loss": 0.0422, + "step": 410, + "video_reward_cumulative_accuracy": 0.7719512195121951 + }, + { + "epoch": 0.1219946571682992, + "grad_norm": 2.8836653232574463, + "learning_rate": 4.992654827074034e-06, + "loss": 0.0519, + "step": 411, + "video_reward_cumulative_accuracy": 0.7725060827250608 + }, + { + "epoch": 0.12229148115167705, + "grad_norm": 6.0396504402160645, + "learning_rate": 4.992455068136104e-06, + "loss": 0.0834, + "step": 412, + "video_reward_cumulative_accuracy": 0.7730582524271845 + }, + { + "epoch": 0.1225883051350549, + "grad_norm": 1.5045963525772095, + "learning_rate": 4.992252633305745e-06, + "loss": 0.0404, + "step": 413, + "video_reward_cumulative_accuracy": 0.7736077481840193 + }, + { + "epoch": 0.12288512911843277, + "grad_norm": 1.5633553266525269, + "learning_rate": 4.992047522800292e-06, + "loss": 0.0249, + "step": 414, + "video_reward_cumulative_accuracy": 0.7741545893719807 + }, + { + "epoch": 0.12318195310181063, + "grad_norm": 2.4161694049835205, + "learning_rate": 4.991839736839951e-06, + "loss": 0.047, + "step": 415, + "video_reward_cumulative_accuracy": 0.7746987951807229 + }, + { + "epoch": 0.12347877708518848, + "grad_norm": 4.570451259613037, + "learning_rate": 4.9916292756478e-06, + "loss": 0.0645, + "step": 416, + "video_reward_cumulative_accuracy": 0.7752403846153846 + }, + { + "epoch": 0.12377560106856635, + "grad_norm": 2.0078325271606445, + "learning_rate": 4.991416139449791e-06, + "loss": 0.053, + "step": 417, + "video_reward_cumulative_accuracy": 0.7757793764988009 + }, + { + "epoch": 0.1240724250519442, + "grad_norm": 2.8492980003356934, + "learning_rate": 4.991200328474743e-06, + "loss": 0.041, + "step": 418, + "video_reward_cumulative_accuracy": 0.7763157894736842 + }, + { + "epoch": 0.12436924903532205, + "grad_norm": 1.4428461790084839, + "learning_rate": 4.990981842954353e-06, + "loss": 0.0213, + "step": 419, + "video_reward_cumulative_accuracy": 0.7768496420047732 + }, + { + "epoch": 0.1246660730186999, + "grad_norm": 1.667288899421692, + "learning_rate": 4.990760683123186e-06, + "loss": 0.0402, + "step": 420, + "video_reward_cumulative_accuracy": 0.7773809523809524 + }, + { + "epoch": 0.12496289700207777, + "grad_norm": 5.119730472564697, + "learning_rate": 4.990536849218678e-06, + "loss": 0.0759, + "step": 421, + "video_reward_cumulative_accuracy": 0.7779097387173397 + }, + { + "epoch": 0.12525972098545562, + "grad_norm": 7.024728298187256, + "learning_rate": 4.990310341481136e-06, + "loss": 0.1402, + "step": 422, + "video_reward_cumulative_accuracy": 0.7784360189573459 + }, + { + "epoch": 0.12555654496883348, + "grad_norm": 3.2602407932281494, + "learning_rate": 4.990081160153738e-06, + "loss": 0.0567, + "step": 423, + "video_reward_cumulative_accuracy": 0.7789598108747045 + }, + { + "epoch": 0.12585336895221133, + "grad_norm": 3.0349199771881104, + "learning_rate": 4.989849305482534e-06, + "loss": 0.1118, + "step": 424, + "video_reward_cumulative_accuracy": 0.7783018867924528 + }, + { + "epoch": 0.12615019293558918, + "grad_norm": 4.9538774490356445, + "learning_rate": 4.989614777716442e-06, + "loss": 0.0844, + "step": 425, + "video_reward_cumulative_accuracy": 0.7776470588235294 + }, + { + "epoch": 0.12644701691896706, + "grad_norm": 4.11725378036499, + "learning_rate": 4.989377577107248e-06, + "loss": 0.0852, + "step": 426, + "video_reward_cumulative_accuracy": 0.7769953051643192 + }, + { + "epoch": 0.12674384090234492, + "grad_norm": 2.492112874984741, + "learning_rate": 4.989137703909612e-06, + "loss": 0.0574, + "step": 427, + "video_reward_cumulative_accuracy": 0.7775175644028103 + }, + { + "epoch": 0.12704066488572277, + "grad_norm": 3.3286123275756836, + "learning_rate": 4.988895158381062e-06, + "loss": 0.0402, + "step": 428, + "video_reward_cumulative_accuracy": 0.7780373831775701 + }, + { + "epoch": 0.12733748886910062, + "grad_norm": 5.785702228546143, + "learning_rate": 4.988649940781992e-06, + "loss": 0.0592, + "step": 429, + "video_reward_cumulative_accuracy": 0.7773892773892774 + }, + { + "epoch": 0.12763431285247848, + "grad_norm": 1.343465805053711, + "learning_rate": 4.988402051375668e-06, + "loss": 0.0254, + "step": 430, + "video_reward_cumulative_accuracy": 0.7779069767441861 + }, + { + "epoch": 0.12793113683585633, + "grad_norm": 2.176379680633545, + "learning_rate": 4.988151490428223e-06, + "loss": 0.1113, + "step": 431, + "video_reward_cumulative_accuracy": 0.7761020881670534 + }, + { + "epoch": 0.12822796081923418, + "grad_norm": 4.0742340087890625, + "learning_rate": 4.987898258208659e-06, + "loss": 0.0802, + "step": 432, + "video_reward_cumulative_accuracy": 0.7766203703703703 + }, + { + "epoch": 0.12852478480261206, + "grad_norm": 1.4879510402679443, + "learning_rate": 4.987642354988845e-06, + "loss": 0.0415, + "step": 433, + "video_reward_cumulative_accuracy": 0.7771362586605081 + }, + { + "epoch": 0.12882160878598992, + "grad_norm": 2.5588343143463135, + "learning_rate": 4.987383781043517e-06, + "loss": 0.0591, + "step": 434, + "video_reward_cumulative_accuracy": 0.7764976958525346 + }, + { + "epoch": 0.12911843276936777, + "grad_norm": 1.5112923383712769, + "learning_rate": 4.987122536650282e-06, + "loss": 0.0583, + "step": 435, + "video_reward_cumulative_accuracy": 0.7770114942528735 + }, + { + "epoch": 0.12941525675274562, + "grad_norm": 3.004580020904541, + "learning_rate": 4.986858622089609e-06, + "loss": 0.1139, + "step": 436, + "video_reward_cumulative_accuracy": 0.7775229357798165 + }, + { + "epoch": 0.12971208073612348, + "grad_norm": 4.673270225524902, + "learning_rate": 4.986592037644836e-06, + "loss": 0.1071, + "step": 437, + "video_reward_cumulative_accuracy": 0.7768878718535469 + }, + { + "epoch": 0.13000890471950133, + "grad_norm": 1.8337359428405762, + "learning_rate": 4.986322783602167e-06, + "loss": 0.0351, + "step": 438, + "video_reward_cumulative_accuracy": 0.7773972602739726 + }, + { + "epoch": 0.13030572870287918, + "grad_norm": 4.02969217300415, + "learning_rate": 4.986050860250674e-06, + "loss": 0.0719, + "step": 439, + "video_reward_cumulative_accuracy": 0.7767653758542141 + }, + { + "epoch": 0.13060255268625706, + "grad_norm": 9.999349594116211, + "learning_rate": 4.985776267882291e-06, + "loss": 0.1391, + "step": 440, + "video_reward_cumulative_accuracy": 0.7761363636363636 + }, + { + "epoch": 0.13089937666963491, + "grad_norm": 1.213397741317749, + "learning_rate": 4.985499006791822e-06, + "loss": 0.0421, + "step": 441, + "video_reward_cumulative_accuracy": 0.7766439909297053 + }, + { + "epoch": 0.13119620065301277, + "grad_norm": 1.5377864837646484, + "learning_rate": 4.9852190772769304e-06, + "loss": 0.0629, + "step": 442, + "video_reward_cumulative_accuracy": 0.7748868778280543 + }, + { + "epoch": 0.13149302463639062, + "grad_norm": 2.056608200073242, + "learning_rate": 4.984936479638151e-06, + "loss": 0.0557, + "step": 443, + "video_reward_cumulative_accuracy": 0.7731376975169301 + }, + { + "epoch": 0.13178984861976847, + "grad_norm": 2.519721746444702, + "learning_rate": 4.9846512141788774e-06, + "loss": 0.0692, + "step": 444, + "video_reward_cumulative_accuracy": 0.7725225225225225 + }, + { + "epoch": 0.13208667260314633, + "grad_norm": 1.5284984111785889, + "learning_rate": 4.984363281205372e-06, + "loss": 0.0448, + "step": 445, + "video_reward_cumulative_accuracy": 0.7730337078651686 + }, + { + "epoch": 0.13238349658652418, + "grad_norm": 1.4093446731567383, + "learning_rate": 4.984072681026757e-06, + "loss": 0.0707, + "step": 446, + "video_reward_cumulative_accuracy": 0.773542600896861 + }, + { + "epoch": 0.13268032056990206, + "grad_norm": 4.844381332397461, + "learning_rate": 4.98377941395502e-06, + "loss": 0.0652, + "step": 447, + "video_reward_cumulative_accuracy": 0.772930648769575 + }, + { + "epoch": 0.1329771445532799, + "grad_norm": 3.3423585891723633, + "learning_rate": 4.983483480305012e-06, + "loss": 0.0597, + "step": 448, + "video_reward_cumulative_accuracy": 0.7723214285714286 + }, + { + "epoch": 0.13327396853665777, + "grad_norm": 1.8888392448425293, + "learning_rate": 4.983184880394447e-06, + "loss": 0.0142, + "step": 449, + "video_reward_cumulative_accuracy": 0.7728285077951003 + }, + { + "epoch": 0.13357079252003562, + "grad_norm": 4.848865032196045, + "learning_rate": 4.982883614543901e-06, + "loss": 0.0525, + "step": 450, + "video_reward_cumulative_accuracy": 0.7733333333333333 + }, + { + "epoch": 0.13386761650341347, + "grad_norm": 3.70519757270813, + "learning_rate": 4.982579683076811e-06, + "loss": 0.0716, + "step": 451, + "video_reward_cumulative_accuracy": 0.7727272727272727 + }, + { + "epoch": 0.13416444048679133, + "grad_norm": 1.4521280527114868, + "learning_rate": 4.982273086319479e-06, + "loss": 0.0166, + "step": 452, + "video_reward_cumulative_accuracy": 0.7732300884955752 + }, + { + "epoch": 0.13446126447016918, + "grad_norm": 1.7118197679519653, + "learning_rate": 4.981963824601064e-06, + "loss": 0.0634, + "step": 453, + "video_reward_cumulative_accuracy": 0.7737306843267108 + }, + { + "epoch": 0.13475808845354706, + "grad_norm": 3.1727960109710693, + "learning_rate": 4.98165189825359e-06, + "loss": 0.0269, + "step": 454, + "video_reward_cumulative_accuracy": 0.7742290748898678 + }, + { + "epoch": 0.1350549124369249, + "grad_norm": 2.006000280380249, + "learning_rate": 4.981337307611939e-06, + "loss": 0.0942, + "step": 455, + "video_reward_cumulative_accuracy": 0.7747252747252747 + }, + { + "epoch": 0.13535173642030277, + "grad_norm": 2.203691244125366, + "learning_rate": 4.981020053013855e-06, + "loss": 0.0439, + "step": 456, + "video_reward_cumulative_accuracy": 0.7741228070175439 + }, + { + "epoch": 0.13564856040368062, + "grad_norm": 3.0224599838256836, + "learning_rate": 4.9807001347999424e-06, + "loss": 0.066, + "step": 457, + "video_reward_cumulative_accuracy": 0.774617067833698 + }, + { + "epoch": 0.13594538438705847, + "grad_norm": 1.738229513168335, + "learning_rate": 4.980377553313665e-06, + "loss": 0.0393, + "step": 458, + "video_reward_cumulative_accuracy": 0.7751091703056768 + }, + { + "epoch": 0.13624220837043632, + "grad_norm": 3.5705530643463135, + "learning_rate": 4.980052308901343e-06, + "loss": 0.0345, + "step": 459, + "video_reward_cumulative_accuracy": 0.7745098039215687 + }, + { + "epoch": 0.13653903235381418, + "grad_norm": 2.2670326232910156, + "learning_rate": 4.9797244019121595e-06, + "loss": 0.0286, + "step": 460, + "video_reward_cumulative_accuracy": 0.775 + }, + { + "epoch": 0.13683585633719206, + "grad_norm": 2.399627923965454, + "learning_rate": 4.979393832698154e-06, + "loss": 0.0963, + "step": 461, + "video_reward_cumulative_accuracy": 0.7754880694143167 + }, + { + "epoch": 0.1371326803205699, + "grad_norm": 4.614706039428711, + "learning_rate": 4.979060601614225e-06, + "loss": 0.1159, + "step": 462, + "video_reward_cumulative_accuracy": 0.7748917748917749 + }, + { + "epoch": 0.13742950430394776, + "grad_norm": 3.261317729949951, + "learning_rate": 4.978724709018128e-06, + "loss": 0.0661, + "step": 463, + "video_reward_cumulative_accuracy": 0.775377969762419 + }, + { + "epoch": 0.13772632828732562, + "grad_norm": 3.9476890563964844, + "learning_rate": 4.978386155270477e-06, + "loss": 0.0487, + "step": 464, + "video_reward_cumulative_accuracy": 0.7747844827586207 + }, + { + "epoch": 0.13802315227070347, + "grad_norm": 2.917313575744629, + "learning_rate": 4.9780449407347405e-06, + "loss": 0.0759, + "step": 465, + "video_reward_cumulative_accuracy": 0.7731182795698924 + }, + { + "epoch": 0.13831997625408132, + "grad_norm": 1.9969302415847778, + "learning_rate": 4.977701065777247e-06, + "loss": 0.0262, + "step": 466, + "video_reward_cumulative_accuracy": 0.7736051502145923 + }, + { + "epoch": 0.13861680023745918, + "grad_norm": 2.4382143020629883, + "learning_rate": 4.97735453076718e-06, + "loss": 0.0622, + "step": 467, + "video_reward_cumulative_accuracy": 0.7740899357601713 + }, + { + "epoch": 0.13891362422083706, + "grad_norm": 1.4531607627868652, + "learning_rate": 4.977005336076578e-06, + "loss": 0.0309, + "step": 468, + "video_reward_cumulative_accuracy": 0.7745726495726496 + }, + { + "epoch": 0.1392104482042149, + "grad_norm": 4.770167350769043, + "learning_rate": 4.976653482080335e-06, + "loss": 0.0523, + "step": 469, + "video_reward_cumulative_accuracy": 0.7750533049040512 + }, + { + "epoch": 0.13950727218759276, + "grad_norm": 1.4525412321090698, + "learning_rate": 4.9762989691562006e-06, + "loss": 0.0469, + "step": 470, + "video_reward_cumulative_accuracy": 0.774468085106383 + }, + { + "epoch": 0.13980409617097062, + "grad_norm": 7.346729755401611, + "learning_rate": 4.975941797684778e-06, + "loss": 0.104, + "step": 471, + "video_reward_cumulative_accuracy": 0.772823779193206 + }, + { + "epoch": 0.14010092015434847, + "grad_norm": 2.2593302726745605, + "learning_rate": 4.975581968049527e-06, + "loss": 0.0912, + "step": 472, + "video_reward_cumulative_accuracy": 0.7733050847457628 + }, + { + "epoch": 0.14039774413772632, + "grad_norm": 2.1568541526794434, + "learning_rate": 4.9752194806367585e-06, + "loss": 0.0645, + "step": 473, + "video_reward_cumulative_accuracy": 0.7716701902748414 + }, + { + "epoch": 0.14069456812110417, + "grad_norm": 2.715193033218384, + "learning_rate": 4.974854335835639e-06, + "loss": 0.0426, + "step": 474, + "video_reward_cumulative_accuracy": 0.7710970464135021 + }, + { + "epoch": 0.14099139210448206, + "grad_norm": 2.6449036598205566, + "learning_rate": 4.974486534038185e-06, + "loss": 0.0349, + "step": 475, + "video_reward_cumulative_accuracy": 0.771578947368421 + }, + { + "epoch": 0.1412882160878599, + "grad_norm": 2.4418740272521973, + "learning_rate": 4.9741160756392705e-06, + "loss": 0.0411, + "step": 476, + "video_reward_cumulative_accuracy": 0.7720588235294118 + }, + { + "epoch": 0.14158504007123776, + "grad_norm": 1.6195287704467773, + "learning_rate": 4.973742961036615e-06, + "loss": 0.0366, + "step": 477, + "video_reward_cumulative_accuracy": 0.7725366876310272 + }, + { + "epoch": 0.14188186405461561, + "grad_norm": 1.6140589714050293, + "learning_rate": 4.973367190630796e-06, + "loss": 0.0646, + "step": 478, + "video_reward_cumulative_accuracy": 0.7719665271966527 + }, + { + "epoch": 0.14217868803799347, + "grad_norm": 3.116804361343384, + "learning_rate": 4.972988764825239e-06, + "loss": 0.0293, + "step": 479, + "video_reward_cumulative_accuracy": 0.7724425887265136 + }, + { + "epoch": 0.14247551202137132, + "grad_norm": 1.9201480150222778, + "learning_rate": 4.972607684026218e-06, + "loss": 0.039, + "step": 480, + "video_reward_cumulative_accuracy": 0.771875 + }, + { + "epoch": 0.14277233600474917, + "grad_norm": 1.579925298690796, + "learning_rate": 4.972223948642865e-06, + "loss": 0.0468, + "step": 481, + "video_reward_cumulative_accuracy": 0.7713097713097713 + }, + { + "epoch": 0.14306915998812705, + "grad_norm": 2.824054718017578, + "learning_rate": 4.971837559087153e-06, + "loss": 0.0691, + "step": 482, + "video_reward_cumulative_accuracy": 0.770746887966805 + }, + { + "epoch": 0.1433659839715049, + "grad_norm": 3.3171064853668213, + "learning_rate": 4.971448515773911e-06, + "loss": 0.0773, + "step": 483, + "video_reward_cumulative_accuracy": 0.7701863354037267 + }, + { + "epoch": 0.14366280795488276, + "grad_norm": 4.4460577964782715, + "learning_rate": 4.971056819120814e-06, + "loss": 0.055, + "step": 484, + "video_reward_cumulative_accuracy": 0.7696280991735537 + }, + { + "epoch": 0.1439596319382606, + "grad_norm": 2.3328418731689453, + "learning_rate": 4.970662469548386e-06, + "loss": 0.0411, + "step": 485, + "video_reward_cumulative_accuracy": 0.7701030927835052 + }, + { + "epoch": 0.14425645592163847, + "grad_norm": 2.608328342437744, + "learning_rate": 4.970265467480001e-06, + "loss": 0.0615, + "step": 486, + "video_reward_cumulative_accuracy": 0.7695473251028807 + }, + { + "epoch": 0.14455327990501632, + "grad_norm": 1.6409941911697388, + "learning_rate": 4.969865813341878e-06, + "loss": 0.0431, + "step": 487, + "video_reward_cumulative_accuracy": 0.7700205338809035 + }, + { + "epoch": 0.14485010388839417, + "grad_norm": 6.2619805335998535, + "learning_rate": 4.969463507563085e-06, + "loss": 0.0885, + "step": 488, + "video_reward_cumulative_accuracy": 0.7704918032786885 + }, + { + "epoch": 0.14514692787177205, + "grad_norm": 2.390130043029785, + "learning_rate": 4.969058550575535e-06, + "loss": 0.0684, + "step": 489, + "video_reward_cumulative_accuracy": 0.7709611451942741 + }, + { + "epoch": 0.1454437518551499, + "grad_norm": 1.9959198236465454, + "learning_rate": 4.968650942813991e-06, + "loss": 0.0635, + "step": 490, + "video_reward_cumulative_accuracy": 0.7704081632653061 + }, + { + "epoch": 0.14574057583852776, + "grad_norm": 3.4746286869049072, + "learning_rate": 4.968240684716058e-06, + "loss": 0.0764, + "step": 491, + "video_reward_cumulative_accuracy": 0.769857433808554 + }, + { + "epoch": 0.1460373998219056, + "grad_norm": 2.226306438446045, + "learning_rate": 4.967827776722187e-06, + "loss": 0.0627, + "step": 492, + "video_reward_cumulative_accuracy": 0.7703252032520326 + }, + { + "epoch": 0.14633422380528346, + "grad_norm": 1.9936774969100952, + "learning_rate": 4.967412219275677e-06, + "loss": 0.026, + "step": 493, + "video_reward_cumulative_accuracy": 0.77079107505071 + }, + { + "epoch": 0.14663104778866132, + "grad_norm": 2.9451053142547607, + "learning_rate": 4.966994012822668e-06, + "loss": 0.037, + "step": 494, + "video_reward_cumulative_accuracy": 0.7692307692307693 + }, + { + "epoch": 0.14692787177203917, + "grad_norm": 2.2165896892547607, + "learning_rate": 4.9665731578121445e-06, + "loss": 0.0604, + "step": 495, + "video_reward_cumulative_accuracy": 0.7696969696969697 + }, + { + "epoch": 0.14722469575541705, + "grad_norm": 4.33952522277832, + "learning_rate": 4.966149654695937e-06, + "loss": 0.0512, + "step": 496, + "video_reward_cumulative_accuracy": 0.7691532258064516 + }, + { + "epoch": 0.1475215197387949, + "grad_norm": 1.7252819538116455, + "learning_rate": 4.9657235039287165e-06, + "loss": 0.0451, + "step": 497, + "video_reward_cumulative_accuracy": 0.7686116700201208 + }, + { + "epoch": 0.14781834372217276, + "grad_norm": 1.3271393775939941, + "learning_rate": 4.965294705967997e-06, + "loss": 0.0548, + "step": 498, + "video_reward_cumulative_accuracy": 0.7680722891566265 + }, + { + "epoch": 0.1481151677055506, + "grad_norm": 4.343282699584961, + "learning_rate": 4.964863261274134e-06, + "loss": 0.0571, + "step": 499, + "video_reward_cumulative_accuracy": 0.7675350701402806 + }, + { + "epoch": 0.14841199168892846, + "grad_norm": 1.385603666305542, + "learning_rate": 4.964429170310327e-06, + "loss": 0.0579, + "step": 500, + "video_reward_cumulative_accuracy": 0.767 + }, + { + "epoch": 0.14870881567230632, + "grad_norm": 1.3973246812820435, + "learning_rate": 4.963992433542612e-06, + "loss": 0.0573, + "step": 501, + "video_reward_cumulative_accuracy": 0.7674650698602794 + }, + { + "epoch": 0.14900563965568417, + "grad_norm": 1.4018256664276123, + "learning_rate": 4.963553051439871e-06, + "loss": 0.0543, + "step": 502, + "video_reward_cumulative_accuracy": 0.7679282868525896 + }, + { + "epoch": 0.14930246363906205, + "grad_norm": 1.3220703601837158, + "learning_rate": 4.963111024473823e-06, + "loss": 0.0535, + "step": 503, + "video_reward_cumulative_accuracy": 0.768389662027833 + }, + { + "epoch": 0.1495992876224399, + "grad_norm": 5.434345722198486, + "learning_rate": 4.962666353119025e-06, + "loss": 0.0945, + "step": 504, + "video_reward_cumulative_accuracy": 0.7688492063492064 + }, + { + "epoch": 0.14989611160581776, + "grad_norm": 1.8976709842681885, + "learning_rate": 4.9622190378528775e-06, + "loss": 0.0447, + "step": 505, + "video_reward_cumulative_accuracy": 0.7683168316831683 + }, + { + "epoch": 0.1501929355891956, + "grad_norm": 0.9916190505027771, + "learning_rate": 4.961769079155615e-06, + "loss": 0.0367, + "step": 506, + "video_reward_cumulative_accuracy": 0.7687747035573123 + }, + { + "epoch": 0.15048975957257346, + "grad_norm": 3.0541810989379883, + "learning_rate": 4.961316477510312e-06, + "loss": 0.0512, + "step": 507, + "video_reward_cumulative_accuracy": 0.7682445759368837 + }, + { + "epoch": 0.15078658355595131, + "grad_norm": 1.1684255599975586, + "learning_rate": 4.960861233402881e-06, + "loss": 0.0324, + "step": 508, + "video_reward_cumulative_accuracy": 0.7687007874015748 + }, + { + "epoch": 0.15108340753932917, + "grad_norm": 2.5912883281707764, + "learning_rate": 4.960403347322069e-06, + "loss": 0.0573, + "step": 509, + "video_reward_cumulative_accuracy": 0.768172888015717 + }, + { + "epoch": 0.15138023152270705, + "grad_norm": 2.920675039291382, + "learning_rate": 4.959942819759464e-06, + "loss": 0.0379, + "step": 510, + "video_reward_cumulative_accuracy": 0.7676470588235295 + }, + { + "epoch": 0.1516770555060849, + "grad_norm": 7.364986419677734, + "learning_rate": 4.959479651209485e-06, + "loss": 0.109, + "step": 511, + "video_reward_cumulative_accuracy": 0.7681017612524462 + }, + { + "epoch": 0.15197387948946275, + "grad_norm": 2.541637420654297, + "learning_rate": 4.959013842169389e-06, + "loss": 0.0187, + "step": 512, + "video_reward_cumulative_accuracy": 0.7685546875 + }, + { + "epoch": 0.1522707034728406, + "grad_norm": 1.8760055303573608, + "learning_rate": 4.9585453931392665e-06, + "loss": 0.0403, + "step": 513, + "video_reward_cumulative_accuracy": 0.7680311890838206 + }, + { + "epoch": 0.15256752745621846, + "grad_norm": 1.9295579195022583, + "learning_rate": 4.958074304622045e-06, + "loss": 0.0487, + "step": 514, + "video_reward_cumulative_accuracy": 0.7684824902723736 + }, + { + "epoch": 0.1528643514395963, + "grad_norm": 4.06351900100708, + "learning_rate": 4.957600577123482e-06, + "loss": 0.0608, + "step": 515, + "video_reward_cumulative_accuracy": 0.7679611650485437 + }, + { + "epoch": 0.15316117542297417, + "grad_norm": 2.601158618927002, + "learning_rate": 4.957124211152169e-06, + "loss": 0.0148, + "step": 516, + "video_reward_cumulative_accuracy": 0.7684108527131783 + }, + { + "epoch": 0.15345799940635205, + "grad_norm": 1.6226683855056763, + "learning_rate": 4.9566452072195335e-06, + "loss": 0.0445, + "step": 517, + "video_reward_cumulative_accuracy": 0.7688588007736944 + }, + { + "epoch": 0.1537548233897299, + "grad_norm": 1.285947322845459, + "learning_rate": 4.956163565839831e-06, + "loss": 0.0194, + "step": 518, + "video_reward_cumulative_accuracy": 0.7693050193050193 + }, + { + "epoch": 0.15405164737310775, + "grad_norm": 5.118712425231934, + "learning_rate": 4.955679287530152e-06, + "loss": 0.1169, + "step": 519, + "video_reward_cumulative_accuracy": 0.7687861271676301 + }, + { + "epoch": 0.1543484713564856, + "grad_norm": 1.981034755706787, + "learning_rate": 4.955192372810414e-06, + "loss": 0.0227, + "step": 520, + "video_reward_cumulative_accuracy": 0.7692307692307693 + }, + { + "epoch": 0.15464529533986346, + "grad_norm": 3.3112709522247314, + "learning_rate": 4.954702822203369e-06, + "loss": 0.0877, + "step": 521, + "video_reward_cumulative_accuracy": 0.7687140115163148 + }, + { + "epoch": 0.1549421193232413, + "grad_norm": 2.729583501815796, + "learning_rate": 4.954210636234597e-06, + "loss": 0.0301, + "step": 522, + "video_reward_cumulative_accuracy": 0.7691570881226054 + }, + { + "epoch": 0.15523894330661916, + "grad_norm": 9.558045387268066, + "learning_rate": 4.953715815432505e-06, + "loss": 0.0811, + "step": 523, + "video_reward_cumulative_accuracy": 0.7695984703632888 + }, + { + "epoch": 0.15553576728999705, + "grad_norm": 6.721735000610352, + "learning_rate": 4.9532183603283345e-06, + "loss": 0.0445, + "step": 524, + "video_reward_cumulative_accuracy": 0.7690839694656488 + }, + { + "epoch": 0.1558325912733749, + "grad_norm": 1.9270586967468262, + "learning_rate": 4.952718271456151e-06, + "loss": 0.0429, + "step": 525, + "video_reward_cumulative_accuracy": 0.7695238095238095 + }, + { + "epoch": 0.15612941525675275, + "grad_norm": 2.5546162128448486, + "learning_rate": 4.952215549352846e-06, + "loss": 0.0786, + "step": 526, + "video_reward_cumulative_accuracy": 0.7690114068441065 + }, + { + "epoch": 0.1564262392401306, + "grad_norm": 2.585820436477661, + "learning_rate": 4.951710194558144e-06, + "loss": 0.0416, + "step": 527, + "video_reward_cumulative_accuracy": 0.7694497153700189 + }, + { + "epoch": 0.15672306322350846, + "grad_norm": 2.7101705074310303, + "learning_rate": 4.9512022076145895e-06, + "loss": 0.0562, + "step": 528, + "video_reward_cumulative_accuracy": 0.7698863636363636 + }, + { + "epoch": 0.1570198872068863, + "grad_norm": 1.0189766883850098, + "learning_rate": 4.9506915890675566e-06, + "loss": 0.0526, + "step": 529, + "video_reward_cumulative_accuracy": 0.7703213610586012 + }, + { + "epoch": 0.15731671119026416, + "grad_norm": 6.756640911102295, + "learning_rate": 4.9501783394652455e-06, + "loss": 0.0875, + "step": 530, + "video_reward_cumulative_accuracy": 0.7707547169811321 + }, + { + "epoch": 0.15761353517364202, + "grad_norm": 1.9713293313980103, + "learning_rate": 4.9496624593586775e-06, + "loss": 0.076, + "step": 531, + "video_reward_cumulative_accuracy": 0.7702448210922788 + }, + { + "epoch": 0.1579103591570199, + "grad_norm": 2.242279529571533, + "learning_rate": 4.949143949301701e-06, + "loss": 0.0646, + "step": 532, + "video_reward_cumulative_accuracy": 0.7706766917293233 + }, + { + "epoch": 0.15820718314039775, + "grad_norm": 3.1193904876708984, + "learning_rate": 4.9486228098509865e-06, + "loss": 0.074, + "step": 533, + "video_reward_cumulative_accuracy": 0.7692307692307693 + }, + { + "epoch": 0.1585040071237756, + "grad_norm": 1.8932733535766602, + "learning_rate": 4.9480990415660276e-06, + "loss": 0.0565, + "step": 534, + "video_reward_cumulative_accuracy": 0.7696629213483146 + }, + { + "epoch": 0.15880083110715346, + "grad_norm": 2.4588887691497803, + "learning_rate": 4.947572645009141e-06, + "loss": 0.0547, + "step": 535, + "video_reward_cumulative_accuracy": 0.7691588785046729 + }, + { + "epoch": 0.1590976550905313, + "grad_norm": 4.820741176605225, + "learning_rate": 4.947043620745464e-06, + "loss": 0.0805, + "step": 536, + "video_reward_cumulative_accuracy": 0.769589552238806 + }, + { + "epoch": 0.15939447907390916, + "grad_norm": 1.0390130281448364, + "learning_rate": 4.946511969342956e-06, + "loss": 0.0558, + "step": 537, + "video_reward_cumulative_accuracy": 0.7700186219739292 + }, + { + "epoch": 0.15969130305728702, + "grad_norm": 2.8653810024261475, + "learning_rate": 4.945977691372396e-06, + "loss": 0.0489, + "step": 538, + "video_reward_cumulative_accuracy": 0.7704460966542751 + }, + { + "epoch": 0.1599881270406649, + "grad_norm": 3.0216479301452637, + "learning_rate": 4.945440787407382e-06, + "loss": 0.0597, + "step": 539, + "video_reward_cumulative_accuracy": 0.7708719851576994 + }, + { + "epoch": 0.16028495102404275, + "grad_norm": 1.2938923835754395, + "learning_rate": 4.944901258024335e-06, + "loss": 0.0615, + "step": 540, + "video_reward_cumulative_accuracy": 0.7703703703703704 + }, + { + "epoch": 0.1605817750074206, + "grad_norm": 1.7534013986587524, + "learning_rate": 4.94435910380249e-06, + "loss": 0.0746, + "step": 541, + "video_reward_cumulative_accuracy": 0.7689463955637708 + }, + { + "epoch": 0.16087859899079845, + "grad_norm": 2.365793228149414, + "learning_rate": 4.943814325323904e-06, + "loss": 0.0314, + "step": 542, + "video_reward_cumulative_accuracy": 0.7693726937269373 + }, + { + "epoch": 0.1611754229741763, + "grad_norm": 5.616259574890137, + "learning_rate": 4.943266923173449e-06, + "loss": 0.1097, + "step": 543, + "video_reward_cumulative_accuracy": 0.7697974217311234 + }, + { + "epoch": 0.16147224695755416, + "grad_norm": 1.3462814092636108, + "learning_rate": 4.942716897938813e-06, + "loss": 0.0486, + "step": 544, + "video_reward_cumulative_accuracy": 0.7702205882352942 + }, + { + "epoch": 0.161769070940932, + "grad_norm": 1.065239429473877, + "learning_rate": 4.9421642502105025e-06, + "loss": 0.0442, + "step": 545, + "video_reward_cumulative_accuracy": 0.7697247706422018 + }, + { + "epoch": 0.1620658949243099, + "grad_norm": 2.191693067550659, + "learning_rate": 4.941608980581839e-06, + "loss": 0.0351, + "step": 546, + "video_reward_cumulative_accuracy": 0.7692307692307693 + }, + { + "epoch": 0.16236271890768775, + "grad_norm": 2.6450510025024414, + "learning_rate": 4.941051089648958e-06, + "loss": 0.0778, + "step": 547, + "video_reward_cumulative_accuracy": 0.7687385740402194 + }, + { + "epoch": 0.1626595428910656, + "grad_norm": 4.044307231903076, + "learning_rate": 4.940490578010808e-06, + "loss": 0.068, + "step": 548, + "video_reward_cumulative_accuracy": 0.7691605839416058 + }, + { + "epoch": 0.16295636687444345, + "grad_norm": 2.968937873840332, + "learning_rate": 4.9399274462691555e-06, + "loss": 0.0773, + "step": 549, + "video_reward_cumulative_accuracy": 0.7695810564663024 + }, + { + "epoch": 0.1632531908578213, + "grad_norm": 1.9103548526763916, + "learning_rate": 4.939361695028575e-06, + "loss": 0.0691, + "step": 550, + "video_reward_cumulative_accuracy": 0.769090909090909 + }, + { + "epoch": 0.16355001484119916, + "grad_norm": 3.47516131401062, + "learning_rate": 4.938793324896456e-06, + "loss": 0.0912, + "step": 551, + "video_reward_cumulative_accuracy": 0.7686025408348457 + }, + { + "epoch": 0.163846838824577, + "grad_norm": 3.028697967529297, + "learning_rate": 4.9382223364829995e-06, + "loss": 0.0532, + "step": 552, + "video_reward_cumulative_accuracy": 0.7690217391304348 + }, + { + "epoch": 0.1641436628079549, + "grad_norm": 3.2655930519104004, + "learning_rate": 4.937648730401215e-06, + "loss": 0.036, + "step": 553, + "video_reward_cumulative_accuracy": 0.7694394213381555 + }, + { + "epoch": 0.16444048679133275, + "grad_norm": 1.8100935220718384, + "learning_rate": 4.937072507266928e-06, + "loss": 0.0492, + "step": 554, + "video_reward_cumulative_accuracy": 0.76985559566787 + }, + { + "epoch": 0.1647373107747106, + "grad_norm": 3.864176034927368, + "learning_rate": 4.936493667698766e-06, + "loss": 0.0589, + "step": 555, + "video_reward_cumulative_accuracy": 0.7702702702702703 + }, + { + "epoch": 0.16503413475808845, + "grad_norm": 2.1781342029571533, + "learning_rate": 4.935912212318171e-06, + "loss": 0.0196, + "step": 556, + "video_reward_cumulative_accuracy": 0.77068345323741 + }, + { + "epoch": 0.1653309587414663, + "grad_norm": 1.995492696762085, + "learning_rate": 4.935328141749393e-06, + "loss": 0.049, + "step": 557, + "video_reward_cumulative_accuracy": 0.770197486535009 + }, + { + "epoch": 0.16562778272484416, + "grad_norm": 2.0720860958099365, + "learning_rate": 4.934741456619488e-06, + "loss": 0.0585, + "step": 558, + "video_reward_cumulative_accuracy": 0.7697132616487455 + }, + { + "epoch": 0.165924606708222, + "grad_norm": 1.4079474210739136, + "learning_rate": 4.934152157558317e-06, + "loss": 0.0322, + "step": 559, + "video_reward_cumulative_accuracy": 0.7701252236135957 + }, + { + "epoch": 0.1662214306915999, + "grad_norm": 4.133894443511963, + "learning_rate": 4.933560245198552e-06, + "loss": 0.0604, + "step": 560, + "video_reward_cumulative_accuracy": 0.7705357142857143 + }, + { + "epoch": 0.16651825467497774, + "grad_norm": 3.3255507946014404, + "learning_rate": 4.932965720175669e-06, + "loss": 0.0559, + "step": 561, + "video_reward_cumulative_accuracy": 0.7709447415329769 + }, + { + "epoch": 0.1668150786583556, + "grad_norm": 3.746882677078247, + "learning_rate": 4.9323685831279465e-06, + "loss": 0.0728, + "step": 562, + "video_reward_cumulative_accuracy": 0.7713523131672598 + }, + { + "epoch": 0.16711190264173345, + "grad_norm": 2.2193782329559326, + "learning_rate": 4.93176883469647e-06, + "loss": 0.0512, + "step": 563, + "video_reward_cumulative_accuracy": 0.7708703374777975 + }, + { + "epoch": 0.1674087266251113, + "grad_norm": 0.7712289094924927, + "learning_rate": 4.9311664755251265e-06, + "loss": 0.0202, + "step": 564, + "video_reward_cumulative_accuracy": 0.7712765957446809 + }, + { + "epoch": 0.16770555060848916, + "grad_norm": 1.9561268091201782, + "learning_rate": 4.93056150626061e-06, + "loss": 0.045, + "step": 565, + "video_reward_cumulative_accuracy": 0.7716814159292036 + }, + { + "epoch": 0.168002374591867, + "grad_norm": 2.4627063274383545, + "learning_rate": 4.92995392755241e-06, + "loss": 0.0474, + "step": 566, + "video_reward_cumulative_accuracy": 0.7720848056537103 + }, + { + "epoch": 0.1682991985752449, + "grad_norm": 2.0036139488220215, + "learning_rate": 4.929343740052823e-06, + "loss": 0.0358, + "step": 567, + "video_reward_cumulative_accuracy": 0.7724867724867724 + }, + { + "epoch": 0.16859602255862274, + "grad_norm": 3.07709002494812, + "learning_rate": 4.928730944416945e-06, + "loss": 0.0407, + "step": 568, + "video_reward_cumulative_accuracy": 0.772887323943662 + }, + { + "epoch": 0.1688928465420006, + "grad_norm": 1.9337095022201538, + "learning_rate": 4.928115541302672e-06, + "loss": 0.0386, + "step": 569, + "video_reward_cumulative_accuracy": 0.7724077328646749 + }, + { + "epoch": 0.16918967052537845, + "grad_norm": 6.987502574920654, + "learning_rate": 4.927497531370697e-06, + "loss": 0.1114, + "step": 570, + "video_reward_cumulative_accuracy": 0.7728070175438596 + }, + { + "epoch": 0.1694864945087563, + "grad_norm": 3.6298933029174805, + "learning_rate": 4.9268769152845146e-06, + "loss": 0.0853, + "step": 571, + "video_reward_cumulative_accuracy": 0.7723292469352014 + }, + { + "epoch": 0.16978331849213416, + "grad_norm": 6.841943740844727, + "learning_rate": 4.926253693710416e-06, + "loss": 0.0701, + "step": 572, + "video_reward_cumulative_accuracy": 0.7727272727272727 + }, + { + "epoch": 0.170080142475512, + "grad_norm": 5.937003135681152, + "learning_rate": 4.925627867317491e-06, + "loss": 0.1064, + "step": 573, + "video_reward_cumulative_accuracy": 0.7731239092495636 + }, + { + "epoch": 0.1703769664588899, + "grad_norm": 2.3757357597351074, + "learning_rate": 4.924999436777624e-06, + "loss": 0.0466, + "step": 574, + "video_reward_cumulative_accuracy": 0.7735191637630662 + }, + { + "epoch": 0.17067379044226774, + "grad_norm": 1.4566744565963745, + "learning_rate": 4.924368402765498e-06, + "loss": 0.0228, + "step": 575, + "video_reward_cumulative_accuracy": 0.7739130434782608 + }, + { + "epoch": 0.1709706144256456, + "grad_norm": 2.156557083129883, + "learning_rate": 4.923734765958587e-06, + "loss": 0.0483, + "step": 576, + "video_reward_cumulative_accuracy": 0.7743055555555556 + }, + { + "epoch": 0.17126743840902345, + "grad_norm": 3.534210681915283, + "learning_rate": 4.9230985270371625e-06, + "loss": 0.0695, + "step": 577, + "video_reward_cumulative_accuracy": 0.7738301559792028 + }, + { + "epoch": 0.1715642623924013, + "grad_norm": 1.7886089086532593, + "learning_rate": 4.9224596866842895e-06, + "loss": 0.06, + "step": 578, + "video_reward_cumulative_accuracy": 0.7742214532871973 + }, + { + "epoch": 0.17186108637577915, + "grad_norm": 1.3056138753890991, + "learning_rate": 4.921818245585824e-06, + "loss": 0.0428, + "step": 579, + "video_reward_cumulative_accuracy": 0.7746113989637305 + }, + { + "epoch": 0.172157910359157, + "grad_norm": 2.9909825325012207, + "learning_rate": 4.921174204430415e-06, + "loss": 0.0486, + "step": 580, + "video_reward_cumulative_accuracy": 0.775 + }, + { + "epoch": 0.1724547343425349, + "grad_norm": 3.4431159496307373, + "learning_rate": 4.920527563909505e-06, + "loss": 0.0921, + "step": 581, + "video_reward_cumulative_accuracy": 0.774526678141136 + }, + { + "epoch": 0.17275155832591274, + "grad_norm": 1.9083445072174072, + "learning_rate": 4.919878324717323e-06, + "loss": 0.0655, + "step": 582, + "video_reward_cumulative_accuracy": 0.7749140893470791 + }, + { + "epoch": 0.1730483823092906, + "grad_norm": 5.427271366119385, + "learning_rate": 4.919226487550892e-06, + "loss": 0.0755, + "step": 583, + "video_reward_cumulative_accuracy": 0.774442538593482 + }, + { + "epoch": 0.17334520629266845, + "grad_norm": 2.5748579502105713, + "learning_rate": 4.918572053110022e-06, + "loss": 0.0445, + "step": 584, + "video_reward_cumulative_accuracy": 0.7748287671232876 + }, + { + "epoch": 0.1736420302760463, + "grad_norm": 1.3002210855484009, + "learning_rate": 4.917915022097313e-06, + "loss": 0.0644, + "step": 585, + "video_reward_cumulative_accuracy": 0.7752136752136752 + }, + { + "epoch": 0.17393885425942415, + "grad_norm": 0.7442671060562134, + "learning_rate": 4.917255395218149e-06, + "loss": 0.0408, + "step": 586, + "video_reward_cumulative_accuracy": 0.7747440273037542 + }, + { + "epoch": 0.174235678242802, + "grad_norm": 0.8181408643722534, + "learning_rate": 4.9165931731807045e-06, + "loss": 0.0564, + "step": 587, + "video_reward_cumulative_accuracy": 0.7751277683134583 + }, + { + "epoch": 0.1745325022261799, + "grad_norm": 1.601649522781372, + "learning_rate": 4.915928356695941e-06, + "loss": 0.0286, + "step": 588, + "video_reward_cumulative_accuracy": 0.7755102040816326 + }, + { + "epoch": 0.17482932620955774, + "grad_norm": 0.9357208013534546, + "learning_rate": 4.915260946477601e-06, + "loss": 0.0481, + "step": 589, + "video_reward_cumulative_accuracy": 0.7750424448217318 + }, + { + "epoch": 0.1751261501929356, + "grad_norm": 3.644911766052246, + "learning_rate": 4.914590943242216e-06, + "loss": 0.0626, + "step": 590, + "video_reward_cumulative_accuracy": 0.7745762711864407 + }, + { + "epoch": 0.17542297417631345, + "grad_norm": 6.013518810272217, + "learning_rate": 4.913918347709098e-06, + "loss": 0.0961, + "step": 591, + "video_reward_cumulative_accuracy": 0.7749576988155669 + }, + { + "epoch": 0.1757197981596913, + "grad_norm": 3.016268491744995, + "learning_rate": 4.9132431606003444e-06, + "loss": 0.0378, + "step": 592, + "video_reward_cumulative_accuracy": 0.7753378378378378 + }, + { + "epoch": 0.17601662214306915, + "grad_norm": 1.4820626974105835, + "learning_rate": 4.912565382640834e-06, + "loss": 0.0345, + "step": 593, + "video_reward_cumulative_accuracy": 0.7757166947723441 + }, + { + "epoch": 0.176313446126447, + "grad_norm": 5.260765075683594, + "learning_rate": 4.911885014558227e-06, + "loss": 0.0478, + "step": 594, + "video_reward_cumulative_accuracy": 0.7760942760942761 + }, + { + "epoch": 0.17661027010982489, + "grad_norm": 1.4641700983047485, + "learning_rate": 4.911202057082966e-06, + "loss": 0.0504, + "step": 595, + "video_reward_cumulative_accuracy": 0.7747899159663866 + }, + { + "epoch": 0.17690709409320274, + "grad_norm": 2.0738446712493896, + "learning_rate": 4.91051651094827e-06, + "loss": 0.0336, + "step": 596, + "video_reward_cumulative_accuracy": 0.7743288590604027 + }, + { + "epoch": 0.1772039180765806, + "grad_norm": 1.8221532106399536, + "learning_rate": 4.90982837689014e-06, + "loss": 0.0481, + "step": 597, + "video_reward_cumulative_accuracy": 0.7747068676716918 + }, + { + "epoch": 0.17750074205995844, + "grad_norm": 3.999908685684204, + "learning_rate": 4.909137655647354e-06, + "loss": 0.0827, + "step": 598, + "video_reward_cumulative_accuracy": 0.7750836120401338 + }, + { + "epoch": 0.1777975660433363, + "grad_norm": 3.1265053749084473, + "learning_rate": 4.908444347961472e-06, + "loss": 0.0808, + "step": 599, + "video_reward_cumulative_accuracy": 0.7754590984974958 + }, + { + "epoch": 0.17809439002671415, + "grad_norm": 4.669152736663818, + "learning_rate": 4.907748454576822e-06, + "loss": 0.09, + "step": 600, + "video_reward_cumulative_accuracy": 0.7758333333333334 + }, + { + "epoch": 0.17809439002671415, + "eval_runtime": 143.405, + "eval_samples_per_second": 5.502, + "eval_steps_per_second": 0.69, + "eval_test_set_accuracy": 0.73989898989899, + "step": 600 + }, + { + "epoch": 0.178391214010092, + "grad_norm": 1.6913496255874634, + "learning_rate": 4.907049976240516e-06, + "loss": 0.0538, + "step": 601, + "video_reward_cumulative_accuracy": 0.7762063227953411 + }, + { + "epoch": 0.17868803799346988, + "grad_norm": 10.412848472595215, + "learning_rate": 4.9063489137024375e-06, + "loss": 0.1035, + "step": 602, + "video_reward_cumulative_accuracy": 0.7757475083056479 + }, + { + "epoch": 0.17898486197684774, + "grad_norm": 0.9118223786354065, + "learning_rate": 4.905645267715246e-06, + "loss": 0.0125, + "step": 603, + "video_reward_cumulative_accuracy": 0.7761194029850746 + }, + { + "epoch": 0.1792816859602256, + "grad_norm": 2.315340518951416, + "learning_rate": 4.904939039034373e-06, + "loss": 0.0634, + "step": 604, + "video_reward_cumulative_accuracy": 0.7756622516556292 + }, + { + "epoch": 0.17957850994360344, + "grad_norm": 3.120900869369507, + "learning_rate": 4.904230228418023e-06, + "loss": 0.1249, + "step": 605, + "video_reward_cumulative_accuracy": 0.775206611570248 + }, + { + "epoch": 0.1798753339269813, + "grad_norm": 3.1384425163269043, + "learning_rate": 4.903518836627174e-06, + "loss": 0.0442, + "step": 606, + "video_reward_cumulative_accuracy": 0.7747524752475248 + }, + { + "epoch": 0.18017215791035915, + "grad_norm": 4.558505535125732, + "learning_rate": 4.9028048644255745e-06, + "loss": 0.071, + "step": 607, + "video_reward_cumulative_accuracy": 0.7742998352553542 + }, + { + "epoch": 0.180468981893737, + "grad_norm": 2.9076156616210938, + "learning_rate": 4.9020883125797415e-06, + "loss": 0.0323, + "step": 608, + "video_reward_cumulative_accuracy": 0.7738486842105263 + }, + { + "epoch": 0.18076580587711488, + "grad_norm": 4.778907775878906, + "learning_rate": 4.9013691818589635e-06, + "loss": 0.0686, + "step": 609, + "video_reward_cumulative_accuracy": 0.7742200328407225 + }, + { + "epoch": 0.18106262986049274, + "grad_norm": 1.0505435466766357, + "learning_rate": 4.9006474730352974e-06, + "loss": 0.0419, + "step": 610, + "video_reward_cumulative_accuracy": 0.7745901639344263 + }, + { + "epoch": 0.1813594538438706, + "grad_norm": 3.0237913131713867, + "learning_rate": 4.8999231868835675e-06, + "loss": 0.0503, + "step": 611, + "video_reward_cumulative_accuracy": 0.7741407528641571 + }, + { + "epoch": 0.18165627782724844, + "grad_norm": 1.5496163368225098, + "learning_rate": 4.899196324181365e-06, + "loss": 0.0545, + "step": 612, + "video_reward_cumulative_accuracy": 0.7736928104575164 + }, + { + "epoch": 0.1819531018106263, + "grad_norm": 4.970526695251465, + "learning_rate": 4.898466885709049e-06, + "loss": 0.0746, + "step": 613, + "video_reward_cumulative_accuracy": 0.7732463295269169 + }, + { + "epoch": 0.18224992579400415, + "grad_norm": 3.2845726013183594, + "learning_rate": 4.897734872249742e-06, + "loss": 0.0703, + "step": 614, + "video_reward_cumulative_accuracy": 0.7728013029315961 + }, + { + "epoch": 0.182546749777382, + "grad_norm": 1.6914643049240112, + "learning_rate": 4.89700028458933e-06, + "loss": 0.0178, + "step": 615, + "video_reward_cumulative_accuracy": 0.7731707317073171 + }, + { + "epoch": 0.18284357376075988, + "grad_norm": 1.8486802577972412, + "learning_rate": 4.896263123516465e-06, + "loss": 0.0277, + "step": 616, + "video_reward_cumulative_accuracy": 0.773538961038961 + }, + { + "epoch": 0.18314039774413773, + "grad_norm": 4.048538684844971, + "learning_rate": 4.8955233898225605e-06, + "loss": 0.0762, + "step": 617, + "video_reward_cumulative_accuracy": 0.773095623987034 + }, + { + "epoch": 0.1834372217275156, + "grad_norm": 3.5552480220794678, + "learning_rate": 4.894781084301793e-06, + "loss": 0.0318, + "step": 618, + "video_reward_cumulative_accuracy": 0.7726537216828478 + }, + { + "epoch": 0.18373404571089344, + "grad_norm": 4.219141006469727, + "learning_rate": 4.8940362077511e-06, + "loss": 0.0499, + "step": 619, + "video_reward_cumulative_accuracy": 0.7730210016155089 + }, + { + "epoch": 0.1840308696942713, + "grad_norm": 1.9816478490829468, + "learning_rate": 4.893288760970178e-06, + "loss": 0.0364, + "step": 620, + "video_reward_cumulative_accuracy": 0.7733870967741936 + }, + { + "epoch": 0.18432769367764915, + "grad_norm": 2.502150535583496, + "learning_rate": 4.892538744761484e-06, + "loss": 0.0592, + "step": 621, + "video_reward_cumulative_accuracy": 0.7737520128824477 + }, + { + "epoch": 0.184624517661027, + "grad_norm": 2.3041718006134033, + "learning_rate": 4.891786159930234e-06, + "loss": 0.0372, + "step": 622, + "video_reward_cumulative_accuracy": 0.7741157556270096 + }, + { + "epoch": 0.18492134164440488, + "grad_norm": 2.347670078277588, + "learning_rate": 4.8910310072843996e-06, + "loss": 0.0908, + "step": 623, + "video_reward_cumulative_accuracy": 0.7744783306581059 + }, + { + "epoch": 0.18521816562778273, + "grad_norm": 1.8450899124145508, + "learning_rate": 4.89027328763471e-06, + "loss": 0.0378, + "step": 624, + "video_reward_cumulative_accuracy": 0.7748397435897436 + }, + { + "epoch": 0.18551498961116059, + "grad_norm": 2.6531243324279785, + "learning_rate": 4.889513001794652e-06, + "loss": 0.0294, + "step": 625, + "video_reward_cumulative_accuracy": 0.7752 + }, + { + "epoch": 0.18581181359453844, + "grad_norm": 1.7452423572540283, + "learning_rate": 4.888750150580466e-06, + "loss": 0.0357, + "step": 626, + "video_reward_cumulative_accuracy": 0.7747603833865815 + }, + { + "epoch": 0.1861086375779163, + "grad_norm": 1.9167020320892334, + "learning_rate": 4.887984734811146e-06, + "loss": 0.0278, + "step": 627, + "video_reward_cumulative_accuracy": 0.7751196172248804 + }, + { + "epoch": 0.18640546156129414, + "grad_norm": 2.1981749534606934, + "learning_rate": 4.887216755308442e-06, + "loss": 0.0457, + "step": 628, + "video_reward_cumulative_accuracy": 0.7746815286624203 + }, + { + "epoch": 0.186702285544672, + "grad_norm": 2.631011962890625, + "learning_rate": 4.886446212896853e-06, + "loss": 0.0836, + "step": 629, + "video_reward_cumulative_accuracy": 0.7742448330683624 + }, + { + "epoch": 0.18699910952804988, + "grad_norm": 3.423548936843872, + "learning_rate": 4.885673108403631e-06, + "loss": 0.0552, + "step": 630, + "video_reward_cumulative_accuracy": 0.7746031746031746 + }, + { + "epoch": 0.18729593351142773, + "grad_norm": 0.9264172911643982, + "learning_rate": 4.884897442658779e-06, + "loss": 0.0143, + "step": 631, + "video_reward_cumulative_accuracy": 0.7749603803486529 + }, + { + "epoch": 0.18759275749480558, + "grad_norm": 1.8935270309448242, + "learning_rate": 4.88411921649505e-06, + "loss": 0.0241, + "step": 632, + "video_reward_cumulative_accuracy": 0.7745253164556962 + }, + { + "epoch": 0.18788958147818344, + "grad_norm": 1.9011247158050537, + "learning_rate": 4.883338430747944e-06, + "loss": 0.1005, + "step": 633, + "video_reward_cumulative_accuracy": 0.7748815165876777 + }, + { + "epoch": 0.1881864054615613, + "grad_norm": 2.4055683612823486, + "learning_rate": 4.882555086255712e-06, + "loss": 0.0326, + "step": 634, + "video_reward_cumulative_accuracy": 0.7752365930599369 + }, + { + "epoch": 0.18848322944493914, + "grad_norm": 3.9865567684173584, + "learning_rate": 4.88176918385935e-06, + "loss": 0.0432, + "step": 635, + "video_reward_cumulative_accuracy": 0.7748031496062993 + }, + { + "epoch": 0.188780053428317, + "grad_norm": 1.4653565883636475, + "learning_rate": 4.8809807244025985e-06, + "loss": 0.0275, + "step": 636, + "video_reward_cumulative_accuracy": 0.7751572327044025 + }, + { + "epoch": 0.18907687741169488, + "grad_norm": 1.477861762046814, + "learning_rate": 4.880189708731947e-06, + "loss": 0.0707, + "step": 637, + "video_reward_cumulative_accuracy": 0.7755102040816326 + }, + { + "epoch": 0.18937370139507273, + "grad_norm": 4.140712261199951, + "learning_rate": 4.879396137696628e-06, + "loss": 0.0873, + "step": 638, + "video_reward_cumulative_accuracy": 0.7750783699059561 + }, + { + "epoch": 0.18967052537845058, + "grad_norm": 2.715289354324341, + "learning_rate": 4.878600012148617e-06, + "loss": 0.0496, + "step": 639, + "video_reward_cumulative_accuracy": 0.7754303599374022 + }, + { + "epoch": 0.18996734936182844, + "grad_norm": 1.2748372554779053, + "learning_rate": 4.87780133294263e-06, + "loss": 0.0294, + "step": 640, + "video_reward_cumulative_accuracy": 0.77578125 + }, + { + "epoch": 0.1902641733452063, + "grad_norm": 1.9033632278442383, + "learning_rate": 4.877000100936129e-06, + "loss": 0.0344, + "step": 641, + "video_reward_cumulative_accuracy": 0.7761310452418096 + }, + { + "epoch": 0.19056099732858414, + "grad_norm": 3.2748398780822754, + "learning_rate": 4.876196316989313e-06, + "loss": 0.054, + "step": 642, + "video_reward_cumulative_accuracy": 0.7764797507788161 + }, + { + "epoch": 0.190857821311962, + "grad_norm": 2.8134162425994873, + "learning_rate": 4.875389981965123e-06, + "loss": 0.0845, + "step": 643, + "video_reward_cumulative_accuracy": 0.776049766718507 + }, + { + "epoch": 0.19115464529533988, + "grad_norm": 1.5256245136260986, + "learning_rate": 4.874581096729238e-06, + "loss": 0.0205, + "step": 644, + "video_reward_cumulative_accuracy": 0.7763975155279503 + }, + { + "epoch": 0.19145146927871773, + "grad_norm": 5.077547073364258, + "learning_rate": 4.8737696621500715e-06, + "loss": 0.0899, + "step": 645, + "video_reward_cumulative_accuracy": 0.7767441860465116 + }, + { + "epoch": 0.19174829326209558, + "grad_norm": 4.956404209136963, + "learning_rate": 4.872955679098782e-06, + "loss": 0.0849, + "step": 646, + "video_reward_cumulative_accuracy": 0.7770897832817337 + }, + { + "epoch": 0.19204511724547343, + "grad_norm": 7.943280220031738, + "learning_rate": 4.872139148449257e-06, + "loss": 0.1075, + "step": 647, + "video_reward_cumulative_accuracy": 0.7774343122102009 + }, + { + "epoch": 0.1923419412288513, + "grad_norm": 6.340520858764648, + "learning_rate": 4.871320071078122e-06, + "loss": 0.0777, + "step": 648, + "video_reward_cumulative_accuracy": 0.7777777777777778 + }, + { + "epoch": 0.19263876521222914, + "grad_norm": 2.82149338722229, + "learning_rate": 4.870498447864735e-06, + "loss": 0.0556, + "step": 649, + "video_reward_cumulative_accuracy": 0.7781201848998459 + }, + { + "epoch": 0.192935589195607, + "grad_norm": 5.320289134979248, + "learning_rate": 4.86967427969119e-06, + "loss": 0.0697, + "step": 650, + "video_reward_cumulative_accuracy": 0.7776923076923077 + }, + { + "epoch": 0.19323241317898487, + "grad_norm": 1.857016682624817, + "learning_rate": 4.86884756744231e-06, + "loss": 0.0323, + "step": 651, + "video_reward_cumulative_accuracy": 0.7780337941628265 + }, + { + "epoch": 0.19352923716236273, + "grad_norm": 2.1225006580352783, + "learning_rate": 4.8680183120056516e-06, + "loss": 0.0557, + "step": 652, + "video_reward_cumulative_accuracy": 0.7776073619631901 + }, + { + "epoch": 0.19382606114574058, + "grad_norm": 1.882553219795227, + "learning_rate": 4.8671865142715e-06, + "loss": 0.0494, + "step": 653, + "video_reward_cumulative_accuracy": 0.7771822358346095 + }, + { + "epoch": 0.19412288512911843, + "grad_norm": 3.701078414916992, + "learning_rate": 4.866352175132873e-06, + "loss": 0.1412, + "step": 654, + "video_reward_cumulative_accuracy": 0.7759938837920489 + }, + { + "epoch": 0.1944197091124963, + "grad_norm": 4.4002509117126465, + "learning_rate": 4.865515295485511e-06, + "loss": 0.0662, + "step": 655, + "video_reward_cumulative_accuracy": 0.7763358778625954 + }, + { + "epoch": 0.19471653309587414, + "grad_norm": 5.623415470123291, + "learning_rate": 4.864675876227889e-06, + "loss": 0.0927, + "step": 656, + "video_reward_cumulative_accuracy": 0.7751524390243902 + }, + { + "epoch": 0.195013357079252, + "grad_norm": 1.7767045497894287, + "learning_rate": 4.863833918261204e-06, + "loss": 0.0468, + "step": 657, + "video_reward_cumulative_accuracy": 0.7754946727549468 + }, + { + "epoch": 0.19531018106262987, + "grad_norm": 1.3329066038131714, + "learning_rate": 4.862989422489379e-06, + "loss": 0.0362, + "step": 658, + "video_reward_cumulative_accuracy": 0.7758358662613982 + }, + { + "epoch": 0.19560700504600773, + "grad_norm": 3.9193496704101562, + "learning_rate": 4.862142389819063e-06, + "loss": 0.0751, + "step": 659, + "video_reward_cumulative_accuracy": 0.776176024279211 + }, + { + "epoch": 0.19590382902938558, + "grad_norm": 2.8584847450256348, + "learning_rate": 4.861292821159627e-06, + "loss": 0.0569, + "step": 660, + "video_reward_cumulative_accuracy": 0.7765151515151515 + }, + { + "epoch": 0.19620065301276343, + "grad_norm": 1.3020362854003906, + "learning_rate": 4.860440717423166e-06, + "loss": 0.0746, + "step": 661, + "video_reward_cumulative_accuracy": 0.7768532526475038 + }, + { + "epoch": 0.19649747699614128, + "grad_norm": 5.554771423339844, + "learning_rate": 4.8595860795244955e-06, + "loss": 0.0509, + "step": 662, + "video_reward_cumulative_accuracy": 0.7764350453172205 + }, + { + "epoch": 0.19679430097951914, + "grad_norm": 1.001604676246643, + "learning_rate": 4.858728908381153e-06, + "loss": 0.0301, + "step": 663, + "video_reward_cumulative_accuracy": 0.7760180995475113 + }, + { + "epoch": 0.197091124962897, + "grad_norm": 1.391948938369751, + "learning_rate": 4.857869204913394e-06, + "loss": 0.0644, + "step": 664, + "video_reward_cumulative_accuracy": 0.776355421686747 + }, + { + "epoch": 0.19738794894627487, + "grad_norm": 1.2883930206298828, + "learning_rate": 4.857006970044194e-06, + "loss": 0.0282, + "step": 665, + "video_reward_cumulative_accuracy": 0.7766917293233083 + }, + { + "epoch": 0.19768477292965272, + "grad_norm": 1.8838915824890137, + "learning_rate": 4.856142204699246e-06, + "loss": 0.037, + "step": 666, + "video_reward_cumulative_accuracy": 0.777027027027027 + }, + { + "epoch": 0.19798159691303058, + "grad_norm": 8.990363121032715, + "learning_rate": 4.855274909806959e-06, + "loss": 0.1334, + "step": 667, + "video_reward_cumulative_accuracy": 0.7766116941529235 + }, + { + "epoch": 0.19827842089640843, + "grad_norm": 3.5435233116149902, + "learning_rate": 4.85440508629846e-06, + "loss": 0.0568, + "step": 668, + "video_reward_cumulative_accuracy": 0.7761976047904192 + }, + { + "epoch": 0.19857524487978628, + "grad_norm": 2.3105525970458984, + "learning_rate": 4.853532735107587e-06, + "loss": 0.0383, + "step": 669, + "video_reward_cumulative_accuracy": 0.7765321375186846 + }, + { + "epoch": 0.19887206886316414, + "grad_norm": 0.8967596292495728, + "learning_rate": 4.852657857170894e-06, + "loss": 0.0358, + "step": 670, + "video_reward_cumulative_accuracy": 0.7768656716417911 + }, + { + "epoch": 0.199168892846542, + "grad_norm": 1.6966391801834106, + "learning_rate": 4.851780453427648e-06, + "loss": 0.035, + "step": 671, + "video_reward_cumulative_accuracy": 0.7764530551415797 + }, + { + "epoch": 0.19946571682991987, + "grad_norm": 5.105749607086182, + "learning_rate": 4.8509005248198265e-06, + "loss": 0.0905, + "step": 672, + "video_reward_cumulative_accuracy": 0.7760416666666666 + }, + { + "epoch": 0.19976254081329772, + "grad_norm": 1.4758727550506592, + "learning_rate": 4.8500180722921184e-06, + "loss": 0.0379, + "step": 673, + "video_reward_cumulative_accuracy": 0.7763744427934621 + }, + { + "epoch": 0.20005936479667558, + "grad_norm": 2.2049713134765625, + "learning_rate": 4.849133096791923e-06, + "loss": 0.0313, + "step": 674, + "video_reward_cumulative_accuracy": 0.776706231454006 + }, + { + "epoch": 0.20035618878005343, + "grad_norm": 2.7456955909729004, + "learning_rate": 4.848245599269346e-06, + "loss": 0.0959, + "step": 675, + "video_reward_cumulative_accuracy": 0.7770370370370371 + }, + { + "epoch": 0.20065301276343128, + "grad_norm": 5.363068103790283, + "learning_rate": 4.847355580677203e-06, + "loss": 0.0997, + "step": 676, + "video_reward_cumulative_accuracy": 0.775887573964497 + }, + { + "epoch": 0.20094983674680914, + "grad_norm": 1.5729711055755615, + "learning_rate": 4.846463041971014e-06, + "loss": 0.0311, + "step": 677, + "video_reward_cumulative_accuracy": 0.7754800590841949 + }, + { + "epoch": 0.201246660730187, + "grad_norm": 7.414484024047852, + "learning_rate": 4.845567984109009e-06, + "loss": 0.1189, + "step": 678, + "video_reward_cumulative_accuracy": 0.7743362831858407 + }, + { + "epoch": 0.20154348471356487, + "grad_norm": 1.798652172088623, + "learning_rate": 4.844670408052117e-06, + "loss": 0.0357, + "step": 679, + "video_reward_cumulative_accuracy": 0.7746686303387335 + }, + { + "epoch": 0.20184030869694272, + "grad_norm": 4.525697231292725, + "learning_rate": 4.843770314763973e-06, + "loss": 0.0504, + "step": 680, + "video_reward_cumulative_accuracy": 0.774264705882353 + }, + { + "epoch": 0.20213713268032057, + "grad_norm": 2.7870752811431885, + "learning_rate": 4.842867705210915e-06, + "loss": 0.0954, + "step": 681, + "video_reward_cumulative_accuracy": 0.7738619676945668 + }, + { + "epoch": 0.20243395666369843, + "grad_norm": 2.8513903617858887, + "learning_rate": 4.841962580361983e-06, + "loss": 0.0483, + "step": 682, + "video_reward_cumulative_accuracy": 0.7741935483870968 + }, + { + "epoch": 0.20273078064707628, + "grad_norm": 2.141054630279541, + "learning_rate": 4.841054941188914e-06, + "loss": 0.0399, + "step": 683, + "video_reward_cumulative_accuracy": 0.773792093704246 + }, + { + "epoch": 0.20302760463045413, + "grad_norm": 1.3511686325073242, + "learning_rate": 4.840144788666149e-06, + "loss": 0.0312, + "step": 684, + "video_reward_cumulative_accuracy": 0.7741228070175439 + }, + { + "epoch": 0.203324428613832, + "grad_norm": 1.8301844596862793, + "learning_rate": 4.839232123770824e-06, + "loss": 0.0654, + "step": 685, + "video_reward_cumulative_accuracy": 0.7744525547445256 + }, + { + "epoch": 0.20362125259720987, + "grad_norm": 4.309445858001709, + "learning_rate": 4.838316947482774e-06, + "loss": 0.0561, + "step": 686, + "video_reward_cumulative_accuracy": 0.7747813411078717 + }, + { + "epoch": 0.20391807658058772, + "grad_norm": 4.205143928527832, + "learning_rate": 4.837399260784529e-06, + "loss": 0.073, + "step": 687, + "video_reward_cumulative_accuracy": 0.7743813682678311 + }, + { + "epoch": 0.20421490056396557, + "grad_norm": 2.1332249641418457, + "learning_rate": 4.836479064661314e-06, + "loss": 0.0535, + "step": 688, + "video_reward_cumulative_accuracy": 0.7747093023255814 + }, + { + "epoch": 0.20451172454734343, + "grad_norm": 2.2309157848358154, + "learning_rate": 4.83555636010105e-06, + "loss": 0.0286, + "step": 689, + "video_reward_cumulative_accuracy": 0.7750362844702468 + }, + { + "epoch": 0.20480854853072128, + "grad_norm": 1.6311012506484985, + "learning_rate": 4.8346311480943495e-06, + "loss": 0.0292, + "step": 690, + "video_reward_cumulative_accuracy": 0.7753623188405797 + }, + { + "epoch": 0.20510537251409913, + "grad_norm": 3.9914817810058594, + "learning_rate": 4.833703429634519e-06, + "loss": 0.0789, + "step": 691, + "video_reward_cumulative_accuracy": 0.7749638205499276 + }, + { + "epoch": 0.20540219649747699, + "grad_norm": 0.823984682559967, + "learning_rate": 4.832773205717551e-06, + "loss": 0.027, + "step": 692, + "video_reward_cumulative_accuracy": 0.7752890173410405 + }, + { + "epoch": 0.20569902048085487, + "grad_norm": 0.9912533760070801, + "learning_rate": 4.831840477342134e-06, + "loss": 0.0309, + "step": 693, + "video_reward_cumulative_accuracy": 0.7756132756132756 + }, + { + "epoch": 0.20599584446423272, + "grad_norm": 1.6996347904205322, + "learning_rate": 4.830905245509641e-06, + "loss": 0.0468, + "step": 694, + "video_reward_cumulative_accuracy": 0.7752161383285303 + }, + { + "epoch": 0.20629266844761057, + "grad_norm": 1.391541600227356, + "learning_rate": 4.829967511224135e-06, + "loss": 0.0389, + "step": 695, + "video_reward_cumulative_accuracy": 0.7755395683453238 + }, + { + "epoch": 0.20658949243098843, + "grad_norm": 2.34708833694458, + "learning_rate": 4.829027275492364e-06, + "loss": 0.0395, + "step": 696, + "video_reward_cumulative_accuracy": 0.7758620689655172 + }, + { + "epoch": 0.20688631641436628, + "grad_norm": 2.918024778366089, + "learning_rate": 4.828084539323763e-06, + "loss": 0.0451, + "step": 697, + "video_reward_cumulative_accuracy": 0.7754662840746055 + }, + { + "epoch": 0.20718314039774413, + "grad_norm": 2.345532178878784, + "learning_rate": 4.82713930373045e-06, + "loss": 0.0442, + "step": 698, + "video_reward_cumulative_accuracy": 0.7757879656160458 + }, + { + "epoch": 0.20747996438112198, + "grad_norm": 1.47147536277771, + "learning_rate": 4.826191569727228e-06, + "loss": 0.0284, + "step": 699, + "video_reward_cumulative_accuracy": 0.7761087267525035 + }, + { + "epoch": 0.20777678836449986, + "grad_norm": 1.816048264503479, + "learning_rate": 4.82524133833158e-06, + "loss": 0.0437, + "step": 700, + "video_reward_cumulative_accuracy": 0.7764285714285715 + }, + { + "epoch": 0.20807361234787772, + "grad_norm": 3.5193707942962646, + "learning_rate": 4.824288610563673e-06, + "loss": 0.0454, + "step": 701, + "video_reward_cumulative_accuracy": 0.7767475035663338 + }, + { + "epoch": 0.20837043633125557, + "grad_norm": 1.532949447631836, + "learning_rate": 4.8233333874463535e-06, + "loss": 0.036, + "step": 702, + "video_reward_cumulative_accuracy": 0.7770655270655271 + }, + { + "epoch": 0.20866726031463342, + "grad_norm": 1.1091006994247437, + "learning_rate": 4.822375670005144e-06, + "loss": 0.0265, + "step": 703, + "video_reward_cumulative_accuracy": 0.7773826458036984 + }, + { + "epoch": 0.20896408429801128, + "grad_norm": 2.238027572631836, + "learning_rate": 4.821415459268249e-06, + "loss": 0.0393, + "step": 704, + "video_reward_cumulative_accuracy": 0.7776988636363636 + }, + { + "epoch": 0.20926090828138913, + "grad_norm": 4.488368988037109, + "learning_rate": 4.820452756266546e-06, + "loss": 0.0896, + "step": 705, + "video_reward_cumulative_accuracy": 0.7780141843971631 + }, + { + "epoch": 0.20955773226476698, + "grad_norm": 2.5125250816345215, + "learning_rate": 4.819487562033592e-06, + "loss": 0.0354, + "step": 706, + "video_reward_cumulative_accuracy": 0.7776203966005666 + }, + { + "epoch": 0.20985455624814486, + "grad_norm": 3.1740994453430176, + "learning_rate": 4.818519877605616e-06, + "loss": 0.0392, + "step": 707, + "video_reward_cumulative_accuracy": 0.7779349363507779 + }, + { + "epoch": 0.21015138023152272, + "grad_norm": 0.8142343759536743, + "learning_rate": 4.817549704021521e-06, + "loss": 0.0256, + "step": 708, + "video_reward_cumulative_accuracy": 0.7782485875706214 + }, + { + "epoch": 0.21044820421490057, + "grad_norm": 2.2193245887756348, + "learning_rate": 4.816577042322883e-06, + "loss": 0.0586, + "step": 709, + "video_reward_cumulative_accuracy": 0.7785613540197461 + }, + { + "epoch": 0.21074502819827842, + "grad_norm": 3.6588878631591797, + "learning_rate": 4.815601893553948e-06, + "loss": 0.061, + "step": 710, + "video_reward_cumulative_accuracy": 0.778169014084507 + }, + { + "epoch": 0.21104185218165628, + "grad_norm": 3.278996229171753, + "learning_rate": 4.8146242587616335e-06, + "loss": 0.0306, + "step": 711, + "video_reward_cumulative_accuracy": 0.7784810126582279 + }, + { + "epoch": 0.21133867616503413, + "grad_norm": 4.987575054168701, + "learning_rate": 4.813644138995524e-06, + "loss": 0.0612, + "step": 712, + "video_reward_cumulative_accuracy": 0.7780898876404494 + }, + { + "epoch": 0.21163550014841198, + "grad_norm": 3.508737087249756, + "learning_rate": 4.812661535307876e-06, + "loss": 0.0883, + "step": 713, + "video_reward_cumulative_accuracy": 0.7776998597475456 + }, + { + "epoch": 0.21193232413178986, + "grad_norm": 1.5868617296218872, + "learning_rate": 4.811676448753606e-06, + "loss": 0.0478, + "step": 714, + "video_reward_cumulative_accuracy": 0.7780112044817927 + }, + { + "epoch": 0.21222914811516772, + "grad_norm": 1.609864592552185, + "learning_rate": 4.810688880390303e-06, + "loss": 0.0553, + "step": 715, + "video_reward_cumulative_accuracy": 0.7783216783216783 + }, + { + "epoch": 0.21252597209854557, + "grad_norm": 3.3906569480895996, + "learning_rate": 4.809698831278217e-06, + "loss": 0.077, + "step": 716, + "video_reward_cumulative_accuracy": 0.7779329608938548 + }, + { + "epoch": 0.21282279608192342, + "grad_norm": 1.417561411857605, + "learning_rate": 4.808706302480261e-06, + "loss": 0.038, + "step": 717, + "video_reward_cumulative_accuracy": 0.7782426778242678 + }, + { + "epoch": 0.21311962006530127, + "grad_norm": 1.8394412994384766, + "learning_rate": 4.807711295062013e-06, + "loss": 0.0496, + "step": 718, + "video_reward_cumulative_accuracy": 0.7785515320334262 + }, + { + "epoch": 0.21341644404867913, + "grad_norm": 2.1921329498291016, + "learning_rate": 4.8067138100917065e-06, + "loss": 0.0483, + "step": 719, + "video_reward_cumulative_accuracy": 0.778164116828929 + }, + { + "epoch": 0.21371326803205698, + "grad_norm": 3.041285753250122, + "learning_rate": 4.805713848640242e-06, + "loss": 0.0777, + "step": 720, + "video_reward_cumulative_accuracy": 0.7784722222222222 + }, + { + "epoch": 0.21401009201543486, + "grad_norm": 2.195009469985962, + "learning_rate": 4.804711411781173e-06, + "loss": 0.0761, + "step": 721, + "video_reward_cumulative_accuracy": 0.7787794729542302 + }, + { + "epoch": 0.2143069159988127, + "grad_norm": 1.3252123594284058, + "learning_rate": 4.803706500590714e-06, + "loss": 0.062, + "step": 722, + "video_reward_cumulative_accuracy": 0.7790858725761773 + }, + { + "epoch": 0.21460373998219057, + "grad_norm": 3.693161964416504, + "learning_rate": 4.802699116147732e-06, + "loss": 0.0576, + "step": 723, + "video_reward_cumulative_accuracy": 0.7786998616874136 + }, + { + "epoch": 0.21490056396556842, + "grad_norm": 1.6738680601119995, + "learning_rate": 4.801689259533756e-06, + "loss": 0.0488, + "step": 724, + "video_reward_cumulative_accuracy": 0.7790055248618785 + }, + { + "epoch": 0.21519738794894627, + "grad_norm": 2.5583913326263428, + "learning_rate": 4.800676931832963e-06, + "loss": 0.0596, + "step": 725, + "video_reward_cumulative_accuracy": 0.7793103448275862 + }, + { + "epoch": 0.21549421193232413, + "grad_norm": 2.4868931770324707, + "learning_rate": 4.799662134132185e-06, + "loss": 0.0386, + "step": 726, + "video_reward_cumulative_accuracy": 0.7789256198347108 + }, + { + "epoch": 0.21579103591570198, + "grad_norm": 1.2940788269042969, + "learning_rate": 4.798644867520905e-06, + "loss": 0.0506, + "step": 727, + "video_reward_cumulative_accuracy": 0.7792297111416782 + }, + { + "epoch": 0.21608785989907986, + "grad_norm": 1.6243387460708618, + "learning_rate": 4.797625133091259e-06, + "loss": 0.0387, + "step": 728, + "video_reward_cumulative_accuracy": 0.779532967032967 + }, + { + "epoch": 0.2163846838824577, + "grad_norm": 3.6060104370117188, + "learning_rate": 4.796602931938031e-06, + "loss": 0.072, + "step": 729, + "video_reward_cumulative_accuracy": 0.7791495198902606 + }, + { + "epoch": 0.21668150786583557, + "grad_norm": 2.368060827255249, + "learning_rate": 4.795578265158652e-06, + "loss": 0.0417, + "step": 730, + "video_reward_cumulative_accuracy": 0.7787671232876713 + }, + { + "epoch": 0.21697833184921342, + "grad_norm": 2.8776209354400635, + "learning_rate": 4.794551133853202e-06, + "loss": 0.0693, + "step": 731, + "video_reward_cumulative_accuracy": 0.7790697674418605 + }, + { + "epoch": 0.21727515583259127, + "grad_norm": 1.1618021726608276, + "learning_rate": 4.7935215391244065e-06, + "loss": 0.0558, + "step": 732, + "video_reward_cumulative_accuracy": 0.7786885245901639 + }, + { + "epoch": 0.21757197981596912, + "grad_norm": 4.455048561096191, + "learning_rate": 4.792489482077633e-06, + "loss": 0.0619, + "step": 733, + "video_reward_cumulative_accuracy": 0.7789904502046384 + }, + { + "epoch": 0.21786880379934698, + "grad_norm": 3.7576048374176025, + "learning_rate": 4.791454963820898e-06, + "loss": 0.0586, + "step": 734, + "video_reward_cumulative_accuracy": 0.779291553133515 + }, + { + "epoch": 0.21816562778272486, + "grad_norm": 2.280623435974121, + "learning_rate": 4.790417985464855e-06, + "loss": 0.0457, + "step": 735, + "video_reward_cumulative_accuracy": 0.7789115646258503 + }, + { + "epoch": 0.2184624517661027, + "grad_norm": 3.224135398864746, + "learning_rate": 4.789378548122803e-06, + "loss": 0.0378, + "step": 736, + "video_reward_cumulative_accuracy": 0.7792119565217391 + }, + { + "epoch": 0.21875927574948056, + "grad_norm": 1.9654239416122437, + "learning_rate": 4.788336652910676e-06, + "loss": 0.068, + "step": 737, + "video_reward_cumulative_accuracy": 0.7788331071913162 + }, + { + "epoch": 0.21905609973285842, + "grad_norm": 2.6083526611328125, + "learning_rate": 4.787292300947053e-06, + "loss": 0.0529, + "step": 738, + "video_reward_cumulative_accuracy": 0.7784552845528455 + }, + { + "epoch": 0.21935292371623627, + "grad_norm": 2.398172378540039, + "learning_rate": 4.786245493353145e-06, + "loss": 0.0541, + "step": 739, + "video_reward_cumulative_accuracy": 0.7780784844384303 + }, + { + "epoch": 0.21964974769961412, + "grad_norm": 0.9663519263267517, + "learning_rate": 4.785196231252802e-06, + "loss": 0.031, + "step": 740, + "video_reward_cumulative_accuracy": 0.7783783783783784 + }, + { + "epoch": 0.21994657168299198, + "grad_norm": 1.1138893365859985, + "learning_rate": 4.784144515772509e-06, + "loss": 0.0387, + "step": 741, + "video_reward_cumulative_accuracy": 0.7780026990553306 + }, + { + "epoch": 0.22024339566636983, + "grad_norm": 1.9808402061462402, + "learning_rate": 4.783090348041384e-06, + "loss": 0.0348, + "step": 742, + "video_reward_cumulative_accuracy": 0.7776280323450134 + }, + { + "epoch": 0.2205402196497477, + "grad_norm": 1.5914376974105835, + "learning_rate": 4.782033729191179e-06, + "loss": 0.0462, + "step": 743, + "video_reward_cumulative_accuracy": 0.7779273216689099 + }, + { + "epoch": 0.22083704363312556, + "grad_norm": 3.169459819793701, + "learning_rate": 4.780974660356276e-06, + "loss": 0.0653, + "step": 744, + "video_reward_cumulative_accuracy": 0.7782258064516129 + }, + { + "epoch": 0.22113386761650342, + "grad_norm": 2.7912189960479736, + "learning_rate": 4.77991314267369e-06, + "loss": 0.0405, + "step": 745, + "video_reward_cumulative_accuracy": 0.7778523489932886 + }, + { + "epoch": 0.22143069159988127, + "grad_norm": 2.121472120285034, + "learning_rate": 4.778849177283061e-06, + "loss": 0.0563, + "step": 746, + "video_reward_cumulative_accuracy": 0.7781501340482574 + }, + { + "epoch": 0.22172751558325912, + "grad_norm": 3.9767544269561768, + "learning_rate": 4.777782765326661e-06, + "loss": 0.0501, + "step": 747, + "video_reward_cumulative_accuracy": 0.7784471218206158 + }, + { + "epoch": 0.22202433956663697, + "grad_norm": 1.9390398263931274, + "learning_rate": 4.776713907949386e-06, + "loss": 0.065, + "step": 748, + "video_reward_cumulative_accuracy": 0.7787433155080213 + }, + { + "epoch": 0.22232116355001483, + "grad_norm": 4.238917827606201, + "learning_rate": 4.775642606298758e-06, + "loss": 0.0829, + "step": 749, + "video_reward_cumulative_accuracy": 0.7790387182910548 + }, + { + "epoch": 0.2226179875333927, + "grad_norm": 2.718532085418701, + "learning_rate": 4.774568861524923e-06, + "loss": 0.034, + "step": 750, + "video_reward_cumulative_accuracy": 0.7786666666666666 + }, + { + "epoch": 0.22291481151677056, + "grad_norm": 4.930084228515625, + "learning_rate": 4.773492674780651e-06, + "loss": 0.0583, + "step": 751, + "video_reward_cumulative_accuracy": 0.7782956058588548 + }, + { + "epoch": 0.22321163550014841, + "grad_norm": 4.565423965454102, + "learning_rate": 4.772414047221333e-06, + "loss": 0.0486, + "step": 752, + "video_reward_cumulative_accuracy": 0.7785904255319149 + }, + { + "epoch": 0.22350845948352627, + "grad_norm": 4.179710865020752, + "learning_rate": 4.77133298000498e-06, + "loss": 0.0614, + "step": 753, + "video_reward_cumulative_accuracy": 0.7782204515272244 + }, + { + "epoch": 0.22380528346690412, + "grad_norm": 5.0286383628845215, + "learning_rate": 4.7702494742922215e-06, + "loss": 0.0705, + "step": 754, + "video_reward_cumulative_accuracy": 0.7771883289124668 + }, + { + "epoch": 0.22410210745028197, + "grad_norm": 1.3847112655639648, + "learning_rate": 4.769163531246308e-06, + "loss": 0.0306, + "step": 755, + "video_reward_cumulative_accuracy": 0.7774834437086092 + }, + { + "epoch": 0.22439893143365983, + "grad_norm": 3.042525053024292, + "learning_rate": 4.7680751520331035e-06, + "loss": 0.0426, + "step": 756, + "video_reward_cumulative_accuracy": 0.7777777777777778 + }, + { + "epoch": 0.2246957554170377, + "grad_norm": 1.5790531635284424, + "learning_rate": 4.766984337821089e-06, + "loss": 0.0229, + "step": 757, + "video_reward_cumulative_accuracy": 0.7780713342140027 + }, + { + "epoch": 0.22499257940041556, + "grad_norm": 3.0712270736694336, + "learning_rate": 4.76589108978136e-06, + "loss": 0.0316, + "step": 758, + "video_reward_cumulative_accuracy": 0.7783641160949868 + }, + { + "epoch": 0.2252894033837934, + "grad_norm": 3.963674783706665, + "learning_rate": 4.764795409087623e-06, + "loss": 0.053, + "step": 759, + "video_reward_cumulative_accuracy": 0.7786561264822134 + }, + { + "epoch": 0.22558622736717127, + "grad_norm": 3.206650495529175, + "learning_rate": 4.7636972969161984e-06, + "loss": 0.0279, + "step": 760, + "video_reward_cumulative_accuracy": 0.7789473684210526 + }, + { + "epoch": 0.22588305135054912, + "grad_norm": 4.03346061706543, + "learning_rate": 4.762596754446017e-06, + "loss": 0.0558, + "step": 761, + "video_reward_cumulative_accuracy": 0.778580814717477 + }, + { + "epoch": 0.22617987533392697, + "grad_norm": 4.312930107116699, + "learning_rate": 4.7614937828586176e-06, + "loss": 0.1088, + "step": 762, + "video_reward_cumulative_accuracy": 0.7782152230971129 + }, + { + "epoch": 0.22647669931730482, + "grad_norm": 3.7724556922912598, + "learning_rate": 4.760388383338145e-06, + "loss": 0.0847, + "step": 763, + "video_reward_cumulative_accuracy": 0.7785058977719528 + }, + { + "epoch": 0.2267735233006827, + "grad_norm": 1.2291319370269775, + "learning_rate": 4.759280557071357e-06, + "loss": 0.0156, + "step": 764, + "video_reward_cumulative_accuracy": 0.7787958115183246 + }, + { + "epoch": 0.22707034728406056, + "grad_norm": 10.076594352722168, + "learning_rate": 4.758170305247608e-06, + "loss": 0.0863, + "step": 765, + "video_reward_cumulative_accuracy": 0.7790849673202614 + }, + { + "epoch": 0.2273671712674384, + "grad_norm": 2.8340132236480713, + "learning_rate": 4.757057629058865e-06, + "loss": 0.0329, + "step": 766, + "video_reward_cumulative_accuracy": 0.7787206266318538 + }, + { + "epoch": 0.22766399525081626, + "grad_norm": 2.3575525283813477, + "learning_rate": 4.755942529699692e-06, + "loss": 0.05, + "step": 767, + "video_reward_cumulative_accuracy": 0.7783572359843546 + }, + { + "epoch": 0.22796081923419412, + "grad_norm": 3.471806049346924, + "learning_rate": 4.754825008367256e-06, + "loss": 0.0655, + "step": 768, + "video_reward_cumulative_accuracy": 0.7779947916666666 + }, + { + "epoch": 0.22825764321757197, + "grad_norm": 3.795821189880371, + "learning_rate": 4.753705066261326e-06, + "loss": 0.0479, + "step": 769, + "video_reward_cumulative_accuracy": 0.7776332899869961 + }, + { + "epoch": 0.22855446720094982, + "grad_norm": 2.038712978363037, + "learning_rate": 4.752582704584267e-06, + "loss": 0.067, + "step": 770, + "video_reward_cumulative_accuracy": 0.7779220779220779 + }, + { + "epoch": 0.2288512911843277, + "grad_norm": 2.496264696121216, + "learning_rate": 4.751457924541045e-06, + "loss": 0.0409, + "step": 771, + "video_reward_cumulative_accuracy": 0.7782101167315175 + }, + { + "epoch": 0.22914811516770556, + "grad_norm": 1.8071130514144897, + "learning_rate": 4.75033072733922e-06, + "loss": 0.0145, + "step": 772, + "video_reward_cumulative_accuracy": 0.7784974093264249 + }, + { + "epoch": 0.2294449391510834, + "grad_norm": 3.4890074729919434, + "learning_rate": 4.749201114188946e-06, + "loss": 0.0712, + "step": 773, + "video_reward_cumulative_accuracy": 0.7781371280724451 + }, + { + "epoch": 0.22974176313446126, + "grad_norm": 2.5620384216308594, + "learning_rate": 4.748069086302975e-06, + "loss": 0.0741, + "step": 774, + "video_reward_cumulative_accuracy": 0.7777777777777778 + }, + { + "epoch": 0.23003858711783912, + "grad_norm": 1.5887774229049683, + "learning_rate": 4.7469346448966455e-06, + "loss": 0.0347, + "step": 775, + "video_reward_cumulative_accuracy": 0.7774193548387097 + }, + { + "epoch": 0.23033541110121697, + "grad_norm": 2.032930612564087, + "learning_rate": 4.745797791187894e-06, + "loss": 0.0537, + "step": 776, + "video_reward_cumulative_accuracy": 0.7777061855670103 + }, + { + "epoch": 0.23063223508459482, + "grad_norm": 3.915695905685425, + "learning_rate": 4.744658526397241e-06, + "loss": 0.0798, + "step": 777, + "video_reward_cumulative_accuracy": 0.7773487773487774 + }, + { + "epoch": 0.2309290590679727, + "grad_norm": 1.2229045629501343, + "learning_rate": 4.743516851747798e-06, + "loss": 0.0364, + "step": 778, + "video_reward_cumulative_accuracy": 0.7776349614395887 + }, + { + "epoch": 0.23122588305135056, + "grad_norm": 2.093339204788208, + "learning_rate": 4.742372768465264e-06, + "loss": 0.0472, + "step": 779, + "video_reward_cumulative_accuracy": 0.7772785622593068 + }, + { + "epoch": 0.2315227070347284, + "grad_norm": 2.220613479614258, + "learning_rate": 4.7412262777779235e-06, + "loss": 0.0444, + "step": 780, + "video_reward_cumulative_accuracy": 0.7769230769230769 + }, + { + "epoch": 0.23181953101810626, + "grad_norm": 4.584027290344238, + "learning_rate": 4.740077380916646e-06, + "loss": 0.0535, + "step": 781, + "video_reward_cumulative_accuracy": 0.7772087067861716 + }, + { + "epoch": 0.23211635500148411, + "grad_norm": 2.5561423301696777, + "learning_rate": 4.738926079114883e-06, + "loss": 0.0301, + "step": 782, + "video_reward_cumulative_accuracy": 0.7774936061381074 + }, + { + "epoch": 0.23241317898486197, + "grad_norm": 2.526623249053955, + "learning_rate": 4.737772373608669e-06, + "loss": 0.0315, + "step": 783, + "video_reward_cumulative_accuracy": 0.777139208173691 + }, + { + "epoch": 0.23271000296823982, + "grad_norm": 1.8118770122528076, + "learning_rate": 4.736616265636619e-06, + "loss": 0.0408, + "step": 784, + "video_reward_cumulative_accuracy": 0.7774234693877551 + }, + { + "epoch": 0.2330068269516177, + "grad_norm": 1.151275873184204, + "learning_rate": 4.735457756439926e-06, + "loss": 0.0169, + "step": 785, + "video_reward_cumulative_accuracy": 0.7777070063694268 + }, + { + "epoch": 0.23330365093499555, + "grad_norm": 5.073890209197998, + "learning_rate": 4.734296847262364e-06, + "loss": 0.0722, + "step": 786, + "video_reward_cumulative_accuracy": 0.7779898218829516 + }, + { + "epoch": 0.2336004749183734, + "grad_norm": 2.3573646545410156, + "learning_rate": 4.733133539350281e-06, + "loss": 0.0311, + "step": 787, + "video_reward_cumulative_accuracy": 0.7776365946632783 + }, + { + "epoch": 0.23389729890175126, + "grad_norm": 3.158076286315918, + "learning_rate": 4.7319678339526e-06, + "loss": 0.0754, + "step": 788, + "video_reward_cumulative_accuracy": 0.7779187817258884 + }, + { + "epoch": 0.2341941228851291, + "grad_norm": 2.9681179523468018, + "learning_rate": 4.730799732320819e-06, + "loss": 0.0412, + "step": 789, + "video_reward_cumulative_accuracy": 0.7782002534854245 + }, + { + "epoch": 0.23449094686850697, + "grad_norm": 2.718312978744507, + "learning_rate": 4.729629235709009e-06, + "loss": 0.0404, + "step": 790, + "video_reward_cumulative_accuracy": 0.7784810126582279 + }, + { + "epoch": 0.23478777085188482, + "grad_norm": 2.505716562271118, + "learning_rate": 4.728456345373813e-06, + "loss": 0.0526, + "step": 791, + "video_reward_cumulative_accuracy": 0.7787610619469026 + }, + { + "epoch": 0.2350845948352627, + "grad_norm": 3.465552806854248, + "learning_rate": 4.7272810625744405e-06, + "loss": 0.0543, + "step": 792, + "video_reward_cumulative_accuracy": 0.7790404040404041 + }, + { + "epoch": 0.23538141881864055, + "grad_norm": 2.564662218093872, + "learning_rate": 4.726103388572672e-06, + "loss": 0.0536, + "step": 793, + "video_reward_cumulative_accuracy": 0.7786885245901639 + }, + { + "epoch": 0.2356782428020184, + "grad_norm": 2.045988082885742, + "learning_rate": 4.724923324632855e-06, + "loss": 0.0451, + "step": 794, + "video_reward_cumulative_accuracy": 0.7789672544080605 + }, + { + "epoch": 0.23597506678539626, + "grad_norm": 1.944718837738037, + "learning_rate": 4.7237408720219045e-06, + "loss": 0.0348, + "step": 795, + "video_reward_cumulative_accuracy": 0.779245283018868 + }, + { + "epoch": 0.2362718907687741, + "grad_norm": 2.2963719367980957, + "learning_rate": 4.722556032009295e-06, + "loss": 0.0597, + "step": 796, + "video_reward_cumulative_accuracy": 0.7795226130653267 + }, + { + "epoch": 0.23656871475215197, + "grad_norm": 2.749324083328247, + "learning_rate": 4.72136880586707e-06, + "loss": 0.0344, + "step": 797, + "video_reward_cumulative_accuracy": 0.7797992471769134 + }, + { + "epoch": 0.23686553873552982, + "grad_norm": 3.161100387573242, + "learning_rate": 4.7201791948698315e-06, + "loss": 0.0518, + "step": 798, + "video_reward_cumulative_accuracy": 0.7800751879699248 + }, + { + "epoch": 0.2371623627189077, + "grad_norm": 8.226521492004395, + "learning_rate": 4.71898720029474e-06, + "loss": 0.1091, + "step": 799, + "video_reward_cumulative_accuracy": 0.77909887359199 + }, + { + "epoch": 0.23745918670228555, + "grad_norm": 4.398781776428223, + "learning_rate": 4.717792823421521e-06, + "loss": 0.0586, + "step": 800, + "video_reward_cumulative_accuracy": 0.77875 + }, + { + "epoch": 0.23745918670228555, + "eval_runtime": 128.8206, + "eval_samples_per_second": 6.125, + "eval_steps_per_second": 0.769, + "eval_test_set_accuracy": 0.7487373737373737, + "step": 800 + }, + { + "epoch": 0.2377560106856634, + "grad_norm": 3.7454681396484375, + "learning_rate": 4.71659606553245e-06, + "loss": 0.0363, + "step": 801, + "video_reward_cumulative_accuracy": 0.7790262172284644 + }, + { + "epoch": 0.23805283466904126, + "grad_norm": 1.430262804031372, + "learning_rate": 4.7153969279123665e-06, + "loss": 0.046, + "step": 802, + "video_reward_cumulative_accuracy": 0.7793017456359103 + }, + { + "epoch": 0.2383496586524191, + "grad_norm": 5.489625453948975, + "learning_rate": 4.7141954118486585e-06, + "loss": 0.0652, + "step": 803, + "video_reward_cumulative_accuracy": 0.7789539227895392 + }, + { + "epoch": 0.23864648263579696, + "grad_norm": 2.220259666442871, + "learning_rate": 4.712991518631272e-06, + "loss": 0.0209, + "step": 804, + "video_reward_cumulative_accuracy": 0.779228855721393 + }, + { + "epoch": 0.23894330661917482, + "grad_norm": 2.3900604248046875, + "learning_rate": 4.711785249552701e-06, + "loss": 0.0485, + "step": 805, + "video_reward_cumulative_accuracy": 0.7795031055900621 + }, + { + "epoch": 0.2392401306025527, + "grad_norm": 2.9659066200256348, + "learning_rate": 4.710576605907995e-06, + "loss": 0.0493, + "step": 806, + "video_reward_cumulative_accuracy": 0.7797766749379652 + }, + { + "epoch": 0.23953695458593055, + "grad_norm": 5.7252326011657715, + "learning_rate": 4.709365588994749e-06, + "loss": 0.0617, + "step": 807, + "video_reward_cumulative_accuracy": 0.7794299876084263 + }, + { + "epoch": 0.2398337785693084, + "grad_norm": 3.0332016944885254, + "learning_rate": 4.708152200113106e-06, + "loss": 0.0504, + "step": 808, + "video_reward_cumulative_accuracy": 0.7797029702970297 + }, + { + "epoch": 0.24013060255268626, + "grad_norm": 1.474946141242981, + "learning_rate": 4.706936440565759e-06, + "loss": 0.0367, + "step": 809, + "video_reward_cumulative_accuracy": 0.7799752781211372 + }, + { + "epoch": 0.2404274265360641, + "grad_norm": 5.749459743499756, + "learning_rate": 4.705718311657943e-06, + "loss": 0.0698, + "step": 810, + "video_reward_cumulative_accuracy": 0.7802469135802469 + }, + { + "epoch": 0.24072425051944196, + "grad_norm": 1.7116312980651855, + "learning_rate": 4.704497814697436e-06, + "loss": 0.0396, + "step": 811, + "video_reward_cumulative_accuracy": 0.7805178791615289 + }, + { + "epoch": 0.24102107450281982, + "grad_norm": 1.7955880165100098, + "learning_rate": 4.703274950994563e-06, + "loss": 0.0209, + "step": 812, + "video_reward_cumulative_accuracy": 0.7807881773399015 + }, + { + "epoch": 0.2413178984861977, + "grad_norm": 4.338817596435547, + "learning_rate": 4.702049721862184e-06, + "loss": 0.0471, + "step": 813, + "video_reward_cumulative_accuracy": 0.7810578105781057 + }, + { + "epoch": 0.24161472246957555, + "grad_norm": 1.308546781539917, + "learning_rate": 4.700822128615703e-06, + "loss": 0.028, + "step": 814, + "video_reward_cumulative_accuracy": 0.7813267813267813 + }, + { + "epoch": 0.2419115464529534, + "grad_norm": 5.44117546081543, + "learning_rate": 4.699592172573061e-06, + "loss": 0.08, + "step": 815, + "video_reward_cumulative_accuracy": 0.7815950920245399 + }, + { + "epoch": 0.24220837043633126, + "grad_norm": 3.996955394744873, + "learning_rate": 4.698359855054733e-06, + "loss": 0.0551, + "step": 816, + "video_reward_cumulative_accuracy": 0.7818627450980392 + }, + { + "epoch": 0.2425051944197091, + "grad_norm": 3.2032980918884277, + "learning_rate": 4.6971251773837335e-06, + "loss": 0.0969, + "step": 817, + "video_reward_cumulative_accuracy": 0.7821297429620563 + }, + { + "epoch": 0.24280201840308696, + "grad_norm": 3.8643088340759277, + "learning_rate": 4.695888140885608e-06, + "loss": 0.0489, + "step": 818, + "video_reward_cumulative_accuracy": 0.78239608801956 + }, + { + "epoch": 0.24309884238646481, + "grad_norm": 1.5098183155059814, + "learning_rate": 4.6946487468884346e-06, + "loss": 0.0363, + "step": 819, + "video_reward_cumulative_accuracy": 0.7826617826617827 + }, + { + "epoch": 0.2433956663698427, + "grad_norm": 2.6578338146209717, + "learning_rate": 4.693406996722824e-06, + "loss": 0.0805, + "step": 820, + "video_reward_cumulative_accuracy": 0.7829268292682927 + }, + { + "epoch": 0.24369249035322055, + "grad_norm": 2.7649166584014893, + "learning_rate": 4.692162891721917e-06, + "loss": 0.0424, + "step": 821, + "video_reward_cumulative_accuracy": 0.7825822168087698 + }, + { + "epoch": 0.2439893143365984, + "grad_norm": 1.1864911317825317, + "learning_rate": 4.690916433221377e-06, + "loss": 0.0167, + "step": 822, + "video_reward_cumulative_accuracy": 0.7828467153284672 + }, + { + "epoch": 0.24428613831997625, + "grad_norm": 1.7823535203933716, + "learning_rate": 4.6896676225594016e-06, + "loss": 0.0778, + "step": 823, + "video_reward_cumulative_accuracy": 0.7831105710814095 + }, + { + "epoch": 0.2445829623033541, + "grad_norm": 3.1167526245117188, + "learning_rate": 4.68841646107671e-06, + "loss": 0.0327, + "step": 824, + "video_reward_cumulative_accuracy": 0.783373786407767 + }, + { + "epoch": 0.24487978628673196, + "grad_norm": 3.2958545684814453, + "learning_rate": 4.6871629501165435e-06, + "loss": 0.024, + "step": 825, + "video_reward_cumulative_accuracy": 0.7836363636363637 + }, + { + "epoch": 0.2451766102701098, + "grad_norm": 3.2736194133758545, + "learning_rate": 4.68590709102467e-06, + "loss": 0.0416, + "step": 826, + "video_reward_cumulative_accuracy": 0.7832929782082324 + }, + { + "epoch": 0.2454734342534877, + "grad_norm": 2.1489417552948, + "learning_rate": 4.684648885149374e-06, + "loss": 0.0491, + "step": 827, + "video_reward_cumulative_accuracy": 0.7835550181378477 + }, + { + "epoch": 0.24577025823686555, + "grad_norm": 1.746704339981079, + "learning_rate": 4.6833883338414635e-06, + "loss": 0.0513, + "step": 828, + "video_reward_cumulative_accuracy": 0.7832125603864735 + }, + { + "epoch": 0.2460670822202434, + "grad_norm": 2.047314167022705, + "learning_rate": 4.682125438454261e-06, + "loss": 0.0399, + "step": 829, + "video_reward_cumulative_accuracy": 0.7828709288299156 + }, + { + "epoch": 0.24636390620362125, + "grad_norm": 1.1579736471176147, + "learning_rate": 4.680860200343609e-06, + "loss": 0.0214, + "step": 830, + "video_reward_cumulative_accuracy": 0.7831325301204819 + }, + { + "epoch": 0.2466607301869991, + "grad_norm": 3.0398266315460205, + "learning_rate": 4.679592620867862e-06, + "loss": 0.0607, + "step": 831, + "video_reward_cumulative_accuracy": 0.782791817087846 + }, + { + "epoch": 0.24695755417037696, + "grad_norm": 2.732314348220825, + "learning_rate": 4.678322701387891e-06, + "loss": 0.0403, + "step": 832, + "video_reward_cumulative_accuracy": 0.7824519230769231 + }, + { + "epoch": 0.2472543781537548, + "grad_norm": 2.1207661628723145, + "learning_rate": 4.677050443267076e-06, + "loss": 0.0301, + "step": 833, + "video_reward_cumulative_accuracy": 0.7827130852340937 + }, + { + "epoch": 0.2475512021371327, + "grad_norm": 1.3241153955459595, + "learning_rate": 4.675775847871311e-06, + "loss": 0.0499, + "step": 834, + "video_reward_cumulative_accuracy": 0.7823741007194245 + }, + { + "epoch": 0.24784802612051055, + "grad_norm": 1.7813966274261475, + "learning_rate": 4.6744989165689975e-06, + "loss": 0.0392, + "step": 835, + "video_reward_cumulative_accuracy": 0.7826347305389222 + }, + { + "epoch": 0.2481448501038884, + "grad_norm": 1.4840867519378662, + "learning_rate": 4.673219650731045e-06, + "loss": 0.0193, + "step": 836, + "video_reward_cumulative_accuracy": 0.7828947368421053 + }, + { + "epoch": 0.24844167408726625, + "grad_norm": 0.5827304720878601, + "learning_rate": 4.67193805173087e-06, + "loss": 0.0077, + "step": 837, + "video_reward_cumulative_accuracy": 0.7831541218637993 + }, + { + "epoch": 0.2487384980706441, + "grad_norm": 2.449009656906128, + "learning_rate": 4.670654120944393e-06, + "loss": 0.0828, + "step": 838, + "video_reward_cumulative_accuracy": 0.7828162291169452 + }, + { + "epoch": 0.24903532205402196, + "grad_norm": 2.1830556392669678, + "learning_rate": 4.669367859750038e-06, + "loss": 0.0239, + "step": 839, + "video_reward_cumulative_accuracy": 0.7830750893921334 + }, + { + "epoch": 0.2493321460373998, + "grad_norm": 2.4376471042633057, + "learning_rate": 4.668079269528732e-06, + "loss": 0.0652, + "step": 840, + "video_reward_cumulative_accuracy": 0.7833333333333333 + }, + { + "epoch": 0.2496289700207777, + "grad_norm": 8.17204761505127, + "learning_rate": 4.666788351663902e-06, + "loss": 0.1043, + "step": 841, + "video_reward_cumulative_accuracy": 0.7835909631391201 + }, + { + "epoch": 0.24992579400415554, + "grad_norm": 4.692628860473633, + "learning_rate": 4.6654951075414715e-06, + "loss": 0.1819, + "step": 842, + "video_reward_cumulative_accuracy": 0.7838479809976246 + }, + { + "epoch": 0.25022261798753337, + "grad_norm": 2.7783429622650146, + "learning_rate": 4.664199538549865e-06, + "loss": 0.0546, + "step": 843, + "video_reward_cumulative_accuracy": 0.7841043890865955 + }, + { + "epoch": 0.25051944197091125, + "grad_norm": 2.2595643997192383, + "learning_rate": 4.662901646080002e-06, + "loss": 0.0521, + "step": 844, + "video_reward_cumulative_accuracy": 0.7837677725118484 + }, + { + "epoch": 0.25081626595428913, + "grad_norm": 6.65918493270874, + "learning_rate": 4.661601431525295e-06, + "loss": 0.106, + "step": 845, + "video_reward_cumulative_accuracy": 0.7840236686390533 + }, + { + "epoch": 0.25111308993766696, + "grad_norm": 2.6098530292510986, + "learning_rate": 4.660298896281653e-06, + "loss": 0.0738, + "step": 846, + "video_reward_cumulative_accuracy": 0.7836879432624113 + }, + { + "epoch": 0.25140991392104484, + "grad_norm": 1.8904296159744263, + "learning_rate": 4.658994041747471e-06, + "loss": 0.0658, + "step": 847, + "video_reward_cumulative_accuracy": 0.7839433293978748 + }, + { + "epoch": 0.25170673790442266, + "grad_norm": 3.186429977416992, + "learning_rate": 4.657686869323638e-06, + "loss": 0.0371, + "step": 848, + "video_reward_cumulative_accuracy": 0.7841981132075472 + }, + { + "epoch": 0.25200356188780054, + "grad_norm": 1.2086502313613892, + "learning_rate": 4.6563773804135305e-06, + "loss": 0.038, + "step": 849, + "video_reward_cumulative_accuracy": 0.784452296819788 + }, + { + "epoch": 0.25230038587117837, + "grad_norm": 1.1845347881317139, + "learning_rate": 4.655065576423013e-06, + "loss": 0.0215, + "step": 850, + "video_reward_cumulative_accuracy": 0.7847058823529411 + }, + { + "epoch": 0.25259720985455625, + "grad_norm": 2.5198981761932373, + "learning_rate": 4.6537514587604316e-06, + "loss": 0.0581, + "step": 851, + "video_reward_cumulative_accuracy": 0.7849588719153937 + }, + { + "epoch": 0.25289403383793413, + "grad_norm": 4.27205753326416, + "learning_rate": 4.652435028836622e-06, + "loss": 0.0617, + "step": 852, + "video_reward_cumulative_accuracy": 0.7846244131455399 + }, + { + "epoch": 0.25319085782131195, + "grad_norm": 1.900675892829895, + "learning_rate": 4.651116288064899e-06, + "loss": 0.0384, + "step": 853, + "video_reward_cumulative_accuracy": 0.7848769050410317 + }, + { + "epoch": 0.25348768180468984, + "grad_norm": 1.898750901222229, + "learning_rate": 4.649795237861058e-06, + "loss": 0.0461, + "step": 854, + "video_reward_cumulative_accuracy": 0.7851288056206089 + }, + { + "epoch": 0.25378450578806766, + "grad_norm": 1.234446406364441, + "learning_rate": 4.648471879643374e-06, + "loss": 0.0797, + "step": 855, + "video_reward_cumulative_accuracy": 0.7847953216374269 + }, + { + "epoch": 0.25408132977144554, + "grad_norm": 2.0378189086914062, + "learning_rate": 4.647146214832602e-06, + "loss": 0.0835, + "step": 856, + "video_reward_cumulative_accuracy": 0.7844626168224299 + }, + { + "epoch": 0.25437815375482337, + "grad_norm": 1.5428322553634644, + "learning_rate": 4.645818244851971e-06, + "loss": 0.0477, + "step": 857, + "video_reward_cumulative_accuracy": 0.7841306884480747 + }, + { + "epoch": 0.25467497773820125, + "grad_norm": 1.234578251838684, + "learning_rate": 4.644487971127186e-06, + "loss": 0.0556, + "step": 858, + "video_reward_cumulative_accuracy": 0.7843822843822844 + }, + { + "epoch": 0.2549718017215791, + "grad_norm": 2.008531093597412, + "learning_rate": 4.643155395086425e-06, + "loss": 0.0508, + "step": 859, + "video_reward_cumulative_accuracy": 0.7846332945285215 + }, + { + "epoch": 0.25526862570495695, + "grad_norm": 2.030647039413452, + "learning_rate": 4.6418205181603385e-06, + "loss": 0.0239, + "step": 860, + "video_reward_cumulative_accuracy": 0.7848837209302325 + }, + { + "epoch": 0.25556544968833483, + "grad_norm": 3.389904022216797, + "learning_rate": 4.640483341782044e-06, + "loss": 0.0565, + "step": 861, + "video_reward_cumulative_accuracy": 0.7851335656213705 + }, + { + "epoch": 0.25586227367171266, + "grad_norm": 1.0547555685043335, + "learning_rate": 4.639143867387132e-06, + "loss": 0.0433, + "step": 862, + "video_reward_cumulative_accuracy": 0.7853828306264501 + }, + { + "epoch": 0.25615909765509054, + "grad_norm": 2.2876555919647217, + "learning_rate": 4.6378020964136586e-06, + "loss": 0.0344, + "step": 863, + "video_reward_cumulative_accuracy": 0.7856315179606026 + }, + { + "epoch": 0.25645592163846836, + "grad_norm": 1.4506580829620361, + "learning_rate": 4.636458030302144e-06, + "loss": 0.0418, + "step": 864, + "video_reward_cumulative_accuracy": 0.7858796296296297 + }, + { + "epoch": 0.25675274562184625, + "grad_norm": 5.08413028717041, + "learning_rate": 4.635111670495574e-06, + "loss": 0.049, + "step": 865, + "video_reward_cumulative_accuracy": 0.7855491329479769 + }, + { + "epoch": 0.2570495696052241, + "grad_norm": 4.657954692840576, + "learning_rate": 4.6337630184393965e-06, + "loss": 0.0434, + "step": 866, + "video_reward_cumulative_accuracy": 0.785796766743649 + }, + { + "epoch": 0.25734639358860195, + "grad_norm": 1.428345799446106, + "learning_rate": 4.632412075581521e-06, + "loss": 0.0475, + "step": 867, + "video_reward_cumulative_accuracy": 0.7860438292964245 + }, + { + "epoch": 0.25764321757197983, + "grad_norm": 0.920592188835144, + "learning_rate": 4.6310588433723145e-06, + "loss": 0.0175, + "step": 868, + "video_reward_cumulative_accuracy": 0.7862903225806451 + }, + { + "epoch": 0.25794004155535766, + "grad_norm": 3.902883291244507, + "learning_rate": 4.629703323264605e-06, + "loss": 0.0379, + "step": 869, + "video_reward_cumulative_accuracy": 0.786536248561565 + }, + { + "epoch": 0.25823686553873554, + "grad_norm": 2.918687582015991, + "learning_rate": 4.6283455167136724e-06, + "loss": 0.0538, + "step": 870, + "video_reward_cumulative_accuracy": 0.7867816091954023 + }, + { + "epoch": 0.25853368952211336, + "grad_norm": 2.900933027267456, + "learning_rate": 4.626985425177256e-06, + "loss": 0.0521, + "step": 871, + "video_reward_cumulative_accuracy": 0.7870264064293915 + }, + { + "epoch": 0.25883051350549124, + "grad_norm": 3.1551060676574707, + "learning_rate": 4.625623050115545e-06, + "loss": 0.0863, + "step": 872, + "video_reward_cumulative_accuracy": 0.786697247706422 + }, + { + "epoch": 0.2591273374888691, + "grad_norm": 1.1339958906173706, + "learning_rate": 4.6242583929911825e-06, + "loss": 0.0553, + "step": 873, + "video_reward_cumulative_accuracy": 0.786368843069874 + }, + { + "epoch": 0.25942416147224695, + "grad_norm": 1.0027540922164917, + "learning_rate": 4.6228914552692585e-06, + "loss": 0.0178, + "step": 874, + "video_reward_cumulative_accuracy": 0.7866132723112128 + }, + { + "epoch": 0.25972098545562483, + "grad_norm": 3.1570804119110107, + "learning_rate": 4.621522238417314e-06, + "loss": 0.1099, + "step": 875, + "video_reward_cumulative_accuracy": 0.7868571428571428 + }, + { + "epoch": 0.26001780943900266, + "grad_norm": 2.4433491230010986, + "learning_rate": 4.620150743905338e-06, + "loss": 0.0314, + "step": 876, + "video_reward_cumulative_accuracy": 0.7865296803652968 + }, + { + "epoch": 0.26031463342238054, + "grad_norm": 3.163719654083252, + "learning_rate": 4.6187769732057595e-06, + "loss": 0.0235, + "step": 877, + "video_reward_cumulative_accuracy": 0.7867730900798175 + }, + { + "epoch": 0.26061145740575836, + "grad_norm": 4.77492094039917, + "learning_rate": 4.617400927793457e-06, + "loss": 0.0781, + "step": 878, + "video_reward_cumulative_accuracy": 0.7864464692482915 + }, + { + "epoch": 0.26090828138913624, + "grad_norm": 1.8638689517974854, + "learning_rate": 4.6160226091457495e-06, + "loss": 0.0368, + "step": 879, + "video_reward_cumulative_accuracy": 0.7861205915813424 + }, + { + "epoch": 0.2612051053725141, + "grad_norm": 2.038536548614502, + "learning_rate": 4.6146420187423935e-06, + "loss": 0.0344, + "step": 880, + "video_reward_cumulative_accuracy": 0.7863636363636364 + }, + { + "epoch": 0.26150192935589195, + "grad_norm": 2.544743299484253, + "learning_rate": 4.613259158065588e-06, + "loss": 0.0741, + "step": 881, + "video_reward_cumulative_accuracy": 0.7866061293984109 + }, + { + "epoch": 0.26179875333926983, + "grad_norm": 4.100607395172119, + "learning_rate": 4.611874028599969e-06, + "loss": 0.0515, + "step": 882, + "video_reward_cumulative_accuracy": 0.7868480725623582 + }, + { + "epoch": 0.26209557732264765, + "grad_norm": 0.9536772966384888, + "learning_rate": 4.610486631832606e-06, + "loss": 0.0332, + "step": 883, + "video_reward_cumulative_accuracy": 0.7865232163080408 + }, + { + "epoch": 0.26239240130602554, + "grad_norm": 1.7179930210113525, + "learning_rate": 4.609096969253005e-06, + "loss": 0.0275, + "step": 884, + "video_reward_cumulative_accuracy": 0.7867647058823529 + }, + { + "epoch": 0.26268922528940336, + "grad_norm": 2.068021059036255, + "learning_rate": 4.607705042353104e-06, + "loss": 0.0526, + "step": 885, + "video_reward_cumulative_accuracy": 0.7870056497175141 + }, + { + "epoch": 0.26298604927278124, + "grad_norm": 3.365269899368286, + "learning_rate": 4.60631085262727e-06, + "loss": 0.0301, + "step": 886, + "video_reward_cumulative_accuracy": 0.7872460496613995 + }, + { + "epoch": 0.2632828732561591, + "grad_norm": 4.123976707458496, + "learning_rate": 4.604914401572301e-06, + "loss": 0.0437, + "step": 887, + "video_reward_cumulative_accuracy": 0.7874859075535513 + }, + { + "epoch": 0.26357969723953695, + "grad_norm": 2.0754764080047607, + "learning_rate": 4.603515690687425e-06, + "loss": 0.0825, + "step": 888, + "video_reward_cumulative_accuracy": 0.7877252252252253 + }, + { + "epoch": 0.26387652122291483, + "grad_norm": 2.7115345001220703, + "learning_rate": 4.602114721474293e-06, + "loss": 0.0596, + "step": 889, + "video_reward_cumulative_accuracy": 0.7879640044994376 + }, + { + "epoch": 0.26417334520629265, + "grad_norm": 1.4795091152191162, + "learning_rate": 4.60071149543698e-06, + "loss": 0.0438, + "step": 890, + "video_reward_cumulative_accuracy": 0.7882022471910113 + }, + { + "epoch": 0.26447016918967053, + "grad_norm": 4.9827985763549805, + "learning_rate": 4.599306014081987e-06, + "loss": 0.0523, + "step": 891, + "video_reward_cumulative_accuracy": 0.7878787878787878 + }, + { + "epoch": 0.26476699317304836, + "grad_norm": 3.740387439727783, + "learning_rate": 4.597898278918233e-06, + "loss": 0.0927, + "step": 892, + "video_reward_cumulative_accuracy": 0.7881165919282511 + }, + { + "epoch": 0.26506381715642624, + "grad_norm": 2.645918130874634, + "learning_rate": 4.596488291457061e-06, + "loss": 0.0482, + "step": 893, + "video_reward_cumulative_accuracy": 0.7883538633818589 + }, + { + "epoch": 0.2653606411398041, + "grad_norm": 3.115306854248047, + "learning_rate": 4.595076053212226e-06, + "loss": 0.0522, + "step": 894, + "video_reward_cumulative_accuracy": 0.7885906040268457 + }, + { + "epoch": 0.26565746512318195, + "grad_norm": 2.4514198303222656, + "learning_rate": 4.593661565699905e-06, + "loss": 0.0442, + "step": 895, + "video_reward_cumulative_accuracy": 0.788826815642458 + }, + { + "epoch": 0.2659542891065598, + "grad_norm": 1.658631443977356, + "learning_rate": 4.592244830438688e-06, + "loss": 0.0353, + "step": 896, + "video_reward_cumulative_accuracy": 0.7890625 + }, + { + "epoch": 0.26625111308993765, + "grad_norm": 4.696023464202881, + "learning_rate": 4.590825848949576e-06, + "loss": 0.0655, + "step": 897, + "video_reward_cumulative_accuracy": 0.7892976588628763 + }, + { + "epoch": 0.26654793707331553, + "grad_norm": 4.951619625091553, + "learning_rate": 4.589404622755985e-06, + "loss": 0.0759, + "step": 898, + "video_reward_cumulative_accuracy": 0.7895322939866369 + }, + { + "epoch": 0.26684476105669336, + "grad_norm": 4.583511829376221, + "learning_rate": 4.587981153383738e-06, + "loss": 0.1074, + "step": 899, + "video_reward_cumulative_accuracy": 0.789210233592881 + }, + { + "epoch": 0.26714158504007124, + "grad_norm": 1.0951406955718994, + "learning_rate": 4.586555442361068e-06, + "loss": 0.0384, + "step": 900, + "video_reward_cumulative_accuracy": 0.7894444444444444 + }, + { + "epoch": 0.2674384090234491, + "grad_norm": 2.3430957794189453, + "learning_rate": 4.585127491218615e-06, + "loss": 0.0268, + "step": 901, + "video_reward_cumulative_accuracy": 0.7896781354051055 + }, + { + "epoch": 0.26773523300682694, + "grad_norm": 3.9636011123657227, + "learning_rate": 4.5836973014894225e-06, + "loss": 0.0735, + "step": 902, + "video_reward_cumulative_accuracy": 0.7899113082039911 + }, + { + "epoch": 0.2680320569902048, + "grad_norm": 3.968548536300659, + "learning_rate": 4.582264874708937e-06, + "loss": 0.0568, + "step": 903, + "video_reward_cumulative_accuracy": 0.7901439645625692 + }, + { + "epoch": 0.26832888097358265, + "grad_norm": 1.8706952333450317, + "learning_rate": 4.58083021241501e-06, + "loss": 0.0886, + "step": 904, + "video_reward_cumulative_accuracy": 0.7898230088495575 + }, + { + "epoch": 0.26862570495696053, + "grad_norm": 2.0862226486206055, + "learning_rate": 4.579393316147888e-06, + "loss": 0.057, + "step": 905, + "video_reward_cumulative_accuracy": 0.7900552486187845 + }, + { + "epoch": 0.26892252894033836, + "grad_norm": 2.1430046558380127, + "learning_rate": 4.577954187450221e-06, + "loss": 0.051, + "step": 906, + "video_reward_cumulative_accuracy": 0.7902869757174393 + }, + { + "epoch": 0.26921935292371624, + "grad_norm": 1.2016264200210571, + "learning_rate": 4.576512827867051e-06, + "loss": 0.0298, + "step": 907, + "video_reward_cumulative_accuracy": 0.7905181918412348 + }, + { + "epoch": 0.2695161769070941, + "grad_norm": 2.2290544509887695, + "learning_rate": 4.57506923894582e-06, + "loss": 0.059, + "step": 908, + "video_reward_cumulative_accuracy": 0.7907488986784141 + }, + { + "epoch": 0.26981300089047194, + "grad_norm": 1.2380579710006714, + "learning_rate": 4.573623422236359e-06, + "loss": 0.0574, + "step": 909, + "video_reward_cumulative_accuracy": 0.7904290429042904 + }, + { + "epoch": 0.2701098248738498, + "grad_norm": 1.3100279569625854, + "learning_rate": 4.572175379290892e-06, + "loss": 0.0188, + "step": 910, + "video_reward_cumulative_accuracy": 0.7906593406593406 + }, + { + "epoch": 0.27040664885722765, + "grad_norm": 1.4202399253845215, + "learning_rate": 4.570725111664035e-06, + "loss": 0.0274, + "step": 911, + "video_reward_cumulative_accuracy": 0.7908891328210758 + }, + { + "epoch": 0.27070347284060553, + "grad_norm": 1.6007649898529053, + "learning_rate": 4.569272620912791e-06, + "loss": 0.0389, + "step": 912, + "video_reward_cumulative_accuracy": 0.7911184210526315 + }, + { + "epoch": 0.27100029682398336, + "grad_norm": 1.8392062187194824, + "learning_rate": 4.56781790859655e-06, + "loss": 0.07, + "step": 913, + "video_reward_cumulative_accuracy": 0.7913472070098576 + }, + { + "epoch": 0.27129712080736124, + "grad_norm": 2.425304651260376, + "learning_rate": 4.566360976277086e-06, + "loss": 0.0375, + "step": 914, + "video_reward_cumulative_accuracy": 0.7915754923413567 + }, + { + "epoch": 0.2715939447907391, + "grad_norm": 3.8037993907928467, + "learning_rate": 4.564901825518558e-06, + "loss": 0.066, + "step": 915, + "video_reward_cumulative_accuracy": 0.7918032786885246 + }, + { + "epoch": 0.27189076877411694, + "grad_norm": 2.4875757694244385, + "learning_rate": 4.563440457887506e-06, + "loss": 0.0269, + "step": 916, + "video_reward_cumulative_accuracy": 0.7920305676855895 + }, + { + "epoch": 0.2721875927574948, + "grad_norm": 2.975370168685913, + "learning_rate": 4.561976874952849e-06, + "loss": 0.0474, + "step": 917, + "video_reward_cumulative_accuracy": 0.792257360959651 + }, + { + "epoch": 0.27248441674087265, + "grad_norm": 1.9937337636947632, + "learning_rate": 4.560511078285885e-06, + "loss": 0.092, + "step": 918, + "video_reward_cumulative_accuracy": 0.7924836601307189 + }, + { + "epoch": 0.27278124072425053, + "grad_norm": 2.1573855876922607, + "learning_rate": 4.559043069460291e-06, + "loss": 0.0465, + "step": 919, + "video_reward_cumulative_accuracy": 0.7921653971708379 + }, + { + "epoch": 0.27307806470762835, + "grad_norm": 1.528318166732788, + "learning_rate": 4.557572850052116e-06, + "loss": 0.0668, + "step": 920, + "video_reward_cumulative_accuracy": 0.7918478260869565 + }, + { + "epoch": 0.27337488869100623, + "grad_norm": 3.7441587448120117, + "learning_rate": 4.556100421639783e-06, + "loss": 0.1027, + "step": 921, + "video_reward_cumulative_accuracy": 0.7920738327904452 + }, + { + "epoch": 0.2736717126743841, + "grad_norm": 1.5246275663375854, + "learning_rate": 4.554625785804087e-06, + "loss": 0.0302, + "step": 922, + "video_reward_cumulative_accuracy": 0.7922993492407809 + }, + { + "epoch": 0.27396853665776194, + "grad_norm": 3.7128987312316895, + "learning_rate": 4.553148944128192e-06, + "loss": 0.0519, + "step": 923, + "video_reward_cumulative_accuracy": 0.7925243770314193 + }, + { + "epoch": 0.2742653606411398, + "grad_norm": 5.355534076690674, + "learning_rate": 4.551669898197631e-06, + "loss": 0.0838, + "step": 924, + "video_reward_cumulative_accuracy": 0.7927489177489178 + }, + { + "epoch": 0.27456218462451765, + "grad_norm": 3.1454975605010986, + "learning_rate": 4.550188649600306e-06, + "loss": 0.0614, + "step": 925, + "video_reward_cumulative_accuracy": 0.792972972972973 + }, + { + "epoch": 0.2748590086078955, + "grad_norm": 6.770321846008301, + "learning_rate": 4.548705199926478e-06, + "loss": 0.0797, + "step": 926, + "video_reward_cumulative_accuracy": 0.7931965442764579 + }, + { + "epoch": 0.27515583259127335, + "grad_norm": 4.7147064208984375, + "learning_rate": 4.547219550768774e-06, + "loss": 0.0511, + "step": 927, + "video_reward_cumulative_accuracy": 0.7928802588996764 + }, + { + "epoch": 0.27545265657465123, + "grad_norm": 3.4447805881500244, + "learning_rate": 4.545731703722185e-06, + "loss": 0.0591, + "step": 928, + "video_reward_cumulative_accuracy": 0.7931034482758621 + }, + { + "epoch": 0.2757494805580291, + "grad_norm": 1.9188413619995117, + "learning_rate": 4.544241660384057e-06, + "loss": 0.0486, + "step": 929, + "video_reward_cumulative_accuracy": 0.7927879440258342 + }, + { + "epoch": 0.27604630454140694, + "grad_norm": 0.8871810436248779, + "learning_rate": 4.542749422354098e-06, + "loss": 0.0457, + "step": 930, + "video_reward_cumulative_accuracy": 0.7924731182795699 + }, + { + "epoch": 0.2763431285247848, + "grad_norm": 1.0391457080841064, + "learning_rate": 4.54125499123437e-06, + "loss": 0.0463, + "step": 931, + "video_reward_cumulative_accuracy": 0.7926960257787325 + }, + { + "epoch": 0.27663995250816265, + "grad_norm": 3.6447765827178955, + "learning_rate": 4.539758368629288e-06, + "loss": 0.0333, + "step": 932, + "video_reward_cumulative_accuracy": 0.7929184549356223 + }, + { + "epoch": 0.2769367764915405, + "grad_norm": 2.135462760925293, + "learning_rate": 4.538259556145623e-06, + "loss": 0.0349, + "step": 933, + "video_reward_cumulative_accuracy": 0.7931404072883173 + }, + { + "epoch": 0.27723360047491835, + "grad_norm": 1.0831674337387085, + "learning_rate": 4.5367585553924965e-06, + "loss": 0.0454, + "step": 934, + "video_reward_cumulative_accuracy": 0.7933618843683083 + }, + { + "epoch": 0.27753042445829623, + "grad_norm": 2.257012128829956, + "learning_rate": 4.5352553679813775e-06, + "loss": 0.0664, + "step": 935, + "video_reward_cumulative_accuracy": 0.793048128342246 + }, + { + "epoch": 0.2778272484416741, + "grad_norm": 1.1687437295913696, + "learning_rate": 4.5337499955260825e-06, + "loss": 0.0443, + "step": 936, + "video_reward_cumulative_accuracy": 0.7932692307692307 + }, + { + "epoch": 0.27812407242505194, + "grad_norm": 1.566916823387146, + "learning_rate": 4.532242439642778e-06, + "loss": 0.0663, + "step": 937, + "video_reward_cumulative_accuracy": 0.7934898612593383 + }, + { + "epoch": 0.2784208964084298, + "grad_norm": 2.265585422515869, + "learning_rate": 4.530732701949968e-06, + "loss": 0.0347, + "step": 938, + "video_reward_cumulative_accuracy": 0.7937100213219617 + }, + { + "epoch": 0.27871772039180764, + "grad_norm": 1.542738437652588, + "learning_rate": 4.529220784068505e-06, + "loss": 0.0337, + "step": 939, + "video_reward_cumulative_accuracy": 0.7933972310969116 + }, + { + "epoch": 0.2790145443751855, + "grad_norm": 1.868859887123108, + "learning_rate": 4.527706687621578e-06, + "loss": 0.0593, + "step": 940, + "video_reward_cumulative_accuracy": 0.7930851063829787 + }, + { + "epoch": 0.27931136835856335, + "grad_norm": 1.1284223794937134, + "learning_rate": 4.526190414234718e-06, + "loss": 0.0271, + "step": 941, + "video_reward_cumulative_accuracy": 0.7933049946865037 + }, + { + "epoch": 0.27960819234194123, + "grad_norm": 2.6170237064361572, + "learning_rate": 4.524671965535791e-06, + "loss": 0.0675, + "step": 942, + "video_reward_cumulative_accuracy": 0.7929936305732485 + }, + { + "epoch": 0.2799050163253191, + "grad_norm": 1.8915306329727173, + "learning_rate": 4.523151343154999e-06, + "loss": 0.0355, + "step": 943, + "video_reward_cumulative_accuracy": 0.7932131495227995 + }, + { + "epoch": 0.28020184030869694, + "grad_norm": 1.1092487573623657, + "learning_rate": 4.521628548724877e-06, + "loss": 0.0447, + "step": 944, + "video_reward_cumulative_accuracy": 0.7934322033898306 + }, + { + "epoch": 0.2804986642920748, + "grad_norm": 2.1226096153259277, + "learning_rate": 4.5201035838802935e-06, + "loss": 0.076, + "step": 945, + "video_reward_cumulative_accuracy": 0.7936507936507936 + }, + { + "epoch": 0.28079548827545264, + "grad_norm": 3.9487340450286865, + "learning_rate": 4.518576450258446e-06, + "loss": 0.102, + "step": 946, + "video_reward_cumulative_accuracy": 0.7938689217758985 + }, + { + "epoch": 0.2810923122588305, + "grad_norm": 2.5979459285736084, + "learning_rate": 4.517047149498861e-06, + "loss": 0.0372, + "step": 947, + "video_reward_cumulative_accuracy": 0.7940865892291447 + }, + { + "epoch": 0.28138913624220835, + "grad_norm": 5.942515850067139, + "learning_rate": 4.51551568324339e-06, + "loss": 0.1805, + "step": 948, + "video_reward_cumulative_accuracy": 0.7943037974683544 + }, + { + "epoch": 0.28168596022558623, + "grad_norm": 3.59495210647583, + "learning_rate": 4.5139820531362125e-06, + "loss": 0.0965, + "step": 949, + "video_reward_cumulative_accuracy": 0.7945205479452054 + }, + { + "epoch": 0.2819827842089641, + "grad_norm": 1.3361320495605469, + "learning_rate": 4.512446260823828e-06, + "loss": 0.0318, + "step": 950, + "video_reward_cumulative_accuracy": 0.7947368421052632 + }, + { + "epoch": 0.28227960819234194, + "grad_norm": 3.481994390487671, + "learning_rate": 4.510908307955059e-06, + "loss": 0.0588, + "step": 951, + "video_reward_cumulative_accuracy": 0.7944269190325972 + }, + { + "epoch": 0.2825764321757198, + "grad_norm": 0.8120396137237549, + "learning_rate": 4.509368196181048e-06, + "loss": 0.0275, + "step": 952, + "video_reward_cumulative_accuracy": 0.7946428571428571 + }, + { + "epoch": 0.28287325615909764, + "grad_norm": 6.15502405166626, + "learning_rate": 4.507825927155253e-06, + "loss": 0.0756, + "step": 953, + "video_reward_cumulative_accuracy": 0.7948583420776495 + }, + { + "epoch": 0.2831700801424755, + "grad_norm": 3.1125705242156982, + "learning_rate": 4.506281502533451e-06, + "loss": 0.0404, + "step": 954, + "video_reward_cumulative_accuracy": 0.7950733752620545 + }, + { + "epoch": 0.28346690412585335, + "grad_norm": 0.9939224123954773, + "learning_rate": 4.50473492397373e-06, + "loss": 0.0321, + "step": 955, + "video_reward_cumulative_accuracy": 0.7952879581151833 + }, + { + "epoch": 0.28376372810923123, + "grad_norm": 2.3135781288146973, + "learning_rate": 4.503186193136493e-06, + "loss": 0.0179, + "step": 956, + "video_reward_cumulative_accuracy": 0.7955020920502092 + }, + { + "epoch": 0.2840605520926091, + "grad_norm": 1.7014567852020264, + "learning_rate": 4.501635311684453e-06, + "loss": 0.095, + "step": 957, + "video_reward_cumulative_accuracy": 0.7951933124346917 + }, + { + "epoch": 0.28435737607598693, + "grad_norm": 1.145207166671753, + "learning_rate": 4.500082281282632e-06, + "loss": 0.044, + "step": 958, + "video_reward_cumulative_accuracy": 0.7954070981210856 + }, + { + "epoch": 0.2846542000593648, + "grad_norm": 1.2436047792434692, + "learning_rate": 4.4985271035983584e-06, + "loss": 0.0842, + "step": 959, + "video_reward_cumulative_accuracy": 0.7950990615224192 + }, + { + "epoch": 0.28495102404274264, + "grad_norm": 2.411715030670166, + "learning_rate": 4.496969780301267e-06, + "loss": 0.0421, + "step": 960, + "video_reward_cumulative_accuracy": 0.7953125 + }, + { + "epoch": 0.2852478480261205, + "grad_norm": 1.0124142169952393, + "learning_rate": 4.495410313063295e-06, + "loss": 0.037, + "step": 961, + "video_reward_cumulative_accuracy": 0.795525494276795 + }, + { + "epoch": 0.28554467200949835, + "grad_norm": 1.802912950515747, + "learning_rate": 4.493848703558681e-06, + "loss": 0.0552, + "step": 962, + "video_reward_cumulative_accuracy": 0.7957380457380457 + }, + { + "epoch": 0.2858414959928762, + "grad_norm": 2.056218147277832, + "learning_rate": 4.492284953463967e-06, + "loss": 0.0721, + "step": 963, + "video_reward_cumulative_accuracy": 0.7959501557632399 + }, + { + "epoch": 0.2861383199762541, + "grad_norm": 2.424288511276245, + "learning_rate": 4.490719064457987e-06, + "loss": 0.0269, + "step": 964, + "video_reward_cumulative_accuracy": 0.7956431535269709 + }, + { + "epoch": 0.28643514395963193, + "grad_norm": 1.5298640727996826, + "learning_rate": 4.4891510382218775e-06, + "loss": 0.0371, + "step": 965, + "video_reward_cumulative_accuracy": 0.7958549222797927 + }, + { + "epoch": 0.2867319679430098, + "grad_norm": 2.102123737335205, + "learning_rate": 4.487580876439066e-06, + "loss": 0.0662, + "step": 966, + "video_reward_cumulative_accuracy": 0.7960662525879917 + }, + { + "epoch": 0.28702879192638764, + "grad_norm": 1.2193660736083984, + "learning_rate": 4.486008580795273e-06, + "loss": 0.0652, + "step": 967, + "video_reward_cumulative_accuracy": 0.795760082730093 + }, + { + "epoch": 0.2873256159097655, + "grad_norm": 2.06874418258667, + "learning_rate": 4.484434152978512e-06, + "loss": 0.0292, + "step": 968, + "video_reward_cumulative_accuracy": 0.7954545454545454 + }, + { + "epoch": 0.28762243989314334, + "grad_norm": 1.5268133878707886, + "learning_rate": 4.482857594679082e-06, + "loss": 0.0429, + "step": 969, + "video_reward_cumulative_accuracy": 0.7951496388028896 + }, + { + "epoch": 0.2879192638765212, + "grad_norm": 2.622420072555542, + "learning_rate": 4.4812789075895735e-06, + "loss": 0.1016, + "step": 970, + "video_reward_cumulative_accuracy": 0.795360824742268 + }, + { + "epoch": 0.2882160878598991, + "grad_norm": 2.2772624492645264, + "learning_rate": 4.479698093404858e-06, + "loss": 0.0652, + "step": 971, + "video_reward_cumulative_accuracy": 0.7950566426364573 + }, + { + "epoch": 0.28851291184327693, + "grad_norm": 0.8947013020515442, + "learning_rate": 4.478115153822096e-06, + "loss": 0.0285, + "step": 972, + "video_reward_cumulative_accuracy": 0.7952674897119342 + }, + { + "epoch": 0.2888097358266548, + "grad_norm": 1.8154103755950928, + "learning_rate": 4.476530090540724e-06, + "loss": 0.0613, + "step": 973, + "video_reward_cumulative_accuracy": 0.7949640287769785 + }, + { + "epoch": 0.28910655981003264, + "grad_norm": 1.2615007162094116, + "learning_rate": 4.474942905262462e-06, + "loss": 0.0474, + "step": 974, + "video_reward_cumulative_accuracy": 0.7951745379876797 + }, + { + "epoch": 0.2894033837934105, + "grad_norm": 0.8353313207626343, + "learning_rate": 4.473353599691308e-06, + "loss": 0.0222, + "step": 975, + "video_reward_cumulative_accuracy": 0.7948717948717948 + }, + { + "epoch": 0.28970020777678834, + "grad_norm": 2.982597827911377, + "learning_rate": 4.471762175533535e-06, + "loss": 0.0546, + "step": 976, + "video_reward_cumulative_accuracy": 0.7945696721311475 + }, + { + "epoch": 0.2899970317601662, + "grad_norm": 2.2625701427459717, + "learning_rate": 4.470168634497692e-06, + "loss": 0.0405, + "step": 977, + "video_reward_cumulative_accuracy": 0.7947799385875128 + }, + { + "epoch": 0.2902938557435441, + "grad_norm": 1.974323034286499, + "learning_rate": 4.4685729782946005e-06, + "loss": 0.0385, + "step": 978, + "video_reward_cumulative_accuracy": 0.7949897750511248 + }, + { + "epoch": 0.29059067972692193, + "grad_norm": 4.570712566375732, + "learning_rate": 4.46697520863735e-06, + "loss": 0.0505, + "step": 979, + "video_reward_cumulative_accuracy": 0.7951991828396323 + }, + { + "epoch": 0.2908875037102998, + "grad_norm": 0.7615110874176025, + "learning_rate": 4.465375327241305e-06, + "loss": 0.0154, + "step": 980, + "video_reward_cumulative_accuracy": 0.7954081632653062 + }, + { + "epoch": 0.29118432769367764, + "grad_norm": 3.337805986404419, + "learning_rate": 4.46377333582409e-06, + "loss": 0.0636, + "step": 981, + "video_reward_cumulative_accuracy": 0.7956167176350663 + }, + { + "epoch": 0.2914811516770555, + "grad_norm": 1.5002707242965698, + "learning_rate": 4.4621692361056005e-06, + "loss": 0.0339, + "step": 982, + "video_reward_cumulative_accuracy": 0.7958248472505092 + }, + { + "epoch": 0.29177797566043334, + "grad_norm": 3.1056461334228516, + "learning_rate": 4.460563029807991e-06, + "loss": 0.0719, + "step": 983, + "video_reward_cumulative_accuracy": 0.7960325534079349 + }, + { + "epoch": 0.2920747996438112, + "grad_norm": 4.184977054595947, + "learning_rate": 4.4589547186556825e-06, + "loss": 0.0574, + "step": 984, + "video_reward_cumulative_accuracy": 0.796239837398374 + }, + { + "epoch": 0.2923716236271891, + "grad_norm": 0.9293310046195984, + "learning_rate": 4.45734430437535e-06, + "loss": 0.0066, + "step": 985, + "video_reward_cumulative_accuracy": 0.7964467005076142 + }, + { + "epoch": 0.29266844761056693, + "grad_norm": 1.263472557067871, + "learning_rate": 4.455731788695933e-06, + "loss": 0.0232, + "step": 986, + "video_reward_cumulative_accuracy": 0.7966531440162272 + }, + { + "epoch": 0.2929652715939448, + "grad_norm": 2.434809446334839, + "learning_rate": 4.4541171733486224e-06, + "loss": 0.06, + "step": 987, + "video_reward_cumulative_accuracy": 0.7958459979736575 + }, + { + "epoch": 0.29326209557732263, + "grad_norm": 4.916622161865234, + "learning_rate": 4.452500460066863e-06, + "loss": 0.0295, + "step": 988, + "video_reward_cumulative_accuracy": 0.7960526315789473 + }, + { + "epoch": 0.2935589195607005, + "grad_norm": 1.8236298561096191, + "learning_rate": 4.450881650586354e-06, + "loss": 0.0219, + "step": 989, + "video_reward_cumulative_accuracy": 0.7957532861476239 + }, + { + "epoch": 0.29385574354407834, + "grad_norm": 1.2961921691894531, + "learning_rate": 4.449260746645046e-06, + "loss": 0.0212, + "step": 990, + "video_reward_cumulative_accuracy": 0.795959595959596 + }, + { + "epoch": 0.2941525675274562, + "grad_norm": 0.6145333051681519, + "learning_rate": 4.447637749983135e-06, + "loss": 0.0062, + "step": 991, + "video_reward_cumulative_accuracy": 0.7961654894046418 + }, + { + "epoch": 0.2944493915108341, + "grad_norm": 3.7940635681152344, + "learning_rate": 4.446012662343066e-06, + "loss": 0.0816, + "step": 992, + "video_reward_cumulative_accuracy": 0.7953629032258065 + }, + { + "epoch": 0.2947462154942119, + "grad_norm": 2.3133246898651123, + "learning_rate": 4.444385485469529e-06, + "loss": 0.0319, + "step": 993, + "video_reward_cumulative_accuracy": 0.7955689828801611 + }, + { + "epoch": 0.2950430394775898, + "grad_norm": 2.988262891769409, + "learning_rate": 4.442756221109456e-06, + "loss": 0.0406, + "step": 994, + "video_reward_cumulative_accuracy": 0.795774647887324 + }, + { + "epoch": 0.29533986346096763, + "grad_norm": 0.47877877950668335, + "learning_rate": 4.441124871012018e-06, + "loss": 0.0096, + "step": 995, + "video_reward_cumulative_accuracy": 0.7959798994974875 + }, + { + "epoch": 0.2956366874443455, + "grad_norm": 3.234767198562622, + "learning_rate": 4.439491436928631e-06, + "loss": 0.0469, + "step": 996, + "video_reward_cumulative_accuracy": 0.7961847389558233 + }, + { + "epoch": 0.29593351142772334, + "grad_norm": 2.747255325317383, + "learning_rate": 4.437855920612945e-06, + "loss": 0.094, + "step": 997, + "video_reward_cumulative_accuracy": 0.7958876629889668 + }, + { + "epoch": 0.2962303354111012, + "grad_norm": 1.539255142211914, + "learning_rate": 4.436218323820843e-06, + "loss": 0.0229, + "step": 998, + "video_reward_cumulative_accuracy": 0.7960921843687375 + }, + { + "epoch": 0.2965271593944791, + "grad_norm": 4.402137279510498, + "learning_rate": 4.4345786483104455e-06, + "loss": 0.083, + "step": 999, + "video_reward_cumulative_accuracy": 0.7957957957957958 + }, + { + "epoch": 0.2968239833778569, + "grad_norm": 4.107393741607666, + "learning_rate": 4.432936895842104e-06, + "loss": 0.0391, + "step": 1000, + "video_reward_cumulative_accuracy": 0.796 + }, + { + "epoch": 0.2968239833778569, + "eval_runtime": 135.0462, + "eval_samples_per_second": 5.842, + "eval_steps_per_second": 0.733, + "eval_test_set_accuracy": 0.7790404040404041, + "step": 1000 + }, + { + "epoch": 0.2971208073612348, + "grad_norm": 2.150050640106201, + "learning_rate": 4.431293068178397e-06, + "loss": 0.1201, + "step": 1001, + "video_reward_cumulative_accuracy": 0.7957042957042957 + }, + { + "epoch": 0.29741763134461263, + "grad_norm": 1.469811201095581, + "learning_rate": 4.429647167084135e-06, + "loss": 0.022, + "step": 1002, + "video_reward_cumulative_accuracy": 0.7959081836327345 + }, + { + "epoch": 0.2977144553279905, + "grad_norm": 7.703973770141602, + "learning_rate": 4.4279991943263525e-06, + "loss": 0.0968, + "step": 1003, + "video_reward_cumulative_accuracy": 0.7961116650049851 + }, + { + "epoch": 0.29801127931136834, + "grad_norm": 4.6636271476745605, + "learning_rate": 4.426349151674307e-06, + "loss": 0.0969, + "step": 1004, + "video_reward_cumulative_accuracy": 0.795816733067729 + }, + { + "epoch": 0.2983081032947462, + "grad_norm": 4.039132118225098, + "learning_rate": 4.424697040899481e-06, + "loss": 0.0568, + "step": 1005, + "video_reward_cumulative_accuracy": 0.7955223880597015 + }, + { + "epoch": 0.2986049272781241, + "grad_norm": 3.1670384407043457, + "learning_rate": 4.423042863775574e-06, + "loss": 0.1071, + "step": 1006, + "video_reward_cumulative_accuracy": 0.7952286282306164 + }, + { + "epoch": 0.2989017512615019, + "grad_norm": 6.5014967918396, + "learning_rate": 4.421386622078507e-06, + "loss": 0.0683, + "step": 1007, + "video_reward_cumulative_accuracy": 0.7954319761668321 + }, + { + "epoch": 0.2991985752448798, + "grad_norm": 1.935164451599121, + "learning_rate": 4.419728317586416e-06, + "loss": 0.0532, + "step": 1008, + "video_reward_cumulative_accuracy": 0.7951388888888888 + }, + { + "epoch": 0.29949539922825763, + "grad_norm": 0.7051401138305664, + "learning_rate": 4.418067952079651e-06, + "loss": 0.0142, + "step": 1009, + "video_reward_cumulative_accuracy": 0.7953419226957383 + }, + { + "epoch": 0.2997922232116355, + "grad_norm": 3.0097496509552, + "learning_rate": 4.416405527340776e-06, + "loss": 0.0499, + "step": 1010, + "video_reward_cumulative_accuracy": 0.7955445544554456 + }, + { + "epoch": 0.30008904719501334, + "grad_norm": 3.7447476387023926, + "learning_rate": 4.414741045154566e-06, + "loss": 0.0378, + "step": 1011, + "video_reward_cumulative_accuracy": 0.7957467853610287 + }, + { + "epoch": 0.3003858711783912, + "grad_norm": 3.5587432384490967, + "learning_rate": 4.4130745073080025e-06, + "loss": 0.0991, + "step": 1012, + "video_reward_cumulative_accuracy": 0.7949604743083004 + }, + { + "epoch": 0.3006826951617691, + "grad_norm": 0.9364858269691467, + "learning_rate": 4.411405915590278e-06, + "loss": 0.0261, + "step": 1013, + "video_reward_cumulative_accuracy": 0.7951628825271471 + }, + { + "epoch": 0.3009795191451469, + "grad_norm": 0.5637649297714233, + "learning_rate": 4.409735271792786e-06, + "loss": 0.0121, + "step": 1014, + "video_reward_cumulative_accuracy": 0.7953648915187377 + }, + { + "epoch": 0.3012763431285248, + "grad_norm": 1.5942317247390747, + "learning_rate": 4.408062577709124e-06, + "loss": 0.0446, + "step": 1015, + "video_reward_cumulative_accuracy": 0.7955665024630542 + }, + { + "epoch": 0.30157316711190263, + "grad_norm": 2.7422399520874023, + "learning_rate": 4.406387835135094e-06, + "loss": 0.0223, + "step": 1016, + "video_reward_cumulative_accuracy": 0.7957677165354331 + }, + { + "epoch": 0.3018699910952805, + "grad_norm": 2.2551071643829346, + "learning_rate": 4.404711045868694e-06, + "loss": 0.0434, + "step": 1017, + "video_reward_cumulative_accuracy": 0.795968534906588 + }, + { + "epoch": 0.30216681507865834, + "grad_norm": 5.363723278045654, + "learning_rate": 4.403032211710118e-06, + "loss": 0.0916, + "step": 1018, + "video_reward_cumulative_accuracy": 0.7956777996070727 + }, + { + "epoch": 0.3024636390620362, + "grad_norm": 2.561952829360962, + "learning_rate": 4.401351334461759e-06, + "loss": 0.0415, + "step": 1019, + "video_reward_cumulative_accuracy": 0.7958783120706575 + }, + { + "epoch": 0.3027604630454141, + "grad_norm": 1.5561801195144653, + "learning_rate": 4.3996684159282014e-06, + "loss": 0.0834, + "step": 1020, + "video_reward_cumulative_accuracy": 0.796078431372549 + }, + { + "epoch": 0.3030572870287919, + "grad_norm": 2.4101288318634033, + "learning_rate": 4.397983457916222e-06, + "loss": 0.0574, + "step": 1021, + "video_reward_cumulative_accuracy": 0.7962781586679726 + }, + { + "epoch": 0.3033541110121698, + "grad_norm": 2.7204737663269043, + "learning_rate": 4.3962964622347855e-06, + "loss": 0.0439, + "step": 1022, + "video_reward_cumulative_accuracy": 0.7964774951076321 + }, + { + "epoch": 0.30365093499554763, + "grad_norm": 1.8821197748184204, + "learning_rate": 4.3946074306950484e-06, + "loss": 0.0284, + "step": 1023, + "video_reward_cumulative_accuracy": 0.7966764418377321 + }, + { + "epoch": 0.3039477589789255, + "grad_norm": 1.1801754236221313, + "learning_rate": 4.392916365110347e-06, + "loss": 0.0358, + "step": 1024, + "video_reward_cumulative_accuracy": 0.796875 + }, + { + "epoch": 0.30424458296230333, + "grad_norm": 1.7793229818344116, + "learning_rate": 4.391223267296206e-06, + "loss": 0.043, + "step": 1025, + "video_reward_cumulative_accuracy": 0.7970731707317074 + }, + { + "epoch": 0.3045414069456812, + "grad_norm": 1.5398008823394775, + "learning_rate": 4.389528139070329e-06, + "loss": 0.028, + "step": 1026, + "video_reward_cumulative_accuracy": 0.797270955165692 + }, + { + "epoch": 0.3048382309290591, + "grad_norm": 2.413785457611084, + "learning_rate": 4.387830982252602e-06, + "loss": 0.0461, + "step": 1027, + "video_reward_cumulative_accuracy": 0.7974683544303798 + }, + { + "epoch": 0.3051350549124369, + "grad_norm": 5.496139049530029, + "learning_rate": 4.3861317986650875e-06, + "loss": 0.0584, + "step": 1028, + "video_reward_cumulative_accuracy": 0.7976653696498055 + }, + { + "epoch": 0.3054318788958148, + "grad_norm": 2.5064406394958496, + "learning_rate": 4.384430590132023e-06, + "loss": 0.0621, + "step": 1029, + "video_reward_cumulative_accuracy": 0.7978620019436345 + }, + { + "epoch": 0.3057287028791926, + "grad_norm": 2.9505836963653564, + "learning_rate": 4.382727358479821e-06, + "loss": 0.0354, + "step": 1030, + "video_reward_cumulative_accuracy": 0.7980582524271844 + }, + { + "epoch": 0.3060255268625705, + "grad_norm": 5.6248297691345215, + "learning_rate": 4.3810221055370664e-06, + "loss": 0.0853, + "step": 1031, + "video_reward_cumulative_accuracy": 0.7982541222114452 + }, + { + "epoch": 0.30632235084594833, + "grad_norm": 2.6210145950317383, + "learning_rate": 4.3793148331345136e-06, + "loss": 0.0609, + "step": 1032, + "video_reward_cumulative_accuracy": 0.7984496124031008 + }, + { + "epoch": 0.3066191748293262, + "grad_norm": 3.5333571434020996, + "learning_rate": 4.377605543105086e-06, + "loss": 0.0319, + "step": 1033, + "video_reward_cumulative_accuracy": 0.7986447241045499 + }, + { + "epoch": 0.3069159988127041, + "grad_norm": 1.4399138689041138, + "learning_rate": 4.375894237283872e-06, + "loss": 0.0569, + "step": 1034, + "video_reward_cumulative_accuracy": 0.7988394584139265 + }, + { + "epoch": 0.3072128227960819, + "grad_norm": 1.2284201383590698, + "learning_rate": 4.374180917508124e-06, + "loss": 0.0193, + "step": 1035, + "video_reward_cumulative_accuracy": 0.7990338164251207 + }, + { + "epoch": 0.3075096467794598, + "grad_norm": 2.532024383544922, + "learning_rate": 4.372465585617257e-06, + "loss": 0.0401, + "step": 1036, + "video_reward_cumulative_accuracy": 0.7992277992277992 + }, + { + "epoch": 0.3078064707628376, + "grad_norm": 1.7466938495635986, + "learning_rate": 4.370748243452846e-06, + "loss": 0.038, + "step": 1037, + "video_reward_cumulative_accuracy": 0.7994214079074252 + }, + { + "epoch": 0.3081032947462155, + "grad_norm": 1.943616509437561, + "learning_rate": 4.369028892858626e-06, + "loss": 0.0164, + "step": 1038, + "video_reward_cumulative_accuracy": 0.7996146435452793 + }, + { + "epoch": 0.30840011872959333, + "grad_norm": 0.6063635349273682, + "learning_rate": 4.367307535680485e-06, + "loss": 0.0151, + "step": 1039, + "video_reward_cumulative_accuracy": 0.7998075072184793 + }, + { + "epoch": 0.3086969427129712, + "grad_norm": 6.530287742614746, + "learning_rate": 4.3655841737664685e-06, + "loss": 0.1203, + "step": 1040, + "video_reward_cumulative_accuracy": 0.7995192307692308 + }, + { + "epoch": 0.3089937666963491, + "grad_norm": 4.490469932556152, + "learning_rate": 4.363858808966772e-06, + "loss": 0.0926, + "step": 1041, + "video_reward_cumulative_accuracy": 0.7997118155619597 + }, + { + "epoch": 0.3092905906797269, + "grad_norm": 1.2573050260543823, + "learning_rate": 4.362131443133742e-06, + "loss": 0.0368, + "step": 1042, + "video_reward_cumulative_accuracy": 0.7994241842610365 + }, + { + "epoch": 0.3095874146631048, + "grad_norm": 3.369048595428467, + "learning_rate": 4.3604020781218736e-06, + "loss": 0.0502, + "step": 1043, + "video_reward_cumulative_accuracy": 0.7996164908916586 + }, + { + "epoch": 0.3098842386464826, + "grad_norm": 1.5536525249481201, + "learning_rate": 4.358670715787808e-06, + "loss": 0.0142, + "step": 1044, + "video_reward_cumulative_accuracy": 0.7998084291187739 + }, + { + "epoch": 0.3101810626298605, + "grad_norm": 2.2478020191192627, + "learning_rate": 4.356937357990331e-06, + "loss": 0.0408, + "step": 1045, + "video_reward_cumulative_accuracy": 0.8 + }, + { + "epoch": 0.31047788661323833, + "grad_norm": 4.114165782928467, + "learning_rate": 4.3552020065903685e-06, + "loss": 0.0481, + "step": 1046, + "video_reward_cumulative_accuracy": 0.7992351816443595 + }, + { + "epoch": 0.3107747105966162, + "grad_norm": 2.3197696208953857, + "learning_rate": 4.353464663450991e-06, + "loss": 0.0471, + "step": 1047, + "video_reward_cumulative_accuracy": 0.7994269340974212 + }, + { + "epoch": 0.3110715345799941, + "grad_norm": 1.8591456413269043, + "learning_rate": 4.351725330437405e-06, + "loss": 0.0179, + "step": 1048, + "video_reward_cumulative_accuracy": 0.799618320610687 + }, + { + "epoch": 0.3113683585633719, + "grad_norm": 1.4338923692703247, + "learning_rate": 4.349984009416952e-06, + "loss": 0.0159, + "step": 1049, + "video_reward_cumulative_accuracy": 0.7993326978074357 + }, + { + "epoch": 0.3116651825467498, + "grad_norm": 5.681126594543457, + "learning_rate": 4.34824070225911e-06, + "loss": 0.0691, + "step": 1050, + "video_reward_cumulative_accuracy": 0.799047619047619 + }, + { + "epoch": 0.3119620065301276, + "grad_norm": 1.271953821182251, + "learning_rate": 4.346495410835487e-06, + "loss": 0.0444, + "step": 1051, + "video_reward_cumulative_accuracy": 0.7992388201712655 + }, + { + "epoch": 0.3122588305135055, + "grad_norm": 2.048283100128174, + "learning_rate": 4.344748137019825e-06, + "loss": 0.0131, + "step": 1052, + "video_reward_cumulative_accuracy": 0.7989543726235742 + }, + { + "epoch": 0.31255565449688333, + "grad_norm": 1.9480700492858887, + "learning_rate": 4.34299888268799e-06, + "loss": 0.0607, + "step": 1053, + "video_reward_cumulative_accuracy": 0.798670465337132 + }, + { + "epoch": 0.3128524784802612, + "grad_norm": 2.398563861846924, + "learning_rate": 4.341247649717978e-06, + "loss": 0.055, + "step": 1054, + "video_reward_cumulative_accuracy": 0.7988614800759013 + }, + { + "epoch": 0.31314930246363903, + "grad_norm": 2.373682737350464, + "learning_rate": 4.339494439989907e-06, + "loss": 0.0684, + "step": 1055, + "video_reward_cumulative_accuracy": 0.7990521327014218 + }, + { + "epoch": 0.3134461264470169, + "grad_norm": 3.2019567489624023, + "learning_rate": 4.3377392553860156e-06, + "loss": 0.0426, + "step": 1056, + "video_reward_cumulative_accuracy": 0.7992424242424242 + }, + { + "epoch": 0.3137429504303948, + "grad_norm": 5.288125038146973, + "learning_rate": 4.335982097790668e-06, + "loss": 0.0768, + "step": 1057, + "video_reward_cumulative_accuracy": 0.7994323557237465 + }, + { + "epoch": 0.3140397744137726, + "grad_norm": 4.724822521209717, + "learning_rate": 4.334222969090342e-06, + "loss": 0.0782, + "step": 1058, + "video_reward_cumulative_accuracy": 0.7996219281663516 + }, + { + "epoch": 0.3143365983971505, + "grad_norm": 1.1170587539672852, + "learning_rate": 4.332461871173633e-06, + "loss": 0.0591, + "step": 1059, + "video_reward_cumulative_accuracy": 0.7998111425873465 + }, + { + "epoch": 0.3146334223805283, + "grad_norm": 3.1828134059906006, + "learning_rate": 4.330698805931251e-06, + "loss": 0.0642, + "step": 1060, + "video_reward_cumulative_accuracy": 0.8 + }, + { + "epoch": 0.3149302463639062, + "grad_norm": 3.1132760047912598, + "learning_rate": 4.328933775256017e-06, + "loss": 0.0925, + "step": 1061, + "video_reward_cumulative_accuracy": 0.8001885014137606 + }, + { + "epoch": 0.31522707034728403, + "grad_norm": 2.5283889770507812, + "learning_rate": 4.327166781042864e-06, + "loss": 0.0319, + "step": 1062, + "video_reward_cumulative_accuracy": 0.7999058380414312 + }, + { + "epoch": 0.3155238943306619, + "grad_norm": 2.8218538761138916, + "learning_rate": 4.325397825188829e-06, + "loss": 0.0391, + "step": 1063, + "video_reward_cumulative_accuracy": 0.8000940733772343 + }, + { + "epoch": 0.3158207183140398, + "grad_norm": 2.607076644897461, + "learning_rate": 4.323626909593062e-06, + "loss": 0.0454, + "step": 1064, + "video_reward_cumulative_accuracy": 0.8002819548872181 + }, + { + "epoch": 0.3161175422974176, + "grad_norm": 1.558544397354126, + "learning_rate": 4.321854036156809e-06, + "loss": 0.0189, + "step": 1065, + "video_reward_cumulative_accuracy": 0.8004694835680751 + }, + { + "epoch": 0.3164143662807955, + "grad_norm": 1.2349810600280762, + "learning_rate": 4.320079206783423e-06, + "loss": 0.0272, + "step": 1066, + "video_reward_cumulative_accuracy": 0.800656660412758 + }, + { + "epoch": 0.3167111902641733, + "grad_norm": 1.2629001140594482, + "learning_rate": 4.318302423378357e-06, + "loss": 0.0318, + "step": 1067, + "video_reward_cumulative_accuracy": 0.8003748828491096 + }, + { + "epoch": 0.3170080142475512, + "grad_norm": 2.740196466445923, + "learning_rate": 4.3165236878491575e-06, + "loss": 0.0518, + "step": 1068, + "video_reward_cumulative_accuracy": 0.800561797752809 + }, + { + "epoch": 0.31730483823092903, + "grad_norm": 3.2238783836364746, + "learning_rate": 4.314743002105473e-06, + "loss": 0.0403, + "step": 1069, + "video_reward_cumulative_accuracy": 0.8002806361085126 + }, + { + "epoch": 0.3176016622143069, + "grad_norm": 1.6323812007904053, + "learning_rate": 4.31296036805904e-06, + "loss": 0.0721, + "step": 1070, + "video_reward_cumulative_accuracy": 0.8004672897196262 + }, + { + "epoch": 0.3178984861976848, + "grad_norm": 1.8098689317703247, + "learning_rate": 4.3111757876236905e-06, + "loss": 0.0256, + "step": 1071, + "video_reward_cumulative_accuracy": 0.8006535947712419 + }, + { + "epoch": 0.3181953101810626, + "grad_norm": 1.6807585954666138, + "learning_rate": 4.309389262715344e-06, + "loss": 0.0353, + "step": 1072, + "video_reward_cumulative_accuracy": 0.800839552238806 + }, + { + "epoch": 0.3184921341644405, + "grad_norm": 4.06589937210083, + "learning_rate": 4.307600795252008e-06, + "loss": 0.0547, + "step": 1073, + "video_reward_cumulative_accuracy": 0.8005591798695247 + }, + { + "epoch": 0.3187889581478183, + "grad_norm": 2.8750007152557373, + "learning_rate": 4.305810387153778e-06, + "loss": 0.0552, + "step": 1074, + "video_reward_cumulative_accuracy": 0.8007448789571695 + }, + { + "epoch": 0.3190857821311962, + "grad_norm": 1.4461251497268677, + "learning_rate": 4.30401804034283e-06, + "loss": 0.0467, + "step": 1075, + "video_reward_cumulative_accuracy": 0.8009302325581396 + }, + { + "epoch": 0.31938260611457403, + "grad_norm": 7.60485315322876, + "learning_rate": 4.30222375674342e-06, + "loss": 0.0826, + "step": 1076, + "video_reward_cumulative_accuracy": 0.8011152416356877 + }, + { + "epoch": 0.3196794300979519, + "grad_norm": 5.275068283081055, + "learning_rate": 4.3004275382818884e-06, + "loss": 0.0536, + "step": 1077, + "video_reward_cumulative_accuracy": 0.8012999071494893 + }, + { + "epoch": 0.3199762540813298, + "grad_norm": 2.498542547225952, + "learning_rate": 4.298629386886649e-06, + "loss": 0.0574, + "step": 1078, + "video_reward_cumulative_accuracy": 0.8010204081632653 + }, + { + "epoch": 0.3202730780647076, + "grad_norm": 4.200756072998047, + "learning_rate": 4.296829304488191e-06, + "loss": 0.0782, + "step": 1079, + "video_reward_cumulative_accuracy": 0.8012048192771084 + }, + { + "epoch": 0.3205699020480855, + "grad_norm": 1.4311738014221191, + "learning_rate": 4.29502729301908e-06, + "loss": 0.0201, + "step": 1080, + "video_reward_cumulative_accuracy": 0.8013888888888889 + }, + { + "epoch": 0.3208667260314633, + "grad_norm": 0.836157500743866, + "learning_rate": 4.293223354413948e-06, + "loss": 0.0322, + "step": 1081, + "video_reward_cumulative_accuracy": 0.8015726179463459 + }, + { + "epoch": 0.3211635500148412, + "grad_norm": 1.57651686668396, + "learning_rate": 4.2914174906094985e-06, + "loss": 0.0481, + "step": 1082, + "video_reward_cumulative_accuracy": 0.8017560073937153 + }, + { + "epoch": 0.32146037399821903, + "grad_norm": 1.0639756917953491, + "learning_rate": 4.289609703544501e-06, + "loss": 0.0469, + "step": 1083, + "video_reward_cumulative_accuracy": 0.8019390581717452 + }, + { + "epoch": 0.3217571979815969, + "grad_norm": 2.068735361099243, + "learning_rate": 4.2877999951597935e-06, + "loss": 0.0474, + "step": 1084, + "video_reward_cumulative_accuracy": 0.8021217712177122 + }, + { + "epoch": 0.3220540219649748, + "grad_norm": 2.1171066761016846, + "learning_rate": 4.28598836739827e-06, + "loss": 0.0429, + "step": 1085, + "video_reward_cumulative_accuracy": 0.8018433179723502 + }, + { + "epoch": 0.3223508459483526, + "grad_norm": 3.831587076187134, + "learning_rate": 4.28417482220489e-06, + "loss": 0.0376, + "step": 1086, + "video_reward_cumulative_accuracy": 0.8015653775322283 + }, + { + "epoch": 0.3226476699317305, + "grad_norm": 6.328731536865234, + "learning_rate": 4.282359361526671e-06, + "loss": 0.0615, + "step": 1087, + "video_reward_cumulative_accuracy": 0.8012879484820608 + }, + { + "epoch": 0.3229444939151083, + "grad_norm": 1.401442289352417, + "learning_rate": 4.2805419873126855e-06, + "loss": 0.0197, + "step": 1088, + "video_reward_cumulative_accuracy": 0.8014705882352942 + }, + { + "epoch": 0.3232413178984862, + "grad_norm": 4.52972412109375, + "learning_rate": 4.278722701514061e-06, + "loss": 0.0569, + "step": 1089, + "video_reward_cumulative_accuracy": 0.8016528925619835 + }, + { + "epoch": 0.323538141881864, + "grad_norm": 2.7835745811462402, + "learning_rate": 4.276901506083978e-06, + "loss": 0.0962, + "step": 1090, + "video_reward_cumulative_accuracy": 0.8018348623853211 + }, + { + "epoch": 0.3238349658652419, + "grad_norm": 1.7582579851150513, + "learning_rate": 4.275078402977666e-06, + "loss": 0.0331, + "step": 1091, + "video_reward_cumulative_accuracy": 0.8020164986251146 + }, + { + "epoch": 0.3241317898486198, + "grad_norm": 1.52336847782135, + "learning_rate": 4.273253394152404e-06, + "loss": 0.059, + "step": 1092, + "video_reward_cumulative_accuracy": 0.8021978021978022 + }, + { + "epoch": 0.3244286138319976, + "grad_norm": 1.373092532157898, + "learning_rate": 4.271426481567515e-06, + "loss": 0.0319, + "step": 1093, + "video_reward_cumulative_accuracy": 0.8023787740164684 + }, + { + "epoch": 0.3247254378153755, + "grad_norm": 4.265668869018555, + "learning_rate": 4.269597667184366e-06, + "loss": 0.1015, + "step": 1094, + "video_reward_cumulative_accuracy": 0.8025594149908593 + }, + { + "epoch": 0.3250222617987533, + "grad_norm": 2.7200961112976074, + "learning_rate": 4.267766952966369e-06, + "loss": 0.0494, + "step": 1095, + "video_reward_cumulative_accuracy": 0.8027397260273973 + }, + { + "epoch": 0.3253190857821312, + "grad_norm": 2.589541435241699, + "learning_rate": 4.2659343408789734e-06, + "loss": 0.038, + "step": 1096, + "video_reward_cumulative_accuracy": 0.8029197080291971 + }, + { + "epoch": 0.325615909765509, + "grad_norm": 2.7991340160369873, + "learning_rate": 4.264099832889665e-06, + "loss": 0.0584, + "step": 1097, + "video_reward_cumulative_accuracy": 0.8030993618960802 + }, + { + "epoch": 0.3259127337488869, + "grad_norm": 2.104408025741577, + "learning_rate": 4.262263430967966e-06, + "loss": 0.0505, + "step": 1098, + "video_reward_cumulative_accuracy": 0.8032786885245902 + }, + { + "epoch": 0.3262095577322648, + "grad_norm": 1.6177819967269897, + "learning_rate": 4.2604251370854325e-06, + "loss": 0.0405, + "step": 1099, + "video_reward_cumulative_accuracy": 0.8030027297543221 + }, + { + "epoch": 0.3265063817156426, + "grad_norm": 4.011326789855957, + "learning_rate": 4.2585849532156505e-06, + "loss": 0.0463, + "step": 1100, + "video_reward_cumulative_accuracy": 0.8027272727272727 + }, + { + "epoch": 0.3268032056990205, + "grad_norm": 1.1863105297088623, + "learning_rate": 4.256742881334238e-06, + "loss": 0.0208, + "step": 1101, + "video_reward_cumulative_accuracy": 0.8029064486830154 + }, + { + "epoch": 0.3271000296823983, + "grad_norm": 2.122631072998047, + "learning_rate": 4.254898923418838e-06, + "loss": 0.0622, + "step": 1102, + "video_reward_cumulative_accuracy": 0.8026315789473685 + }, + { + "epoch": 0.3273968536657762, + "grad_norm": 2.541916608810425, + "learning_rate": 4.253053081449116e-06, + "loss": 0.0483, + "step": 1103, + "video_reward_cumulative_accuracy": 0.8028105167724388 + }, + { + "epoch": 0.327693677649154, + "grad_norm": 3.4067628383636475, + "learning_rate": 4.251205357406764e-06, + "loss": 0.0289, + "step": 1104, + "video_reward_cumulative_accuracy": 0.802536231884058 + }, + { + "epoch": 0.3279905016325319, + "grad_norm": 1.9281690120697021, + "learning_rate": 4.249355753275492e-06, + "loss": 0.0488, + "step": 1105, + "video_reward_cumulative_accuracy": 0.8022624434389141 + }, + { + "epoch": 0.3282873256159098, + "grad_norm": 4.4767231941223145, + "learning_rate": 4.247504271041031e-06, + "loss": 0.0644, + "step": 1106, + "video_reward_cumulative_accuracy": 0.8024412296564195 + }, + { + "epoch": 0.3285841495992876, + "grad_norm": 4.301503658294678, + "learning_rate": 4.245650912691127e-06, + "loss": 0.0987, + "step": 1107, + "video_reward_cumulative_accuracy": 0.8026196928635954 + }, + { + "epoch": 0.3288809735826655, + "grad_norm": 2.4027132987976074, + "learning_rate": 4.243795680215538e-06, + "loss": 0.0807, + "step": 1108, + "video_reward_cumulative_accuracy": 0.8023465703971119 + }, + { + "epoch": 0.3291777975660433, + "grad_norm": 2.302290678024292, + "learning_rate": 4.241938575606038e-06, + "loss": 0.0403, + "step": 1109, + "video_reward_cumulative_accuracy": 0.8025247971145176 + }, + { + "epoch": 0.3294746215494212, + "grad_norm": 4.239207744598389, + "learning_rate": 4.240079600856408e-06, + "loss": 0.062, + "step": 1110, + "video_reward_cumulative_accuracy": 0.8027027027027027 + }, + { + "epoch": 0.329771445532799, + "grad_norm": 3.736924171447754, + "learning_rate": 4.238218757962439e-06, + "loss": 0.0399, + "step": 1111, + "video_reward_cumulative_accuracy": 0.8028802880288028 + }, + { + "epoch": 0.3300682695161769, + "grad_norm": 1.4400713443756104, + "learning_rate": 4.2363560489219255e-06, + "loss": 0.0213, + "step": 1112, + "video_reward_cumulative_accuracy": 0.8030575539568345 + }, + { + "epoch": 0.3303650934995548, + "grad_norm": 1.8887395858764648, + "learning_rate": 4.234491475734667e-06, + "loss": 0.0368, + "step": 1113, + "video_reward_cumulative_accuracy": 0.8032345013477089 + }, + { + "epoch": 0.3306619174829326, + "grad_norm": 1.5093616247177124, + "learning_rate": 4.232625040402463e-06, + "loss": 0.0272, + "step": 1114, + "video_reward_cumulative_accuracy": 0.803411131059246 + }, + { + "epoch": 0.3309587414663105, + "grad_norm": 5.804702281951904, + "learning_rate": 4.230756744929114e-06, + "loss": 0.0515, + "step": 1115, + "video_reward_cumulative_accuracy": 0.8031390134529148 + }, + { + "epoch": 0.3312555654496883, + "grad_norm": 0.6705430150032043, + "learning_rate": 4.228886591320415e-06, + "loss": 0.0149, + "step": 1116, + "video_reward_cumulative_accuracy": 0.8033154121863799 + }, + { + "epoch": 0.3315523894330662, + "grad_norm": 5.313145637512207, + "learning_rate": 4.227014581584159e-06, + "loss": 0.0725, + "step": 1117, + "video_reward_cumulative_accuracy": 0.8034914950760967 + }, + { + "epoch": 0.331849213416444, + "grad_norm": 2.9840526580810547, + "learning_rate": 4.2251407177301295e-06, + "loss": 0.0556, + "step": 1118, + "video_reward_cumulative_accuracy": 0.8032200357781754 + }, + { + "epoch": 0.3321460373998219, + "grad_norm": 1.9225348234176636, + "learning_rate": 4.2232650017701015e-06, + "loss": 0.0517, + "step": 1119, + "video_reward_cumulative_accuracy": 0.8029490616621984 + }, + { + "epoch": 0.3324428613831998, + "grad_norm": 6.583012580871582, + "learning_rate": 4.221387435717838e-06, + "loss": 0.0913, + "step": 1120, + "video_reward_cumulative_accuracy": 0.803125 + }, + { + "epoch": 0.3327396853665776, + "grad_norm": 5.673295021057129, + "learning_rate": 4.219508021589088e-06, + "loss": 0.0629, + "step": 1121, + "video_reward_cumulative_accuracy": 0.8033006244424621 + }, + { + "epoch": 0.3330365093499555, + "grad_norm": 2.6932709217071533, + "learning_rate": 4.217626761401585e-06, + "loss": 0.035, + "step": 1122, + "video_reward_cumulative_accuracy": 0.803475935828877 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.6052873134613037, + "learning_rate": 4.215743657175046e-06, + "loss": 0.0277, + "step": 1123, + "video_reward_cumulative_accuracy": 0.8036509349955476 + }, + { + "epoch": 0.3336301573167112, + "grad_norm": 1.1534535884857178, + "learning_rate": 4.213858710931163e-06, + "loss": 0.0273, + "step": 1124, + "video_reward_cumulative_accuracy": 0.8033807829181495 + }, + { + "epoch": 0.333926981300089, + "grad_norm": 1.3987401723861694, + "learning_rate": 4.2119719246936114e-06, + "loss": 0.034, + "step": 1125, + "video_reward_cumulative_accuracy": 0.8035555555555556 + }, + { + "epoch": 0.3342238052834669, + "grad_norm": 2.05250883102417, + "learning_rate": 4.210083300488038e-06, + "loss": 0.0606, + "step": 1126, + "video_reward_cumulative_accuracy": 0.8037300177619894 + }, + { + "epoch": 0.3345206292668448, + "grad_norm": 2.67376971244812, + "learning_rate": 4.208192840342066e-06, + "loss": 0.0622, + "step": 1127, + "video_reward_cumulative_accuracy": 0.8039041703637977 + }, + { + "epoch": 0.3348174532502226, + "grad_norm": 1.0269320011138916, + "learning_rate": 4.206300546285286e-06, + "loss": 0.0466, + "step": 1128, + "video_reward_cumulative_accuracy": 0.8036347517730497 + }, + { + "epoch": 0.3351142772336005, + "grad_norm": 6.182705879211426, + "learning_rate": 4.204406420349259e-06, + "loss": 0.035, + "step": 1129, + "video_reward_cumulative_accuracy": 0.8038086802480071 + }, + { + "epoch": 0.3354111012169783, + "grad_norm": 3.8771231174468994, + "learning_rate": 4.2025104645675145e-06, + "loss": 0.0472, + "step": 1130, + "video_reward_cumulative_accuracy": 0.8039823008849557 + }, + { + "epoch": 0.3357079252003562, + "grad_norm": 4.503866195678711, + "learning_rate": 4.200612680975545e-06, + "loss": 0.0592, + "step": 1131, + "video_reward_cumulative_accuracy": 0.8037135278514589 + }, + { + "epoch": 0.336004749183734, + "grad_norm": 1.73423433303833, + "learning_rate": 4.1987130716108046e-06, + "loss": 0.035, + "step": 1132, + "video_reward_cumulative_accuracy": 0.8034452296819788 + }, + { + "epoch": 0.3363015731671119, + "grad_norm": 2.8510076999664307, + "learning_rate": 4.196811638512708e-06, + "loss": 0.0513, + "step": 1133, + "video_reward_cumulative_accuracy": 0.8031774051191527 + }, + { + "epoch": 0.3365983971504898, + "grad_norm": 2.853792190551758, + "learning_rate": 4.194908383722629e-06, + "loss": 0.0676, + "step": 1134, + "video_reward_cumulative_accuracy": 0.8029100529100529 + }, + { + "epoch": 0.3368952211338676, + "grad_norm": 3.5561459064483643, + "learning_rate": 4.193003309283896e-06, + "loss": 0.0834, + "step": 1135, + "video_reward_cumulative_accuracy": 0.8030837004405287 + }, + { + "epoch": 0.3371920451172455, + "grad_norm": 1.2049405574798584, + "learning_rate": 4.191096417241792e-06, + "loss": 0.0216, + "step": 1136, + "video_reward_cumulative_accuracy": 0.8032570422535211 + }, + { + "epoch": 0.3374888691006233, + "grad_norm": 1.6044228076934814, + "learning_rate": 4.189187709643549e-06, + "loss": 0.0478, + "step": 1137, + "video_reward_cumulative_accuracy": 0.8034300791556728 + }, + { + "epoch": 0.3377856930840012, + "grad_norm": 1.5150611400604248, + "learning_rate": 4.1872771885383525e-06, + "loss": 0.0535, + "step": 1138, + "video_reward_cumulative_accuracy": 0.8031634446397188 + }, + { + "epoch": 0.338082517067379, + "grad_norm": 3.9265263080596924, + "learning_rate": 4.18536485597733e-06, + "loss": 0.0596, + "step": 1139, + "video_reward_cumulative_accuracy": 0.8033362598770851 + }, + { + "epoch": 0.3383793410507569, + "grad_norm": 7.542102336883545, + "learning_rate": 4.183450714013557e-06, + "loss": 0.0811, + "step": 1140, + "video_reward_cumulative_accuracy": 0.8035087719298246 + }, + { + "epoch": 0.3386761650341348, + "grad_norm": 4.337952613830566, + "learning_rate": 4.181534764702051e-06, + "loss": 0.0957, + "step": 1141, + "video_reward_cumulative_accuracy": 0.8032427695004382 + }, + { + "epoch": 0.3389729890175126, + "grad_norm": 1.57660710811615, + "learning_rate": 4.179617010099768e-06, + "loss": 0.0462, + "step": 1142, + "video_reward_cumulative_accuracy": 0.803415061295972 + }, + { + "epoch": 0.3392698130008905, + "grad_norm": 1.877387285232544, + "learning_rate": 4.177697452265605e-06, + "loss": 0.0485, + "step": 1143, + "video_reward_cumulative_accuracy": 0.8035870516185477 + }, + { + "epoch": 0.3395666369842683, + "grad_norm": 3.2722318172454834, + "learning_rate": 4.175776093260395e-06, + "loss": 0.0395, + "step": 1144, + "video_reward_cumulative_accuracy": 0.8037587412587412 + }, + { + "epoch": 0.3398634609676462, + "grad_norm": 2.0722169876098633, + "learning_rate": 4.1738529351469e-06, + "loss": 0.0416, + "step": 1145, + "video_reward_cumulative_accuracy": 0.8039301310043668 + }, + { + "epoch": 0.340160284951024, + "grad_norm": 1.58327317237854, + "learning_rate": 4.1719279799898205e-06, + "loss": 0.0566, + "step": 1146, + "video_reward_cumulative_accuracy": 0.8041012216404887 + }, + { + "epoch": 0.3404571089344019, + "grad_norm": 4.316904544830322, + "learning_rate": 4.17000122985578e-06, + "loss": 0.0597, + "step": 1147, + "video_reward_cumulative_accuracy": 0.8038360941586749 + }, + { + "epoch": 0.3407539329177798, + "grad_norm": 5.312283039093018, + "learning_rate": 4.168072686813332e-06, + "loss": 0.0579, + "step": 1148, + "video_reward_cumulative_accuracy": 0.804006968641115 + }, + { + "epoch": 0.3410507569011576, + "grad_norm": 1.4211534261703491, + "learning_rate": 4.166142352932957e-06, + "loss": 0.032, + "step": 1149, + "video_reward_cumulative_accuracy": 0.804177545691906 + }, + { + "epoch": 0.3413475808845355, + "grad_norm": 1.4748672246932983, + "learning_rate": 4.164210230287053e-06, + "loss": 0.0391, + "step": 1150, + "video_reward_cumulative_accuracy": 0.8043478260869565 + }, + { + "epoch": 0.3416444048679133, + "grad_norm": 1.9210374355316162, + "learning_rate": 4.162276320949943e-06, + "loss": 0.0787, + "step": 1151, + "video_reward_cumulative_accuracy": 0.8036490008688097 + }, + { + "epoch": 0.3419412288512912, + "grad_norm": 3.2308928966522217, + "learning_rate": 4.160340626997865e-06, + "loss": 0.0535, + "step": 1152, + "video_reward_cumulative_accuracy": 0.8038194444444444 + }, + { + "epoch": 0.342238052834669, + "grad_norm": 1.825577735900879, + "learning_rate": 4.158403150508975e-06, + "loss": 0.0535, + "step": 1153, + "video_reward_cumulative_accuracy": 0.8035559410234172 + }, + { + "epoch": 0.3425348768180469, + "grad_norm": 1.2146353721618652, + "learning_rate": 4.156463893563342e-06, + "loss": 0.0576, + "step": 1154, + "video_reward_cumulative_accuracy": 0.8032928942807626 + }, + { + "epoch": 0.3428317008014248, + "grad_norm": 1.1705416440963745, + "learning_rate": 4.154522858242947e-06, + "loss": 0.0369, + "step": 1155, + "video_reward_cumulative_accuracy": 0.8034632034632034 + }, + { + "epoch": 0.3431285247848026, + "grad_norm": 1.2783030271530151, + "learning_rate": 4.15258004663168e-06, + "loss": 0.0363, + "step": 1156, + "video_reward_cumulative_accuracy": 0.8036332179930796 + }, + { + "epoch": 0.3434253487681805, + "grad_norm": 1.560981273651123, + "learning_rate": 4.150635460815336e-06, + "loss": 0.0454, + "step": 1157, + "video_reward_cumulative_accuracy": 0.8033707865168539 + }, + { + "epoch": 0.3437221727515583, + "grad_norm": 1.9000885486602783, + "learning_rate": 4.148689102881619e-06, + "loss": 0.0535, + "step": 1158, + "video_reward_cumulative_accuracy": 0.8035405872193437 + }, + { + "epoch": 0.3440189967349362, + "grad_norm": 1.363930344581604, + "learning_rate": 4.146740974920131e-06, + "loss": 0.0444, + "step": 1159, + "video_reward_cumulative_accuracy": 0.8037100949094047 + }, + { + "epoch": 0.344315820718314, + "grad_norm": 1.4986649751663208, + "learning_rate": 4.144791079022379e-06, + "loss": 0.0329, + "step": 1160, + "video_reward_cumulative_accuracy": 0.8038793103448276 + }, + { + "epoch": 0.3446126447016919, + "grad_norm": 2.1276040077209473, + "learning_rate": 4.142839417281762e-06, + "loss": 0.0409, + "step": 1161, + "video_reward_cumulative_accuracy": 0.8040482342807924 + }, + { + "epoch": 0.3449094686850698, + "grad_norm": 2.0634193420410156, + "learning_rate": 4.140885991793582e-06, + "loss": 0.0363, + "step": 1162, + "video_reward_cumulative_accuracy": 0.8042168674698795 + }, + { + "epoch": 0.3452062926684476, + "grad_norm": 4.047058582305908, + "learning_rate": 4.138930804655028e-06, + "loss": 0.0609, + "step": 1163, + "video_reward_cumulative_accuracy": 0.8039552880481513 + }, + { + "epoch": 0.3455031166518255, + "grad_norm": 1.8624390363693237, + "learning_rate": 4.136973857965185e-06, + "loss": 0.0345, + "step": 1164, + "video_reward_cumulative_accuracy": 0.8041237113402062 + }, + { + "epoch": 0.3457999406352033, + "grad_norm": 3.3220489025115967, + "learning_rate": 4.135015153825024e-06, + "loss": 0.0393, + "step": 1165, + "video_reward_cumulative_accuracy": 0.8042918454935623 + }, + { + "epoch": 0.3460967646185812, + "grad_norm": 2.244272470474243, + "learning_rate": 4.133054694337404e-06, + "loss": 0.1681, + "step": 1166, + "video_reward_cumulative_accuracy": 0.8040308747855918 + }, + { + "epoch": 0.346393588601959, + "grad_norm": 1.2074984312057495, + "learning_rate": 4.1310924816070705e-06, + "loss": 0.0117, + "step": 1167, + "video_reward_cumulative_accuracy": 0.8041988003427593 + }, + { + "epoch": 0.3466904125853369, + "grad_norm": 4.098258018493652, + "learning_rate": 4.129128517740647e-06, + "loss": 0.0522, + "step": 1168, + "video_reward_cumulative_accuracy": 0.8039383561643836 + }, + { + "epoch": 0.3469872365687148, + "grad_norm": 1.4286725521087646, + "learning_rate": 4.12716280484664e-06, + "loss": 0.0594, + "step": 1169, + "video_reward_cumulative_accuracy": 0.8032506415739948 + }, + { + "epoch": 0.3472840605520926, + "grad_norm": 3.419475555419922, + "learning_rate": 4.125195345035433e-06, + "loss": 0.0582, + "step": 1170, + "video_reward_cumulative_accuracy": 0.8034188034188035 + }, + { + "epoch": 0.3475808845354705, + "grad_norm": 1.2283254861831665, + "learning_rate": 4.1232261404192865e-06, + "loss": 0.0162, + "step": 1171, + "video_reward_cumulative_accuracy": 0.8035866780529461 + }, + { + "epoch": 0.3478777085188483, + "grad_norm": 1.2608306407928467, + "learning_rate": 4.12125519311233e-06, + "loss": 0.0137, + "step": 1172, + "video_reward_cumulative_accuracy": 0.8037542662116041 + }, + { + "epoch": 0.3481745325022262, + "grad_norm": 1.785900354385376, + "learning_rate": 4.119282505230569e-06, + "loss": 0.0385, + "step": 1173, + "video_reward_cumulative_accuracy": 0.8034953111679455 + }, + { + "epoch": 0.348471356485604, + "grad_norm": 2.0561554431915283, + "learning_rate": 4.117308078891876e-06, + "loss": 0.0453, + "step": 1174, + "video_reward_cumulative_accuracy": 0.8036626916524702 + }, + { + "epoch": 0.3487681804689819, + "grad_norm": 4.3540730476379395, + "learning_rate": 4.115331916215987e-06, + "loss": 0.0697, + "step": 1175, + "video_reward_cumulative_accuracy": 0.8038297872340425 + }, + { + "epoch": 0.3490650044523598, + "grad_norm": 2.7298593521118164, + "learning_rate": 4.1133540193245056e-06, + "loss": 0.0258, + "step": 1176, + "video_reward_cumulative_accuracy": 0.8039965986394558 + }, + { + "epoch": 0.3493618284357376, + "grad_norm": 1.9880746603012085, + "learning_rate": 4.111374390340895e-06, + "loss": 0.0545, + "step": 1177, + "video_reward_cumulative_accuracy": 0.8037383177570093 + }, + { + "epoch": 0.3496586524191155, + "grad_norm": 2.990823745727539, + "learning_rate": 4.109393031390482e-06, + "loss": 0.0543, + "step": 1178, + "video_reward_cumulative_accuracy": 0.8034804753820034 + }, + { + "epoch": 0.3499554764024933, + "grad_norm": 2.0915024280548096, + "learning_rate": 4.107409944600444e-06, + "loss": 0.0747, + "step": 1179, + "video_reward_cumulative_accuracy": 0.8036471586089907 + }, + { + "epoch": 0.3502523003858712, + "grad_norm": 1.47837495803833, + "learning_rate": 4.105425132099821e-06, + "loss": 0.0526, + "step": 1180, + "video_reward_cumulative_accuracy": 0.8038135593220339 + }, + { + "epoch": 0.350549124369249, + "grad_norm": 1.934434175491333, + "learning_rate": 4.103438596019498e-06, + "loss": 0.0385, + "step": 1181, + "video_reward_cumulative_accuracy": 0.8039796782387807 + }, + { + "epoch": 0.3508459483526269, + "grad_norm": 1.294867753982544, + "learning_rate": 4.1014503384922164e-06, + "loss": 0.0314, + "step": 1182, + "video_reward_cumulative_accuracy": 0.8041455160744501 + }, + { + "epoch": 0.35114277233600477, + "grad_norm": 2.0357749462127686, + "learning_rate": 4.099460361652563e-06, + "loss": 0.0253, + "step": 1183, + "video_reward_cumulative_accuracy": 0.8043110735418427 + }, + { + "epoch": 0.3514395963193826, + "grad_norm": 2.3916919231414795, + "learning_rate": 4.097468667636971e-06, + "loss": 0.0447, + "step": 1184, + "video_reward_cumulative_accuracy": 0.8040540540540541 + }, + { + "epoch": 0.3517364203027605, + "grad_norm": 2.0069797039031982, + "learning_rate": 4.095475258583719e-06, + "loss": 0.0179, + "step": 1185, + "video_reward_cumulative_accuracy": 0.8042194092827004 + }, + { + "epoch": 0.3520332442861383, + "grad_norm": 3.129631996154785, + "learning_rate": 4.093480136632922e-06, + "loss": 0.1022, + "step": 1186, + "video_reward_cumulative_accuracy": 0.8039629005059022 + }, + { + "epoch": 0.3523300682695162, + "grad_norm": 1.985904574394226, + "learning_rate": 4.09148330392654e-06, + "loss": 0.0333, + "step": 1187, + "video_reward_cumulative_accuracy": 0.8041280539174389 + }, + { + "epoch": 0.352626892252894, + "grad_norm": 2.5807793140411377, + "learning_rate": 4.089484762608365e-06, + "loss": 0.0602, + "step": 1188, + "video_reward_cumulative_accuracy": 0.8042929292929293 + }, + { + "epoch": 0.3529237162362719, + "grad_norm": 2.0883278846740723, + "learning_rate": 4.0874845148240265e-06, + "loss": 0.0711, + "step": 1189, + "video_reward_cumulative_accuracy": 0.804457527333894 + }, + { + "epoch": 0.35322054021964977, + "grad_norm": 1.2518675327301025, + "learning_rate": 4.085482562720983e-06, + "loss": 0.0395, + "step": 1190, + "video_reward_cumulative_accuracy": 0.8042016806722689 + }, + { + "epoch": 0.3535173642030276, + "grad_norm": 1.957599401473999, + "learning_rate": 4.083478908448525e-06, + "loss": 0.0636, + "step": 1191, + "video_reward_cumulative_accuracy": 0.8043660789252729 + }, + { + "epoch": 0.3538141881864055, + "grad_norm": 2.872051954269409, + "learning_rate": 4.08147355415777e-06, + "loss": 0.0783, + "step": 1192, + "video_reward_cumulative_accuracy": 0.8045302013422819 + }, + { + "epoch": 0.3541110121697833, + "grad_norm": 5.688748836517334, + "learning_rate": 4.07946650200166e-06, + "loss": 0.0614, + "step": 1193, + "video_reward_cumulative_accuracy": 0.8046940486169321 + }, + { + "epoch": 0.3544078361531612, + "grad_norm": 1.7336045503616333, + "learning_rate": 4.0774577541349605e-06, + "loss": 0.0619, + "step": 1194, + "video_reward_cumulative_accuracy": 0.8044388609715243 + }, + { + "epoch": 0.354704660136539, + "grad_norm": 2.06296968460083, + "learning_rate": 4.075447312714258e-06, + "loss": 0.0263, + "step": 1195, + "video_reward_cumulative_accuracy": 0.80418410041841 + }, + { + "epoch": 0.3550014841199169, + "grad_norm": 1.0817362070083618, + "learning_rate": 4.073435179897956e-06, + "loss": 0.0567, + "step": 1196, + "video_reward_cumulative_accuracy": 0.8035117056856187 + }, + { + "epoch": 0.35529830810329477, + "grad_norm": 1.0421109199523926, + "learning_rate": 4.071421357846274e-06, + "loss": 0.0549, + "step": 1197, + "video_reward_cumulative_accuracy": 0.8036758563074352 + }, + { + "epoch": 0.3555951320866726, + "grad_norm": 2.0795469284057617, + "learning_rate": 4.0694058487212464e-06, + "loss": 0.0329, + "step": 1198, + "video_reward_cumulative_accuracy": 0.8038397328881469 + }, + { + "epoch": 0.3558919560700505, + "grad_norm": 2.0918068885803223, + "learning_rate": 4.067388654686717e-06, + "loss": 0.0445, + "step": 1199, + "video_reward_cumulative_accuracy": 0.8035863219349458 + }, + { + "epoch": 0.3561887800534283, + "grad_norm": 1.405401349067688, + "learning_rate": 4.065369777908339e-06, + "loss": 0.0439, + "step": 1200, + "video_reward_cumulative_accuracy": 0.80375 + }, + { + "epoch": 0.3561887800534283, + "eval_runtime": 129.9202, + "eval_samples_per_second": 6.073, + "eval_steps_per_second": 0.762, + "eval_test_set_accuracy": 0.7689393939393939, + "step": 1200 + }, + { + "epoch": 0.3564856040368062, + "grad_norm": 1.2888494729995728, + "learning_rate": 4.063349220553573e-06, + "loss": 0.0477, + "step": 1201, + "video_reward_cumulative_accuracy": 0.8030807660283097 + }, + { + "epoch": 0.356782428020184, + "grad_norm": 2.8064706325531006, + "learning_rate": 4.0613269847916845e-06, + "loss": 0.0431, + "step": 1202, + "video_reward_cumulative_accuracy": 0.8032445923460898 + }, + { + "epoch": 0.3570792520035619, + "grad_norm": 1.3145371675491333, + "learning_rate": 4.059303072793739e-06, + "loss": 0.0215, + "step": 1203, + "video_reward_cumulative_accuracy": 0.8034081463009144 + }, + { + "epoch": 0.35737607598693977, + "grad_norm": 1.8004136085510254, + "learning_rate": 4.057277486732601e-06, + "loss": 0.1032, + "step": 1204, + "video_reward_cumulative_accuracy": 0.8035714285714286 + }, + { + "epoch": 0.3576728999703176, + "grad_norm": 0.9853895902633667, + "learning_rate": 4.0552502287829365e-06, + "loss": 0.0421, + "step": 1205, + "video_reward_cumulative_accuracy": 0.8033195020746888 + }, + { + "epoch": 0.3579697239536955, + "grad_norm": 2.2384772300720215, + "learning_rate": 4.0532213011212025e-06, + "loss": 0.0396, + "step": 1206, + "video_reward_cumulative_accuracy": 0.8034825870646766 + }, + { + "epoch": 0.3582665479370733, + "grad_norm": 1.6136014461517334, + "learning_rate": 4.0511907059256485e-06, + "loss": 0.0425, + "step": 1207, + "video_reward_cumulative_accuracy": 0.8036454018227009 + }, + { + "epoch": 0.3585633719204512, + "grad_norm": 0.9855162501335144, + "learning_rate": 4.049158445376318e-06, + "loss": 0.0325, + "step": 1208, + "video_reward_cumulative_accuracy": 0.8038079470198676 + }, + { + "epoch": 0.358860195903829, + "grad_norm": 2.7680773735046387, + "learning_rate": 4.047124521655037e-06, + "loss": 0.0559, + "step": 1209, + "video_reward_cumulative_accuracy": 0.803556658395368 + }, + { + "epoch": 0.3591570198872069, + "grad_norm": 1.1192249059677124, + "learning_rate": 4.045088936945423e-06, + "loss": 0.0224, + "step": 1210, + "video_reward_cumulative_accuracy": 0.8037190082644629 + }, + { + "epoch": 0.35945384387058477, + "grad_norm": 1.8444184064865112, + "learning_rate": 4.043051693432871e-06, + "loss": 0.0565, + "step": 1211, + "video_reward_cumulative_accuracy": 0.8038810900082577 + }, + { + "epoch": 0.3597506678539626, + "grad_norm": 2.4251158237457275, + "learning_rate": 4.041012793304563e-06, + "loss": 0.0407, + "step": 1212, + "video_reward_cumulative_accuracy": 0.804042904290429 + }, + { + "epoch": 0.36004749183734047, + "grad_norm": 1.5506036281585693, + "learning_rate": 4.038972238749452e-06, + "loss": 0.0247, + "step": 1213, + "video_reward_cumulative_accuracy": 0.8042044517724649 + }, + { + "epoch": 0.3603443158207183, + "grad_norm": 4.467177391052246, + "learning_rate": 4.036930031958275e-06, + "loss": 0.0588, + "step": 1214, + "video_reward_cumulative_accuracy": 0.8043657331136738 + }, + { + "epoch": 0.3606411398040962, + "grad_norm": 2.9094066619873047, + "learning_rate": 4.034886175123537e-06, + "loss": 0.0309, + "step": 1215, + "video_reward_cumulative_accuracy": 0.8045267489711934 + }, + { + "epoch": 0.360937963787474, + "grad_norm": 2.6900370121002197, + "learning_rate": 4.032840670439517e-06, + "loss": 0.0351, + "step": 1216, + "video_reward_cumulative_accuracy": 0.8046875 + }, + { + "epoch": 0.3612347877708519, + "grad_norm": 3.0466442108154297, + "learning_rate": 4.030793520102264e-06, + "loss": 0.0434, + "step": 1217, + "video_reward_cumulative_accuracy": 0.804847986852917 + }, + { + "epoch": 0.36153161175422976, + "grad_norm": 0.9686444401741028, + "learning_rate": 4.028744726309592e-06, + "loss": 0.0301, + "step": 1218, + "video_reward_cumulative_accuracy": 0.805008210180624 + }, + { + "epoch": 0.3618284357376076, + "grad_norm": 1.4730597734451294, + "learning_rate": 4.02669429126108e-06, + "loss": 0.0612, + "step": 1219, + "video_reward_cumulative_accuracy": 0.8051681706316653 + }, + { + "epoch": 0.36212525972098547, + "grad_norm": 4.798864841461182, + "learning_rate": 4.024642217158068e-06, + "loss": 0.0744, + "step": 1220, + "video_reward_cumulative_accuracy": 0.8045081967213115 + }, + { + "epoch": 0.3624220837043633, + "grad_norm": 2.4471278190612793, + "learning_rate": 4.022588506203658e-06, + "loss": 0.035, + "step": 1221, + "video_reward_cumulative_accuracy": 0.8046683046683046 + }, + { + "epoch": 0.3627189076877412, + "grad_norm": 7.197712421417236, + "learning_rate": 4.020533160602708e-06, + "loss": 0.0859, + "step": 1222, + "video_reward_cumulative_accuracy": 0.8048281505728314 + }, + { + "epoch": 0.363015731671119, + "grad_norm": 2.378415107727051, + "learning_rate": 4.018476182561829e-06, + "loss": 0.053, + "step": 1223, + "video_reward_cumulative_accuracy": 0.8049877350776778 + }, + { + "epoch": 0.3633125556544969, + "grad_norm": 1.3243201971054077, + "learning_rate": 4.0164175742893894e-06, + "loss": 0.0264, + "step": 1224, + "video_reward_cumulative_accuracy": 0.8051470588235294 + }, + { + "epoch": 0.36360937963787476, + "grad_norm": 4.309061050415039, + "learning_rate": 4.014357337995504e-06, + "loss": 0.0634, + "step": 1225, + "video_reward_cumulative_accuracy": 0.8048979591836735 + }, + { + "epoch": 0.3639062036212526, + "grad_norm": 2.373619318008423, + "learning_rate": 4.012295475892036e-06, + "loss": 0.0803, + "step": 1226, + "video_reward_cumulative_accuracy": 0.8050570962479608 + }, + { + "epoch": 0.36420302760463047, + "grad_norm": 6.605100154876709, + "learning_rate": 4.0102319901925945e-06, + "loss": 0.076, + "step": 1227, + "video_reward_cumulative_accuracy": 0.8048084759576202 + }, + { + "epoch": 0.3644998515880083, + "grad_norm": 1.552177906036377, + "learning_rate": 4.008166883112532e-06, + "loss": 0.0249, + "step": 1228, + "video_reward_cumulative_accuracy": 0.8049674267100977 + }, + { + "epoch": 0.3647966755713862, + "grad_norm": 1.2064961194992065, + "learning_rate": 4.00610015686894e-06, + "loss": 0.0212, + "step": 1229, + "video_reward_cumulative_accuracy": 0.8051261187957689 + }, + { + "epoch": 0.365093499554764, + "grad_norm": 1.4522373676300049, + "learning_rate": 4.004031813680652e-06, + "loss": 0.057, + "step": 1230, + "video_reward_cumulative_accuracy": 0.8052845528455285 + }, + { + "epoch": 0.3653903235381419, + "grad_norm": 3.753844738006592, + "learning_rate": 4.0019618557682345e-06, + "loss": 0.0332, + "step": 1231, + "video_reward_cumulative_accuracy": 0.8050365556458164 + }, + { + "epoch": 0.36568714752151976, + "grad_norm": 0.7311299443244934, + "learning_rate": 3.999890285353988e-06, + "loss": 0.0161, + "step": 1232, + "video_reward_cumulative_accuracy": 0.8051948051948052 + }, + { + "epoch": 0.3659839715048976, + "grad_norm": 1.8597936630249023, + "learning_rate": 3.997817104661943e-06, + "loss": 0.0308, + "step": 1233, + "video_reward_cumulative_accuracy": 0.805352798053528 + }, + { + "epoch": 0.36628079548827547, + "grad_norm": 1.923897624015808, + "learning_rate": 3.995742315917862e-06, + "loss": 0.0338, + "step": 1234, + "video_reward_cumulative_accuracy": 0.8055105348460292 + }, + { + "epoch": 0.3665776194716533, + "grad_norm": 1.4819157123565674, + "learning_rate": 3.993665921349232e-06, + "loss": 0.0398, + "step": 1235, + "video_reward_cumulative_accuracy": 0.805668016194332 + }, + { + "epoch": 0.3668744434550312, + "grad_norm": 3.7512669563293457, + "learning_rate": 3.991587923185263e-06, + "loss": 0.0493, + "step": 1236, + "video_reward_cumulative_accuracy": 0.8050161812297735 + }, + { + "epoch": 0.367171267438409, + "grad_norm": 0.6202178597450256, + "learning_rate": 3.989508323656888e-06, + "loss": 0.0137, + "step": 1237, + "video_reward_cumulative_accuracy": 0.8051738075990299 + }, + { + "epoch": 0.3674680914217869, + "grad_norm": 3.8399429321289062, + "learning_rate": 3.987427124996759e-06, + "loss": 0.0561, + "step": 1238, + "video_reward_cumulative_accuracy": 0.8053311793214862 + }, + { + "epoch": 0.36776491540516476, + "grad_norm": 1.2864596843719482, + "learning_rate": 3.985344329439246e-06, + "loss": 0.0431, + "step": 1239, + "video_reward_cumulative_accuracy": 0.8050847457627118 + }, + { + "epoch": 0.3680617393885426, + "grad_norm": 2.3478002548217773, + "learning_rate": 3.983259939220431e-06, + "loss": 0.03, + "step": 1240, + "video_reward_cumulative_accuracy": 0.805241935483871 + }, + { + "epoch": 0.36835856337192047, + "grad_norm": 4.141085624694824, + "learning_rate": 3.9811739565781085e-06, + "loss": 0.0648, + "step": 1241, + "video_reward_cumulative_accuracy": 0.8053988718775181 + }, + { + "epoch": 0.3686553873552983, + "grad_norm": 3.281418800354004, + "learning_rate": 3.979086383751786e-06, + "loss": 0.0348, + "step": 1242, + "video_reward_cumulative_accuracy": 0.8055555555555556 + }, + { + "epoch": 0.3689522113386762, + "grad_norm": 2.6300387382507324, + "learning_rate": 3.976997222982671e-06, + "loss": 0.0671, + "step": 1243, + "video_reward_cumulative_accuracy": 0.8053097345132744 + }, + { + "epoch": 0.369249035322054, + "grad_norm": 2.545103073120117, + "learning_rate": 3.974906476513686e-06, + "loss": 0.0264, + "step": 1244, + "video_reward_cumulative_accuracy": 0.805064308681672 + }, + { + "epoch": 0.3695458593054319, + "grad_norm": 3.0602078437805176, + "learning_rate": 3.972814146589446e-06, + "loss": 0.051, + "step": 1245, + "video_reward_cumulative_accuracy": 0.8052208835341366 + }, + { + "epoch": 0.36984268328880976, + "grad_norm": 3.791985273361206, + "learning_rate": 3.970720235456272e-06, + "loss": 0.0344, + "step": 1246, + "video_reward_cumulative_accuracy": 0.8053772070626003 + }, + { + "epoch": 0.3701395072721876, + "grad_norm": 2.767526388168335, + "learning_rate": 3.96862474536218e-06, + "loss": 0.0703, + "step": 1247, + "video_reward_cumulative_accuracy": 0.8051323175621492 + }, + { + "epoch": 0.37043633125556547, + "grad_norm": 3.0397701263427734, + "learning_rate": 3.9665276785568825e-06, + "loss": 0.0825, + "step": 1248, + "video_reward_cumulative_accuracy": 0.8048878205128205 + }, + { + "epoch": 0.3707331552389433, + "grad_norm": 1.6637073755264282, + "learning_rate": 3.964429037291785e-06, + "loss": 0.0129, + "step": 1249, + "video_reward_cumulative_accuracy": 0.8050440352281826 + }, + { + "epoch": 0.37102997922232117, + "grad_norm": 4.110602855682373, + "learning_rate": 3.962328823819981e-06, + "loss": 0.0622, + "step": 1250, + "video_reward_cumulative_accuracy": 0.8048 + }, + { + "epoch": 0.371326803205699, + "grad_norm": 2.5476889610290527, + "learning_rate": 3.960227040396255e-06, + "loss": 0.0384, + "step": 1251, + "video_reward_cumulative_accuracy": 0.8049560351718625 + }, + { + "epoch": 0.3716236271890769, + "grad_norm": 0.3595353364944458, + "learning_rate": 3.958123689277074e-06, + "loss": 0.0129, + "step": 1252, + "video_reward_cumulative_accuracy": 0.805111821086262 + }, + { + "epoch": 0.37192045117245476, + "grad_norm": 0.8653481602668762, + "learning_rate": 3.956018772720591e-06, + "loss": 0.0145, + "step": 1253, + "video_reward_cumulative_accuracy": 0.8052673583399841 + }, + { + "epoch": 0.3722172751558326, + "grad_norm": 4.666868209838867, + "learning_rate": 3.953912292986637e-06, + "loss": 0.0618, + "step": 1254, + "video_reward_cumulative_accuracy": 0.8054226475279107 + }, + { + "epoch": 0.37251409913921046, + "grad_norm": 1.4767639636993408, + "learning_rate": 3.951804252336723e-06, + "loss": 0.027, + "step": 1255, + "video_reward_cumulative_accuracy": 0.8055776892430279 + }, + { + "epoch": 0.3728109231225883, + "grad_norm": 1.8188785314559937, + "learning_rate": 3.949694653034036e-06, + "loss": 0.0648, + "step": 1256, + "video_reward_cumulative_accuracy": 0.8057324840764332 + }, + { + "epoch": 0.37310774710596617, + "grad_norm": 4.584212779998779, + "learning_rate": 3.9475834973434345e-06, + "loss": 0.0546, + "step": 1257, + "video_reward_cumulative_accuracy": 0.8058870326173428 + }, + { + "epoch": 0.373404571089344, + "grad_norm": 2.5021183490753174, + "learning_rate": 3.94547078753145e-06, + "loss": 0.0283, + "step": 1258, + "video_reward_cumulative_accuracy": 0.8060413354531002 + }, + { + "epoch": 0.3737013950727219, + "grad_norm": 0.826378345489502, + "learning_rate": 3.94335652586628e-06, + "loss": 0.0123, + "step": 1259, + "video_reward_cumulative_accuracy": 0.8061953931691819 + }, + { + "epoch": 0.37399821905609976, + "grad_norm": 2.355395555496216, + "learning_rate": 3.941240714617791e-06, + "loss": 0.0221, + "step": 1260, + "video_reward_cumulative_accuracy": 0.8063492063492064 + }, + { + "epoch": 0.3742950430394776, + "grad_norm": 0.9242327213287354, + "learning_rate": 3.9391233560575116e-06, + "loss": 0.011, + "step": 1261, + "video_reward_cumulative_accuracy": 0.8065027755749405 + }, + { + "epoch": 0.37459186702285546, + "grad_norm": 1.269935131072998, + "learning_rate": 3.937004452458631e-06, + "loss": 0.0226, + "step": 1262, + "video_reward_cumulative_accuracy": 0.8066561014263075 + }, + { + "epoch": 0.3748886910062333, + "grad_norm": 1.5934422016143799, + "learning_rate": 3.9348840060959985e-06, + "loss": 0.0288, + "step": 1263, + "video_reward_cumulative_accuracy": 0.8068091844813935 + }, + { + "epoch": 0.37518551498961117, + "grad_norm": 1.6049624681472778, + "learning_rate": 3.932762019246119e-06, + "loss": 0.0327, + "step": 1264, + "video_reward_cumulative_accuracy": 0.8065664556962026 + }, + { + "epoch": 0.375482338972989, + "grad_norm": 2.5892493724823, + "learning_rate": 3.930638494187151e-06, + "loss": 0.0368, + "step": 1265, + "video_reward_cumulative_accuracy": 0.8063241106719368 + }, + { + "epoch": 0.3757791629563669, + "grad_norm": 4.644944667816162, + "learning_rate": 3.928513433198905e-06, + "loss": 0.1756, + "step": 1266, + "video_reward_cumulative_accuracy": 0.8060821484992101 + }, + { + "epoch": 0.37607598693974476, + "grad_norm": 3.284151315689087, + "learning_rate": 3.92638683856284e-06, + "loss": 0.0922, + "step": 1267, + "video_reward_cumulative_accuracy": 0.8058405682715075 + }, + { + "epoch": 0.3763728109231226, + "grad_norm": 1.4588419198989868, + "learning_rate": 3.924258712562061e-06, + "loss": 0.0219, + "step": 1268, + "video_reward_cumulative_accuracy": 0.805993690851735 + }, + { + "epoch": 0.37666963490650046, + "grad_norm": 2.8274147510528564, + "learning_rate": 3.9221290574813205e-06, + "loss": 0.0222, + "step": 1269, + "video_reward_cumulative_accuracy": 0.806146572104019 + }, + { + "epoch": 0.3769664588898783, + "grad_norm": 2.812047243118286, + "learning_rate": 3.919997875607008e-06, + "loss": 0.0383, + "step": 1270, + "video_reward_cumulative_accuracy": 0.8062992125984252 + }, + { + "epoch": 0.37726328287325617, + "grad_norm": 5.075555324554443, + "learning_rate": 3.917865169227154e-06, + "loss": 0.0635, + "step": 1271, + "video_reward_cumulative_accuracy": 0.8060582218725413 + }, + { + "epoch": 0.377560106856634, + "grad_norm": 1.0230021476745605, + "learning_rate": 3.915730940631426e-06, + "loss": 0.0303, + "step": 1272, + "video_reward_cumulative_accuracy": 0.8058176100628931 + }, + { + "epoch": 0.3778569308400119, + "grad_norm": 1.6530554294586182, + "learning_rate": 3.913595192111124e-06, + "loss": 0.0386, + "step": 1273, + "video_reward_cumulative_accuracy": 0.8059701492537313 + }, + { + "epoch": 0.37815375482338975, + "grad_norm": 5.165441513061523, + "learning_rate": 3.911457925959185e-06, + "loss": 0.062, + "step": 1274, + "video_reward_cumulative_accuracy": 0.8057299843014128 + }, + { + "epoch": 0.3784505788067676, + "grad_norm": 1.2560231685638428, + "learning_rate": 3.909319144470169e-06, + "loss": 0.0234, + "step": 1275, + "video_reward_cumulative_accuracy": 0.8058823529411765 + }, + { + "epoch": 0.37874740279014546, + "grad_norm": 0.828478991985321, + "learning_rate": 3.907178849940266e-06, + "loss": 0.0111, + "step": 1276, + "video_reward_cumulative_accuracy": 0.8060344827586207 + }, + { + "epoch": 0.3790442267735233, + "grad_norm": 3.1586334705352783, + "learning_rate": 3.90503704466729e-06, + "loss": 0.0643, + "step": 1277, + "video_reward_cumulative_accuracy": 0.8061863743148003 + }, + { + "epoch": 0.37934105075690117, + "grad_norm": 3.0786163806915283, + "learning_rate": 3.902893730950676e-06, + "loss": 0.0786, + "step": 1278, + "video_reward_cumulative_accuracy": 0.8063380281690141 + }, + { + "epoch": 0.379637874740279, + "grad_norm": 2.47560453414917, + "learning_rate": 3.900748911091481e-06, + "loss": 0.0522, + "step": 1279, + "video_reward_cumulative_accuracy": 0.8060985144644254 + }, + { + "epoch": 0.37993469872365687, + "grad_norm": 2.573753595352173, + "learning_rate": 3.898602587392377e-06, + "loss": 0.0691, + "step": 1280, + "video_reward_cumulative_accuracy": 0.80625 + }, + { + "epoch": 0.38023152270703475, + "grad_norm": 6.1154656410217285, + "learning_rate": 3.89645476215765e-06, + "loss": 0.1061, + "step": 1281, + "video_reward_cumulative_accuracy": 0.8064012490241999 + }, + { + "epoch": 0.3805283466904126, + "grad_norm": 2.435875654220581, + "learning_rate": 3.894305437693198e-06, + "loss": 0.0309, + "step": 1282, + "video_reward_cumulative_accuracy": 0.8061622464898596 + }, + { + "epoch": 0.38082517067379046, + "grad_norm": 3.2194957733154297, + "learning_rate": 3.892154616306531e-06, + "loss": 0.0485, + "step": 1283, + "video_reward_cumulative_accuracy": 0.8063133281371785 + }, + { + "epoch": 0.3811219946571683, + "grad_norm": 2.315264940261841, + "learning_rate": 3.890002300306764e-06, + "loss": 0.0662, + "step": 1284, + "video_reward_cumulative_accuracy": 0.8064641744548287 + }, + { + "epoch": 0.38141881864054616, + "grad_norm": 2.33933687210083, + "learning_rate": 3.887848492004618e-06, + "loss": 0.0468, + "step": 1285, + "video_reward_cumulative_accuracy": 0.8066147859922179 + }, + { + "epoch": 0.381715642623924, + "grad_norm": 2.370605707168579, + "learning_rate": 3.885693193712413e-06, + "loss": 0.0385, + "step": 1286, + "video_reward_cumulative_accuracy": 0.8063763608087092 + }, + { + "epoch": 0.38201246660730187, + "grad_norm": 2.9528722763061523, + "learning_rate": 3.883536407744073e-06, + "loss": 0.0312, + "step": 1287, + "video_reward_cumulative_accuracy": 0.8061383061383062 + }, + { + "epoch": 0.38230929059067975, + "grad_norm": 1.1673752069473267, + "learning_rate": 3.881378136415117e-06, + "loss": 0.0343, + "step": 1288, + "video_reward_cumulative_accuracy": 0.8062888198757764 + }, + { + "epoch": 0.3826061145740576, + "grad_norm": 1.4453524351119995, + "learning_rate": 3.8792183820426575e-06, + "loss": 0.0593, + "step": 1289, + "video_reward_cumulative_accuracy": 0.8064391000775796 + }, + { + "epoch": 0.38290293855743546, + "grad_norm": 2.969148874282837, + "learning_rate": 3.877057146945401e-06, + "loss": 0.0384, + "step": 1290, + "video_reward_cumulative_accuracy": 0.8065891472868217 + }, + { + "epoch": 0.3831997625408133, + "grad_norm": 2.2823967933654785, + "learning_rate": 3.874894433443643e-06, + "loss": 0.0443, + "step": 1291, + "video_reward_cumulative_accuracy": 0.8067389620449265 + }, + { + "epoch": 0.38349658652419116, + "grad_norm": 1.4445525407791138, + "learning_rate": 3.872730243859267e-06, + "loss": 0.0504, + "step": 1292, + "video_reward_cumulative_accuracy": 0.8065015479876161 + }, + { + "epoch": 0.383793410507569, + "grad_norm": 1.5774403810501099, + "learning_rate": 3.87056458051574e-06, + "loss": 0.0322, + "step": 1293, + "video_reward_cumulative_accuracy": 0.8066511987625676 + }, + { + "epoch": 0.38409023449094687, + "grad_norm": 2.641799211502075, + "learning_rate": 3.868397445738112e-06, + "loss": 0.0246, + "step": 1294, + "video_reward_cumulative_accuracy": 0.8068006182380216 + }, + { + "epoch": 0.38438705847432475, + "grad_norm": 2.0484931468963623, + "learning_rate": 3.866228841853012e-06, + "loss": 0.0937, + "step": 1295, + "video_reward_cumulative_accuracy": 0.806949806949807 + }, + { + "epoch": 0.3846838824577026, + "grad_norm": 3.79331374168396, + "learning_rate": 3.864058771188648e-06, + "loss": 0.0471, + "step": 1296, + "video_reward_cumulative_accuracy": 0.8070987654320988 + }, + { + "epoch": 0.38498070644108046, + "grad_norm": 1.6458531618118286, + "learning_rate": 3.861887236074801e-06, + "loss": 0.0402, + "step": 1297, + "video_reward_cumulative_accuracy": 0.8072474942174248 + }, + { + "epoch": 0.3852775304244583, + "grad_norm": 2.398191213607788, + "learning_rate": 3.859714238842823e-06, + "loss": 0.0288, + "step": 1298, + "video_reward_cumulative_accuracy": 0.8073959938366718 + }, + { + "epoch": 0.38557435440783616, + "grad_norm": 1.3121765851974487, + "learning_rate": 3.8575397818256396e-06, + "loss": 0.0379, + "step": 1299, + "video_reward_cumulative_accuracy": 0.8075442648190916 + }, + { + "epoch": 0.385871178391214, + "grad_norm": 2.593432903289795, + "learning_rate": 3.855363867357741e-06, + "loss": 0.0366, + "step": 1300, + "video_reward_cumulative_accuracy": 0.8076923076923077 + }, + { + "epoch": 0.38616800237459187, + "grad_norm": 2.9913852214813232, + "learning_rate": 3.853186497775181e-06, + "loss": 0.0427, + "step": 1301, + "video_reward_cumulative_accuracy": 0.8078401229823213 + }, + { + "epoch": 0.38646482635796975, + "grad_norm": 1.1634633541107178, + "learning_rate": 3.85100767541558e-06, + "loss": 0.0457, + "step": 1302, + "video_reward_cumulative_accuracy": 0.8079877112135176 + }, + { + "epoch": 0.3867616503413476, + "grad_norm": 2.312039852142334, + "learning_rate": 3.8488274026181125e-06, + "loss": 0.0337, + "step": 1303, + "video_reward_cumulative_accuracy": 0.8077513430544896 + }, + { + "epoch": 0.38705847432472545, + "grad_norm": 0.9711390733718872, + "learning_rate": 3.846645681723514e-06, + "loss": 0.0367, + "step": 1304, + "video_reward_cumulative_accuracy": 0.807898773006135 + }, + { + "epoch": 0.3873552983081033, + "grad_norm": 0.5557654500007629, + "learning_rate": 3.844462515074075e-06, + "loss": 0.0175, + "step": 1305, + "video_reward_cumulative_accuracy": 0.8080459770114943 + }, + { + "epoch": 0.38765212229148116, + "grad_norm": 1.1500357389450073, + "learning_rate": 3.842277905013634e-06, + "loss": 0.0308, + "step": 1306, + "video_reward_cumulative_accuracy": 0.8081929555895865 + }, + { + "epoch": 0.387948946274859, + "grad_norm": 1.2327475547790527, + "learning_rate": 3.840091853887585e-06, + "loss": 0.0429, + "step": 1307, + "video_reward_cumulative_accuracy": 0.8083397092578424 + }, + { + "epoch": 0.38824577025823687, + "grad_norm": 0.717802107334137, + "learning_rate": 3.837904364042864e-06, + "loss": 0.0256, + "step": 1308, + "video_reward_cumulative_accuracy": 0.8084862385321101 + }, + { + "epoch": 0.38854259424161475, + "grad_norm": 1.6281253099441528, + "learning_rate": 3.835715437827954e-06, + "loss": 0.0191, + "step": 1309, + "video_reward_cumulative_accuracy": 0.8086325439266616 + }, + { + "epoch": 0.3888394182249926, + "grad_norm": 2.4564850330352783, + "learning_rate": 3.83352507759288e-06, + "loss": 0.0526, + "step": 1310, + "video_reward_cumulative_accuracy": 0.8087786259541985 + }, + { + "epoch": 0.38913624220837045, + "grad_norm": 1.5885370969772339, + "learning_rate": 3.831333285689207e-06, + "loss": 0.0145, + "step": 1311, + "video_reward_cumulative_accuracy": 0.8089244851258581 + }, + { + "epoch": 0.3894330661917483, + "grad_norm": 3.0064384937286377, + "learning_rate": 3.829140064470035e-06, + "loss": 0.0724, + "step": 1312, + "video_reward_cumulative_accuracy": 0.8090701219512195 + }, + { + "epoch": 0.38972989017512616, + "grad_norm": 0.9188132882118225, + "learning_rate": 3.826945416290001e-06, + "loss": 0.012, + "step": 1313, + "video_reward_cumulative_accuracy": 0.8092155369383092 + }, + { + "epoch": 0.390026714158504, + "grad_norm": 2.3891713619232178, + "learning_rate": 3.824749343505271e-06, + "loss": 0.0717, + "step": 1314, + "video_reward_cumulative_accuracy": 0.8089802130898022 + }, + { + "epoch": 0.39032353814188187, + "grad_norm": 2.077953577041626, + "learning_rate": 3.822551848473545e-06, + "loss": 0.0498, + "step": 1315, + "video_reward_cumulative_accuracy": 0.8091254752851711 + }, + { + "epoch": 0.39062036212525975, + "grad_norm": 1.5445294380187988, + "learning_rate": 3.820352933554045e-06, + "loss": 0.0276, + "step": 1316, + "video_reward_cumulative_accuracy": 0.8088905775075987 + }, + { + "epoch": 0.39091718610863757, + "grad_norm": 3.751812219619751, + "learning_rate": 3.81815260110752e-06, + "loss": 0.0526, + "step": 1317, + "video_reward_cumulative_accuracy": 0.8090356871678056 + }, + { + "epoch": 0.39121401009201545, + "grad_norm": 3.9450860023498535, + "learning_rate": 3.815950853496242e-06, + "loss": 0.0608, + "step": 1318, + "video_reward_cumulative_accuracy": 0.8091805766312595 + }, + { + "epoch": 0.3915108340753933, + "grad_norm": 2.4571402072906494, + "learning_rate": 3.813747693083999e-06, + "loss": 0.0406, + "step": 1319, + "video_reward_cumulative_accuracy": 0.809325246398787 + }, + { + "epoch": 0.39180765805877116, + "grad_norm": 0.831182062625885, + "learning_rate": 3.8115431222360984e-06, + "loss": 0.0075, + "step": 1320, + "video_reward_cumulative_accuracy": 0.809469696969697 + }, + { + "epoch": 0.392104482042149, + "grad_norm": 5.207632541656494, + "learning_rate": 3.80933714331936e-06, + "loss": 0.0581, + "step": 1321, + "video_reward_cumulative_accuracy": 0.8096139288417865 + }, + { + "epoch": 0.39240130602552686, + "grad_norm": 0.5821350812911987, + "learning_rate": 3.807129758702117e-06, + "loss": 0.0128, + "step": 1322, + "video_reward_cumulative_accuracy": 0.8097579425113465 + }, + { + "epoch": 0.39269813000890474, + "grad_norm": 4.091737747192383, + "learning_rate": 3.804920970754211e-06, + "loss": 0.032, + "step": 1323, + "video_reward_cumulative_accuracy": 0.809901738473167 + }, + { + "epoch": 0.39299495399228257, + "grad_norm": 1.5021380186080933, + "learning_rate": 3.802710781846991e-06, + "loss": 0.017, + "step": 1324, + "video_reward_cumulative_accuracy": 0.8100453172205438 + }, + { + "epoch": 0.39329177797566045, + "grad_norm": 3.842895984649658, + "learning_rate": 3.8004991943533077e-06, + "loss": 0.059, + "step": 1325, + "video_reward_cumulative_accuracy": 0.810188679245283 + }, + { + "epoch": 0.3935886019590383, + "grad_norm": 3.194486141204834, + "learning_rate": 3.798286210647516e-06, + "loss": 0.0299, + "step": 1326, + "video_reward_cumulative_accuracy": 0.8103318250377074 + }, + { + "epoch": 0.39388542594241616, + "grad_norm": 2.559457302093506, + "learning_rate": 3.796071833105468e-06, + "loss": 0.0648, + "step": 1327, + "video_reward_cumulative_accuracy": 0.8100979653353428 + }, + { + "epoch": 0.394182249925794, + "grad_norm": 4.017072677612305, + "learning_rate": 3.793856064104514e-06, + "loss": 0.057, + "step": 1328, + "video_reward_cumulative_accuracy": 0.8102409638554217 + }, + { + "epoch": 0.39447907390917186, + "grad_norm": 0.940937876701355, + "learning_rate": 3.7916389060234964e-06, + "loss": 0.0176, + "step": 1329, + "video_reward_cumulative_accuracy": 0.8103837471783296 + }, + { + "epoch": 0.39477589789254974, + "grad_norm": 3.047013759613037, + "learning_rate": 3.78942036124275e-06, + "loss": 0.0547, + "step": 1330, + "video_reward_cumulative_accuracy": 0.8105263157894737 + }, + { + "epoch": 0.39507272187592757, + "grad_norm": 0.7967216372489929, + "learning_rate": 3.787200432144097e-06, + "loss": 0.0175, + "step": 1331, + "video_reward_cumulative_accuracy": 0.8106686701728024 + }, + { + "epoch": 0.39536954585930545, + "grad_norm": 3.501380205154419, + "learning_rate": 3.784979121110848e-06, + "loss": 0.0465, + "step": 1332, + "video_reward_cumulative_accuracy": 0.8108108108108109 + }, + { + "epoch": 0.3956663698426833, + "grad_norm": 3.338715076446533, + "learning_rate": 3.782756430527794e-06, + "loss": 0.0628, + "step": 1333, + "video_reward_cumulative_accuracy": 0.8105776444111028 + }, + { + "epoch": 0.39596319382606116, + "grad_norm": 4.169296741485596, + "learning_rate": 3.7805323627812108e-06, + "loss": 0.0453, + "step": 1334, + "video_reward_cumulative_accuracy": 0.81071964017991 + }, + { + "epoch": 0.396260017809439, + "grad_norm": 2.169301986694336, + "learning_rate": 3.778306920258852e-06, + "loss": 0.0294, + "step": 1335, + "video_reward_cumulative_accuracy": 0.8104868913857678 + }, + { + "epoch": 0.39655684179281686, + "grad_norm": 2.8611955642700195, + "learning_rate": 3.7760801053499435e-06, + "loss": 0.0706, + "step": 1336, + "video_reward_cumulative_accuracy": 0.8106287425149701 + }, + { + "epoch": 0.39685366577619474, + "grad_norm": 3.386845827102661, + "learning_rate": 3.7738519204451883e-06, + "loss": 0.0497, + "step": 1337, + "video_reward_cumulative_accuracy": 0.8107703814510098 + }, + { + "epoch": 0.39715048975957257, + "grad_norm": 3.034348249435425, + "learning_rate": 3.7716223679367604e-06, + "loss": 0.0708, + "step": 1338, + "video_reward_cumulative_accuracy": 0.8109118086696562 + }, + { + "epoch": 0.39744731374295045, + "grad_norm": 0.7038185000419617, + "learning_rate": 3.769391450218298e-06, + "loss": 0.0199, + "step": 1339, + "video_reward_cumulative_accuracy": 0.8110530246452576 + }, + { + "epoch": 0.3977441377263283, + "grad_norm": 2.768979072570801, + "learning_rate": 3.767159169684911e-06, + "loss": 0.0546, + "step": 1340, + "video_reward_cumulative_accuracy": 0.8111940298507463 + }, + { + "epoch": 0.39804096170970615, + "grad_norm": 2.743908405303955, + "learning_rate": 3.7649255287331676e-06, + "loss": 0.0425, + "step": 1341, + "video_reward_cumulative_accuracy": 0.8113348247576435 + }, + { + "epoch": 0.398337785693084, + "grad_norm": 2.2306787967681885, + "learning_rate": 3.762690529761097e-06, + "loss": 0.0258, + "step": 1342, + "video_reward_cumulative_accuracy": 0.8114754098360656 + }, + { + "epoch": 0.39863460967646186, + "grad_norm": 1.8014007806777954, + "learning_rate": 3.7604541751681904e-06, + "loss": 0.0331, + "step": 1343, + "video_reward_cumulative_accuracy": 0.8116157855547282 + }, + { + "epoch": 0.39893143365983974, + "grad_norm": 2.2490646839141846, + "learning_rate": 3.7582164673553888e-06, + "loss": 0.0227, + "step": 1344, + "video_reward_cumulative_accuracy": 0.8117559523809523 + }, + { + "epoch": 0.39922825764321757, + "grad_norm": 3.8205676078796387, + "learning_rate": 3.7559774087250906e-06, + "loss": 0.0826, + "step": 1345, + "video_reward_cumulative_accuracy": 0.8118959107806691 + }, + { + "epoch": 0.39952508162659545, + "grad_norm": 4.000797271728516, + "learning_rate": 3.753737001681142e-06, + "loss": 0.0942, + "step": 1346, + "video_reward_cumulative_accuracy": 0.812035661218425 + }, + { + "epoch": 0.39982190560997327, + "grad_norm": 3.239428758621216, + "learning_rate": 3.7514952486288365e-06, + "loss": 0.0449, + "step": 1347, + "video_reward_cumulative_accuracy": 0.811804008908686 + }, + { + "epoch": 0.40011872959335115, + "grad_norm": 4.8066725730896, + "learning_rate": 3.7492521519749146e-06, + "loss": 0.0516, + "step": 1348, + "video_reward_cumulative_accuracy": 0.8119436201780416 + }, + { + "epoch": 0.400415553576729, + "grad_norm": 3.3413074016571045, + "learning_rate": 3.7470077141275578e-06, + "loss": 0.0927, + "step": 1349, + "video_reward_cumulative_accuracy": 0.8117123795404003 + }, + { + "epoch": 0.40071237756010686, + "grad_norm": 1.3113895654678345, + "learning_rate": 3.744761937496389e-06, + "loss": 0.0308, + "step": 1350, + "video_reward_cumulative_accuracy": 0.8118518518518518 + }, + { + "epoch": 0.40100920154348474, + "grad_norm": 1.7896422147750854, + "learning_rate": 3.742514824492465e-06, + "loss": 0.0702, + "step": 1351, + "video_reward_cumulative_accuracy": 0.8119911176905995 + }, + { + "epoch": 0.40130602552686256, + "grad_norm": 3.303739309310913, + "learning_rate": 3.740266377528282e-06, + "loss": 0.0711, + "step": 1352, + "video_reward_cumulative_accuracy": 0.8121301775147929 + }, + { + "epoch": 0.40160284951024044, + "grad_norm": 0.9642285108566284, + "learning_rate": 3.738016599017766e-06, + "loss": 0.0306, + "step": 1353, + "video_reward_cumulative_accuracy": 0.8122690317812269 + }, + { + "epoch": 0.40189967349361827, + "grad_norm": 1.783601999282837, + "learning_rate": 3.735765491376271e-06, + "loss": 0.041, + "step": 1354, + "video_reward_cumulative_accuracy": 0.8124076809453471 + }, + { + "epoch": 0.40219649747699615, + "grad_norm": 2.2338671684265137, + "learning_rate": 3.733513057020581e-06, + "loss": 0.0219, + "step": 1355, + "video_reward_cumulative_accuracy": 0.8125461254612546 + }, + { + "epoch": 0.402493321460374, + "grad_norm": 2.4188389778137207, + "learning_rate": 3.731259298368902e-06, + "loss": 0.042, + "step": 1356, + "video_reward_cumulative_accuracy": 0.8126843657817109 + }, + { + "epoch": 0.40279014544375186, + "grad_norm": 2.109005928039551, + "learning_rate": 3.7290042178408625e-06, + "loss": 0.0795, + "step": 1357, + "video_reward_cumulative_accuracy": 0.8124539425202653 + }, + { + "epoch": 0.40308696942712974, + "grad_norm": 2.0904476642608643, + "learning_rate": 3.726747817857511e-06, + "loss": 0.039, + "step": 1358, + "video_reward_cumulative_accuracy": 0.8125920471281296 + }, + { + "epoch": 0.40338379341050756, + "grad_norm": 3.161112070083618, + "learning_rate": 3.7244901008413127e-06, + "loss": 0.0728, + "step": 1359, + "video_reward_cumulative_accuracy": 0.8123620309050773 + }, + { + "epoch": 0.40368061739388544, + "grad_norm": 2.363586187362671, + "learning_rate": 3.7222310692161434e-06, + "loss": 0.0416, + "step": 1360, + "video_reward_cumulative_accuracy": 0.8125 + }, + { + "epoch": 0.40397744137726327, + "grad_norm": 1.192459225654602, + "learning_rate": 3.7199707254072953e-06, + "loss": 0.0394, + "step": 1361, + "video_reward_cumulative_accuracy": 0.8126377663482733 + }, + { + "epoch": 0.40427426536064115, + "grad_norm": 2.6609785556793213, + "learning_rate": 3.7177090718414654e-06, + "loss": 0.1119, + "step": 1362, + "video_reward_cumulative_accuracy": 0.8124082232011748 + }, + { + "epoch": 0.404571089344019, + "grad_norm": 3.815920352935791, + "learning_rate": 3.7154461109467586e-06, + "loss": 0.072, + "step": 1363, + "video_reward_cumulative_accuracy": 0.8125458547322084 + }, + { + "epoch": 0.40486791332739686, + "grad_norm": 2.569744110107422, + "learning_rate": 3.713181845152684e-06, + "loss": 0.025, + "step": 1364, + "video_reward_cumulative_accuracy": 0.8126832844574781 + }, + { + "epoch": 0.40516473731077474, + "grad_norm": 3.3158631324768066, + "learning_rate": 3.710916276890149e-06, + "loss": 0.0523, + "step": 1365, + "video_reward_cumulative_accuracy": 0.8128205128205128 + }, + { + "epoch": 0.40546156129415256, + "grad_norm": 3.6916356086730957, + "learning_rate": 3.7086494085914632e-06, + "loss": 0.0656, + "step": 1366, + "video_reward_cumulative_accuracy": 0.8129575402635432 + }, + { + "epoch": 0.40575838527753044, + "grad_norm": 1.8868242502212524, + "learning_rate": 3.7063812426903273e-06, + "loss": 0.031, + "step": 1367, + "video_reward_cumulative_accuracy": 0.8127286027798098 + }, + { + "epoch": 0.40605520926090827, + "grad_norm": 3.946322441101074, + "learning_rate": 3.7041117816218396e-06, + "loss": 0.068, + "step": 1368, + "video_reward_cumulative_accuracy": 0.8128654970760234 + }, + { + "epoch": 0.40635203324428615, + "grad_norm": 4.210629940032959, + "learning_rate": 3.7018410278224852e-06, + "loss": 0.0726, + "step": 1369, + "video_reward_cumulative_accuracy": 0.8122717311906501 + }, + { + "epoch": 0.406648857227664, + "grad_norm": 3.0957443714141846, + "learning_rate": 3.69956898373014e-06, + "loss": 0.0363, + "step": 1370, + "video_reward_cumulative_accuracy": 0.8124087591240876 + }, + { + "epoch": 0.40694568121104185, + "grad_norm": 1.1315875053405762, + "learning_rate": 3.697295651784063e-06, + "loss": 0.0365, + "step": 1371, + "video_reward_cumulative_accuracy": 0.812545587162655 + }, + { + "epoch": 0.40724250519441973, + "grad_norm": 4.435636043548584, + "learning_rate": 3.695021034424897e-06, + "loss": 0.0564, + "step": 1372, + "video_reward_cumulative_accuracy": 0.8126822157434402 + }, + { + "epoch": 0.40753932917779756, + "grad_norm": 1.2530689239501953, + "learning_rate": 3.692745134094665e-06, + "loss": 0.03, + "step": 1373, + "video_reward_cumulative_accuracy": 0.8128186453022578 + }, + { + "epoch": 0.40783615316117544, + "grad_norm": 1.9480600357055664, + "learning_rate": 3.690467953236766e-06, + "loss": 0.0375, + "step": 1374, + "video_reward_cumulative_accuracy": 0.8125909752547307 + }, + { + "epoch": 0.40813297714455327, + "grad_norm": 1.831527590751648, + "learning_rate": 3.6881894942959752e-06, + "loss": 0.0614, + "step": 1375, + "video_reward_cumulative_accuracy": 0.8127272727272727 + }, + { + "epoch": 0.40842980112793115, + "grad_norm": 2.9821012020111084, + "learning_rate": 3.6859097597184395e-06, + "loss": 0.0336, + "step": 1376, + "video_reward_cumulative_accuracy": 0.8128633720930233 + }, + { + "epoch": 0.40872662511130897, + "grad_norm": 1.6577091217041016, + "learning_rate": 3.6836287519516745e-06, + "loss": 0.0375, + "step": 1377, + "video_reward_cumulative_accuracy": 0.8126361655773421 + }, + { + "epoch": 0.40902344909468685, + "grad_norm": 3.604968309402466, + "learning_rate": 3.681346473444565e-06, + "loss": 0.0396, + "step": 1378, + "video_reward_cumulative_accuracy": 0.8127721335268505 + }, + { + "epoch": 0.40932027307806473, + "grad_norm": 4.176747798919678, + "learning_rate": 3.6790629266473564e-06, + "loss": 0.0433, + "step": 1379, + "video_reward_cumulative_accuracy": 0.8129079042784626 + }, + { + "epoch": 0.40961709706144256, + "grad_norm": 1.8375120162963867, + "learning_rate": 3.676778114011659e-06, + "loss": 0.0673, + "step": 1380, + "video_reward_cumulative_accuracy": 0.8130434782608695 + }, + { + "epoch": 0.40991392104482044, + "grad_norm": 1.4766967296600342, + "learning_rate": 3.6744920379904407e-06, + "loss": 0.0376, + "step": 1381, + "video_reward_cumulative_accuracy": 0.8131788559015206 + }, + { + "epoch": 0.41021074502819826, + "grad_norm": 1.3402959108352661, + "learning_rate": 3.6722047010380265e-06, + "loss": 0.0401, + "step": 1382, + "video_reward_cumulative_accuracy": 0.8133140376266281 + }, + { + "epoch": 0.41050756901157615, + "grad_norm": 2.443718671798706, + "learning_rate": 3.669916105610094e-06, + "loss": 0.0345, + "step": 1383, + "video_reward_cumulative_accuracy": 0.8134490238611713 + }, + { + "epoch": 0.41080439299495397, + "grad_norm": 2.9310362339019775, + "learning_rate": 3.667626254163673e-06, + "loss": 0.0351, + "step": 1384, + "video_reward_cumulative_accuracy": 0.8132225433526011 + }, + { + "epoch": 0.41110121697833185, + "grad_norm": 1.3766952753067017, + "learning_rate": 3.665335149157141e-06, + "loss": 0.0203, + "step": 1385, + "video_reward_cumulative_accuracy": 0.8133574007220217 + }, + { + "epoch": 0.41139804096170973, + "grad_norm": 2.6975274085998535, + "learning_rate": 3.6630427930502215e-06, + "loss": 0.0513, + "step": 1386, + "video_reward_cumulative_accuracy": 0.8131313131313131 + }, + { + "epoch": 0.41169486494508756, + "grad_norm": 2.027492046356201, + "learning_rate": 3.6607491883039807e-06, + "loss": 0.071, + "step": 1387, + "video_reward_cumulative_accuracy": 0.8132660418168709 + }, + { + "epoch": 0.41199168892846544, + "grad_norm": 1.6064057350158691, + "learning_rate": 3.658454337380827e-06, + "loss": 0.0425, + "step": 1388, + "video_reward_cumulative_accuracy": 0.8134005763688761 + }, + { + "epoch": 0.41228851291184326, + "grad_norm": 2.689882755279541, + "learning_rate": 3.6561582427445053e-06, + "loss": 0.0549, + "step": 1389, + "video_reward_cumulative_accuracy": 0.8135349172066235 + }, + { + "epoch": 0.41258533689522114, + "grad_norm": 2.3749701976776123, + "learning_rate": 3.653860906860096e-06, + "loss": 0.0514, + "step": 1390, + "video_reward_cumulative_accuracy": 0.8133093525179856 + }, + { + "epoch": 0.41288216087859897, + "grad_norm": 2.138916015625, + "learning_rate": 3.651562332194012e-06, + "loss": 0.0303, + "step": 1391, + "video_reward_cumulative_accuracy": 0.8134435657800144 + }, + { + "epoch": 0.41317898486197685, + "grad_norm": 3.9567198753356934, + "learning_rate": 3.6492625212139964e-06, + "loss": 0.0721, + "step": 1392, + "video_reward_cumulative_accuracy": 0.8135775862068966 + }, + { + "epoch": 0.41347580884535473, + "grad_norm": 1.3105418682098389, + "learning_rate": 3.6469614763891193e-06, + "loss": 0.0371, + "step": 1393, + "video_reward_cumulative_accuracy": 0.8137114142139268 + }, + { + "epoch": 0.41377263282873256, + "grad_norm": 1.6768875122070312, + "learning_rate": 3.644659200189776e-06, + "loss": 0.0369, + "step": 1394, + "video_reward_cumulative_accuracy": 0.8134863701578192 + }, + { + "epoch": 0.41406945681211044, + "grad_norm": 3.7469892501831055, + "learning_rate": 3.6423556950876827e-06, + "loss": 0.0554, + "step": 1395, + "video_reward_cumulative_accuracy": 0.8136200716845878 + }, + { + "epoch": 0.41436628079548826, + "grad_norm": 5.655117511749268, + "learning_rate": 3.6400509635558766e-06, + "loss": 0.0512, + "step": 1396, + "video_reward_cumulative_accuracy": 0.8133954154727794 + }, + { + "epoch": 0.41466310477886614, + "grad_norm": 3.703136682510376, + "learning_rate": 3.6377450080687106e-06, + "loss": 0.0283, + "step": 1397, + "video_reward_cumulative_accuracy": 0.813528990694345 + }, + { + "epoch": 0.41495992876224397, + "grad_norm": 3.908426284790039, + "learning_rate": 3.635437831101851e-06, + "loss": 0.0451, + "step": 1398, + "video_reward_cumulative_accuracy": 0.8136623748211731 + }, + { + "epoch": 0.41525675274562185, + "grad_norm": 1.4794903993606567, + "learning_rate": 3.633129435132277e-06, + "loss": 0.0248, + "step": 1399, + "video_reward_cumulative_accuracy": 0.813795568263045 + }, + { + "epoch": 0.41555357672899973, + "grad_norm": 1.4713983535766602, + "learning_rate": 3.630819822638275e-06, + "loss": 0.0451, + "step": 1400, + "video_reward_cumulative_accuracy": 0.8135714285714286 + }, + { + "epoch": 0.41555357672899973, + "eval_runtime": 130.0037, + "eval_samples_per_second": 6.069, + "eval_steps_per_second": 0.762, + "eval_test_set_accuracy": 0.773989898989899, + "step": 1400 + }, + { + "epoch": 0.41585040071237755, + "grad_norm": 2.494872570037842, + "learning_rate": 3.6285089960994396e-06, + "loss": 0.0338, + "step": 1401, + "video_reward_cumulative_accuracy": 0.8137044967880086 + }, + { + "epoch": 0.41614722469575544, + "grad_norm": 3.971022605895996, + "learning_rate": 3.626196957996666e-06, + "loss": 0.081, + "step": 1402, + "video_reward_cumulative_accuracy": 0.8138373751783167 + }, + { + "epoch": 0.41644404867913326, + "grad_norm": 2.763796806335449, + "learning_rate": 3.6238837108121514e-06, + "loss": 0.0512, + "step": 1403, + "video_reward_cumulative_accuracy": 0.8136136849607983 + }, + { + "epoch": 0.41674087266251114, + "grad_norm": 3.586524724960327, + "learning_rate": 3.6215692570293924e-06, + "loss": 0.0805, + "step": 1404, + "video_reward_cumulative_accuracy": 0.8137464387464387 + }, + { + "epoch": 0.41703769664588897, + "grad_norm": 2.187155246734619, + "learning_rate": 3.619253599133178e-06, + "loss": 0.0365, + "step": 1405, + "video_reward_cumulative_accuracy": 0.8135231316725978 + }, + { + "epoch": 0.41733452062926685, + "grad_norm": 3.1447842121124268, + "learning_rate": 3.6169367396095935e-06, + "loss": 0.0494, + "step": 1406, + "video_reward_cumulative_accuracy": 0.8133001422475107 + }, + { + "epoch": 0.41763134461264473, + "grad_norm": 1.2427921295166016, + "learning_rate": 3.6146186809460114e-06, + "loss": 0.0147, + "step": 1407, + "video_reward_cumulative_accuracy": 0.8134328358208955 + }, + { + "epoch": 0.41792816859602255, + "grad_norm": 1.1575847864151, + "learning_rate": 3.612299425631093e-06, + "loss": 0.0468, + "step": 1408, + "video_reward_cumulative_accuracy": 0.8132102272727273 + }, + { + "epoch": 0.41822499257940043, + "grad_norm": 0.6622688174247742, + "learning_rate": 3.609978976154784e-06, + "loss": 0.0116, + "step": 1409, + "video_reward_cumulative_accuracy": 0.8133427963094393 + }, + { + "epoch": 0.41852181656277826, + "grad_norm": 1.4234910011291504, + "learning_rate": 3.6076573350083112e-06, + "loss": 0.0449, + "step": 1410, + "video_reward_cumulative_accuracy": 0.8134751773049645 + }, + { + "epoch": 0.41881864054615614, + "grad_norm": 0.7136000990867615, + "learning_rate": 3.605334504684183e-06, + "loss": 0.0299, + "step": 1411, + "video_reward_cumulative_accuracy": 0.8132530120481928 + }, + { + "epoch": 0.41911546452953397, + "grad_norm": 2.231410503387451, + "learning_rate": 3.6030104876761835e-06, + "loss": 0.0417, + "step": 1412, + "video_reward_cumulative_accuracy": 0.8133852691218131 + }, + { + "epoch": 0.41941228851291185, + "grad_norm": 2.5650246143341064, + "learning_rate": 3.600685286479369e-06, + "loss": 0.0438, + "step": 1413, + "video_reward_cumulative_accuracy": 0.813517338995046 + }, + { + "epoch": 0.4197091124962897, + "grad_norm": 3.8068923950195312, + "learning_rate": 3.59835890359007e-06, + "loss": 0.0378, + "step": 1414, + "video_reward_cumulative_accuracy": 0.8136492220650636 + }, + { + "epoch": 0.42000593647966755, + "grad_norm": 2.6522464752197266, + "learning_rate": 3.5960313415058833e-06, + "loss": 0.0443, + "step": 1415, + "video_reward_cumulative_accuracy": 0.8134275618374558 + }, + { + "epoch": 0.42030276046304543, + "grad_norm": 2.89424729347229, + "learning_rate": 3.5937026027256738e-06, + "loss": 0.0213, + "step": 1416, + "video_reward_cumulative_accuracy": 0.8135593220338984 + }, + { + "epoch": 0.42059958444642326, + "grad_norm": 3.648902177810669, + "learning_rate": 3.591372689749567e-06, + "loss": 0.1216, + "step": 1417, + "video_reward_cumulative_accuracy": 0.8136908962597036 + }, + { + "epoch": 0.42089640842980114, + "grad_norm": 1.9664138555526733, + "learning_rate": 3.5890416050789523e-06, + "loss": 0.0361, + "step": 1418, + "video_reward_cumulative_accuracy": 0.8138222849083215 + }, + { + "epoch": 0.42119323241317896, + "grad_norm": 3.4354248046875, + "learning_rate": 3.586709351216474e-06, + "loss": 0.0391, + "step": 1419, + "video_reward_cumulative_accuracy": 0.8136011275546159 + }, + { + "epoch": 0.42149005639655684, + "grad_norm": 1.596327543258667, + "learning_rate": 3.5843759306660344e-06, + "loss": 0.0905, + "step": 1420, + "video_reward_cumulative_accuracy": 0.8133802816901409 + }, + { + "epoch": 0.4217868803799347, + "grad_norm": 3.7419114112854004, + "learning_rate": 3.5820413459327863e-06, + "loss": 0.0678, + "step": 1421, + "video_reward_cumulative_accuracy": 0.812807881773399 + }, + { + "epoch": 0.42208370436331255, + "grad_norm": 3.569519281387329, + "learning_rate": 3.579705599523132e-06, + "loss": 0.047, + "step": 1422, + "video_reward_cumulative_accuracy": 0.8129395218002813 + }, + { + "epoch": 0.42238052834669043, + "grad_norm": 1.55231511592865, + "learning_rate": 3.5773686939447226e-06, + "loss": 0.0314, + "step": 1423, + "video_reward_cumulative_accuracy": 0.8130709768095573 + }, + { + "epoch": 0.42267735233006826, + "grad_norm": 2.8076114654541016, + "learning_rate": 3.575030631706454e-06, + "loss": 0.0511, + "step": 1424, + "video_reward_cumulative_accuracy": 0.8128511235955056 + }, + { + "epoch": 0.42297417631344614, + "grad_norm": 1.3293800354003906, + "learning_rate": 3.5726914153184624e-06, + "loss": 0.0334, + "step": 1425, + "video_reward_cumulative_accuracy": 0.8129824561403509 + }, + { + "epoch": 0.42327100029682396, + "grad_norm": 2.4649341106414795, + "learning_rate": 3.570351047292123e-06, + "loss": 0.0397, + "step": 1426, + "video_reward_cumulative_accuracy": 0.8131136044880786 + }, + { + "epoch": 0.42356782428020184, + "grad_norm": 3.885298490524292, + "learning_rate": 3.5680095301400497e-06, + "loss": 0.0345, + "step": 1427, + "video_reward_cumulative_accuracy": 0.8132445690259286 + }, + { + "epoch": 0.4238646482635797, + "grad_norm": 2.974383592605591, + "learning_rate": 3.565666866376086e-06, + "loss": 0.0572, + "step": 1428, + "video_reward_cumulative_accuracy": 0.8130252100840336 + }, + { + "epoch": 0.42416147224695755, + "grad_norm": 1.509018898010254, + "learning_rate": 3.5633230585153093e-06, + "loss": 0.0386, + "step": 1429, + "video_reward_cumulative_accuracy": 0.8131560531840448 + }, + { + "epoch": 0.42445829623033543, + "grad_norm": 1.3774346113204956, + "learning_rate": 3.5609781090740264e-06, + "loss": 0.0231, + "step": 1430, + "video_reward_cumulative_accuracy": 0.8132867132867133 + }, + { + "epoch": 0.42475512021371326, + "grad_norm": 2.268357276916504, + "learning_rate": 3.558632020569768e-06, + "loss": 0.045, + "step": 1431, + "video_reward_cumulative_accuracy": 0.8134171907756813 + }, + { + "epoch": 0.42505194419709114, + "grad_norm": 2.4706010818481445, + "learning_rate": 3.5562847955212863e-06, + "loss": 0.0662, + "step": 1432, + "video_reward_cumulative_accuracy": 0.8131983240223464 + }, + { + "epoch": 0.42534876818046896, + "grad_norm": 1.930578589439392, + "learning_rate": 3.553936436448556e-06, + "loss": 0.0474, + "step": 1433, + "video_reward_cumulative_accuracy": 0.8133286810886252 + }, + { + "epoch": 0.42564559216384684, + "grad_norm": 2.0366480350494385, + "learning_rate": 3.551586945872769e-06, + "loss": 0.061, + "step": 1434, + "video_reward_cumulative_accuracy": 0.8131101813110181 + }, + { + "epoch": 0.4259424161472247, + "grad_norm": 2.4363696575164795, + "learning_rate": 3.5492363263163305e-06, + "loss": 0.0386, + "step": 1435, + "video_reward_cumulative_accuracy": 0.8132404181184669 + }, + { + "epoch": 0.42623924013060255, + "grad_norm": 1.8315794467926025, + "learning_rate": 3.546884580302859e-06, + "loss": 0.0375, + "step": 1436, + "video_reward_cumulative_accuracy": 0.8133704735376045 + }, + { + "epoch": 0.42653606411398043, + "grad_norm": 2.6749696731567383, + "learning_rate": 3.544531710357183e-06, + "loss": 0.1026, + "step": 1437, + "video_reward_cumulative_accuracy": 0.8131524008350731 + }, + { + "epoch": 0.42683288809735825, + "grad_norm": 1.775738000869751, + "learning_rate": 3.5421777190053354e-06, + "loss": 0.0308, + "step": 1438, + "video_reward_cumulative_accuracy": 0.8132823365785814 + }, + { + "epoch": 0.42712971208073613, + "grad_norm": 2.429361581802368, + "learning_rate": 3.539822608774555e-06, + "loss": 0.0293, + "step": 1439, + "video_reward_cumulative_accuracy": 0.8134120917303683 + }, + { + "epoch": 0.42742653606411396, + "grad_norm": 1.1443023681640625, + "learning_rate": 3.537466382193282e-06, + "loss": 0.0219, + "step": 1440, + "video_reward_cumulative_accuracy": 0.8135416666666667 + }, + { + "epoch": 0.42772336004749184, + "grad_norm": 2.1525886058807373, + "learning_rate": 3.535109041791153e-06, + "loss": 0.0612, + "step": 1441, + "video_reward_cumulative_accuracy": 0.8136710617626648 + }, + { + "epoch": 0.4280201840308697, + "grad_norm": 2.423384666442871, + "learning_rate": 3.532750590099002e-06, + "loss": 0.0459, + "step": 1442, + "video_reward_cumulative_accuracy": 0.8134535367545076 + }, + { + "epoch": 0.42831700801424755, + "grad_norm": 1.831072449684143, + "learning_rate": 3.5303910296488565e-06, + "loss": 0.0212, + "step": 1443, + "video_reward_cumulative_accuracy": 0.8135828135828136 + }, + { + "epoch": 0.4286138319976254, + "grad_norm": 3.769604444503784, + "learning_rate": 3.528030362973933e-06, + "loss": 0.0522, + "step": 1444, + "video_reward_cumulative_accuracy": 0.8133656509695291 + }, + { + "epoch": 0.42891065598100325, + "grad_norm": 2.6289186477661133, + "learning_rate": 3.525668592608637e-06, + "loss": 0.0766, + "step": 1445, + "video_reward_cumulative_accuracy": 0.8134948096885813 + }, + { + "epoch": 0.42920747996438113, + "grad_norm": 2.1730971336364746, + "learning_rate": 3.523305721088558e-06, + "loss": 0.0222, + "step": 1446, + "video_reward_cumulative_accuracy": 0.8136237897648686 + }, + { + "epoch": 0.42950430394775896, + "grad_norm": 1.4084819555282593, + "learning_rate": 3.5209417509504668e-06, + "loss": 0.0793, + "step": 1447, + "video_reward_cumulative_accuracy": 0.813752591568763 + }, + { + "epoch": 0.42980112793113684, + "grad_norm": 1.7031943798065186, + "learning_rate": 3.518576684732316e-06, + "loss": 0.0489, + "step": 1448, + "video_reward_cumulative_accuracy": 0.8138812154696132 + }, + { + "epoch": 0.4300979519145147, + "grad_norm": 2.925882339477539, + "learning_rate": 3.5162105249732336e-06, + "loss": 0.0379, + "step": 1449, + "video_reward_cumulative_accuracy": 0.8136645962732919 + }, + { + "epoch": 0.43039477589789255, + "grad_norm": 0.6623610854148865, + "learning_rate": 3.5138432742135215e-06, + "loss": 0.023, + "step": 1450, + "video_reward_cumulative_accuracy": 0.8137931034482758 + }, + { + "epoch": 0.4306915998812704, + "grad_norm": 1.5418504476547241, + "learning_rate": 3.511474934994653e-06, + "loss": 0.0315, + "step": 1451, + "video_reward_cumulative_accuracy": 0.8135768435561681 + }, + { + "epoch": 0.43098842386464825, + "grad_norm": 1.7840099334716797, + "learning_rate": 3.509105509859271e-06, + "loss": 0.0258, + "step": 1452, + "video_reward_cumulative_accuracy": 0.8137052341597796 + }, + { + "epoch": 0.43128524784802613, + "grad_norm": 3.910229444503784, + "learning_rate": 3.5067350013511816e-06, + "loss": 0.0932, + "step": 1453, + "video_reward_cumulative_accuracy": 0.8134893324156917 + }, + { + "epoch": 0.43158207183140396, + "grad_norm": 1.7669485807418823, + "learning_rate": 3.5043634120153572e-06, + "loss": 0.0423, + "step": 1454, + "video_reward_cumulative_accuracy": 0.813617606602476 + }, + { + "epoch": 0.43187889581478184, + "grad_norm": 0.9480779767036438, + "learning_rate": 3.5019907443979297e-06, + "loss": 0.0301, + "step": 1455, + "video_reward_cumulative_accuracy": 0.813745704467354 + }, + { + "epoch": 0.4321757197981597, + "grad_norm": 2.241234540939331, + "learning_rate": 3.4996170010461862e-06, + "loss": 0.0298, + "step": 1456, + "video_reward_cumulative_accuracy": 0.8138736263736264 + }, + { + "epoch": 0.43247254378153754, + "grad_norm": 2.01543927192688, + "learning_rate": 3.497242184508571e-06, + "loss": 0.0604, + "step": 1457, + "video_reward_cumulative_accuracy": 0.8140013726835964 + }, + { + "epoch": 0.4327693677649154, + "grad_norm": 1.9135305881500244, + "learning_rate": 3.4948662973346816e-06, + "loss": 0.0274, + "step": 1458, + "video_reward_cumulative_accuracy": 0.8141289437585734 + }, + { + "epoch": 0.43306619174829325, + "grad_norm": 1.8279916048049927, + "learning_rate": 3.492489342075262e-06, + "loss": 0.0504, + "step": 1459, + "video_reward_cumulative_accuracy": 0.8142563399588759 + }, + { + "epoch": 0.43336301573167113, + "grad_norm": 1.939100980758667, + "learning_rate": 3.4901113212822057e-06, + "loss": 0.0561, + "step": 1460, + "video_reward_cumulative_accuracy": 0.8143835616438356 + }, + { + "epoch": 0.43365983971504896, + "grad_norm": 1.3775911331176758, + "learning_rate": 3.487732237508547e-06, + "loss": 0.0456, + "step": 1461, + "video_reward_cumulative_accuracy": 0.8145106091718002 + }, + { + "epoch": 0.43395666369842684, + "grad_norm": 3.4904792308807373, + "learning_rate": 3.485352093308465e-06, + "loss": 0.0448, + "step": 1462, + "video_reward_cumulative_accuracy": 0.8146374829001368 + }, + { + "epoch": 0.4342534876818047, + "grad_norm": 1.0064564943313599, + "learning_rate": 3.4829708912372746e-06, + "loss": 0.0191, + "step": 1463, + "video_reward_cumulative_accuracy": 0.8147641831852358 + }, + { + "epoch": 0.43455031166518254, + "grad_norm": 2.0086288452148438, + "learning_rate": 3.4805886338514277e-06, + "loss": 0.0285, + "step": 1464, + "video_reward_cumulative_accuracy": 0.8145491803278688 + }, + { + "epoch": 0.4348471356485604, + "grad_norm": 2.064359664916992, + "learning_rate": 3.4782053237085083e-06, + "loss": 0.0519, + "step": 1465, + "video_reward_cumulative_accuracy": 0.8143344709897611 + }, + { + "epoch": 0.43514395963193825, + "grad_norm": 3.986988067626953, + "learning_rate": 3.4758209633672313e-06, + "loss": 0.081, + "step": 1466, + "video_reward_cumulative_accuracy": 0.8144611186903138 + }, + { + "epoch": 0.43544078361531613, + "grad_norm": 3.117048740386963, + "learning_rate": 3.47343555538744e-06, + "loss": 0.0216, + "step": 1467, + "video_reward_cumulative_accuracy": 0.8142467620995228 + }, + { + "epoch": 0.43573760759869395, + "grad_norm": 2.8893699645996094, + "learning_rate": 3.4710491023300997e-06, + "loss": 0.0428, + "step": 1468, + "video_reward_cumulative_accuracy": 0.8140326975476839 + }, + { + "epoch": 0.43603443158207184, + "grad_norm": 3.7498586177825928, + "learning_rate": 3.468661606757301e-06, + "loss": 0.0808, + "step": 1469, + "video_reward_cumulative_accuracy": 0.8138189244383934 + }, + { + "epoch": 0.4363312555654497, + "grad_norm": 1.7329144477844238, + "learning_rate": 3.4662730712322514e-06, + "loss": 0.0348, + "step": 1470, + "video_reward_cumulative_accuracy": 0.8136054421768707 + }, + { + "epoch": 0.43662807954882754, + "grad_norm": 5.551042079925537, + "learning_rate": 3.4638834983192743e-06, + "loss": 0.0883, + "step": 1471, + "video_reward_cumulative_accuracy": 0.8133922501699524 + }, + { + "epoch": 0.4369249035322054, + "grad_norm": 1.6959829330444336, + "learning_rate": 3.4614928905838103e-06, + "loss": 0.0174, + "step": 1472, + "video_reward_cumulative_accuracy": 0.8135190217391305 + }, + { + "epoch": 0.43722172751558325, + "grad_norm": 1.2532111406326294, + "learning_rate": 3.4591012505924078e-06, + "loss": 0.0122, + "step": 1473, + "video_reward_cumulative_accuracy": 0.8136456211812627 + }, + { + "epoch": 0.43751855149896113, + "grad_norm": 0.6892161965370178, + "learning_rate": 3.4567085809127247e-06, + "loss": 0.0065, + "step": 1474, + "video_reward_cumulative_accuracy": 0.8137720488466758 + }, + { + "epoch": 0.43781537548233895, + "grad_norm": 3.311598539352417, + "learning_rate": 3.4543148841135243e-06, + "loss": 0.0672, + "step": 1475, + "video_reward_cumulative_accuracy": 0.8135593220338984 + }, + { + "epoch": 0.43811219946571683, + "grad_norm": 1.2820119857788086, + "learning_rate": 3.4519201627646713e-06, + "loss": 0.0293, + "step": 1476, + "video_reward_cumulative_accuracy": 0.8133468834688347 + }, + { + "epoch": 0.43840902344909466, + "grad_norm": 1.7546663284301758, + "learning_rate": 3.4495244194371337e-06, + "loss": 0.0629, + "step": 1477, + "video_reward_cumulative_accuracy": 0.8134732566012187 + }, + { + "epoch": 0.43870584743247254, + "grad_norm": 3.1134088039398193, + "learning_rate": 3.447127656702971e-06, + "loss": 0.061, + "step": 1478, + "video_reward_cumulative_accuracy": 0.8135994587280109 + }, + { + "epoch": 0.4390026714158504, + "grad_norm": 3.5772175788879395, + "learning_rate": 3.444729877135345e-06, + "loss": 0.0409, + "step": 1479, + "video_reward_cumulative_accuracy": 0.8137254901960784 + }, + { + "epoch": 0.43929949539922825, + "grad_norm": 1.2507808208465576, + "learning_rate": 3.4423310833085015e-06, + "loss": 0.0542, + "step": 1480, + "video_reward_cumulative_accuracy": 0.8135135135135135 + }, + { + "epoch": 0.4395963193826061, + "grad_norm": 0.4887540936470032, + "learning_rate": 3.4399312777977794e-06, + "loss": 0.0112, + "step": 1481, + "video_reward_cumulative_accuracy": 0.8136394328156651 + }, + { + "epoch": 0.43989314336598395, + "grad_norm": 0.6733037233352661, + "learning_rate": 3.437530463179604e-06, + "loss": 0.0139, + "step": 1482, + "video_reward_cumulative_accuracy": 0.8137651821862348 + }, + { + "epoch": 0.44018996734936183, + "grad_norm": 0.46859636902809143, + "learning_rate": 3.4351286420314807e-06, + "loss": 0.0161, + "step": 1483, + "video_reward_cumulative_accuracy": 0.8138907619689818 + }, + { + "epoch": 0.44048679133273966, + "grad_norm": 1.171273946762085, + "learning_rate": 3.4327258169319986e-06, + "loss": 0.0282, + "step": 1484, + "video_reward_cumulative_accuracy": 0.8140161725067385 + }, + { + "epoch": 0.44078361531611754, + "grad_norm": 1.2421537637710571, + "learning_rate": 3.4303219904608244e-06, + "loss": 0.0144, + "step": 1485, + "video_reward_cumulative_accuracy": 0.8138047138047138 + }, + { + "epoch": 0.4410804392994954, + "grad_norm": 1.5912413597106934, + "learning_rate": 3.427917165198698e-06, + "loss": 0.0317, + "step": 1486, + "video_reward_cumulative_accuracy": 0.8135935397039031 + }, + { + "epoch": 0.44137726328287324, + "grad_norm": 2.529520034790039, + "learning_rate": 3.425511343727434e-06, + "loss": 0.04, + "step": 1487, + "video_reward_cumulative_accuracy": 0.8137188971082717 + }, + { + "epoch": 0.4416740872662511, + "grad_norm": 2.3800694942474365, + "learning_rate": 3.4231045286299136e-06, + "loss": 0.0557, + "step": 1488, + "video_reward_cumulative_accuracy": 0.8138440860215054 + }, + { + "epoch": 0.44197091124962895, + "grad_norm": 1.2274895906448364, + "learning_rate": 3.4206967224900885e-06, + "loss": 0.0194, + "step": 1489, + "video_reward_cumulative_accuracy": 0.8139691067830759 + }, + { + "epoch": 0.44226773523300683, + "grad_norm": 1.3150626420974731, + "learning_rate": 3.41828792789297e-06, + "loss": 0.0124, + "step": 1490, + "video_reward_cumulative_accuracy": 0.8140939597315436 + }, + { + "epoch": 0.44256455921638466, + "grad_norm": 1.3226598501205444, + "learning_rate": 3.415878147424634e-06, + "loss": 0.0129, + "step": 1491, + "video_reward_cumulative_accuracy": 0.8142186452045607 + }, + { + "epoch": 0.44286138319976254, + "grad_norm": 2.370067596435547, + "learning_rate": 3.413467383672214e-06, + "loss": 0.025, + "step": 1492, + "video_reward_cumulative_accuracy": 0.814343163538874 + }, + { + "epoch": 0.4431582071831404, + "grad_norm": 2.3997597694396973, + "learning_rate": 3.411055639223898e-06, + "loss": 0.0187, + "step": 1493, + "video_reward_cumulative_accuracy": 0.8144675150703282 + }, + { + "epoch": 0.44345503116651824, + "grad_norm": 1.877609133720398, + "learning_rate": 3.4086429166689296e-06, + "loss": 0.0527, + "step": 1494, + "video_reward_cumulative_accuracy": 0.8145917001338688 + }, + { + "epoch": 0.4437518551498961, + "grad_norm": 4.125174522399902, + "learning_rate": 3.4062292185975987e-06, + "loss": 0.0701, + "step": 1495, + "video_reward_cumulative_accuracy": 0.8147157190635451 + }, + { + "epoch": 0.44404867913327395, + "grad_norm": 2.3468966484069824, + "learning_rate": 3.403814547601244e-06, + "loss": 0.0234, + "step": 1496, + "video_reward_cumulative_accuracy": 0.8145053475935828 + }, + { + "epoch": 0.44434550311665183, + "grad_norm": 0.8067638874053955, + "learning_rate": 3.4013989062722514e-06, + "loss": 0.0121, + "step": 1497, + "video_reward_cumulative_accuracy": 0.814629258517034 + }, + { + "epoch": 0.44464232710002966, + "grad_norm": 1.2667425870895386, + "learning_rate": 3.398982297204045e-06, + "loss": 0.0718, + "step": 1498, + "video_reward_cumulative_accuracy": 0.8147530040053405 + }, + { + "epoch": 0.44493915108340754, + "grad_norm": 4.346756935119629, + "learning_rate": 3.396564722991089e-06, + "loss": 0.037, + "step": 1499, + "video_reward_cumulative_accuracy": 0.8148765843895931 + }, + { + "epoch": 0.4452359750667854, + "grad_norm": 2.6099324226379395, + "learning_rate": 3.394146186228885e-06, + "loss": 0.0565, + "step": 1500, + "video_reward_cumulative_accuracy": 0.815 + }, + { + "epoch": 0.44553279905016324, + "grad_norm": 2.9593958854675293, + "learning_rate": 3.3917266895139654e-06, + "loss": 0.0548, + "step": 1501, + "video_reward_cumulative_accuracy": 0.8147901399067289 + }, + { + "epoch": 0.4458296230335411, + "grad_norm": 1.9937952756881714, + "learning_rate": 3.389306235443896e-06, + "loss": 0.0267, + "step": 1502, + "video_reward_cumulative_accuracy": 0.8145805592543276 + }, + { + "epoch": 0.44612644701691895, + "grad_norm": 2.4806485176086426, + "learning_rate": 3.3868848266172693e-06, + "loss": 0.0222, + "step": 1503, + "video_reward_cumulative_accuracy": 0.8143712574850299 + }, + { + "epoch": 0.44642327100029683, + "grad_norm": 4.427879333496094, + "learning_rate": 3.384462465633702e-06, + "loss": 0.0498, + "step": 1504, + "video_reward_cumulative_accuracy": 0.8144946808510638 + }, + { + "epoch": 0.44672009498367465, + "grad_norm": 3.623908519744873, + "learning_rate": 3.3820391550938337e-06, + "loss": 0.0426, + "step": 1505, + "video_reward_cumulative_accuracy": 0.8142857142857143 + }, + { + "epoch": 0.44701691896705253, + "grad_norm": 3.808448076248169, + "learning_rate": 3.3796148975993236e-06, + "loss": 0.0388, + "step": 1506, + "video_reward_cumulative_accuracy": 0.8144090305444888 + }, + { + "epoch": 0.4473137429504304, + "grad_norm": 1.8826570510864258, + "learning_rate": 3.3771896957528476e-06, + "loss": 0.0414, + "step": 1507, + "video_reward_cumulative_accuracy": 0.814200398142004 + }, + { + "epoch": 0.44761056693380824, + "grad_norm": 2.2933990955352783, + "learning_rate": 3.374763552158095e-06, + "loss": 0.0396, + "step": 1508, + "video_reward_cumulative_accuracy": 0.8143236074270557 + }, + { + "epoch": 0.4479073909171861, + "grad_norm": 2.446279764175415, + "learning_rate": 3.372336469419767e-06, + "loss": 0.0834, + "step": 1509, + "video_reward_cumulative_accuracy": 0.8144466534128562 + }, + { + "epoch": 0.44820421490056395, + "grad_norm": 1.7556294202804565, + "learning_rate": 3.3699084501435717e-06, + "loss": 0.0329, + "step": 1510, + "video_reward_cumulative_accuracy": 0.8142384105960265 + }, + { + "epoch": 0.4485010388839418, + "grad_norm": 1.7487667798995972, + "learning_rate": 3.3674794969362235e-06, + "loss": 0.0511, + "step": 1511, + "video_reward_cumulative_accuracy": 0.814361350099272 + }, + { + "epoch": 0.44879786286731965, + "grad_norm": 2.5053646564483643, + "learning_rate": 3.365049612405441e-06, + "loss": 0.0534, + "step": 1512, + "video_reward_cumulative_accuracy": 0.814484126984127 + }, + { + "epoch": 0.44909468685069753, + "grad_norm": 1.7758805751800537, + "learning_rate": 3.3626187991599384e-06, + "loss": 0.0223, + "step": 1513, + "video_reward_cumulative_accuracy": 0.8146067415730337 + }, + { + "epoch": 0.4493915108340754, + "grad_norm": 3.1137726306915283, + "learning_rate": 3.3601870598094317e-06, + "loss": 0.0652, + "step": 1514, + "video_reward_cumulative_accuracy": 0.8147291941875826 + }, + { + "epoch": 0.44968833481745324, + "grad_norm": 1.9859057664871216, + "learning_rate": 3.3577543969646287e-06, + "loss": 0.0781, + "step": 1515, + "video_reward_cumulative_accuracy": 0.8148514851485148 + }, + { + "epoch": 0.4499851588008311, + "grad_norm": 2.5349478721618652, + "learning_rate": 3.3553208132372284e-06, + "loss": 0.056, + "step": 1516, + "video_reward_cumulative_accuracy": 0.8149736147757256 + }, + { + "epoch": 0.45028198278420895, + "grad_norm": 1.9917017221450806, + "learning_rate": 3.35288631123992e-06, + "loss": 0.0526, + "step": 1517, + "video_reward_cumulative_accuracy": 0.8147659854976929 + }, + { + "epoch": 0.4505788067675868, + "grad_norm": 2.3147571086883545, + "learning_rate": 3.3504508935863776e-06, + "loss": 0.0395, + "step": 1518, + "video_reward_cumulative_accuracy": 0.8148880105401844 + }, + { + "epoch": 0.45087563075096465, + "grad_norm": 3.1221325397491455, + "learning_rate": 3.3480145628912574e-06, + "loss": 0.0416, + "step": 1519, + "video_reward_cumulative_accuracy": 0.815009874917709 + }, + { + "epoch": 0.45117245473434253, + "grad_norm": 3.894681692123413, + "learning_rate": 3.3455773217701977e-06, + "loss": 0.0472, + "step": 1520, + "video_reward_cumulative_accuracy": 0.8151315789473684 + }, + { + "epoch": 0.4514692787177204, + "grad_norm": 2.071953773498535, + "learning_rate": 3.343139172839813e-06, + "loss": 0.0382, + "step": 1521, + "video_reward_cumulative_accuracy": 0.8152531229454306 + }, + { + "epoch": 0.45176610270109824, + "grad_norm": 1.4821226596832275, + "learning_rate": 3.3407001187176934e-06, + "loss": 0.0516, + "step": 1522, + "video_reward_cumulative_accuracy": 0.8153745072273325 + }, + { + "epoch": 0.4520629266844761, + "grad_norm": 2.996478796005249, + "learning_rate": 3.3382601620224e-06, + "loss": 0.0383, + "step": 1523, + "video_reward_cumulative_accuracy": 0.8154957321076822 + }, + { + "epoch": 0.45235975066785394, + "grad_norm": 0.9077578783035278, + "learning_rate": 3.335819305373463e-06, + "loss": 0.0113, + "step": 1524, + "video_reward_cumulative_accuracy": 0.8156167979002624 + }, + { + "epoch": 0.4526565746512318, + "grad_norm": 3.7463269233703613, + "learning_rate": 3.333377551391379e-06, + "loss": 0.065, + "step": 1525, + "video_reward_cumulative_accuracy": 0.8157377049180328 + }, + { + "epoch": 0.45295339863460965, + "grad_norm": 2.012558698654175, + "learning_rate": 3.3309349026976074e-06, + "loss": 0.0298, + "step": 1526, + "video_reward_cumulative_accuracy": 0.8158584534731324 + }, + { + "epoch": 0.45325022261798753, + "grad_norm": 2.4637279510498047, + "learning_rate": 3.3284913619145697e-06, + "loss": 0.0271, + "step": 1527, + "video_reward_cumulative_accuracy": 0.8159790438768828 + }, + { + "epoch": 0.4535470466013654, + "grad_norm": 1.1083879470825195, + "learning_rate": 3.3260469316656435e-06, + "loss": 0.0653, + "step": 1528, + "video_reward_cumulative_accuracy": 0.8157722513089005 + }, + { + "epoch": 0.45384387058474324, + "grad_norm": 0.8266122937202454, + "learning_rate": 3.3236016145751616e-06, + "loss": 0.0347, + "step": 1529, + "video_reward_cumulative_accuracy": 0.815892740353172 + }, + { + "epoch": 0.4541406945681211, + "grad_norm": 1.2026057243347168, + "learning_rate": 3.32115541326841e-06, + "loss": 0.035, + "step": 1530, + "video_reward_cumulative_accuracy": 0.8160130718954248 + }, + { + "epoch": 0.45443751855149894, + "grad_norm": 1.0709147453308105, + "learning_rate": 3.3187083303716218e-06, + "loss": 0.0366, + "step": 1531, + "video_reward_cumulative_accuracy": 0.8154800783801437 + }, + { + "epoch": 0.4547343425348768, + "grad_norm": 2.988943338394165, + "learning_rate": 3.3162603685119795e-06, + "loss": 0.075, + "step": 1532, + "video_reward_cumulative_accuracy": 0.8152741514360313 + }, + { + "epoch": 0.45503116651825465, + "grad_norm": 4.098608493804932, + "learning_rate": 3.3138115303176073e-06, + "loss": 0.0878, + "step": 1533, + "video_reward_cumulative_accuracy": 0.8153946510110893 + }, + { + "epoch": 0.45532799050163253, + "grad_norm": 1.185163140296936, + "learning_rate": 3.31136181841757e-06, + "loss": 0.0365, + "step": 1534, + "video_reward_cumulative_accuracy": 0.8155149934810951 + }, + { + "epoch": 0.4556248144850104, + "grad_norm": 1.2512524127960205, + "learning_rate": 3.308911235441873e-06, + "loss": 0.0201, + "step": 1535, + "video_reward_cumulative_accuracy": 0.8156351791530945 + }, + { + "epoch": 0.45592163846838824, + "grad_norm": 1.520028829574585, + "learning_rate": 3.306459784021452e-06, + "loss": 0.0322, + "step": 1536, + "video_reward_cumulative_accuracy": 0.8154296875 + }, + { + "epoch": 0.4562184624517661, + "grad_norm": 1.225994348526001, + "learning_rate": 3.304007466788181e-06, + "loss": 0.0554, + "step": 1537, + "video_reward_cumulative_accuracy": 0.815224463240078 + }, + { + "epoch": 0.45651528643514394, + "grad_norm": 2.5382115840911865, + "learning_rate": 3.301554286374859e-06, + "loss": 0.0308, + "step": 1538, + "video_reward_cumulative_accuracy": 0.8153446033810143 + }, + { + "epoch": 0.4568121104185218, + "grad_norm": 1.67540442943573, + "learning_rate": 3.2991002454152133e-06, + "loss": 0.0605, + "step": 1539, + "video_reward_cumulative_accuracy": 0.8151397011046134 + }, + { + "epoch": 0.45710893440189965, + "grad_norm": 0.42339617013931274, + "learning_rate": 3.2966453465438954e-06, + "loss": 0.011, + "step": 1540, + "video_reward_cumulative_accuracy": 0.8152597402597402 + }, + { + "epoch": 0.4574057583852775, + "grad_norm": 1.9776630401611328, + "learning_rate": 3.294189592396477e-06, + "loss": 0.0485, + "step": 1541, + "video_reward_cumulative_accuracy": 0.8153796236210253 + }, + { + "epoch": 0.4577025823686554, + "grad_norm": 3.960407018661499, + "learning_rate": 3.29173298560945e-06, + "loss": 0.0464, + "step": 1542, + "video_reward_cumulative_accuracy": 0.8154993514915694 + }, + { + "epoch": 0.45799940635203323, + "grad_norm": 0.9101660251617432, + "learning_rate": 3.289275528820218e-06, + "loss": 0.0242, + "step": 1543, + "video_reward_cumulative_accuracy": 0.8156189241736876 + }, + { + "epoch": 0.4582962303354111, + "grad_norm": 6.294965744018555, + "learning_rate": 3.2868172246671005e-06, + "loss": 0.0793, + "step": 1544, + "video_reward_cumulative_accuracy": 0.8157383419689119 + }, + { + "epoch": 0.45859305431878894, + "grad_norm": 2.3557205200195312, + "learning_rate": 3.2843580757893266e-06, + "loss": 0.0603, + "step": 1545, + "video_reward_cumulative_accuracy": 0.8158576051779936 + }, + { + "epoch": 0.4588898783021668, + "grad_norm": 3.9995932579040527, + "learning_rate": 3.28189808482703e-06, + "loss": 0.0902, + "step": 1546, + "video_reward_cumulative_accuracy": 0.815653298835705 + }, + { + "epoch": 0.45918670228554465, + "grad_norm": 1.1430221796035767, + "learning_rate": 3.2794372544212495e-06, + "loss": 0.0306, + "step": 1547, + "video_reward_cumulative_accuracy": 0.8157724628312863 + }, + { + "epoch": 0.4594835262689225, + "grad_norm": 4.331371784210205, + "learning_rate": 3.2769755872139264e-06, + "loss": 0.0725, + "step": 1548, + "video_reward_cumulative_accuracy": 0.8158914728682171 + }, + { + "epoch": 0.4597803502523004, + "grad_norm": 2.8517324924468994, + "learning_rate": 3.274513085847899e-06, + "loss": 0.0492, + "step": 1549, + "video_reward_cumulative_accuracy": 0.815687540348612 + }, + { + "epoch": 0.46007717423567823, + "grad_norm": 2.0116770267486572, + "learning_rate": 3.272049752966901e-06, + "loss": 0.0484, + "step": 1550, + "video_reward_cumulative_accuracy": 0.8158064516129032 + }, + { + "epoch": 0.4603739982190561, + "grad_norm": 2.2900784015655518, + "learning_rate": 3.2695855912155605e-06, + "loss": 0.0372, + "step": 1551, + "video_reward_cumulative_accuracy": 0.8156028368794326 + }, + { + "epoch": 0.46067082220243394, + "grad_norm": 2.2841272354125977, + "learning_rate": 3.2671206032393926e-06, + "loss": 0.0331, + "step": 1552, + "video_reward_cumulative_accuracy": 0.8150773195876289 + }, + { + "epoch": 0.4609676461858118, + "grad_norm": 1.335290789604187, + "learning_rate": 3.264654791684803e-06, + "loss": 0.0229, + "step": 1553, + "video_reward_cumulative_accuracy": 0.815196394075982 + }, + { + "epoch": 0.46126447016918964, + "grad_norm": 4.430738925933838, + "learning_rate": 3.2621881591990784e-06, + "loss": 0.0468, + "step": 1554, + "video_reward_cumulative_accuracy": 0.8153153153153153 + }, + { + "epoch": 0.4615612941525675, + "grad_norm": 5.130397319793701, + "learning_rate": 3.2597207084303893e-06, + "loss": 0.0863, + "step": 1555, + "video_reward_cumulative_accuracy": 0.8154340836012862 + }, + { + "epoch": 0.4618581181359454, + "grad_norm": 2.924949884414673, + "learning_rate": 3.2572524420277825e-06, + "loss": 0.0296, + "step": 1556, + "video_reward_cumulative_accuracy": 0.8152313624678663 + }, + { + "epoch": 0.46215494211932323, + "grad_norm": 1.930640697479248, + "learning_rate": 3.2547833626411812e-06, + "loss": 0.0361, + "step": 1557, + "video_reward_cumulative_accuracy": 0.8153500321130379 + }, + { + "epoch": 0.4624517661027011, + "grad_norm": 2.353710651397705, + "learning_rate": 3.2523134729213833e-06, + "loss": 0.0365, + "step": 1558, + "video_reward_cumulative_accuracy": 0.8151476251604621 + }, + { + "epoch": 0.46274859008607894, + "grad_norm": 2.0358400344848633, + "learning_rate": 3.2498427755200546e-06, + "loss": 0.0564, + "step": 1559, + "video_reward_cumulative_accuracy": 0.8152661962796665 + }, + { + "epoch": 0.4630454140694568, + "grad_norm": 2.5255966186523438, + "learning_rate": 3.2473712730897282e-06, + "loss": 0.0233, + "step": 1560, + "video_reward_cumulative_accuracy": 0.8153846153846154 + }, + { + "epoch": 0.46334223805283464, + "grad_norm": 0.2955355644226074, + "learning_rate": 3.244898968283802e-06, + "loss": 0.006, + "step": 1561, + "video_reward_cumulative_accuracy": 0.8155028827674567 + }, + { + "epoch": 0.4636390620362125, + "grad_norm": 1.5781440734863281, + "learning_rate": 3.2424258637565347e-06, + "loss": 0.0168, + "step": 1562, + "video_reward_cumulative_accuracy": 0.8156209987195903 + }, + { + "epoch": 0.4639358860195904, + "grad_norm": 1.639114260673523, + "learning_rate": 3.239951962163045e-06, + "loss": 0.0222, + "step": 1563, + "video_reward_cumulative_accuracy": 0.8154190658989123 + }, + { + "epoch": 0.46423271000296823, + "grad_norm": 0.8644540309906006, + "learning_rate": 3.2374772661593055e-06, + "loss": 0.0186, + "step": 1564, + "video_reward_cumulative_accuracy": 0.815537084398977 + }, + { + "epoch": 0.4645295339863461, + "grad_norm": 1.5920159816741943, + "learning_rate": 3.235001778402143e-06, + "loss": 0.0297, + "step": 1565, + "video_reward_cumulative_accuracy": 0.8156549520766773 + }, + { + "epoch": 0.46482635796972394, + "grad_norm": 2.091097354888916, + "learning_rate": 3.232525501549234e-06, + "loss": 0.0579, + "step": 1566, + "video_reward_cumulative_accuracy": 0.815772669220945 + }, + { + "epoch": 0.4651231819531018, + "grad_norm": 3.484055995941162, + "learning_rate": 3.230048438259102e-06, + "loss": 0.0441, + "step": 1567, + "video_reward_cumulative_accuracy": 0.8155711550733886 + }, + { + "epoch": 0.46542000593647964, + "grad_norm": 1.8032653331756592, + "learning_rate": 3.227570591191114e-06, + "loss": 0.0827, + "step": 1568, + "video_reward_cumulative_accuracy": 0.8156887755102041 + }, + { + "epoch": 0.4657168299198575, + "grad_norm": 2.3294694423675537, + "learning_rate": 3.22509196300548e-06, + "loss": 0.046, + "step": 1569, + "video_reward_cumulative_accuracy": 0.815806246016571 + }, + { + "epoch": 0.4660136539032354, + "grad_norm": 1.2451170682907104, + "learning_rate": 3.222612556363249e-06, + "loss": 0.0456, + "step": 1570, + "video_reward_cumulative_accuracy": 0.8159235668789809 + }, + { + "epoch": 0.46631047788661323, + "grad_norm": 2.33011531829834, + "learning_rate": 3.2201323739263024e-06, + "loss": 0.0337, + "step": 1571, + "video_reward_cumulative_accuracy": 0.8160407383831955 + }, + { + "epoch": 0.4666073018699911, + "grad_norm": 3.825652599334717, + "learning_rate": 3.217651418357359e-06, + "loss": 0.0352, + "step": 1572, + "video_reward_cumulative_accuracy": 0.8158396946564885 + }, + { + "epoch": 0.46690412585336893, + "grad_norm": 2.4924354553222656, + "learning_rate": 3.2151696923199636e-06, + "loss": 0.07, + "step": 1573, + "video_reward_cumulative_accuracy": 0.8156389065479974 + }, + { + "epoch": 0.4672009498367468, + "grad_norm": 3.4220986366271973, + "learning_rate": 3.2126871984784907e-06, + "loss": 0.079, + "step": 1574, + "video_reward_cumulative_accuracy": 0.8157560355781448 + }, + { + "epoch": 0.46749777382012464, + "grad_norm": 2.504857301712036, + "learning_rate": 3.210203939498139e-06, + "loss": 0.0278, + "step": 1575, + "video_reward_cumulative_accuracy": 0.8158730158730159 + }, + { + "epoch": 0.4677945978035025, + "grad_norm": 2.0332424640655518, + "learning_rate": 3.207719918044927e-06, + "loss": 0.0329, + "step": 1576, + "video_reward_cumulative_accuracy": 0.815989847715736 + }, + { + "epoch": 0.4680914217868804, + "grad_norm": 1.2692821025848389, + "learning_rate": 3.205235136785693e-06, + "loss": 0.0211, + "step": 1577, + "video_reward_cumulative_accuracy": 0.8161065313887127 + }, + { + "epoch": 0.4683882457702582, + "grad_norm": 1.8608229160308838, + "learning_rate": 3.202749598388092e-06, + "loss": 0.0441, + "step": 1578, + "video_reward_cumulative_accuracy": 0.8162230671736375 + }, + { + "epoch": 0.4686850697536361, + "grad_norm": 0.5689178109169006, + "learning_rate": 3.200263305520589e-06, + "loss": 0.0146, + "step": 1579, + "video_reward_cumulative_accuracy": 0.8163394553514883 + }, + { + "epoch": 0.46898189373701393, + "grad_norm": 1.689761996269226, + "learning_rate": 3.197776260852462e-06, + "loss": 0.0347, + "step": 1580, + "video_reward_cumulative_accuracy": 0.8161392405063291 + }, + { + "epoch": 0.4692787177203918, + "grad_norm": 2.7706823348999023, + "learning_rate": 3.195288467053795e-06, + "loss": 0.0643, + "step": 1581, + "video_reward_cumulative_accuracy": 0.8162555344718533 + }, + { + "epoch": 0.46957554170376964, + "grad_norm": 1.1557561159133911, + "learning_rate": 3.1927999267954746e-06, + "loss": 0.0424, + "step": 1582, + "video_reward_cumulative_accuracy": 0.8163716814159292 + }, + { + "epoch": 0.4698723656871475, + "grad_norm": 2.010145902633667, + "learning_rate": 3.1903106427491914e-06, + "loss": 0.0498, + "step": 1583, + "video_reward_cumulative_accuracy": 0.8164876816171825 + }, + { + "epoch": 0.4701691896705254, + "grad_norm": 3.788320302963257, + "learning_rate": 3.1878206175874334e-06, + "loss": 0.0303, + "step": 1584, + "video_reward_cumulative_accuracy": 0.8166035353535354 + }, + { + "epoch": 0.4704660136539032, + "grad_norm": 3.469613552093506, + "learning_rate": 3.1853298539834836e-06, + "loss": 0.0477, + "step": 1585, + "video_reward_cumulative_accuracy": 0.8167192429022082 + }, + { + "epoch": 0.4707628376372811, + "grad_norm": 5.445047378540039, + "learning_rate": 3.1828383546114196e-06, + "loss": 0.0825, + "step": 1586, + "video_reward_cumulative_accuracy": 0.8168348045397226 + }, + { + "epoch": 0.47105966162065893, + "grad_norm": 2.0806350708007812, + "learning_rate": 3.180346122146106e-06, + "loss": 0.0379, + "step": 1587, + "video_reward_cumulative_accuracy": 0.8169502205419029 + }, + { + "epoch": 0.4713564856040368, + "grad_norm": 1.694346308708191, + "learning_rate": 3.177853159263199e-06, + "loss": 0.0271, + "step": 1588, + "video_reward_cumulative_accuracy": 0.8167506297229219 + }, + { + "epoch": 0.47165330958741464, + "grad_norm": 2.332949161529541, + "learning_rate": 3.1753594686391343e-06, + "loss": 0.0367, + "step": 1589, + "video_reward_cumulative_accuracy": 0.8165512901195721 + }, + { + "epoch": 0.4719501335707925, + "grad_norm": 0.6793799996376038, + "learning_rate": 3.1728650529511308e-06, + "loss": 0.0154, + "step": 1590, + "video_reward_cumulative_accuracy": 0.8166666666666667 + }, + { + "epoch": 0.4722469575541704, + "grad_norm": 3.225214958190918, + "learning_rate": 3.1703699148771866e-06, + "loss": 0.0366, + "step": 1591, + "video_reward_cumulative_accuracy": 0.816781898177247 + }, + { + "epoch": 0.4725437815375482, + "grad_norm": 1.2964482307434082, + "learning_rate": 3.167874057096073e-06, + "loss": 0.0323, + "step": 1592, + "video_reward_cumulative_accuracy": 0.8168969849246231 + }, + { + "epoch": 0.4728406055209261, + "grad_norm": 1.0534178018569946, + "learning_rate": 3.1653774822873378e-06, + "loss": 0.0419, + "step": 1593, + "video_reward_cumulative_accuracy": 0.8166980539861896 + }, + { + "epoch": 0.47313742950430393, + "grad_norm": 3.556938886642456, + "learning_rate": 3.162880193131296e-06, + "loss": 0.04, + "step": 1594, + "video_reward_cumulative_accuracy": 0.8164993726474279 + }, + { + "epoch": 0.4734342534876818, + "grad_norm": 2.3933658599853516, + "learning_rate": 3.1603821923090277e-06, + "loss": 0.031, + "step": 1595, + "video_reward_cumulative_accuracy": 0.8166144200626959 + }, + { + "epoch": 0.47373107747105964, + "grad_norm": 2.3574721813201904, + "learning_rate": 3.157883482502382e-06, + "loss": 0.0415, + "step": 1596, + "video_reward_cumulative_accuracy": 0.8167293233082706 + }, + { + "epoch": 0.4740279014544375, + "grad_norm": 3.107463836669922, + "learning_rate": 3.155384066393964e-06, + "loss": 0.0538, + "step": 1597, + "video_reward_cumulative_accuracy": 0.8168440826549781 + }, + { + "epoch": 0.4743247254378154, + "grad_norm": 1.9954248666763306, + "learning_rate": 3.1528839466671413e-06, + "loss": 0.0545, + "step": 1598, + "video_reward_cumulative_accuracy": 0.8169586983729662 + }, + { + "epoch": 0.4746215494211932, + "grad_norm": 4.566195964813232, + "learning_rate": 3.1503831260060346e-06, + "loss": 0.0645, + "step": 1599, + "video_reward_cumulative_accuracy": 0.8170731707317073 + }, + { + "epoch": 0.4749183734045711, + "grad_norm": 1.2144123315811157, + "learning_rate": 3.1478816070955164e-06, + "loss": 0.0793, + "step": 1600, + "video_reward_cumulative_accuracy": 0.816875 + }, + { + "epoch": 0.4749183734045711, + "eval_runtime": 148.9352, + "eval_samples_per_second": 5.298, + "eval_steps_per_second": 0.665, + "eval_test_set_accuracy": 0.7815656565656566, + "step": 1600 + }, + { + "epoch": 0.47521519738794893, + "grad_norm": 1.2719308137893677, + "learning_rate": 3.1453793926212125e-06, + "loss": 0.0114, + "step": 1601, + "video_reward_cumulative_accuracy": 0.8169893816364772 + }, + { + "epoch": 0.4755120213713268, + "grad_norm": 1.4081724882125854, + "learning_rate": 3.1428764852694905e-06, + "loss": 0.0274, + "step": 1602, + "video_reward_cumulative_accuracy": 0.8167915106117354 + }, + { + "epoch": 0.47580884535470463, + "grad_norm": 2.737161874771118, + "learning_rate": 3.1403728877274662e-06, + "loss": 0.0492, + "step": 1603, + "video_reward_cumulative_accuracy": 0.8165938864628821 + }, + { + "epoch": 0.4761056693380825, + "grad_norm": 3.401216983795166, + "learning_rate": 3.137868602682993e-06, + "loss": 0.0512, + "step": 1604, + "video_reward_cumulative_accuracy": 0.816708229426434 + }, + { + "epoch": 0.4764024933214604, + "grad_norm": 3.201937437057495, + "learning_rate": 3.1353636328246652e-06, + "loss": 0.0277, + "step": 1605, + "video_reward_cumulative_accuracy": 0.8165109034267912 + }, + { + "epoch": 0.4766993173048382, + "grad_norm": 4.124434471130371, + "learning_rate": 3.1328579808418103e-06, + "loss": 0.043, + "step": 1606, + "video_reward_cumulative_accuracy": 0.8166251556662516 + }, + { + "epoch": 0.4769961412882161, + "grad_norm": 2.2116482257843018, + "learning_rate": 3.1303516494244897e-06, + "loss": 0.0553, + "step": 1607, + "video_reward_cumulative_accuracy": 0.8167392657125078 + }, + { + "epoch": 0.4772929652715939, + "grad_norm": 2.262800455093384, + "learning_rate": 3.127844641263493e-06, + "loss": 0.0599, + "step": 1608, + "video_reward_cumulative_accuracy": 0.8165422885572139 + }, + { + "epoch": 0.4775897892549718, + "grad_norm": 3.7271931171417236, + "learning_rate": 3.1253369590503357e-06, + "loss": 0.0817, + "step": 1609, + "video_reward_cumulative_accuracy": 0.8160348042262274 + }, + { + "epoch": 0.47788661323834963, + "grad_norm": 1.643730878829956, + "learning_rate": 3.12282860547726e-06, + "loss": 0.0279, + "step": 1610, + "video_reward_cumulative_accuracy": 0.8158385093167702 + }, + { + "epoch": 0.4781834372217275, + "grad_norm": 2.743765115737915, + "learning_rate": 3.1203195832372256e-06, + "loss": 0.0265, + "step": 1611, + "video_reward_cumulative_accuracy": 0.8159528243327127 + }, + { + "epoch": 0.4784802612051054, + "grad_norm": 3.5921216011047363, + "learning_rate": 3.1178098950239118e-06, + "loss": 0.0393, + "step": 1612, + "video_reward_cumulative_accuracy": 0.8160669975186104 + }, + { + "epoch": 0.4787770851884832, + "grad_norm": 1.2250019311904907, + "learning_rate": 3.115299543531713e-06, + "loss": 0.0227, + "step": 1613, + "video_reward_cumulative_accuracy": 0.8161810291382517 + }, + { + "epoch": 0.4790739091718611, + "grad_norm": 1.2587252855300903, + "learning_rate": 3.1127885314557343e-06, + "loss": 0.0438, + "step": 1614, + "video_reward_cumulative_accuracy": 0.8162949194547707 + }, + { + "epoch": 0.4793707331552389, + "grad_norm": 2.4114818572998047, + "learning_rate": 3.110276861491791e-06, + "loss": 0.0305, + "step": 1615, + "video_reward_cumulative_accuracy": 0.8164086687306501 + }, + { + "epoch": 0.4796675571386168, + "grad_norm": 0.43257880210876465, + "learning_rate": 3.107764536336405e-06, + "loss": 0.0134, + "step": 1616, + "video_reward_cumulative_accuracy": 0.8165222772277227 + }, + { + "epoch": 0.47996438112199463, + "grad_norm": 2.9027011394500732, + "learning_rate": 3.1052515586868005e-06, + "loss": 0.0472, + "step": 1617, + "video_reward_cumulative_accuracy": 0.8166357452071737 + }, + { + "epoch": 0.4802612051053725, + "grad_norm": 3.9004299640655518, + "learning_rate": 3.102737931240904e-06, + "loss": 0.0582, + "step": 1618, + "video_reward_cumulative_accuracy": 0.8167490729295427 + }, + { + "epoch": 0.4805580290887504, + "grad_norm": 1.6789156198501587, + "learning_rate": 3.1002236566973383e-06, + "loss": 0.044, + "step": 1619, + "video_reward_cumulative_accuracy": 0.8168622606547251 + }, + { + "epoch": 0.4808548530721282, + "grad_norm": 0.7176075577735901, + "learning_rate": 3.09770873775542e-06, + "loss": 0.0199, + "step": 1620, + "video_reward_cumulative_accuracy": 0.8169753086419753 + }, + { + "epoch": 0.4811516770555061, + "grad_norm": 3.140882968902588, + "learning_rate": 3.09519317711516e-06, + "loss": 0.0341, + "step": 1621, + "video_reward_cumulative_accuracy": 0.8170882171499074 + }, + { + "epoch": 0.4814485010388839, + "grad_norm": 4.167765140533447, + "learning_rate": 3.092676977477256e-06, + "loss": 0.069, + "step": 1622, + "video_reward_cumulative_accuracy": 0.8172009864364982 + }, + { + "epoch": 0.4817453250222618, + "grad_norm": 2.755486011505127, + "learning_rate": 3.090160141543092e-06, + "loss": 0.0635, + "step": 1623, + "video_reward_cumulative_accuracy": 0.8173136167590881 + }, + { + "epoch": 0.48204214900563963, + "grad_norm": 2.705613136291504, + "learning_rate": 3.087642672014738e-06, + "loss": 0.0451, + "step": 1624, + "video_reward_cumulative_accuracy": 0.8174261083743842 + }, + { + "epoch": 0.4823389729890175, + "grad_norm": 0.6087374091148376, + "learning_rate": 3.085124571594939e-06, + "loss": 0.0153, + "step": 1625, + "video_reward_cumulative_accuracy": 0.8175384615384615 + }, + { + "epoch": 0.4826357969723954, + "grad_norm": 4.505397319793701, + "learning_rate": 3.0826058429871226e-06, + "loss": 0.0659, + "step": 1626, + "video_reward_cumulative_accuracy": 0.817650676506765 + }, + { + "epoch": 0.4829326209557732, + "grad_norm": 1.5764565467834473, + "learning_rate": 3.0800864888953863e-06, + "loss": 0.0229, + "step": 1627, + "video_reward_cumulative_accuracy": 0.8177627535341119 + }, + { + "epoch": 0.4832294449391511, + "grad_norm": 1.974806785583496, + "learning_rate": 3.077566512024503e-06, + "loss": 0.0307, + "step": 1628, + "video_reward_cumulative_accuracy": 0.8178746928746928 + }, + { + "epoch": 0.4835262689225289, + "grad_norm": 2.2565455436706543, + "learning_rate": 3.0750459150799116e-06, + "loss": 0.0528, + "step": 1629, + "video_reward_cumulative_accuracy": 0.8173726212400245 + }, + { + "epoch": 0.4838230929059068, + "grad_norm": 2.1708600521087646, + "learning_rate": 3.0725247007677172e-06, + "loss": 0.0366, + "step": 1630, + "video_reward_cumulative_accuracy": 0.8171779141104294 + }, + { + "epoch": 0.48411991688928463, + "grad_norm": 0.7493535280227661, + "learning_rate": 3.0700028717946895e-06, + "loss": 0.0177, + "step": 1631, + "video_reward_cumulative_accuracy": 0.8172900061312078 + }, + { + "epoch": 0.4844167408726625, + "grad_norm": 0.8744514584541321, + "learning_rate": 3.0674804308682547e-06, + "loss": 0.0233, + "step": 1632, + "video_reward_cumulative_accuracy": 0.8174019607843137 + }, + { + "epoch": 0.4847135648560404, + "grad_norm": 2.157686710357666, + "learning_rate": 3.0649573806965006e-06, + "loss": 0.0401, + "step": 1633, + "video_reward_cumulative_accuracy": 0.8172075933864054 + }, + { + "epoch": 0.4850103888394182, + "grad_norm": 3.5518863201141357, + "learning_rate": 3.0624337239881636e-06, + "loss": 0.091, + "step": 1634, + "video_reward_cumulative_accuracy": 0.8170134638922889 + }, + { + "epoch": 0.4853072128227961, + "grad_norm": 2.3240246772766113, + "learning_rate": 3.0599094634526348e-06, + "loss": 0.0462, + "step": 1635, + "video_reward_cumulative_accuracy": 0.8168195718654434 + }, + { + "epoch": 0.4856040368061739, + "grad_norm": 1.841416358947754, + "learning_rate": 3.057384601799955e-06, + "loss": 0.0536, + "step": 1636, + "video_reward_cumulative_accuracy": 0.8166259168704156 + }, + { + "epoch": 0.4859008607895518, + "grad_norm": 1.3698524236679077, + "learning_rate": 3.0548591417408054e-06, + "loss": 0.0268, + "step": 1637, + "video_reward_cumulative_accuracy": 0.8167379352474038 + }, + { + "epoch": 0.48619768477292963, + "grad_norm": 8.311427116394043, + "learning_rate": 3.0523330859865147e-06, + "loss": 0.0834, + "step": 1638, + "video_reward_cumulative_accuracy": 0.8168498168498168 + }, + { + "epoch": 0.4864945087563075, + "grad_norm": 2.798309803009033, + "learning_rate": 3.0498064372490475e-06, + "loss": 0.0553, + "step": 1639, + "video_reward_cumulative_accuracy": 0.8169615619280048 + }, + { + "epoch": 0.4867913327396854, + "grad_norm": 1.4771977663040161, + "learning_rate": 3.0472791982410065e-06, + "loss": 0.0298, + "step": 1640, + "video_reward_cumulative_accuracy": 0.8170731707317073 + }, + { + "epoch": 0.4870881567230632, + "grad_norm": 2.279690742492676, + "learning_rate": 3.0447513716756294e-06, + "loss": 0.0534, + "step": 1641, + "video_reward_cumulative_accuracy": 0.8165752589884216 + }, + { + "epoch": 0.4873849807064411, + "grad_norm": 1.9072357416152954, + "learning_rate": 3.0422229602667825e-06, + "loss": 0.0476, + "step": 1642, + "video_reward_cumulative_accuracy": 0.8163824604141291 + }, + { + "epoch": 0.4876818046898189, + "grad_norm": 1.5728148221969604, + "learning_rate": 3.0396939667289597e-06, + "loss": 0.0234, + "step": 1643, + "video_reward_cumulative_accuracy": 0.8164942178940962 + }, + { + "epoch": 0.4879786286731968, + "grad_norm": 5.686618804931641, + "learning_rate": 3.0371643937772834e-06, + "loss": 0.0733, + "step": 1644, + "video_reward_cumulative_accuracy": 0.8166058394160584 + }, + { + "epoch": 0.4882754526565746, + "grad_norm": 2.5927038192749023, + "learning_rate": 3.0346342441274935e-06, + "loss": 0.0404, + "step": 1645, + "video_reward_cumulative_accuracy": 0.8164133738601824 + }, + { + "epoch": 0.4885722766399525, + "grad_norm": 3.340865135192871, + "learning_rate": 3.0321035204959524e-06, + "loss": 0.0583, + "step": 1646, + "video_reward_cumulative_accuracy": 0.8165249088699879 + }, + { + "epoch": 0.4888691006233304, + "grad_norm": 1.8082395792007446, + "learning_rate": 3.029572225599637e-06, + "loss": 0.1195, + "step": 1647, + "video_reward_cumulative_accuracy": 0.8166363084395871 + }, + { + "epoch": 0.4891659246067082, + "grad_norm": 2.37727427482605, + "learning_rate": 3.0270403621561387e-06, + "loss": 0.0565, + "step": 1648, + "video_reward_cumulative_accuracy": 0.816747572815534 + }, + { + "epoch": 0.4894627485900861, + "grad_norm": 2.865441083908081, + "learning_rate": 3.024507932883659e-06, + "loss": 0.0404, + "step": 1649, + "video_reward_cumulative_accuracy": 0.8168587022437841 + }, + { + "epoch": 0.4897595725734639, + "grad_norm": 1.271600604057312, + "learning_rate": 3.0219749405010054e-06, + "loss": 0.0444, + "step": 1650, + "video_reward_cumulative_accuracy": 0.816969696969697 + }, + { + "epoch": 0.4900563965568418, + "grad_norm": 0.5346439480781555, + "learning_rate": 3.019441387727591e-06, + "loss": 0.012, + "step": 1651, + "video_reward_cumulative_accuracy": 0.8170805572380375 + }, + { + "epoch": 0.4903532205402196, + "grad_norm": 1.969221830368042, + "learning_rate": 3.016907277283431e-06, + "loss": 0.0382, + "step": 1652, + "video_reward_cumulative_accuracy": 0.8171912832929782 + }, + { + "epoch": 0.4906500445235975, + "grad_norm": 1.0193865299224854, + "learning_rate": 3.014372611889139e-06, + "loss": 0.0287, + "step": 1653, + "video_reward_cumulative_accuracy": 0.8173018753781004 + }, + { + "epoch": 0.4909468685069754, + "grad_norm": 2.115161418914795, + "learning_rate": 3.011837394265925e-06, + "loss": 0.0778, + "step": 1654, + "video_reward_cumulative_accuracy": 0.8174123337363967 + }, + { + "epoch": 0.4912436924903532, + "grad_norm": 4.27302360534668, + "learning_rate": 3.0093016271355894e-06, + "loss": 0.0571, + "step": 1655, + "video_reward_cumulative_accuracy": 0.8172205438066465 + }, + { + "epoch": 0.4915405164737311, + "grad_norm": 3.0853607654571533, + "learning_rate": 3.0067653132205267e-06, + "loss": 0.0414, + "step": 1656, + "video_reward_cumulative_accuracy": 0.8170289855072463 + }, + { + "epoch": 0.4918373404571089, + "grad_norm": 1.4236749410629272, + "learning_rate": 3.0042284552437138e-06, + "loss": 0.0427, + "step": 1657, + "video_reward_cumulative_accuracy": 0.8168376584188292 + }, + { + "epoch": 0.4921341644404868, + "grad_norm": 3.9889345169067383, + "learning_rate": 3.0016910559287147e-06, + "loss": 0.0583, + "step": 1658, + "video_reward_cumulative_accuracy": 0.8166465621230398 + }, + { + "epoch": 0.4924309884238646, + "grad_norm": 3.3688066005706787, + "learning_rate": 2.999153117999675e-06, + "loss": 0.0509, + "step": 1659, + "video_reward_cumulative_accuracy": 0.8164556962025317 + }, + { + "epoch": 0.4927278124072425, + "grad_norm": 1.3700438737869263, + "learning_rate": 2.996614644181316e-06, + "loss": 0.0319, + "step": 1660, + "video_reward_cumulative_accuracy": 0.816566265060241 + }, + { + "epoch": 0.4930246363906204, + "grad_norm": 1.2768040895462036, + "learning_rate": 2.9940756371989366e-06, + "loss": 0.0296, + "step": 1661, + "video_reward_cumulative_accuracy": 0.8166767007826611 + }, + { + "epoch": 0.4933214603739982, + "grad_norm": 1.5340852737426758, + "learning_rate": 2.9915360997784066e-06, + "loss": 0.0312, + "step": 1662, + "video_reward_cumulative_accuracy": 0.8164861612515042 + }, + { + "epoch": 0.4936182843573761, + "grad_norm": 2.8251636028289795, + "learning_rate": 2.9889960346461653e-06, + "loss": 0.0578, + "step": 1663, + "video_reward_cumulative_accuracy": 0.8162958508719182 + }, + { + "epoch": 0.4939151083407539, + "grad_norm": 1.959545612335205, + "learning_rate": 2.9864554445292205e-06, + "loss": 0.0421, + "step": 1664, + "video_reward_cumulative_accuracy": 0.81640625 + }, + { + "epoch": 0.4942119323241318, + "grad_norm": 1.7720321416854858, + "learning_rate": 2.9839143321551415e-06, + "loss": 0.0273, + "step": 1665, + "video_reward_cumulative_accuracy": 0.8165165165165165 + }, + { + "epoch": 0.4945087563075096, + "grad_norm": 3.3178257942199707, + "learning_rate": 2.9813727002520597e-06, + "loss": 0.0837, + "step": 1666, + "video_reward_cumulative_accuracy": 0.8166266506602641 + }, + { + "epoch": 0.4948055802908875, + "grad_norm": 1.7163565158843994, + "learning_rate": 2.9788305515486636e-06, + "loss": 0.0305, + "step": 1667, + "video_reward_cumulative_accuracy": 0.8167366526694662 + }, + { + "epoch": 0.4951024042742654, + "grad_norm": 2.1592905521392822, + "learning_rate": 2.9762878887741956e-06, + "loss": 0.0309, + "step": 1668, + "video_reward_cumulative_accuracy": 0.8168465227817746 + }, + { + "epoch": 0.4953992282576432, + "grad_norm": 0.8975669741630554, + "learning_rate": 2.973744714658452e-06, + "loss": 0.0316, + "step": 1669, + "video_reward_cumulative_accuracy": 0.816956261234272 + }, + { + "epoch": 0.4956960522410211, + "grad_norm": 1.1875498294830322, + "learning_rate": 2.9712010319317765e-06, + "loss": 0.0149, + "step": 1670, + "video_reward_cumulative_accuracy": 0.8167664670658683 + }, + { + "epoch": 0.4959928762243989, + "grad_norm": 1.2564858198165894, + "learning_rate": 2.968656843325059e-06, + "loss": 0.0371, + "step": 1671, + "video_reward_cumulative_accuracy": 0.8168761220825853 + }, + { + "epoch": 0.4962897002077768, + "grad_norm": 3.0329394340515137, + "learning_rate": 2.966112151569734e-06, + "loss": 0.0305, + "step": 1672, + "video_reward_cumulative_accuracy": 0.8166866028708134 + }, + { + "epoch": 0.4965865241911546, + "grad_norm": 1.6530811786651611, + "learning_rate": 2.963566959397774e-06, + "loss": 0.0277, + "step": 1673, + "video_reward_cumulative_accuracy": 0.8164973102211596 + }, + { + "epoch": 0.4968833481745325, + "grad_norm": 4.417179107666016, + "learning_rate": 2.9610212695416908e-06, + "loss": 0.058, + "step": 1674, + "video_reward_cumulative_accuracy": 0.8166069295101553 + }, + { + "epoch": 0.4971801721579104, + "grad_norm": 3.6994571685791016, + "learning_rate": 2.958475084734529e-06, + "loss": 0.0651, + "step": 1675, + "video_reward_cumulative_accuracy": 0.8167164179104478 + }, + { + "epoch": 0.4974769961412882, + "grad_norm": 1.733941912651062, + "learning_rate": 2.955928407709864e-06, + "loss": 0.0476, + "step": 1676, + "video_reward_cumulative_accuracy": 0.8168257756563246 + }, + { + "epoch": 0.4977738201246661, + "grad_norm": 1.2677801847457886, + "learning_rate": 2.953381241201804e-06, + "loss": 0.0159, + "step": 1677, + "video_reward_cumulative_accuracy": 0.8169350029815146 + }, + { + "epoch": 0.4980706441080439, + "grad_norm": 1.808666467666626, + "learning_rate": 2.9508335879449764e-06, + "loss": 0.0442, + "step": 1678, + "video_reward_cumulative_accuracy": 0.8170441001191895 + }, + { + "epoch": 0.4983674680914218, + "grad_norm": 1.5872184038162231, + "learning_rate": 2.9482854506745353e-06, + "loss": 0.0499, + "step": 1679, + "video_reward_cumulative_accuracy": 0.8171530673019655 + }, + { + "epoch": 0.4986642920747996, + "grad_norm": 0.5910095572471619, + "learning_rate": 2.945736832126153e-06, + "loss": 0.0082, + "step": 1680, + "video_reward_cumulative_accuracy": 0.8172619047619047 + }, + { + "epoch": 0.4989611160581775, + "grad_norm": 3.666095018386841, + "learning_rate": 2.9431877350360198e-06, + "loss": 0.0307, + "step": 1681, + "video_reward_cumulative_accuracy": 0.8173706127305176 + }, + { + "epoch": 0.4992579400415554, + "grad_norm": 0.6903228163719177, + "learning_rate": 2.9406381621408374e-06, + "loss": 0.0174, + "step": 1682, + "video_reward_cumulative_accuracy": 0.8174791914387634 + }, + { + "epoch": 0.4995547640249332, + "grad_norm": 1.7546206712722778, + "learning_rate": 2.9380881161778214e-06, + "loss": 0.0198, + "step": 1683, + "video_reward_cumulative_accuracy": 0.8175876411170528 + }, + { + "epoch": 0.4998515880083111, + "grad_norm": 1.842877745628357, + "learning_rate": 2.9355375998846923e-06, + "loss": 0.0213, + "step": 1684, + "video_reward_cumulative_accuracy": 0.8176959619952494 + }, + { + "epoch": 0.5001484119916889, + "grad_norm": 2.9342010021209717, + "learning_rate": 2.932986615999678e-06, + "loss": 0.0577, + "step": 1685, + "video_reward_cumulative_accuracy": 0.8175074183976261 + }, + { + "epoch": 0.5004452359750667, + "grad_norm": 4.135309219360352, + "learning_rate": 2.9304351672615067e-06, + "loss": 0.0362, + "step": 1686, + "video_reward_cumulative_accuracy": 0.8173190984578885 + }, + { + "epoch": 0.5007420599584447, + "grad_norm": 3.366182804107666, + "learning_rate": 2.9278832564094064e-06, + "loss": 0.0511, + "step": 1687, + "video_reward_cumulative_accuracy": 0.8174273858921162 + }, + { + "epoch": 0.5010388839418225, + "grad_norm": 2.525951623916626, + "learning_rate": 2.9253308861831e-06, + "loss": 0.0335, + "step": 1688, + "video_reward_cumulative_accuracy": 0.8175355450236966 + }, + { + "epoch": 0.5013357079252003, + "grad_norm": 3.1717841625213623, + "learning_rate": 2.9227780593228063e-06, + "loss": 0.0498, + "step": 1689, + "video_reward_cumulative_accuracy": 0.8173475429248076 + }, + { + "epoch": 0.5016325319085783, + "grad_norm": 4.952225208282471, + "learning_rate": 2.9202247785692323e-06, + "loss": 0.0765, + "step": 1690, + "video_reward_cumulative_accuracy": 0.8168639053254438 + }, + { + "epoch": 0.5019293558919561, + "grad_norm": 2.3946175575256348, + "learning_rate": 2.9176710466635718e-06, + "loss": 0.0809, + "step": 1691, + "video_reward_cumulative_accuracy": 0.8166765227675932 + }, + { + "epoch": 0.5022261798753339, + "grad_norm": 1.2514123916625977, + "learning_rate": 2.915116866347505e-06, + "loss": 0.03, + "step": 1692, + "video_reward_cumulative_accuracy": 0.8167848699763594 + }, + { + "epoch": 0.5025230038587117, + "grad_norm": 1.9969528913497925, + "learning_rate": 2.9125622403631913e-06, + "loss": 0.017, + "step": 1693, + "video_reward_cumulative_accuracy": 0.8168930891907856 + }, + { + "epoch": 0.5028198278420897, + "grad_norm": 1.2726820707321167, + "learning_rate": 2.9100071714532706e-06, + "loss": 0.043, + "step": 1694, + "video_reward_cumulative_accuracy": 0.8170011806375442 + }, + { + "epoch": 0.5031166518254675, + "grad_norm": 1.732360601425171, + "learning_rate": 2.907451662360857e-06, + "loss": 0.0243, + "step": 1695, + "video_reward_cumulative_accuracy": 0.8171091445427728 + }, + { + "epoch": 0.5034134758088453, + "grad_norm": 0.7770466804504395, + "learning_rate": 2.904895715829537e-06, + "loss": 0.0144, + "step": 1696, + "video_reward_cumulative_accuracy": 0.8172169811320755 + }, + { + "epoch": 0.5037102997922233, + "grad_norm": 1.9971357583999634, + "learning_rate": 2.902339334603369e-06, + "loss": 0.0488, + "step": 1697, + "video_reward_cumulative_accuracy": 0.8173246906305245 + }, + { + "epoch": 0.5040071237756011, + "grad_norm": 2.280515670776367, + "learning_rate": 2.8997825214268743e-06, + "loss": 0.0486, + "step": 1698, + "video_reward_cumulative_accuracy": 0.8171378091872792 + }, + { + "epoch": 0.5043039477589789, + "grad_norm": 4.495890140533447, + "learning_rate": 2.8972252790450413e-06, + "loss": 0.082, + "step": 1699, + "video_reward_cumulative_accuracy": 0.8169511477339612 + }, + { + "epoch": 0.5046007717423567, + "grad_norm": 1.4922055006027222, + "learning_rate": 2.8946676102033167e-06, + "loss": 0.0249, + "step": 1700, + "video_reward_cumulative_accuracy": 0.8170588235294117 + }, + { + "epoch": 0.5048975957257347, + "grad_norm": 1.0637956857681274, + "learning_rate": 2.892109517647607e-06, + "loss": 0.0296, + "step": 1701, + "video_reward_cumulative_accuracy": 0.8168724279835391 + }, + { + "epoch": 0.5051944197091125, + "grad_norm": 1.7560207843780518, + "learning_rate": 2.8895510041242737e-06, + "loss": 0.0522, + "step": 1702, + "video_reward_cumulative_accuracy": 0.8169800235017627 + }, + { + "epoch": 0.5054912436924903, + "grad_norm": 0.3437102735042572, + "learning_rate": 2.886992072380128e-06, + "loss": 0.0042, + "step": 1703, + "video_reward_cumulative_accuracy": 0.8170874926600118 + }, + { + "epoch": 0.5057880676758683, + "grad_norm": 3.053436279296875, + "learning_rate": 2.884432725162433e-06, + "loss": 0.0345, + "step": 1704, + "video_reward_cumulative_accuracy": 0.8171948356807511 + }, + { + "epoch": 0.5060848916592461, + "grad_norm": 1.0694425106048584, + "learning_rate": 2.8818729652188936e-06, + "loss": 0.0315, + "step": 1705, + "video_reward_cumulative_accuracy": 0.817008797653959 + }, + { + "epoch": 0.5063817156426239, + "grad_norm": 3.757838249206543, + "learning_rate": 2.879312795297663e-06, + "loss": 0.0712, + "step": 1706, + "video_reward_cumulative_accuracy": 0.8171160609613131 + }, + { + "epoch": 0.5066785396260017, + "grad_norm": 2.922731637954712, + "learning_rate": 2.8767522181473323e-06, + "loss": 0.0432, + "step": 1707, + "video_reward_cumulative_accuracy": 0.81693028705331 + }, + { + "epoch": 0.5069753636093797, + "grad_norm": 2.4191317558288574, + "learning_rate": 2.8741912365169276e-06, + "loss": 0.0722, + "step": 1708, + "video_reward_cumulative_accuracy": 0.8167447306791569 + }, + { + "epoch": 0.5072721875927575, + "grad_norm": 1.132919192314148, + "learning_rate": 2.8716298531559133e-06, + "loss": 0.0104, + "step": 1709, + "video_reward_cumulative_accuracy": 0.8168519602106495 + }, + { + "epoch": 0.5075690115761353, + "grad_norm": 3.451291561126709, + "learning_rate": 2.8690680708141814e-06, + "loss": 0.1011, + "step": 1710, + "video_reward_cumulative_accuracy": 0.8169590643274853 + }, + { + "epoch": 0.5078658355595133, + "grad_norm": 2.044468879699707, + "learning_rate": 2.866505892242055e-06, + "loss": 0.0415, + "step": 1711, + "video_reward_cumulative_accuracy": 0.8170660432495617 + }, + { + "epoch": 0.5081626595428911, + "grad_norm": 1.3463438749313354, + "learning_rate": 2.8639433201902807e-06, + "loss": 0.0201, + "step": 1712, + "video_reward_cumulative_accuracy": 0.8171728971962616 + }, + { + "epoch": 0.5084594835262689, + "grad_norm": 1.1717925071716309, + "learning_rate": 2.8613803574100284e-06, + "loss": 0.0327, + "step": 1713, + "video_reward_cumulative_accuracy": 0.8172796263864565 + }, + { + "epoch": 0.5087563075096467, + "grad_norm": 2.5041465759277344, + "learning_rate": 2.858817006652888e-06, + "loss": 0.0391, + "step": 1714, + "video_reward_cumulative_accuracy": 0.8170945157526255 + }, + { + "epoch": 0.5090531314930247, + "grad_norm": 2.035304546356201, + "learning_rate": 2.8562532706708655e-06, + "loss": 0.017, + "step": 1715, + "video_reward_cumulative_accuracy": 0.817201166180758 + }, + { + "epoch": 0.5093499554764025, + "grad_norm": 2.6891417503356934, + "learning_rate": 2.853689152216379e-06, + "loss": 0.0498, + "step": 1716, + "video_reward_cumulative_accuracy": 0.8173076923076923 + }, + { + "epoch": 0.5096467794597803, + "grad_norm": 4.318187713623047, + "learning_rate": 2.8511246540422597e-06, + "loss": 0.0503, + "step": 1717, + "video_reward_cumulative_accuracy": 0.8174140943506115 + }, + { + "epoch": 0.5099436034431583, + "grad_norm": 5.709010124206543, + "learning_rate": 2.848559778901745e-06, + "loss": 0.062, + "step": 1718, + "video_reward_cumulative_accuracy": 0.8175203725261933 + }, + { + "epoch": 0.5102404274265361, + "grad_norm": 1.210877537727356, + "learning_rate": 2.845994529548477e-06, + "loss": 0.0096, + "step": 1719, + "video_reward_cumulative_accuracy": 0.8176265270506108 + }, + { + "epoch": 0.5105372514099139, + "grad_norm": 1.9540245532989502, + "learning_rate": 2.8434289087365002e-06, + "loss": 0.041, + "step": 1720, + "video_reward_cumulative_accuracy": 0.8177325581395349 + }, + { + "epoch": 0.5108340753932917, + "grad_norm": 2.498683452606201, + "learning_rate": 2.8408629192202574e-06, + "loss": 0.0548, + "step": 1721, + "video_reward_cumulative_accuracy": 0.8178384660081348 + }, + { + "epoch": 0.5111308993766697, + "grad_norm": 1.3648625612258911, + "learning_rate": 2.8382965637545877e-06, + "loss": 0.0349, + "step": 1722, + "video_reward_cumulative_accuracy": 0.8179442508710801 + }, + { + "epoch": 0.5114277233600475, + "grad_norm": 2.748464345932007, + "learning_rate": 2.835729845094722e-06, + "loss": 0.0527, + "step": 1723, + "video_reward_cumulative_accuracy": 0.818049912942542 + }, + { + "epoch": 0.5117245473434253, + "grad_norm": 1.2629681825637817, + "learning_rate": 2.8331627659962824e-06, + "loss": 0.0243, + "step": 1724, + "video_reward_cumulative_accuracy": 0.8181554524361949 + }, + { + "epoch": 0.5120213713268033, + "grad_norm": 2.213495969772339, + "learning_rate": 2.8305953292152785e-06, + "loss": 0.035, + "step": 1725, + "video_reward_cumulative_accuracy": 0.8182608695652174 + }, + { + "epoch": 0.5123181953101811, + "grad_norm": 2.6524288654327393, + "learning_rate": 2.8280275375081e-06, + "loss": 0.0457, + "step": 1726, + "video_reward_cumulative_accuracy": 0.8180764774044033 + }, + { + "epoch": 0.5126150192935589, + "grad_norm": 2.8354032039642334, + "learning_rate": 2.8254593936315243e-06, + "loss": 0.0827, + "step": 1727, + "video_reward_cumulative_accuracy": 0.8178922987840185 + }, + { + "epoch": 0.5129118432769367, + "grad_norm": 2.953294515609741, + "learning_rate": 2.8228909003427003e-06, + "loss": 0.0621, + "step": 1728, + "video_reward_cumulative_accuracy": 0.8179976851851852 + }, + { + "epoch": 0.5132086672603147, + "grad_norm": 3.763993263244629, + "learning_rate": 2.820322060399156e-06, + "loss": 0.0425, + "step": 1729, + "video_reward_cumulative_accuracy": 0.818102949681897 + }, + { + "epoch": 0.5135054912436925, + "grad_norm": 1.8136401176452637, + "learning_rate": 2.817752876558789e-06, + "loss": 0.0226, + "step": 1730, + "video_reward_cumulative_accuracy": 0.8182080924855492 + }, + { + "epoch": 0.5138023152270703, + "grad_norm": 0.8390701413154602, + "learning_rate": 2.81518335157987e-06, + "loss": 0.0311, + "step": 1731, + "video_reward_cumulative_accuracy": 0.818313113807048 + }, + { + "epoch": 0.5140991392104483, + "grad_norm": 1.5557044744491577, + "learning_rate": 2.8126134882210313e-06, + "loss": 0.0402, + "step": 1732, + "video_reward_cumulative_accuracy": 0.8181293302540416 + }, + { + "epoch": 0.5143959631938261, + "grad_norm": 2.67010235786438, + "learning_rate": 2.8100432892412723e-06, + "loss": 0.0301, + "step": 1733, + "video_reward_cumulative_accuracy": 0.8182342758222735 + }, + { + "epoch": 0.5146927871772039, + "grad_norm": 2.489520311355591, + "learning_rate": 2.8074727573999495e-06, + "loss": 0.0829, + "step": 1734, + "video_reward_cumulative_accuracy": 0.8183391003460208 + }, + { + "epoch": 0.5149896111605817, + "grad_norm": 2.488553524017334, + "learning_rate": 2.8049018954567797e-06, + "loss": 0.0745, + "step": 1735, + "video_reward_cumulative_accuracy": 0.8181556195965418 + }, + { + "epoch": 0.5152864351439597, + "grad_norm": 3.8210175037384033, + "learning_rate": 2.802330706171831e-06, + "loss": 0.049, + "step": 1736, + "video_reward_cumulative_accuracy": 0.8179723502304147 + }, + { + "epoch": 0.5155832591273375, + "grad_norm": 7.683956623077393, + "learning_rate": 2.799759192305526e-06, + "loss": 0.0934, + "step": 1737, + "video_reward_cumulative_accuracy": 0.8177892918825561 + }, + { + "epoch": 0.5158800831107153, + "grad_norm": 1.5301347970962524, + "learning_rate": 2.7971873566186347e-06, + "loss": 0.0483, + "step": 1738, + "video_reward_cumulative_accuracy": 0.8178941311852704 + }, + { + "epoch": 0.5161769070940933, + "grad_norm": 0.7959959506988525, + "learning_rate": 2.7946152018722714e-06, + "loss": 0.0287, + "step": 1739, + "video_reward_cumulative_accuracy": 0.8177113283496262 + }, + { + "epoch": 0.5164737310774711, + "grad_norm": 3.1695709228515625, + "learning_rate": 2.7920427308278946e-06, + "loss": 0.034, + "step": 1740, + "video_reward_cumulative_accuracy": 0.8178160919540229 + }, + { + "epoch": 0.5167705550608489, + "grad_norm": 4.1620917320251465, + "learning_rate": 2.7894699462473008e-06, + "loss": 0.039, + "step": 1741, + "video_reward_cumulative_accuracy": 0.8179207352096496 + }, + { + "epoch": 0.5170673790442267, + "grad_norm": 1.3403260707855225, + "learning_rate": 2.7868968508926242e-06, + "loss": 0.0228, + "step": 1742, + "video_reward_cumulative_accuracy": 0.8180252583237658 + }, + { + "epoch": 0.5173642030276047, + "grad_norm": 2.3487467765808105, + "learning_rate": 2.784323447526333e-06, + "loss": 0.0681, + "step": 1743, + "video_reward_cumulative_accuracy": 0.8181296615031555 + }, + { + "epoch": 0.5176610270109825, + "grad_norm": 1.9424347877502441, + "learning_rate": 2.7817497389112247e-06, + "loss": 0.0378, + "step": 1744, + "video_reward_cumulative_accuracy": 0.8182339449541285 + }, + { + "epoch": 0.5179578509943603, + "grad_norm": 2.208332061767578, + "learning_rate": 2.779175727810426e-06, + "loss": 0.0513, + "step": 1745, + "video_reward_cumulative_accuracy": 0.8177650429799427 + }, + { + "epoch": 0.5182546749777382, + "grad_norm": 4.050232887268066, + "learning_rate": 2.7766014169873874e-06, + "loss": 0.0422, + "step": 1746, + "video_reward_cumulative_accuracy": 0.8178694158075601 + }, + { + "epoch": 0.5185514989611161, + "grad_norm": 1.5156047344207764, + "learning_rate": 2.7740268092058813e-06, + "loss": 0.0397, + "step": 1747, + "video_reward_cumulative_accuracy": 0.8179736691471093 + }, + { + "epoch": 0.5188483229444939, + "grad_norm": 0.790824294090271, + "learning_rate": 2.771451907229999e-06, + "loss": 0.0114, + "step": 1748, + "video_reward_cumulative_accuracy": 0.8180778032036613 + }, + { + "epoch": 0.5191451469278717, + "grad_norm": 0.7043279409408569, + "learning_rate": 2.7688767138241474e-06, + "loss": 0.017, + "step": 1749, + "video_reward_cumulative_accuracy": 0.8181818181818182 + }, + { + "epoch": 0.5194419709112497, + "grad_norm": 2.117793560028076, + "learning_rate": 2.7663012317530474e-06, + "loss": 0.0303, + "step": 1750, + "video_reward_cumulative_accuracy": 0.818 + }, + { + "epoch": 0.5197387948946275, + "grad_norm": 2.9384334087371826, + "learning_rate": 2.7637254637817284e-06, + "loss": 0.066, + "step": 1751, + "video_reward_cumulative_accuracy": 0.817818389491719 + }, + { + "epoch": 0.5200356188780053, + "grad_norm": 5.217811584472656, + "learning_rate": 2.7611494126755276e-06, + "loss": 0.0593, + "step": 1752, + "video_reward_cumulative_accuracy": 0.817351598173516 + }, + { + "epoch": 0.5203324428613832, + "grad_norm": 4.0168890953063965, + "learning_rate": 2.7585730812000855e-06, + "loss": 0.0452, + "step": 1753, + "video_reward_cumulative_accuracy": 0.8171705647461495 + }, + { + "epoch": 0.5206292668447611, + "grad_norm": 2.7809596061706543, + "learning_rate": 2.755996472121344e-06, + "loss": 0.043, + "step": 1754, + "video_reward_cumulative_accuracy": 0.8169897377423033 + }, + { + "epoch": 0.5209260908281389, + "grad_norm": 2.7551541328430176, + "learning_rate": 2.753419588205544e-06, + "loss": 0.0537, + "step": 1755, + "video_reward_cumulative_accuracy": 0.8170940170940171 + }, + { + "epoch": 0.5212229148115167, + "grad_norm": 1.8034993410110474, + "learning_rate": 2.750842432219219e-06, + "loss": 0.0245, + "step": 1756, + "video_reward_cumulative_accuracy": 0.8171981776765376 + }, + { + "epoch": 0.5215197387948947, + "grad_norm": 2.4644577503204346, + "learning_rate": 2.7482650069291976e-06, + "loss": 0.0258, + "step": 1757, + "video_reward_cumulative_accuracy": 0.8170176437108708 + }, + { + "epoch": 0.5218165627782725, + "grad_norm": 3.7700116634368896, + "learning_rate": 2.745687315102595e-06, + "loss": 0.0812, + "step": 1758, + "video_reward_cumulative_accuracy": 0.8168373151308305 + }, + { + "epoch": 0.5221133867616503, + "grad_norm": 2.102640390396118, + "learning_rate": 2.743109359506813e-06, + "loss": 0.0272, + "step": 1759, + "video_reward_cumulative_accuracy": 0.816941444002274 + }, + { + "epoch": 0.5224102107450282, + "grad_norm": 3.0133395195007324, + "learning_rate": 2.7405311429095384e-06, + "loss": 0.0467, + "step": 1760, + "video_reward_cumulative_accuracy": 0.8170454545454545 + }, + { + "epoch": 0.5227070347284061, + "grad_norm": 4.504801273345947, + "learning_rate": 2.7379526680787365e-06, + "loss": 0.0576, + "step": 1761, + "video_reward_cumulative_accuracy": 0.8168654173764907 + }, + { + "epoch": 0.5230038587117839, + "grad_norm": 1.1885194778442383, + "learning_rate": 2.7353739377826503e-06, + "loss": 0.0166, + "step": 1762, + "video_reward_cumulative_accuracy": 0.8169693530079455 + }, + { + "epoch": 0.5233006826951617, + "grad_norm": 2.73569393157959, + "learning_rate": 2.7327949547897977e-06, + "loss": 0.0413, + "step": 1763, + "video_reward_cumulative_accuracy": 0.8170731707317073 + }, + { + "epoch": 0.5235975066785397, + "grad_norm": 4.12099027633667, + "learning_rate": 2.7302157218689655e-06, + "loss": 0.0539, + "step": 1764, + "video_reward_cumulative_accuracy": 0.8171768707482994 + }, + { + "epoch": 0.5238943306619175, + "grad_norm": 2.6001250743865967, + "learning_rate": 2.7276362417892124e-06, + "loss": 0.034, + "step": 1765, + "video_reward_cumulative_accuracy": 0.8172804532577904 + }, + { + "epoch": 0.5241911546452953, + "grad_norm": 2.1774399280548096, + "learning_rate": 2.7250565173198596e-06, + "loss": 0.0154, + "step": 1766, + "video_reward_cumulative_accuracy": 0.8173839184597962 + }, + { + "epoch": 0.5244879786286732, + "grad_norm": 3.843973159790039, + "learning_rate": 2.722476551230491e-06, + "loss": 0.0978, + "step": 1767, + "video_reward_cumulative_accuracy": 0.8174872665534805 + }, + { + "epoch": 0.5247848026120511, + "grad_norm": 1.590867280960083, + "learning_rate": 2.7198963462909534e-06, + "loss": 0.0226, + "step": 1768, + "video_reward_cumulative_accuracy": 0.8175904977375565 + }, + { + "epoch": 0.5250816265954289, + "grad_norm": 4.675069808959961, + "learning_rate": 2.717315905271344e-06, + "loss": 0.1181, + "step": 1769, + "video_reward_cumulative_accuracy": 0.8174109666478236 + }, + { + "epoch": 0.5253784505788067, + "grad_norm": 1.5898845195770264, + "learning_rate": 2.714735230942019e-06, + "loss": 0.0133, + "step": 1770, + "video_reward_cumulative_accuracy": 0.8175141242937853 + }, + { + "epoch": 0.5256752745621847, + "grad_norm": 2.406005382537842, + "learning_rate": 2.712154326073581e-06, + "loss": 0.042, + "step": 1771, + "video_reward_cumulative_accuracy": 0.8176171654432524 + }, + { + "epoch": 0.5259720985455625, + "grad_norm": 1.935448408126831, + "learning_rate": 2.709573193436883e-06, + "loss": 0.0373, + "step": 1772, + "video_reward_cumulative_accuracy": 0.8174379232505643 + }, + { + "epoch": 0.5262689225289403, + "grad_norm": 1.7007020711898804, + "learning_rate": 2.7069918358030218e-06, + "loss": 0.0311, + "step": 1773, + "video_reward_cumulative_accuracy": 0.817258883248731 + }, + { + "epoch": 0.5265657465123182, + "grad_norm": 5.44350528717041, + "learning_rate": 2.7044102559433346e-06, + "loss": 0.0672, + "step": 1774, + "video_reward_cumulative_accuracy": 0.8170800450958287 + }, + { + "epoch": 0.5268625704956961, + "grad_norm": 2.1259396076202393, + "learning_rate": 2.701828456629398e-06, + "loss": 0.0689, + "step": 1775, + "video_reward_cumulative_accuracy": 0.8169014084507042 + }, + { + "epoch": 0.5271593944790739, + "grad_norm": 0.9994587302207947, + "learning_rate": 2.699246440633023e-06, + "loss": 0.015, + "step": 1776, + "video_reward_cumulative_accuracy": 0.8170045045045045 + }, + { + "epoch": 0.5274562184624517, + "grad_norm": 2.5973446369171143, + "learning_rate": 2.696664210726257e-06, + "loss": 0.0958, + "step": 1777, + "video_reward_cumulative_accuracy": 0.8171074845244795 + }, + { + "epoch": 0.5277530424458297, + "grad_norm": 2.029411792755127, + "learning_rate": 2.694081769681373e-06, + "loss": 0.0194, + "step": 1778, + "video_reward_cumulative_accuracy": 0.8172103487064117 + }, + { + "epoch": 0.5280498664292075, + "grad_norm": 1.6319355964660645, + "learning_rate": 2.6914991202708707e-06, + "loss": 0.0408, + "step": 1779, + "video_reward_cumulative_accuracy": 0.8173130972456436 + }, + { + "epoch": 0.5283466904125853, + "grad_norm": 1.4673457145690918, + "learning_rate": 2.6889162652674776e-06, + "loss": 0.0176, + "step": 1780, + "video_reward_cumulative_accuracy": 0.8174157303370787 + }, + { + "epoch": 0.5286435143959632, + "grad_norm": 2.3411591053009033, + "learning_rate": 2.6863332074441374e-06, + "loss": 0.0476, + "step": 1781, + "video_reward_cumulative_accuracy": 0.8172375070185289 + }, + { + "epoch": 0.5289403383793411, + "grad_norm": 2.2734508514404297, + "learning_rate": 2.6837499495740144e-06, + "loss": 0.0589, + "step": 1782, + "video_reward_cumulative_accuracy": 0.8173400673400674 + }, + { + "epoch": 0.5292371623627189, + "grad_norm": 4.031787872314453, + "learning_rate": 2.681166494430486e-06, + "loss": 0.0424, + "step": 1783, + "video_reward_cumulative_accuracy": 0.8174425126191811 + }, + { + "epoch": 0.5295339863460967, + "grad_norm": 3.090514898300171, + "learning_rate": 2.6785828447871415e-06, + "loss": 0.0261, + "step": 1784, + "video_reward_cumulative_accuracy": 0.8175448430493274 + }, + { + "epoch": 0.5298308103294747, + "grad_norm": 0.7824661731719971, + "learning_rate": 2.6759990034177814e-06, + "loss": 0.0154, + "step": 1785, + "video_reward_cumulative_accuracy": 0.8176470588235294 + }, + { + "epoch": 0.5301276343128525, + "grad_norm": 1.5541051626205444, + "learning_rate": 2.6734149730964085e-06, + "loss": 0.0278, + "step": 1786, + "video_reward_cumulative_accuracy": 0.8177491601343785 + }, + { + "epoch": 0.5304244582962303, + "grad_norm": 1.8912038803100586, + "learning_rate": 2.6708307565972307e-06, + "loss": 0.0368, + "step": 1787, + "video_reward_cumulative_accuracy": 0.8178511471740347 + }, + { + "epoch": 0.5307212822796082, + "grad_norm": 0.8161097168922424, + "learning_rate": 2.668246356694656e-06, + "loss": 0.0143, + "step": 1788, + "video_reward_cumulative_accuracy": 0.8179530201342282 + }, + { + "epoch": 0.5310181062629861, + "grad_norm": 3.4581801891326904, + "learning_rate": 2.6656617761632863e-06, + "loss": 0.0676, + "step": 1789, + "video_reward_cumulative_accuracy": 0.8180547792062605 + }, + { + "epoch": 0.5313149302463639, + "grad_norm": 0.8631388545036316, + "learning_rate": 2.6630770177779218e-06, + "loss": 0.0121, + "step": 1790, + "video_reward_cumulative_accuracy": 0.8181564245810056 + }, + { + "epoch": 0.5316117542297417, + "grad_norm": 1.1447865962982178, + "learning_rate": 2.660492084313551e-06, + "loss": 0.0248, + "step": 1791, + "video_reward_cumulative_accuracy": 0.8182579564489112 + }, + { + "epoch": 0.5319085782131197, + "grad_norm": 2.4453208446502686, + "learning_rate": 2.657906978545351e-06, + "loss": 0.0264, + "step": 1792, + "video_reward_cumulative_accuracy": 0.8180803571428571 + }, + { + "epoch": 0.5322054021964975, + "grad_norm": 2.468322515487671, + "learning_rate": 2.6553217032486832e-06, + "loss": 0.0487, + "step": 1793, + "video_reward_cumulative_accuracy": 0.8181818181818182 + }, + { + "epoch": 0.5325022261798753, + "grad_norm": 0.8765484094619751, + "learning_rate": 2.6527362611990915e-06, + "loss": 0.0165, + "step": 1794, + "video_reward_cumulative_accuracy": 0.8182831661092531 + }, + { + "epoch": 0.5327990501632532, + "grad_norm": 2.160796880722046, + "learning_rate": 2.6501506551722995e-06, + "loss": 0.0564, + "step": 1795, + "video_reward_cumulative_accuracy": 0.8181058495821727 + }, + { + "epoch": 0.5330958741466311, + "grad_norm": 2.3637046813964844, + "learning_rate": 2.6475648879442055e-06, + "loss": 0.0582, + "step": 1796, + "video_reward_cumulative_accuracy": 0.8182071269487751 + }, + { + "epoch": 0.5333926981300089, + "grad_norm": 1.2972532510757446, + "learning_rate": 2.6449789622908823e-06, + "loss": 0.0191, + "step": 1797, + "video_reward_cumulative_accuracy": 0.8183082915971063 + }, + { + "epoch": 0.5336895221133867, + "grad_norm": 3.382450580596924, + "learning_rate": 2.6423928809885716e-06, + "loss": 0.0522, + "step": 1798, + "video_reward_cumulative_accuracy": 0.818131256952169 + }, + { + "epoch": 0.5339863460967647, + "grad_norm": 2.019676685333252, + "learning_rate": 2.639806646813683e-06, + "loss": 0.0699, + "step": 1799, + "video_reward_cumulative_accuracy": 0.8182323513062812 + }, + { + "epoch": 0.5342831700801425, + "grad_norm": 2.9581692218780518, + "learning_rate": 2.6372202625427897e-06, + "loss": 0.0817, + "step": 1800, + "video_reward_cumulative_accuracy": 0.8183333333333334 + }, + { + "epoch": 0.5342831700801425, + "eval_runtime": 131.6405, + "eval_samples_per_second": 5.994, + "eval_steps_per_second": 0.752, + "eval_test_set_accuracy": 0.7765151515151515, + "step": 1800 + }, + { + "epoch": 0.5345799940635203, + "grad_norm": 3.738384485244751, + "learning_rate": 2.6346337309526265e-06, + "loss": 0.0749, + "step": 1801, + "video_reward_cumulative_accuracy": 0.8181565796779567 + }, + { + "epoch": 0.5348768180468982, + "grad_norm": 1.4811760187149048, + "learning_rate": 2.6320470548200848e-06, + "loss": 0.0336, + "step": 1802, + "video_reward_cumulative_accuracy": 0.8182574916759157 + }, + { + "epoch": 0.5351736420302761, + "grad_norm": 1.9211323261260986, + "learning_rate": 2.6294602369222145e-06, + "loss": 0.0304, + "step": 1803, + "video_reward_cumulative_accuracy": 0.8183582917359956 + }, + { + "epoch": 0.5354704660136539, + "grad_norm": 0.7293126583099365, + "learning_rate": 2.6268732800362147e-06, + "loss": 0.0101, + "step": 1804, + "video_reward_cumulative_accuracy": 0.8184589800443459 + }, + { + "epoch": 0.5357672899970317, + "grad_norm": 2.35774827003479, + "learning_rate": 2.624286186939435e-06, + "loss": 0.0468, + "step": 1805, + "video_reward_cumulative_accuracy": 0.8182825484764543 + }, + { + "epoch": 0.5360641139804097, + "grad_norm": 3.0999953746795654, + "learning_rate": 2.62169896040937e-06, + "loss": 0.0384, + "step": 1806, + "video_reward_cumulative_accuracy": 0.8183831672203765 + }, + { + "epoch": 0.5363609379637875, + "grad_norm": 4.446628093719482, + "learning_rate": 2.6191116032236598e-06, + "loss": 0.0707, + "step": 1807, + "video_reward_cumulative_accuracy": 0.8184836745987825 + }, + { + "epoch": 0.5366577619471653, + "grad_norm": 2.9089622497558594, + "learning_rate": 2.616524118160082e-06, + "loss": 0.0499, + "step": 1808, + "video_reward_cumulative_accuracy": 0.8185840707964602 + }, + { + "epoch": 0.5369545859305432, + "grad_norm": 0.42697280645370483, + "learning_rate": 2.613936507996554e-06, + "loss": 0.0118, + "step": 1809, + "video_reward_cumulative_accuracy": 0.8186843559977889 + }, + { + "epoch": 0.5372514099139211, + "grad_norm": 2.4700889587402344, + "learning_rate": 2.611348775511127e-06, + "loss": 0.06, + "step": 1810, + "video_reward_cumulative_accuracy": 0.8187845303867404 + }, + { + "epoch": 0.5375482338972989, + "grad_norm": 1.9913703203201294, + "learning_rate": 2.6087609234819822e-06, + "loss": 0.0386, + "step": 1811, + "video_reward_cumulative_accuracy": 0.8188845941468802 + }, + { + "epoch": 0.5378450578806767, + "grad_norm": 2.4547126293182373, + "learning_rate": 2.606172954687429e-06, + "loss": 0.0696, + "step": 1812, + "video_reward_cumulative_accuracy": 0.8189845474613686 + }, + { + "epoch": 0.5381418818640546, + "grad_norm": 2.6594624519348145, + "learning_rate": 2.603584871905905e-06, + "loss": 0.065, + "step": 1813, + "video_reward_cumulative_accuracy": 0.8188086045228903 + }, + { + "epoch": 0.5384387058474325, + "grad_norm": 1.8410438299179077, + "learning_rate": 2.600996677915967e-06, + "loss": 0.0422, + "step": 1814, + "video_reward_cumulative_accuracy": 0.8189084895259096 + }, + { + "epoch": 0.5387355298308103, + "grad_norm": 1.4541661739349365, + "learning_rate": 2.598408375496292e-06, + "loss": 0.0367, + "step": 1815, + "video_reward_cumulative_accuracy": 0.8190082644628099 + }, + { + "epoch": 0.5390323538141882, + "grad_norm": 0.9136159420013428, + "learning_rate": 2.5958199674256755e-06, + "loss": 0.0166, + "step": 1816, + "video_reward_cumulative_accuracy": 0.8191079295154186 + }, + { + "epoch": 0.5393291777975661, + "grad_norm": 3.7377233505249023, + "learning_rate": 2.5932314564830237e-06, + "loss": 0.0694, + "step": 1817, + "video_reward_cumulative_accuracy": 0.8192074848651624 + }, + { + "epoch": 0.5396260017809439, + "grad_norm": 3.6235744953155518, + "learning_rate": 2.5906428454473546e-06, + "loss": 0.0713, + "step": 1818, + "video_reward_cumulative_accuracy": 0.819031903190319 + }, + { + "epoch": 0.5399228257643217, + "grad_norm": 3.179281711578369, + "learning_rate": 2.588054137097793e-06, + "loss": 0.0389, + "step": 1819, + "video_reward_cumulative_accuracy": 0.8191313908741067 + }, + { + "epoch": 0.5402196497476996, + "grad_norm": 2.8550491333007812, + "learning_rate": 2.5854653342135687e-06, + "loss": 0.0421, + "step": 1820, + "video_reward_cumulative_accuracy": 0.8192307692307692 + }, + { + "epoch": 0.5405164737310775, + "grad_norm": 4.346277713775635, + "learning_rate": 2.5828764395740135e-06, + "loss": 0.0625, + "step": 1821, + "video_reward_cumulative_accuracy": 0.8193300384404174 + }, + { + "epoch": 0.5408132977144553, + "grad_norm": 2.5140342712402344, + "learning_rate": 2.5802874559585567e-06, + "loss": 0.0429, + "step": 1822, + "video_reward_cumulative_accuracy": 0.8194291986827662 + }, + { + "epoch": 0.5411101216978332, + "grad_norm": 6.247716903686523, + "learning_rate": 2.5776983861467237e-06, + "loss": 0.0708, + "step": 1823, + "video_reward_cumulative_accuracy": 0.8195282501371366 + }, + { + "epoch": 0.5414069456812111, + "grad_norm": 3.389479875564575, + "learning_rate": 2.575109232918131e-06, + "loss": 0.0473, + "step": 1824, + "video_reward_cumulative_accuracy": 0.8196271929824561 + }, + { + "epoch": 0.5417037696645889, + "grad_norm": 2.464885711669922, + "learning_rate": 2.5725199990524874e-06, + "loss": 0.0297, + "step": 1825, + "video_reward_cumulative_accuracy": 0.8197260273972603 + }, + { + "epoch": 0.5420005936479667, + "grad_norm": 1.1972076892852783, + "learning_rate": 2.569930687329586e-06, + "loss": 0.0302, + "step": 1826, + "video_reward_cumulative_accuracy": 0.8198247535596933 + }, + { + "epoch": 0.5422974176313446, + "grad_norm": 1.1544277667999268, + "learning_rate": 2.567341300529305e-06, + "loss": 0.0154, + "step": 1827, + "video_reward_cumulative_accuracy": 0.8199233716475096 + }, + { + "epoch": 0.5425942416147225, + "grad_norm": 1.3733747005462646, + "learning_rate": 2.5647518414316015e-06, + "loss": 0.0441, + "step": 1828, + "video_reward_cumulative_accuracy": 0.8200218818380745 + }, + { + "epoch": 0.5428910655981003, + "grad_norm": 2.372936964035034, + "learning_rate": 2.562162312816511e-06, + "loss": 0.0401, + "step": 1829, + "video_reward_cumulative_accuracy": 0.8201202843083653 + }, + { + "epoch": 0.5431878895814782, + "grad_norm": 2.063004970550537, + "learning_rate": 2.559572717464145e-06, + "loss": 0.0189, + "step": 1830, + "video_reward_cumulative_accuracy": 0.8199453551912569 + }, + { + "epoch": 0.5434847135648561, + "grad_norm": 2.8820412158966064, + "learning_rate": 2.556983058154685e-06, + "loss": 0.0546, + "step": 1831, + "video_reward_cumulative_accuracy": 0.8200436919716002 + }, + { + "epoch": 0.5437815375482339, + "grad_norm": 1.9526046514511108, + "learning_rate": 2.5543933376683805e-06, + "loss": 0.0197, + "step": 1832, + "video_reward_cumulative_accuracy": 0.82014192139738 + }, + { + "epoch": 0.5440783615316117, + "grad_norm": 2.945932149887085, + "learning_rate": 2.5518035587855492e-06, + "loss": 0.06, + "step": 1833, + "video_reward_cumulative_accuracy": 0.8202400436442989 + }, + { + "epoch": 0.5443751855149896, + "grad_norm": 1.299325942993164, + "learning_rate": 2.5492137242865706e-06, + "loss": 0.0312, + "step": 1834, + "video_reward_cumulative_accuracy": 0.8203380588876772 + }, + { + "epoch": 0.5446720094983675, + "grad_norm": 1.5175635814666748, + "learning_rate": 2.5466238369518808e-06, + "loss": 0.0433, + "step": 1835, + "video_reward_cumulative_accuracy": 0.8204359673024523 + }, + { + "epoch": 0.5449688334817453, + "grad_norm": 3.6290009021759033, + "learning_rate": 2.544033899561978e-06, + "loss": 0.0684, + "step": 1836, + "video_reward_cumulative_accuracy": 0.8205337690631809 + }, + { + "epoch": 0.5452656574651232, + "grad_norm": 2.739287853240967, + "learning_rate": 2.5414439148974096e-06, + "loss": 0.0701, + "step": 1837, + "video_reward_cumulative_accuracy": 0.8206314643440392 + }, + { + "epoch": 0.5455624814485011, + "grad_norm": 2.4160091876983643, + "learning_rate": 2.5388538857387756e-06, + "loss": 0.0424, + "step": 1838, + "video_reward_cumulative_accuracy": 0.8204570184983678 + }, + { + "epoch": 0.5458593054318789, + "grad_norm": 2.7636618614196777, + "learning_rate": 2.5362638148667256e-06, + "loss": 0.0486, + "step": 1839, + "video_reward_cumulative_accuracy": 0.8205546492659054 + }, + { + "epoch": 0.5461561294152567, + "grad_norm": 1.2934203147888184, + "learning_rate": 2.5336737050619497e-06, + "loss": 0.0313, + "step": 1840, + "video_reward_cumulative_accuracy": 0.8206521739130435 + }, + { + "epoch": 0.5464529533986346, + "grad_norm": 1.7498849630355835, + "learning_rate": 2.5310835591051837e-06, + "loss": 0.0527, + "step": 1841, + "video_reward_cumulative_accuracy": 0.8207495926127105 + }, + { + "epoch": 0.5467497773820125, + "grad_norm": 2.3050217628479004, + "learning_rate": 2.528493379777199e-06, + "loss": 0.0268, + "step": 1842, + "video_reward_cumulative_accuracy": 0.8208469055374593 + }, + { + "epoch": 0.5470466013653903, + "grad_norm": 1.1211016178131104, + "learning_rate": 2.5259031698588065e-06, + "loss": 0.0133, + "step": 1843, + "video_reward_cumulative_accuracy": 0.8209441128594682 + }, + { + "epoch": 0.5473434253487682, + "grad_norm": 2.8683953285217285, + "learning_rate": 2.523312932130847e-06, + "loss": 0.0691, + "step": 1844, + "video_reward_cumulative_accuracy": 0.8210412147505423 + }, + { + "epoch": 0.5476402493321461, + "grad_norm": 2.4542722702026367, + "learning_rate": 2.5207226693741914e-06, + "loss": 0.0429, + "step": 1845, + "video_reward_cumulative_accuracy": 0.8208672086720867 + }, + { + "epoch": 0.5479370733155239, + "grad_norm": 3.8734562397003174, + "learning_rate": 2.5181323843697403e-06, + "loss": 0.0458, + "step": 1846, + "video_reward_cumulative_accuracy": 0.820964247020585 + }, + { + "epoch": 0.5482338972989017, + "grad_norm": 2.5339853763580322, + "learning_rate": 2.5155420798984137e-06, + "loss": 0.0254, + "step": 1847, + "video_reward_cumulative_accuracy": 0.821061180292366 + }, + { + "epoch": 0.5485307212822796, + "grad_norm": 1.4046697616577148, + "learning_rate": 2.512951758741156e-06, + "loss": 0.0352, + "step": 1848, + "video_reward_cumulative_accuracy": 0.8211580086580087 + }, + { + "epoch": 0.5488275452656575, + "grad_norm": 0.8442341089248657, + "learning_rate": 2.510361423678929e-06, + "loss": 0.024, + "step": 1849, + "video_reward_cumulative_accuracy": 0.8209843158464034 + }, + { + "epoch": 0.5491243692490353, + "grad_norm": 1.8290486335754395, + "learning_rate": 2.5077710774927067e-06, + "loss": 0.0147, + "step": 1850, + "video_reward_cumulative_accuracy": 0.8210810810810811 + }, + { + "epoch": 0.5494211932324132, + "grad_norm": 1.443580150604248, + "learning_rate": 2.5051807229634796e-06, + "loss": 0.0335, + "step": 1851, + "video_reward_cumulative_accuracy": 0.8211777417612102 + }, + { + "epoch": 0.549718017215791, + "grad_norm": 0.5750948190689087, + "learning_rate": 2.5025903628722427e-06, + "loss": 0.0131, + "step": 1852, + "video_reward_cumulative_accuracy": 0.8212742980561555 + }, + { + "epoch": 0.5500148411991689, + "grad_norm": 0.9644943475723267, + "learning_rate": 2.5e-06, + "loss": 0.0187, + "step": 1853, + "video_reward_cumulative_accuracy": 0.8213707501349163 + }, + { + "epoch": 0.5503116651825467, + "grad_norm": 2.259012460708618, + "learning_rate": 2.4974096371277577e-06, + "loss": 0.0195, + "step": 1854, + "video_reward_cumulative_accuracy": 0.8214670981661273 + }, + { + "epoch": 0.5506084891659246, + "grad_norm": 4.043064117431641, + "learning_rate": 2.4948192770365217e-06, + "loss": 0.0473, + "step": 1855, + "video_reward_cumulative_accuracy": 0.8215633423180593 + }, + { + "epoch": 0.5509053131493025, + "grad_norm": 2.6557042598724365, + "learning_rate": 2.4922289225072937e-06, + "loss": 0.0347, + "step": 1856, + "video_reward_cumulative_accuracy": 0.8213900862068966 + }, + { + "epoch": 0.5512021371326803, + "grad_norm": 3.153779983520508, + "learning_rate": 2.4896385763210725e-06, + "loss": 0.0883, + "step": 1857, + "video_reward_cumulative_accuracy": 0.821486268174475 + }, + { + "epoch": 0.5514989611160582, + "grad_norm": 2.566129446029663, + "learning_rate": 2.4870482412588444e-06, + "loss": 0.0617, + "step": 1858, + "video_reward_cumulative_accuracy": 0.8215823466092572 + }, + { + "epoch": 0.551795785099436, + "grad_norm": 1.7508662939071655, + "learning_rate": 2.484457920101587e-06, + "loss": 0.0305, + "step": 1859, + "video_reward_cumulative_accuracy": 0.8216783216783217 + }, + { + "epoch": 0.5520926090828139, + "grad_norm": 2.019207715988159, + "learning_rate": 2.4818676156302605e-06, + "loss": 0.0521, + "step": 1860, + "video_reward_cumulative_accuracy": 0.8217741935483871 + }, + { + "epoch": 0.5523894330661917, + "grad_norm": 0.6667674779891968, + "learning_rate": 2.4792773306258085e-06, + "loss": 0.011, + "step": 1861, + "video_reward_cumulative_accuracy": 0.8218699623858141 + }, + { + "epoch": 0.5526862570495696, + "grad_norm": 1.5519354343414307, + "learning_rate": 2.4766870678691538e-06, + "loss": 0.0503, + "step": 1862, + "video_reward_cumulative_accuracy": 0.8219656283566058 + }, + { + "epoch": 0.5529830810329475, + "grad_norm": 3.2907981872558594, + "learning_rate": 2.474096830141194e-06, + "loss": 0.0484, + "step": 1863, + "video_reward_cumulative_accuracy": 0.8217928073000537 + }, + { + "epoch": 0.5532799050163253, + "grad_norm": 1.4493234157562256, + "learning_rate": 2.4715066202228017e-06, + "loss": 0.0132, + "step": 1864, + "video_reward_cumulative_accuracy": 0.8218884120171673 + }, + { + "epoch": 0.5535767289997032, + "grad_norm": 0.8373463749885559, + "learning_rate": 2.4689164408948176e-06, + "loss": 0.0103, + "step": 1865, + "video_reward_cumulative_accuracy": 0.8219839142091153 + }, + { + "epoch": 0.553873552983081, + "grad_norm": 1.3251855373382568, + "learning_rate": 2.4663262949380508e-06, + "loss": 0.0186, + "step": 1866, + "video_reward_cumulative_accuracy": 0.8220793140407289 + }, + { + "epoch": 0.5541703769664589, + "grad_norm": 2.757843255996704, + "learning_rate": 2.4637361851332752e-06, + "loss": 0.0747, + "step": 1867, + "video_reward_cumulative_accuracy": 0.8221746116764863 + }, + { + "epoch": 0.5544672009498367, + "grad_norm": 3.380035877227783, + "learning_rate": 2.4611461142612243e-06, + "loss": 0.0661, + "step": 1868, + "video_reward_cumulative_accuracy": 0.8222698072805139 + }, + { + "epoch": 0.5547640249332146, + "grad_norm": 1.7979135513305664, + "learning_rate": 2.4585560851025917e-06, + "loss": 0.0395, + "step": 1869, + "video_reward_cumulative_accuracy": 0.8223649010165864 + }, + { + "epoch": 0.5550608489165925, + "grad_norm": 1.7438832521438599, + "learning_rate": 2.455966100438023e-06, + "loss": 0.0389, + "step": 1870, + "video_reward_cumulative_accuracy": 0.8224598930481284 + }, + { + "epoch": 0.5553576728999703, + "grad_norm": 2.5413877964019775, + "learning_rate": 2.4533761630481205e-06, + "loss": 0.0202, + "step": 1871, + "video_reward_cumulative_accuracy": 0.8225547835382149 + }, + { + "epoch": 0.5556544968833482, + "grad_norm": 2.7013587951660156, + "learning_rate": 2.45078627571343e-06, + "loss": 0.0537, + "step": 1872, + "video_reward_cumulative_accuracy": 0.8226495726495726 + }, + { + "epoch": 0.555951320866726, + "grad_norm": 2.5262835025787354, + "learning_rate": 2.4481964412144508e-06, + "loss": 0.0388, + "step": 1873, + "video_reward_cumulative_accuracy": 0.8227442605445809 + }, + { + "epoch": 0.5562481448501039, + "grad_norm": 1.8535455465316772, + "learning_rate": 2.4456066623316203e-06, + "loss": 0.0714, + "step": 1874, + "video_reward_cumulative_accuracy": 0.8228388473852721 + }, + { + "epoch": 0.5565449688334817, + "grad_norm": 2.635227680206299, + "learning_rate": 2.4430169418453157e-06, + "loss": 0.0385, + "step": 1875, + "video_reward_cumulative_accuracy": 0.8229333333333333 + }, + { + "epoch": 0.5568417928168596, + "grad_norm": 1.0287766456604004, + "learning_rate": 2.4404272825358564e-06, + "loss": 0.0146, + "step": 1876, + "video_reward_cumulative_accuracy": 0.8230277185501066 + }, + { + "epoch": 0.5571386168002375, + "grad_norm": 0.640048086643219, + "learning_rate": 2.4378376871834896e-06, + "loss": 0.0152, + "step": 1877, + "video_reward_cumulative_accuracy": 0.8231220031965903 + }, + { + "epoch": 0.5574354407836153, + "grad_norm": 1.0190867185592651, + "learning_rate": 2.435248158568399e-06, + "loss": 0.0137, + "step": 1878, + "video_reward_cumulative_accuracy": 0.8232161874334398 + }, + { + "epoch": 0.5577322647669932, + "grad_norm": 0.6473628282546997, + "learning_rate": 2.4326586994706964e-06, + "loss": 0.0099, + "step": 1879, + "video_reward_cumulative_accuracy": 0.8233102714209686 + }, + { + "epoch": 0.558029088750371, + "grad_norm": 1.6340287923812866, + "learning_rate": 2.430069312670414e-06, + "loss": 0.0222, + "step": 1880, + "video_reward_cumulative_accuracy": 0.823404255319149 + }, + { + "epoch": 0.5583259127337489, + "grad_norm": 2.285609245300293, + "learning_rate": 2.4274800009475134e-06, + "loss": 0.06, + "step": 1881, + "video_reward_cumulative_accuracy": 0.823498139287613 + }, + { + "epoch": 0.5586227367171267, + "grad_norm": 2.4804632663726807, + "learning_rate": 2.42489076708187e-06, + "loss": 0.0291, + "step": 1882, + "video_reward_cumulative_accuracy": 0.8235919234856536 + }, + { + "epoch": 0.5589195607005046, + "grad_norm": 2.7268261909484863, + "learning_rate": 2.422301613853278e-06, + "loss": 0.0443, + "step": 1883, + "video_reward_cumulative_accuracy": 0.8236856080722251 + }, + { + "epoch": 0.5592163846838825, + "grad_norm": 3.1525275707244873, + "learning_rate": 2.419712544041444e-06, + "loss": 0.0388, + "step": 1884, + "video_reward_cumulative_accuracy": 0.8237791932059448 + }, + { + "epoch": 0.5595132086672603, + "grad_norm": 1.7583348751068115, + "learning_rate": 2.4171235604259865e-06, + "loss": 0.0213, + "step": 1885, + "video_reward_cumulative_accuracy": 0.8238726790450929 + }, + { + "epoch": 0.5598100326506382, + "grad_norm": 4.774654388427734, + "learning_rate": 2.4145346657864318e-06, + "loss": 0.0528, + "step": 1886, + "video_reward_cumulative_accuracy": 0.823966065747614 + }, + { + "epoch": 0.560106856634016, + "grad_norm": 2.143681526184082, + "learning_rate": 2.4119458629022077e-06, + "loss": 0.0185, + "step": 1887, + "video_reward_cumulative_accuracy": 0.8240593534711181 + }, + { + "epoch": 0.5604036806173939, + "grad_norm": 1.6760286092758179, + "learning_rate": 2.4093571545526466e-06, + "loss": 0.0488, + "step": 1888, + "video_reward_cumulative_accuracy": 0.8241525423728814 + }, + { + "epoch": 0.5607005046007717, + "grad_norm": 0.8435209393501282, + "learning_rate": 2.406768543516977e-06, + "loss": 0.0108, + "step": 1889, + "video_reward_cumulative_accuracy": 0.8242456326098465 + }, + { + "epoch": 0.5609973285841496, + "grad_norm": 3.637840747833252, + "learning_rate": 2.404180032574325e-06, + "loss": 0.085, + "step": 1890, + "video_reward_cumulative_accuracy": 0.8243386243386244 + }, + { + "epoch": 0.5612941525675275, + "grad_norm": 0.5115882158279419, + "learning_rate": 2.4015916245037086e-06, + "loss": 0.0259, + "step": 1891, + "video_reward_cumulative_accuracy": 0.8244315177154945 + }, + { + "epoch": 0.5615909765509053, + "grad_norm": 1.5231218338012695, + "learning_rate": 2.3990033220840344e-06, + "loss": 0.0462, + "step": 1892, + "video_reward_cumulative_accuracy": 0.8242600422832981 + }, + { + "epoch": 0.5618878005342832, + "grad_norm": 2.774663209915161, + "learning_rate": 2.3964151280940963e-06, + "loss": 0.0213, + "step": 1893, + "video_reward_cumulative_accuracy": 0.8243528790279979 + }, + { + "epoch": 0.562184624517661, + "grad_norm": 7.451237201690674, + "learning_rate": 2.3938270453125717e-06, + "loss": 0.0661, + "step": 1894, + "video_reward_cumulative_accuracy": 0.8244456177402323 + }, + { + "epoch": 0.5624814485010389, + "grad_norm": 3.062457323074341, + "learning_rate": 2.3912390765180195e-06, + "loss": 0.018, + "step": 1895, + "video_reward_cumulative_accuracy": 0.8242744063324539 + }, + { + "epoch": 0.5627782724844167, + "grad_norm": 5.17927885055542, + "learning_rate": 2.3886512244888737e-06, + "loss": 0.071, + "step": 1896, + "video_reward_cumulative_accuracy": 0.8238396624472574 + }, + { + "epoch": 0.5630750964677946, + "grad_norm": 2.72430682182312, + "learning_rate": 2.386063492003446e-06, + "loss": 0.0373, + "step": 1897, + "video_reward_cumulative_accuracy": 0.8236689509752241 + }, + { + "epoch": 0.5633719204511725, + "grad_norm": 1.81307053565979, + "learning_rate": 2.3834758818399185e-06, + "loss": 0.0787, + "step": 1898, + "video_reward_cumulative_accuracy": 0.8232349841938883 + }, + { + "epoch": 0.5636687444345503, + "grad_norm": 2.822479248046875, + "learning_rate": 2.3808883967763415e-06, + "loss": 0.0702, + "step": 1899, + "video_reward_cumulative_accuracy": 0.8230647709320695 + }, + { + "epoch": 0.5639655684179282, + "grad_norm": 3.3593389987945557, + "learning_rate": 2.378301039590631e-06, + "loss": 0.0254, + "step": 1900, + "video_reward_cumulative_accuracy": 0.8231578947368421 + }, + { + "epoch": 0.564262392401306, + "grad_norm": 4.469653129577637, + "learning_rate": 2.3757138130605662e-06, + "loss": 0.0387, + "step": 1901, + "video_reward_cumulative_accuracy": 0.823250920568122 + }, + { + "epoch": 0.5645592163846839, + "grad_norm": 1.8031344413757324, + "learning_rate": 2.3731267199637857e-06, + "loss": 0.0324, + "step": 1902, + "video_reward_cumulative_accuracy": 0.8233438485804416 + }, + { + "epoch": 0.5648560403680617, + "grad_norm": 2.311720132827759, + "learning_rate": 2.370539763077786e-06, + "loss": 0.0386, + "step": 1903, + "video_reward_cumulative_accuracy": 0.8234366789280084 + }, + { + "epoch": 0.5651528643514396, + "grad_norm": 2.129380464553833, + "learning_rate": 2.3679529451799156e-06, + "loss": 0.0571, + "step": 1904, + "video_reward_cumulative_accuracy": 0.823266806722689 + }, + { + "epoch": 0.5654496883348175, + "grad_norm": 1.209018349647522, + "learning_rate": 2.3653662690473747e-06, + "loss": 0.0215, + "step": 1905, + "video_reward_cumulative_accuracy": 0.8233595800524934 + }, + { + "epoch": 0.5657465123181953, + "grad_norm": 2.74529767036438, + "learning_rate": 2.3627797374572107e-06, + "loss": 0.0363, + "step": 1906, + "video_reward_cumulative_accuracy": 0.8234522560335782 + }, + { + "epoch": 0.5660433363015732, + "grad_norm": 2.243732452392578, + "learning_rate": 2.3601933531863182e-06, + "loss": 0.0252, + "step": 1907, + "video_reward_cumulative_accuracy": 0.8235448348190876 + }, + { + "epoch": 0.566340160284951, + "grad_norm": 2.2682273387908936, + "learning_rate": 2.357607119011429e-06, + "loss": 0.0386, + "step": 1908, + "video_reward_cumulative_accuracy": 0.8236373165618449 + }, + { + "epoch": 0.5666369842683289, + "grad_norm": 1.7562425136566162, + "learning_rate": 2.355021037709118e-06, + "loss": 0.0565, + "step": 1909, + "video_reward_cumulative_accuracy": 0.8237297014143531 + }, + { + "epoch": 0.5669338082517067, + "grad_norm": 1.4634464979171753, + "learning_rate": 2.352435112055795e-06, + "loss": 0.0267, + "step": 1910, + "video_reward_cumulative_accuracy": 0.8238219895287958 + }, + { + "epoch": 0.5672306322350846, + "grad_norm": 1.8789516687393188, + "learning_rate": 2.3498493448277013e-06, + "loss": 0.0227, + "step": 1911, + "video_reward_cumulative_accuracy": 0.8239141810570382 + }, + { + "epoch": 0.5675274562184625, + "grad_norm": 2.6937637329101562, + "learning_rate": 2.3472637388009094e-06, + "loss": 0.0524, + "step": 1912, + "video_reward_cumulative_accuracy": 0.8240062761506276 + }, + { + "epoch": 0.5678242802018403, + "grad_norm": 1.9026590585708618, + "learning_rate": 2.3446782967513176e-06, + "loss": 0.04, + "step": 1913, + "video_reward_cumulative_accuracy": 0.8240982749607946 + }, + { + "epoch": 0.5681211041852182, + "grad_norm": 2.191349744796753, + "learning_rate": 2.3420930214546496e-06, + "loss": 0.0194, + "step": 1914, + "video_reward_cumulative_accuracy": 0.8241901776384535 + }, + { + "epoch": 0.568417928168596, + "grad_norm": 0.43110188841819763, + "learning_rate": 2.3395079156864493e-06, + "loss": 0.0067, + "step": 1915, + "video_reward_cumulative_accuracy": 0.8242819843342036 + }, + { + "epoch": 0.5687147521519739, + "grad_norm": 2.1239190101623535, + "learning_rate": 2.3369229822220782e-06, + "loss": 0.0494, + "step": 1916, + "video_reward_cumulative_accuracy": 0.8243736951983298 + }, + { + "epoch": 0.5690115761353517, + "grad_norm": 2.330325126647949, + "learning_rate": 2.3343382238367145e-06, + "loss": 0.0496, + "step": 1917, + "video_reward_cumulative_accuracy": 0.8244653103808033 + }, + { + "epoch": 0.5693084001187296, + "grad_norm": 2.316889524459839, + "learning_rate": 2.331753643305345e-06, + "loss": 0.0605, + "step": 1918, + "video_reward_cumulative_accuracy": 0.82429614181439 + }, + { + "epoch": 0.5696052241021075, + "grad_norm": 2.9871466159820557, + "learning_rate": 2.3291692434027705e-06, + "loss": 0.0446, + "step": 1919, + "video_reward_cumulative_accuracy": 0.8243877019280875 + }, + { + "epoch": 0.5699020480854853, + "grad_norm": 2.9490387439727783, + "learning_rate": 2.326585026903592e-06, + "loss": 0.03, + "step": 1920, + "video_reward_cumulative_accuracy": 0.8244791666666667 + }, + { + "epoch": 0.5701988720688632, + "grad_norm": 2.3850908279418945, + "learning_rate": 2.324000996582219e-06, + "loss": 0.0334, + "step": 1921, + "video_reward_cumulative_accuracy": 0.8243102550754815 + }, + { + "epoch": 0.570495696052241, + "grad_norm": 1.3517158031463623, + "learning_rate": 2.3214171552128594e-06, + "loss": 0.0335, + "step": 1922, + "video_reward_cumulative_accuracy": 0.8244016649323621 + }, + { + "epoch": 0.5707925200356189, + "grad_norm": 2.155529022216797, + "learning_rate": 2.3188335055695145e-06, + "loss": 0.0204, + "step": 1923, + "video_reward_cumulative_accuracy": 0.8244929797191888 + }, + { + "epoch": 0.5710893440189967, + "grad_norm": 2.1936349868774414, + "learning_rate": 2.316250050425987e-06, + "loss": 0.0398, + "step": 1924, + "video_reward_cumulative_accuracy": 0.8243243243243243 + }, + { + "epoch": 0.5713861680023746, + "grad_norm": 3.196364164352417, + "learning_rate": 2.3136667925558635e-06, + "loss": 0.0479, + "step": 1925, + "video_reward_cumulative_accuracy": 0.8244155844155844 + }, + { + "epoch": 0.5716829919857525, + "grad_norm": 1.847775936126709, + "learning_rate": 2.311083734732523e-06, + "loss": 0.0229, + "step": 1926, + "video_reward_cumulative_accuracy": 0.8245067497403946 + }, + { + "epoch": 0.5719798159691303, + "grad_norm": 0.9182643890380859, + "learning_rate": 2.30850087972913e-06, + "loss": 0.0187, + "step": 1927, + "video_reward_cumulative_accuracy": 0.8245978204462896 + }, + { + "epoch": 0.5722766399525082, + "grad_norm": 1.595245599746704, + "learning_rate": 2.3059182303186276e-06, + "loss": 0.0361, + "step": 1928, + "video_reward_cumulative_accuracy": 0.8246887966804979 + }, + { + "epoch": 0.572573463935886, + "grad_norm": 1.5071243047714233, + "learning_rate": 2.303335789273744e-06, + "loss": 0.028, + "step": 1929, + "video_reward_cumulative_accuracy": 0.8247796785899429 + }, + { + "epoch": 0.5728702879192639, + "grad_norm": 1.241547703742981, + "learning_rate": 2.3007535593669773e-06, + "loss": 0.0086, + "step": 1930, + "video_reward_cumulative_accuracy": 0.8248704663212435 + }, + { + "epoch": 0.5731671119026417, + "grad_norm": 1.961064100265503, + "learning_rate": 2.2981715433706037e-06, + "loss": 0.0262, + "step": 1931, + "video_reward_cumulative_accuracy": 0.824702226825479 + }, + { + "epoch": 0.5734639358860196, + "grad_norm": 0.9366904497146606, + "learning_rate": 2.2955897440566667e-06, + "loss": 0.0133, + "step": 1932, + "video_reward_cumulative_accuracy": 0.8247929606625258 + }, + { + "epoch": 0.5737607598693975, + "grad_norm": 1.7824345827102661, + "learning_rate": 2.2930081641969782e-06, + "loss": 0.0164, + "step": 1933, + "video_reward_cumulative_accuracy": 0.8248836006207967 + }, + { + "epoch": 0.5740575838527753, + "grad_norm": 2.103070020675659, + "learning_rate": 2.2904268065631174e-06, + "loss": 0.0494, + "step": 1934, + "video_reward_cumulative_accuracy": 0.8249741468459152 + }, + { + "epoch": 0.5743544078361532, + "grad_norm": 1.738095760345459, + "learning_rate": 2.2878456739264197e-06, + "loss": 0.0459, + "step": 1935, + "video_reward_cumulative_accuracy": 0.8250645994832041 + }, + { + "epoch": 0.574651231819531, + "grad_norm": 2.6681344509124756, + "learning_rate": 2.2852647690579823e-06, + "loss": 0.1033, + "step": 1936, + "video_reward_cumulative_accuracy": 0.824896694214876 + }, + { + "epoch": 0.5749480558029089, + "grad_norm": 2.3271491527557373, + "learning_rate": 2.2826840947286566e-06, + "loss": 0.0543, + "step": 1937, + "video_reward_cumulative_accuracy": 0.8249870934434693 + }, + { + "epoch": 0.5752448797862867, + "grad_norm": 3.099133014678955, + "learning_rate": 2.2801036537090475e-06, + "loss": 0.0862, + "step": 1938, + "video_reward_cumulative_accuracy": 0.8248194014447885 + }, + { + "epoch": 0.5755417037696646, + "grad_norm": 2.7922987937927246, + "learning_rate": 2.2775234487695093e-06, + "loss": 0.0294, + "step": 1939, + "video_reward_cumulative_accuracy": 0.8249097472924187 + }, + { + "epoch": 0.5758385277530425, + "grad_norm": 2.1268813610076904, + "learning_rate": 2.2749434826801416e-06, + "loss": 0.0671, + "step": 1940, + "video_reward_cumulative_accuracy": 0.825 + }, + { + "epoch": 0.5761353517364203, + "grad_norm": 3.367703676223755, + "learning_rate": 2.272363758210789e-06, + "loss": 0.0779, + "step": 1941, + "video_reward_cumulative_accuracy": 0.8250901597114889 + }, + { + "epoch": 0.5764321757197982, + "grad_norm": 4.295740127563477, + "learning_rate": 2.2697842781310354e-06, + "loss": 0.0447, + "step": 1942, + "video_reward_cumulative_accuracy": 0.8251802265705458 + }, + { + "epoch": 0.576728999703176, + "grad_norm": 6.525967597961426, + "learning_rate": 2.2672050452102036e-06, + "loss": 0.0787, + "step": 1943, + "video_reward_cumulative_accuracy": 0.8252702007205353 + }, + { + "epoch": 0.5770258236865539, + "grad_norm": 3.3833274841308594, + "learning_rate": 2.26462606221735e-06, + "loss": 0.0647, + "step": 1944, + "video_reward_cumulative_accuracy": 0.8253600823045267 + }, + { + "epoch": 0.5773226476699317, + "grad_norm": 1.5874844789505005, + "learning_rate": 2.262047331921264e-06, + "loss": 0.0274, + "step": 1945, + "video_reward_cumulative_accuracy": 0.8254498714652956 + }, + { + "epoch": 0.5776194716533096, + "grad_norm": 1.2541552782058716, + "learning_rate": 2.259468857090462e-06, + "loss": 0.0108, + "step": 1946, + "video_reward_cumulative_accuracy": 0.8255395683453237 + }, + { + "epoch": 0.5779162956366874, + "grad_norm": 1.4068396091461182, + "learning_rate": 2.2568906404931878e-06, + "loss": 0.0234, + "step": 1947, + "video_reward_cumulative_accuracy": 0.8256291730868002 + }, + { + "epoch": 0.5782131196200653, + "grad_norm": 1.276092290878296, + "learning_rate": 2.254312684897406e-06, + "loss": 0.0289, + "step": 1948, + "video_reward_cumulative_accuracy": 0.8257186858316222 + }, + { + "epoch": 0.5785099436034432, + "grad_norm": 0.7035172581672668, + "learning_rate": 2.2517349930708032e-06, + "loss": 0.0199, + "step": 1949, + "video_reward_cumulative_accuracy": 0.8258081067213956 + }, + { + "epoch": 0.578806767586821, + "grad_norm": 1.7570953369140625, + "learning_rate": 2.2491575677807813e-06, + "loss": 0.0319, + "step": 1950, + "video_reward_cumulative_accuracy": 0.8258974358974359 + }, + { + "epoch": 0.5791035915701989, + "grad_norm": 2.2827887535095215, + "learning_rate": 2.2465804117944568e-06, + "loss": 0.0531, + "step": 1951, + "video_reward_cumulative_accuracy": 0.8259866735007688 + }, + { + "epoch": 0.5794004155535767, + "grad_norm": 0.8664276003837585, + "learning_rate": 2.244003527878656e-06, + "loss": 0.0131, + "step": 1952, + "video_reward_cumulative_accuracy": 0.8260758196721312 + }, + { + "epoch": 0.5796972395369546, + "grad_norm": 0.4808574914932251, + "learning_rate": 2.2414269187999153e-06, + "loss": 0.0139, + "step": 1953, + "video_reward_cumulative_accuracy": 0.8261648745519713 + }, + { + "epoch": 0.5799940635203324, + "grad_norm": 3.057589054107666, + "learning_rate": 2.2388505873244728e-06, + "loss": 0.0282, + "step": 1954, + "video_reward_cumulative_accuracy": 0.8259979529170931 + }, + { + "epoch": 0.5802908875037103, + "grad_norm": 1.7676241397857666, + "learning_rate": 2.2362745362182724e-06, + "loss": 0.0718, + "step": 1955, + "video_reward_cumulative_accuracy": 0.8258312020460358 + }, + { + "epoch": 0.5805877114870882, + "grad_norm": 1.0364630222320557, + "learning_rate": 2.2336987682469534e-06, + "loss": 0.017, + "step": 1956, + "video_reward_cumulative_accuracy": 0.825920245398773 + }, + { + "epoch": 0.580884535470466, + "grad_norm": 1.8541654348373413, + "learning_rate": 2.2311232861758526e-06, + "loss": 0.0202, + "step": 1957, + "video_reward_cumulative_accuracy": 0.8260091977516607 + }, + { + "epoch": 0.5811813594538439, + "grad_norm": 1.1879740953445435, + "learning_rate": 2.228548092770002e-06, + "loss": 0.0549, + "step": 1958, + "video_reward_cumulative_accuracy": 0.8260980592441267 + }, + { + "epoch": 0.5814781834372217, + "grad_norm": 1.5490862131118774, + "learning_rate": 2.2259731907941195e-06, + "loss": 0.0214, + "step": 1959, + "video_reward_cumulative_accuracy": 0.8261868300153139 + }, + { + "epoch": 0.5817750074205996, + "grad_norm": 2.618912696838379, + "learning_rate": 2.223398583012614e-06, + "loss": 0.0376, + "step": 1960, + "video_reward_cumulative_accuracy": 0.8262755102040816 + }, + { + "epoch": 0.5820718314039774, + "grad_norm": 1.805620551109314, + "learning_rate": 2.2208242721895744e-06, + "loss": 0.0346, + "step": 1961, + "video_reward_cumulative_accuracy": 0.8263640999490056 + }, + { + "epoch": 0.5823686553873553, + "grad_norm": 4.237667083740234, + "learning_rate": 2.2182502610887757e-06, + "loss": 0.0546, + "step": 1962, + "video_reward_cumulative_accuracy": 0.8264525993883792 + }, + { + "epoch": 0.5826654793707332, + "grad_norm": 0.6842634677886963, + "learning_rate": 2.215676552473668e-06, + "loss": 0.005, + "step": 1963, + "video_reward_cumulative_accuracy": 0.826541008660214 + }, + { + "epoch": 0.582962303354111, + "grad_norm": 2.4333693981170654, + "learning_rate": 2.213103149107376e-06, + "loss": 0.0525, + "step": 1964, + "video_reward_cumulative_accuracy": 0.8263747454175153 + }, + { + "epoch": 0.5832591273374889, + "grad_norm": 2.396888256072998, + "learning_rate": 2.210530053752701e-06, + "loss": 0.0805, + "step": 1965, + "video_reward_cumulative_accuracy": 0.8264631043256997 + }, + { + "epoch": 0.5835559513208667, + "grad_norm": 2.7673487663269043, + "learning_rate": 2.2079572691721063e-06, + "loss": 0.0213, + "step": 1966, + "video_reward_cumulative_accuracy": 0.8262970498474059 + }, + { + "epoch": 0.5838527753042446, + "grad_norm": 1.4659632444381714, + "learning_rate": 2.20538479812773e-06, + "loss": 0.023, + "step": 1967, + "video_reward_cumulative_accuracy": 0.8263853584138282 + }, + { + "epoch": 0.5841495992876224, + "grad_norm": 2.001253366470337, + "learning_rate": 2.2028126433813657e-06, + "loss": 0.0304, + "step": 1968, + "video_reward_cumulative_accuracy": 0.8264735772357723 + }, + { + "epoch": 0.5844464232710003, + "grad_norm": 2.858795166015625, + "learning_rate": 2.200240807694474e-06, + "loss": 0.0827, + "step": 1969, + "video_reward_cumulative_accuracy": 0.8263077704418487 + }, + { + "epoch": 0.5847432472543782, + "grad_norm": 1.0106351375579834, + "learning_rate": 2.19766929382817e-06, + "loss": 0.035, + "step": 1970, + "video_reward_cumulative_accuracy": 0.8263959390862944 + }, + { + "epoch": 0.585040071237756, + "grad_norm": 1.513059377670288, + "learning_rate": 2.195098104543221e-06, + "loss": 0.019, + "step": 1971, + "video_reward_cumulative_accuracy": 0.8264840182648402 + }, + { + "epoch": 0.5853368952211339, + "grad_norm": 1.704352855682373, + "learning_rate": 2.1925272426000514e-06, + "loss": 0.0135, + "step": 1972, + "video_reward_cumulative_accuracy": 0.8265720081135902 + }, + { + "epoch": 0.5856337192045117, + "grad_norm": 3.4350345134735107, + "learning_rate": 2.189956710758729e-06, + "loss": 0.0736, + "step": 1973, + "video_reward_cumulative_accuracy": 0.8264064875823619 + }, + { + "epoch": 0.5859305431878896, + "grad_norm": 0.5892782807350159, + "learning_rate": 2.1873865117789682e-06, + "loss": 0.0074, + "step": 1974, + "video_reward_cumulative_accuracy": 0.8264944275582573 + }, + { + "epoch": 0.5862273671712674, + "grad_norm": 0.523690938949585, + "learning_rate": 2.184816648420131e-06, + "loss": 0.0095, + "step": 1975, + "video_reward_cumulative_accuracy": 0.8265822784810126 + }, + { + "epoch": 0.5865241911546453, + "grad_norm": 3.0676653385162354, + "learning_rate": 2.1822471234412106e-06, + "loss": 0.0585, + "step": 1976, + "video_reward_cumulative_accuracy": 0.8266700404858299 + }, + { + "epoch": 0.5868210151380232, + "grad_norm": 1.3661112785339355, + "learning_rate": 2.1796779396008456e-06, + "loss": 0.0415, + "step": 1977, + "video_reward_cumulative_accuracy": 0.8265048052604957 + }, + { + "epoch": 0.587117839121401, + "grad_norm": 2.5101301670074463, + "learning_rate": 2.177109099657301e-06, + "loss": 0.0227, + "step": 1978, + "video_reward_cumulative_accuracy": 0.826592517694641 + }, + { + "epoch": 0.5874146631047789, + "grad_norm": 1.466486930847168, + "learning_rate": 2.174540606368477e-06, + "loss": 0.054, + "step": 1979, + "video_reward_cumulative_accuracy": 0.8266801414855988 + }, + { + "epoch": 0.5877114870881567, + "grad_norm": 1.7742908000946045, + "learning_rate": 2.1719724624919004e-06, + "loss": 0.0465, + "step": 1980, + "video_reward_cumulative_accuracy": 0.8267676767676768 + }, + { + "epoch": 0.5880083110715346, + "grad_norm": 5.307931423187256, + "learning_rate": 2.169404670784722e-06, + "loss": 0.07, + "step": 1981, + "video_reward_cumulative_accuracy": 0.8268551236749117 + }, + { + "epoch": 0.5883051350549124, + "grad_norm": 2.4654183387756348, + "learning_rate": 2.1668372340037184e-06, + "loss": 0.0548, + "step": 1982, + "video_reward_cumulative_accuracy": 0.8269424823410696 + }, + { + "epoch": 0.5886019590382903, + "grad_norm": 3.1083977222442627, + "learning_rate": 2.164270154905279e-06, + "loss": 0.0294, + "step": 1983, + "video_reward_cumulative_accuracy": 0.827029752899647 + }, + { + "epoch": 0.5888987830216682, + "grad_norm": 2.782560348510742, + "learning_rate": 2.1617034362454136e-06, + "loss": 0.0229, + "step": 1984, + "video_reward_cumulative_accuracy": 0.827116935483871 + }, + { + "epoch": 0.589195607005046, + "grad_norm": 3.644629955291748, + "learning_rate": 2.1591370807797434e-06, + "loss": 0.0401, + "step": 1985, + "video_reward_cumulative_accuracy": 0.8272040302267003 + }, + { + "epoch": 0.5894924309884239, + "grad_norm": 3.797820806503296, + "learning_rate": 2.1565710912635006e-06, + "loss": 0.0408, + "step": 1986, + "video_reward_cumulative_accuracy": 0.8272910372608258 + }, + { + "epoch": 0.5897892549718017, + "grad_norm": 2.0654091835021973, + "learning_rate": 2.154005470451524e-06, + "loss": 0.0144, + "step": 1987, + "video_reward_cumulative_accuracy": 0.8273779567186713 + }, + { + "epoch": 0.5900860789551796, + "grad_norm": 2.2619528770446777, + "learning_rate": 2.1514402210982558e-06, + "loss": 0.0629, + "step": 1988, + "video_reward_cumulative_accuracy": 0.8272132796780685 + }, + { + "epoch": 0.5903829029385574, + "grad_norm": 3.204058885574341, + "learning_rate": 2.148875345957741e-06, + "loss": 0.0629, + "step": 1989, + "video_reward_cumulative_accuracy": 0.8273001508295625 + }, + { + "epoch": 0.5906797269219353, + "grad_norm": 2.4990057945251465, + "learning_rate": 2.1463108477836217e-06, + "loss": 0.0596, + "step": 1990, + "video_reward_cumulative_accuracy": 0.8273869346733669 + }, + { + "epoch": 0.5909765509053132, + "grad_norm": 1.8199396133422852, + "learning_rate": 2.1437467293291357e-06, + "loss": 0.0306, + "step": 1991, + "video_reward_cumulative_accuracy": 0.8274736313410347 + }, + { + "epoch": 0.591273374888691, + "grad_norm": 1.290522813796997, + "learning_rate": 2.1411829933471124e-06, + "loss": 0.0254, + "step": 1992, + "video_reward_cumulative_accuracy": 0.8273092369477911 + }, + { + "epoch": 0.5915701988720689, + "grad_norm": 0.8122701048851013, + "learning_rate": 2.138619642589972e-06, + "loss": 0.0428, + "step": 1993, + "video_reward_cumulative_accuracy": 0.8273958855995986 + }, + { + "epoch": 0.5918670228554467, + "grad_norm": 1.304606318473816, + "learning_rate": 2.13605667980972e-06, + "loss": 0.0408, + "step": 1994, + "video_reward_cumulative_accuracy": 0.827482447342026 + }, + { + "epoch": 0.5921638468388246, + "grad_norm": 0.6447356343269348, + "learning_rate": 2.1334941077579457e-06, + "loss": 0.0113, + "step": 1995, + "video_reward_cumulative_accuracy": 0.8275689223057644 + }, + { + "epoch": 0.5924606708222024, + "grad_norm": 1.6157499551773071, + "learning_rate": 2.1309319291858194e-06, + "loss": 0.0341, + "step": 1996, + "video_reward_cumulative_accuracy": 0.8276553106212425 + }, + { + "epoch": 0.5927574948055803, + "grad_norm": 1.3407137393951416, + "learning_rate": 2.1283701468440875e-06, + "loss": 0.0139, + "step": 1997, + "video_reward_cumulative_accuracy": 0.827741612418628 + }, + { + "epoch": 0.5930543187889582, + "grad_norm": 2.7344970703125, + "learning_rate": 2.1258087634830724e-06, + "loss": 0.0588, + "step": 1998, + "video_reward_cumulative_accuracy": 0.8275775775775776 + }, + { + "epoch": 0.593351142772336, + "grad_norm": 2.2847628593444824, + "learning_rate": 2.1232477818526685e-06, + "loss": 0.0199, + "step": 1999, + "video_reward_cumulative_accuracy": 0.8276638319159579 + }, + { + "epoch": 0.5936479667557139, + "grad_norm": 1.1096928119659424, + "learning_rate": 2.120687204702337e-06, + "loss": 0.0169, + "step": 2000, + "video_reward_cumulative_accuracy": 0.82775 + }, + { + "epoch": 0.5936479667557139, + "eval_runtime": 130.6244, + "eval_samples_per_second": 6.04, + "eval_steps_per_second": 0.758, + "eval_test_set_accuracy": 0.8106060606060606, + "step": 2000 + }, + { + "epoch": 0.5939447907390917, + "grad_norm": 0.6623875498771667, + "learning_rate": 2.118127034781107e-06, + "loss": 0.017, + "step": 2001, + "video_reward_cumulative_accuracy": 0.8278360819590205 + }, + { + "epoch": 0.5942416147224696, + "grad_norm": 1.5880417823791504, + "learning_rate": 2.1155672748375684e-06, + "loss": 0.0249, + "step": 2002, + "video_reward_cumulative_accuracy": 0.827922077922078 + }, + { + "epoch": 0.5945384387058474, + "grad_norm": 0.9980058670043945, + "learning_rate": 2.1130079276198727e-06, + "loss": 0.0209, + "step": 2003, + "video_reward_cumulative_accuracy": 0.828007988017973 + }, + { + "epoch": 0.5948352626892253, + "grad_norm": 1.9351149797439575, + "learning_rate": 2.1104489958757267e-06, + "loss": 0.028, + "step": 2004, + "video_reward_cumulative_accuracy": 0.8278443113772455 + }, + { + "epoch": 0.5951320866726032, + "grad_norm": 1.2848634719848633, + "learning_rate": 2.107890482352393e-06, + "loss": 0.0193, + "step": 2005, + "video_reward_cumulative_accuracy": 0.827930174563591 + }, + { + "epoch": 0.595428910655981, + "grad_norm": 2.291006565093994, + "learning_rate": 2.105332389796684e-06, + "loss": 0.0395, + "step": 2006, + "video_reward_cumulative_accuracy": 0.8280159521435693 + }, + { + "epoch": 0.5957257346393589, + "grad_norm": 1.295036792755127, + "learning_rate": 2.1027747209549596e-06, + "loss": 0.0176, + "step": 2007, + "video_reward_cumulative_accuracy": 0.828101644245142 + }, + { + "epoch": 0.5960225586227367, + "grad_norm": 1.5013102293014526, + "learning_rate": 2.1002174785731265e-06, + "loss": 0.0294, + "step": 2008, + "video_reward_cumulative_accuracy": 0.828187250996016 + }, + { + "epoch": 0.5963193826061146, + "grad_norm": 1.3362897634506226, + "learning_rate": 2.097660665396632e-06, + "loss": 0.0279, + "step": 2009, + "video_reward_cumulative_accuracy": 0.8282727725236436 + }, + { + "epoch": 0.5966162065894924, + "grad_norm": 2.3554601669311523, + "learning_rate": 2.0951042841704628e-06, + "loss": 0.0542, + "step": 2010, + "video_reward_cumulative_accuracy": 0.8278606965174129 + }, + { + "epoch": 0.5969130305728703, + "grad_norm": 2.2153587341308594, + "learning_rate": 2.0925483376391437e-06, + "loss": 0.0155, + "step": 2011, + "video_reward_cumulative_accuracy": 0.8279462953754351 + }, + { + "epoch": 0.5972098545562482, + "grad_norm": 2.6395490169525146, + "learning_rate": 2.08999282854673e-06, + "loss": 0.0452, + "step": 2012, + "video_reward_cumulative_accuracy": 0.8280318091451292 + }, + { + "epoch": 0.597506678539626, + "grad_norm": 2.8601341247558594, + "learning_rate": 2.08743775963681e-06, + "loss": 0.0375, + "step": 2013, + "video_reward_cumulative_accuracy": 0.8281172379533035 + }, + { + "epoch": 0.5978035025230038, + "grad_norm": 1.6802117824554443, + "learning_rate": 2.0848831336524956e-06, + "loss": 0.0371, + "step": 2014, + "video_reward_cumulative_accuracy": 0.8282025819265144 + }, + { + "epoch": 0.5981003265063817, + "grad_norm": 2.9565298557281494, + "learning_rate": 2.0823289533364295e-06, + "loss": 0.0639, + "step": 2015, + "video_reward_cumulative_accuracy": 0.828287841191067 + }, + { + "epoch": 0.5983971504897596, + "grad_norm": 3.7333922386169434, + "learning_rate": 2.0797752214307685e-06, + "loss": 0.0546, + "step": 2016, + "video_reward_cumulative_accuracy": 0.8283730158730159 + }, + { + "epoch": 0.5986939744731374, + "grad_norm": 1.6850907802581787, + "learning_rate": 2.077221940677194e-06, + "loss": 0.03, + "step": 2017, + "video_reward_cumulative_accuracy": 0.8284581060981656 + }, + { + "epoch": 0.5989907984565153, + "grad_norm": 0.4525964856147766, + "learning_rate": 2.0746691138169013e-06, + "loss": 0.0098, + "step": 2018, + "video_reward_cumulative_accuracy": 0.8285431119920713 + }, + { + "epoch": 0.5992876224398932, + "grad_norm": 1.5810643434524536, + "learning_rate": 2.0721167435905945e-06, + "loss": 0.0129, + "step": 2019, + "video_reward_cumulative_accuracy": 0.8283803863298663 + }, + { + "epoch": 0.599584446423271, + "grad_norm": 3.38840389251709, + "learning_rate": 2.069564832738495e-06, + "loss": 0.0289, + "step": 2020, + "video_reward_cumulative_accuracy": 0.8284653465346534 + }, + { + "epoch": 0.5998812704066488, + "grad_norm": 2.073776960372925, + "learning_rate": 2.067013384000323e-06, + "loss": 0.0237, + "step": 2021, + "video_reward_cumulative_accuracy": 0.8285502226620485 + }, + { + "epoch": 0.6001780943900267, + "grad_norm": 3.2106335163116455, + "learning_rate": 2.0644624001153073e-06, + "loss": 0.0942, + "step": 2022, + "video_reward_cumulative_accuracy": 0.8283877349159249 + }, + { + "epoch": 0.6004749183734046, + "grad_norm": 2.6965174674987793, + "learning_rate": 2.06191188382218e-06, + "loss": 0.0288, + "step": 2023, + "video_reward_cumulative_accuracy": 0.828225407810183 + }, + { + "epoch": 0.6007717423567824, + "grad_norm": 0.7816161513328552, + "learning_rate": 2.0593618378591625e-06, + "loss": 0.0159, + "step": 2024, + "video_reward_cumulative_accuracy": 0.8283102766798419 + }, + { + "epoch": 0.6010685663401603, + "grad_norm": 2.3955113887786865, + "learning_rate": 2.0568122649639815e-06, + "loss": 0.0364, + "step": 2025, + "video_reward_cumulative_accuracy": 0.828395061728395 + }, + { + "epoch": 0.6013653903235382, + "grad_norm": 2.5972840785980225, + "learning_rate": 2.0542631678738478e-06, + "loss": 0.0567, + "step": 2026, + "video_reward_cumulative_accuracy": 0.8284797630799605 + }, + { + "epoch": 0.601662214306916, + "grad_norm": 2.920619249343872, + "learning_rate": 2.051714549325466e-06, + "loss": 0.0282, + "step": 2027, + "video_reward_cumulative_accuracy": 0.8285643808584114 + }, + { + "epoch": 0.6019590382902938, + "grad_norm": 2.846386432647705, + "learning_rate": 2.049166412055025e-06, + "loss": 0.0385, + "step": 2028, + "video_reward_cumulative_accuracy": 0.8286489151873767 + }, + { + "epoch": 0.6022558622736717, + "grad_norm": 1.5138801336288452, + "learning_rate": 2.046618758798197e-06, + "loss": 0.0298, + "step": 2029, + "video_reward_cumulative_accuracy": 0.8287333661902415 + }, + { + "epoch": 0.6025526862570496, + "grad_norm": 3.822578191757202, + "learning_rate": 2.0440715922901362e-06, + "loss": 0.0489, + "step": 2030, + "video_reward_cumulative_accuracy": 0.8288177339901478 + }, + { + "epoch": 0.6028495102404274, + "grad_norm": 4.538577556610107, + "learning_rate": 2.041524915265472e-06, + "loss": 0.056, + "step": 2031, + "video_reward_cumulative_accuracy": 0.828902018709995 + }, + { + "epoch": 0.6031463342238053, + "grad_norm": 2.920750141143799, + "learning_rate": 2.0389787304583105e-06, + "loss": 0.0301, + "step": 2032, + "video_reward_cumulative_accuracy": 0.828986220472441 + }, + { + "epoch": 0.6034431582071832, + "grad_norm": 2.3497962951660156, + "learning_rate": 2.0364330406022265e-06, + "loss": 0.0964, + "step": 2033, + "video_reward_cumulative_accuracy": 0.8288243974422036 + }, + { + "epoch": 0.603739982190561, + "grad_norm": 3.301518440246582, + "learning_rate": 2.033887848430267e-06, + "loss": 0.057, + "step": 2034, + "video_reward_cumulative_accuracy": 0.8289085545722714 + }, + { + "epoch": 0.6040368061739388, + "grad_norm": 2.4687845706939697, + "learning_rate": 2.031343156674942e-06, + "loss": 0.1124, + "step": 2035, + "video_reward_cumulative_accuracy": 0.8287469287469288 + }, + { + "epoch": 0.6043336301573167, + "grad_norm": 4.89956521987915, + "learning_rate": 2.0287989680682247e-06, + "loss": 0.0747, + "step": 2036, + "video_reward_cumulative_accuracy": 0.8288310412573674 + }, + { + "epoch": 0.6046304541406946, + "grad_norm": 3.3470458984375, + "learning_rate": 2.026255285341549e-06, + "loss": 0.0345, + "step": 2037, + "video_reward_cumulative_accuracy": 0.8289150711831125 + }, + { + "epoch": 0.6049272781240724, + "grad_norm": 1.8060578107833862, + "learning_rate": 2.023712111225805e-06, + "loss": 0.0265, + "step": 2038, + "video_reward_cumulative_accuracy": 0.8289990186457311 + }, + { + "epoch": 0.6052241021074503, + "grad_norm": 4.508866310119629, + "learning_rate": 2.0211694484513376e-06, + "loss": 0.0483, + "step": 2039, + "video_reward_cumulative_accuracy": 0.8288376655223149 + }, + { + "epoch": 0.6055209260908282, + "grad_norm": 1.380003571510315, + "learning_rate": 2.0186272997479407e-06, + "loss": 0.0247, + "step": 2040, + "video_reward_cumulative_accuracy": 0.828921568627451 + }, + { + "epoch": 0.605817750074206, + "grad_norm": 2.6239430904388428, + "learning_rate": 2.016085667844859e-06, + "loss": 0.0568, + "step": 2041, + "video_reward_cumulative_accuracy": 0.8290053895149436 + }, + { + "epoch": 0.6061145740575838, + "grad_norm": 3.0193066596984863, + "learning_rate": 2.0135445554707803e-06, + "loss": 0.0539, + "step": 2042, + "video_reward_cumulative_accuracy": 0.8290891283055828 + }, + { + "epoch": 0.6064113980409617, + "grad_norm": 2.1342217922210693, + "learning_rate": 2.011003965353835e-06, + "loss": 0.0406, + "step": 2043, + "video_reward_cumulative_accuracy": 0.8291727851199217 + }, + { + "epoch": 0.6067082220243396, + "grad_norm": 2.42179536819458, + "learning_rate": 2.008463900221595e-06, + "loss": 0.0604, + "step": 2044, + "video_reward_cumulative_accuracy": 0.8292563600782779 + }, + { + "epoch": 0.6070050460077174, + "grad_norm": 3.034480333328247, + "learning_rate": 2.0059243628010643e-06, + "loss": 0.0389, + "step": 2045, + "video_reward_cumulative_accuracy": 0.8290953545232274 + }, + { + "epoch": 0.6073018699910953, + "grad_norm": 1.7863432168960571, + "learning_rate": 2.0033853558186845e-06, + "loss": 0.0353, + "step": 2046, + "video_reward_cumulative_accuracy": 0.8291788856304986 + }, + { + "epoch": 0.6075986939744732, + "grad_norm": 1.3945716619491577, + "learning_rate": 2.0008468820003257e-06, + "loss": 0.0178, + "step": 2047, + "video_reward_cumulative_accuracy": 0.8290180752320468 + }, + { + "epoch": 0.607895517957851, + "grad_norm": 1.2314127683639526, + "learning_rate": 1.9983089440712853e-06, + "loss": 0.0172, + "step": 2048, + "video_reward_cumulative_accuracy": 0.8291015625 + }, + { + "epoch": 0.6081923419412288, + "grad_norm": 2.0700814723968506, + "learning_rate": 1.995771544756287e-06, + "loss": 0.0232, + "step": 2049, + "video_reward_cumulative_accuracy": 0.8289409468033186 + }, + { + "epoch": 0.6084891659246067, + "grad_norm": 2.8075079917907715, + "learning_rate": 1.993234686779474e-06, + "loss": 0.034, + "step": 2050, + "video_reward_cumulative_accuracy": 0.8290243902439024 + }, + { + "epoch": 0.6087859899079846, + "grad_norm": 0.5491511225700378, + "learning_rate": 1.990698372864411e-06, + "loss": 0.0198, + "step": 2051, + "video_reward_cumulative_accuracy": 0.8291077523159435 + }, + { + "epoch": 0.6090828138913624, + "grad_norm": 0.6947237253189087, + "learning_rate": 1.9881626057340757e-06, + "loss": 0.019, + "step": 2052, + "video_reward_cumulative_accuracy": 0.8291910331384016 + }, + { + "epoch": 0.6093796378747403, + "grad_norm": 2.1727752685546875, + "learning_rate": 1.9856273881108613e-06, + "loss": 0.0855, + "step": 2053, + "video_reward_cumulative_accuracy": 0.8292742328300049 + }, + { + "epoch": 0.6096764618581182, + "grad_norm": 1.6275689601898193, + "learning_rate": 1.9830927227165697e-06, + "loss": 0.0308, + "step": 2054, + "video_reward_cumulative_accuracy": 0.8293573515092503 + }, + { + "epoch": 0.609973285841496, + "grad_norm": 2.2714242935180664, + "learning_rate": 1.9805586122724095e-06, + "loss": 0.0478, + "step": 2055, + "video_reward_cumulative_accuracy": 0.8294403892944039 + }, + { + "epoch": 0.6102701098248738, + "grad_norm": 1.9769221544265747, + "learning_rate": 1.978025059498996e-06, + "loss": 0.0581, + "step": 2056, + "video_reward_cumulative_accuracy": 0.829523346303502 + }, + { + "epoch": 0.6105669338082517, + "grad_norm": 0.7704261541366577, + "learning_rate": 1.9754920671163418e-06, + "loss": 0.0099, + "step": 2057, + "video_reward_cumulative_accuracy": 0.829606222654351 + }, + { + "epoch": 0.6108637577916296, + "grad_norm": 2.209693193435669, + "learning_rate": 1.972959637843861e-06, + "loss": 0.0349, + "step": 2058, + "video_reward_cumulative_accuracy": 0.8296890184645287 + }, + { + "epoch": 0.6111605817750074, + "grad_norm": 0.5300698280334473, + "learning_rate": 1.9704277744003632e-06, + "loss": 0.0127, + "step": 2059, + "video_reward_cumulative_accuracy": 0.8297717338513841 + }, + { + "epoch": 0.6114574057583853, + "grad_norm": 2.17396879196167, + "learning_rate": 1.967896479504048e-06, + "loss": 0.0377, + "step": 2060, + "video_reward_cumulative_accuracy": 0.8298543689320388 + }, + { + "epoch": 0.6117542297417632, + "grad_norm": 2.26611328125, + "learning_rate": 1.9653657558725077e-06, + "loss": 0.0517, + "step": 2061, + "video_reward_cumulative_accuracy": 0.8296943231441049 + }, + { + "epoch": 0.612051053725141, + "grad_norm": 1.2621009349822998, + "learning_rate": 1.962835606222717e-06, + "loss": 0.0343, + "step": 2062, + "video_reward_cumulative_accuracy": 0.8297769156159069 + }, + { + "epoch": 0.6123478777085188, + "grad_norm": 1.5711784362792969, + "learning_rate": 1.9603060332710415e-06, + "loss": 0.0368, + "step": 2063, + "video_reward_cumulative_accuracy": 0.8298594280174503 + }, + { + "epoch": 0.6126447016918967, + "grad_norm": 2.2621207237243652, + "learning_rate": 1.9577770397332184e-06, + "loss": 0.0516, + "step": 2064, + "video_reward_cumulative_accuracy": 0.8296996124031008 + }, + { + "epoch": 0.6129415256752746, + "grad_norm": 2.1944663524627686, + "learning_rate": 1.955248628324371e-06, + "loss": 0.0442, + "step": 2065, + "video_reward_cumulative_accuracy": 0.8295399515738499 + }, + { + "epoch": 0.6132383496586524, + "grad_norm": 2.246171236038208, + "learning_rate": 1.9527208017589944e-06, + "loss": 0.0516, + "step": 2066, + "video_reward_cumulative_accuracy": 0.829622458857696 + }, + { + "epoch": 0.6135351736420303, + "grad_norm": 1.9881268739700317, + "learning_rate": 1.950193562750953e-06, + "loss": 0.034, + "step": 2067, + "video_reward_cumulative_accuracy": 0.8297048863086599 + }, + { + "epoch": 0.6138319976254082, + "grad_norm": 0.437326580286026, + "learning_rate": 1.947666914013487e-06, + "loss": 0.0082, + "step": 2068, + "video_reward_cumulative_accuracy": 0.8297872340425532 + }, + { + "epoch": 0.614128821608786, + "grad_norm": 2.9777653217315674, + "learning_rate": 1.945140858259195e-06, + "loss": 0.0259, + "step": 2069, + "video_reward_cumulative_accuracy": 0.8298695021749638 + }, + { + "epoch": 0.6144256455921638, + "grad_norm": 1.040705919265747, + "learning_rate": 1.9426153982000455e-06, + "loss": 0.0141, + "step": 2070, + "video_reward_cumulative_accuracy": 0.8299516908212561 + }, + { + "epoch": 0.6147224695755417, + "grad_norm": 1.5954067707061768, + "learning_rate": 1.9400905365473656e-06, + "loss": 0.033, + "step": 2071, + "video_reward_cumulative_accuracy": 0.8297923708353453 + }, + { + "epoch": 0.6150192935589196, + "grad_norm": 4.421472549438477, + "learning_rate": 1.937566276011837e-06, + "loss": 0.0657, + "step": 2072, + "video_reward_cumulative_accuracy": 0.8298745173745173 + }, + { + "epoch": 0.6153161175422974, + "grad_norm": 0.8702653050422668, + "learning_rate": 1.935042619303501e-06, + "loss": 0.0172, + "step": 2073, + "video_reward_cumulative_accuracy": 0.8299565846599132 + }, + { + "epoch": 0.6156129415256753, + "grad_norm": 1.1119422912597656, + "learning_rate": 1.9325195691317457e-06, + "loss": 0.0224, + "step": 2074, + "video_reward_cumulative_accuracy": 0.8300385728061717 + }, + { + "epoch": 0.6159097655090532, + "grad_norm": 0.5069667100906372, + "learning_rate": 1.929997128205312e-06, + "loss": 0.008, + "step": 2075, + "video_reward_cumulative_accuracy": 0.8301204819277108 + }, + { + "epoch": 0.616206589492431, + "grad_norm": 3.037325143814087, + "learning_rate": 1.927475299232283e-06, + "loss": 0.0442, + "step": 2076, + "video_reward_cumulative_accuracy": 0.8299614643545279 + }, + { + "epoch": 0.6165034134758088, + "grad_norm": 1.0800257921218872, + "learning_rate": 1.924954084920089e-06, + "loss": 0.0153, + "step": 2077, + "video_reward_cumulative_accuracy": 0.8300433317284545 + }, + { + "epoch": 0.6168002374591867, + "grad_norm": 2.045151710510254, + "learning_rate": 1.922433487975498e-06, + "loss": 0.0427, + "step": 2078, + "video_reward_cumulative_accuracy": 0.8298845043310876 + }, + { + "epoch": 0.6170970614425646, + "grad_norm": 2.991358518600464, + "learning_rate": 1.919913511104614e-06, + "loss": 0.0411, + "step": 2079, + "video_reward_cumulative_accuracy": 0.8297258297258298 + }, + { + "epoch": 0.6173938854259424, + "grad_norm": 1.7816275358200073, + "learning_rate": 1.9173941570128786e-06, + "loss": 0.0149, + "step": 2080, + "video_reward_cumulative_accuracy": 0.8298076923076924 + }, + { + "epoch": 0.6176907094093202, + "grad_norm": 1.7904562950134277, + "learning_rate": 1.9148754284050616e-06, + "loss": 0.0377, + "step": 2081, + "video_reward_cumulative_accuracy": 0.829889476213359 + }, + { + "epoch": 0.6179875333926982, + "grad_norm": 3.2209877967834473, + "learning_rate": 1.9123573279852632e-06, + "loss": 0.0225, + "step": 2082, + "video_reward_cumulative_accuracy": 0.829971181556196 + }, + { + "epoch": 0.618284357376076, + "grad_norm": 0.7754690051078796, + "learning_rate": 1.9098398584569085e-06, + "loss": 0.0197, + "step": 2083, + "video_reward_cumulative_accuracy": 0.8300528084493519 + }, + { + "epoch": 0.6185811813594538, + "grad_norm": 1.7929531335830688, + "learning_rate": 1.9073230225227451e-06, + "loss": 0.039, + "step": 2084, + "video_reward_cumulative_accuracy": 0.82989443378119 + }, + { + "epoch": 0.6188780053428317, + "grad_norm": 1.5716784000396729, + "learning_rate": 1.9048068228848412e-06, + "loss": 0.0188, + "step": 2085, + "video_reward_cumulative_accuracy": 0.8299760191846522 + }, + { + "epoch": 0.6191748293262096, + "grad_norm": 2.760315179824829, + "learning_rate": 1.9022912622445808e-06, + "loss": 0.0232, + "step": 2086, + "video_reward_cumulative_accuracy": 0.8300575263662512 + }, + { + "epoch": 0.6194716533095874, + "grad_norm": 7.752729892730713, + "learning_rate": 1.8997763433026631e-06, + "loss": 0.0878, + "step": 2087, + "video_reward_cumulative_accuracy": 0.8298993770963105 + }, + { + "epoch": 0.6197684772929652, + "grad_norm": 2.39508056640625, + "learning_rate": 1.8972620687590964e-06, + "loss": 0.0234, + "step": 2088, + "video_reward_cumulative_accuracy": 0.8299808429118773 + }, + { + "epoch": 0.6200653012763432, + "grad_norm": 3.0315792560577393, + "learning_rate": 1.8947484413131996e-06, + "loss": 0.0888, + "step": 2089, + "video_reward_cumulative_accuracy": 0.8300622307324078 + }, + { + "epoch": 0.620362125259721, + "grad_norm": 0.3297179341316223, + "learning_rate": 1.892235463663596e-06, + "loss": 0.0049, + "step": 2090, + "video_reward_cumulative_accuracy": 0.8301435406698564 + }, + { + "epoch": 0.6206589492430988, + "grad_norm": 0.5340771675109863, + "learning_rate": 1.8897231385082096e-06, + "loss": 0.0084, + "step": 2091, + "video_reward_cumulative_accuracy": 0.8302247728359636 + }, + { + "epoch": 0.6209557732264767, + "grad_norm": 2.871993064880371, + "learning_rate": 1.8872114685442665e-06, + "loss": 0.0711, + "step": 2092, + "video_reward_cumulative_accuracy": 0.8303059273422562 + }, + { + "epoch": 0.6212525972098546, + "grad_norm": 1.4503148794174194, + "learning_rate": 1.8847004564682878e-06, + "loss": 0.0436, + "step": 2093, + "video_reward_cumulative_accuracy": 0.8303870043000477 + }, + { + "epoch": 0.6215494211932324, + "grad_norm": 3.7459793090820312, + "learning_rate": 1.8821901049760882e-06, + "loss": 0.0717, + "step": 2094, + "video_reward_cumulative_accuracy": 0.8304680038204394 + }, + { + "epoch": 0.6218462451766102, + "grad_norm": 2.9643142223358154, + "learning_rate": 1.879680416762775e-06, + "loss": 0.0366, + "step": 2095, + "video_reward_cumulative_accuracy": 0.830310262529833 + }, + { + "epoch": 0.6221430691599882, + "grad_norm": 1.8001595735549927, + "learning_rate": 1.8771713945227404e-06, + "loss": 0.0272, + "step": 2096, + "video_reward_cumulative_accuracy": 0.8303912213740458 + }, + { + "epoch": 0.622439893143366, + "grad_norm": 3.3852388858795166, + "learning_rate": 1.8746630409496647e-06, + "loss": 0.0394, + "step": 2097, + "video_reward_cumulative_accuracy": 0.8304721030042919 + }, + { + "epoch": 0.6227367171267438, + "grad_norm": 2.9447176456451416, + "learning_rate": 1.872155358736508e-06, + "loss": 0.0385, + "step": 2098, + "video_reward_cumulative_accuracy": 0.8305529075309819 + }, + { + "epoch": 0.6230335411101217, + "grad_norm": 2.6223206520080566, + "learning_rate": 1.8696483505755114e-06, + "loss": 0.0507, + "step": 2099, + "video_reward_cumulative_accuracy": 0.8303954263935207 + }, + { + "epoch": 0.6233303650934996, + "grad_norm": 1.8533953428268433, + "learning_rate": 1.8671420191581901e-06, + "loss": 0.0151, + "step": 2100, + "video_reward_cumulative_accuracy": 0.8304761904761905 + }, + { + "epoch": 0.6236271890768774, + "grad_norm": 2.846639633178711, + "learning_rate": 1.8646363671753354e-06, + "loss": 0.0545, + "step": 2101, + "video_reward_cumulative_accuracy": 0.8305568776772966 + }, + { + "epoch": 0.6239240130602552, + "grad_norm": 2.0987446308135986, + "learning_rate": 1.8621313973170074e-06, + "loss": 0.0465, + "step": 2102, + "video_reward_cumulative_accuracy": 0.8306374881065651 + }, + { + "epoch": 0.6242208370436332, + "grad_norm": 2.0528335571289062, + "learning_rate": 1.8596271122725346e-06, + "loss": 0.0421, + "step": 2103, + "video_reward_cumulative_accuracy": 0.8304802662862577 + }, + { + "epoch": 0.624517661027011, + "grad_norm": 1.371273159980774, + "learning_rate": 1.8571235147305106e-06, + "loss": 0.0196, + "step": 2104, + "video_reward_cumulative_accuracy": 0.8305608365019012 + }, + { + "epoch": 0.6248144850103888, + "grad_norm": 1.087815761566162, + "learning_rate": 1.8546206073787882e-06, + "loss": 0.0236, + "step": 2105, + "video_reward_cumulative_accuracy": 0.8306413301662707 + }, + { + "epoch": 0.6251113089937667, + "grad_norm": 0.9951589703559875, + "learning_rate": 1.8521183929044834e-06, + "loss": 0.0072, + "step": 2106, + "video_reward_cumulative_accuracy": 0.8307217473884141 + }, + { + "epoch": 0.6254081329771446, + "grad_norm": 1.7407307624816895, + "learning_rate": 1.8496168739939662e-06, + "loss": 0.0414, + "step": 2107, + "video_reward_cumulative_accuracy": 0.8305647840531561 + }, + { + "epoch": 0.6257049569605224, + "grad_norm": 0.9107375741004944, + "learning_rate": 1.8471160533328591e-06, + "loss": 0.0086, + "step": 2108, + "video_reward_cumulative_accuracy": 0.8306451612903226 + }, + { + "epoch": 0.6260017809439002, + "grad_norm": 1.3645784854888916, + "learning_rate": 1.844615933606037e-06, + "loss": 0.0305, + "step": 2109, + "video_reward_cumulative_accuracy": 0.8307254623044097 + }, + { + "epoch": 0.6262986049272781, + "grad_norm": 0.9860436320304871, + "learning_rate": 1.8421165174976191e-06, + "loss": 0.0254, + "step": 2110, + "video_reward_cumulative_accuracy": 0.8308056872037914 + }, + { + "epoch": 0.626595428910656, + "grad_norm": 1.1706582307815552, + "learning_rate": 1.8396178076909735e-06, + "loss": 0.0133, + "step": 2111, + "video_reward_cumulative_accuracy": 0.8306489815253434 + }, + { + "epoch": 0.6268922528940338, + "grad_norm": 2.995161533355713, + "learning_rate": 1.8371198068687051e-06, + "loss": 0.0276, + "step": 2112, + "video_reward_cumulative_accuracy": 0.8307291666666666 + }, + { + "epoch": 0.6271890768774117, + "grad_norm": 3.4480783939361572, + "learning_rate": 1.8346225177126622e-06, + "loss": 0.0283, + "step": 2113, + "video_reward_cumulative_accuracy": 0.8308092759110269 + }, + { + "epoch": 0.6274859008607896, + "grad_norm": 4.289238929748535, + "learning_rate": 1.8321259429039276e-06, + "loss": 0.0667, + "step": 2114, + "video_reward_cumulative_accuracy": 0.8306527909176916 + }, + { + "epoch": 0.6277827248441674, + "grad_norm": 4.29648494720459, + "learning_rate": 1.829630085122814e-06, + "loss": 0.0629, + "step": 2115, + "video_reward_cumulative_accuracy": 0.8304964539007093 + }, + { + "epoch": 0.6280795488275452, + "grad_norm": 1.3143110275268555, + "learning_rate": 1.8271349470488703e-06, + "loss": 0.0287, + "step": 2116, + "video_reward_cumulative_accuracy": 0.8305765595463138 + }, + { + "epoch": 0.6283763728109231, + "grad_norm": 2.627892255783081, + "learning_rate": 1.8246405313608668e-06, + "loss": 0.055, + "step": 2117, + "video_reward_cumulative_accuracy": 0.8306565895134624 + }, + { + "epoch": 0.628673196794301, + "grad_norm": 4.371956825256348, + "learning_rate": 1.8221468407368009e-06, + "loss": 0.0693, + "step": 2118, + "video_reward_cumulative_accuracy": 0.8307365439093485 + }, + { + "epoch": 0.6289700207776788, + "grad_norm": 1.8598741292953491, + "learning_rate": 1.8196538778538941e-06, + "loss": 0.0516, + "step": 2119, + "video_reward_cumulative_accuracy": 0.8308164228409627 + }, + { + "epoch": 0.6292668447610567, + "grad_norm": 1.7819162607192993, + "learning_rate": 1.8171616453885806e-06, + "loss": 0.029, + "step": 2120, + "video_reward_cumulative_accuracy": 0.8308962264150943 + }, + { + "epoch": 0.6295636687444346, + "grad_norm": 1.686316967010498, + "learning_rate": 1.8146701460165172e-06, + "loss": 0.0179, + "step": 2121, + "video_reward_cumulative_accuracy": 0.830975954738331 + }, + { + "epoch": 0.6298604927278124, + "grad_norm": 3.108572006225586, + "learning_rate": 1.8121793824125677e-06, + "loss": 0.0561, + "step": 2122, + "video_reward_cumulative_accuracy": 0.8310556079170593 + }, + { + "epoch": 0.6301573167111902, + "grad_norm": 1.9357826709747314, + "learning_rate": 1.80968935725081e-06, + "loss": 0.0569, + "step": 2123, + "video_reward_cumulative_accuracy": 0.8311351860574658 + }, + { + "epoch": 0.6304541406945681, + "grad_norm": 5.203329563140869, + "learning_rate": 1.8072000732045265e-06, + "loss": 0.0402, + "step": 2124, + "video_reward_cumulative_accuracy": 0.8312146892655368 + }, + { + "epoch": 0.630750964677946, + "grad_norm": 1.325329065322876, + "learning_rate": 1.804711532946206e-06, + "loss": 0.0609, + "step": 2125, + "video_reward_cumulative_accuracy": 0.8312941176470589 + }, + { + "epoch": 0.6310477886613238, + "grad_norm": 3.6725287437438965, + "learning_rate": 1.8022237391475389e-06, + "loss": 0.1029, + "step": 2126, + "video_reward_cumulative_accuracy": 0.83137347130762 + }, + { + "epoch": 0.6313446126447017, + "grad_norm": 1.4435230493545532, + "learning_rate": 1.7997366944794116e-06, + "loss": 0.0419, + "step": 2127, + "video_reward_cumulative_accuracy": 0.8314527503526093 + }, + { + "epoch": 0.6316414366280796, + "grad_norm": 1.3416098356246948, + "learning_rate": 1.7972504016119092e-06, + "loss": 0.0154, + "step": 2128, + "video_reward_cumulative_accuracy": 0.8315319548872181 + }, + { + "epoch": 0.6319382606114574, + "grad_norm": 2.8020572662353516, + "learning_rate": 1.7947648632143075e-06, + "loss": 0.0215, + "step": 2129, + "video_reward_cumulative_accuracy": 0.8316110850164397 + }, + { + "epoch": 0.6322350845948352, + "grad_norm": 0.8301799893379211, + "learning_rate": 1.7922800819550737e-06, + "loss": 0.0204, + "step": 2130, + "video_reward_cumulative_accuracy": 0.8316901408450704 + }, + { + "epoch": 0.6325319085782131, + "grad_norm": 0.9084307551383972, + "learning_rate": 1.7897960605018623e-06, + "loss": 0.008, + "step": 2131, + "video_reward_cumulative_accuracy": 0.83176912247771 + }, + { + "epoch": 0.632828732561591, + "grad_norm": 1.8782029151916504, + "learning_rate": 1.78731280152151e-06, + "loss": 0.0184, + "step": 2132, + "video_reward_cumulative_accuracy": 0.8318480300187617 + }, + { + "epoch": 0.6331255565449688, + "grad_norm": 1.1400971412658691, + "learning_rate": 1.7848303076800378e-06, + "loss": 0.0157, + "step": 2133, + "video_reward_cumulative_accuracy": 0.8319268635724332 + }, + { + "epoch": 0.6334223805283467, + "grad_norm": 1.0256128311157227, + "learning_rate": 1.7823485816426422e-06, + "loss": 0.0287, + "step": 2134, + "video_reward_cumulative_accuracy": 0.8317713214620431 + }, + { + "epoch": 0.6337192045117246, + "grad_norm": 3.9153847694396973, + "learning_rate": 1.7798676260736986e-06, + "loss": 0.0366, + "step": 2135, + "video_reward_cumulative_accuracy": 0.831615925058548 + }, + { + "epoch": 0.6340160284951024, + "grad_norm": 3.1156997680664062, + "learning_rate": 1.7773874436367521e-06, + "loss": 0.0212, + "step": 2136, + "video_reward_cumulative_accuracy": 0.8316947565543071 + }, + { + "epoch": 0.6343128524784802, + "grad_norm": 1.2179923057556152, + "learning_rate": 1.77490803699452e-06, + "loss": 0.03, + "step": 2137, + "video_reward_cumulative_accuracy": 0.8317735142723444 + }, + { + "epoch": 0.6346096764618581, + "grad_norm": 1.7540775537490845, + "learning_rate": 1.7724294088088867e-06, + "loss": 0.0142, + "step": 2138, + "video_reward_cumulative_accuracy": 0.8318521983161834 + }, + { + "epoch": 0.634906500445236, + "grad_norm": 0.7738943696022034, + "learning_rate": 1.769951561740899e-06, + "loss": 0.0245, + "step": 2139, + "video_reward_cumulative_accuracy": 0.8319308087891538 + }, + { + "epoch": 0.6352033244286138, + "grad_norm": 1.5957173109054565, + "learning_rate": 1.7674744984507668e-06, + "loss": 0.028, + "step": 2140, + "video_reward_cumulative_accuracy": 0.8320093457943926 + }, + { + "epoch": 0.6355001484119916, + "grad_norm": 4.45468282699585, + "learning_rate": 1.7649982215978573e-06, + "loss": 0.0393, + "step": 2141, + "video_reward_cumulative_accuracy": 0.8320878094348435 + }, + { + "epoch": 0.6357969723953696, + "grad_norm": 2.190316677093506, + "learning_rate": 1.7625227338406946e-06, + "loss": 0.0443, + "step": 2142, + "video_reward_cumulative_accuracy": 0.8321661998132587 + }, + { + "epoch": 0.6360937963787474, + "grad_norm": 1.6457760334014893, + "learning_rate": 1.7600480378369555e-06, + "loss": 0.0134, + "step": 2143, + "video_reward_cumulative_accuracy": 0.8322445170321978 + }, + { + "epoch": 0.6363906203621252, + "grad_norm": 2.3591673374176025, + "learning_rate": 1.7575741362434655e-06, + "loss": 0.0413, + "step": 2144, + "video_reward_cumulative_accuracy": 0.831856343283582 + }, + { + "epoch": 0.6366874443455031, + "grad_norm": 2.5493083000183105, + "learning_rate": 1.7551010317161987e-06, + "loss": 0.0269, + "step": 2145, + "video_reward_cumulative_accuracy": 0.8319347319347319 + }, + { + "epoch": 0.636984268328881, + "grad_norm": 1.5170857906341553, + "learning_rate": 1.7526287269102724e-06, + "loss": 0.0253, + "step": 2146, + "video_reward_cumulative_accuracy": 0.8320130475302889 + }, + { + "epoch": 0.6372810923122588, + "grad_norm": 1.9697927236557007, + "learning_rate": 1.750157224479946e-06, + "loss": 0.0429, + "step": 2147, + "video_reward_cumulative_accuracy": 0.8320912901723335 + }, + { + "epoch": 0.6375779162956366, + "grad_norm": 1.7776871919631958, + "learning_rate": 1.7476865270786169e-06, + "loss": 0.0185, + "step": 2148, + "video_reward_cumulative_accuracy": 0.832169459962756 + }, + { + "epoch": 0.6378747402790146, + "grad_norm": 1.8674986362457275, + "learning_rate": 1.7452166373588185e-06, + "loss": 0.0131, + "step": 2149, + "video_reward_cumulative_accuracy": 0.8322475570032574 + }, + { + "epoch": 0.6381715642623924, + "grad_norm": 4.821762561798096, + "learning_rate": 1.7427475579722186e-06, + "loss": 0.1067, + "step": 2150, + "video_reward_cumulative_accuracy": 0.8323255813953488 + }, + { + "epoch": 0.6384683882457702, + "grad_norm": 1.7846288681030273, + "learning_rate": 1.7402792915696115e-06, + "loss": 0.0251, + "step": 2151, + "video_reward_cumulative_accuracy": 0.8324035332403533 + }, + { + "epoch": 0.6387652122291481, + "grad_norm": 3.2803213596343994, + "learning_rate": 1.7378118408009227e-06, + "loss": 0.045, + "step": 2152, + "video_reward_cumulative_accuracy": 0.8322490706319703 + }, + { + "epoch": 0.639062036212526, + "grad_norm": 3.1670420169830322, + "learning_rate": 1.7353452083151975e-06, + "loss": 0.0336, + "step": 2153, + "video_reward_cumulative_accuracy": 0.8323269856014863 + }, + { + "epoch": 0.6393588601959038, + "grad_norm": 0.46488067507743835, + "learning_rate": 1.7328793967606072e-06, + "loss": 0.006, + "step": 2154, + "video_reward_cumulative_accuracy": 0.8324048282265553 + }, + { + "epoch": 0.6396556841792816, + "grad_norm": 2.008270740509033, + "learning_rate": 1.7304144087844405e-06, + "loss": 0.0287, + "step": 2155, + "video_reward_cumulative_accuracy": 0.8324825986078886 + }, + { + "epoch": 0.6399525081626596, + "grad_norm": 4.751814365386963, + "learning_rate": 1.7279502470330994e-06, + "loss": 0.1412, + "step": 2156, + "video_reward_cumulative_accuracy": 0.8325602968460112 + }, + { + "epoch": 0.6402493321460374, + "grad_norm": 0.5087360143661499, + "learning_rate": 1.7254869141521026e-06, + "loss": 0.0166, + "step": 2157, + "video_reward_cumulative_accuracy": 0.832637923041261 + }, + { + "epoch": 0.6405461561294152, + "grad_norm": 2.7528774738311768, + "learning_rate": 1.723024412786074e-06, + "loss": 0.1185, + "step": 2158, + "video_reward_cumulative_accuracy": 0.8322520852641334 + }, + { + "epoch": 0.6408429801127931, + "grad_norm": 2.7754266262054443, + "learning_rate": 1.7205627455787515e-06, + "loss": 0.0346, + "step": 2159, + "video_reward_cumulative_accuracy": 0.8323297823066235 + }, + { + "epoch": 0.641139804096171, + "grad_norm": 2.181504726409912, + "learning_rate": 1.7181019151729709e-06, + "loss": 0.054, + "step": 2160, + "video_reward_cumulative_accuracy": 0.8324074074074074 + }, + { + "epoch": 0.6414366280795488, + "grad_norm": 1.7188265323638916, + "learning_rate": 1.7156419242106736e-06, + "loss": 0.0377, + "step": 2161, + "video_reward_cumulative_accuracy": 0.8324849606663581 + }, + { + "epoch": 0.6417334520629266, + "grad_norm": 3.2530951499938965, + "learning_rate": 1.7131827753329e-06, + "loss": 0.0481, + "step": 2162, + "video_reward_cumulative_accuracy": 0.8323311748381128 + }, + { + "epoch": 0.6420302760463046, + "grad_norm": 0.4359917938709259, + "learning_rate": 1.710724471179782e-06, + "loss": 0.0043, + "step": 2163, + "video_reward_cumulative_accuracy": 0.8324086916319926 + }, + { + "epoch": 0.6423271000296824, + "grad_norm": 2.456068754196167, + "learning_rate": 1.7082670143905516e-06, + "loss": 0.0374, + "step": 2164, + "video_reward_cumulative_accuracy": 0.8324861367837338 + }, + { + "epoch": 0.6426239240130602, + "grad_norm": 1.3380742073059082, + "learning_rate": 1.7058104076035237e-06, + "loss": 0.0174, + "step": 2165, + "video_reward_cumulative_accuracy": 0.8325635103926097 + }, + { + "epoch": 0.6429207479964381, + "grad_norm": 2.8565738201141357, + "learning_rate": 1.7033546534561046e-06, + "loss": 0.0186, + "step": 2166, + "video_reward_cumulative_accuracy": 0.832409972299169 + }, + { + "epoch": 0.643217571979816, + "grad_norm": 3.0799217224121094, + "learning_rate": 1.7008997545847878e-06, + "loss": 0.066, + "step": 2167, + "video_reward_cumulative_accuracy": 0.8322565759113982 + }, + { + "epoch": 0.6435143959631938, + "grad_norm": 1.7654461860656738, + "learning_rate": 1.6984457136251415e-06, + "loss": 0.0339, + "step": 2168, + "video_reward_cumulative_accuracy": 0.8323339483394834 + }, + { + "epoch": 0.6438112199465716, + "grad_norm": 4.95688533782959, + "learning_rate": 1.69599253321182e-06, + "loss": 0.0901, + "step": 2169, + "video_reward_cumulative_accuracy": 0.8321807284462887 + }, + { + "epoch": 0.6441080439299496, + "grad_norm": 1.675530195236206, + "learning_rate": 1.6935402159785482e-06, + "loss": 0.0691, + "step": 2170, + "video_reward_cumulative_accuracy": 0.8320276497695852 + }, + { + "epoch": 0.6444048679133274, + "grad_norm": 2.567598342895508, + "learning_rate": 1.6910887645581288e-06, + "loss": 0.0519, + "step": 2171, + "video_reward_cumulative_accuracy": 0.8321050207277753 + }, + { + "epoch": 0.6447016918967052, + "grad_norm": 2.3529388904571533, + "learning_rate": 1.6886381815824304e-06, + "loss": 0.0239, + "step": 2172, + "video_reward_cumulative_accuracy": 0.8321823204419889 + }, + { + "epoch": 0.6449985158800831, + "grad_norm": 3.154841899871826, + "learning_rate": 1.6861884696823935e-06, + "loss": 0.0572, + "step": 2173, + "video_reward_cumulative_accuracy": 0.8322595490105844 + }, + { + "epoch": 0.645295339863461, + "grad_norm": 2.80783748626709, + "learning_rate": 1.6837396314880216e-06, + "loss": 0.0874, + "step": 2174, + "video_reward_cumulative_accuracy": 0.8323367065317387 + }, + { + "epoch": 0.6455921638468388, + "grad_norm": 1.9993150234222412, + "learning_rate": 1.681291669628379e-06, + "loss": 0.0253, + "step": 2175, + "video_reward_cumulative_accuracy": 0.8324137931034483 + }, + { + "epoch": 0.6458889878302166, + "grad_norm": 2.1698851585388184, + "learning_rate": 1.6788445867315918e-06, + "loss": 0.0296, + "step": 2176, + "video_reward_cumulative_accuracy": 0.8324908088235294 + }, + { + "epoch": 0.6461858118135946, + "grad_norm": 3.3127455711364746, + "learning_rate": 1.6763983854248395e-06, + "loss": 0.0626, + "step": 2177, + "video_reward_cumulative_accuracy": 0.8325677537896188 + }, + { + "epoch": 0.6464826357969724, + "grad_norm": 2.9646496772766113, + "learning_rate": 1.6739530683343574e-06, + "loss": 0.0443, + "step": 2178, + "video_reward_cumulative_accuracy": 0.8321854912764004 + }, + { + "epoch": 0.6467794597803502, + "grad_norm": 3.9705727100372314, + "learning_rate": 1.6715086380854311e-06, + "loss": 0.0516, + "step": 2179, + "video_reward_cumulative_accuracy": 0.8322625057365765 + }, + { + "epoch": 0.647076283763728, + "grad_norm": 1.3406922817230225, + "learning_rate": 1.669065097302393e-06, + "loss": 0.0174, + "step": 2180, + "video_reward_cumulative_accuracy": 0.8323394495412844 + }, + { + "epoch": 0.647373107747106, + "grad_norm": 0.7483333349227905, + "learning_rate": 1.666622448608622e-06, + "loss": 0.0206, + "step": 2181, + "video_reward_cumulative_accuracy": 0.832416322787712 + }, + { + "epoch": 0.6476699317304838, + "grad_norm": 1.2349746227264404, + "learning_rate": 1.6641806946265377e-06, + "loss": 0.0168, + "step": 2182, + "video_reward_cumulative_accuracy": 0.8324931255728689 + }, + { + "epoch": 0.6479667557138616, + "grad_norm": 1.7513976097106934, + "learning_rate": 1.6617398379776008e-06, + "loss": 0.0558, + "step": 2183, + "video_reward_cumulative_accuracy": 0.8323408153916628 + }, + { + "epoch": 0.6482635796972396, + "grad_norm": 0.5160261988639832, + "learning_rate": 1.6592998812823072e-06, + "loss": 0.0109, + "step": 2184, + "video_reward_cumulative_accuracy": 0.8324175824175825 + }, + { + "epoch": 0.6485604036806174, + "grad_norm": 5.037443161010742, + "learning_rate": 1.6568608271601873e-06, + "loss": 0.0496, + "step": 2185, + "video_reward_cumulative_accuracy": 0.8324942791762013 + }, + { + "epoch": 0.6488572276639952, + "grad_norm": 2.4052493572235107, + "learning_rate": 1.6544226782298033e-06, + "loss": 0.0281, + "step": 2186, + "video_reward_cumulative_accuracy": 0.8325709057639524 + }, + { + "epoch": 0.649154051647373, + "grad_norm": 2.0196239948272705, + "learning_rate": 1.6519854371087434e-06, + "loss": 0.0943, + "step": 2187, + "video_reward_cumulative_accuracy": 0.8324188385916781 + }, + { + "epoch": 0.649450875630751, + "grad_norm": 1.2571297883987427, + "learning_rate": 1.6495491064136239e-06, + "loss": 0.0215, + "step": 2188, + "video_reward_cumulative_accuracy": 0.8324954296160878 + }, + { + "epoch": 0.6497476996141288, + "grad_norm": 1.217132806777954, + "learning_rate": 1.6471136887600805e-06, + "loss": 0.017, + "step": 2189, + "video_reward_cumulative_accuracy": 0.832571950662403 + }, + { + "epoch": 0.6500445235975066, + "grad_norm": 2.48207426071167, + "learning_rate": 1.6446791867627718e-06, + "loss": 0.0539, + "step": 2190, + "video_reward_cumulative_accuracy": 0.832648401826484 + }, + { + "epoch": 0.6503413475808846, + "grad_norm": 4.670328617095947, + "learning_rate": 1.642245603035372e-06, + "loss": 0.0683, + "step": 2191, + "video_reward_cumulative_accuracy": 0.8327247832040164 + }, + { + "epoch": 0.6506381715642624, + "grad_norm": 0.8069223761558533, + "learning_rate": 1.6398129401905687e-06, + "loss": 0.032, + "step": 2192, + "video_reward_cumulative_accuracy": 0.8328010948905109 + }, + { + "epoch": 0.6509349955476402, + "grad_norm": 3.4688570499420166, + "learning_rate": 1.6373812008400623e-06, + "loss": 0.0296, + "step": 2193, + "video_reward_cumulative_accuracy": 0.8326493388052896 + }, + { + "epoch": 0.651231819531018, + "grad_norm": 1.3955700397491455, + "learning_rate": 1.6349503875945599e-06, + "loss": 0.041, + "step": 2194, + "video_reward_cumulative_accuracy": 0.8327256153144941 + }, + { + "epoch": 0.651528643514396, + "grad_norm": 0.7285477519035339, + "learning_rate": 1.632520503063777e-06, + "loss": 0.0208, + "step": 2195, + "video_reward_cumulative_accuracy": 0.8328018223234624 + }, + { + "epoch": 0.6518254674977738, + "grad_norm": 1.377070426940918, + "learning_rate": 1.630091549856429e-06, + "loss": 0.0357, + "step": 2196, + "video_reward_cumulative_accuracy": 0.8328779599271403 + }, + { + "epoch": 0.6521222914811516, + "grad_norm": 1.5354235172271729, + "learning_rate": 1.6276635305802336e-06, + "loss": 0.0356, + "step": 2197, + "video_reward_cumulative_accuracy": 0.8329540282203004 + }, + { + "epoch": 0.6524191154645296, + "grad_norm": 0.5859040021896362, + "learning_rate": 1.6252364478419057e-06, + "loss": 0.0181, + "step": 2198, + "video_reward_cumulative_accuracy": 0.8330300272975433 + }, + { + "epoch": 0.6527159394479074, + "grad_norm": 0.6209362745285034, + "learning_rate": 1.622810304247153e-06, + "loss": 0.0127, + "step": 2199, + "video_reward_cumulative_accuracy": 0.833105957253297 + }, + { + "epoch": 0.6530127634312852, + "grad_norm": 1.3316088914871216, + "learning_rate": 1.6203851024006779e-06, + "loss": 0.0219, + "step": 2200, + "video_reward_cumulative_accuracy": 0.8331818181818181 + }, + { + "epoch": 0.6530127634312852, + "eval_runtime": 132.7968, + "eval_samples_per_second": 5.941, + "eval_steps_per_second": 0.745, + "eval_test_set_accuracy": 0.7878787878787878, + "step": 2200 + }, + { + "epoch": 0.653309587414663, + "grad_norm": 1.4014110565185547, + "learning_rate": 1.6179608449061671e-06, + "loss": 0.0302, + "step": 2201, + "video_reward_cumulative_accuracy": 0.8330304407087687 + }, + { + "epoch": 0.653606411398041, + "grad_norm": 1.8992540836334229, + "learning_rate": 1.6155375343662986e-06, + "loss": 0.0529, + "step": 2202, + "video_reward_cumulative_accuracy": 0.8331062670299727 + }, + { + "epoch": 0.6539032353814188, + "grad_norm": 0.6547995209693909, + "learning_rate": 1.6131151733827314e-06, + "loss": 0.0144, + "step": 2203, + "video_reward_cumulative_accuracy": 0.833182024512029 + }, + { + "epoch": 0.6542000593647966, + "grad_norm": 4.633205890655518, + "learning_rate": 1.6106937645561042e-06, + "loss": 0.0385, + "step": 2204, + "video_reward_cumulative_accuracy": 0.8332577132486388 + }, + { + "epoch": 0.6544968833481746, + "grad_norm": 0.8929312229156494, + "learning_rate": 1.6082733104860354e-06, + "loss": 0.0146, + "step": 2205, + "video_reward_cumulative_accuracy": 0.8333333333333334 + }, + { + "epoch": 0.6547937073315524, + "grad_norm": 1.7991684675216675, + "learning_rate": 1.6058538137711155e-06, + "loss": 0.0463, + "step": 2206, + "video_reward_cumulative_accuracy": 0.8334088848594742 + }, + { + "epoch": 0.6550905313149302, + "grad_norm": 1.2529010772705078, + "learning_rate": 1.603435277008912e-06, + "loss": 0.0193, + "step": 2207, + "video_reward_cumulative_accuracy": 0.8334843679202537 + }, + { + "epoch": 0.655387355298308, + "grad_norm": 2.0492186546325684, + "learning_rate": 1.6010177027959556e-06, + "loss": 0.0362, + "step": 2208, + "video_reward_cumulative_accuracy": 0.8335597826086957 + }, + { + "epoch": 0.655684179281686, + "grad_norm": 3.027116060256958, + "learning_rate": 1.598601093727749e-06, + "loss": 0.0604, + "step": 2209, + "video_reward_cumulative_accuracy": 0.833635129017655 + }, + { + "epoch": 0.6559810032650638, + "grad_norm": 2.721015691757202, + "learning_rate": 1.5961854523987569e-06, + "loss": 0.0486, + "step": 2210, + "video_reward_cumulative_accuracy": 0.833710407239819 + }, + { + "epoch": 0.6562778272484416, + "grad_norm": 0.9648367166519165, + "learning_rate": 1.5937707814024024e-06, + "loss": 0.0094, + "step": 2211, + "video_reward_cumulative_accuracy": 0.833785617367707 + }, + { + "epoch": 0.6565746512318196, + "grad_norm": 1.7013683319091797, + "learning_rate": 1.5913570833310721e-06, + "loss": 0.0201, + "step": 2212, + "video_reward_cumulative_accuracy": 0.8338607594936709 + }, + { + "epoch": 0.6568714752151974, + "grad_norm": 1.8337125778198242, + "learning_rate": 1.5889443607761023e-06, + "loss": 0.0281, + "step": 2213, + "video_reward_cumulative_accuracy": 0.8339358337098961 + }, + { + "epoch": 0.6571682991985752, + "grad_norm": 3.3444020748138428, + "learning_rate": 1.5865326163277862e-06, + "loss": 0.0687, + "step": 2214, + "video_reward_cumulative_accuracy": 0.8337850045167118 + }, + { + "epoch": 0.657465123181953, + "grad_norm": 1.9684284925460815, + "learning_rate": 1.584121852575367e-06, + "loss": 0.0323, + "step": 2215, + "video_reward_cumulative_accuracy": 0.8338600451467268 + }, + { + "epoch": 0.657761947165331, + "grad_norm": 5.16708517074585, + "learning_rate": 1.5817120721070302e-06, + "loss": 0.055, + "step": 2216, + "video_reward_cumulative_accuracy": 0.8339350180505415 + }, + { + "epoch": 0.6580587711487088, + "grad_norm": 1.9357199668884277, + "learning_rate": 1.579303277509913e-06, + "loss": 0.0191, + "step": 2217, + "video_reward_cumulative_accuracy": 0.8340099233198015 + }, + { + "epoch": 0.6583555951320866, + "grad_norm": 1.5035184621810913, + "learning_rate": 1.5768954713700868e-06, + "loss": 0.0106, + "step": 2218, + "video_reward_cumulative_accuracy": 0.8340847610459874 + }, + { + "epoch": 0.6586524191154646, + "grad_norm": 2.6335108280181885, + "learning_rate": 1.574488656272567e-06, + "loss": 0.0271, + "step": 2219, + "video_reward_cumulative_accuracy": 0.8341595313204146 + }, + { + "epoch": 0.6589492430988424, + "grad_norm": 2.186413049697876, + "learning_rate": 1.5720828348013022e-06, + "loss": 0.031, + "step": 2220, + "video_reward_cumulative_accuracy": 0.8342342342342343 + }, + { + "epoch": 0.6592460670822202, + "grad_norm": 0.6132137775421143, + "learning_rate": 1.5696780095391762e-06, + "loss": 0.0234, + "step": 2221, + "video_reward_cumulative_accuracy": 0.8343088698784331 + }, + { + "epoch": 0.659542891065598, + "grad_norm": 2.6216671466827393, + "learning_rate": 1.5672741830680022e-06, + "loss": 0.0649, + "step": 2222, + "video_reward_cumulative_accuracy": 0.8343834383438344 + }, + { + "epoch": 0.659839715048976, + "grad_norm": 1.9002763032913208, + "learning_rate": 1.5648713579685201e-06, + "loss": 0.036, + "step": 2223, + "video_reward_cumulative_accuracy": 0.8342330184435448 + }, + { + "epoch": 0.6601365390323538, + "grad_norm": 4.694214820861816, + "learning_rate": 1.5624695368203975e-06, + "loss": 0.0338, + "step": 2224, + "video_reward_cumulative_accuracy": 0.8343075539568345 + }, + { + "epoch": 0.6604333630157316, + "grad_norm": 2.101926326751709, + "learning_rate": 1.560068722202221e-06, + "loss": 0.0163, + "step": 2225, + "video_reward_cumulative_accuracy": 0.8343820224719101 + }, + { + "epoch": 0.6607301869991096, + "grad_norm": 2.703768491744995, + "learning_rate": 1.557668916691499e-06, + "loss": 0.0342, + "step": 2226, + "video_reward_cumulative_accuracy": 0.8344564240790656 + }, + { + "epoch": 0.6610270109824874, + "grad_norm": 1.3713093996047974, + "learning_rate": 1.555270122864656e-06, + "loss": 0.0089, + "step": 2227, + "video_reward_cumulative_accuracy": 0.8345307588684329 + }, + { + "epoch": 0.6613238349658652, + "grad_norm": 1.0959382057189941, + "learning_rate": 1.5528723432970288e-06, + "loss": 0.0285, + "step": 2228, + "video_reward_cumulative_accuracy": 0.8346050269299821 + }, + { + "epoch": 0.661620658949243, + "grad_norm": 2.450881242752075, + "learning_rate": 1.5504755805628677e-06, + "loss": 0.026, + "step": 2229, + "video_reward_cumulative_accuracy": 0.8346792283535217 + }, + { + "epoch": 0.661917482932621, + "grad_norm": 3.218003749847412, + "learning_rate": 1.548079837235329e-06, + "loss": 0.0422, + "step": 2230, + "video_reward_cumulative_accuracy": 0.8345291479820628 + }, + { + "epoch": 0.6622143069159988, + "grad_norm": 3.611631393432617, + "learning_rate": 1.545685115886477e-06, + "loss": 0.1359, + "step": 2231, + "video_reward_cumulative_accuracy": 0.834603316898252 + }, + { + "epoch": 0.6625111308993766, + "grad_norm": 3.1366591453552246, + "learning_rate": 1.5432914190872757e-06, + "loss": 0.1005, + "step": 2232, + "video_reward_cumulative_accuracy": 0.8346774193548387 + }, + { + "epoch": 0.6628079548827546, + "grad_norm": 1.874556064605713, + "learning_rate": 1.5408987494075924e-06, + "loss": 0.0187, + "step": 2233, + "video_reward_cumulative_accuracy": 0.8345275414240931 + }, + { + "epoch": 0.6631047788661324, + "grad_norm": 2.389155864715576, + "learning_rate": 1.53850710941619e-06, + "loss": 0.0388, + "step": 2234, + "video_reward_cumulative_accuracy": 0.8346016114592659 + }, + { + "epoch": 0.6634016028495102, + "grad_norm": 2.063230037689209, + "learning_rate": 1.5361165016807261e-06, + "loss": 0.0311, + "step": 2235, + "video_reward_cumulative_accuracy": 0.834675615212528 + }, + { + "epoch": 0.663698426832888, + "grad_norm": 2.24172043800354, + "learning_rate": 1.5337269287677497e-06, + "loss": 0.0262, + "step": 2236, + "video_reward_cumulative_accuracy": 0.8347495527728086 + }, + { + "epoch": 0.663995250816266, + "grad_norm": 2.5830023288726807, + "learning_rate": 1.5313383932426996e-06, + "loss": 0.0807, + "step": 2237, + "video_reward_cumulative_accuracy": 0.834823424228878 + }, + { + "epoch": 0.6642920747996438, + "grad_norm": 2.467978000640869, + "learning_rate": 1.5289508976699007e-06, + "loss": 0.0606, + "step": 2238, + "video_reward_cumulative_accuracy": 0.8348972296693477 + }, + { + "epoch": 0.6645888987830216, + "grad_norm": 2.111464500427246, + "learning_rate": 1.5265644446125606e-06, + "loss": 0.0398, + "step": 2239, + "video_reward_cumulative_accuracy": 0.8349709691826709 + }, + { + "epoch": 0.6648857227663996, + "grad_norm": 2.0798873901367188, + "learning_rate": 1.5241790366327685e-06, + "loss": 0.0338, + "step": 2240, + "video_reward_cumulative_accuracy": 0.8350446428571429 + }, + { + "epoch": 0.6651825467497774, + "grad_norm": 0.6554487943649292, + "learning_rate": 1.5217946762914924e-06, + "loss": 0.0063, + "step": 2241, + "video_reward_cumulative_accuracy": 0.8351182507809014 + }, + { + "epoch": 0.6654793707331552, + "grad_norm": 0.8652382493019104, + "learning_rate": 1.5194113661485727e-06, + "loss": 0.0122, + "step": 2242, + "video_reward_cumulative_accuracy": 0.8351917930419268 + }, + { + "epoch": 0.665776194716533, + "grad_norm": 1.6087634563446045, + "learning_rate": 1.5170291087627258e-06, + "loss": 0.0285, + "step": 2243, + "video_reward_cumulative_accuracy": 0.8352652697280428 + }, + { + "epoch": 0.666073018699911, + "grad_norm": 2.6194751262664795, + "learning_rate": 1.5146479066915355e-06, + "loss": 0.0324, + "step": 2244, + "video_reward_cumulative_accuracy": 0.8351158645276292 + }, + { + "epoch": 0.6663698426832888, + "grad_norm": 1.7327265739440918, + "learning_rate": 1.5122677624914528e-06, + "loss": 0.0704, + "step": 2245, + "video_reward_cumulative_accuracy": 0.834966592427617 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.7121922373771667, + "learning_rate": 1.5098886787177951e-06, + "loss": 0.0064, + "step": 2246, + "video_reward_cumulative_accuracy": 0.835040071237756 + }, + { + "epoch": 0.6669634906500446, + "grad_norm": 2.6367523670196533, + "learning_rate": 1.507510657924738e-06, + "loss": 0.0327, + "step": 2247, + "video_reward_cumulative_accuracy": 0.835113484646195 + }, + { + "epoch": 0.6672603146334224, + "grad_norm": 4.55868673324585, + "learning_rate": 1.5051337026653195e-06, + "loss": 0.048, + "step": 2248, + "video_reward_cumulative_accuracy": 0.8351868327402135 + }, + { + "epoch": 0.6675571386168002, + "grad_norm": 3.684610605239868, + "learning_rate": 1.502757815491429e-06, + "loss": 0.0326, + "step": 2249, + "video_reward_cumulative_accuracy": 0.8352601156069365 + }, + { + "epoch": 0.667853962600178, + "grad_norm": 3.4382266998291016, + "learning_rate": 1.5003829989538154e-06, + "loss": 0.0588, + "step": 2250, + "video_reward_cumulative_accuracy": 0.8351111111111111 + }, + { + "epoch": 0.668150786583556, + "grad_norm": 2.8827478885650635, + "learning_rate": 1.4980092556020713e-06, + "loss": 0.041, + "step": 2251, + "video_reward_cumulative_accuracy": 0.8351843625055531 + }, + { + "epoch": 0.6684476105669338, + "grad_norm": 0.7824203968048096, + "learning_rate": 1.495636587984643e-06, + "loss": 0.0075, + "step": 2252, + "video_reward_cumulative_accuracy": 0.8352575488454707 + }, + { + "epoch": 0.6687444345503116, + "grad_norm": 1.5358774662017822, + "learning_rate": 1.4932649986488192e-06, + "loss": 0.0158, + "step": 2253, + "video_reward_cumulative_accuracy": 0.8353306702174877 + }, + { + "epoch": 0.6690412585336896, + "grad_norm": 0.3571893572807312, + "learning_rate": 1.4908944901407296e-06, + "loss": 0.0068, + "step": 2254, + "video_reward_cumulative_accuracy": 0.8354037267080745 + }, + { + "epoch": 0.6693380825170674, + "grad_norm": 5.035340785980225, + "learning_rate": 1.488525065005348e-06, + "loss": 0.0604, + "step": 2255, + "video_reward_cumulative_accuracy": 0.8352549889135255 + }, + { + "epoch": 0.6696349065004452, + "grad_norm": 4.127224922180176, + "learning_rate": 1.4861567257864795e-06, + "loss": 0.0443, + "step": 2256, + "video_reward_cumulative_accuracy": 0.8353280141843972 + }, + { + "epoch": 0.669931730483823, + "grad_norm": 1.52211594581604, + "learning_rate": 1.4837894750267664e-06, + "loss": 0.0399, + "step": 2257, + "video_reward_cumulative_accuracy": 0.835400974745237 + }, + { + "epoch": 0.670228554467201, + "grad_norm": 2.7589707374572754, + "learning_rate": 1.481423315267685e-06, + "loss": 0.0633, + "step": 2258, + "video_reward_cumulative_accuracy": 0.8354738706820195 + }, + { + "epoch": 0.6705253784505788, + "grad_norm": 1.0256385803222656, + "learning_rate": 1.479058249049533e-06, + "loss": 0.0133, + "step": 2259, + "video_reward_cumulative_accuracy": 0.8355467020805666 + }, + { + "epoch": 0.6708222024339566, + "grad_norm": 1.3333526849746704, + "learning_rate": 1.4766942789114432e-06, + "loss": 0.0201, + "step": 2260, + "video_reward_cumulative_accuracy": 0.8353982300884956 + }, + { + "epoch": 0.6711190264173346, + "grad_norm": 0.8754663467407227, + "learning_rate": 1.4743314073913636e-06, + "loss": 0.0151, + "step": 2261, + "video_reward_cumulative_accuracy": 0.8354710305174702 + }, + { + "epoch": 0.6714158504007124, + "grad_norm": 3.1899867057800293, + "learning_rate": 1.4719696370260678e-06, + "loss": 0.0595, + "step": 2262, + "video_reward_cumulative_accuracy": 0.8353227232537578 + }, + { + "epoch": 0.6717126743840902, + "grad_norm": 0.2183128446340561, + "learning_rate": 1.469608970351144e-06, + "loss": 0.004, + "step": 2263, + "video_reward_cumulative_accuracy": 0.8353954927087937 + }, + { + "epoch": 0.672009498367468, + "grad_norm": 2.6553597450256348, + "learning_rate": 1.4672494099009981e-06, + "loss": 0.0242, + "step": 2264, + "video_reward_cumulative_accuracy": 0.8354681978798587 + }, + { + "epoch": 0.672306322350846, + "grad_norm": 1.7975043058395386, + "learning_rate": 1.4648909582088482e-06, + "loss": 0.0358, + "step": 2265, + "video_reward_cumulative_accuracy": 0.8355408388520972 + }, + { + "epoch": 0.6726031463342238, + "grad_norm": 2.4716291427612305, + "learning_rate": 1.4625336178067189e-06, + "loss": 0.0977, + "step": 2266, + "video_reward_cumulative_accuracy": 0.8351721094439541 + }, + { + "epoch": 0.6728999703176016, + "grad_norm": 1.4434208869934082, + "learning_rate": 1.4601773912254457e-06, + "loss": 0.0303, + "step": 2267, + "video_reward_cumulative_accuracy": 0.8352448169386855 + }, + { + "epoch": 0.6731967943009796, + "grad_norm": 0.3788398504257202, + "learning_rate": 1.4578222809946655e-06, + "loss": 0.0153, + "step": 2268, + "video_reward_cumulative_accuracy": 0.8353174603174603 + }, + { + "epoch": 0.6734936182843574, + "grad_norm": 2.074415683746338, + "learning_rate": 1.4554682896428179e-06, + "loss": 0.0558, + "step": 2269, + "video_reward_cumulative_accuracy": 0.8351696782723667 + }, + { + "epoch": 0.6737904422677352, + "grad_norm": 6.2886962890625, + "learning_rate": 1.4531154196971414e-06, + "loss": 0.0606, + "step": 2270, + "video_reward_cumulative_accuracy": 0.8352422907488987 + }, + { + "epoch": 0.674087266251113, + "grad_norm": 3.100346803665161, + "learning_rate": 1.45076367368367e-06, + "loss": 0.027, + "step": 2271, + "video_reward_cumulative_accuracy": 0.8350946719506825 + }, + { + "epoch": 0.674384090234491, + "grad_norm": 2.0418272018432617, + "learning_rate": 1.4484130541272323e-06, + "loss": 0.0412, + "step": 2272, + "video_reward_cumulative_accuracy": 0.8351672535211268 + }, + { + "epoch": 0.6746809142178688, + "grad_norm": 1.8124505281448364, + "learning_rate": 1.4460635635514448e-06, + "loss": 0.0292, + "step": 2273, + "video_reward_cumulative_accuracy": 0.8352397712274527 + }, + { + "epoch": 0.6749777382012466, + "grad_norm": 3.0479700565338135, + "learning_rate": 1.443715204478715e-06, + "loss": 0.0704, + "step": 2274, + "video_reward_cumulative_accuracy": 0.8353122251539138 + }, + { + "epoch": 0.6752745621846246, + "grad_norm": 1.432740330696106, + "learning_rate": 1.4413679794302325e-06, + "loss": 0.0208, + "step": 2275, + "video_reward_cumulative_accuracy": 0.8353846153846154 + }, + { + "epoch": 0.6755713861680024, + "grad_norm": 1.0187022686004639, + "learning_rate": 1.4390218909259731e-06, + "loss": 0.0187, + "step": 2276, + "video_reward_cumulative_accuracy": 0.835456942003515 + }, + { + "epoch": 0.6758682101513802, + "grad_norm": 3.2267208099365234, + "learning_rate": 1.436676941484691e-06, + "loss": 0.0574, + "step": 2277, + "video_reward_cumulative_accuracy": 0.8353096179183136 + }, + { + "epoch": 0.676165034134758, + "grad_norm": 1.9733569622039795, + "learning_rate": 1.4343331336239151e-06, + "loss": 0.031, + "step": 2278, + "video_reward_cumulative_accuracy": 0.8353819139596137 + }, + { + "epoch": 0.676461858118136, + "grad_norm": 1.0220005512237549, + "learning_rate": 1.4319904698599524e-06, + "loss": 0.0171, + "step": 2279, + "video_reward_cumulative_accuracy": 0.8354541465555068 + }, + { + "epoch": 0.6767586821015138, + "grad_norm": 0.9054021239280701, + "learning_rate": 1.4296489527078777e-06, + "loss": 0.0106, + "step": 2280, + "video_reward_cumulative_accuracy": 0.8355263157894737 + }, + { + "epoch": 0.6770555060848916, + "grad_norm": 1.9927374124526978, + "learning_rate": 1.4273085846815374e-06, + "loss": 0.0309, + "step": 2281, + "video_reward_cumulative_accuracy": 0.8355984217448488 + }, + { + "epoch": 0.6773523300682696, + "grad_norm": 1.0571626424789429, + "learning_rate": 1.4249693682935462e-06, + "loss": 0.0116, + "step": 2282, + "video_reward_cumulative_accuracy": 0.8356704645048203 + }, + { + "epoch": 0.6776491540516474, + "grad_norm": 2.752652883529663, + "learning_rate": 1.4226313060552774e-06, + "loss": 0.0646, + "step": 2283, + "video_reward_cumulative_accuracy": 0.835742444152431 + }, + { + "epoch": 0.6779459780350252, + "grad_norm": 1.6572239398956299, + "learning_rate": 1.4202944004768694e-06, + "loss": 0.0332, + "step": 2284, + "video_reward_cumulative_accuracy": 0.8358143607705779 + }, + { + "epoch": 0.678242802018403, + "grad_norm": 2.04713773727417, + "learning_rate": 1.4179586540672152e-06, + "loss": 0.0624, + "step": 2285, + "video_reward_cumulative_accuracy": 0.8358862144420132 + }, + { + "epoch": 0.678539626001781, + "grad_norm": 0.3329226076602936, + "learning_rate": 1.4156240693339663e-06, + "loss": 0.0062, + "step": 2286, + "video_reward_cumulative_accuracy": 0.8359580052493438 + }, + { + "epoch": 0.6788364499851588, + "grad_norm": 2.5659382343292236, + "learning_rate": 1.4132906487835263e-06, + "loss": 0.0549, + "step": 2287, + "video_reward_cumulative_accuracy": 0.8360297332750328 + }, + { + "epoch": 0.6791332739685366, + "grad_norm": 2.2131476402282715, + "learning_rate": 1.4109583949210481e-06, + "loss": 0.0267, + "step": 2288, + "video_reward_cumulative_accuracy": 0.8361013986013986 + }, + { + "epoch": 0.6794300979519146, + "grad_norm": 0.8246326446533203, + "learning_rate": 1.408627310250434e-06, + "loss": 0.0205, + "step": 2289, + "video_reward_cumulative_accuracy": 0.836173001310616 + }, + { + "epoch": 0.6797269219352924, + "grad_norm": 1.7747602462768555, + "learning_rate": 1.4062973972743277e-06, + "loss": 0.0255, + "step": 2290, + "video_reward_cumulative_accuracy": 0.8362445414847162 + }, + { + "epoch": 0.6800237459186702, + "grad_norm": 2.40305495262146, + "learning_rate": 1.4039686584941176e-06, + "loss": 0.0255, + "step": 2291, + "video_reward_cumulative_accuracy": 0.8360977738978612 + }, + { + "epoch": 0.680320569902048, + "grad_norm": 1.9818377494812012, + "learning_rate": 1.4016410964099308e-06, + "loss": 0.0512, + "step": 2292, + "video_reward_cumulative_accuracy": 0.8361692844677138 + }, + { + "epoch": 0.680617393885426, + "grad_norm": 2.252643585205078, + "learning_rate": 1.3993147135206311e-06, + "loss": 0.0324, + "step": 2293, + "video_reward_cumulative_accuracy": 0.8362407326646315 + }, + { + "epoch": 0.6809142178688038, + "grad_norm": 2.190392017364502, + "learning_rate": 1.3969895123238177e-06, + "loss": 0.0207, + "step": 2294, + "video_reward_cumulative_accuracy": 0.8360941586748039 + }, + { + "epoch": 0.6812110418521816, + "grad_norm": 2.94880747795105, + "learning_rate": 1.3946654953158176e-06, + "loss": 0.0357, + "step": 2295, + "video_reward_cumulative_accuracy": 0.8359477124183007 + }, + { + "epoch": 0.6815078658355596, + "grad_norm": 1.3520139455795288, + "learning_rate": 1.3923426649916894e-06, + "loss": 0.0097, + "step": 2296, + "video_reward_cumulative_accuracy": 0.8360191637630662 + }, + { + "epoch": 0.6818046898189374, + "grad_norm": 2.1631641387939453, + "learning_rate": 1.3900210238452169e-06, + "loss": 0.0406, + "step": 2297, + "video_reward_cumulative_accuracy": 0.8360905528950805 + }, + { + "epoch": 0.6821015138023152, + "grad_norm": 3.2488834857940674, + "learning_rate": 1.3877005743689087e-06, + "loss": 0.055, + "step": 2298, + "video_reward_cumulative_accuracy": 0.8361618798955613 + }, + { + "epoch": 0.682398337785693, + "grad_norm": 3.1709814071655273, + "learning_rate": 1.3853813190539899e-06, + "loss": 0.0387, + "step": 2299, + "video_reward_cumulative_accuracy": 0.8362331448455851 + }, + { + "epoch": 0.682695161769071, + "grad_norm": 5.1996307373046875, + "learning_rate": 1.3830632603904075e-06, + "loss": 0.0409, + "step": 2300, + "video_reward_cumulative_accuracy": 0.836304347826087 + }, + { + "epoch": 0.6829919857524488, + "grad_norm": 1.6898528337478638, + "learning_rate": 1.3807464008668225e-06, + "loss": 0.0352, + "step": 2301, + "video_reward_cumulative_accuracy": 0.8363754889178618 + }, + { + "epoch": 0.6832888097358266, + "grad_norm": 2.885385513305664, + "learning_rate": 1.3784307429706084e-06, + "loss": 0.0583, + "step": 2302, + "video_reward_cumulative_accuracy": 0.8364465682015638 + }, + { + "epoch": 0.6835856337192046, + "grad_norm": 0.6500884890556335, + "learning_rate": 1.3761162891878496e-06, + "loss": 0.0198, + "step": 2303, + "video_reward_cumulative_accuracy": 0.8363004776378636 + }, + { + "epoch": 0.6838824577025824, + "grad_norm": 2.2626075744628906, + "learning_rate": 1.3738030420033349e-06, + "loss": 0.0381, + "step": 2304, + "video_reward_cumulative_accuracy": 0.8363715277777778 + }, + { + "epoch": 0.6841792816859602, + "grad_norm": 2.9060208797454834, + "learning_rate": 1.3714910039005608e-06, + "loss": 0.0655, + "step": 2305, + "video_reward_cumulative_accuracy": 0.8364425162689805 + }, + { + "epoch": 0.684476105669338, + "grad_norm": 0.6458223462104797, + "learning_rate": 1.3691801773617247e-06, + "loss": 0.0079, + "step": 2306, + "video_reward_cumulative_accuracy": 0.8365134431916739 + }, + { + "epoch": 0.684772929652716, + "grad_norm": 0.7338875532150269, + "learning_rate": 1.3668705648677227e-06, + "loss": 0.0113, + "step": 2307, + "video_reward_cumulative_accuracy": 0.8365843086259211 + }, + { + "epoch": 0.6850697536360938, + "grad_norm": 2.2893717288970947, + "learning_rate": 1.3645621688981497e-06, + "loss": 0.0306, + "step": 2308, + "video_reward_cumulative_accuracy": 0.8366551126516465 + }, + { + "epoch": 0.6853665776194716, + "grad_norm": 0.5800947546958923, + "learning_rate": 1.3622549919312902e-06, + "loss": 0.0128, + "step": 2309, + "video_reward_cumulative_accuracy": 0.8367258553486357 + }, + { + "epoch": 0.6856634016028496, + "grad_norm": 5.060973167419434, + "learning_rate": 1.3599490364441236e-06, + "loss": 0.0676, + "step": 2310, + "video_reward_cumulative_accuracy": 0.8367965367965368 + }, + { + "epoch": 0.6859602255862274, + "grad_norm": 0.9198673367500305, + "learning_rate": 1.3576443049123175e-06, + "loss": 0.0215, + "step": 2311, + "video_reward_cumulative_accuracy": 0.8368671570748594 + }, + { + "epoch": 0.6862570495696052, + "grad_norm": 1.2726801633834839, + "learning_rate": 1.3553407998102243e-06, + "loss": 0.0265, + "step": 2312, + "video_reward_cumulative_accuracy": 0.8369377162629758 + }, + { + "epoch": 0.686553873552983, + "grad_norm": 0.5441017746925354, + "learning_rate": 1.3530385236108817e-06, + "loss": 0.0066, + "step": 2313, + "video_reward_cumulative_accuracy": 0.8370082144401211 + }, + { + "epoch": 0.686850697536361, + "grad_norm": 5.122513771057129, + "learning_rate": 1.3507374787860045e-06, + "loss": 0.0473, + "step": 2314, + "video_reward_cumulative_accuracy": 0.8370786516853933 + }, + { + "epoch": 0.6871475215197388, + "grad_norm": 3.541785478591919, + "learning_rate": 1.3484376678059885e-06, + "loss": 0.0438, + "step": 2315, + "video_reward_cumulative_accuracy": 0.8371490280777538 + }, + { + "epoch": 0.6874443455031166, + "grad_norm": 1.8629510402679443, + "learning_rate": 1.3461390931399044e-06, + "loss": 0.05, + "step": 2316, + "video_reward_cumulative_accuracy": 0.8372193436960277 + }, + { + "epoch": 0.6877411694864946, + "grad_norm": 2.798170328140259, + "learning_rate": 1.3438417572554947e-06, + "loss": 0.0348, + "step": 2317, + "video_reward_cumulative_accuracy": 0.8372895986189037 + }, + { + "epoch": 0.6880379934698724, + "grad_norm": 1.2799954414367676, + "learning_rate": 1.3415456626191737e-06, + "loss": 0.0092, + "step": 2318, + "video_reward_cumulative_accuracy": 0.8373597929249353 + }, + { + "epoch": 0.6883348174532502, + "grad_norm": 2.5710394382476807, + "learning_rate": 1.33925081169602e-06, + "loss": 0.0219, + "step": 2319, + "video_reward_cumulative_accuracy": 0.8374299266925399 + }, + { + "epoch": 0.688631641436628, + "grad_norm": 2.2735490798950195, + "learning_rate": 1.3369572069497802e-06, + "loss": 0.0518, + "step": 2320, + "video_reward_cumulative_accuracy": 0.8375 + }, + { + "epoch": 0.688928465420006, + "grad_norm": 2.3334121704101562, + "learning_rate": 1.3346648508428595e-06, + "loss": 0.0304, + "step": 2321, + "video_reward_cumulative_accuracy": 0.8375700129254632 + }, + { + "epoch": 0.6892252894033838, + "grad_norm": 2.004912853240967, + "learning_rate": 1.3323737458363278e-06, + "loss": 0.0474, + "step": 2322, + "video_reward_cumulative_accuracy": 0.8376399655469423 + }, + { + "epoch": 0.6895221133867616, + "grad_norm": 3.0432021617889404, + "learning_rate": 1.3300838943899064e-06, + "loss": 0.0394, + "step": 2323, + "video_reward_cumulative_accuracy": 0.837709857942316 + }, + { + "epoch": 0.6898189373701396, + "grad_norm": 0.710309624671936, + "learning_rate": 1.327795298961974e-06, + "loss": 0.0132, + "step": 2324, + "video_reward_cumulative_accuracy": 0.8377796901893287 + }, + { + "epoch": 0.6901157613535174, + "grad_norm": 1.954226016998291, + "learning_rate": 1.3255079620095602e-06, + "loss": 0.0161, + "step": 2325, + "video_reward_cumulative_accuracy": 0.8378494623655914 + }, + { + "epoch": 0.6904125853368952, + "grad_norm": 2.4498023986816406, + "learning_rate": 1.323221885988341e-06, + "loss": 0.0582, + "step": 2326, + "video_reward_cumulative_accuracy": 0.8377042132416165 + }, + { + "epoch": 0.690709409320273, + "grad_norm": 0.965714693069458, + "learning_rate": 1.3209370733526444e-06, + "loss": 0.013, + "step": 2327, + "video_reward_cumulative_accuracy": 0.8377739578856898 + }, + { + "epoch": 0.691006233303651, + "grad_norm": 1.6976726055145264, + "learning_rate": 1.3186535265554363e-06, + "loss": 0.0317, + "step": 2328, + "video_reward_cumulative_accuracy": 0.8378436426116839 + }, + { + "epoch": 0.6913030572870288, + "grad_norm": 2.1996965408325195, + "learning_rate": 1.3163712480483255e-06, + "loss": 0.0466, + "step": 2329, + "video_reward_cumulative_accuracy": 0.8379132674967797 + }, + { + "epoch": 0.6915998812704066, + "grad_norm": 5.554974555969238, + "learning_rate": 1.3140902402815616e-06, + "loss": 0.038, + "step": 2330, + "video_reward_cumulative_accuracy": 0.8379828326180258 + }, + { + "epoch": 0.6918967052537845, + "grad_norm": 2.487802028656006, + "learning_rate": 1.3118105057040245e-06, + "loss": 0.0667, + "step": 2331, + "video_reward_cumulative_accuracy": 0.838052338052338 + }, + { + "epoch": 0.6921935292371624, + "grad_norm": 2.990607976913452, + "learning_rate": 1.3095320467632344e-06, + "loss": 0.0428, + "step": 2332, + "video_reward_cumulative_accuracy": 0.8381217838765008 + }, + { + "epoch": 0.6924903532205402, + "grad_norm": 1.0717166662216187, + "learning_rate": 1.3072548659053353e-06, + "loss": 0.0196, + "step": 2333, + "video_reward_cumulative_accuracy": 0.8381911701671667 + }, + { + "epoch": 0.692787177203918, + "grad_norm": 2.585355043411255, + "learning_rate": 1.3049789655751039e-06, + "loss": 0.0557, + "step": 2334, + "video_reward_cumulative_accuracy": 0.8382604970008569 + }, + { + "epoch": 0.693084001187296, + "grad_norm": 1.229258418083191, + "learning_rate": 1.3027043482159378e-06, + "loss": 0.0284, + "step": 2335, + "video_reward_cumulative_accuracy": 0.8383297644539615 + }, + { + "epoch": 0.6933808251706738, + "grad_norm": 1.2362085580825806, + "learning_rate": 1.3004310162698598e-06, + "loss": 0.025, + "step": 2336, + "video_reward_cumulative_accuracy": 0.8383989726027398 + }, + { + "epoch": 0.6936776491540516, + "grad_norm": 2.241290807723999, + "learning_rate": 1.298158972177515e-06, + "loss": 0.0327, + "step": 2337, + "video_reward_cumulative_accuracy": 0.8382541720154044 + }, + { + "epoch": 0.6939744731374295, + "grad_norm": 2.196943998336792, + "learning_rate": 1.2958882183781612e-06, + "loss": 0.0565, + "step": 2338, + "video_reward_cumulative_accuracy": 0.838109495295124 + }, + { + "epoch": 0.6942712971208074, + "grad_norm": 1.276810884475708, + "learning_rate": 1.2936187573096737e-06, + "loss": 0.0237, + "step": 2339, + "video_reward_cumulative_accuracy": 0.837964942283027 + }, + { + "epoch": 0.6945681211041852, + "grad_norm": 2.0675230026245117, + "learning_rate": 1.2913505914085384e-06, + "loss": 0.0823, + "step": 2340, + "video_reward_cumulative_accuracy": 0.8378205128205128 + }, + { + "epoch": 0.694864945087563, + "grad_norm": 1.5794941186904907, + "learning_rate": 1.2890837231098513e-06, + "loss": 0.0305, + "step": 2341, + "video_reward_cumulative_accuracy": 0.8378897906877403 + }, + { + "epoch": 0.695161769070941, + "grad_norm": 1.307797908782959, + "learning_rate": 1.2868181548473168e-06, + "loss": 0.0242, + "step": 2342, + "video_reward_cumulative_accuracy": 0.8379590093936806 + }, + { + "epoch": 0.6954585930543188, + "grad_norm": 1.9544386863708496, + "learning_rate": 1.2845538890532416e-06, + "loss": 0.0246, + "step": 2343, + "video_reward_cumulative_accuracy": 0.8380281690140845 + }, + { + "epoch": 0.6957554170376966, + "grad_norm": 2.473175048828125, + "learning_rate": 1.2822909281585359e-06, + "loss": 0.0998, + "step": 2344, + "video_reward_cumulative_accuracy": 0.8380972696245734 + }, + { + "epoch": 0.6960522410210745, + "grad_norm": 2.0167837142944336, + "learning_rate": 1.280029274592706e-06, + "loss": 0.0363, + "step": 2345, + "video_reward_cumulative_accuracy": 0.8381663113006397 + }, + { + "epoch": 0.6963490650044524, + "grad_norm": 0.8653875589370728, + "learning_rate": 1.2777689307838572e-06, + "loss": 0.0146, + "step": 2346, + "video_reward_cumulative_accuracy": 0.8382352941176471 + }, + { + "epoch": 0.6966458889878302, + "grad_norm": 2.326350688934326, + "learning_rate": 1.2755098991586884e-06, + "loss": 0.0474, + "step": 2347, + "video_reward_cumulative_accuracy": 0.8383042181508309 + }, + { + "epoch": 0.696942712971208, + "grad_norm": 3.8789632320404053, + "learning_rate": 1.273252182142489e-06, + "loss": 0.0332, + "step": 2348, + "video_reward_cumulative_accuracy": 0.8383730834752982 + }, + { + "epoch": 0.697239536954586, + "grad_norm": 1.4527256488800049, + "learning_rate": 1.2709957821591384e-06, + "loss": 0.0275, + "step": 2349, + "video_reward_cumulative_accuracy": 0.8384418901660281 + }, + { + "epoch": 0.6975363609379638, + "grad_norm": 2.494121551513672, + "learning_rate": 1.2687407016310992e-06, + "loss": 0.0569, + "step": 2350, + "video_reward_cumulative_accuracy": 0.8382978723404255 + }, + { + "epoch": 0.6978331849213416, + "grad_norm": 0.8342524170875549, + "learning_rate": 1.2664869429794197e-06, + "loss": 0.0104, + "step": 2351, + "video_reward_cumulative_accuracy": 0.8383666524883029 + }, + { + "epoch": 0.6981300089047195, + "grad_norm": 0.7167454361915588, + "learning_rate": 1.2642345086237294e-06, + "loss": 0.0146, + "step": 2352, + "video_reward_cumulative_accuracy": 0.8384353741496599 + }, + { + "epoch": 0.6984268328880974, + "grad_norm": 3.59609317779541, + "learning_rate": 1.261983400982234e-06, + "loss": 0.0414, + "step": 2353, + "video_reward_cumulative_accuracy": 0.838504037399065 + }, + { + "epoch": 0.6987236568714752, + "grad_norm": 2.267436981201172, + "learning_rate": 1.2597336224717183e-06, + "loss": 0.0416, + "step": 2354, + "video_reward_cumulative_accuracy": 0.8383602378929482 + }, + { + "epoch": 0.699020480854853, + "grad_norm": 2.883449077606201, + "learning_rate": 1.257485175507535e-06, + "loss": 0.038, + "step": 2355, + "video_reward_cumulative_accuracy": 0.8384288747346073 + }, + { + "epoch": 0.699317304838231, + "grad_norm": 0.7676532864570618, + "learning_rate": 1.255238062503612e-06, + "loss": 0.0109, + "step": 2356, + "video_reward_cumulative_accuracy": 0.8384974533106961 + }, + { + "epoch": 0.6996141288216088, + "grad_norm": 0.7922206521034241, + "learning_rate": 1.2529922858724422e-06, + "loss": 0.0178, + "step": 2357, + "video_reward_cumulative_accuracy": 0.8385659736953754 + }, + { + "epoch": 0.6999109528049866, + "grad_norm": 1.5223472118377686, + "learning_rate": 1.2507478480250862e-06, + "loss": 0.0353, + "step": 2358, + "video_reward_cumulative_accuracy": 0.8386344359626803 + }, + { + "epoch": 0.7002077767883645, + "grad_norm": 2.4586706161499023, + "learning_rate": 1.2485047513711643e-06, + "loss": 0.0442, + "step": 2359, + "video_reward_cumulative_accuracy": 0.8387028401865197 + }, + { + "epoch": 0.7005046007717424, + "grad_norm": 3.2379069328308105, + "learning_rate": 1.2462629983188586e-06, + "loss": 0.0436, + "step": 2360, + "video_reward_cumulative_accuracy": 0.8387711864406779 + }, + { + "epoch": 0.7008014247551202, + "grad_norm": 1.5388333797454834, + "learning_rate": 1.2440225912749096e-06, + "loss": 0.0233, + "step": 2361, + "video_reward_cumulative_accuracy": 0.838839474798814 + }, + { + "epoch": 0.701098248738498, + "grad_norm": 1.7202107906341553, + "learning_rate": 1.241783532644611e-06, + "loss": 0.0366, + "step": 2362, + "video_reward_cumulative_accuracy": 0.8389077053344624 + }, + { + "epoch": 0.701395072721876, + "grad_norm": 2.6380770206451416, + "learning_rate": 1.2395458248318107e-06, + "loss": 0.0371, + "step": 2363, + "video_reward_cumulative_accuracy": 0.8387642826914938 + }, + { + "epoch": 0.7016918967052538, + "grad_norm": 1.3252801895141602, + "learning_rate": 1.2373094702389031e-06, + "loss": 0.015, + "step": 2364, + "video_reward_cumulative_accuracy": 0.8388324873096447 + }, + { + "epoch": 0.7019887206886316, + "grad_norm": 2.323251724243164, + "learning_rate": 1.2350744712668332e-06, + "loss": 0.0574, + "step": 2365, + "video_reward_cumulative_accuracy": 0.8386892177589852 + }, + { + "epoch": 0.7022855446720095, + "grad_norm": 2.107527017593384, + "learning_rate": 1.2328408303150892e-06, + "loss": 0.0387, + "step": 2366, + "video_reward_cumulative_accuracy": 0.8387573964497042 + }, + { + "epoch": 0.7025823686553874, + "grad_norm": 2.007369041442871, + "learning_rate": 1.2306085497817016e-06, + "loss": 0.0187, + "step": 2367, + "video_reward_cumulative_accuracy": 0.8388255175327418 + }, + { + "epoch": 0.7028791926387652, + "grad_norm": 2.632237195968628, + "learning_rate": 1.2283776320632409e-06, + "loss": 0.0215, + "step": 2368, + "video_reward_cumulative_accuracy": 0.838893581081081 + }, + { + "epoch": 0.703176016622143, + "grad_norm": 2.760110378265381, + "learning_rate": 1.2261480795548123e-06, + "loss": 0.0942, + "step": 2369, + "video_reward_cumulative_accuracy": 0.8389615871675813 + }, + { + "epoch": 0.703472840605521, + "grad_norm": 1.2644137144088745, + "learning_rate": 1.223919894650058e-06, + "loss": 0.0105, + "step": 2370, + "video_reward_cumulative_accuracy": 0.8390295358649789 + }, + { + "epoch": 0.7037696645888988, + "grad_norm": 1.7368650436401367, + "learning_rate": 1.2216930797411486e-06, + "loss": 0.0209, + "step": 2371, + "video_reward_cumulative_accuracy": 0.8390974272458878 + }, + { + "epoch": 0.7040664885722766, + "grad_norm": 1.736447811126709, + "learning_rate": 1.2194676372187886e-06, + "loss": 0.0279, + "step": 2372, + "video_reward_cumulative_accuracy": 0.8389544688026982 + }, + { + "epoch": 0.7043633125556545, + "grad_norm": 2.79758620262146, + "learning_rate": 1.2172435694722064e-06, + "loss": 0.0739, + "step": 2373, + "video_reward_cumulative_accuracy": 0.838811630847029 + }, + { + "epoch": 0.7046601365390324, + "grad_norm": 0.7806415557861328, + "learning_rate": 1.2150208788891533e-06, + "loss": 0.0124, + "step": 2374, + "video_reward_cumulative_accuracy": 0.8388795282224094 + }, + { + "epoch": 0.7049569605224102, + "grad_norm": 1.3390443325042725, + "learning_rate": 1.2127995678559042e-06, + "loss": 0.015, + "step": 2375, + "video_reward_cumulative_accuracy": 0.8389473684210527 + }, + { + "epoch": 0.705253784505788, + "grad_norm": 2.772890567779541, + "learning_rate": 1.2105796387572514e-06, + "loss": 0.0162, + "step": 2376, + "video_reward_cumulative_accuracy": 0.8390151515151515 + }, + { + "epoch": 0.705550608489166, + "grad_norm": 1.042292594909668, + "learning_rate": 1.2083610939765031e-06, + "loss": 0.0178, + "step": 2377, + "video_reward_cumulative_accuracy": 0.8390828775767775 + }, + { + "epoch": 0.7058474324725438, + "grad_norm": 2.8607852458953857, + "learning_rate": 1.2061439358954862e-06, + "loss": 0.0205, + "step": 2378, + "video_reward_cumulative_accuracy": 0.8391505466778806 + }, + { + "epoch": 0.7061442564559216, + "grad_norm": 1.2580045461654663, + "learning_rate": 1.203928166894532e-06, + "loss": 0.0118, + "step": 2379, + "video_reward_cumulative_accuracy": 0.8392181588902901 + }, + { + "epoch": 0.7064410804392995, + "grad_norm": 2.3520500659942627, + "learning_rate": 1.2017137893524851e-06, + "loss": 0.0244, + "step": 2380, + "video_reward_cumulative_accuracy": 0.8392857142857143 + }, + { + "epoch": 0.7067379044226774, + "grad_norm": 0.5174872875213623, + "learning_rate": 1.1995008056466933e-06, + "loss": 0.0071, + "step": 2381, + "video_reward_cumulative_accuracy": 0.8393532129357413 + }, + { + "epoch": 0.7070347284060552, + "grad_norm": 1.8015003204345703, + "learning_rate": 1.19728921815301e-06, + "loss": 0.019, + "step": 2382, + "video_reward_cumulative_accuracy": 0.8394206549118388 + }, + { + "epoch": 0.707331552389433, + "grad_norm": 1.3630971908569336, + "learning_rate": 1.1950790292457893e-06, + "loss": 0.0423, + "step": 2383, + "video_reward_cumulative_accuracy": 0.8394880402853546 + }, + { + "epoch": 0.707628376372811, + "grad_norm": 3.022733449935913, + "learning_rate": 1.1928702412978833e-06, + "loss": 0.066, + "step": 2384, + "video_reward_cumulative_accuracy": 0.8395553691275168 + }, + { + "epoch": 0.7079252003561888, + "grad_norm": 1.3637797832489014, + "learning_rate": 1.1906628566806414e-06, + "loss": 0.0447, + "step": 2385, + "video_reward_cumulative_accuracy": 0.8394129979035639 + }, + { + "epoch": 0.7082220243395666, + "grad_norm": 0.23082542419433594, + "learning_rate": 1.188456877763903e-06, + "loss": 0.0028, + "step": 2386, + "video_reward_cumulative_accuracy": 0.8394803017602682 + }, + { + "epoch": 0.7085188483229445, + "grad_norm": 3.9995596408843994, + "learning_rate": 1.1862523069160017e-06, + "loss": 0.0492, + "step": 2387, + "video_reward_cumulative_accuracy": 0.8395475492249685 + }, + { + "epoch": 0.7088156723063224, + "grad_norm": 2.762430429458618, + "learning_rate": 1.1840491465037584e-06, + "loss": 0.0473, + "step": 2388, + "video_reward_cumulative_accuracy": 0.8396147403685092 + }, + { + "epoch": 0.7091124962897002, + "grad_norm": 1.9774202108383179, + "learning_rate": 1.1818473988924797e-06, + "loss": 0.0493, + "step": 2389, + "video_reward_cumulative_accuracy": 0.8396818752616158 + }, + { + "epoch": 0.709409320273078, + "grad_norm": 1.2784936428070068, + "learning_rate": 1.179647066445956e-06, + "loss": 0.0216, + "step": 2390, + "video_reward_cumulative_accuracy": 0.8397489539748954 + }, + { + "epoch": 0.709706144256456, + "grad_norm": 3.470581531524658, + "learning_rate": 1.177448151526456e-06, + "loss": 0.0223, + "step": 2391, + "video_reward_cumulative_accuracy": 0.8398159765788373 + }, + { + "epoch": 0.7100029682398338, + "grad_norm": 2.1586861610412598, + "learning_rate": 1.1752506564947294e-06, + "loss": 0.0198, + "step": 2392, + "video_reward_cumulative_accuracy": 0.8398829431438127 + }, + { + "epoch": 0.7102997922232116, + "grad_norm": 2.96563982963562, + "learning_rate": 1.1730545837099999e-06, + "loss": 0.071, + "step": 2393, + "video_reward_cumulative_accuracy": 0.8399498537400752 + }, + { + "epoch": 0.7105966162065895, + "grad_norm": 1.8292112350463867, + "learning_rate": 1.1708599355299662e-06, + "loss": 0.0207, + "step": 2394, + "video_reward_cumulative_accuracy": 0.8400167084377611 + }, + { + "epoch": 0.7108934401899674, + "grad_norm": 4.007872581481934, + "learning_rate": 1.168666714310794e-06, + "loss": 0.0726, + "step": 2395, + "video_reward_cumulative_accuracy": 0.8400835073068893 + }, + { + "epoch": 0.7111902641733452, + "grad_norm": 3.1843671798706055, + "learning_rate": 1.1664749224071203e-06, + "loss": 0.0496, + "step": 2396, + "video_reward_cumulative_accuracy": 0.8401502504173622 + }, + { + "epoch": 0.711487088156723, + "grad_norm": 3.3738343715667725, + "learning_rate": 1.1642845621720463e-06, + "loss": 0.0419, + "step": 2397, + "video_reward_cumulative_accuracy": 0.8402169378389653 + }, + { + "epoch": 0.711783912140101, + "grad_norm": 1.7037007808685303, + "learning_rate": 1.1620956359571364e-06, + "loss": 0.0587, + "step": 2398, + "video_reward_cumulative_accuracy": 0.8402835696413679 + }, + { + "epoch": 0.7120807361234788, + "grad_norm": 2.8686163425445557, + "learning_rate": 1.1599081461124161e-06, + "loss": 0.0247, + "step": 2399, + "video_reward_cumulative_accuracy": 0.8403501458941226 + }, + { + "epoch": 0.7123775601068566, + "grad_norm": 0.6228769421577454, + "learning_rate": 1.1577220949863663e-06, + "loss": 0.0058, + "step": 2400, + "video_reward_cumulative_accuracy": 0.8404166666666667 + }, + { + "epoch": 0.7123775601068566, + "eval_runtime": 130.8618, + "eval_samples_per_second": 6.029, + "eval_steps_per_second": 0.757, + "eval_test_set_accuracy": 0.8131313131313131, + "step": 2400 + }, + { + "epoch": 0.7126743840902345, + "grad_norm": 1.7672585248947144, + "learning_rate": 1.155537484925926e-06, + "loss": 0.0586, + "step": 2401, + "video_reward_cumulative_accuracy": 0.8404831320283215 + }, + { + "epoch": 0.7129712080736124, + "grad_norm": 2.9688684940338135, + "learning_rate": 1.153354318276486e-06, + "loss": 0.0505, + "step": 2402, + "video_reward_cumulative_accuracy": 0.8405495420482931 + }, + { + "epoch": 0.7132680320569902, + "grad_norm": 2.505089044570923, + "learning_rate": 1.1511725973818879e-06, + "loss": 0.0257, + "step": 2403, + "video_reward_cumulative_accuracy": 0.8406158967956721 + }, + { + "epoch": 0.713564856040368, + "grad_norm": 0.5991393327713013, + "learning_rate": 1.1489923245844214e-06, + "loss": 0.0085, + "step": 2404, + "video_reward_cumulative_accuracy": 0.8406821963394343 + }, + { + "epoch": 0.713861680023746, + "grad_norm": 1.6461894512176514, + "learning_rate": 1.1468135022248195e-06, + "loss": 0.0169, + "step": 2405, + "video_reward_cumulative_accuracy": 0.8407484407484408 + }, + { + "epoch": 0.7141585040071238, + "grad_norm": 3.0698747634887695, + "learning_rate": 1.14463613264226e-06, + "loss": 0.0623, + "step": 2406, + "video_reward_cumulative_accuracy": 0.8408146300914381 + }, + { + "epoch": 0.7144553279905016, + "grad_norm": 2.5160300731658936, + "learning_rate": 1.1424602181743609e-06, + "loss": 0.066, + "step": 2407, + "video_reward_cumulative_accuracy": 0.8406730369754881 + }, + { + "epoch": 0.7147521519738795, + "grad_norm": 1.7137246131896973, + "learning_rate": 1.1402857611571772e-06, + "loss": 0.0148, + "step": 2408, + "video_reward_cumulative_accuracy": 0.8407392026578073 + }, + { + "epoch": 0.7150489759572574, + "grad_norm": 1.8538068532943726, + "learning_rate": 1.1381127639252005e-06, + "loss": 0.0292, + "step": 2409, + "video_reward_cumulative_accuracy": 0.8408053134080531 + }, + { + "epoch": 0.7153457999406352, + "grad_norm": 4.450146675109863, + "learning_rate": 1.1359412288113526e-06, + "loss": 0.031, + "step": 2410, + "video_reward_cumulative_accuracy": 0.8404564315352697 + }, + { + "epoch": 0.715642623924013, + "grad_norm": 1.273587703704834, + "learning_rate": 1.133771158146988e-06, + "loss": 0.0124, + "step": 2411, + "video_reward_cumulative_accuracy": 0.8405226047283285 + }, + { + "epoch": 0.715939447907391, + "grad_norm": 2.9038286209106445, + "learning_rate": 1.131602554261888e-06, + "loss": 0.0557, + "step": 2412, + "video_reward_cumulative_accuracy": 0.8405887230514096 + }, + { + "epoch": 0.7162362718907688, + "grad_norm": 1.5233856439590454, + "learning_rate": 1.1294354194842597e-06, + "loss": 0.0135, + "step": 2413, + "video_reward_cumulative_accuracy": 0.8406547865727311 + }, + { + "epoch": 0.7165330958741466, + "grad_norm": 0.3505679666996002, + "learning_rate": 1.1272697561407334e-06, + "loss": 0.004, + "step": 2414, + "video_reward_cumulative_accuracy": 0.8407207953603977 + }, + { + "epoch": 0.7168299198575245, + "grad_norm": 3.6674511432647705, + "learning_rate": 1.125105566556357e-06, + "loss": 0.0877, + "step": 2415, + "video_reward_cumulative_accuracy": 0.8405797101449275 + }, + { + "epoch": 0.7171267438409024, + "grad_norm": 0.8542490601539612, + "learning_rate": 1.1229428530546002e-06, + "loss": 0.0147, + "step": 2416, + "video_reward_cumulative_accuracy": 0.8406456953642384 + }, + { + "epoch": 0.7174235678242802, + "grad_norm": 2.0247857570648193, + "learning_rate": 1.1207816179573427e-06, + "loss": 0.0484, + "step": 2417, + "video_reward_cumulative_accuracy": 0.8407116259826231 + }, + { + "epoch": 0.717720391807658, + "grad_norm": 1.7398759126663208, + "learning_rate": 1.1186218635848838e-06, + "loss": 0.0595, + "step": 2418, + "video_reward_cumulative_accuracy": 0.8407775020678246 + }, + { + "epoch": 0.718017215791036, + "grad_norm": 1.2904212474822998, + "learning_rate": 1.1164635922559273e-06, + "loss": 0.07, + "step": 2419, + "video_reward_cumulative_accuracy": 0.8408433236874742 + }, + { + "epoch": 0.7183140397744138, + "grad_norm": 3.129547595977783, + "learning_rate": 1.114306806287587e-06, + "loss": 0.0425, + "step": 2420, + "video_reward_cumulative_accuracy": 0.8409090909090909 + }, + { + "epoch": 0.7186108637577916, + "grad_norm": 3.4823479652404785, + "learning_rate": 1.1121515079953834e-06, + "loss": 0.0369, + "step": 2421, + "video_reward_cumulative_accuracy": 0.8409748038000826 + }, + { + "epoch": 0.7189076877411695, + "grad_norm": 2.5278398990631104, + "learning_rate": 1.1099976996932357e-06, + "loss": 0.0397, + "step": 2422, + "video_reward_cumulative_accuracy": 0.8410404624277457 + }, + { + "epoch": 0.7192045117245474, + "grad_norm": 1.669782280921936, + "learning_rate": 1.1078453836934697e-06, + "loss": 0.0514, + "step": 2423, + "video_reward_cumulative_accuracy": 0.8406933553446141 + }, + { + "epoch": 0.7195013357079252, + "grad_norm": 1.4766911268234253, + "learning_rate": 1.1056945623068023e-06, + "loss": 0.026, + "step": 2424, + "video_reward_cumulative_accuracy": 0.8407590759075908 + }, + { + "epoch": 0.719798159691303, + "grad_norm": 2.538355588912964, + "learning_rate": 1.1035452378423512e-06, + "loss": 0.0246, + "step": 2425, + "video_reward_cumulative_accuracy": 0.8408247422680413 + }, + { + "epoch": 0.7200949836746809, + "grad_norm": 2.352414131164551, + "learning_rate": 1.1013974126076243e-06, + "loss": 0.0457, + "step": 2426, + "video_reward_cumulative_accuracy": 0.8408903544929925 + }, + { + "epoch": 0.7203918076580588, + "grad_norm": 1.7153733968734741, + "learning_rate": 1.0992510889085187e-06, + "loss": 0.0419, + "step": 2427, + "video_reward_cumulative_accuracy": 0.8409559126493613 + }, + { + "epoch": 0.7206886316414366, + "grad_norm": 4.937928199768066, + "learning_rate": 1.0971062690493242e-06, + "loss": 0.045, + "step": 2428, + "video_reward_cumulative_accuracy": 0.8410214168039539 + }, + { + "epoch": 0.7209854556248145, + "grad_norm": 1.5569862127304077, + "learning_rate": 1.0949629553327106e-06, + "loss": 0.0175, + "step": 2429, + "video_reward_cumulative_accuracy": 0.8410868670234665 + }, + { + "epoch": 0.7212822796081924, + "grad_norm": 0.807574987411499, + "learning_rate": 1.0928211500597355e-06, + "loss": 0.0236, + "step": 2430, + "video_reward_cumulative_accuracy": 0.8411522633744856 + }, + { + "epoch": 0.7215791035915702, + "grad_norm": 1.268849492073059, + "learning_rate": 1.0906808555298323e-06, + "loss": 0.0497, + "step": 2431, + "video_reward_cumulative_accuracy": 0.8410119292472233 + }, + { + "epoch": 0.721875927574948, + "grad_norm": 3.1053664684295654, + "learning_rate": 1.088542074040816e-06, + "loss": 0.0356, + "step": 2432, + "video_reward_cumulative_accuracy": 0.841077302631579 + }, + { + "epoch": 0.7221727515583259, + "grad_norm": 2.7275187969207764, + "learning_rate": 1.0864048078888758e-06, + "loss": 0.0367, + "step": 2433, + "video_reward_cumulative_accuracy": 0.8411426222770243 + }, + { + "epoch": 0.7224695755417038, + "grad_norm": 3.4648077487945557, + "learning_rate": 1.084269059368575e-06, + "loss": 0.041, + "step": 2434, + "video_reward_cumulative_accuracy": 0.8412078882497945 + }, + { + "epoch": 0.7227663995250816, + "grad_norm": 1.614362359046936, + "learning_rate": 1.0821348307728478e-06, + "loss": 0.0259, + "step": 2435, + "video_reward_cumulative_accuracy": 0.8412731006160165 + }, + { + "epoch": 0.7230632235084595, + "grad_norm": 3.655402898788452, + "learning_rate": 1.0800021243929931e-06, + "loss": 0.0496, + "step": 2436, + "video_reward_cumulative_accuracy": 0.8411330049261084 + }, + { + "epoch": 0.7233600474918374, + "grad_norm": 2.584467649459839, + "learning_rate": 1.0778709425186801e-06, + "loss": 0.0659, + "step": 2437, + "video_reward_cumulative_accuracy": 0.8409930242100944 + }, + { + "epoch": 0.7236568714752152, + "grad_norm": 3.22719669342041, + "learning_rate": 1.0757412874379386e-06, + "loss": 0.0443, + "step": 2438, + "video_reward_cumulative_accuracy": 0.8408531583264971 + }, + { + "epoch": 0.723953695458593, + "grad_norm": 0.4424673318862915, + "learning_rate": 1.0736131614371602e-06, + "loss": 0.0074, + "step": 2439, + "video_reward_cumulative_accuracy": 0.8409184091840919 + }, + { + "epoch": 0.7242505194419709, + "grad_norm": 0.7209199070930481, + "learning_rate": 1.0714865668010962e-06, + "loss": 0.03, + "step": 2440, + "video_reward_cumulative_accuracy": 0.840983606557377 + }, + { + "epoch": 0.7245473434253488, + "grad_norm": 2.2565574645996094, + "learning_rate": 1.0693615058128502e-06, + "loss": 0.0298, + "step": 2441, + "video_reward_cumulative_accuracy": 0.8410487505120852 + }, + { + "epoch": 0.7248441674087266, + "grad_norm": 2.199859857559204, + "learning_rate": 1.0672379807538818e-06, + "loss": 0.0788, + "step": 2442, + "video_reward_cumulative_accuracy": 0.8411138411138411 + }, + { + "epoch": 0.7251409913921045, + "grad_norm": 1.1875553131103516, + "learning_rate": 1.0651159939040017e-06, + "loss": 0.0301, + "step": 2443, + "video_reward_cumulative_accuracy": 0.8409742120343839 + }, + { + "epoch": 0.7254378153754824, + "grad_norm": 2.6030995845794678, + "learning_rate": 1.0629955475413691e-06, + "loss": 0.0283, + "step": 2444, + "video_reward_cumulative_accuracy": 0.8410392798690671 + }, + { + "epoch": 0.7257346393588602, + "grad_norm": 2.654780864715576, + "learning_rate": 1.0608766439424895e-06, + "loss": 0.0215, + "step": 2445, + "video_reward_cumulative_accuracy": 0.8411042944785276 + }, + { + "epoch": 0.726031463342238, + "grad_norm": 1.8764584064483643, + "learning_rate": 1.0587592853822096e-06, + "loss": 0.038, + "step": 2446, + "video_reward_cumulative_accuracy": 0.8411692559280458 + }, + { + "epoch": 0.7263282873256159, + "grad_norm": 2.492081642150879, + "learning_rate": 1.0566434741337204e-06, + "loss": 0.0329, + "step": 2447, + "video_reward_cumulative_accuracy": 0.8412341642827953 + }, + { + "epoch": 0.7266251113089938, + "grad_norm": 2.2990763187408447, + "learning_rate": 1.0545292124685506e-06, + "loss": 0.0288, + "step": 2448, + "video_reward_cumulative_accuracy": 0.8412990196078431 + }, + { + "epoch": 0.7269219352923716, + "grad_norm": 1.3020623922348022, + "learning_rate": 1.0524165026565655e-06, + "loss": 0.0178, + "step": 2449, + "video_reward_cumulative_accuracy": 0.8413638219681503 + }, + { + "epoch": 0.7272187592757495, + "grad_norm": 0.8552646040916443, + "learning_rate": 1.0503053469659647e-06, + "loss": 0.0229, + "step": 2450, + "video_reward_cumulative_accuracy": 0.8414285714285714 + }, + { + "epoch": 0.7275155832591274, + "grad_norm": 2.16868257522583, + "learning_rate": 1.0481957476632773e-06, + "loss": 0.0564, + "step": 2451, + "video_reward_cumulative_accuracy": 0.8412892696858425 + }, + { + "epoch": 0.7278124072425052, + "grad_norm": 0.4563276767730713, + "learning_rate": 1.0460877070133634e-06, + "loss": 0.012, + "step": 2452, + "video_reward_cumulative_accuracy": 0.8413539967373572 + }, + { + "epoch": 0.728109231225883, + "grad_norm": 3.3756730556488037, + "learning_rate": 1.0439812272794096e-06, + "loss": 0.0633, + "step": 2453, + "video_reward_cumulative_accuracy": 0.8414186710150836 + }, + { + "epoch": 0.7284060552092609, + "grad_norm": 5.538758754730225, + "learning_rate": 1.0418763107229271e-06, + "loss": 0.0483, + "step": 2454, + "video_reward_cumulative_accuracy": 0.841483292583537 + }, + { + "epoch": 0.7287028791926388, + "grad_norm": 1.5109965801239014, + "learning_rate": 1.0397729596037463e-06, + "loss": 0.0185, + "step": 2455, + "video_reward_cumulative_accuracy": 0.8415478615071283 + }, + { + "epoch": 0.7289997031760166, + "grad_norm": 2.8143222332000732, + "learning_rate": 1.0376711761800196e-06, + "loss": 0.0553, + "step": 2456, + "video_reward_cumulative_accuracy": 0.8416123778501629 + }, + { + "epoch": 0.7292965271593945, + "grad_norm": 1.900389313697815, + "learning_rate": 1.0355709627082155e-06, + "loss": 0.0637, + "step": 2457, + "video_reward_cumulative_accuracy": 0.8414733414733415 + }, + { + "epoch": 0.7295933511427724, + "grad_norm": 2.070751905441284, + "learning_rate": 1.0334723214431175e-06, + "loss": 0.0273, + "step": 2458, + "video_reward_cumulative_accuracy": 0.8415378356387306 + }, + { + "epoch": 0.7298901751261502, + "grad_norm": 3.6097755432128906, + "learning_rate": 1.031375254637821e-06, + "loss": 0.0496, + "step": 2459, + "video_reward_cumulative_accuracy": 0.8413989426596177 + }, + { + "epoch": 0.730186999109528, + "grad_norm": 2.63653302192688, + "learning_rate": 1.0292797645437288e-06, + "loss": 0.0594, + "step": 2460, + "video_reward_cumulative_accuracy": 0.8412601626016261 + }, + { + "epoch": 0.7304838230929059, + "grad_norm": 3.204415798187256, + "learning_rate": 1.0271858534105547e-06, + "loss": 0.0269, + "step": 2461, + "video_reward_cumulative_accuracy": 0.8411214953271028 + }, + { + "epoch": 0.7307806470762838, + "grad_norm": 1.9313973188400269, + "learning_rate": 1.0250935234863147e-06, + "loss": 0.0336, + "step": 2462, + "video_reward_cumulative_accuracy": 0.8411860276198213 + }, + { + "epoch": 0.7310774710596616, + "grad_norm": 1.1721389293670654, + "learning_rate": 1.0230027770173282e-06, + "loss": 0.0178, + "step": 2463, + "video_reward_cumulative_accuracy": 0.8412505075111653 + }, + { + "epoch": 0.7313742950430395, + "grad_norm": 2.0116591453552246, + "learning_rate": 1.0209136162482155e-06, + "loss": 0.026, + "step": 2464, + "video_reward_cumulative_accuracy": 0.841314935064935 + }, + { + "epoch": 0.7316711190264173, + "grad_norm": 2.902461290359497, + "learning_rate": 1.0188260434218919e-06, + "loss": 0.0487, + "step": 2465, + "video_reward_cumulative_accuracy": 0.8411764705882353 + }, + { + "epoch": 0.7319679430097952, + "grad_norm": 0.5362818241119385, + "learning_rate": 1.0167400607795708e-06, + "loss": 0.0077, + "step": 2466, + "video_reward_cumulative_accuracy": 0.8412408759124088 + }, + { + "epoch": 0.732264766993173, + "grad_norm": 1.2604457139968872, + "learning_rate": 1.0146556705607544e-06, + "loss": 0.0254, + "step": 2467, + "video_reward_cumulative_accuracy": 0.841305229023105 + }, + { + "epoch": 0.7325615909765509, + "grad_norm": 0.365590900182724, + "learning_rate": 1.012572875003241e-06, + "loss": 0.0057, + "step": 2468, + "video_reward_cumulative_accuracy": 0.8413695299837926 + }, + { + "epoch": 0.7328584149599288, + "grad_norm": 0.8860239386558533, + "learning_rate": 1.0104916763431133e-06, + "loss": 0.0209, + "step": 2469, + "video_reward_cumulative_accuracy": 0.8414337788578372 + }, + { + "epoch": 0.7331552389433066, + "grad_norm": 1.316934585571289, + "learning_rate": 1.0084120768147385e-06, + "loss": 0.0112, + "step": 2470, + "video_reward_cumulative_accuracy": 0.841497975708502 + }, + { + "epoch": 0.7334520629266845, + "grad_norm": 1.1632755994796753, + "learning_rate": 1.00633407865077e-06, + "loss": 0.0526, + "step": 2471, + "video_reward_cumulative_accuracy": 0.8413597733711048 + }, + { + "epoch": 0.7337488869100623, + "grad_norm": 0.5752823352813721, + "learning_rate": 1.0042576840821394e-06, + "loss": 0.0115, + "step": 2472, + "video_reward_cumulative_accuracy": 0.8414239482200647 + }, + { + "epoch": 0.7340457108934402, + "grad_norm": 2.8331027030944824, + "learning_rate": 1.0021828953380572e-06, + "loss": 0.0209, + "step": 2473, + "video_reward_cumulative_accuracy": 0.8414880711686211 + }, + { + "epoch": 0.734342534876818, + "grad_norm": 3.890144109725952, + "learning_rate": 1.0001097146460134e-06, + "loss": 0.0889, + "step": 2474, + "video_reward_cumulative_accuracy": 0.8413500404203719 + }, + { + "epoch": 0.7346393588601959, + "grad_norm": 1.1951637268066406, + "learning_rate": 9.980381442317661e-07, + "loss": 0.0127, + "step": 2475, + "video_reward_cumulative_accuracy": 0.8414141414141414 + }, + { + "epoch": 0.7349361828435738, + "grad_norm": 1.1834206581115723, + "learning_rate": 9.959681863193489e-07, + "loss": 0.0174, + "step": 2476, + "video_reward_cumulative_accuracy": 0.8414781906300485 + }, + { + "epoch": 0.7352330068269516, + "grad_norm": 0.8492854833602905, + "learning_rate": 9.938998431310604e-07, + "loss": 0.0202, + "step": 2477, + "video_reward_cumulative_accuracy": 0.8415421881308034 + }, + { + "epoch": 0.7355298308103295, + "grad_norm": 1.2624728679656982, + "learning_rate": 9.918331168874693e-07, + "loss": 0.0083, + "step": 2478, + "video_reward_cumulative_accuracy": 0.8416061339790153 + }, + { + "epoch": 0.7358266547937073, + "grad_norm": 2.332775831222534, + "learning_rate": 9.897680098074063e-07, + "loss": 0.0323, + "step": 2479, + "video_reward_cumulative_accuracy": 0.8416700282371924 + }, + { + "epoch": 0.7361234787770852, + "grad_norm": 2.9723246097564697, + "learning_rate": 9.877045241079647e-07, + "loss": 0.0763, + "step": 2480, + "video_reward_cumulative_accuracy": 0.8415322580645161 + }, + { + "epoch": 0.736420302760463, + "grad_norm": 1.4844021797180176, + "learning_rate": 9.85642662004497e-07, + "loss": 0.0088, + "step": 2481, + "video_reward_cumulative_accuracy": 0.841596130592503 + }, + { + "epoch": 0.7367171267438409, + "grad_norm": 0.8543074727058411, + "learning_rate": 9.835824257106112e-07, + "loss": 0.019, + "step": 2482, + "video_reward_cumulative_accuracy": 0.8416599516518937 + }, + { + "epoch": 0.7370139507272188, + "grad_norm": 1.366849422454834, + "learning_rate": 9.815238174381711e-07, + "loss": 0.0092, + "step": 2483, + "video_reward_cumulative_accuracy": 0.8417237213048732 + }, + { + "epoch": 0.7373107747105966, + "grad_norm": 0.7628781199455261, + "learning_rate": 9.794668393972932e-07, + "loss": 0.0266, + "step": 2484, + "video_reward_cumulative_accuracy": 0.8417874396135265 + }, + { + "epoch": 0.7376075986939745, + "grad_norm": 3.3191943168640137, + "learning_rate": 9.774114937963425e-07, + "loss": 0.0279, + "step": 2485, + "video_reward_cumulative_accuracy": 0.8418511066398391 + }, + { + "epoch": 0.7379044226773523, + "grad_norm": 0.4921557903289795, + "learning_rate": 9.753577828419331e-07, + "loss": 0.0094, + "step": 2486, + "video_reward_cumulative_accuracy": 0.8419147224456959 + }, + { + "epoch": 0.7382012466607302, + "grad_norm": 5.423759937286377, + "learning_rate": 9.73305708738921e-07, + "loss": 0.0882, + "step": 2487, + "video_reward_cumulative_accuracy": 0.8415761962203459 + }, + { + "epoch": 0.738498070644108, + "grad_norm": 2.609516143798828, + "learning_rate": 9.712552736904085e-07, + "loss": 0.02, + "step": 2488, + "video_reward_cumulative_accuracy": 0.8416398713826366 + }, + { + "epoch": 0.7387948946274859, + "grad_norm": 2.0234172344207764, + "learning_rate": 9.69206479897736e-07, + "loss": 0.0207, + "step": 2489, + "video_reward_cumulative_accuracy": 0.8417034953796706 + }, + { + "epoch": 0.7390917186108638, + "grad_norm": 2.902467966079712, + "learning_rate": 9.671593295604836e-07, + "loss": 0.0226, + "step": 2490, + "video_reward_cumulative_accuracy": 0.8417670682730923 + }, + { + "epoch": 0.7393885425942416, + "grad_norm": 1.8308614492416382, + "learning_rate": 9.65113824876464e-07, + "loss": 0.0383, + "step": 2491, + "video_reward_cumulative_accuracy": 0.841830590124448 + }, + { + "epoch": 0.7396853665776195, + "grad_norm": 0.8829131722450256, + "learning_rate": 9.63069968041726e-07, + "loss": 0.0106, + "step": 2492, + "video_reward_cumulative_accuracy": 0.8418940609951846 + }, + { + "epoch": 0.7399821905609973, + "grad_norm": 1.45628821849823, + "learning_rate": 9.610277612505483e-07, + "loss": 0.0142, + "step": 2493, + "video_reward_cumulative_accuracy": 0.8419574809466506 + }, + { + "epoch": 0.7402790145443752, + "grad_norm": 2.1599154472351074, + "learning_rate": 9.58987206695438e-07, + "loss": 0.0342, + "step": 2494, + "video_reward_cumulative_accuracy": 0.8420208500400962 + }, + { + "epoch": 0.740575838527753, + "grad_norm": 3.008652687072754, + "learning_rate": 9.569483065671294e-07, + "loss": 0.0649, + "step": 2495, + "video_reward_cumulative_accuracy": 0.8420841683366733 + }, + { + "epoch": 0.7408726625111309, + "grad_norm": 1.7300156354904175, + "learning_rate": 9.549110630545783e-07, + "loss": 0.025, + "step": 2496, + "video_reward_cumulative_accuracy": 0.8419471153846154 + }, + { + "epoch": 0.7411694864945088, + "grad_norm": 1.144446611404419, + "learning_rate": 9.528754783449634e-07, + "loss": 0.0149, + "step": 2497, + "video_reward_cumulative_accuracy": 0.842010412494994 + }, + { + "epoch": 0.7414663104778866, + "grad_norm": 0.29633480310440063, + "learning_rate": 9.508415546236829e-07, + "loss": 0.004, + "step": 2498, + "video_reward_cumulative_accuracy": 0.8420736589271417 + }, + { + "epoch": 0.7417631344612645, + "grad_norm": 2.104227304458618, + "learning_rate": 9.488092940743516e-07, + "loss": 0.0251, + "step": 2499, + "video_reward_cumulative_accuracy": 0.8421368547418968 + }, + { + "epoch": 0.7420599584446423, + "grad_norm": 2.1143417358398438, + "learning_rate": 9.467786988787989e-07, + "loss": 0.0524, + "step": 2500, + "video_reward_cumulative_accuracy": 0.8422 + }, + { + "epoch": 0.7423567824280202, + "grad_norm": 3.5316734313964844, + "learning_rate": 9.447497712170642e-07, + "loss": 0.0873, + "step": 2501, + "video_reward_cumulative_accuracy": 0.8422630947620952 + }, + { + "epoch": 0.742653606411398, + "grad_norm": 1.2431542873382568, + "learning_rate": 9.427225132673992e-07, + "loss": 0.0786, + "step": 2502, + "video_reward_cumulative_accuracy": 0.842326139088729 + }, + { + "epoch": 0.7429504303947759, + "grad_norm": 0.4430326819419861, + "learning_rate": 9.406969272062619e-07, + "loss": 0.0069, + "step": 2503, + "video_reward_cumulative_accuracy": 0.8423891330403516 + }, + { + "epoch": 0.7432472543781538, + "grad_norm": 2.5912365913391113, + "learning_rate": 9.386730152083156e-07, + "loss": 0.0423, + "step": 2504, + "video_reward_cumulative_accuracy": 0.8422523961661342 + }, + { + "epoch": 0.7435440783615316, + "grad_norm": 1.016870379447937, + "learning_rate": 9.366507794464275e-07, + "loss": 0.0096, + "step": 2505, + "video_reward_cumulative_accuracy": 0.8423153692614771 + }, + { + "epoch": 0.7438409023449095, + "grad_norm": 0.46760135889053345, + "learning_rate": 9.346302220916619e-07, + "loss": 0.004, + "step": 2506, + "video_reward_cumulative_accuracy": 0.8423782920989625 + }, + { + "epoch": 0.7441377263282873, + "grad_norm": 2.0241291522979736, + "learning_rate": 9.326113453132848e-07, + "loss": 0.051, + "step": 2507, + "video_reward_cumulative_accuracy": 0.8424411647387315 + }, + { + "epoch": 0.7444345503116652, + "grad_norm": 1.800049901008606, + "learning_rate": 9.305941512787542e-07, + "loss": 0.0245, + "step": 2508, + "video_reward_cumulative_accuracy": 0.8425039872408293 + }, + { + "epoch": 0.744731374295043, + "grad_norm": 0.5981684923171997, + "learning_rate": 9.28578642153726e-07, + "loss": 0.0119, + "step": 2509, + "video_reward_cumulative_accuracy": 0.8425667596652052 + }, + { + "epoch": 0.7450281982784209, + "grad_norm": 2.190657615661621, + "learning_rate": 9.265648201020447e-07, + "loss": 0.0158, + "step": 2510, + "video_reward_cumulative_accuracy": 0.8426294820717132 + }, + { + "epoch": 0.7453250222617988, + "grad_norm": 3.271556854248047, + "learning_rate": 9.245526872857424e-07, + "loss": 0.0234, + "step": 2511, + "video_reward_cumulative_accuracy": 0.8426921545201115 + }, + { + "epoch": 0.7456218462451766, + "grad_norm": 1.8033744096755981, + "learning_rate": 9.225422458650404e-07, + "loss": 0.0207, + "step": 2512, + "video_reward_cumulative_accuracy": 0.8427547770700637 + }, + { + "epoch": 0.7459186702285545, + "grad_norm": 1.6773443222045898, + "learning_rate": 9.205334979983402e-07, + "loss": 0.0244, + "step": 2513, + "video_reward_cumulative_accuracy": 0.8428173497811381 + }, + { + "epoch": 0.7462154942119323, + "grad_norm": 1.4466123580932617, + "learning_rate": 9.185264458422313e-07, + "loss": 0.0266, + "step": 2514, + "video_reward_cumulative_accuracy": 0.8428798727128083 + }, + { + "epoch": 0.7465123181953102, + "grad_norm": 0.8426374793052673, + "learning_rate": 9.165210915514758e-07, + "loss": 0.0225, + "step": 2515, + "video_reward_cumulative_accuracy": 0.8429423459244533 + }, + { + "epoch": 0.746809142178688, + "grad_norm": 1.8962173461914062, + "learning_rate": 9.145174372790178e-07, + "loss": 0.0292, + "step": 2516, + "video_reward_cumulative_accuracy": 0.8430047694753577 + }, + { + "epoch": 0.7471059661620659, + "grad_norm": 2.5202438831329346, + "learning_rate": 9.125154851759749e-07, + "loss": 0.0394, + "step": 2517, + "video_reward_cumulative_accuracy": 0.843067143424712 + }, + { + "epoch": 0.7474027901454438, + "grad_norm": 1.6238250732421875, + "learning_rate": 9.105152373916346e-07, + "loss": 0.069, + "step": 2518, + "video_reward_cumulative_accuracy": 0.8431294678316124 + }, + { + "epoch": 0.7476996141288216, + "grad_norm": 1.0544601678848267, + "learning_rate": 9.085166960734604e-07, + "loss": 0.0164, + "step": 2519, + "video_reward_cumulative_accuracy": 0.8431917427550615 + }, + { + "epoch": 0.7479964381121995, + "grad_norm": 2.0650112628936768, + "learning_rate": 9.06519863367078e-07, + "loss": 0.0742, + "step": 2520, + "video_reward_cumulative_accuracy": 0.8432539682539683 + }, + { + "epoch": 0.7482932620955773, + "grad_norm": 1.7210766077041626, + "learning_rate": 9.045247414162817e-07, + "loss": 0.0288, + "step": 2521, + "video_reward_cumulative_accuracy": 0.843316144387148 + }, + { + "epoch": 0.7485900860789552, + "grad_norm": 2.06630539894104, + "learning_rate": 9.025313323630297e-07, + "loss": 0.0206, + "step": 2522, + "video_reward_cumulative_accuracy": 0.8431800158604282 + }, + { + "epoch": 0.748886910062333, + "grad_norm": 2.9529051780700684, + "learning_rate": 9.005396383474371e-07, + "loss": 0.0575, + "step": 2523, + "video_reward_cumulative_accuracy": 0.8428458184700753 + }, + { + "epoch": 0.7491837340457109, + "grad_norm": 1.8153632879257202, + "learning_rate": 8.985496615077849e-07, + "loss": 0.0322, + "step": 2524, + "video_reward_cumulative_accuracy": 0.8429080824088748 + }, + { + "epoch": 0.7494805580290888, + "grad_norm": 2.185244560241699, + "learning_rate": 8.965614039805029e-07, + "loss": 0.0312, + "step": 2525, + "video_reward_cumulative_accuracy": 0.8429702970297029 + }, + { + "epoch": 0.7497773820124666, + "grad_norm": 0.35279035568237305, + "learning_rate": 8.945748679001808e-07, + "loss": 0.0041, + "step": 2526, + "video_reward_cumulative_accuracy": 0.8430324623911323 + }, + { + "epoch": 0.7500742059958445, + "grad_norm": 1.9734653234481812, + "learning_rate": 8.925900553995564e-07, + "loss": 0.0319, + "step": 2527, + "video_reward_cumulative_accuracy": 0.8430945785516423 + }, + { + "epoch": 0.7503710299792223, + "grad_norm": 1.5152698755264282, + "learning_rate": 8.906069686095189e-07, + "loss": 0.0241, + "step": 2528, + "video_reward_cumulative_accuracy": 0.8431566455696202 + }, + { + "epoch": 0.7506678539626002, + "grad_norm": 0.6552301049232483, + "learning_rate": 8.886256096591048e-07, + "loss": 0.0108, + "step": 2529, + "video_reward_cumulative_accuracy": 0.843218663503361 + }, + { + "epoch": 0.750964677945978, + "grad_norm": 3.156954050064087, + "learning_rate": 8.866459806754948e-07, + "loss": 0.0368, + "step": 2530, + "video_reward_cumulative_accuracy": 0.8432806324110672 + }, + { + "epoch": 0.7512615019293559, + "grad_norm": 1.5369415283203125, + "learning_rate": 8.84668083784014e-07, + "loss": 0.0243, + "step": 2531, + "video_reward_cumulative_accuracy": 0.8433425523508494 + }, + { + "epoch": 0.7515583259127337, + "grad_norm": 2.561065673828125, + "learning_rate": 8.82691921108125e-07, + "loss": 0.0483, + "step": 2532, + "video_reward_cumulative_accuracy": 0.8434044233807267 + }, + { + "epoch": 0.7518551498961116, + "grad_norm": 3.3253390789031982, + "learning_rate": 8.807174947694308e-07, + "loss": 0.0232, + "step": 2533, + "video_reward_cumulative_accuracy": 0.8434662455586261 + }, + { + "epoch": 0.7521519738794895, + "grad_norm": 2.2705724239349365, + "learning_rate": 8.787448068876697e-07, + "loss": 0.0315, + "step": 2534, + "video_reward_cumulative_accuracy": 0.8435280189423836 + }, + { + "epoch": 0.7524487978628673, + "grad_norm": 4.962090015411377, + "learning_rate": 8.76773859580714e-07, + "loss": 0.0608, + "step": 2535, + "video_reward_cumulative_accuracy": 0.8435897435897436 + }, + { + "epoch": 0.7527456218462452, + "grad_norm": 1.0470224618911743, + "learning_rate": 8.748046549645675e-07, + "loss": 0.0121, + "step": 2536, + "video_reward_cumulative_accuracy": 0.8436514195583596 + }, + { + "epoch": 0.753042445829623, + "grad_norm": 1.1866475343704224, + "learning_rate": 8.72837195153361e-07, + "loss": 0.0349, + "step": 2537, + "video_reward_cumulative_accuracy": 0.8437130469057943 + }, + { + "epoch": 0.7533392698130009, + "grad_norm": 1.4927583932876587, + "learning_rate": 8.70871482259354e-07, + "loss": 0.0145, + "step": 2538, + "video_reward_cumulative_accuracy": 0.8435776201733649 + }, + { + "epoch": 0.7536360937963787, + "grad_norm": 1.2378430366516113, + "learning_rate": 8.689075183929304e-07, + "loss": 0.015, + "step": 2539, + "video_reward_cumulative_accuracy": 0.8436392280425364 + }, + { + "epoch": 0.7539329177797566, + "grad_norm": 2.65450382232666, + "learning_rate": 8.669453056625959e-07, + "loss": 0.0736, + "step": 2540, + "video_reward_cumulative_accuracy": 0.8437007874015748 + }, + { + "epoch": 0.7542297417631345, + "grad_norm": 2.1863057613372803, + "learning_rate": 8.649848461749772e-07, + "loss": 0.0301, + "step": 2541, + "video_reward_cumulative_accuracy": 0.8437622983077528 + }, + { + "epoch": 0.7545265657465123, + "grad_norm": 1.4505633115768433, + "learning_rate": 8.630261420348162e-07, + "loss": 0.0232, + "step": 2542, + "video_reward_cumulative_accuracy": 0.8438237608182534 + }, + { + "epoch": 0.7548233897298902, + "grad_norm": 0.5751045942306519, + "learning_rate": 8.610691953449727e-07, + "loss": 0.0088, + "step": 2543, + "video_reward_cumulative_accuracy": 0.8438851749901691 + }, + { + "epoch": 0.755120213713268, + "grad_norm": 1.236505150794983, + "learning_rate": 8.591140082064189e-07, + "loss": 0.018, + "step": 2544, + "video_reward_cumulative_accuracy": 0.8439465408805031 + }, + { + "epoch": 0.7554170376966459, + "grad_norm": 1.4471156597137451, + "learning_rate": 8.571605827182381e-07, + "loss": 0.0264, + "step": 2545, + "video_reward_cumulative_accuracy": 0.844007858546169 + }, + { + "epoch": 0.7557138616800237, + "grad_norm": 1.338175892829895, + "learning_rate": 8.552089209776226e-07, + "loss": 0.0155, + "step": 2546, + "video_reward_cumulative_accuracy": 0.8440691280439906 + }, + { + "epoch": 0.7560106856634016, + "grad_norm": 1.3986073732376099, + "learning_rate": 8.532590250798695e-07, + "loss": 0.0298, + "step": 2547, + "video_reward_cumulative_accuracy": 0.8439340400471143 + }, + { + "epoch": 0.7563075096467795, + "grad_norm": 3.191577911376953, + "learning_rate": 8.513108971183817e-07, + "loss": 0.0398, + "step": 2548, + "video_reward_cumulative_accuracy": 0.8439952904238619 + }, + { + "epoch": 0.7566043336301573, + "grad_norm": 1.3186044692993164, + "learning_rate": 8.493645391846642e-07, + "loss": 0.0241, + "step": 2549, + "video_reward_cumulative_accuracy": 0.8440564927422519 + }, + { + "epoch": 0.7569011576135352, + "grad_norm": 3.0393431186676025, + "learning_rate": 8.474199533683214e-07, + "loss": 0.0545, + "step": 2550, + "video_reward_cumulative_accuracy": 0.8441176470588235 + }, + { + "epoch": 0.757197981596913, + "grad_norm": 2.7258641719818115, + "learning_rate": 8.454771417570537e-07, + "loss": 0.0403, + "step": 2551, + "video_reward_cumulative_accuracy": 0.8441787534300275 + }, + { + "epoch": 0.7574948055802909, + "grad_norm": 1.2162940502166748, + "learning_rate": 8.435361064366585e-07, + "loss": 0.0148, + "step": 2552, + "video_reward_cumulative_accuracy": 0.8442398119122257 + }, + { + "epoch": 0.7577916295636687, + "grad_norm": 0.32078516483306885, + "learning_rate": 8.415968494910253e-07, + "loss": 0.0036, + "step": 2553, + "video_reward_cumulative_accuracy": 0.8443008225616921 + }, + { + "epoch": 0.7580884535470466, + "grad_norm": 1.9234449863433838, + "learning_rate": 8.396593730021355e-07, + "loss": 0.0207, + "step": 2554, + "video_reward_cumulative_accuracy": 0.8441660140955364 + }, + { + "epoch": 0.7583852775304245, + "grad_norm": 4.319919586181641, + "learning_rate": 8.377236790500584e-07, + "loss": 0.0487, + "step": 2555, + "video_reward_cumulative_accuracy": 0.8442270058708415 + }, + { + "epoch": 0.7586821015138023, + "grad_norm": 1.7034393548965454, + "learning_rate": 8.357897697129477e-07, + "loss": 0.0126, + "step": 2556, + "video_reward_cumulative_accuracy": 0.8442879499217527 + }, + { + "epoch": 0.7589789254971802, + "grad_norm": 1.2667155265808105, + "learning_rate": 8.338576470670437e-07, + "loss": 0.0073, + "step": 2557, + "video_reward_cumulative_accuracy": 0.8443488463042628 + }, + { + "epoch": 0.759275749480558, + "grad_norm": 2.665156126022339, + "learning_rate": 8.319273131866675e-07, + "loss": 0.0465, + "step": 2558, + "video_reward_cumulative_accuracy": 0.8444096950742768 + }, + { + "epoch": 0.7595725734639359, + "grad_norm": 2.237888813018799, + "learning_rate": 8.299987701442203e-07, + "loss": 0.0296, + "step": 2559, + "video_reward_cumulative_accuracy": 0.8444704962876124 + }, + { + "epoch": 0.7598693974473137, + "grad_norm": 2.12754225730896, + "learning_rate": 8.280720200101805e-07, + "loss": 0.015, + "step": 2560, + "video_reward_cumulative_accuracy": 0.84453125 + }, + { + "epoch": 0.7601662214306916, + "grad_norm": 2.578101873397827, + "learning_rate": 8.261470648530998e-07, + "loss": 0.0259, + "step": 2561, + "video_reward_cumulative_accuracy": 0.8445919562670832 + }, + { + "epoch": 0.7604630454140695, + "grad_norm": 2.2189407348632812, + "learning_rate": 8.242239067396063e-07, + "loss": 0.0262, + "step": 2562, + "video_reward_cumulative_accuracy": 0.8446526151444185 + }, + { + "epoch": 0.7607598693974473, + "grad_norm": 1.3384156227111816, + "learning_rate": 8.223025477343944e-07, + "loss": 0.0228, + "step": 2563, + "video_reward_cumulative_accuracy": 0.8445181428014046 + }, + { + "epoch": 0.7610566933808252, + "grad_norm": 1.1982171535491943, + "learning_rate": 8.203829899002316e-07, + "loss": 0.0104, + "step": 2564, + "video_reward_cumulative_accuracy": 0.8445787831513261 + }, + { + "epoch": 0.761353517364203, + "grad_norm": 1.45271897315979, + "learning_rate": 8.1846523529795e-07, + "loss": 0.0164, + "step": 2565, + "video_reward_cumulative_accuracy": 0.8446393762183236 + }, + { + "epoch": 0.7616503413475809, + "grad_norm": 1.5878190994262695, + "learning_rate": 8.165492859864435e-07, + "loss": 0.0238, + "step": 2566, + "video_reward_cumulative_accuracy": 0.8445050662509743 + }, + { + "epoch": 0.7619471653309587, + "grad_norm": 2.7507424354553223, + "learning_rate": 8.146351440226711e-07, + "loss": 0.0443, + "step": 2567, + "video_reward_cumulative_accuracy": 0.8445656408258668 + }, + { + "epoch": 0.7622439893143366, + "grad_norm": 2.033236503601074, + "learning_rate": 8.127228114616484e-07, + "loss": 0.0223, + "step": 2568, + "video_reward_cumulative_accuracy": 0.844626168224299 + }, + { + "epoch": 0.7625408132977145, + "grad_norm": 0.4424172043800354, + "learning_rate": 8.108122903564502e-07, + "loss": 0.0082, + "step": 2569, + "video_reward_cumulative_accuracy": 0.8446866485013624 + }, + { + "epoch": 0.7628376372810923, + "grad_norm": 4.878910541534424, + "learning_rate": 8.089035827582087e-07, + "loss": 0.0613, + "step": 2570, + "video_reward_cumulative_accuracy": 0.8447470817120623 + }, + { + "epoch": 0.7631344612644702, + "grad_norm": 0.8770198822021484, + "learning_rate": 8.069966907161042e-07, + "loss": 0.0104, + "step": 2571, + "video_reward_cumulative_accuracy": 0.8448074679113186 + }, + { + "epoch": 0.763431285247848, + "grad_norm": 1.469495415687561, + "learning_rate": 8.05091616277372e-07, + "loss": 0.0102, + "step": 2572, + "video_reward_cumulative_accuracy": 0.8448678071539658 + }, + { + "epoch": 0.7637281092312259, + "grad_norm": 1.270909070968628, + "learning_rate": 8.031883614872929e-07, + "loss": 0.0271, + "step": 2573, + "video_reward_cumulative_accuracy": 0.8449280994947532 + }, + { + "epoch": 0.7640249332146037, + "grad_norm": 3.988424301147461, + "learning_rate": 8.012869283891967e-07, + "loss": 0.0606, + "step": 2574, + "video_reward_cumulative_accuracy": 0.844988344988345 + }, + { + "epoch": 0.7643217571979816, + "grad_norm": 2.989523410797119, + "learning_rate": 7.993873190244558e-07, + "loss": 0.0208, + "step": 2575, + "video_reward_cumulative_accuracy": 0.8448543689320388 + }, + { + "epoch": 0.7646185811813595, + "grad_norm": 0.9885690212249756, + "learning_rate": 7.974895354324857e-07, + "loss": 0.0226, + "step": 2576, + "video_reward_cumulative_accuracy": 0.8449145962732919 + }, + { + "epoch": 0.7649154051647373, + "grad_norm": 2.960150718688965, + "learning_rate": 7.955935796507419e-07, + "loss": 0.049, + "step": 2577, + "video_reward_cumulative_accuracy": 0.8449747768723321 + }, + { + "epoch": 0.7652122291481152, + "grad_norm": 3.4820029735565186, + "learning_rate": 7.936994537147155e-07, + "loss": 0.0499, + "step": 2578, + "video_reward_cumulative_accuracy": 0.8446470131885182 + }, + { + "epoch": 0.765509053131493, + "grad_norm": 2.2043135166168213, + "learning_rate": 7.91807159657935e-07, + "loss": 0.0192, + "step": 2579, + "video_reward_cumulative_accuracy": 0.8447072508724311 + }, + { + "epoch": 0.7658058771148709, + "grad_norm": 2.675544023513794, + "learning_rate": 7.89916699511962e-07, + "loss": 0.0514, + "step": 2580, + "video_reward_cumulative_accuracy": 0.8447674418604652 + }, + { + "epoch": 0.7661027010982487, + "grad_norm": 2.820864200592041, + "learning_rate": 7.880280753063891e-07, + "loss": 0.0133, + "step": 2581, + "video_reward_cumulative_accuracy": 0.8448275862068966 + }, + { + "epoch": 0.7663995250816266, + "grad_norm": 3.3666510581970215, + "learning_rate": 7.86141289068838e-07, + "loss": 0.0393, + "step": 2582, + "video_reward_cumulative_accuracy": 0.8448876839659178 + }, + { + "epoch": 0.7666963490650045, + "grad_norm": 3.5256621837615967, + "learning_rate": 7.842563428249555e-07, + "loss": 0.0183, + "step": 2583, + "video_reward_cumulative_accuracy": 0.8449477351916377 + }, + { + "epoch": 0.7669931730483823, + "grad_norm": 0.805884063243866, + "learning_rate": 7.823732385984154e-07, + "loss": 0.0258, + "step": 2584, + "video_reward_cumulative_accuracy": 0.8450077399380805 + }, + { + "epoch": 0.7672899970317602, + "grad_norm": 2.2179319858551025, + "learning_rate": 7.804919784109124e-07, + "loss": 0.0197, + "step": 2585, + "video_reward_cumulative_accuracy": 0.8450676982591876 + }, + { + "epoch": 0.767586821015138, + "grad_norm": 2.208432674407959, + "learning_rate": 7.786125642821632e-07, + "loss": 0.0248, + "step": 2586, + "video_reward_cumulative_accuracy": 0.8451276102088167 + }, + { + "epoch": 0.7678836449985159, + "grad_norm": 1.296506643295288, + "learning_rate": 7.767349982298992e-07, + "loss": 0.0078, + "step": 2587, + "video_reward_cumulative_accuracy": 0.8451874758407422 + }, + { + "epoch": 0.7681804689818937, + "grad_norm": 2.701901435852051, + "learning_rate": 7.748592822698708e-07, + "loss": 0.02, + "step": 2588, + "video_reward_cumulative_accuracy": 0.8450540958268934 + }, + { + "epoch": 0.7684772929652716, + "grad_norm": 4.324398517608643, + "learning_rate": 7.729854184158411e-07, + "loss": 0.033, + "step": 2589, + "video_reward_cumulative_accuracy": 0.8451139436075705 + }, + { + "epoch": 0.7687741169486495, + "grad_norm": 2.3187663555145264, + "learning_rate": 7.711134086795852e-07, + "loss": 0.0229, + "step": 2590, + "video_reward_cumulative_accuracy": 0.8451737451737452 + }, + { + "epoch": 0.7690709409320273, + "grad_norm": 3.8253369331359863, + "learning_rate": 7.692432550708873e-07, + "loss": 0.0558, + "step": 2591, + "video_reward_cumulative_accuracy": 0.8450405248938634 + }, + { + "epoch": 0.7693677649154052, + "grad_norm": 2.7652409076690674, + "learning_rate": 7.673749595975378e-07, + "loss": 0.0173, + "step": 2592, + "video_reward_cumulative_accuracy": 0.8451003086419753 + }, + { + "epoch": 0.769664588898783, + "grad_norm": 4.698938846588135, + "learning_rate": 7.655085242653337e-07, + "loss": 0.0321, + "step": 2593, + "video_reward_cumulative_accuracy": 0.8449672194369456 + }, + { + "epoch": 0.7699614128821609, + "grad_norm": 3.0443601608276367, + "learning_rate": 7.636439510780747e-07, + "loss": 0.0465, + "step": 2594, + "video_reward_cumulative_accuracy": 0.8450269853508096 + }, + { + "epoch": 0.7702582368655387, + "grad_norm": 3.421412706375122, + "learning_rate": 7.617812420375611e-07, + "loss": 0.0844, + "step": 2595, + "video_reward_cumulative_accuracy": 0.8450867052023121 + }, + { + "epoch": 0.7705550608489166, + "grad_norm": 1.4718027114868164, + "learning_rate": 7.599203991435924e-07, + "loss": 0.0188, + "step": 2596, + "video_reward_cumulative_accuracy": 0.8451463790446841 + }, + { + "epoch": 0.7708518848322945, + "grad_norm": 2.6113193035125732, + "learning_rate": 7.580614243939627e-07, + "loss": 0.0317, + "step": 2597, + "video_reward_cumulative_accuracy": 0.8452060069310743 + }, + { + "epoch": 0.7711487088156723, + "grad_norm": 3.532366991043091, + "learning_rate": 7.562043197844626e-07, + "loss": 0.0406, + "step": 2598, + "video_reward_cumulative_accuracy": 0.8452655889145496 + }, + { + "epoch": 0.7714455327990501, + "grad_norm": 1.3378589153289795, + "learning_rate": 7.543490873088738e-07, + "loss": 0.0172, + "step": 2599, + "video_reward_cumulative_accuracy": 0.8453251250480954 + }, + { + "epoch": 0.771742356782428, + "grad_norm": 6.215091228485107, + "learning_rate": 7.52495728958969e-07, + "loss": 0.0305, + "step": 2600, + "video_reward_cumulative_accuracy": 0.8453846153846154 + }, + { + "epoch": 0.771742356782428, + "eval_runtime": 132.3938, + "eval_samples_per_second": 5.959, + "eval_steps_per_second": 0.748, + "eval_test_set_accuracy": 0.821969696969697, + "step": 2600 + }, + { + "epoch": 0.7720391807658059, + "grad_norm": 2.3559024333953857, + "learning_rate": 7.506442467245084e-07, + "loss": 0.0387, + "step": 2601, + "video_reward_cumulative_accuracy": 0.845444059976932 + }, + { + "epoch": 0.7723360047491837, + "grad_norm": 3.2431516647338867, + "learning_rate": 7.487946425932372e-07, + "loss": 0.0431, + "step": 2602, + "video_reward_cumulative_accuracy": 0.8453112990007686 + }, + { + "epoch": 0.7726328287325616, + "grad_norm": 3.5290749073028564, + "learning_rate": 7.469469185508854e-07, + "loss": 0.0461, + "step": 2603, + "video_reward_cumulative_accuracy": 0.8453707260852862 + }, + { + "epoch": 0.7729296527159395, + "grad_norm": 4.477185249328613, + "learning_rate": 7.451010765811628e-07, + "loss": 0.0644, + "step": 2604, + "video_reward_cumulative_accuracy": 0.8452380952380952 + }, + { + "epoch": 0.7732264766993173, + "grad_norm": 1.7302701473236084, + "learning_rate": 7.432571186657614e-07, + "loss": 0.0116, + "step": 2605, + "video_reward_cumulative_accuracy": 0.8452975047984644 + }, + { + "epoch": 0.7735233006826951, + "grad_norm": 1.9252644777297974, + "learning_rate": 7.414150467843498e-07, + "loss": 0.027, + "step": 2606, + "video_reward_cumulative_accuracy": 0.8453568687643899 + }, + { + "epoch": 0.773820124666073, + "grad_norm": 3.0413153171539307, + "learning_rate": 7.395748629145685e-07, + "loss": 0.088, + "step": 2607, + "video_reward_cumulative_accuracy": 0.8454161871883391 + }, + { + "epoch": 0.7741169486494509, + "grad_norm": 2.653510093688965, + "learning_rate": 7.37736569032036e-07, + "loss": 0.0589, + "step": 2608, + "video_reward_cumulative_accuracy": 0.8452837423312883 + }, + { + "epoch": 0.7744137726328287, + "grad_norm": 2.2542154788970947, + "learning_rate": 7.359001671103361e-07, + "loss": 0.0261, + "step": 2609, + "video_reward_cumulative_accuracy": 0.8453430433116137 + }, + { + "epoch": 0.7747105966162066, + "grad_norm": 1.7998321056365967, + "learning_rate": 7.340656591210279e-07, + "loss": 0.0194, + "step": 2610, + "video_reward_cumulative_accuracy": 0.8454022988505747 + }, + { + "epoch": 0.7750074205995845, + "grad_norm": 2.0335445404052734, + "learning_rate": 7.322330470336314e-07, + "loss": 0.0157, + "step": 2611, + "video_reward_cumulative_accuracy": 0.845461509000383 + }, + { + "epoch": 0.7753042445829623, + "grad_norm": 3.780298948287964, + "learning_rate": 7.304023328156345e-07, + "loss": 0.0582, + "step": 2612, + "video_reward_cumulative_accuracy": 0.84552067381317 + }, + { + "epoch": 0.7756010685663401, + "grad_norm": 2.430021286010742, + "learning_rate": 7.285735184324872e-07, + "loss": 0.0478, + "step": 2613, + "video_reward_cumulative_accuracy": 0.8455797933409873 + }, + { + "epoch": 0.775897892549718, + "grad_norm": 2.428281784057617, + "learning_rate": 7.267466058475969e-07, + "loss": 0.0268, + "step": 2614, + "video_reward_cumulative_accuracy": 0.8456388676358072 + }, + { + "epoch": 0.7761947165330959, + "grad_norm": 3.825349807739258, + "learning_rate": 7.249215970223347e-07, + "loss": 0.033, + "step": 2615, + "video_reward_cumulative_accuracy": 0.8455066921606118 + }, + { + "epoch": 0.7764915405164737, + "grad_norm": 3.4468047618865967, + "learning_rate": 7.230984939160227e-07, + "loss": 0.0454, + "step": 2616, + "video_reward_cumulative_accuracy": 0.845565749235474 + }, + { + "epoch": 0.7767883644998516, + "grad_norm": 2.420203447341919, + "learning_rate": 7.2127729848594e-07, + "loss": 0.0323, + "step": 2617, + "video_reward_cumulative_accuracy": 0.8456247611769201 + }, + { + "epoch": 0.7770851884832295, + "grad_norm": 0.8875495195388794, + "learning_rate": 7.194580126873155e-07, + "loss": 0.0189, + "step": 2618, + "video_reward_cumulative_accuracy": 0.8456837280366692 + }, + { + "epoch": 0.7773820124666073, + "grad_norm": 0.7100759148597717, + "learning_rate": 7.176406384733289e-07, + "loss": 0.0083, + "step": 2619, + "video_reward_cumulative_accuracy": 0.8457426498663612 + }, + { + "epoch": 0.7776788364499851, + "grad_norm": 2.6363914012908936, + "learning_rate": 7.158251777951103e-07, + "loss": 0.0294, + "step": 2620, + "video_reward_cumulative_accuracy": 0.8456106870229008 + }, + { + "epoch": 0.777975660433363, + "grad_norm": 3.5254852771759033, + "learning_rate": 7.140116326017304e-07, + "loss": 0.0296, + "step": 2621, + "video_reward_cumulative_accuracy": 0.8456695917588707 + }, + { + "epoch": 0.7782724844167409, + "grad_norm": 2.1141459941864014, + "learning_rate": 7.122000048402078e-07, + "loss": 0.0356, + "step": 2622, + "video_reward_cumulative_accuracy": 0.8457284515636918 + }, + { + "epoch": 0.7785693084001187, + "grad_norm": 1.737390398979187, + "learning_rate": 7.10390296455499e-07, + "loss": 0.0434, + "step": 2623, + "video_reward_cumulative_accuracy": 0.8457872664887534 + }, + { + "epoch": 0.7788661323834966, + "grad_norm": 1.159108281135559, + "learning_rate": 7.085825093905025e-07, + "loss": 0.0135, + "step": 2624, + "video_reward_cumulative_accuracy": 0.8458460365853658 + }, + { + "epoch": 0.7791629563668745, + "grad_norm": 5.79228401184082, + "learning_rate": 7.06776645586053e-07, + "loss": 0.0594, + "step": 2625, + "video_reward_cumulative_accuracy": 0.8459047619047619 + }, + { + "epoch": 0.7794597803502523, + "grad_norm": 1.6138088703155518, + "learning_rate": 7.049727069809206e-07, + "loss": 0.0134, + "step": 2626, + "video_reward_cumulative_accuracy": 0.845963442498096 + }, + { + "epoch": 0.7797566043336301, + "grad_norm": 4.311069011688232, + "learning_rate": 7.031706955118095e-07, + "loss": 0.0717, + "step": 2627, + "video_reward_cumulative_accuracy": 0.8460220784164446 + }, + { + "epoch": 0.780053428317008, + "grad_norm": 1.8174479007720947, + "learning_rate": 7.013706131133522e-07, + "loss": 0.0319, + "step": 2628, + "video_reward_cumulative_accuracy": 0.8460806697108066 + }, + { + "epoch": 0.7803502523003859, + "grad_norm": 1.904004693031311, + "learning_rate": 6.995724617181124e-07, + "loss": 0.0252, + "step": 2629, + "video_reward_cumulative_accuracy": 0.8461392164321034 + }, + { + "epoch": 0.7806470762837637, + "grad_norm": 2.597764015197754, + "learning_rate": 6.977762432565805e-07, + "loss": 0.0188, + "step": 2630, + "video_reward_cumulative_accuracy": 0.8461977186311787 + }, + { + "epoch": 0.7809439002671416, + "grad_norm": 2.199734926223755, + "learning_rate": 6.95981959657171e-07, + "loss": 0.0377, + "step": 2631, + "video_reward_cumulative_accuracy": 0.8462561763587989 + }, + { + "epoch": 0.7812407242505195, + "grad_norm": 0.7549028396606445, + "learning_rate": 6.941896128462227e-07, + "loss": 0.018, + "step": 2632, + "video_reward_cumulative_accuracy": 0.8463145896656535 + }, + { + "epoch": 0.7815375482338973, + "grad_norm": 2.8155016899108887, + "learning_rate": 6.923992047479921e-07, + "loss": 0.0286, + "step": 2633, + "video_reward_cumulative_accuracy": 0.8463729586023547 + }, + { + "epoch": 0.7818343722172751, + "grad_norm": 2.58642578125, + "learning_rate": 6.906107372846568e-07, + "loss": 0.0239, + "step": 2634, + "video_reward_cumulative_accuracy": 0.8462414578587699 + }, + { + "epoch": 0.782131196200653, + "grad_norm": 2.169006109237671, + "learning_rate": 6.888242123763103e-07, + "loss": 0.0208, + "step": 2635, + "video_reward_cumulative_accuracy": 0.8462998102466793 + }, + { + "epoch": 0.7824280201840309, + "grad_norm": 2.2981460094451904, + "learning_rate": 6.870396319409602e-07, + "loss": 0.0529, + "step": 2636, + "video_reward_cumulative_accuracy": 0.8463581183611533 + }, + { + "epoch": 0.7827248441674087, + "grad_norm": 1.9309078454971313, + "learning_rate": 6.852569978945281e-07, + "loss": 0.0217, + "step": 2637, + "video_reward_cumulative_accuracy": 0.8464163822525598 + }, + { + "epoch": 0.7830216681507866, + "grad_norm": 0.8763541579246521, + "learning_rate": 6.834763121508428e-07, + "loss": 0.0146, + "step": 2638, + "video_reward_cumulative_accuracy": 0.8464746019711903 + }, + { + "epoch": 0.7833184921341645, + "grad_norm": 3.7167813777923584, + "learning_rate": 6.816975766216441e-07, + "loss": 0.0478, + "step": 2639, + "video_reward_cumulative_accuracy": 0.8465327775672603 + }, + { + "epoch": 0.7836153161175423, + "grad_norm": 4.534986972808838, + "learning_rate": 6.799207932165772e-07, + "loss": 0.045, + "step": 2640, + "video_reward_cumulative_accuracy": 0.8464015151515152 + }, + { + "epoch": 0.7839121401009201, + "grad_norm": 1.1529324054718018, + "learning_rate": 6.781459638431923e-07, + "loss": 0.0177, + "step": 2641, + "video_reward_cumulative_accuracy": 0.8464596743657705 + }, + { + "epoch": 0.784208964084298, + "grad_norm": 1.5981630086898804, + "learning_rate": 6.763730904069393e-07, + "loss": 0.0107, + "step": 2642, + "video_reward_cumulative_accuracy": 0.8465177895533686 + }, + { + "epoch": 0.7845057880676759, + "grad_norm": 2.033008575439453, + "learning_rate": 6.746021748111709e-07, + "loss": 0.0226, + "step": 2643, + "video_reward_cumulative_accuracy": 0.846575860764283 + }, + { + "epoch": 0.7848026120510537, + "grad_norm": 0.6989650726318359, + "learning_rate": 6.728332189571368e-07, + "loss": 0.007, + "step": 2644, + "video_reward_cumulative_accuracy": 0.8466338880484114 + }, + { + "epoch": 0.7850994360344316, + "grad_norm": 1.581715703010559, + "learning_rate": 6.710662247439831e-07, + "loss": 0.0276, + "step": 2645, + "video_reward_cumulative_accuracy": 0.8466918714555766 + }, + { + "epoch": 0.7853962600178095, + "grad_norm": 1.8581857681274414, + "learning_rate": 6.693011940687499e-07, + "loss": 0.0321, + "step": 2646, + "video_reward_cumulative_accuracy": 0.8465608465608465 + }, + { + "epoch": 0.7856930840011873, + "grad_norm": 1.9918158054351807, + "learning_rate": 6.675381288263675e-07, + "loss": 0.0228, + "step": 2647, + "video_reward_cumulative_accuracy": 0.8466188137514167 + }, + { + "epoch": 0.7859899079845651, + "grad_norm": 2.124476194381714, + "learning_rate": 6.657770309096584e-07, + "loss": 0.011, + "step": 2648, + "video_reward_cumulative_accuracy": 0.8466767371601208 + }, + { + "epoch": 0.786286731967943, + "grad_norm": 2.0029237270355225, + "learning_rate": 6.640179022093324e-07, + "loss": 0.0261, + "step": 2649, + "video_reward_cumulative_accuracy": 0.846734616836542 + }, + { + "epoch": 0.7865835559513209, + "grad_norm": 1.5002411603927612, + "learning_rate": 6.622607446139844e-07, + "loss": 0.0173, + "step": 2650, + "video_reward_cumulative_accuracy": 0.8467924528301887 + }, + { + "epoch": 0.7868803799346987, + "grad_norm": 2.7715702056884766, + "learning_rate": 6.605055600100945e-07, + "loss": 0.0615, + "step": 2651, + "video_reward_cumulative_accuracy": 0.8466616371180686 + }, + { + "epoch": 0.7871772039180766, + "grad_norm": 2.444265604019165, + "learning_rate": 6.587523502820226e-07, + "loss": 0.0486, + "step": 2652, + "video_reward_cumulative_accuracy": 0.8467194570135747 + }, + { + "epoch": 0.7874740279014545, + "grad_norm": 1.8649511337280273, + "learning_rate": 6.570011173120108e-07, + "loss": 0.043, + "step": 2653, + "video_reward_cumulative_accuracy": 0.8467772333207689 + }, + { + "epoch": 0.7877708518848323, + "grad_norm": 0.8448922634124756, + "learning_rate": 6.552518629801752e-07, + "loss": 0.0105, + "step": 2654, + "video_reward_cumulative_accuracy": 0.8468349660889224 + }, + { + "epoch": 0.7880676758682101, + "grad_norm": 2.1769087314605713, + "learning_rate": 6.535045891645125e-07, + "loss": 0.0765, + "step": 2655, + "video_reward_cumulative_accuracy": 0.8467043314500942 + }, + { + "epoch": 0.788364499851588, + "grad_norm": 3.1270945072174072, + "learning_rate": 6.517592977408909e-07, + "loss": 0.045, + "step": 2656, + "video_reward_cumulative_accuracy": 0.8467620481927711 + }, + { + "epoch": 0.7886613238349659, + "grad_norm": 2.3830227851867676, + "learning_rate": 6.500159905830484e-07, + "loss": 0.0181, + "step": 2657, + "video_reward_cumulative_accuracy": 0.8468197214904027 + }, + { + "epoch": 0.7889581478183437, + "grad_norm": 2.3455231189727783, + "learning_rate": 6.48274669562596e-07, + "loss": 0.0461, + "step": 2658, + "video_reward_cumulative_accuracy": 0.8466892400300978 + }, + { + "epoch": 0.7892549718017215, + "grad_norm": 1.0843396186828613, + "learning_rate": 6.465353365490093e-07, + "loss": 0.0176, + "step": 2659, + "video_reward_cumulative_accuracy": 0.8467468973298232 + }, + { + "epoch": 0.7895517957850995, + "grad_norm": 1.672133207321167, + "learning_rate": 6.447979934096313e-07, + "loss": 0.0171, + "step": 2660, + "video_reward_cumulative_accuracy": 0.8468045112781954 + }, + { + "epoch": 0.7898486197684773, + "grad_norm": 2.30859112739563, + "learning_rate": 6.430626420096703e-07, + "loss": 0.0343, + "step": 2661, + "video_reward_cumulative_accuracy": 0.8468620819240887 + }, + { + "epoch": 0.7901454437518551, + "grad_norm": 0.6453092098236084, + "learning_rate": 6.413292842121927e-07, + "loss": 0.0085, + "step": 2662, + "video_reward_cumulative_accuracy": 0.8469196093163035 + }, + { + "epoch": 0.790442267735233, + "grad_norm": 2.6562001705169678, + "learning_rate": 6.395979218781276e-07, + "loss": 0.0279, + "step": 2663, + "video_reward_cumulative_accuracy": 0.8469770935035674 + }, + { + "epoch": 0.7907390917186109, + "grad_norm": 0.6483455300331116, + "learning_rate": 6.37868556866259e-07, + "loss": 0.013, + "step": 2664, + "video_reward_cumulative_accuracy": 0.8470345345345346 + }, + { + "epoch": 0.7910359157019887, + "grad_norm": 2.082960605621338, + "learning_rate": 6.361411910332288e-07, + "loss": 0.027, + "step": 2665, + "video_reward_cumulative_accuracy": 0.8470919324577861 + }, + { + "epoch": 0.7913327396853665, + "grad_norm": 2.668041944503784, + "learning_rate": 6.34415826233532e-07, + "loss": 0.0209, + "step": 2666, + "video_reward_cumulative_accuracy": 0.8471492873218305 + }, + { + "epoch": 0.7916295636687445, + "grad_norm": 1.8569388389587402, + "learning_rate": 6.326924643195151e-07, + "loss": 0.027, + "step": 2667, + "video_reward_cumulative_accuracy": 0.8470191226096738 + }, + { + "epoch": 0.7919263876521223, + "grad_norm": 2.4845709800720215, + "learning_rate": 6.309711071413752e-07, + "loss": 0.0211, + "step": 2668, + "video_reward_cumulative_accuracy": 0.8470764617691154 + }, + { + "epoch": 0.7922232116355001, + "grad_norm": 3.088460922241211, + "learning_rate": 6.292517565471548e-07, + "loss": 0.0389, + "step": 2669, + "video_reward_cumulative_accuracy": 0.8471337579617835 + }, + { + "epoch": 0.792520035618878, + "grad_norm": 1.7615797519683838, + "learning_rate": 6.275344143827442e-07, + "loss": 0.0808, + "step": 2670, + "video_reward_cumulative_accuracy": 0.8471910112359551 + }, + { + "epoch": 0.7928168596022559, + "grad_norm": 6.1424360275268555, + "learning_rate": 6.258190824918772e-07, + "loss": 0.0601, + "step": 2671, + "video_reward_cumulative_accuracy": 0.8472482216398353 + }, + { + "epoch": 0.7931136835856337, + "grad_norm": 3.102992534637451, + "learning_rate": 6.241057627161287e-07, + "loss": 0.0441, + "step": 2672, + "video_reward_cumulative_accuracy": 0.8473053892215568 + }, + { + "epoch": 0.7934105075690115, + "grad_norm": 0.6833072900772095, + "learning_rate": 6.223944568949147e-07, + "loss": 0.005, + "step": 2673, + "video_reward_cumulative_accuracy": 0.8473625140291807 + }, + { + "epoch": 0.7937073315523895, + "grad_norm": 3.3606889247894287, + "learning_rate": 6.206851668654867e-07, + "loss": 0.0639, + "step": 2674, + "video_reward_cumulative_accuracy": 0.8474195961106956 + }, + { + "epoch": 0.7940041555357673, + "grad_norm": 2.6462979316711426, + "learning_rate": 6.189778944629343e-07, + "loss": 0.0401, + "step": 2675, + "video_reward_cumulative_accuracy": 0.8474766355140186 + }, + { + "epoch": 0.7943009795191451, + "grad_norm": 2.362200975418091, + "learning_rate": 6.172726415201796e-07, + "loss": 0.0143, + "step": 2676, + "video_reward_cumulative_accuracy": 0.8475336322869955 + }, + { + "epoch": 0.794597803502523, + "grad_norm": 3.2839114665985107, + "learning_rate": 6.155694098679785e-07, + "loss": 0.0293, + "step": 2677, + "video_reward_cumulative_accuracy": 0.8475905864774 + }, + { + "epoch": 0.7948946274859009, + "grad_norm": 3.1356849670410156, + "learning_rate": 6.138682013349137e-07, + "loss": 0.0445, + "step": 2678, + "video_reward_cumulative_accuracy": 0.8472740851381628 + }, + { + "epoch": 0.7951914514692787, + "grad_norm": 3.2600576877593994, + "learning_rate": 6.121690177473983e-07, + "loss": 0.0273, + "step": 2679, + "video_reward_cumulative_accuracy": 0.847331093691676 + }, + { + "epoch": 0.7954882754526565, + "grad_norm": 2.7981717586517334, + "learning_rate": 6.104718609296709e-07, + "loss": 0.0729, + "step": 2680, + "video_reward_cumulative_accuracy": 0.8473880597014926 + }, + { + "epoch": 0.7957850994360345, + "grad_norm": 0.5687323212623596, + "learning_rate": 6.087767327037944e-07, + "loss": 0.0221, + "step": 2681, + "video_reward_cumulative_accuracy": 0.8474449832152182 + }, + { + "epoch": 0.7960819234194123, + "grad_norm": 2.2972726821899414, + "learning_rate": 6.070836348896536e-07, + "loss": 0.0153, + "step": 2682, + "video_reward_cumulative_accuracy": 0.8475018642803878 + }, + { + "epoch": 0.7963787474027901, + "grad_norm": 0.8837341666221619, + "learning_rate": 6.053925693049523e-07, + "loss": 0.0174, + "step": 2683, + "video_reward_cumulative_accuracy": 0.8475587029444651 + }, + { + "epoch": 0.796675571386168, + "grad_norm": 3.468062400817871, + "learning_rate": 6.037035377652143e-07, + "loss": 0.0426, + "step": 2684, + "video_reward_cumulative_accuracy": 0.8476154992548435 + }, + { + "epoch": 0.7969723953695459, + "grad_norm": 2.4997260570526123, + "learning_rate": 6.020165420837786e-07, + "loss": 0.0243, + "step": 2685, + "video_reward_cumulative_accuracy": 0.8476722532588454 + }, + { + "epoch": 0.7972692193529237, + "grad_norm": 2.9322338104248047, + "learning_rate": 6.003315840717991e-07, + "loss": 0.0768, + "step": 2686, + "video_reward_cumulative_accuracy": 0.847728965003723 + }, + { + "epoch": 0.7975660433363015, + "grad_norm": 2.4431042671203613, + "learning_rate": 5.986486655382423e-07, + "loss": 0.0188, + "step": 2687, + "video_reward_cumulative_accuracy": 0.847785634536658 + }, + { + "epoch": 0.7978628673196795, + "grad_norm": 1.0281823873519897, + "learning_rate": 5.96967788289883e-07, + "loss": 0.0198, + "step": 2688, + "video_reward_cumulative_accuracy": 0.8478422619047619 + }, + { + "epoch": 0.7981596913030573, + "grad_norm": 3.702180862426758, + "learning_rate": 5.95288954131307e-07, + "loss": 0.0398, + "step": 2689, + "video_reward_cumulative_accuracy": 0.847712904425437 + }, + { + "epoch": 0.7984565152864351, + "grad_norm": 3.876319646835327, + "learning_rate": 5.93612164864906e-07, + "loss": 0.0481, + "step": 2690, + "video_reward_cumulative_accuracy": 0.8477695167286246 + }, + { + "epoch": 0.798753339269813, + "grad_norm": 1.6658989191055298, + "learning_rate": 5.919374222908753e-07, + "loss": 0.0348, + "step": 2691, + "video_reward_cumulative_accuracy": 0.8478260869565217 + }, + { + "epoch": 0.7990501632531909, + "grad_norm": 1.9231022596359253, + "learning_rate": 5.902647282072149e-07, + "loss": 0.0492, + "step": 2692, + "video_reward_cumulative_accuracy": 0.8478826151560178 + }, + { + "epoch": 0.7993469872365687, + "grad_norm": 1.0569583177566528, + "learning_rate": 5.885940844097226e-07, + "loss": 0.0217, + "step": 2693, + "video_reward_cumulative_accuracy": 0.8475677682881545 + }, + { + "epoch": 0.7996438112199465, + "grad_norm": 0.5432111024856567, + "learning_rate": 5.869254926919976e-07, + "loss": 0.011, + "step": 2694, + "video_reward_cumulative_accuracy": 0.8476243504083147 + }, + { + "epoch": 0.7999406352033245, + "grad_norm": 1.2595707178115845, + "learning_rate": 5.852589548454346e-07, + "loss": 0.0299, + "step": 2695, + "video_reward_cumulative_accuracy": 0.8476808905380334 + }, + { + "epoch": 0.8002374591867023, + "grad_norm": 1.7451565265655518, + "learning_rate": 5.835944726592241e-07, + "loss": 0.0576, + "step": 2696, + "video_reward_cumulative_accuracy": 0.8477373887240356 + }, + { + "epoch": 0.8005342831700801, + "grad_norm": 2.6311933994293213, + "learning_rate": 5.8193204792035e-07, + "loss": 0.0461, + "step": 2697, + "video_reward_cumulative_accuracy": 0.8477938450129774 + }, + { + "epoch": 0.800831107153458, + "grad_norm": 1.7497179508209229, + "learning_rate": 5.802716824135849e-07, + "loss": 0.0388, + "step": 2698, + "video_reward_cumulative_accuracy": 0.8478502594514455 + }, + { + "epoch": 0.8011279311368359, + "grad_norm": 0.3311484754085541, + "learning_rate": 5.786133779214939e-07, + "loss": 0.0024, + "step": 2699, + "video_reward_cumulative_accuracy": 0.8479066320859577 + }, + { + "epoch": 0.8014247551202137, + "grad_norm": 2.232048749923706, + "learning_rate": 5.769571362244258e-07, + "loss": 0.0189, + "step": 2700, + "video_reward_cumulative_accuracy": 0.8479629629629629 + }, + { + "epoch": 0.8017215791035915, + "grad_norm": 1.2579126358032227, + "learning_rate": 5.753029591005197e-07, + "loss": 0.0104, + "step": 2701, + "video_reward_cumulative_accuracy": 0.8480192521288412 + }, + { + "epoch": 0.8020184030869695, + "grad_norm": 1.1070455312728882, + "learning_rate": 5.736508483256931e-07, + "loss": 0.0259, + "step": 2702, + "video_reward_cumulative_accuracy": 0.8478904515173945 + }, + { + "epoch": 0.8023152270703473, + "grad_norm": 1.2897884845733643, + "learning_rate": 5.720008056736476e-07, + "loss": 0.0082, + "step": 2703, + "video_reward_cumulative_accuracy": 0.8479467258601554 + }, + { + "epoch": 0.8026120510537251, + "grad_norm": 2.983745813369751, + "learning_rate": 5.703528329158653e-07, + "loss": 0.037, + "step": 2704, + "video_reward_cumulative_accuracy": 0.8478180473372781 + }, + { + "epoch": 0.802908875037103, + "grad_norm": 3.134145975112915, + "learning_rate": 5.687069318216027e-07, + "loss": 0.0506, + "step": 2705, + "video_reward_cumulative_accuracy": 0.8476894639556377 + }, + { + "epoch": 0.8032056990204809, + "grad_norm": 1.839669108390808, + "learning_rate": 5.670631041578969e-07, + "loss": 0.0386, + "step": 2706, + "video_reward_cumulative_accuracy": 0.8477457501847746 + }, + { + "epoch": 0.8035025230038587, + "grad_norm": 1.0478723049163818, + "learning_rate": 5.654213516895549e-07, + "loss": 0.0117, + "step": 2707, + "video_reward_cumulative_accuracy": 0.8478019948282232 + }, + { + "epoch": 0.8037993469872365, + "grad_norm": 2.9105279445648193, + "learning_rate": 5.637816761791573e-07, + "loss": 0.0334, + "step": 2708, + "video_reward_cumulative_accuracy": 0.8478581979320532 + }, + { + "epoch": 0.8040961709706145, + "grad_norm": 0.682174026966095, + "learning_rate": 5.621440793870564e-07, + "loss": 0.0109, + "step": 2709, + "video_reward_cumulative_accuracy": 0.8479143595422666 + }, + { + "epoch": 0.8043929949539923, + "grad_norm": 1.712660312652588, + "learning_rate": 5.605085630713686e-07, + "loss": 0.0253, + "step": 2710, + "video_reward_cumulative_accuracy": 0.847970479704797 + }, + { + "epoch": 0.8046898189373701, + "grad_norm": 1.6642423868179321, + "learning_rate": 5.588751289879823e-07, + "loss": 0.0174, + "step": 2711, + "video_reward_cumulative_accuracy": 0.8480265584655109 + }, + { + "epoch": 0.804986642920748, + "grad_norm": 0.8261018395423889, + "learning_rate": 5.572437788905455e-07, + "loss": 0.0173, + "step": 2712, + "video_reward_cumulative_accuracy": 0.8480825958702065 + }, + { + "epoch": 0.8052834669041259, + "grad_norm": 1.6964601278305054, + "learning_rate": 5.556145145304722e-07, + "loss": 0.0515, + "step": 2713, + "video_reward_cumulative_accuracy": 0.8481385919646148 + }, + { + "epoch": 0.8055802908875037, + "grad_norm": 1.0702115297317505, + "learning_rate": 5.53987337656935e-07, + "loss": 0.015, + "step": 2714, + "video_reward_cumulative_accuracy": 0.8481945467943994 + }, + { + "epoch": 0.8058771148708815, + "grad_norm": 3.9482622146606445, + "learning_rate": 5.523622500168651e-07, + "loss": 0.0711, + "step": 2715, + "video_reward_cumulative_accuracy": 0.8482504604051565 + }, + { + "epoch": 0.8061739388542595, + "grad_norm": 1.610306739807129, + "learning_rate": 5.507392533549549e-07, + "loss": 0.0158, + "step": 2716, + "video_reward_cumulative_accuracy": 0.8483063328424153 + }, + { + "epoch": 0.8064707628376373, + "grad_norm": 2.1032094955444336, + "learning_rate": 5.491183494136462e-07, + "loss": 0.0246, + "step": 2717, + "video_reward_cumulative_accuracy": 0.8483621641516378 + }, + { + "epoch": 0.8067675868210151, + "grad_norm": 2.930027484893799, + "learning_rate": 5.474995399331385e-07, + "loss": 0.0331, + "step": 2718, + "video_reward_cumulative_accuracy": 0.8484179543782193 + }, + { + "epoch": 0.807064410804393, + "grad_norm": 3.511140823364258, + "learning_rate": 5.458828266513788e-07, + "loss": 0.0481, + "step": 2719, + "video_reward_cumulative_accuracy": 0.8482898124310408 + }, + { + "epoch": 0.8073612347877709, + "grad_norm": 1.0248184204101562, + "learning_rate": 5.442682113040674e-07, + "loss": 0.0169, + "step": 2720, + "video_reward_cumulative_accuracy": 0.8481617647058823 + }, + { + "epoch": 0.8076580587711487, + "grad_norm": 0.7856757044792175, + "learning_rate": 5.426556956246495e-07, + "loss": 0.0092, + "step": 2721, + "video_reward_cumulative_accuracy": 0.8482175670709298 + }, + { + "epoch": 0.8079548827545265, + "grad_norm": 5.376564979553223, + "learning_rate": 5.410452813443182e-07, + "loss": 0.0535, + "step": 2722, + "video_reward_cumulative_accuracy": 0.8482733284349743 + }, + { + "epoch": 0.8082517067379045, + "grad_norm": 4.981514930725098, + "learning_rate": 5.394369701920096e-07, + "loss": 0.0552, + "step": 2723, + "video_reward_cumulative_accuracy": 0.8483290488431876 + }, + { + "epoch": 0.8085485307212823, + "grad_norm": 0.30428871512413025, + "learning_rate": 5.378307638944008e-07, + "loss": 0.0053, + "step": 2724, + "video_reward_cumulative_accuracy": 0.8483847283406755 + }, + { + "epoch": 0.8088453547046601, + "grad_norm": 0.45453497767448425, + "learning_rate": 5.362266641759103e-07, + "loss": 0.0064, + "step": 2725, + "video_reward_cumulative_accuracy": 0.848440366972477 + }, + { + "epoch": 0.809142178688038, + "grad_norm": 0.8478443026542664, + "learning_rate": 5.346246727586954e-07, + "loss": 0.0151, + "step": 2726, + "video_reward_cumulative_accuracy": 0.8484959647835657 + }, + { + "epoch": 0.8094390026714159, + "grad_norm": 1.4090654850006104, + "learning_rate": 5.330247913626494e-07, + "loss": 0.0297, + "step": 2727, + "video_reward_cumulative_accuracy": 0.8485515218188485 + }, + { + "epoch": 0.8097358266547937, + "grad_norm": 1.971437931060791, + "learning_rate": 5.314270217054004e-07, + "loss": 0.0297, + "step": 2728, + "video_reward_cumulative_accuracy": 0.8486070381231672 + }, + { + "epoch": 0.8100326506381715, + "grad_norm": 1.2329754829406738, + "learning_rate": 5.298313655023083e-07, + "loss": 0.0111, + "step": 2729, + "video_reward_cumulative_accuracy": 0.8486625137412972 + }, + { + "epoch": 0.8103294746215495, + "grad_norm": 3.727600574493408, + "learning_rate": 5.282378244664655e-07, + "loss": 0.0434, + "step": 2730, + "video_reward_cumulative_accuracy": 0.8487179487179487 + }, + { + "epoch": 0.8106262986049273, + "grad_norm": 2.156374454498291, + "learning_rate": 5.266464003086927e-07, + "loss": 0.0274, + "step": 2731, + "video_reward_cumulative_accuracy": 0.8485902599780301 + }, + { + "epoch": 0.8109231225883051, + "grad_norm": 4.032077789306641, + "learning_rate": 5.250570947375383e-07, + "loss": 0.0297, + "step": 2732, + "video_reward_cumulative_accuracy": 0.8484626647144948 + }, + { + "epoch": 0.811219946571683, + "grad_norm": 2.540410280227661, + "learning_rate": 5.234699094592771e-07, + "loss": 0.0311, + "step": 2733, + "video_reward_cumulative_accuracy": 0.8485181119648738 + }, + { + "epoch": 0.8115167705550609, + "grad_norm": 1.1667871475219727, + "learning_rate": 5.21884846177905e-07, + "loss": 0.0121, + "step": 2734, + "video_reward_cumulative_accuracy": 0.8485735186539868 + }, + { + "epoch": 0.8118135945384387, + "grad_norm": 4.324409484863281, + "learning_rate": 5.203019065951417e-07, + "loss": 0.0714, + "step": 2735, + "video_reward_cumulative_accuracy": 0.8486288848263254 + }, + { + "epoch": 0.8121104185218165, + "grad_norm": 1.3035205602645874, + "learning_rate": 5.187210924104269e-07, + "loss": 0.0293, + "step": 2736, + "video_reward_cumulative_accuracy": 0.8486842105263158 + }, + { + "epoch": 0.8124072425051945, + "grad_norm": 1.0958853960037231, + "learning_rate": 5.171424053209184e-07, + "loss": 0.0078, + "step": 2737, + "video_reward_cumulative_accuracy": 0.8487394957983193 + }, + { + "epoch": 0.8127040664885723, + "grad_norm": 1.4998290538787842, + "learning_rate": 5.155658470214889e-07, + "loss": 0.0165, + "step": 2738, + "video_reward_cumulative_accuracy": 0.8487947406866326 + }, + { + "epoch": 0.8130008904719501, + "grad_norm": 3.2503786087036133, + "learning_rate": 5.139914192047271e-07, + "loss": 0.0309, + "step": 2739, + "video_reward_cumulative_accuracy": 0.8486673968601679 + }, + { + "epoch": 0.813297714455328, + "grad_norm": 3.0941522121429443, + "learning_rate": 5.124191235609344e-07, + "loss": 0.0287, + "step": 2740, + "video_reward_cumulative_accuracy": 0.8485401459854015 + }, + { + "epoch": 0.8135945384387059, + "grad_norm": 1.8579998016357422, + "learning_rate": 5.108489617781226e-07, + "loss": 0.0179, + "step": 2741, + "video_reward_cumulative_accuracy": 0.848595403137541 + }, + { + "epoch": 0.8138913624220837, + "grad_norm": 0.7049815654754639, + "learning_rate": 5.092809355420137e-07, + "loss": 0.0053, + "step": 2742, + "video_reward_cumulative_accuracy": 0.8486506199854121 + }, + { + "epoch": 0.8141881864054615, + "grad_norm": 1.6860101222991943, + "learning_rate": 5.077150465360342e-07, + "loss": 0.0217, + "step": 2743, + "video_reward_cumulative_accuracy": 0.8487057965730952 + }, + { + "epoch": 0.8144850103888395, + "grad_norm": 2.035961389541626, + "learning_rate": 5.06151296441319e-07, + "loss": 0.0333, + "step": 2744, + "video_reward_cumulative_accuracy": 0.8487609329446064 + }, + { + "epoch": 0.8147818343722173, + "grad_norm": 3.6466519832611084, + "learning_rate": 5.045896869367056e-07, + "loss": 0.0462, + "step": 2745, + "video_reward_cumulative_accuracy": 0.8486338797814208 + }, + { + "epoch": 0.8150786583555951, + "grad_norm": 2.12888503074646, + "learning_rate": 5.030302196987333e-07, + "loss": 0.0177, + "step": 2746, + "video_reward_cumulative_accuracy": 0.8486890021849963 + }, + { + "epoch": 0.815375482338973, + "grad_norm": 0.45268264412879944, + "learning_rate": 5.014728964016422e-07, + "loss": 0.0052, + "step": 2747, + "video_reward_cumulative_accuracy": 0.8487440844557699 + }, + { + "epoch": 0.8156723063223509, + "grad_norm": 3.4227840900421143, + "learning_rate": 4.999177187173685e-07, + "loss": 0.1001, + "step": 2748, + "video_reward_cumulative_accuracy": 0.8487991266375546 + }, + { + "epoch": 0.8159691303057287, + "grad_norm": 1.468361735343933, + "learning_rate": 4.983646883155479e-07, + "loss": 0.0192, + "step": 2749, + "video_reward_cumulative_accuracy": 0.8488541287740997 + }, + { + "epoch": 0.8162659542891065, + "grad_norm": 3.7149956226348877, + "learning_rate": 4.968138068635076e-07, + "loss": 0.0808, + "step": 2750, + "video_reward_cumulative_accuracy": 0.8487272727272728 + }, + { + "epoch": 0.8165627782724845, + "grad_norm": 1.8487035036087036, + "learning_rate": 4.952650760262706e-07, + "loss": 0.0174, + "step": 2751, + "video_reward_cumulative_accuracy": 0.8487822609960014 + }, + { + "epoch": 0.8168596022558623, + "grad_norm": 3.4142134189605713, + "learning_rate": 4.937184974665504e-07, + "loss": 0.0341, + "step": 2752, + "video_reward_cumulative_accuracy": 0.8488372093023255 + }, + { + "epoch": 0.8171564262392401, + "grad_norm": 2.0607471466064453, + "learning_rate": 4.921740728447474e-07, + "loss": 0.033, + "step": 2753, + "video_reward_cumulative_accuracy": 0.8487104976389394 + }, + { + "epoch": 0.8174532502226179, + "grad_norm": 0.7195733189582825, + "learning_rate": 4.906318038189531e-07, + "loss": 0.0067, + "step": 2754, + "video_reward_cumulative_accuracy": 0.8487654320987654 + }, + { + "epoch": 0.8177500742059959, + "grad_norm": 0.9425991177558899, + "learning_rate": 4.890916920449415e-07, + "loss": 0.0147, + "step": 2755, + "video_reward_cumulative_accuracy": 0.8488203266787658 + }, + { + "epoch": 0.8180468981893737, + "grad_norm": 6.290672302246094, + "learning_rate": 4.87553739176172e-07, + "loss": 0.0799, + "step": 2756, + "video_reward_cumulative_accuracy": 0.8488751814223512 + }, + { + "epoch": 0.8183437221727515, + "grad_norm": 3.862820863723755, + "learning_rate": 4.860179468637882e-07, + "loss": 0.0691, + "step": 2757, + "video_reward_cumulative_accuracy": 0.848929996372869 + }, + { + "epoch": 0.8186405461561295, + "grad_norm": 1.6942017078399658, + "learning_rate": 4.844843167566104e-07, + "loss": 0.0183, + "step": 2758, + "video_reward_cumulative_accuracy": 0.8489847715736041 + }, + { + "epoch": 0.8189373701395073, + "grad_norm": 4.168092727661133, + "learning_rate": 4.829528505011405e-07, + "loss": 0.091, + "step": 2759, + "video_reward_cumulative_accuracy": 0.8488582819862269 + }, + { + "epoch": 0.8192341941228851, + "grad_norm": 0.7059163451194763, + "learning_rate": 4.81423549741555e-07, + "loss": 0.0061, + "step": 2760, + "video_reward_cumulative_accuracy": 0.8489130434782609 + }, + { + "epoch": 0.8195310181062629, + "grad_norm": 1.3192722797393799, + "learning_rate": 4.798964161197075e-07, + "loss": 0.0222, + "step": 2761, + "video_reward_cumulative_accuracy": 0.8489677653024267 + }, + { + "epoch": 0.8198278420896409, + "grad_norm": 0.7440000772476196, + "learning_rate": 4.78371451275124e-07, + "loss": 0.0124, + "step": 2762, + "video_reward_cumulative_accuracy": 0.8490224475018103 + }, + { + "epoch": 0.8201246660730187, + "grad_norm": 3.4163613319396973, + "learning_rate": 4.768486568450018e-07, + "loss": 0.0335, + "step": 2763, + "video_reward_cumulative_accuracy": 0.8490770901194354 + }, + { + "epoch": 0.8204214900563965, + "grad_norm": 3.9325835704803467, + "learning_rate": 4.7532803446420997e-07, + "loss": 0.038, + "step": 2764, + "video_reward_cumulative_accuracy": 0.8491316931982634 + }, + { + "epoch": 0.8207183140397745, + "grad_norm": 2.8216254711151123, + "learning_rate": 4.7380958576528247e-07, + "loss": 0.0307, + "step": 2765, + "video_reward_cumulative_accuracy": 0.849005424954792 + }, + { + "epoch": 0.8210151380231523, + "grad_norm": 0.6501257419586182, + "learning_rate": 4.722933123784221e-07, + "loss": 0.0089, + "step": 2766, + "video_reward_cumulative_accuracy": 0.849060014461316 + }, + { + "epoch": 0.8213119620065301, + "grad_norm": 5.257883071899414, + "learning_rate": 4.707792159314956e-07, + "loss": 0.0805, + "step": 2767, + "video_reward_cumulative_accuracy": 0.8487531622696061 + }, + { + "epoch": 0.8216087859899079, + "grad_norm": 2.2783634662628174, + "learning_rate": 4.6926729805003234e-07, + "loss": 0.0209, + "step": 2768, + "video_reward_cumulative_accuracy": 0.848807803468208 + }, + { + "epoch": 0.8219056099732859, + "grad_norm": 2.1635773181915283, + "learning_rate": 4.677575603572235e-07, + "loss": 0.0443, + "step": 2769, + "video_reward_cumulative_accuracy": 0.8488624052004333 + }, + { + "epoch": 0.8222024339566637, + "grad_norm": 0.6879743933677673, + "learning_rate": 4.6625000447391795e-07, + "loss": 0.0059, + "step": 2770, + "video_reward_cumulative_accuracy": 0.8489169675090252 + }, + { + "epoch": 0.8224992579400415, + "grad_norm": 3.005458116531372, + "learning_rate": 4.647446320186236e-07, + "loss": 0.1152, + "step": 2771, + "video_reward_cumulative_accuracy": 0.8489714904366654 + }, + { + "epoch": 0.8227960819234195, + "grad_norm": 3.669316291809082, + "learning_rate": 4.6324144460750427e-07, + "loss": 0.032, + "step": 2772, + "video_reward_cumulative_accuracy": 0.849025974025974 + }, + { + "epoch": 0.8230929059067973, + "grad_norm": 2.6024012565612793, + "learning_rate": 4.6174044385437765e-07, + "loss": 0.0662, + "step": 2773, + "video_reward_cumulative_accuracy": 0.8489001081860801 + }, + { + "epoch": 0.8233897298901751, + "grad_norm": 1.4586189985275269, + "learning_rate": 4.602416313707131e-07, + "loss": 0.0176, + "step": 2774, + "video_reward_cumulative_accuracy": 0.8489545782263879 + }, + { + "epoch": 0.8236865538735529, + "grad_norm": 1.6324635744094849, + "learning_rate": 4.5874500876563144e-07, + "loss": 0.0279, + "step": 2775, + "video_reward_cumulative_accuracy": 0.849009009009009 + }, + { + "epoch": 0.8239833778569309, + "grad_norm": 0.6080179810523987, + "learning_rate": 4.572505776459024e-07, + "loss": 0.0066, + "step": 2776, + "video_reward_cumulative_accuracy": 0.8490634005763689 + }, + { + "epoch": 0.8242802018403087, + "grad_norm": 2.1260578632354736, + "learning_rate": 4.557583396159429e-07, + "loss": 0.0368, + "step": 2777, + "video_reward_cumulative_accuracy": 0.8489377025567159 + }, + { + "epoch": 0.8245770258236865, + "grad_norm": 3.0380239486694336, + "learning_rate": 4.542682962778161e-07, + "loss": 0.0527, + "step": 2778, + "video_reward_cumulative_accuracy": 0.8489920806335494 + }, + { + "epoch": 0.8248738498070645, + "grad_norm": 1.556333065032959, + "learning_rate": 4.5278044923122654e-07, + "loss": 0.0195, + "step": 2779, + "video_reward_cumulative_accuracy": 0.8488664987405542 + }, + { + "epoch": 0.8251706737904423, + "grad_norm": 1.5159677267074585, + "learning_rate": 4.512948000735234e-07, + "loss": 0.0095, + "step": 2780, + "video_reward_cumulative_accuracy": 0.8489208633093526 + }, + { + "epoch": 0.8254674977738201, + "grad_norm": 0.4575834274291992, + "learning_rate": 4.498113503996948e-07, + "loss": 0.0059, + "step": 2781, + "video_reward_cumulative_accuracy": 0.8489751887810141 + }, + { + "epoch": 0.8257643217571979, + "grad_norm": 1.370509147644043, + "learning_rate": 4.4833010180236836e-07, + "loss": 0.0194, + "step": 2782, + "video_reward_cumulative_accuracy": 0.8490294751976994 + }, + { + "epoch": 0.8260611457405759, + "grad_norm": 1.5616401433944702, + "learning_rate": 4.4685105587180895e-07, + "loss": 0.0262, + "step": 2783, + "video_reward_cumulative_accuracy": 0.8490837226015092 + }, + { + "epoch": 0.8263579697239537, + "grad_norm": 0.9546812772750854, + "learning_rate": 4.453742141959141e-07, + "loss": 0.0074, + "step": 2784, + "video_reward_cumulative_accuracy": 0.8491379310344828 + }, + { + "epoch": 0.8266547937073315, + "grad_norm": 1.1593352556228638, + "learning_rate": 4.4389957836021765e-07, + "loss": 0.0387, + "step": 2785, + "video_reward_cumulative_accuracy": 0.8491921005385996 + }, + { + "epoch": 0.8269516176907095, + "grad_norm": 1.6280272006988525, + "learning_rate": 4.424271499478844e-07, + "loss": 0.0173, + "step": 2786, + "video_reward_cumulative_accuracy": 0.8492462311557789 + }, + { + "epoch": 0.8272484416740873, + "grad_norm": 3.1349103450775146, + "learning_rate": 4.409569305397088e-07, + "loss": 0.0256, + "step": 2787, + "video_reward_cumulative_accuracy": 0.8493003229278795 + }, + { + "epoch": 0.8275452656574651, + "grad_norm": 1.798938274383545, + "learning_rate": 4.394889217141152e-07, + "loss": 0.0205, + "step": 2788, + "video_reward_cumulative_accuracy": 0.8493543758967002 + }, + { + "epoch": 0.8278420896408429, + "grad_norm": 1.2877916097640991, + "learning_rate": 4.38023125047152e-07, + "loss": 0.0248, + "step": 2789, + "video_reward_cumulative_accuracy": 0.84940839010398 + }, + { + "epoch": 0.8281389136242209, + "grad_norm": 1.9247994422912598, + "learning_rate": 4.365595421124949e-07, + "loss": 0.037, + "step": 2790, + "video_reward_cumulative_accuracy": 0.8494623655913979 + }, + { + "epoch": 0.8284357376075987, + "grad_norm": 1.2538220882415771, + "learning_rate": 4.35098174481442e-07, + "loss": 0.041, + "step": 2791, + "video_reward_cumulative_accuracy": 0.8495163024005733 + }, + { + "epoch": 0.8287325615909765, + "grad_norm": 0.8949556946754456, + "learning_rate": 4.336390237229138e-07, + "loss": 0.0087, + "step": 2792, + "video_reward_cumulative_accuracy": 0.8495702005730659 + }, + { + "epoch": 0.8290293855743545, + "grad_norm": 2.420616626739502, + "learning_rate": 4.321820914034502e-07, + "loss": 0.0267, + "step": 2793, + "video_reward_cumulative_accuracy": 0.849624060150376 + }, + { + "epoch": 0.8293262095577323, + "grad_norm": 2.788456916809082, + "learning_rate": 4.307273790872091e-07, + "loss": 0.0342, + "step": 2794, + "video_reward_cumulative_accuracy": 0.8496778811739442 + }, + { + "epoch": 0.8296230335411101, + "grad_norm": 1.2072490453720093, + "learning_rate": 4.292748883359657e-07, + "loss": 0.0171, + "step": 2795, + "video_reward_cumulative_accuracy": 0.8497316636851521 + }, + { + "epoch": 0.8299198575244879, + "grad_norm": 2.1791484355926514, + "learning_rate": 4.278246207091083e-07, + "loss": 0.0144, + "step": 2796, + "video_reward_cumulative_accuracy": 0.8497854077253219 + }, + { + "epoch": 0.8302166815078659, + "grad_norm": 1.2288272380828857, + "learning_rate": 4.263765777636425e-07, + "loss": 0.0077, + "step": 2797, + "video_reward_cumulative_accuracy": 0.8498391133357168 + }, + { + "epoch": 0.8305135054912437, + "grad_norm": 0.8437438011169434, + "learning_rate": 4.2493076105418114e-07, + "loss": 0.0226, + "step": 2798, + "video_reward_cumulative_accuracy": 0.8498927805575411 + }, + { + "epoch": 0.8308103294746215, + "grad_norm": 1.1709120273590088, + "learning_rate": 4.2348717213294923e-07, + "loss": 0.0083, + "step": 2799, + "video_reward_cumulative_accuracy": 0.84994640943194 + }, + { + "epoch": 0.8311071534579995, + "grad_norm": 1.575054407119751, + "learning_rate": 4.2204581254978034e-07, + "loss": 0.0187, + "step": 2800, + "video_reward_cumulative_accuracy": 0.85 + }, + { + "epoch": 0.8311071534579995, + "eval_runtime": 131.0431, + "eval_samples_per_second": 6.021, + "eval_steps_per_second": 0.755, + "eval_test_set_accuracy": 0.8194444444444444, + "step": 2800 + }, + { + "epoch": 0.8314039774413773, + "grad_norm": 1.4664020538330078, + "learning_rate": 4.2060668385211196e-07, + "loss": 0.0194, + "step": 2801, + "video_reward_cumulative_accuracy": 0.850053552302749 + }, + { + "epoch": 0.8317008014247551, + "grad_norm": 3.3089983463287354, + "learning_rate": 4.1916978758499095e-07, + "loss": 0.0237, + "step": 2802, + "video_reward_cumulative_accuracy": 0.8501070663811563 + }, + { + "epoch": 0.8319976254081329, + "grad_norm": 1.9900091886520386, + "learning_rate": 4.1773512529106305e-07, + "loss": 0.039, + "step": 2803, + "video_reward_cumulative_accuracy": 0.8499821619693186 + }, + { + "epoch": 0.8322944493915109, + "grad_norm": 1.6034806966781616, + "learning_rate": 4.163026985105778e-07, + "loss": 0.0162, + "step": 2804, + "video_reward_cumulative_accuracy": 0.8500356633380884 + }, + { + "epoch": 0.8325912733748887, + "grad_norm": 4.051023960113525, + "learning_rate": 4.1487250878138567e-07, + "loss": 0.0284, + "step": 2805, + "video_reward_cumulative_accuracy": 0.8500891265597148 + }, + { + "epoch": 0.8328880973582665, + "grad_norm": 1.4414976835250854, + "learning_rate": 4.134445576389315e-07, + "loss": 0.0089, + "step": 2806, + "video_reward_cumulative_accuracy": 0.8501425516749822 + }, + { + "epoch": 0.8331849213416445, + "grad_norm": 2.868297815322876, + "learning_rate": 4.1201884661626253e-07, + "loss": 0.051, + "step": 2807, + "video_reward_cumulative_accuracy": 0.8501959387246171 + }, + { + "epoch": 0.8334817453250223, + "grad_norm": 1.3796963691711426, + "learning_rate": 4.105953772440158e-07, + "loss": 0.0085, + "step": 2808, + "video_reward_cumulative_accuracy": 0.8502492877492878 + }, + { + "epoch": 0.8337785693084001, + "grad_norm": 5.215771198272705, + "learning_rate": 4.091741510504249e-07, + "loss": 0.0674, + "step": 2809, + "video_reward_cumulative_accuracy": 0.8503025987896048 + }, + { + "epoch": 0.8340753932917779, + "grad_norm": 1.1552796363830566, + "learning_rate": 4.0775516956131327e-07, + "loss": 0.0084, + "step": 2810, + "video_reward_cumulative_accuracy": 0.850355871886121 + }, + { + "epoch": 0.8343722172751559, + "grad_norm": 4.928788661956787, + "learning_rate": 4.0633843430009563e-07, + "loss": 0.0486, + "step": 2811, + "video_reward_cumulative_accuracy": 0.8502312344361437 + }, + { + "epoch": 0.8346690412585337, + "grad_norm": 0.6429322957992554, + "learning_rate": 4.049239467877747e-07, + "loss": 0.0093, + "step": 2812, + "video_reward_cumulative_accuracy": 0.8502844950213371 + }, + { + "epoch": 0.8349658652419115, + "grad_norm": 4.207029819488525, + "learning_rate": 4.0351170854294017e-07, + "loss": 0.0345, + "step": 2813, + "video_reward_cumulative_accuracy": 0.8503377177390686 + }, + { + "epoch": 0.8352626892252895, + "grad_norm": 1.94601309299469, + "learning_rate": 4.0210172108176767e-07, + "loss": 0.036, + "step": 2814, + "video_reward_cumulative_accuracy": 0.8503909026297086 + }, + { + "epoch": 0.8355595132086673, + "grad_norm": 0.2354949563741684, + "learning_rate": 4.0069398591801423e-07, + "loss": 0.003, + "step": 2815, + "video_reward_cumulative_accuracy": 0.8504440497335701 + }, + { + "epoch": 0.8358563371920451, + "grad_norm": 2.0666418075561523, + "learning_rate": 3.9928850456302073e-07, + "loss": 0.045, + "step": 2816, + "video_reward_cumulative_accuracy": 0.8504971590909091 + }, + { + "epoch": 0.8361531611754229, + "grad_norm": 0.41040000319480896, + "learning_rate": 3.9788527852570813e-07, + "loss": 0.0077, + "step": 2817, + "video_reward_cumulative_accuracy": 0.850550230741924 + }, + { + "epoch": 0.8364499851588009, + "grad_norm": 4.305946350097656, + "learning_rate": 3.964843093125753e-07, + "loss": 0.0499, + "step": 2818, + "video_reward_cumulative_accuracy": 0.8506032647267565 + }, + { + "epoch": 0.8367468091421787, + "grad_norm": 1.3210865259170532, + "learning_rate": 3.950855984276994e-07, + "loss": 0.0141, + "step": 2819, + "video_reward_cumulative_accuracy": 0.8506562610854913 + }, + { + "epoch": 0.8370436331255565, + "grad_norm": 1.932438850402832, + "learning_rate": 3.936891473727314e-07, + "loss": 0.0332, + "step": 2820, + "video_reward_cumulative_accuracy": 0.8507092198581561 + }, + { + "epoch": 0.8373404571089345, + "grad_norm": 1.8300782442092896, + "learning_rate": 3.9229495764689734e-07, + "loss": 0.0406, + "step": 2821, + "video_reward_cumulative_accuracy": 0.8507621410847217 + }, + { + "epoch": 0.8376372810923123, + "grad_norm": 0.9342535734176636, + "learning_rate": 3.9090303074699546e-07, + "loss": 0.0265, + "step": 2822, + "video_reward_cumulative_accuracy": 0.8506378454996456 + }, + { + "epoch": 0.8379341050756901, + "grad_norm": 2.6911323070526123, + "learning_rate": 3.89513368167394e-07, + "loss": 0.0273, + "step": 2823, + "video_reward_cumulative_accuracy": 0.8506907545164718 + }, + { + "epoch": 0.8382309290590679, + "grad_norm": 1.113362431526184, + "learning_rate": 3.881259714000318e-07, + "loss": 0.0098, + "step": 2824, + "video_reward_cumulative_accuracy": 0.8507436260623229 + }, + { + "epoch": 0.8385277530424459, + "grad_norm": 2.1424875259399414, + "learning_rate": 3.8674084193441235e-07, + "loss": 0.0223, + "step": 2825, + "video_reward_cumulative_accuracy": 0.8507964601769912 + }, + { + "epoch": 0.8388245770258237, + "grad_norm": 1.2196357250213623, + "learning_rate": 3.8535798125760695e-07, + "loss": 0.0245, + "step": 2826, + "video_reward_cumulative_accuracy": 0.8508492569002123 + }, + { + "epoch": 0.8391214010092015, + "grad_norm": 1.4248687028884888, + "learning_rate": 3.839773908542513e-07, + "loss": 0.0097, + "step": 2827, + "video_reward_cumulative_accuracy": 0.8509020162716661 + }, + { + "epoch": 0.8394182249925795, + "grad_norm": 3.166527032852173, + "learning_rate": 3.8259907220654286e-07, + "loss": 0.0677, + "step": 2828, + "video_reward_cumulative_accuracy": 0.8509547383309759 + }, + { + "epoch": 0.8397150489759573, + "grad_norm": 1.9461520910263062, + "learning_rate": 3.81223026794241e-07, + "loss": 0.0228, + "step": 2829, + "video_reward_cumulative_accuracy": 0.8510074231177094 + }, + { + "epoch": 0.8400118729593351, + "grad_norm": 4.6057353019714355, + "learning_rate": 3.798492560946632e-07, + "loss": 0.092, + "step": 2830, + "video_reward_cumulative_accuracy": 0.8508833922261484 + }, + { + "epoch": 0.8403086969427129, + "grad_norm": 1.7150532007217407, + "learning_rate": 3.7847776158268594e-07, + "loss": 0.0519, + "step": 2831, + "video_reward_cumulative_accuracy": 0.8507594489579654 + }, + { + "epoch": 0.8406055209260909, + "grad_norm": 4.691923141479492, + "learning_rate": 3.771085447307418e-07, + "loss": 0.0646, + "step": 2832, + "video_reward_cumulative_accuracy": 0.850635593220339 + }, + { + "epoch": 0.8409023449094687, + "grad_norm": 1.0979514122009277, + "learning_rate": 3.757416070088185e-07, + "loss": 0.0091, + "step": 2833, + "video_reward_cumulative_accuracy": 0.8506883162725026 + }, + { + "epoch": 0.8411991688928465, + "grad_norm": 0.6728662848472595, + "learning_rate": 3.7437694988445517e-07, + "loss": 0.0061, + "step": 2834, + "video_reward_cumulative_accuracy": 0.8507410021171489 + }, + { + "epoch": 0.8414959928762245, + "grad_norm": 3.404707193374634, + "learning_rate": 3.730145748227443e-07, + "loss": 0.0272, + "step": 2835, + "video_reward_cumulative_accuracy": 0.8507936507936508 + }, + { + "epoch": 0.8417928168596023, + "grad_norm": 1.896315097808838, + "learning_rate": 3.716544832863275e-07, + "loss": 0.0159, + "step": 2836, + "video_reward_cumulative_accuracy": 0.8506699576868829 + }, + { + "epoch": 0.8420896408429801, + "grad_norm": 1.7696223258972168, + "learning_rate": 3.702966767353958e-07, + "loss": 0.052, + "step": 2837, + "video_reward_cumulative_accuracy": 0.8505463517800493 + }, + { + "epoch": 0.8423864648263579, + "grad_norm": 2.6428275108337402, + "learning_rate": 3.6894115662768596e-07, + "loss": 0.0275, + "step": 2838, + "video_reward_cumulative_accuracy": 0.8504228329809725 + }, + { + "epoch": 0.8426832888097359, + "grad_norm": 1.312769889831543, + "learning_rate": 3.675879244184799e-07, + "loss": 0.0219, + "step": 2839, + "video_reward_cumulative_accuracy": 0.850475519549137 + }, + { + "epoch": 0.8429801127931137, + "grad_norm": 2.8779304027557373, + "learning_rate": 3.66236981560604e-07, + "loss": 0.0236, + "step": 2840, + "video_reward_cumulative_accuracy": 0.8503521126760564 + }, + { + "epoch": 0.8432769367764915, + "grad_norm": 3.4229025840759277, + "learning_rate": 3.6488832950442644e-07, + "loss": 0.0779, + "step": 2841, + "video_reward_cumulative_accuracy": 0.8502287926786343 + }, + { + "epoch": 0.8435737607598695, + "grad_norm": 1.8767544031143188, + "learning_rate": 3.635419696978565e-07, + "loss": 0.0558, + "step": 2842, + "video_reward_cumulative_accuracy": 0.8501055594651654 + }, + { + "epoch": 0.8438705847432473, + "grad_norm": 0.48670053482055664, + "learning_rate": 3.621979035863421e-07, + "loss": 0.0078, + "step": 2843, + "video_reward_cumulative_accuracy": 0.8501582835033415 + }, + { + "epoch": 0.8441674087266251, + "grad_norm": 0.923923134803772, + "learning_rate": 3.6085613261286816e-07, + "loss": 0.0089, + "step": 2844, + "video_reward_cumulative_accuracy": 0.8502109704641351 + }, + { + "epoch": 0.8444642327100029, + "grad_norm": 2.631030797958374, + "learning_rate": 3.5951665821795686e-07, + "loss": 0.0444, + "step": 2845, + "video_reward_cumulative_accuracy": 0.8500878734622144 + }, + { + "epoch": 0.8447610566933809, + "grad_norm": 2.941474437713623, + "learning_rate": 3.5817948183966224e-07, + "loss": 0.0307, + "step": 2846, + "video_reward_cumulative_accuracy": 0.8501405481377372 + }, + { + "epoch": 0.8450578806767587, + "grad_norm": 1.852303385734558, + "learning_rate": 3.5684460491357457e-07, + "loss": 0.0394, + "step": 2847, + "video_reward_cumulative_accuracy": 0.8501931858096242 + }, + { + "epoch": 0.8453547046601365, + "grad_norm": 2.0034775733947754, + "learning_rate": 3.5551202887281423e-07, + "loss": 0.0212, + "step": 2848, + "video_reward_cumulative_accuracy": 0.8502457865168539 + }, + { + "epoch": 0.8456515286435144, + "grad_norm": 1.9412490129470825, + "learning_rate": 3.541817551480292e-07, + "loss": 0.037, + "step": 2849, + "video_reward_cumulative_accuracy": 0.8502983502983503 + }, + { + "epoch": 0.8459483526268923, + "grad_norm": 1.5758916139602661, + "learning_rate": 3.528537851673988e-07, + "loss": 0.0143, + "step": 2850, + "video_reward_cumulative_accuracy": 0.8501754385964913 + }, + { + "epoch": 0.8462451766102701, + "grad_norm": 2.0527806282043457, + "learning_rate": 3.5152812035662674e-07, + "loss": 0.0376, + "step": 2851, + "video_reward_cumulative_accuracy": 0.8500526131182041 + }, + { + "epoch": 0.8465420005936479, + "grad_norm": 1.3531450033187866, + "learning_rate": 3.502047621389426e-07, + "loss": 0.0074, + "step": 2852, + "video_reward_cumulative_accuracy": 0.8501051893408135 + }, + { + "epoch": 0.8468388245770259, + "grad_norm": 0.5980221033096313, + "learning_rate": 3.488837119351018e-07, + "loss": 0.0049, + "step": 2853, + "video_reward_cumulative_accuracy": 0.8501577287066246 + }, + { + "epoch": 0.8471356485604037, + "grad_norm": 1.4160398244857788, + "learning_rate": 3.4756497116337826e-07, + "loss": 0.0129, + "step": 2854, + "video_reward_cumulative_accuracy": 0.8502102312543798 + }, + { + "epoch": 0.8474324725437815, + "grad_norm": 1.262010097503662, + "learning_rate": 3.4624854123956916e-07, + "loss": 0.0192, + "step": 2855, + "video_reward_cumulative_accuracy": 0.850262697022767 + }, + { + "epoch": 0.8477292965271594, + "grad_norm": 0.3952392041683197, + "learning_rate": 3.449344235769886e-07, + "loss": 0.0078, + "step": 2856, + "video_reward_cumulative_accuracy": 0.8503151260504201 + }, + { + "epoch": 0.8480261205105373, + "grad_norm": 2.0614068508148193, + "learning_rate": 3.4362261958647e-07, + "loss": 0.0281, + "step": 2857, + "video_reward_cumulative_accuracy": 0.8503675183759188 + }, + { + "epoch": 0.8483229444939151, + "grad_norm": 3.423657178878784, + "learning_rate": 3.423131306763627e-07, + "loss": 0.0277, + "step": 2858, + "video_reward_cumulative_accuracy": 0.8504198740377886 + }, + { + "epoch": 0.8486197684772929, + "grad_norm": 0.3777397871017456, + "learning_rate": 3.4100595825252965e-07, + "loss": 0.005, + "step": 2859, + "video_reward_cumulative_accuracy": 0.8504721930745016 + }, + { + "epoch": 0.8489165924606709, + "grad_norm": 0.7437081336975098, + "learning_rate": 3.3970110371834814e-07, + "loss": 0.0104, + "step": 2860, + "video_reward_cumulative_accuracy": 0.8505244755244755 + }, + { + "epoch": 0.8492134164440487, + "grad_norm": 1.1904757022857666, + "learning_rate": 3.3839856847470485e-07, + "loss": 0.0124, + "step": 2861, + "video_reward_cumulative_accuracy": 0.8505767214260748 + }, + { + "epoch": 0.8495102404274265, + "grad_norm": 3.3361258506774902, + "learning_rate": 3.3709835391999846e-07, + "loss": 0.0594, + "step": 2862, + "video_reward_cumulative_accuracy": 0.8504542278127184 + }, + { + "epoch": 0.8498070644108044, + "grad_norm": 5.005155086517334, + "learning_rate": 3.3580046145013534e-07, + "loss": 0.0646, + "step": 2863, + "video_reward_cumulative_accuracy": 0.8503318197694726 + }, + { + "epoch": 0.8501038883941823, + "grad_norm": 1.4068641662597656, + "learning_rate": 3.3450489245852926e-07, + "loss": 0.0191, + "step": 2864, + "video_reward_cumulative_accuracy": 0.8502094972067039 + }, + { + "epoch": 0.8504007123775601, + "grad_norm": 1.33848237991333, + "learning_rate": 3.3321164833609976e-07, + "loss": 0.0272, + "step": 2865, + "video_reward_cumulative_accuracy": 0.8502617801047121 + }, + { + "epoch": 0.8506975363609379, + "grad_norm": 1.8083772659301758, + "learning_rate": 3.319207304712688e-07, + "loss": 0.0599, + "step": 2866, + "video_reward_cumulative_accuracy": 0.8503140265177949 + }, + { + "epoch": 0.8509943603443159, + "grad_norm": 3.3464255332946777, + "learning_rate": 3.306321402499627e-07, + "loss": 0.0327, + "step": 2867, + "video_reward_cumulative_accuracy": 0.8503662364841298 + }, + { + "epoch": 0.8512911843276937, + "grad_norm": 3.124976873397827, + "learning_rate": 3.2934587905560756e-07, + "loss": 0.0613, + "step": 2868, + "video_reward_cumulative_accuracy": 0.850418410041841 + }, + { + "epoch": 0.8515880083110715, + "grad_norm": 4.9768171310424805, + "learning_rate": 3.2806194826913107e-07, + "loss": 0.0197, + "step": 2869, + "video_reward_cumulative_accuracy": 0.8504705472289996 + }, + { + "epoch": 0.8518848322944494, + "grad_norm": 3.0971407890319824, + "learning_rate": 3.267803492689556e-07, + "loss": 0.0494, + "step": 2870, + "video_reward_cumulative_accuracy": 0.8505226480836237 + }, + { + "epoch": 0.8521816562778273, + "grad_norm": 0.6683565974235535, + "learning_rate": 3.2550108343100293e-07, + "loss": 0.0072, + "step": 2871, + "video_reward_cumulative_accuracy": 0.8505747126436781 + }, + { + "epoch": 0.8524784802612051, + "grad_norm": 0.6179012060165405, + "learning_rate": 3.242241521286893e-07, + "loss": 0.0036, + "step": 2872, + "video_reward_cumulative_accuracy": 0.8506267409470752 + }, + { + "epoch": 0.8527753042445829, + "grad_norm": 2.777172327041626, + "learning_rate": 3.2294955673292437e-07, + "loss": 0.023, + "step": 2873, + "video_reward_cumulative_accuracy": 0.8506787330316742 + }, + { + "epoch": 0.8530721282279609, + "grad_norm": 3.996541976928711, + "learning_rate": 3.2167729861211026e-07, + "loss": 0.0392, + "step": 2874, + "video_reward_cumulative_accuracy": 0.8505567153792624 + }, + { + "epoch": 0.8533689522113387, + "grad_norm": 1.6711208820343018, + "learning_rate": 3.2040737913213853e-07, + "loss": 0.0176, + "step": 2875, + "video_reward_cumulative_accuracy": 0.8506086956521739 + }, + { + "epoch": 0.8536657761947165, + "grad_norm": 0.736202597618103, + "learning_rate": 3.1913979965639166e-07, + "loss": 0.0054, + "step": 2876, + "video_reward_cumulative_accuracy": 0.8506606397774688 + }, + { + "epoch": 0.8539626001780944, + "grad_norm": 1.9681017398834229, + "learning_rate": 3.178745615457393e-07, + "loss": 0.0178, + "step": 2877, + "video_reward_cumulative_accuracy": 0.8507125477928398 + }, + { + "epoch": 0.8542594241614723, + "grad_norm": 4.273847579956055, + "learning_rate": 3.1661166615853723e-07, + "loss": 0.0847, + "step": 2878, + "video_reward_cumulative_accuracy": 0.8505906879777624 + }, + { + "epoch": 0.8545562481448501, + "grad_norm": 2.556535243988037, + "learning_rate": 3.153511148506269e-07, + "loss": 0.0457, + "step": 2879, + "video_reward_cumulative_accuracy": 0.8506425842306357 + }, + { + "epoch": 0.8548530721282279, + "grad_norm": 3.7858211994171143, + "learning_rate": 3.140929089753311e-07, + "loss": 0.0359, + "step": 2880, + "video_reward_cumulative_accuracy": 0.8506944444444444 + }, + { + "epoch": 0.8551498961116059, + "grad_norm": 3.342463254928589, + "learning_rate": 3.128370498834571e-07, + "loss": 0.0899, + "step": 2881, + "video_reward_cumulative_accuracy": 0.8505727178063173 + }, + { + "epoch": 0.8554467200949837, + "grad_norm": 3.1436612606048584, + "learning_rate": 3.1158353892329075e-07, + "loss": 0.0864, + "step": 2882, + "video_reward_cumulative_accuracy": 0.8506245662734212 + }, + { + "epoch": 0.8557435440783615, + "grad_norm": 0.46163490414619446, + "learning_rate": 3.1033237744059805e-07, + "loss": 0.0036, + "step": 2883, + "video_reward_cumulative_accuracy": 0.8506763787721123 + }, + { + "epoch": 0.8560403680617394, + "grad_norm": 1.36380934715271, + "learning_rate": 3.090835667786232e-07, + "loss": 0.0211, + "step": 2884, + "video_reward_cumulative_accuracy": 0.8507281553398058 + }, + { + "epoch": 0.8563371920451173, + "grad_norm": 5.0028977394104, + "learning_rate": 3.078371082780843e-07, + "loss": 0.0455, + "step": 2885, + "video_reward_cumulative_accuracy": 0.8506065857885615 + }, + { + "epoch": 0.8566340160284951, + "grad_norm": 3.535642147064209, + "learning_rate": 3.065930032771763e-07, + "loss": 0.0395, + "step": 2886, + "video_reward_cumulative_accuracy": 0.8506583506583506 + }, + { + "epoch": 0.8569308400118729, + "grad_norm": 0.9238957166671753, + "learning_rate": 3.053512531115654e-07, + "loss": 0.0094, + "step": 2887, + "video_reward_cumulative_accuracy": 0.8507100796674749 + }, + { + "epoch": 0.8572276639952509, + "grad_norm": 1.2704198360443115, + "learning_rate": 3.041118591143924e-07, + "loss": 0.0114, + "step": 2888, + "video_reward_cumulative_accuracy": 0.8507617728531855 + }, + { + "epoch": 0.8575244879786287, + "grad_norm": 5.546009063720703, + "learning_rate": 3.0287482261626727e-07, + "loss": 0.1227, + "step": 2889, + "video_reward_cumulative_accuracy": 0.8506403599861544 + }, + { + "epoch": 0.8578213119620065, + "grad_norm": 0.7070528268814087, + "learning_rate": 3.016401449452674e-07, + "loss": 0.011, + "step": 2890, + "video_reward_cumulative_accuracy": 0.8506920415224913 + }, + { + "epoch": 0.8581181359453844, + "grad_norm": 2.1009104251861572, + "learning_rate": 3.0040782742694037e-07, + "loss": 0.0506, + "step": 2891, + "video_reward_cumulative_accuracy": 0.8507436873054306 + }, + { + "epoch": 0.8584149599287623, + "grad_norm": 1.1483750343322754, + "learning_rate": 2.991778713842969e-07, + "loss": 0.025, + "step": 2892, + "video_reward_cumulative_accuracy": 0.8507952973720608 + }, + { + "epoch": 0.8587117839121401, + "grad_norm": 0.6957268714904785, + "learning_rate": 2.979502781378163e-07, + "loss": 0.0083, + "step": 2893, + "video_reward_cumulative_accuracy": 0.8508468717594193 + }, + { + "epoch": 0.8590086078955179, + "grad_norm": 1.5722553730010986, + "learning_rate": 2.967250490054377e-07, + "loss": 0.044, + "step": 2894, + "video_reward_cumulative_accuracy": 0.850898410504492 + }, + { + "epoch": 0.8593054318788959, + "grad_norm": 1.9897538423538208, + "learning_rate": 2.955021853025639e-07, + "loss": 0.0717, + "step": 2895, + "video_reward_cumulative_accuracy": 0.8509499136442141 + }, + { + "epoch": 0.8596022558622737, + "grad_norm": 1.8997362852096558, + "learning_rate": 2.942816883420582e-07, + "loss": 0.0146, + "step": 2896, + "video_reward_cumulative_accuracy": 0.8510013812154696 + }, + { + "epoch": 0.8598990798456515, + "grad_norm": 1.694429636001587, + "learning_rate": 2.9306355943424097e-07, + "loss": 0.0154, + "step": 2897, + "video_reward_cumulative_accuracy": 0.8510528132550915 + }, + { + "epoch": 0.8601959038290294, + "grad_norm": 2.0843210220336914, + "learning_rate": 2.91847799886894e-07, + "loss": 0.0171, + "step": 2898, + "video_reward_cumulative_accuracy": 0.851104209799862 + }, + { + "epoch": 0.8604927278124073, + "grad_norm": 3.370720386505127, + "learning_rate": 2.9063441100525167e-07, + "loss": 0.0258, + "step": 2899, + "video_reward_cumulative_accuracy": 0.8509830976198689 + }, + { + "epoch": 0.8607895517957851, + "grad_norm": 1.6068284511566162, + "learning_rate": 2.8942339409200523e-07, + "loss": 0.0278, + "step": 2900, + "video_reward_cumulative_accuracy": 0.8510344827586207 + }, + { + "epoch": 0.8610863757791629, + "grad_norm": 1.0114235877990723, + "learning_rate": 2.88214750447299e-07, + "loss": 0.0117, + "step": 2901, + "video_reward_cumulative_accuracy": 0.8510858324715616 + }, + { + "epoch": 0.8613831997625409, + "grad_norm": 1.6765480041503906, + "learning_rate": 2.8700848136872823e-07, + "loss": 0.0457, + "step": 2902, + "video_reward_cumulative_accuracy": 0.8511371467953136 + }, + { + "epoch": 0.8616800237459187, + "grad_norm": 4.060369968414307, + "learning_rate": 2.858045881513416e-07, + "loss": 0.0417, + "step": 2903, + "video_reward_cumulative_accuracy": 0.8510161901481227 + }, + { + "epoch": 0.8619768477292965, + "grad_norm": 2.511009693145752, + "learning_rate": 2.846030720876339e-07, + "loss": 0.0231, + "step": 2904, + "video_reward_cumulative_accuracy": 0.8510674931129476 + }, + { + "epoch": 0.8622736717126744, + "grad_norm": 1.7390601634979248, + "learning_rate": 2.834039344675504e-07, + "loss": 0.0318, + "step": 2905, + "video_reward_cumulative_accuracy": 0.851118760757315 + }, + { + "epoch": 0.8625704956960523, + "grad_norm": 2.4160759449005127, + "learning_rate": 2.8220717657848037e-07, + "loss": 0.0218, + "step": 2906, + "video_reward_cumulative_accuracy": 0.8511699931176876 + }, + { + "epoch": 0.8628673196794301, + "grad_norm": 2.386310338973999, + "learning_rate": 2.8101279970526e-07, + "loss": 0.0381, + "step": 2907, + "video_reward_cumulative_accuracy": 0.8512211902304782 + }, + { + "epoch": 0.8631641436628079, + "grad_norm": 1.4243983030319214, + "learning_rate": 2.798208051301693e-07, + "loss": 0.0189, + "step": 2908, + "video_reward_cumulative_accuracy": 0.8512723521320495 + }, + { + "epoch": 0.8634609676461859, + "grad_norm": 2.750420331954956, + "learning_rate": 2.786311941329298e-07, + "loss": 0.0442, + "step": 2909, + "video_reward_cumulative_accuracy": 0.8511515984874527 + }, + { + "epoch": 0.8637577916295637, + "grad_norm": 1.8915764093399048, + "learning_rate": 2.774439679907051e-07, + "loss": 0.0549, + "step": 2910, + "video_reward_cumulative_accuracy": 0.8512027491408934 + }, + { + "epoch": 0.8640546156129415, + "grad_norm": 2.3890373706817627, + "learning_rate": 2.762591279780963e-07, + "loss": 0.0192, + "step": 2911, + "video_reward_cumulative_accuracy": 0.8512538646513226 + }, + { + "epoch": 0.8643514395963194, + "grad_norm": 2.8765714168548584, + "learning_rate": 2.7507667536714496e-07, + "loss": 0.0201, + "step": 2912, + "video_reward_cumulative_accuracy": 0.851304945054945 + }, + { + "epoch": 0.8646482635796973, + "grad_norm": 0.4579935073852539, + "learning_rate": 2.738966114273287e-07, + "loss": 0.0051, + "step": 2913, + "video_reward_cumulative_accuracy": 0.8513559903879162 + }, + { + "epoch": 0.8649450875630751, + "grad_norm": 1.4378376007080078, + "learning_rate": 2.727189374255604e-07, + "loss": 0.0094, + "step": 2914, + "video_reward_cumulative_accuracy": 0.8514070006863418 + }, + { + "epoch": 0.8652419115464529, + "grad_norm": 1.244295358657837, + "learning_rate": 2.715436546261882e-07, + "loss": 0.0155, + "step": 2915, + "video_reward_cumulative_accuracy": 0.8514579759862779 + }, + { + "epoch": 0.8655387355298308, + "grad_norm": 2.9890527725219727, + "learning_rate": 2.703707642909914e-07, + "loss": 0.0832, + "step": 2916, + "video_reward_cumulative_accuracy": 0.8513374485596708 + }, + { + "epoch": 0.8658355595132087, + "grad_norm": 0.4092223644256592, + "learning_rate": 2.6920026767918163e-07, + "loss": 0.0084, + "step": 2917, + "video_reward_cumulative_accuracy": 0.8513884127528283 + }, + { + "epoch": 0.8661323834965865, + "grad_norm": 3.3004024028778076, + "learning_rate": 2.680321660474011e-07, + "loss": 0.054, + "step": 2918, + "video_reward_cumulative_accuracy": 0.8514393420150789 + }, + { + "epoch": 0.8664292074799644, + "grad_norm": 2.0161333084106445, + "learning_rate": 2.6686646064971983e-07, + "loss": 0.0137, + "step": 2919, + "video_reward_cumulative_accuracy": 0.8514902363823227 + }, + { + "epoch": 0.8667260314633423, + "grad_norm": 1.7455112934112549, + "learning_rate": 2.6570315273763663e-07, + "loss": 0.0391, + "step": 2920, + "video_reward_cumulative_accuracy": 0.8515410958904109 + }, + { + "epoch": 0.8670228554467201, + "grad_norm": 1.8997581005096436, + "learning_rate": 2.6454224356007417e-07, + "loss": 0.0372, + "step": 2921, + "video_reward_cumulative_accuracy": 0.8515919205751455 + }, + { + "epoch": 0.8673196794300979, + "grad_norm": 3.565519094467163, + "learning_rate": 2.633837343633816e-07, + "loss": 0.0423, + "step": 2922, + "video_reward_cumulative_accuracy": 0.8516427104722792 + }, + { + "epoch": 0.8676165034134758, + "grad_norm": 2.546748399734497, + "learning_rate": 2.622276263913312e-07, + "loss": 0.0663, + "step": 2923, + "video_reward_cumulative_accuracy": 0.8515224084844338 + }, + { + "epoch": 0.8679133273968537, + "grad_norm": 0.6215806603431702, + "learning_rate": 2.6107392088511706e-07, + "loss": 0.0154, + "step": 2924, + "video_reward_cumulative_accuracy": 0.8515731874145007 + }, + { + "epoch": 0.8682101513802315, + "grad_norm": 2.480520009994507, + "learning_rate": 2.5992261908335454e-07, + "loss": 0.0223, + "step": 2925, + "video_reward_cumulative_accuracy": 0.8516239316239316 + }, + { + "epoch": 0.8685069753636094, + "grad_norm": 2.851381301879883, + "learning_rate": 2.587737222220765e-07, + "loss": 0.0466, + "step": 2926, + "video_reward_cumulative_accuracy": 0.8515037593984962 + }, + { + "epoch": 0.8688037993469873, + "grad_norm": 1.1734604835510254, + "learning_rate": 2.576272315347361e-07, + "loss": 0.0104, + "step": 2927, + "video_reward_cumulative_accuracy": 0.8515544926545952 + }, + { + "epoch": 0.8691006233303651, + "grad_norm": 2.432842493057251, + "learning_rate": 2.5648314825220233e-07, + "loss": 0.0306, + "step": 2928, + "video_reward_cumulative_accuracy": 0.8516051912568307 + }, + { + "epoch": 0.8693974473137429, + "grad_norm": 1.94566011428833, + "learning_rate": 2.5534147360276014e-07, + "loss": 0.0697, + "step": 2929, + "video_reward_cumulative_accuracy": 0.8514851485148515 + }, + { + "epoch": 0.8696942712971208, + "grad_norm": 3.096381902694702, + "learning_rate": 2.542022088121068e-07, + "loss": 0.0651, + "step": 2930, + "video_reward_cumulative_accuracy": 0.8515358361774744 + }, + { + "epoch": 0.8699910952804987, + "grad_norm": 1.368943452835083, + "learning_rate": 2.530653551033546e-07, + "loss": 0.0131, + "step": 2931, + "video_reward_cumulative_accuracy": 0.8515864892528148 + }, + { + "epoch": 0.8702879192638765, + "grad_norm": 4.917116165161133, + "learning_rate": 2.519309136970258e-07, + "loss": 0.0782, + "step": 2932, + "video_reward_cumulative_accuracy": 0.8516371077762619 + }, + { + "epoch": 0.8705847432472544, + "grad_norm": 3.473444700241089, + "learning_rate": 2.507988858110538e-07, + "loss": 0.0313, + "step": 2933, + "video_reward_cumulative_accuracy": 0.8516876917831572 + }, + { + "epoch": 0.8708815672306323, + "grad_norm": 0.6671600937843323, + "learning_rate": 2.4966927266078077e-07, + "loss": 0.008, + "step": 2934, + "video_reward_cumulative_accuracy": 0.8517382413087935 + }, + { + "epoch": 0.8711783912140101, + "grad_norm": 0.5415387749671936, + "learning_rate": 2.4854207545895515e-07, + "loss": 0.0091, + "step": 2935, + "video_reward_cumulative_accuracy": 0.8517887563884157 + }, + { + "epoch": 0.8714752151973879, + "grad_norm": 1.7057019472122192, + "learning_rate": 2.474172954157328e-07, + "loss": 0.0371, + "step": 2936, + "video_reward_cumulative_accuracy": 0.8518392370572208 + }, + { + "epoch": 0.8717720391807658, + "grad_norm": 1.5639770030975342, + "learning_rate": 2.462949337386744e-07, + "loss": 0.0343, + "step": 2937, + "video_reward_cumulative_accuracy": 0.8518896833503575 + }, + { + "epoch": 0.8720688631641437, + "grad_norm": 2.864452362060547, + "learning_rate": 2.4517499163274395e-07, + "loss": 0.0471, + "step": 2938, + "video_reward_cumulative_accuracy": 0.8519400953029271 + }, + { + "epoch": 0.8723656871475215, + "grad_norm": 2.2662036418914795, + "learning_rate": 2.4405747030030903e-07, + "loss": 0.0436, + "step": 2939, + "video_reward_cumulative_accuracy": 0.851990472949983 + }, + { + "epoch": 0.8726625111308994, + "grad_norm": 0.40300530195236206, + "learning_rate": 2.4294237094113576e-07, + "loss": 0.0072, + "step": 2940, + "video_reward_cumulative_accuracy": 0.8520408163265306 + }, + { + "epoch": 0.8729593351142773, + "grad_norm": 5.474096298217773, + "learning_rate": 2.418296947523927e-07, + "loss": 0.0834, + "step": 2941, + "video_reward_cumulative_accuracy": 0.8520911254675281 + }, + { + "epoch": 0.8732561590976551, + "grad_norm": 2.9174013137817383, + "learning_rate": 2.4071944292864414e-07, + "loss": 0.0285, + "step": 2942, + "video_reward_cumulative_accuracy": 0.8521414004078858 + }, + { + "epoch": 0.8735529830810329, + "grad_norm": 2.5676088333129883, + "learning_rate": 2.3961161666185476e-07, + "loss": 0.027, + "step": 2943, + "video_reward_cumulative_accuracy": 0.8521916411824668 + }, + { + "epoch": 0.8738498070644108, + "grad_norm": 2.0752131938934326, + "learning_rate": 2.385062171413838e-07, + "loss": 0.0428, + "step": 2944, + "video_reward_cumulative_accuracy": 0.8522418478260869 + }, + { + "epoch": 0.8741466310477887, + "grad_norm": 1.9137473106384277, + "learning_rate": 2.3740324555398346e-07, + "loss": 0.0525, + "step": 2945, + "video_reward_cumulative_accuracy": 0.8522920203735145 + }, + { + "epoch": 0.8744434550311665, + "grad_norm": 2.5203845500946045, + "learning_rate": 2.3630270308380221e-07, + "loss": 0.0348, + "step": 2946, + "video_reward_cumulative_accuracy": 0.8521724372029871 + }, + { + "epoch": 0.8747402790145444, + "grad_norm": 1.3323040008544922, + "learning_rate": 2.352045909123779e-07, + "loss": 0.0112, + "step": 2947, + "video_reward_cumulative_accuracy": 0.8522225992534781 + }, + { + "epoch": 0.8750371029979223, + "grad_norm": 0.23800607025623322, + "learning_rate": 2.3410891021864058e-07, + "loss": 0.002, + "step": 2948, + "video_reward_cumulative_accuracy": 0.8522727272727273 + }, + { + "epoch": 0.8753339269813001, + "grad_norm": 1.832039475440979, + "learning_rate": 2.3301566217891148e-07, + "loss": 0.0263, + "step": 2949, + "video_reward_cumulative_accuracy": 0.8523228212953543 + }, + { + "epoch": 0.8756307509646779, + "grad_norm": 2.428018569946289, + "learning_rate": 2.31924847966897e-07, + "loss": 0.0579, + "step": 2950, + "video_reward_cumulative_accuracy": 0.8520338983050847 + }, + { + "epoch": 0.8759275749480558, + "grad_norm": 1.4113751649856567, + "learning_rate": 2.3083646875369293e-07, + "loss": 0.0297, + "step": 2951, + "video_reward_cumulative_accuracy": 0.8520840393087089 + }, + { + "epoch": 0.8762243989314337, + "grad_norm": 2.2765519618988037, + "learning_rate": 2.2975052570777896e-07, + "loss": 0.0271, + "step": 2952, + "video_reward_cumulative_accuracy": 0.8519647696476965 + }, + { + "epoch": 0.8765212229148115, + "grad_norm": 1.4904448986053467, + "learning_rate": 2.2866701999502083e-07, + "loss": 0.0244, + "step": 2953, + "video_reward_cumulative_accuracy": 0.8520149001015916 + }, + { + "epoch": 0.8768180468981893, + "grad_norm": 2.649986505508423, + "learning_rate": 2.275859527786675e-07, + "loss": 0.0505, + "step": 2954, + "video_reward_cumulative_accuracy": 0.8518957345971564 + }, + { + "epoch": 0.8771148708815673, + "grad_norm": 2.690307140350342, + "learning_rate": 2.2650732521934891e-07, + "loss": 0.0813, + "step": 2955, + "video_reward_cumulative_accuracy": 0.8519458544839256 + }, + { + "epoch": 0.8774116948649451, + "grad_norm": 1.8081152439117432, + "learning_rate": 2.2543113847507735e-07, + "loss": 0.0534, + "step": 2956, + "video_reward_cumulative_accuracy": 0.8519959404600812 + }, + { + "epoch": 0.8777085188483229, + "grad_norm": 1.8062191009521484, + "learning_rate": 2.2435739370124277e-07, + "loss": 0.0196, + "step": 2957, + "video_reward_cumulative_accuracy": 0.852045992560027 + }, + { + "epoch": 0.8780053428317008, + "grad_norm": 1.1374989748001099, + "learning_rate": 2.2328609205061442e-07, + "loss": 0.0244, + "step": 2958, + "video_reward_cumulative_accuracy": 0.8520960108181204 + }, + { + "epoch": 0.8783021668150787, + "grad_norm": 0.8241883516311646, + "learning_rate": 2.2221723467333922e-07, + "loss": 0.022, + "step": 2959, + "video_reward_cumulative_accuracy": 0.8521459952686719 + }, + { + "epoch": 0.8785989907984565, + "grad_norm": 2.666593551635742, + "learning_rate": 2.21150822716939e-07, + "loss": 0.0145, + "step": 2960, + "video_reward_cumulative_accuracy": 0.8521959459459459 + }, + { + "epoch": 0.8788958147818343, + "grad_norm": 1.3375494480133057, + "learning_rate": 2.2008685732631096e-07, + "loss": 0.0177, + "step": 2961, + "video_reward_cumulative_accuracy": 0.8522458628841607 + }, + { + "epoch": 0.8791926387652123, + "grad_norm": 3.6023035049438477, + "learning_rate": 2.1902533964372448e-07, + "loss": 0.0534, + "step": 2962, + "video_reward_cumulative_accuracy": 0.8521269412559082 + }, + { + "epoch": 0.8794894627485901, + "grad_norm": 3.426396608352661, + "learning_rate": 2.1796627080882205e-07, + "loss": 0.0531, + "step": 2963, + "video_reward_cumulative_accuracy": 0.8521768477894026 + }, + { + "epoch": 0.8797862867319679, + "grad_norm": 2.153428077697754, + "learning_rate": 2.1690965195861668e-07, + "loss": 0.0237, + "step": 2964, + "video_reward_cumulative_accuracy": 0.8522267206477733 + }, + { + "epoch": 0.8800831107153458, + "grad_norm": 1.4342325925827026, + "learning_rate": 2.1585548422749236e-07, + "loss": 0.0163, + "step": 2965, + "video_reward_cumulative_accuracy": 0.8522765598650928 + }, + { + "epoch": 0.8803799346987237, + "grad_norm": 1.447503924369812, + "learning_rate": 2.1480376874719876e-07, + "loss": 0.0233, + "step": 2966, + "video_reward_cumulative_accuracy": 0.8523263654753878 + }, + { + "epoch": 0.8806767586821015, + "grad_norm": 2.423767328262329, + "learning_rate": 2.1375450664685577e-07, + "loss": 0.0742, + "step": 2967, + "video_reward_cumulative_accuracy": 0.8523761375126391 + }, + { + "epoch": 0.8809735826654793, + "grad_norm": 2.4350879192352295, + "learning_rate": 2.1270769905294752e-07, + "loss": 0.0423, + "step": 2968, + "video_reward_cumulative_accuracy": 0.8524258760107817 + }, + { + "epoch": 0.8812704066488573, + "grad_norm": 4.910870552062988, + "learning_rate": 2.1166334708932367e-07, + "loss": 0.051, + "step": 2969, + "video_reward_cumulative_accuracy": 0.8524755810037049 + }, + { + "epoch": 0.8815672306322351, + "grad_norm": 3.4264378547668457, + "learning_rate": 2.1062145187719818e-07, + "loss": 0.0618, + "step": 2970, + "video_reward_cumulative_accuracy": 0.8525252525252526 + }, + { + "epoch": 0.8818640546156129, + "grad_norm": 0.8235085010528564, + "learning_rate": 2.0958201453514515e-07, + "loss": 0.0156, + "step": 2971, + "video_reward_cumulative_accuracy": 0.8525748906092225 + }, + { + "epoch": 0.8821608785989908, + "grad_norm": 3.6554598808288574, + "learning_rate": 2.0854503617910278e-07, + "loss": 0.0275, + "step": 2972, + "video_reward_cumulative_accuracy": 0.8526244952893675 + }, + { + "epoch": 0.8824577025823687, + "grad_norm": 1.463263750076294, + "learning_rate": 2.0751051792236714e-07, + "loss": 0.022, + "step": 2973, + "video_reward_cumulative_accuracy": 0.8526740665993946 + }, + { + "epoch": 0.8827545265657465, + "grad_norm": 2.6287763118743896, + "learning_rate": 2.0647846087559459e-07, + "loss": 0.0397, + "step": 2974, + "video_reward_cumulative_accuracy": 0.8527236045729657 + }, + { + "epoch": 0.8830513505491243, + "grad_norm": 1.2779161930084229, + "learning_rate": 2.0544886614679848e-07, + "loss": 0.0286, + "step": 2975, + "video_reward_cumulative_accuracy": 0.8526050420168068 + }, + { + "epoch": 0.8833481745325023, + "grad_norm": 2.465494394302368, + "learning_rate": 2.0442173484134826e-07, + "loss": 0.0588, + "step": 2976, + "video_reward_cumulative_accuracy": 0.8526545698924731 + }, + { + "epoch": 0.8836449985158801, + "grad_norm": 1.980695128440857, + "learning_rate": 2.033970680619693e-07, + "loss": 0.0563, + "step": 2977, + "video_reward_cumulative_accuracy": 0.8527040644944575 + }, + { + "epoch": 0.8839418224992579, + "grad_norm": 1.9491227865219116, + "learning_rate": 2.023748669087408e-07, + "loss": 0.0332, + "step": 2978, + "video_reward_cumulative_accuracy": 0.8525856279382136 + }, + { + "epoch": 0.8842386464826358, + "grad_norm": 2.006883144378662, + "learning_rate": 2.0135513247909493e-07, + "loss": 0.0141, + "step": 2979, + "video_reward_cumulative_accuracy": 0.8526351124538436 + }, + { + "epoch": 0.8845354704660137, + "grad_norm": 2.424649715423584, + "learning_rate": 2.0033786586781624e-07, + "loss": 0.0493, + "step": 2980, + "video_reward_cumulative_accuracy": 0.85251677852349 + }, + { + "epoch": 0.8848322944493915, + "grad_norm": 2.046546697616577, + "learning_rate": 1.9932306816703773e-07, + "loss": 0.084, + "step": 2981, + "video_reward_cumulative_accuracy": 0.8525662529352567 + }, + { + "epoch": 0.8851291184327693, + "grad_norm": 1.7148088216781616, + "learning_rate": 1.9831074046624488e-07, + "loss": 0.0261, + "step": 2982, + "video_reward_cumulative_accuracy": 0.85261569416499 + }, + { + "epoch": 0.8854259424161472, + "grad_norm": 1.020740032196045, + "learning_rate": 1.9730088385226774e-07, + "loss": 0.01, + "step": 2983, + "video_reward_cumulative_accuracy": 0.852665102246061 + }, + { + "epoch": 0.8857227663995251, + "grad_norm": 1.4954924583435059, + "learning_rate": 1.9629349940928715e-07, + "loss": 0.0195, + "step": 2984, + "video_reward_cumulative_accuracy": 0.8527144772117963 + }, + { + "epoch": 0.8860195903829029, + "grad_norm": 0.6353304982185364, + "learning_rate": 1.952885882188277e-07, + "loss": 0.023, + "step": 2985, + "video_reward_cumulative_accuracy": 0.8527638190954774 + }, + { + "epoch": 0.8863164143662808, + "grad_norm": 1.529733657836914, + "learning_rate": 1.9428615135975855e-07, + "loss": 0.018, + "step": 2986, + "video_reward_cumulative_accuracy": 0.8528131279303416 + }, + { + "epoch": 0.8866132383496587, + "grad_norm": 2.5808000564575195, + "learning_rate": 1.9328618990829384e-07, + "loss": 0.0279, + "step": 2987, + "video_reward_cumulative_accuracy": 0.8528624037495816 + }, + { + "epoch": 0.8869100623330365, + "grad_norm": 1.363269329071045, + "learning_rate": 1.9228870493798763e-07, + "loss": 0.0136, + "step": 2988, + "video_reward_cumulative_accuracy": 0.8529116465863453 + }, + { + "epoch": 0.8872068863164143, + "grad_norm": 0.9487797021865845, + "learning_rate": 1.912936975197388e-07, + "loss": 0.0137, + "step": 2989, + "video_reward_cumulative_accuracy": 0.852960856473737 + }, + { + "epoch": 0.8875037102997922, + "grad_norm": 0.6261467337608337, + "learning_rate": 1.9030116872178317e-07, + "loss": 0.0065, + "step": 2990, + "video_reward_cumulative_accuracy": 0.8530100334448161 + }, + { + "epoch": 0.8878005342831701, + "grad_norm": 1.148743987083435, + "learning_rate": 1.8931111960969694e-07, + "loss": 0.0226, + "step": 2991, + "video_reward_cumulative_accuracy": 0.8530591775325977 + }, + { + "epoch": 0.8880973582665479, + "grad_norm": 2.52793025970459, + "learning_rate": 1.8832355124639463e-07, + "loss": 0.0447, + "step": 2992, + "video_reward_cumulative_accuracy": 0.852774064171123 + }, + { + "epoch": 0.8883941822499258, + "grad_norm": 1.2899773120880127, + "learning_rate": 1.873384646921253e-07, + "loss": 0.0289, + "step": 2993, + "video_reward_cumulative_accuracy": 0.8528232542599399 + }, + { + "epoch": 0.8886910062333037, + "grad_norm": 1.8950523138046265, + "learning_rate": 1.8635586100447633e-07, + "loss": 0.0585, + "step": 2994, + "video_reward_cumulative_accuracy": 0.852872411489646 + }, + { + "epoch": 0.8889878302166815, + "grad_norm": 2.048049211502075, + "learning_rate": 1.8537574123836748e-07, + "loss": 0.0715, + "step": 2995, + "video_reward_cumulative_accuracy": 0.8529215358931552 + }, + { + "epoch": 0.8892846542000593, + "grad_norm": 1.89472496509552, + "learning_rate": 1.843981064460529e-07, + "loss": 0.0298, + "step": 2996, + "video_reward_cumulative_accuracy": 0.8529706275033377 + }, + { + "epoch": 0.8895814781834372, + "grad_norm": 2.2065141201019287, + "learning_rate": 1.8342295767711794e-07, + "loss": 0.0269, + "step": 2997, + "video_reward_cumulative_accuracy": 0.8530196863530197 + }, + { + "epoch": 0.8898783021668151, + "grad_norm": 1.0499368906021118, + "learning_rate": 1.8245029597847907e-07, + "loss": 0.006, + "step": 2998, + "video_reward_cumulative_accuracy": 0.8530687124749833 + }, + { + "epoch": 0.8901751261501929, + "grad_norm": 3.8206775188446045, + "learning_rate": 1.8148012239438434e-07, + "loss": 0.033, + "step": 2999, + "video_reward_cumulative_accuracy": 0.8531177059019673 + }, + { + "epoch": 0.8904719501335708, + "grad_norm": 3.880343198776245, + "learning_rate": 1.8051243796640805e-07, + "loss": 0.0258, + "step": 3000, + "video_reward_cumulative_accuracy": 0.8531666666666666 + }, + { + "epoch": 0.8904719501335708, + "eval_runtime": 132.9639, + "eval_samples_per_second": 5.934, + "eval_steps_per_second": 0.745, + "eval_test_set_accuracy": 0.8169191919191919, + "step": 3000 + }, + { + "epoch": 0.8907687741169487, + "grad_norm": 1.0296834707260132, + "learning_rate": 1.7954724373345445e-07, + "loss": 0.0095, + "step": 3001, + "video_reward_cumulative_accuracy": 0.8532155948017327 + }, + { + "epoch": 0.8910655981003265, + "grad_norm": 1.028411626815796, + "learning_rate": 1.7858454073175185e-07, + "loss": 0.0088, + "step": 3002, + "video_reward_cumulative_accuracy": 0.8532644903397735 + }, + { + "epoch": 0.8913624220837043, + "grad_norm": 3.756903886795044, + "learning_rate": 1.776243299948563e-07, + "loss": 0.0585, + "step": 3003, + "video_reward_cumulative_accuracy": 0.8533133533133533 + }, + { + "epoch": 0.8916592460670822, + "grad_norm": 2.9036705493927, + "learning_rate": 1.7666661255364704e-07, + "loss": 0.0412, + "step": 3004, + "video_reward_cumulative_accuracy": 0.8533621837549934 + }, + { + "epoch": 0.8919560700504601, + "grad_norm": 2.294867992401123, + "learning_rate": 1.7571138943632688e-07, + "loss": 0.0249, + "step": 3005, + "video_reward_cumulative_accuracy": 0.8534109816971713 + }, + { + "epoch": 0.8922528940338379, + "grad_norm": 3.16927170753479, + "learning_rate": 1.7475866166842048e-07, + "loss": 0.0349, + "step": 3006, + "video_reward_cumulative_accuracy": 0.853459747172322 + }, + { + "epoch": 0.8925497180172158, + "grad_norm": 1.4870845079421997, + "learning_rate": 1.73808430272773e-07, + "loss": 0.0282, + "step": 3007, + "video_reward_cumulative_accuracy": 0.8535084802128368 + }, + { + "epoch": 0.8928465420005937, + "grad_norm": 2.188786029815674, + "learning_rate": 1.728606962695506e-07, + "loss": 0.0509, + "step": 3008, + "video_reward_cumulative_accuracy": 0.8535571808510638 + }, + { + "epoch": 0.8931433659839715, + "grad_norm": 3.7729499340057373, + "learning_rate": 1.7191546067623772e-07, + "loss": 0.0906, + "step": 3009, + "video_reward_cumulative_accuracy": 0.8534396809571286 + }, + { + "epoch": 0.8934401899673493, + "grad_norm": 3.361833095550537, + "learning_rate": 1.7097272450763646e-07, + "loss": 0.0877, + "step": 3010, + "video_reward_cumulative_accuracy": 0.8533222591362126 + }, + { + "epoch": 0.8937370139507272, + "grad_norm": 2.5203890800476074, + "learning_rate": 1.7003248877586558e-07, + "loss": 0.0243, + "step": 3011, + "video_reward_cumulative_accuracy": 0.8533709730986383 + }, + { + "epoch": 0.8940338379341051, + "grad_norm": 1.8145414590835571, + "learning_rate": 1.6909475449035929e-07, + "loss": 0.0231, + "step": 3012, + "video_reward_cumulative_accuracy": 0.8534196547144755 + }, + { + "epoch": 0.8943306619174829, + "grad_norm": 3.811539888381958, + "learning_rate": 1.6815952265786638e-07, + "loss": 0.0447, + "step": 3013, + "video_reward_cumulative_accuracy": 0.8533023564553601 + }, + { + "epoch": 0.8946274859008608, + "grad_norm": 3.9783248901367188, + "learning_rate": 1.6722679428244903e-07, + "loss": 0.0442, + "step": 3014, + "video_reward_cumulative_accuracy": 0.8533510285335103 + }, + { + "epoch": 0.8949243098842387, + "grad_norm": 0.7299622893333435, + "learning_rate": 1.6629657036548175e-07, + "loss": 0.01, + "step": 3015, + "video_reward_cumulative_accuracy": 0.8533996683250414 + }, + { + "epoch": 0.8952211338676165, + "grad_norm": 3.1430046558380127, + "learning_rate": 1.6536885190565066e-07, + "loss": 0.0237, + "step": 3016, + "video_reward_cumulative_accuracy": 0.8532824933687002 + }, + { + "epoch": 0.8955179578509943, + "grad_norm": 1.1799105405807495, + "learning_rate": 1.6444363989895046e-07, + "loss": 0.0112, + "step": 3017, + "video_reward_cumulative_accuracy": 0.8533311236327478 + }, + { + "epoch": 0.8958147818343722, + "grad_norm": 2.950824022293091, + "learning_rate": 1.6352093533868658e-07, + "loss": 0.0471, + "step": 3018, + "video_reward_cumulative_accuracy": 0.8532140490390987 + }, + { + "epoch": 0.8961116058177501, + "grad_norm": 2.92531156539917, + "learning_rate": 1.6260073921547215e-07, + "loss": 0.0304, + "step": 3019, + "video_reward_cumulative_accuracy": 0.8532626697581981 + }, + { + "epoch": 0.8964084298011279, + "grad_norm": 2.9492204189300537, + "learning_rate": 1.616830525172272e-07, + "loss": 0.0292, + "step": 3020, + "video_reward_cumulative_accuracy": 0.8533112582781457 + }, + { + "epoch": 0.8967052537845058, + "grad_norm": 1.7324609756469727, + "learning_rate": 1.6076787622917673e-07, + "loss": 0.0184, + "step": 3021, + "video_reward_cumulative_accuracy": 0.8533598146309169 + }, + { + "epoch": 0.8970020777678837, + "grad_norm": 2.3193140029907227, + "learning_rate": 1.5985521133385168e-07, + "loss": 0.0551, + "step": 3022, + "video_reward_cumulative_accuracy": 0.8534083388484447 + }, + { + "epoch": 0.8972989017512615, + "grad_norm": 3.8793790340423584, + "learning_rate": 1.5894505881108635e-07, + "loss": 0.0541, + "step": 3023, + "video_reward_cumulative_accuracy": 0.8531260337413166 + }, + { + "epoch": 0.8975957257346393, + "grad_norm": 1.8072460889816284, + "learning_rate": 1.58037419638018e-07, + "loss": 0.0564, + "step": 3024, + "video_reward_cumulative_accuracy": 0.8531746031746031 + }, + { + "epoch": 0.8978925497180172, + "grad_norm": 3.4717845916748047, + "learning_rate": 1.5713229478908577e-07, + "loss": 0.0491, + "step": 3025, + "video_reward_cumulative_accuracy": 0.8532231404958678 + }, + { + "epoch": 0.8981893737013951, + "grad_norm": 0.3415747880935669, + "learning_rate": 1.562296852360279e-07, + "loss": 0.0053, + "step": 3026, + "video_reward_cumulative_accuracy": 0.8532716457369465 + }, + { + "epoch": 0.8984861976847729, + "grad_norm": 1.9290162324905396, + "learning_rate": 1.5532959194788395e-07, + "loss": 0.035, + "step": 3027, + "video_reward_cumulative_accuracy": 0.8529897588371325 + }, + { + "epoch": 0.8987830216681508, + "grad_norm": 3.8427798748016357, + "learning_rate": 1.5443201589099149e-07, + "loss": 0.0378, + "step": 3028, + "video_reward_cumulative_accuracy": 0.8530383091149274 + }, + { + "epoch": 0.8990798456515287, + "grad_norm": 1.119760513305664, + "learning_rate": 1.5353695802898556e-07, + "loss": 0.0185, + "step": 3029, + "video_reward_cumulative_accuracy": 0.8530868273357544 + }, + { + "epoch": 0.8993766696349065, + "grad_norm": 1.9450827836990356, + "learning_rate": 1.52644419322798e-07, + "loss": 0.0364, + "step": 3030, + "video_reward_cumulative_accuracy": 0.8531353135313532 + }, + { + "epoch": 0.8996734936182843, + "grad_norm": 1.9632068872451782, + "learning_rate": 1.5175440073065485e-07, + "loss": 0.0607, + "step": 3031, + "video_reward_cumulative_accuracy": 0.8531837677334213 + }, + { + "epoch": 0.8999703176016622, + "grad_norm": 0.8110266327857971, + "learning_rate": 1.508669032080781e-07, + "loss": 0.0176, + "step": 3032, + "video_reward_cumulative_accuracy": 0.8532321899736148 + }, + { + "epoch": 0.9002671415850401, + "grad_norm": 1.5525119304656982, + "learning_rate": 1.499819277078818e-07, + "loss": 0.0092, + "step": 3033, + "video_reward_cumulative_accuracy": 0.8532805802835477 + }, + { + "epoch": 0.9005639655684179, + "grad_norm": 1.3226412534713745, + "learning_rate": 1.4909947518017387e-07, + "loss": 0.0238, + "step": 3034, + "video_reward_cumulative_accuracy": 0.8533289386947923 + }, + { + "epoch": 0.9008607895517958, + "grad_norm": 0.9626790881156921, + "learning_rate": 1.4821954657235266e-07, + "loss": 0.012, + "step": 3035, + "video_reward_cumulative_accuracy": 0.8533772652388797 + }, + { + "epoch": 0.9011576135351737, + "grad_norm": 0.8461338877677917, + "learning_rate": 1.4734214282910664e-07, + "loss": 0.0223, + "step": 3036, + "video_reward_cumulative_accuracy": 0.8532608695652174 + }, + { + "epoch": 0.9014544375185515, + "grad_norm": 4.338505268096924, + "learning_rate": 1.4646726489241397e-07, + "loss": 0.0458, + "step": 3037, + "video_reward_cumulative_accuracy": 0.8531445505432993 + }, + { + "epoch": 0.9017512615019293, + "grad_norm": 1.510209560394287, + "learning_rate": 1.4559491370154083e-07, + "loss": 0.0465, + "step": 3038, + "video_reward_cumulative_accuracy": 0.8531928900592495 + }, + { + "epoch": 0.9020480854853072, + "grad_norm": 2.6341168880462646, + "learning_rate": 1.4472509019304053e-07, + "loss": 0.0466, + "step": 3039, + "video_reward_cumulative_accuracy": 0.8532411977624218 + }, + { + "epoch": 0.9023449094686851, + "grad_norm": 2.911071300506592, + "learning_rate": 1.4385779530075444e-07, + "loss": 0.0256, + "step": 3040, + "video_reward_cumulative_accuracy": 0.8532894736842105 + }, + { + "epoch": 0.9026417334520629, + "grad_norm": 2.444882869720459, + "learning_rate": 1.4299302995580634e-07, + "loss": 0.027, + "step": 3041, + "video_reward_cumulative_accuracy": 0.8533377178559685 + }, + { + "epoch": 0.9029385574354408, + "grad_norm": 4.529541969299316, + "learning_rate": 1.4213079508660688e-07, + "loss": 0.051, + "step": 3042, + "video_reward_cumulative_accuracy": 0.8532215647600263 + }, + { + "epoch": 0.9032353814188187, + "grad_norm": 3.1979563236236572, + "learning_rate": 1.412710916188481e-07, + "loss": 0.0291, + "step": 3043, + "video_reward_cumulative_accuracy": 0.8532697995399277 + }, + { + "epoch": 0.9035322054021965, + "grad_norm": 3.461008071899414, + "learning_rate": 1.4041392047550506e-07, + "loss": 0.0434, + "step": 3044, + "video_reward_cumulative_accuracy": 0.8533180026281209 + }, + { + "epoch": 0.9038290293855743, + "grad_norm": 3.109524726867676, + "learning_rate": 1.3955928257683465e-07, + "loss": 0.0798, + "step": 3045, + "video_reward_cumulative_accuracy": 0.8533661740558293 + }, + { + "epoch": 0.9041258533689522, + "grad_norm": 3.351715326309204, + "learning_rate": 1.3870717884037321e-07, + "loss": 0.0346, + "step": 3046, + "video_reward_cumulative_accuracy": 0.853414313854235 + }, + { + "epoch": 0.9044226773523301, + "grad_norm": 1.3447494506835938, + "learning_rate": 1.3785761018093757e-07, + "loss": 0.0162, + "step": 3047, + "video_reward_cumulative_accuracy": 0.8534624220544799 + }, + { + "epoch": 0.9047195013357079, + "grad_norm": 4.213351249694824, + "learning_rate": 1.3701057751062146e-07, + "loss": 0.0712, + "step": 3048, + "video_reward_cumulative_accuracy": 0.853510498687664 + }, + { + "epoch": 0.9050163253190858, + "grad_norm": 2.7610223293304443, + "learning_rate": 1.3616608173879636e-07, + "loss": 0.0292, + "step": 3049, + "video_reward_cumulative_accuracy": 0.8535585437848475 + }, + { + "epoch": 0.9053131493024636, + "grad_norm": 0.9333323240280151, + "learning_rate": 1.3532412377211119e-07, + "loss": 0.0066, + "step": 3050, + "video_reward_cumulative_accuracy": 0.8536065573770492 + }, + { + "epoch": 0.9056099732858415, + "grad_norm": 2.758444309234619, + "learning_rate": 1.3448470451448896e-07, + "loss": 0.0446, + "step": 3051, + "video_reward_cumulative_accuracy": 0.8536545394952475 + }, + { + "epoch": 0.9059067972692193, + "grad_norm": 1.3979527950286865, + "learning_rate": 1.3364782486712857e-07, + "loss": 0.016, + "step": 3052, + "video_reward_cumulative_accuracy": 0.8535386631716907 + }, + { + "epoch": 0.9062036212525972, + "grad_norm": 2.014826774597168, + "learning_rate": 1.3281348572850045e-07, + "loss": 0.0658, + "step": 3053, + "video_reward_cumulative_accuracy": 0.853422862757943 + }, + { + "epoch": 0.9065004452359751, + "grad_norm": 1.4265682697296143, + "learning_rate": 1.3198168799434947e-07, + "loss": 0.0116, + "step": 3054, + "video_reward_cumulative_accuracy": 0.8534708578912901 + }, + { + "epoch": 0.9067972692193529, + "grad_norm": 1.420961618423462, + "learning_rate": 1.3115243255769072e-07, + "loss": 0.0359, + "step": 3055, + "video_reward_cumulative_accuracy": 0.853518821603928 + }, + { + "epoch": 0.9070940932027308, + "grad_norm": 1.8929790258407593, + "learning_rate": 1.3032572030881097e-07, + "loss": 0.0078, + "step": 3056, + "video_reward_cumulative_accuracy": 0.8535667539267016 + }, + { + "epoch": 0.9073909171861086, + "grad_norm": 3.999789237976074, + "learning_rate": 1.295015521352652e-07, + "loss": 0.0465, + "step": 3057, + "video_reward_cumulative_accuracy": 0.8536146548904154 + }, + { + "epoch": 0.9076877411694865, + "grad_norm": 1.7095073461532593, + "learning_rate": 1.2867992892187846e-07, + "loss": 0.0234, + "step": 3058, + "video_reward_cumulative_accuracy": 0.8534990189666448 + }, + { + "epoch": 0.9079845651528643, + "grad_norm": 1.0274499654769897, + "learning_rate": 1.2786085155074318e-07, + "loss": 0.0247, + "step": 3059, + "video_reward_cumulative_accuracy": 0.8533834586466166 + }, + { + "epoch": 0.9082813891362422, + "grad_norm": 3.9192311763763428, + "learning_rate": 1.2704432090121815e-07, + "loss": 0.0385, + "step": 3060, + "video_reward_cumulative_accuracy": 0.8534313725490196 + }, + { + "epoch": 0.9085782131196201, + "grad_norm": 0.8952836394309998, + "learning_rate": 1.2623033784992855e-07, + "loss": 0.0113, + "step": 3061, + "video_reward_cumulative_accuracy": 0.8534792551453774 + }, + { + "epoch": 0.9088750371029979, + "grad_norm": 0.603430449962616, + "learning_rate": 1.2541890327076368e-07, + "loss": 0.0136, + "step": 3062, + "video_reward_cumulative_accuracy": 0.8535271064663619 + }, + { + "epoch": 0.9091718610863758, + "grad_norm": 1.0425788164138794, + "learning_rate": 1.246100180348775e-07, + "loss": 0.0135, + "step": 3063, + "video_reward_cumulative_accuracy": 0.8535749265426053 + }, + { + "epoch": 0.9094686850697536, + "grad_norm": 0.720369279384613, + "learning_rate": 1.2380368301068728e-07, + "loss": 0.0103, + "step": 3064, + "video_reward_cumulative_accuracy": 0.8536227154046997 + }, + { + "epoch": 0.9097655090531315, + "grad_norm": 1.9932835102081299, + "learning_rate": 1.2299989906387116e-07, + "loss": 0.0127, + "step": 3065, + "video_reward_cumulative_accuracy": 0.8536704730831974 + }, + { + "epoch": 0.9100623330365093, + "grad_norm": 1.871851921081543, + "learning_rate": 1.221986670573702e-07, + "loss": 0.0446, + "step": 3066, + "video_reward_cumulative_accuracy": 0.8537181996086106 + }, + { + "epoch": 0.9103591570198872, + "grad_norm": 3.508359909057617, + "learning_rate": 1.2139998785138386e-07, + "loss": 0.0373, + "step": 3067, + "video_reward_cumulative_accuracy": 0.8537658950114118 + }, + { + "epoch": 0.9106559810032651, + "grad_norm": 1.359955072402954, + "learning_rate": 1.20603862303372e-07, + "loss": 0.0153, + "step": 3068, + "video_reward_cumulative_accuracy": 0.8538135593220338 + }, + { + "epoch": 0.9109528049866429, + "grad_norm": 0.9213302135467529, + "learning_rate": 1.1981029126805293e-07, + "loss": 0.0142, + "step": 3069, + "video_reward_cumulative_accuracy": 0.85386119257087 + }, + { + "epoch": 0.9112496289700208, + "grad_norm": 0.6175004839897156, + "learning_rate": 1.1901927559740178e-07, + "loss": 0.0092, + "step": 3070, + "video_reward_cumulative_accuracy": 0.8539087947882736 + }, + { + "epoch": 0.9115464529533986, + "grad_norm": 1.5162551403045654, + "learning_rate": 1.1823081614065146e-07, + "loss": 0.0549, + "step": 3071, + "video_reward_cumulative_accuracy": 0.8537935525887334 + }, + { + "epoch": 0.9118432769367765, + "grad_norm": 0.9067859053611755, + "learning_rate": 1.1744491374428845e-07, + "loss": 0.0102, + "step": 3072, + "video_reward_cumulative_accuracy": 0.8538411458333334 + }, + { + "epoch": 0.9121401009201543, + "grad_norm": 1.3547265529632568, + "learning_rate": 1.1666156925205619e-07, + "loss": 0.048, + "step": 3073, + "video_reward_cumulative_accuracy": 0.8538887081028311 + }, + { + "epoch": 0.9124369249035322, + "grad_norm": 2.882599353790283, + "learning_rate": 1.158807835049508e-07, + "loss": 0.0496, + "step": 3074, + "video_reward_cumulative_accuracy": 0.8537735849056604 + }, + { + "epoch": 0.9127337488869101, + "grad_norm": 2.568582773208618, + "learning_rate": 1.151025573412215e-07, + "loss": 0.0347, + "step": 3075, + "video_reward_cumulative_accuracy": 0.8536585365853658 + }, + { + "epoch": 0.9130305728702879, + "grad_norm": 1.8911688327789307, + "learning_rate": 1.1432689159636995e-07, + "loss": 0.0527, + "step": 3076, + "video_reward_cumulative_accuracy": 0.85370611183355 + }, + { + "epoch": 0.9133273968536658, + "grad_norm": 1.7498602867126465, + "learning_rate": 1.1355378710314779e-07, + "loss": 0.0607, + "step": 3077, + "video_reward_cumulative_accuracy": 0.853753656158596 + }, + { + "epoch": 0.9136242208370436, + "grad_norm": 2.1445226669311523, + "learning_rate": 1.1278324469155888e-07, + "loss": 0.0548, + "step": 3078, + "video_reward_cumulative_accuracy": 0.8538011695906432 + }, + { + "epoch": 0.9139210448204215, + "grad_norm": 2.0121655464172363, + "learning_rate": 1.120152651888537e-07, + "loss": 0.0366, + "step": 3079, + "video_reward_cumulative_accuracy": 0.8538486521597921 + }, + { + "epoch": 0.9142178688037993, + "grad_norm": 2.319936990737915, + "learning_rate": 1.1124984941953465e-07, + "loss": 0.0509, + "step": 3080, + "video_reward_cumulative_accuracy": 0.8538961038961039 + }, + { + "epoch": 0.9145146927871772, + "grad_norm": 3.709453582763672, + "learning_rate": 1.1048699820534831e-07, + "loss": 0.0419, + "step": 3081, + "video_reward_cumulative_accuracy": 0.8539435248296008 + }, + { + "epoch": 0.914811516770555, + "grad_norm": 1.580238938331604, + "learning_rate": 1.0972671236529037e-07, + "loss": 0.0369, + "step": 3082, + "video_reward_cumulative_accuracy": 0.853990914990266 + }, + { + "epoch": 0.9151083407539329, + "grad_norm": 1.9665991067886353, + "learning_rate": 1.0896899271560152e-07, + "loss": 0.0545, + "step": 3083, + "video_reward_cumulative_accuracy": 0.8540382744080441 + }, + { + "epoch": 0.9154051647373108, + "grad_norm": 1.6465821266174316, + "learning_rate": 1.0821384006976631e-07, + "loss": 0.0158, + "step": 3084, + "video_reward_cumulative_accuracy": 0.8540856031128404 + }, + { + "epoch": 0.9157019887206886, + "grad_norm": 3.7942326068878174, + "learning_rate": 1.074612552385157e-07, + "loss": 0.0206, + "step": 3085, + "video_reward_cumulative_accuracy": 0.8541329011345219 + }, + { + "epoch": 0.9159988127040665, + "grad_norm": 0.7322973012924194, + "learning_rate": 1.0671123902982166e-07, + "loss": 0.015, + "step": 3086, + "video_reward_cumulative_accuracy": 0.8541801685029164 + }, + { + "epoch": 0.9162956366874443, + "grad_norm": 1.750848412513733, + "learning_rate": 1.0596379224889986e-07, + "loss": 0.0467, + "step": 3087, + "video_reward_cumulative_accuracy": 0.8540654356980888 + }, + { + "epoch": 0.9165924606708222, + "grad_norm": 3.0623202323913574, + "learning_rate": 1.0521891569820698e-07, + "loss": 0.05, + "step": 3088, + "video_reward_cumulative_accuracy": 0.8541126943005182 + }, + { + "epoch": 0.9168892846542, + "grad_norm": 1.9967219829559326, + "learning_rate": 1.0447661017743971e-07, + "loss": 0.0225, + "step": 3089, + "video_reward_cumulative_accuracy": 0.8539980576238265 + }, + { + "epoch": 0.9171861086375779, + "grad_norm": 1.6754310131072998, + "learning_rate": 1.0373687648353586e-07, + "loss": 0.016, + "step": 3090, + "video_reward_cumulative_accuracy": 0.8538834951456311 + }, + { + "epoch": 0.9174829326209558, + "grad_norm": 3.090818166732788, + "learning_rate": 1.02999715410671e-07, + "loss": 0.0264, + "step": 3091, + "video_reward_cumulative_accuracy": 0.8539307667421546 + }, + { + "epoch": 0.9177797566043336, + "grad_norm": 0.48801377415657043, + "learning_rate": 1.0226512775025899e-07, + "loss": 0.0097, + "step": 3092, + "video_reward_cumulative_accuracy": 0.8539780077619664 + }, + { + "epoch": 0.9180765805877115, + "grad_norm": 0.5541077852249146, + "learning_rate": 1.015331142909512e-07, + "loss": 0.0132, + "step": 3093, + "video_reward_cumulative_accuracy": 0.8538635628839315 + }, + { + "epoch": 0.9183734045710893, + "grad_norm": 6.074105739593506, + "learning_rate": 1.0080367581863425e-07, + "loss": 0.0787, + "step": 3094, + "video_reward_cumulative_accuracy": 0.8537491919844861 + }, + { + "epoch": 0.9186702285544672, + "grad_norm": 1.0763957500457764, + "learning_rate": 1.0007681311643258e-07, + "loss": 0.0124, + "step": 3095, + "video_reward_cumulative_accuracy": 0.8537964458804523 + }, + { + "epoch": 0.918967052537845, + "grad_norm": 1.8836250305175781, + "learning_rate": 9.935252696470305e-08, + "loss": 0.027, + "step": 3096, + "video_reward_cumulative_accuracy": 0.853843669250646 + }, + { + "epoch": 0.9192638765212229, + "grad_norm": 0.8730959296226501, + "learning_rate": 9.863081814103725e-08, + "loss": 0.0128, + "step": 3097, + "video_reward_cumulative_accuracy": 0.8538908621246367 + }, + { + "epoch": 0.9195607005046008, + "grad_norm": 1.2798937559127808, + "learning_rate": 9.791168742025958e-08, + "loss": 0.0128, + "step": 3098, + "video_reward_cumulative_accuracy": 0.8539380245319561 + }, + { + "epoch": 0.9198575244879786, + "grad_norm": 1.4854799509048462, + "learning_rate": 9.719513557442661e-08, + "loss": 0.0283, + "step": 3099, + "video_reward_cumulative_accuracy": 0.8539851565020975 + }, + { + "epoch": 0.9201543484713565, + "grad_norm": 1.6215635538101196, + "learning_rate": 9.648116337282631e-08, + "loss": 0.0177, + "step": 3100, + "video_reward_cumulative_accuracy": 0.8540322580645161 + }, + { + "epoch": 0.9204511724547343, + "grad_norm": 5.3873209953308105, + "learning_rate": 9.57697715819772e-08, + "loss": 0.0611, + "step": 3101, + "video_reward_cumulative_accuracy": 0.8540793292486295 + }, + { + "epoch": 0.9207479964381122, + "grad_norm": 1.7452203035354614, + "learning_rate": 9.506096096562806e-08, + "loss": 0.0156, + "step": 3102, + "video_reward_cumulative_accuracy": 0.8541263700838169 + }, + { + "epoch": 0.92104482042149, + "grad_norm": 1.4887933731079102, + "learning_rate": 9.435473228475462e-08, + "loss": 0.0424, + "step": 3103, + "video_reward_cumulative_accuracy": 0.8541733805994199 + }, + { + "epoch": 0.9213416444048679, + "grad_norm": 2.0373613834381104, + "learning_rate": 9.365108629756259e-08, + "loss": 0.0173, + "step": 3104, + "video_reward_cumulative_accuracy": 0.8542203608247423 + }, + { + "epoch": 0.9216384683882458, + "grad_norm": 4.531852722167969, + "learning_rate": 9.295002375948436e-08, + "loss": 0.0994, + "step": 3105, + "video_reward_cumulative_accuracy": 0.8541062801932368 + }, + { + "epoch": 0.9219352923716236, + "grad_norm": 0.6652273535728455, + "learning_rate": 9.225154542317843e-08, + "loss": 0.0133, + "step": 3106, + "video_reward_cumulative_accuracy": 0.8541532517707663 + }, + { + "epoch": 0.9222321163550015, + "grad_norm": 0.861679196357727, + "learning_rate": 9.15556520385294e-08, + "loss": 0.0291, + "step": 3107, + "video_reward_cumulative_accuracy": 0.854200193112327 + }, + { + "epoch": 0.9225289403383793, + "grad_norm": 2.65281343460083, + "learning_rate": 9.086234435264574e-08, + "loss": 0.0827, + "step": 3108, + "video_reward_cumulative_accuracy": 0.8540862290862291 + }, + { + "epoch": 0.9228257643217572, + "grad_norm": 1.5000420808792114, + "learning_rate": 9.017162310986067e-08, + "loss": 0.0087, + "step": 3109, + "video_reward_cumulative_accuracy": 0.8541331617883564 + }, + { + "epoch": 0.923122588305135, + "grad_norm": 0.5715723633766174, + "learning_rate": 8.94834890517307e-08, + "loss": 0.0047, + "step": 3110, + "video_reward_cumulative_accuracy": 0.8541800643086817 + }, + { + "epoch": 0.9234194122885129, + "grad_norm": 2.6026268005371094, + "learning_rate": 8.879794291703464e-08, + "loss": 0.0407, + "step": 3111, + "video_reward_cumulative_accuracy": 0.8542269366763099 + }, + { + "epoch": 0.9237162362718908, + "grad_norm": 1.6955796480178833, + "learning_rate": 8.811498544177316e-08, + "loss": 0.0212, + "step": 3112, + "video_reward_cumulative_accuracy": 0.8542737789203085 + }, + { + "epoch": 0.9240130602552686, + "grad_norm": 2.5438053607940674, + "learning_rate": 8.743461735916642e-08, + "loss": 0.039, + "step": 3113, + "video_reward_cumulative_accuracy": 0.8543205910697077 + }, + { + "epoch": 0.9243098842386465, + "grad_norm": 2.37009859085083, + "learning_rate": 8.675683939965595e-08, + "loss": 0.0194, + "step": 3114, + "video_reward_cumulative_accuracy": 0.8543673731535003 + }, + { + "epoch": 0.9246067082220243, + "grad_norm": 3.996877908706665, + "learning_rate": 8.608165229090248e-08, + "loss": 0.0388, + "step": 3115, + "video_reward_cumulative_accuracy": 0.8540930979133227 + }, + { + "epoch": 0.9249035322054022, + "grad_norm": 1.1192494630813599, + "learning_rate": 8.540905675778504e-08, + "loss": 0.0274, + "step": 3116, + "video_reward_cumulative_accuracy": 0.8541399229781772 + }, + { + "epoch": 0.92520035618878, + "grad_norm": 3.1228842735290527, + "learning_rate": 8.473905352239936e-08, + "loss": 0.0702, + "step": 3117, + "video_reward_cumulative_accuracy": 0.8541867179980751 + }, + { + "epoch": 0.9254971801721579, + "grad_norm": 2.0726311206817627, + "learning_rate": 8.407164330405976e-08, + "loss": 0.0352, + "step": 3118, + "video_reward_cumulative_accuracy": 0.8542334830019244 + }, + { + "epoch": 0.9257940041555358, + "grad_norm": 2.6938111782073975, + "learning_rate": 8.34068268192953e-08, + "loss": 0.0524, + "step": 3119, + "video_reward_cumulative_accuracy": 0.8542802180185957 + }, + { + "epoch": 0.9260908281389136, + "grad_norm": 2.6164710521698, + "learning_rate": 8.27446047818517e-08, + "loss": 0.0353, + "step": 3120, + "video_reward_cumulative_accuracy": 0.854326923076923 + }, + { + "epoch": 0.9263876521222915, + "grad_norm": 2.0380940437316895, + "learning_rate": 8.208497790268833e-08, + "loss": 0.0319, + "step": 3121, + "video_reward_cumulative_accuracy": 0.8543735982057034 + }, + { + "epoch": 0.9266844761056693, + "grad_norm": 1.6423115730285645, + "learning_rate": 8.142794688997812e-08, + "loss": 0.014, + "step": 3122, + "video_reward_cumulative_accuracy": 0.8544202434336964 + }, + { + "epoch": 0.9269813000890472, + "grad_norm": 2.5781912803649902, + "learning_rate": 8.077351244910825e-08, + "loss": 0.0401, + "step": 3123, + "video_reward_cumulative_accuracy": 0.8543067563240474 + }, + { + "epoch": 0.927278124072425, + "grad_norm": 2.5099053382873535, + "learning_rate": 8.012167528267723e-08, + "loss": 0.0258, + "step": 3124, + "video_reward_cumulative_accuracy": 0.8541933418693982 + }, + { + "epoch": 0.9275749480558029, + "grad_norm": 1.2233213186264038, + "learning_rate": 7.947243609049581e-08, + "loss": 0.0303, + "step": 3125, + "video_reward_cumulative_accuracy": 0.85408 + }, + { + "epoch": 0.9278717720391808, + "grad_norm": 2.0083823204040527, + "learning_rate": 7.882579556958536e-08, + "loss": 0.0336, + "step": 3126, + "video_reward_cumulative_accuracy": 0.8541266794625719 + }, + { + "epoch": 0.9281685960225586, + "grad_norm": 2.4612114429473877, + "learning_rate": 7.818175441417692e-08, + "loss": 0.0207, + "step": 3127, + "video_reward_cumulative_accuracy": 0.8541733290693956 + }, + { + "epoch": 0.9284654200059365, + "grad_norm": 2.7933738231658936, + "learning_rate": 7.754031331571127e-08, + "loss": 0.0375, + "step": 3128, + "video_reward_cumulative_accuracy": 0.8542199488491049 + }, + { + "epoch": 0.9287622439893143, + "grad_norm": 3.2718217372894287, + "learning_rate": 7.690147296283757e-08, + "loss": 0.0352, + "step": 3129, + "video_reward_cumulative_accuracy": 0.8541067433684884 + }, + { + "epoch": 0.9290590679726922, + "grad_norm": 0.4795750081539154, + "learning_rate": 7.626523404141328e-08, + "loss": 0.0169, + "step": 3130, + "video_reward_cumulative_accuracy": 0.8539936102236422 + }, + { + "epoch": 0.92935589195607, + "grad_norm": 2.361750602722168, + "learning_rate": 7.563159723450259e-08, + "loss": 0.0193, + "step": 3131, + "video_reward_cumulative_accuracy": 0.8540402427339508 + }, + { + "epoch": 0.9296527159394479, + "grad_norm": 1.1606436967849731, + "learning_rate": 7.500056322237576e-08, + "loss": 0.0291, + "step": 3132, + "video_reward_cumulative_accuracy": 0.8540868454661558 + }, + { + "epoch": 0.9299495399228258, + "grad_norm": 2.2710154056549072, + "learning_rate": 7.437213268250948e-08, + "loss": 0.0486, + "step": 3133, + "video_reward_cumulative_accuracy": 0.8539738270028726 + }, + { + "epoch": 0.9302463639062036, + "grad_norm": 1.7540185451507568, + "learning_rate": 7.374630628958462e-08, + "loss": 0.0182, + "step": 3134, + "video_reward_cumulative_accuracy": 0.8540204211869815 + }, + { + "epoch": 0.9305431878895815, + "grad_norm": 0.9131705164909363, + "learning_rate": 7.312308471548624e-08, + "loss": 0.0137, + "step": 3135, + "video_reward_cumulative_accuracy": 0.854066985645933 + }, + { + "epoch": 0.9308400118729593, + "grad_norm": 1.74937105178833, + "learning_rate": 7.250246862930411e-08, + "loss": 0.0159, + "step": 3136, + "video_reward_cumulative_accuracy": 0.8541135204081632 + }, + { + "epoch": 0.9311368358563372, + "grad_norm": 0.4030010402202606, + "learning_rate": 7.188445869732913e-08, + "loss": 0.0111, + "step": 3137, + "video_reward_cumulative_accuracy": 0.854160025502072 + }, + { + "epoch": 0.931433659839715, + "grad_norm": 4.948916435241699, + "learning_rate": 7.12690555830553e-08, + "loss": 0.065, + "step": 3138, + "video_reward_cumulative_accuracy": 0.8542065009560229 + }, + { + "epoch": 0.9317304838230929, + "grad_norm": 3.3688721656799316, + "learning_rate": 7.065625994717717e-08, + "loss": 0.0579, + "step": 3139, + "video_reward_cumulative_accuracy": 0.8542529467983434 + }, + { + "epoch": 0.9320273078064708, + "grad_norm": 3.2429494857788086, + "learning_rate": 7.004607244759071e-08, + "loss": 0.0308, + "step": 3140, + "video_reward_cumulative_accuracy": 0.8542993630573248 + }, + { + "epoch": 0.9323241317898486, + "grad_norm": 2.271911144256592, + "learning_rate": 6.943849373939132e-08, + "loss": 0.0129, + "step": 3141, + "video_reward_cumulative_accuracy": 0.8543457497612226 + }, + { + "epoch": 0.9326209557732265, + "grad_norm": 3.0307109355926514, + "learning_rate": 6.883352447487363e-08, + "loss": 0.03, + "step": 3142, + "video_reward_cumulative_accuracy": 0.8543921069382558 + }, + { + "epoch": 0.9329177797566043, + "grad_norm": 3.5836822986602783, + "learning_rate": 6.823116530353113e-08, + "loss": 0.0431, + "step": 3143, + "video_reward_cumulative_accuracy": 0.8544384346166083 + }, + { + "epoch": 0.9332146037399822, + "grad_norm": 2.957613706588745, + "learning_rate": 6.763141687205432e-08, + "loss": 0.0648, + "step": 3144, + "video_reward_cumulative_accuracy": 0.8544847328244275 + }, + { + "epoch": 0.93351142772336, + "grad_norm": 1.7011269330978394, + "learning_rate": 6.703427982433202e-08, + "loss": 0.0163, + "step": 3145, + "video_reward_cumulative_accuracy": 0.8545310015898251 + }, + { + "epoch": 0.9338082517067379, + "grad_norm": 0.9397748112678528, + "learning_rate": 6.64397548014481e-08, + "loss": 0.0156, + "step": 3146, + "video_reward_cumulative_accuracy": 0.8545772409408773 + }, + { + "epoch": 0.9341050756901158, + "grad_norm": 0.7569989562034607, + "learning_rate": 6.584784244168335e-08, + "loss": 0.0134, + "step": 3147, + "video_reward_cumulative_accuracy": 0.8546234509056244 + }, + { + "epoch": 0.9344018996734936, + "grad_norm": 2.637448310852051, + "learning_rate": 6.525854338051335e-08, + "loss": 0.0423, + "step": 3148, + "video_reward_cumulative_accuracy": 0.8546696315120712 + }, + { + "epoch": 0.9346987236568715, + "grad_norm": 2.607063055038452, + "learning_rate": 6.467185825060728e-08, + "loss": 0.0455, + "step": 3149, + "video_reward_cumulative_accuracy": 0.8547157827881867 + }, + { + "epoch": 0.9349955476402493, + "grad_norm": 1.5537196397781372, + "learning_rate": 6.408778768182883e-08, + "loss": 0.0456, + "step": 3150, + "video_reward_cumulative_accuracy": 0.8546031746031746 + }, + { + "epoch": 0.9352923716236272, + "grad_norm": 2.6270835399627686, + "learning_rate": 6.350633230123443e-08, + "loss": 0.0361, + "step": 3151, + "video_reward_cumulative_accuracy": 0.8544906378927325 + }, + { + "epoch": 0.935589195607005, + "grad_norm": 2.062340259552002, + "learning_rate": 6.292749273307308e-08, + "loss": 0.0188, + "step": 3152, + "video_reward_cumulative_accuracy": 0.8545368020304569 + }, + { + "epoch": 0.9358860195903829, + "grad_norm": 1.0614312887191772, + "learning_rate": 6.23512695987849e-08, + "loss": 0.0088, + "step": 3153, + "video_reward_cumulative_accuracy": 0.8545829368855059 + }, + { + "epoch": 0.9361828435737608, + "grad_norm": 1.1237103939056396, + "learning_rate": 6.177766351700115e-08, + "loss": 0.0129, + "step": 3154, + "video_reward_cumulative_accuracy": 0.8546290424857323 + }, + { + "epoch": 0.9364796675571386, + "grad_norm": 3.2817189693450928, + "learning_rate": 6.120667510354422e-08, + "loss": 0.0625, + "step": 3155, + "video_reward_cumulative_accuracy": 0.8546751188589541 + }, + { + "epoch": 0.9367764915405165, + "grad_norm": 2.6169803142547607, + "learning_rate": 6.063830497142514e-08, + "loss": 0.0305, + "step": 3156, + "video_reward_cumulative_accuracy": 0.8547211660329531 + }, + { + "epoch": 0.9370733155238943, + "grad_norm": 2.9755945205688477, + "learning_rate": 6.007255373084498e-08, + "loss": 0.0336, + "step": 3157, + "video_reward_cumulative_accuracy": 0.8547671840354767 + }, + { + "epoch": 0.9373701395072722, + "grad_norm": 0.9379553198814392, + "learning_rate": 5.950942198919202e-08, + "loss": 0.0127, + "step": 3158, + "video_reward_cumulative_accuracy": 0.8548131728942369 + }, + { + "epoch": 0.93766696349065, + "grad_norm": 1.1103352308273315, + "learning_rate": 5.8948910351042943e-08, + "loss": 0.0217, + "step": 3159, + "video_reward_cumulative_accuracy": 0.8548591326369104 + }, + { + "epoch": 0.9379637874740279, + "grad_norm": 2.294884443283081, + "learning_rate": 5.839101941816166e-08, + "loss": 0.0284, + "step": 3160, + "video_reward_cumulative_accuracy": 0.854746835443038 + }, + { + "epoch": 0.9382606114574058, + "grad_norm": 1.6113910675048828, + "learning_rate": 5.783574978949796e-08, + "loss": 0.0242, + "step": 3161, + "video_reward_cumulative_accuracy": 0.8547927870926922 + }, + { + "epoch": 0.9385574354407836, + "grad_norm": 1.6590948104858398, + "learning_rate": 5.728310206118776e-08, + "loss": 0.0327, + "step": 3162, + "video_reward_cumulative_accuracy": 0.8548387096774194 + }, + { + "epoch": 0.9388542594241615, + "grad_norm": 0.5105149149894714, + "learning_rate": 5.6733076826552267e-08, + "loss": 0.0084, + "step": 3163, + "video_reward_cumulative_accuracy": 0.8548846032247865 + }, + { + "epoch": 0.9391510834075393, + "grad_norm": 2.3923985958099365, + "learning_rate": 5.618567467609637e-08, + "loss": 0.0233, + "step": 3164, + "video_reward_cumulative_accuracy": 0.8549304677623262 + }, + { + "epoch": 0.9394479073909172, + "grad_norm": 2.449561834335327, + "learning_rate": 5.564089619751023e-08, + "loss": 0.0187, + "step": 3165, + "video_reward_cumulative_accuracy": 0.8549763033175355 + }, + { + "epoch": 0.939744731374295, + "grad_norm": 0.8511427044868469, + "learning_rate": 5.509874197566573e-08, + "loss": 0.015, + "step": 3166, + "video_reward_cumulative_accuracy": 0.8550221099178774 + }, + { + "epoch": 0.9400415553576729, + "grad_norm": 2.5197205543518066, + "learning_rate": 5.455921259261837e-08, + "loss": 0.0333, + "step": 3167, + "video_reward_cumulative_accuracy": 0.8550678875907799 + }, + { + "epoch": 0.9403383793410508, + "grad_norm": 2.1884961128234863, + "learning_rate": 5.40223086276051e-08, + "loss": 0.0673, + "step": 3168, + "video_reward_cumulative_accuracy": 0.8551136363636364 + }, + { + "epoch": 0.9406352033244286, + "grad_norm": 2.423147678375244, + "learning_rate": 5.348803065704483e-08, + "loss": 0.0129, + "step": 3169, + "video_reward_cumulative_accuracy": 0.8550015777847901 + }, + { + "epoch": 0.9409320273078065, + "grad_norm": 2.657482624053955, + "learning_rate": 5.2956379254536226e-08, + "loss": 0.058, + "step": 3170, + "video_reward_cumulative_accuracy": 0.8550473186119874 + }, + { + "epoch": 0.9412288512911843, + "grad_norm": 3.016252040863037, + "learning_rate": 5.2427354990859106e-08, + "loss": 0.028, + "step": 3171, + "video_reward_cumulative_accuracy": 0.8550930305897193 + }, + { + "epoch": 0.9415256752745622, + "grad_norm": 2.315845012664795, + "learning_rate": 5.190095843397275e-08, + "loss": 0.0678, + "step": 3172, + "video_reward_cumulative_accuracy": 0.8551387137452712 + }, + { + "epoch": 0.94182249925794, + "grad_norm": 0.9423714876174927, + "learning_rate": 5.137719014901399e-08, + "loss": 0.0241, + "step": 3173, + "video_reward_cumulative_accuracy": 0.8551843681058935 + }, + { + "epoch": 0.9421193232413179, + "grad_norm": 1.8504040241241455, + "learning_rate": 5.0856050698299684e-08, + "loss": 0.0228, + "step": 3174, + "video_reward_cumulative_accuracy": 0.855072463768116 + }, + { + "epoch": 0.9424161472246958, + "grad_norm": 1.4434008598327637, + "learning_rate": 5.0337540641322846e-08, + "loss": 0.0293, + "step": 3175, + "video_reward_cumulative_accuracy": 0.8551181102362204 + }, + { + "epoch": 0.9427129712080736, + "grad_norm": 2.862295627593994, + "learning_rate": 4.9821660534755125e-08, + "loss": 0.0318, + "step": 3176, + "video_reward_cumulative_accuracy": 0.8550062972292192 + }, + { + "epoch": 0.9430097951914514, + "grad_norm": 1.7346522808074951, + "learning_rate": 4.930841093244349e-08, + "loss": 0.0158, + "step": 3177, + "video_reward_cumulative_accuracy": 0.8550519357884797 + }, + { + "epoch": 0.9433066191748293, + "grad_norm": 3.134310722351074, + "learning_rate": 4.8797792385411325e-08, + "loss": 0.0237, + "step": 3178, + "video_reward_cumulative_accuracy": 0.85509754562618 + }, + { + "epoch": 0.9436034431582072, + "grad_norm": 1.7474472522735596, + "learning_rate": 4.828980544185735e-08, + "loss": 0.0242, + "step": 3179, + "video_reward_cumulative_accuracy": 0.8551431267694244 + }, + { + "epoch": 0.943900267141585, + "grad_norm": 4.279950141906738, + "learning_rate": 4.77844506471542e-08, + "loss": 0.0626, + "step": 3180, + "video_reward_cumulative_accuracy": 0.855188679245283 + }, + { + "epoch": 0.9441970911249629, + "grad_norm": 2.3081438541412354, + "learning_rate": 4.728172854385038e-08, + "loss": 0.0226, + "step": 3181, + "video_reward_cumulative_accuracy": 0.8552342030807922 + }, + { + "epoch": 0.9444939151083408, + "grad_norm": 1.8738723993301392, + "learning_rate": 4.678163967166582e-08, + "loss": 0.0248, + "step": 3182, + "video_reward_cumulative_accuracy": 0.8552796983029541 + }, + { + "epoch": 0.9447907390917186, + "grad_norm": 3.566354990005493, + "learning_rate": 4.628418456749495e-08, + "loss": 0.0372, + "step": 3183, + "video_reward_cumulative_accuracy": 0.8551680804272699 + }, + { + "epoch": 0.9450875630750964, + "grad_norm": 4.133174419403076, + "learning_rate": 4.5789363765404436e-08, + "loss": 0.0472, + "step": 3184, + "video_reward_cumulative_accuracy": 0.855213567839196 + }, + { + "epoch": 0.9453843870584743, + "grad_norm": 2.822946310043335, + "learning_rate": 4.529717779663129e-08, + "loss": 0.0269, + "step": 3185, + "video_reward_cumulative_accuracy": 0.8551020408163266 + }, + { + "epoch": 0.9456812110418522, + "grad_norm": 4.163548946380615, + "learning_rate": 4.4807627189586425e-08, + "loss": 0.0885, + "step": 3186, + "video_reward_cumulative_accuracy": 0.8551475204017577 + }, + { + "epoch": 0.94597803502523, + "grad_norm": 2.597421169281006, + "learning_rate": 4.432071246984859e-08, + "loss": 0.0321, + "step": 3187, + "video_reward_cumulative_accuracy": 0.8551929714465014 + }, + { + "epoch": 0.9462748590086079, + "grad_norm": 3.691058397293091, + "learning_rate": 4.383643416016908e-08, + "loss": 0.0655, + "step": 3188, + "video_reward_cumulative_accuracy": 0.855081555834379 + }, + { + "epoch": 0.9465716829919858, + "grad_norm": 2.430384874343872, + "learning_rate": 4.3354792780467004e-08, + "loss": 0.026, + "step": 3189, + "video_reward_cumulative_accuracy": 0.8549702100972092 + }, + { + "epoch": 0.9468685069753636, + "grad_norm": 2.202420234680176, + "learning_rate": 4.287578884783122e-08, + "loss": 0.0367, + "step": 3190, + "video_reward_cumulative_accuracy": 0.8550156739811913 + }, + { + "epoch": 0.9471653309587414, + "grad_norm": 2.3409032821655273, + "learning_rate": 4.2399422876518995e-08, + "loss": 0.0338, + "step": 3191, + "video_reward_cumulative_accuracy": 0.8550611093701034 + }, + { + "epoch": 0.9474621549421193, + "grad_norm": 1.7721819877624512, + "learning_rate": 4.192569537795538e-08, + "loss": 0.0257, + "step": 3192, + "video_reward_cumulative_accuracy": 0.8551065162907269 + }, + { + "epoch": 0.9477589789254972, + "grad_norm": 1.6074609756469727, + "learning_rate": 4.145460686073327e-08, + "loss": 0.0371, + "step": 3193, + "video_reward_cumulative_accuracy": 0.8549953022236142 + }, + { + "epoch": 0.948055802908875, + "grad_norm": 2.8421127796173096, + "learning_rate": 4.098615783061144e-08, + "loss": 0.1067, + "step": 3194, + "video_reward_cumulative_accuracy": 0.8550407013149656 + }, + { + "epoch": 0.9483526268922529, + "grad_norm": 0.6036382913589478, + "learning_rate": 4.0520348790515084e-08, + "loss": 0.0188, + "step": 3195, + "video_reward_cumulative_accuracy": 0.8550860719874804 + }, + { + "epoch": 0.9486494508756308, + "grad_norm": 2.0517990589141846, + "learning_rate": 4.005718024053612e-08, + "loss": 0.024, + "step": 3196, + "video_reward_cumulative_accuracy": 0.8549749687108886 + }, + { + "epoch": 0.9489462748590086, + "grad_norm": 1.565876841545105, + "learning_rate": 3.959665267793067e-08, + "loss": 0.0138, + "step": 3197, + "video_reward_cumulative_accuracy": 0.8550203315608382 + }, + { + "epoch": 0.9492430988423864, + "grad_norm": 2.432234525680542, + "learning_rate": 3.91387665971199e-08, + "loss": 0.0496, + "step": 3198, + "video_reward_cumulative_accuracy": 0.8550656660412758 + }, + { + "epoch": 0.9495399228257643, + "grad_norm": 0.7143085598945618, + "learning_rate": 3.868352248968865e-08, + "loss": 0.0061, + "step": 3199, + "video_reward_cumulative_accuracy": 0.8551109721788058 + }, + { + "epoch": 0.9498367468091422, + "grad_norm": 1.3272526264190674, + "learning_rate": 3.823092084438568e-08, + "loss": 0.0249, + "step": 3200, + "video_reward_cumulative_accuracy": 0.85515625 + }, + { + "epoch": 0.9498367468091422, + "eval_runtime": 136.0857, + "eval_samples_per_second": 5.798, + "eval_steps_per_second": 0.727, + "eval_test_set_accuracy": 0.8320707070707071, + "step": 3200 + }, + { + "epoch": 0.95013357079252, + "grad_norm": 1.0475165843963623, + "learning_rate": 3.778096214712285e-08, + "loss": 0.0085, + "step": 3201, + "video_reward_cumulative_accuracy": 0.8552014995313965 + }, + { + "epoch": 0.9504303947758979, + "grad_norm": 2.317171335220337, + "learning_rate": 3.733364688097485e-08, + "loss": 0.0783, + "step": 3202, + "video_reward_cumulative_accuracy": 0.8552467207995003 + }, + { + "epoch": 0.9507272187592758, + "grad_norm": 2.8476126194000244, + "learning_rate": 3.6888975526177815e-08, + "loss": 0.0534, + "step": 3203, + "video_reward_cumulative_accuracy": 0.8552919138307836 + }, + { + "epoch": 0.9510240427426536, + "grad_norm": 3.3761157989501953, + "learning_rate": 3.6446948560129314e-08, + "loss": 0.0177, + "step": 3204, + "video_reward_cumulative_accuracy": 0.8553370786516854 + }, + { + "epoch": 0.9513208667260314, + "grad_norm": 1.30337655544281, + "learning_rate": 3.600756645738834e-08, + "loss": 0.0228, + "step": 3205, + "video_reward_cumulative_accuracy": 0.8553822152886116 + }, + { + "epoch": 0.9516176907094093, + "grad_norm": 4.232937812805176, + "learning_rate": 3.557082968967423e-08, + "loss": 0.0748, + "step": 3206, + "video_reward_cumulative_accuracy": 0.8552713661883967 + }, + { + "epoch": 0.9519145146927872, + "grad_norm": 3.1819849014282227, + "learning_rate": 3.5136738725866646e-08, + "loss": 0.0287, + "step": 3207, + "video_reward_cumulative_accuracy": 0.8553164951668226 + }, + { + "epoch": 0.952211338676165, + "grad_norm": 0.5942104458808899, + "learning_rate": 3.47052940320039e-08, + "loss": 0.0064, + "step": 3208, + "video_reward_cumulative_accuracy": 0.8553615960099751 + }, + { + "epoch": 0.9525081626595429, + "grad_norm": 2.6708426475524902, + "learning_rate": 3.4276496071284084e-08, + "loss": 0.0283, + "step": 3209, + "video_reward_cumulative_accuracy": 0.855406668744157 + }, + { + "epoch": 0.9528049866429208, + "grad_norm": 0.9444103240966797, + "learning_rate": 3.385034530406311e-08, + "loss": 0.0048, + "step": 3210, + "video_reward_cumulative_accuracy": 0.8554517133956386 + }, + { + "epoch": 0.9531018106262986, + "grad_norm": 1.708574652671814, + "learning_rate": 3.34268421878553e-08, + "loss": 0.0249, + "step": 3211, + "video_reward_cumulative_accuracy": 0.8554967299906571 + }, + { + "epoch": 0.9533986346096764, + "grad_norm": 1.0022554397583008, + "learning_rate": 3.300598717733278e-08, + "loss": 0.0133, + "step": 3212, + "video_reward_cumulative_accuracy": 0.8555417185554172 + }, + { + "epoch": 0.9536954585930543, + "grad_norm": 1.676685094833374, + "learning_rate": 3.258778072432356e-08, + "loss": 0.0193, + "step": 3213, + "video_reward_cumulative_accuracy": 0.8555866791160909 + }, + { + "epoch": 0.9539922825764322, + "grad_norm": 2.026139259338379, + "learning_rate": 3.217222327781322e-08, + "loss": 0.0297, + "step": 3214, + "video_reward_cumulative_accuracy": 0.8556316116988176 + }, + { + "epoch": 0.95428910655981, + "grad_norm": 0.3578716516494751, + "learning_rate": 3.175931528394294e-08, + "loss": 0.009, + "step": 3215, + "video_reward_cumulative_accuracy": 0.8556765163297045 + }, + { + "epoch": 0.9545859305431879, + "grad_norm": 1.398017406463623, + "learning_rate": 3.134905718600978e-08, + "loss": 0.0171, + "step": 3216, + "video_reward_cumulative_accuracy": 0.8557213930348259 + }, + { + "epoch": 0.9548827545265658, + "grad_norm": 2.376380205154419, + "learning_rate": 3.094144942446531e-08, + "loss": 0.0243, + "step": 3217, + "video_reward_cumulative_accuracy": 0.8557662418402238 + }, + { + "epoch": 0.9551795785099436, + "grad_norm": 2.7739336490631104, + "learning_rate": 3.053649243691587e-08, + "loss": 0.0437, + "step": 3218, + "video_reward_cumulative_accuracy": 0.8556556867619639 + }, + { + "epoch": 0.9554764024933214, + "grad_norm": 2.0257623195648193, + "learning_rate": 3.013418665812257e-08, + "loss": 0.0569, + "step": 3219, + "video_reward_cumulative_accuracy": 0.8557005281143212 + }, + { + "epoch": 0.9557732264766993, + "grad_norm": 2.442366600036621, + "learning_rate": 2.973453251999936e-08, + "loss": 0.0582, + "step": 3220, + "video_reward_cumulative_accuracy": 0.8557453416149068 + }, + { + "epoch": 0.9560700504600772, + "grad_norm": 1.158838152885437, + "learning_rate": 2.933753045161386e-08, + "loss": 0.0107, + "step": 3221, + "video_reward_cumulative_accuracy": 0.8557901272896616 + }, + { + "epoch": 0.956366874443455, + "grad_norm": 1.9528629779815674, + "learning_rate": 2.8943180879186517e-08, + "loss": 0.0212, + "step": 3222, + "video_reward_cumulative_accuracy": 0.8558348851644941 + }, + { + "epoch": 0.9566636984268329, + "grad_norm": 2.6688528060913086, + "learning_rate": 2.85514842260895e-08, + "loss": 0.0283, + "step": 3223, + "video_reward_cumulative_accuracy": 0.8557244802978592 + }, + { + "epoch": 0.9569605224102108, + "grad_norm": 0.49328845739364624, + "learning_rate": 2.8162440912847532e-08, + "loss": 0.0061, + "step": 3224, + "video_reward_cumulative_accuracy": 0.8557692307692307 + }, + { + "epoch": 0.9572573463935886, + "grad_norm": 4.010133266448975, + "learning_rate": 2.7776051357135957e-08, + "loss": 0.0418, + "step": 3225, + "video_reward_cumulative_accuracy": 0.8558139534883721 + }, + { + "epoch": 0.9575541703769664, + "grad_norm": 1.154459834098816, + "learning_rate": 2.7392315973781835e-08, + "loss": 0.0174, + "step": 3226, + "video_reward_cumulative_accuracy": 0.8558586484810912 + }, + { + "epoch": 0.9578509943603443, + "grad_norm": 1.608305811882019, + "learning_rate": 2.7011235174762284e-08, + "loss": 0.0243, + "step": 3227, + "video_reward_cumulative_accuracy": 0.8559033157731639 + }, + { + "epoch": 0.9581478183437222, + "grad_norm": 0.8377227783203125, + "learning_rate": 2.6632809369204205e-08, + "loss": 0.0106, + "step": 3228, + "video_reward_cumulative_accuracy": 0.8559479553903345 + }, + { + "epoch": 0.9584446423271, + "grad_norm": 2.1696717739105225, + "learning_rate": 2.6257038963385106e-08, + "loss": 0.0256, + "step": 3229, + "video_reward_cumulative_accuracy": 0.8559925673583153 + }, + { + "epoch": 0.9587414663104779, + "grad_norm": 1.0289219617843628, + "learning_rate": 2.588392436073034e-08, + "loss": 0.0131, + "step": 3230, + "video_reward_cumulative_accuracy": 0.8558823529411764 + }, + { + "epoch": 0.9590382902938558, + "grad_norm": 0.3926885426044464, + "learning_rate": 2.5513465961814475e-08, + "loss": 0.0062, + "step": 3231, + "video_reward_cumulative_accuracy": 0.8559269575982668 + }, + { + "epoch": 0.9593351142772336, + "grad_norm": 1.4411845207214355, + "learning_rate": 2.5145664164361593e-08, + "loss": 0.0107, + "step": 3232, + "video_reward_cumulative_accuracy": 0.8559715346534653 + }, + { + "epoch": 0.9596319382606114, + "grad_norm": 0.6198068857192993, + "learning_rate": 2.4780519363241663e-08, + "loss": 0.0051, + "step": 3233, + "video_reward_cumulative_accuracy": 0.8560160841323848 + }, + { + "epoch": 0.9599287622439893, + "grad_norm": 1.5043619871139526, + "learning_rate": 2.4418031950473597e-08, + "loss": 0.0278, + "step": 3234, + "video_reward_cumulative_accuracy": 0.8560606060606061 + }, + { + "epoch": 0.9602255862273672, + "grad_norm": 2.1934850215911865, + "learning_rate": 2.405820231522249e-08, + "loss": 0.0246, + "step": 3235, + "video_reward_cumulative_accuracy": 0.855950540958269 + }, + { + "epoch": 0.960522410210745, + "grad_norm": 3.164400100708008, + "learning_rate": 2.3701030843800433e-08, + "loss": 0.025, + "step": 3236, + "video_reward_cumulative_accuracy": 0.8559950556242274 + }, + { + "epoch": 0.9608192341941229, + "grad_norm": 0.8490694761276245, + "learning_rate": 2.334651791966569e-08, + "loss": 0.0187, + "step": 3237, + "video_reward_cumulative_accuracy": 0.8560395427865307 + }, + { + "epoch": 0.9611160581775008, + "grad_norm": 1.7095571756362915, + "learning_rate": 2.2994663923422422e-08, + "loss": 0.0225, + "step": 3238, + "video_reward_cumulative_accuracy": 0.8560840024706609 + }, + { + "epoch": 0.9614128821608786, + "grad_norm": 1.3501557111740112, + "learning_rate": 2.2645469232820127e-08, + "loss": 0.0204, + "step": 3239, + "video_reward_cumulative_accuracy": 0.8561284347020686 + }, + { + "epoch": 0.9617097061442564, + "grad_norm": 0.568364143371582, + "learning_rate": 2.229893422275281e-08, + "loss": 0.0066, + "step": 3240, + "video_reward_cumulative_accuracy": 0.8561728395061728 + }, + { + "epoch": 0.9620065301276343, + "grad_norm": 1.0702372789382935, + "learning_rate": 2.1955059265259815e-08, + "loss": 0.0162, + "step": 3241, + "video_reward_cumulative_accuracy": 0.8562172169083616 + }, + { + "epoch": 0.9623033541110122, + "grad_norm": 4.149465560913086, + "learning_rate": 2.161384472952416e-08, + "loss": 0.0673, + "step": 3242, + "video_reward_cumulative_accuracy": 0.8562615669339914 + }, + { + "epoch": 0.96260017809439, + "grad_norm": 2.0406556129455566, + "learning_rate": 2.1275290981872532e-08, + "loss": 0.0408, + "step": 3243, + "video_reward_cumulative_accuracy": 0.8563058896083873 + }, + { + "epoch": 0.9628970020777678, + "grad_norm": 1.5414153337478638, + "learning_rate": 2.0939398385775578e-08, + "loss": 0.0101, + "step": 3244, + "video_reward_cumulative_accuracy": 0.8563501849568435 + }, + { + "epoch": 0.9631938260611458, + "grad_norm": 3.055986166000366, + "learning_rate": 2.0606167301846503e-08, + "loss": 0.0537, + "step": 3245, + "video_reward_cumulative_accuracy": 0.8563944530046225 + }, + { + "epoch": 0.9634906500445236, + "grad_norm": 1.648927927017212, + "learning_rate": 2.0275598087841075e-08, + "loss": 0.0365, + "step": 3246, + "video_reward_cumulative_accuracy": 0.8564386937769563 + }, + { + "epoch": 0.9637874740279014, + "grad_norm": 3.5667824745178223, + "learning_rate": 1.994769109865735e-08, + "loss": 0.0396, + "step": 3247, + "video_reward_cumulative_accuracy": 0.8564829072990453 + }, + { + "epoch": 0.9640842980112793, + "grad_norm": 1.4304312467575073, + "learning_rate": 1.962244668633595e-08, + "loss": 0.0193, + "step": 3248, + "video_reward_cumulative_accuracy": 0.8565270935960592 + }, + { + "epoch": 0.9643811219946572, + "grad_norm": 4.079548358917236, + "learning_rate": 1.9299865200057556e-08, + "loss": 0.0538, + "step": 3249, + "video_reward_cumulative_accuracy": 0.8564173591874423 + }, + { + "epoch": 0.964677945978035, + "grad_norm": 1.535839557647705, + "learning_rate": 1.8979946986145137e-08, + "loss": 0.0123, + "step": 3250, + "video_reward_cumulative_accuracy": 0.8564615384615385 + }, + { + "epoch": 0.9649747699614128, + "grad_norm": 2.697875738143921, + "learning_rate": 1.8662692388061733e-08, + "loss": 0.03, + "step": 3251, + "video_reward_cumulative_accuracy": 0.8565056905567517 + }, + { + "epoch": 0.9652715939447908, + "grad_norm": 2.8388845920562744, + "learning_rate": 1.8348101746410994e-08, + "loss": 0.0312, + "step": 3252, + "video_reward_cumulative_accuracy": 0.856549815498155 + }, + { + "epoch": 0.9655684179281686, + "grad_norm": 3.0264766216278076, + "learning_rate": 1.803617539893665e-08, + "loss": 0.0642, + "step": 3253, + "video_reward_cumulative_accuracy": 0.85659391331079 + }, + { + "epoch": 0.9658652419115464, + "grad_norm": 1.1789016723632812, + "learning_rate": 1.772691368052165e-08, + "loss": 0.0103, + "step": 3254, + "video_reward_cumulative_accuracy": 0.8566379840196681 + }, + { + "epoch": 0.9661620658949243, + "grad_norm": 2.1506567001342773, + "learning_rate": 1.742031692318874e-08, + "loss": 0.0225, + "step": 3255, + "video_reward_cumulative_accuracy": 0.8566820276497696 + }, + { + "epoch": 0.9664588898783022, + "grad_norm": 1.4436240196228027, + "learning_rate": 1.711638545609906e-08, + "loss": 0.0403, + "step": 3256, + "video_reward_cumulative_accuracy": 0.8565724815724816 + }, + { + "epoch": 0.96675571386168, + "grad_norm": 0.8938013911247253, + "learning_rate": 1.681511960555271e-08, + "loss": 0.0166, + "step": 3257, + "video_reward_cumulative_accuracy": 0.8566165182683451 + }, + { + "epoch": 0.9670525378450578, + "grad_norm": 2.3233988285064697, + "learning_rate": 1.651651969498791e-08, + "loss": 0.0305, + "step": 3258, + "video_reward_cumulative_accuracy": 0.8566605279312461 + }, + { + "epoch": 0.9673493618284358, + "grad_norm": 0.6429560780525208, + "learning_rate": 1.6220586044980448e-08, + "loss": 0.0046, + "step": 3259, + "video_reward_cumulative_accuracy": 0.8567045105860693 + }, + { + "epoch": 0.9676461858118136, + "grad_norm": 1.2214471101760864, + "learning_rate": 1.592731897324368e-08, + "loss": 0.0175, + "step": 3260, + "video_reward_cumulative_accuracy": 0.8567484662576688 + }, + { + "epoch": 0.9679430097951914, + "grad_norm": 2.7271018028259277, + "learning_rate": 1.5636718794628523e-08, + "loss": 0.0793, + "step": 3261, + "video_reward_cumulative_accuracy": 0.8567923949708678 + }, + { + "epoch": 0.9682398337785693, + "grad_norm": 2.33453106880188, + "learning_rate": 1.5348785821122648e-08, + "loss": 0.0387, + "step": 3262, + "video_reward_cumulative_accuracy": 0.8568362967504598 + }, + { + "epoch": 0.9685366577619472, + "grad_norm": 0.8599418997764587, + "learning_rate": 1.5063520361849604e-08, + "loss": 0.0055, + "step": 3263, + "video_reward_cumulative_accuracy": 0.8568801716212074 + }, + { + "epoch": 0.968833481745325, + "grad_norm": 0.911806583404541, + "learning_rate": 1.4780922723069968e-08, + "loss": 0.0228, + "step": 3264, + "video_reward_cumulative_accuracy": 0.8569240196078431 + }, + { + "epoch": 0.9691303057287028, + "grad_norm": 1.4930540323257446, + "learning_rate": 1.4500993208179382e-08, + "loss": 0.0299, + "step": 3265, + "video_reward_cumulative_accuracy": 0.8569678407350689 + }, + { + "epoch": 0.9694271297120808, + "grad_norm": 2.40919828414917, + "learning_rate": 1.4223732117709387e-08, + "loss": 0.064, + "step": 3266, + "video_reward_cumulative_accuracy": 0.8570116350275566 + }, + { + "epoch": 0.9697239536954586, + "grad_norm": 1.3808552026748657, + "learning_rate": 1.3949139749326601e-08, + "loss": 0.0271, + "step": 3267, + "video_reward_cumulative_accuracy": 0.8570554025099479 + }, + { + "epoch": 0.9700207776788364, + "grad_norm": 1.2729172706604004, + "learning_rate": 1.367721639783326e-08, + "loss": 0.0252, + "step": 3268, + "video_reward_cumulative_accuracy": 0.8570991432068543 + }, + { + "epoch": 0.9703176016622143, + "grad_norm": 5.71325159072876, + "learning_rate": 1.3407962355164728e-08, + "loss": 0.0391, + "step": 3269, + "video_reward_cumulative_accuracy": 0.8571428571428571 + }, + { + "epoch": 0.9706144256455922, + "grad_norm": 0.38675758242607117, + "learning_rate": 1.3141377910391718e-08, + "loss": 0.0065, + "step": 3270, + "video_reward_cumulative_accuracy": 0.8571865443425076 + }, + { + "epoch": 0.97091124962897, + "grad_norm": 1.710315227508545, + "learning_rate": 1.2877463349718067e-08, + "loss": 0.0198, + "step": 3271, + "video_reward_cumulative_accuracy": 0.8572302048303271 + }, + { + "epoch": 0.9712080736123478, + "grad_norm": 1.825723648071289, + "learning_rate": 1.2616218956482407e-08, + "loss": 0.0223, + "step": 3272, + "video_reward_cumulative_accuracy": 0.8572738386308069 + }, + { + "epoch": 0.9715048975957258, + "grad_norm": 0.4373869299888611, + "learning_rate": 1.2357645011155106e-08, + "loss": 0.0071, + "step": 3273, + "video_reward_cumulative_accuracy": 0.8573174457684082 + }, + { + "epoch": 0.9718017215791036, + "grad_norm": 2.230316400527954, + "learning_rate": 1.2101741791341049e-08, + "loss": 0.02, + "step": 3274, + "video_reward_cumulative_accuracy": 0.8573610262675626 + }, + { + "epoch": 0.9720985455624814, + "grad_norm": 3.0452535152435303, + "learning_rate": 1.1848509571777133e-08, + "loss": 0.0309, + "step": 3275, + "video_reward_cumulative_accuracy": 0.8574045801526717 + }, + { + "epoch": 0.9723953695458593, + "grad_norm": 1.0447007417678833, + "learning_rate": 1.1597948624332278e-08, + "loss": 0.0155, + "step": 3276, + "video_reward_cumulative_accuracy": 0.8574481074481074 + }, + { + "epoch": 0.9726921935292372, + "grad_norm": 3.0469367504119873, + "learning_rate": 1.1350059218008248e-08, + "loss": 0.0523, + "step": 3277, + "video_reward_cumulative_accuracy": 0.8574916081782118 + }, + { + "epoch": 0.972989017512615, + "grad_norm": 1.8677681684494019, + "learning_rate": 1.1104841618938545e-08, + "loss": 0.0189, + "step": 3278, + "video_reward_cumulative_accuracy": 0.8575350823672971 + }, + { + "epoch": 0.9732858414959928, + "grad_norm": 0.8737713694572449, + "learning_rate": 1.0862296090387859e-08, + "loss": 0.0115, + "step": 3279, + "video_reward_cumulative_accuracy": 0.8575785300396462 + }, + { + "epoch": 0.9735826654793708, + "grad_norm": 1.362595796585083, + "learning_rate": 1.0622422892752338e-08, + "loss": 0.0202, + "step": 3280, + "video_reward_cumulative_accuracy": 0.8576219512195122 + }, + { + "epoch": 0.9738794894627486, + "grad_norm": 4.023233413696289, + "learning_rate": 1.0385222283559037e-08, + "loss": 0.071, + "step": 3281, + "video_reward_cumulative_accuracy": 0.8576653459311185 + }, + { + "epoch": 0.9741763134461264, + "grad_norm": 4.000472545623779, + "learning_rate": 1.0150694517466198e-08, + "loss": 0.0249, + "step": 3282, + "video_reward_cumulative_accuracy": 0.8577087141986593 + }, + { + "epoch": 0.9744731374295043, + "grad_norm": 0.7309911251068115, + "learning_rate": 9.918839846261852e-09, + "loss": 0.0066, + "step": 3283, + "video_reward_cumulative_accuracy": 0.8577520560462991 + }, + { + "epoch": 0.9747699614128822, + "grad_norm": 1.770171880722046, + "learning_rate": 9.689658518864664e-09, + "loss": 0.0367, + "step": 3284, + "video_reward_cumulative_accuracy": 0.8576431181485993 + }, + { + "epoch": 0.97506678539626, + "grad_norm": 2.3775479793548584, + "learning_rate": 9.463150781322816e-09, + "loss": 0.019, + "step": 3285, + "video_reward_cumulative_accuracy": 0.8576864535768646 + }, + { + "epoch": 0.9753636093796378, + "grad_norm": 3.345357894897461, + "learning_rate": 9.239316876814564e-09, + "loss": 0.0785, + "step": 3286, + "video_reward_cumulative_accuracy": 0.8577297626293365 + }, + { + "epoch": 0.9756604333630158, + "grad_norm": 1.9170372486114502, + "learning_rate": 9.018157045647124e-09, + "loss": 0.0236, + "step": 3287, + "video_reward_cumulative_accuracy": 0.8577730453300882 + }, + { + "epoch": 0.9759572573463936, + "grad_norm": 1.4506341218948364, + "learning_rate": 8.799671525257236e-09, + "loss": 0.0337, + "step": 3288, + "video_reward_cumulative_accuracy": 0.8576642335766423 + }, + { + "epoch": 0.9762540813297714, + "grad_norm": 3.2569446563720703, + "learning_rate": 8.583860550210043e-09, + "loss": 0.0416, + "step": 3289, + "video_reward_cumulative_accuracy": 0.857707509881423 + }, + { + "epoch": 0.9765509053131493, + "grad_norm": 2.6264233589172363, + "learning_rate": 8.370724352199933e-09, + "loss": 0.0574, + "step": 3290, + "video_reward_cumulative_accuracy": 0.8577507598784194 + }, + { + "epoch": 0.9768477292965272, + "grad_norm": 0.6166547536849976, + "learning_rate": 8.160263160049143e-09, + "loss": 0.0137, + "step": 3291, + "video_reward_cumulative_accuracy": 0.8577939835916135 + }, + { + "epoch": 0.977144553279905, + "grad_norm": 2.100522041320801, + "learning_rate": 7.952477199708042e-09, + "loss": 0.0219, + "step": 3292, + "video_reward_cumulative_accuracy": 0.8578371810449574 + }, + { + "epoch": 0.9774413772632828, + "grad_norm": 1.396987795829773, + "learning_rate": 7.747366694255409e-09, + "loss": 0.0129, + "step": 3293, + "video_reward_cumulative_accuracy": 0.8578803522623747 + }, + { + "epoch": 0.9777382012466608, + "grad_norm": 0.7728352546691895, + "learning_rate": 7.544931863896765e-09, + "loss": 0.0088, + "step": 3294, + "video_reward_cumulative_accuracy": 0.8579234972677595 + }, + { + "epoch": 0.9780350252300386, + "grad_norm": 0.9085843563079834, + "learning_rate": 7.345172925966038e-09, + "loss": 0.0196, + "step": 3295, + "video_reward_cumulative_accuracy": 0.8579666160849773 + }, + { + "epoch": 0.9783318492134164, + "grad_norm": 1.8223828077316284, + "learning_rate": 7.148090094923343e-09, + "loss": 0.0362, + "step": 3296, + "video_reward_cumulative_accuracy": 0.8580097087378641 + }, + { + "epoch": 0.9786286731967943, + "grad_norm": 5.303034782409668, + "learning_rate": 6.953683582356652e-09, + "loss": 0.099, + "step": 3297, + "video_reward_cumulative_accuracy": 0.8579011222323324 + }, + { + "epoch": 0.9789254971801722, + "grad_norm": 2.5379104614257812, + "learning_rate": 6.76195359698012e-09, + "loss": 0.0362, + "step": 3298, + "video_reward_cumulative_accuracy": 0.8577926015767131 + }, + { + "epoch": 0.97922232116355, + "grad_norm": 2.565481185913086, + "learning_rate": 6.57290034463437e-09, + "loss": 0.0284, + "step": 3299, + "video_reward_cumulative_accuracy": 0.8578357077902394 + }, + { + "epoch": 0.9795191451469278, + "grad_norm": 2.1413698196411133, + "learning_rate": 6.386524028286489e-09, + "loss": 0.0321, + "step": 3300, + "video_reward_cumulative_accuracy": 0.8578787878787879 + }, + { + "epoch": 0.9798159691303058, + "grad_norm": 2.323068857192993, + "learning_rate": 6.202824848029476e-09, + "loss": 0.0302, + "step": 3301, + "video_reward_cumulative_accuracy": 0.8579218418661012 + }, + { + "epoch": 0.9801127931136836, + "grad_norm": 1.3499592542648315, + "learning_rate": 6.021803001082238e-09, + "loss": 0.0093, + "step": 3302, + "video_reward_cumulative_accuracy": 0.8579648697758934 + }, + { + "epoch": 0.9804096170970614, + "grad_norm": 0.8288122415542603, + "learning_rate": 5.843458681789594e-09, + "loss": 0.0155, + "step": 3303, + "video_reward_cumulative_accuracy": 0.8580078716318499 + }, + { + "epoch": 0.9807064410804393, + "grad_norm": 3.430920124053955, + "learning_rate": 5.66779208162116e-09, + "loss": 0.0318, + "step": 3304, + "video_reward_cumulative_accuracy": 0.8580508474576272 + }, + { + "epoch": 0.9810032650638172, + "grad_norm": 3.632520914077759, + "learning_rate": 5.4948033891721875e-09, + "loss": 0.0411, + "step": 3305, + "video_reward_cumulative_accuracy": 0.8580937972768532 + }, + { + "epoch": 0.981300089047195, + "grad_norm": 2.451488494873047, + "learning_rate": 5.3244927901627274e-09, + "loss": 0.0354, + "step": 3306, + "video_reward_cumulative_accuracy": 0.8581367211131277 + }, + { + "epoch": 0.9815969130305728, + "grad_norm": 1.2364314794540405, + "learning_rate": 5.1568604674376295e-09, + "loss": 0.0326, + "step": 3307, + "video_reward_cumulative_accuracy": 0.8581796189900212 + }, + { + "epoch": 0.9818937370139508, + "grad_norm": 2.0246939659118652, + "learning_rate": 4.991906600966823e-09, + "loss": 0.0493, + "step": 3308, + "video_reward_cumulative_accuracy": 0.8580713422007256 + }, + { + "epoch": 0.9821905609973286, + "grad_norm": 3.2378106117248535, + "learning_rate": 4.829631367844201e-09, + "loss": 0.0415, + "step": 3309, + "video_reward_cumulative_accuracy": 0.858114233907525 + }, + { + "epoch": 0.9824873849807064, + "grad_norm": 3.8841092586517334, + "learning_rate": 4.670034942287904e-09, + "loss": 0.0413, + "step": 3310, + "video_reward_cumulative_accuracy": 0.8581570996978852 + }, + { + "epoch": 0.9827842089640842, + "grad_norm": 0.6869316101074219, + "learning_rate": 4.51311749564004e-09, + "loss": 0.0038, + "step": 3311, + "video_reward_cumulative_accuracy": 0.8581999395952884 + }, + { + "epoch": 0.9830810329474622, + "grad_norm": 1.650780439376831, + "learning_rate": 4.358879196366961e-09, + "loss": 0.012, + "step": 3312, + "video_reward_cumulative_accuracy": 0.8582427536231884 + }, + { + "epoch": 0.98337785693084, + "grad_norm": 1.0909397602081299, + "learning_rate": 4.207320210058153e-09, + "loss": 0.007, + "step": 3313, + "video_reward_cumulative_accuracy": 0.8582855418050106 + }, + { + "epoch": 0.9836746809142178, + "grad_norm": 0.3308217227458954, + "learning_rate": 4.058440699427346e-09, + "loss": 0.0066, + "step": 3314, + "video_reward_cumulative_accuracy": 0.8583283041641521 + }, + { + "epoch": 0.9839715048975958, + "grad_norm": 1.89927077293396, + "learning_rate": 3.9122408243105755e-09, + "loss": 0.0213, + "step": 3315, + "video_reward_cumulative_accuracy": 0.8583710407239818 + }, + { + "epoch": 0.9842683288809736, + "grad_norm": 2.1102702617645264, + "learning_rate": 3.768720741668119e-09, + "loss": 0.0126, + "step": 3316, + "video_reward_cumulative_accuracy": 0.8584137515078407 + }, + { + "epoch": 0.9845651528643514, + "grad_norm": 2.2954890727996826, + "learning_rate": 3.6278806055825566e-09, + "loss": 0.0284, + "step": 3317, + "video_reward_cumulative_accuracy": 0.8584564365390414 + }, + { + "epoch": 0.9848619768477292, + "grad_norm": 2.33181095123291, + "learning_rate": 3.489720567259325e-09, + "loss": 0.0763, + "step": 3318, + "video_reward_cumulative_accuracy": 0.858499095840868 + }, + { + "epoch": 0.9851588008311072, + "grad_norm": 4.192658424377441, + "learning_rate": 3.3542407750264404e-09, + "loss": 0.0394, + "step": 3319, + "video_reward_cumulative_accuracy": 0.8585417294365773 + }, + { + "epoch": 0.985455624814485, + "grad_norm": 1.9729253053665161, + "learning_rate": 3.2214413743353323e-09, + "loss": 0.0377, + "step": 3320, + "video_reward_cumulative_accuracy": 0.858433734939759 + }, + { + "epoch": 0.9857524487978628, + "grad_norm": 0.8455713987350464, + "learning_rate": 3.0913225077580653e-09, + "loss": 0.0084, + "step": 3321, + "video_reward_cumulative_accuracy": 0.8584763625414031 + }, + { + "epoch": 0.9860492727812408, + "grad_norm": 1.2491599321365356, + "learning_rate": 2.9638843149906725e-09, + "loss": 0.0166, + "step": 3322, + "video_reward_cumulative_accuracy": 0.8585189644792294 + }, + { + "epoch": 0.9863460967646186, + "grad_norm": 2.424283981323242, + "learning_rate": 2.839126932850378e-09, + "loss": 0.0389, + "step": 3323, + "video_reward_cumulative_accuracy": 0.8585615407764069 + }, + { + "epoch": 0.9866429207479964, + "grad_norm": 1.9569655656814575, + "learning_rate": 2.7170504952755972e-09, + "loss": 0.0246, + "step": 3324, + "video_reward_cumulative_accuracy": 0.858453670276775 + }, + { + "epoch": 0.9869397447313742, + "grad_norm": 2.326342821121216, + "learning_rate": 2.5976551333281586e-09, + "loss": 0.0267, + "step": 3325, + "video_reward_cumulative_accuracy": 0.8584962406015038 + }, + { + "epoch": 0.9872365687147522, + "grad_norm": 3.129794120788574, + "learning_rate": 2.4809409751899718e-09, + "loss": 0.086, + "step": 3326, + "video_reward_cumulative_accuracy": 0.8582381238725195 + }, + { + "epoch": 0.98753339269813, + "grad_norm": 1.876774787902832, + "learning_rate": 2.3669081461652476e-09, + "loss": 0.0505, + "step": 3327, + "video_reward_cumulative_accuracy": 0.8581304478509167 + }, + { + "epoch": 0.9878302166815078, + "grad_norm": 0.5072069764137268, + "learning_rate": 2.2555567686791124e-09, + "loss": 0.0061, + "step": 3328, + "video_reward_cumulative_accuracy": 0.8581730769230769 + }, + { + "epoch": 0.9881270406648858, + "grad_norm": 1.15468168258667, + "learning_rate": 2.1468869622781608e-09, + "loss": 0.046, + "step": 3329, + "video_reward_cumulative_accuracy": 0.8582156803844998 + }, + { + "epoch": 0.9884238646482636, + "grad_norm": 1.5895243883132935, + "learning_rate": 2.040898843630179e-09, + "loss": 0.0176, + "step": 3330, + "video_reward_cumulative_accuracy": 0.8582582582582583 + }, + { + "epoch": 0.9887206886316414, + "grad_norm": 2.2743732929229736, + "learning_rate": 1.9375925265235907e-09, + "loss": 0.0536, + "step": 3331, + "video_reward_cumulative_accuracy": 0.8583008105673972 + }, + { + "epoch": 0.9890175126150192, + "grad_norm": 0.7520642280578613, + "learning_rate": 1.8369681218677327e-09, + "loss": 0.0133, + "step": 3332, + "video_reward_cumulative_accuracy": 0.8581932773109243 + }, + { + "epoch": 0.9893143365983972, + "grad_norm": 1.7850524187088013, + "learning_rate": 1.739025737692579e-09, + "loss": 0.0138, + "step": 3333, + "video_reward_cumulative_accuracy": 0.858085808580858 + }, + { + "epoch": 0.989611160581775, + "grad_norm": 0.7477326989173889, + "learning_rate": 1.64376547914874e-09, + "loss": 0.0129, + "step": 3334, + "video_reward_cumulative_accuracy": 0.8579784043191362 + }, + { + "epoch": 0.9899079845651528, + "grad_norm": 2.771977663040161, + "learning_rate": 1.551187448507463e-09, + "loss": 0.0591, + "step": 3335, + "video_reward_cumulative_accuracy": 0.8578710644677661 + }, + { + "epoch": 0.9902048085485308, + "grad_norm": 5.376701831817627, + "learning_rate": 1.4612917451603536e-09, + "loss": 0.0271, + "step": 3336, + "video_reward_cumulative_accuracy": 0.8579136690647482 + }, + { + "epoch": 0.9905016325319086, + "grad_norm": 1.8040037155151367, + "learning_rate": 1.3740784656190998e-09, + "loss": 0.0504, + "step": 3337, + "video_reward_cumulative_accuracy": 0.8578064129457597 + }, + { + "epoch": 0.9907984565152864, + "grad_norm": 1.739055871963501, + "learning_rate": 1.2895477035154703e-09, + "loss": 0.0359, + "step": 3338, + "video_reward_cumulative_accuracy": 0.8578490113840623 + }, + { + "epoch": 0.9910952804986642, + "grad_norm": 2.0168232917785645, + "learning_rate": 1.2076995496015931e-09, + "loss": 0.04, + "step": 3339, + "video_reward_cumulative_accuracy": 0.8578915843066787 + }, + { + "epoch": 0.9913921044820422, + "grad_norm": 1.7344579696655273, + "learning_rate": 1.1285340917494004e-09, + "loss": 0.0256, + "step": 3340, + "video_reward_cumulative_accuracy": 0.857934131736527 + }, + { + "epoch": 0.99168892846542, + "grad_norm": 1.7917364835739136, + "learning_rate": 1.0520514149506278e-09, + "loss": 0.021, + "step": 3341, + "video_reward_cumulative_accuracy": 0.857976653696498 + }, + { + "epoch": 0.9919857524487978, + "grad_norm": 1.345489740371704, + "learning_rate": 9.782516013168154e-10, + "loss": 0.0126, + "step": 3342, + "video_reward_cumulative_accuracy": 0.8580191502094554 + }, + { + "epoch": 0.9922825764321758, + "grad_norm": 0.9671461582183838, + "learning_rate": 9.071347300793065e-10, + "loss": 0.0065, + "step": 3343, + "video_reward_cumulative_accuracy": 0.8580616212982352 + }, + { + "epoch": 0.9925794004155536, + "grad_norm": 1.9350098371505737, + "learning_rate": 8.387008775889716e-10, + "loss": 0.0142, + "step": 3344, + "video_reward_cumulative_accuracy": 0.8581040669856459 + }, + { + "epoch": 0.9928762243989314, + "grad_norm": 1.1504771709442139, + "learning_rate": 7.729501173162068e-10, + "loss": 0.009, + "step": 3345, + "video_reward_cumulative_accuracy": 0.8581464872944694 + }, + { + "epoch": 0.9931730483823092, + "grad_norm": 1.2408806085586548, + "learning_rate": 7.098825198509351e-10, + "loss": 0.0086, + "step": 3346, + "video_reward_cumulative_accuracy": 0.8581888822474597 + }, + { + "epoch": 0.9934698723656872, + "grad_norm": 3.347973108291626, + "learning_rate": 6.494981529020505e-10, + "loss": 0.0431, + "step": 3347, + "video_reward_cumulative_accuracy": 0.8582312518673438 + }, + { + "epoch": 0.993766696349065, + "grad_norm": 1.9659161567687988, + "learning_rate": 5.91797081298251e-10, + "loss": 0.0124, + "step": 3348, + "video_reward_cumulative_accuracy": 0.8581242532855436 + }, + { + "epoch": 0.9940635203324428, + "grad_norm": 4.217858791351318, + "learning_rate": 5.367793669874832e-10, + "loss": 0.0681, + "step": 3349, + "video_reward_cumulative_accuracy": 0.8581666169005673 + }, + { + "epoch": 0.9943603443158208, + "grad_norm": 2.9570226669311523, + "learning_rate": 4.844450690358327e-10, + "loss": 0.0385, + "step": 3350, + "video_reward_cumulative_accuracy": 0.8582089552238806 + }, + { + "epoch": 0.9946571682991986, + "grad_norm": 0.8007720708847046, + "learning_rate": 4.347942436300212e-10, + "loss": 0.0163, + "step": 3351, + "video_reward_cumulative_accuracy": 0.8582512682781259 + }, + { + "epoch": 0.9949539922825764, + "grad_norm": 1.299566626548767, + "learning_rate": 3.8782694407463184e-10, + "loss": 0.0321, + "step": 3352, + "video_reward_cumulative_accuracy": 0.8582935560859188 + }, + { + "epoch": 0.9952508162659542, + "grad_norm": 2.04730486869812, + "learning_rate": 3.435432207937739e-10, + "loss": 0.0355, + "step": 3353, + "video_reward_cumulative_accuracy": 0.8583358186698479 + }, + { + "epoch": 0.9955476402493322, + "grad_norm": 2.532416343688965, + "learning_rate": 3.019431213299728e-10, + "loss": 0.0196, + "step": 3354, + "video_reward_cumulative_accuracy": 0.8583780560524746 + }, + { + "epoch": 0.99584446423271, + "grad_norm": 1.875337839126587, + "learning_rate": 2.6302669034555807e-10, + "loss": 0.0339, + "step": 3355, + "video_reward_cumulative_accuracy": 0.8584202682563339 + }, + { + "epoch": 0.9961412882160878, + "grad_norm": 2.1054089069366455, + "learning_rate": 2.2679396962071999e-10, + "loss": 0.0239, + "step": 3356, + "video_reward_cumulative_accuracy": 0.8584624553039333 + }, + { + "epoch": 0.9964381121994658, + "grad_norm": 2.2614123821258545, + "learning_rate": 1.9324499805489783e-10, + "loss": 0.0458, + "step": 3357, + "video_reward_cumulative_accuracy": 0.858504617217754 + }, + { + "epoch": 0.9967349361828436, + "grad_norm": 1.6584018468856812, + "learning_rate": 1.6237981166622451e-10, + "loss": 0.0176, + "step": 3358, + "video_reward_cumulative_accuracy": 0.8585467540202502 + }, + { + "epoch": 0.9970317601662214, + "grad_norm": 2.8550844192504883, + "learning_rate": 1.341984435912491e-10, + "loss": 0.0352, + "step": 3359, + "video_reward_cumulative_accuracy": 0.8585888657338494 + }, + { + "epoch": 0.9973285841495992, + "grad_norm": 3.7079176902770996, + "learning_rate": 1.0870092408576949e-10, + "loss": 0.0719, + "step": 3360, + "video_reward_cumulative_accuracy": 0.8584821428571429 + }, + { + "epoch": 0.9976254081329772, + "grad_norm": 1.3505829572677612, + "learning_rate": 8.588728052344453e-11, + "loss": 0.0224, + "step": 3361, + "video_reward_cumulative_accuracy": 0.8585242487354954 + }, + { + "epoch": 0.997922232116355, + "grad_norm": 1.7025647163391113, + "learning_rate": 6.575753739718193e-11, + "loss": 0.0293, + "step": 3362, + "video_reward_cumulative_accuracy": 0.8585663295657346 + }, + { + "epoch": 0.9982190560997328, + "grad_norm": 2.158022403717041, + "learning_rate": 4.8311716318028e-11, + "loss": 0.0603, + "step": 3363, + "video_reward_cumulative_accuracy": 0.8586083853702052 + }, + { + "epoch": 0.9985158800831108, + "grad_norm": 2.8020150661468506, + "learning_rate": 3.354983601600026e-11, + "loss": 0.0205, + "step": 3364, + "video_reward_cumulative_accuracy": 0.8586504161712247 + }, + { + "epoch": 0.9988127040664886, + "grad_norm": 3.6079211235046387, + "learning_rate": 2.1471912339532386e-11, + "loss": 0.0442, + "step": 3365, + "video_reward_cumulative_accuracy": 0.8585438335809806 + }, + { + "epoch": 0.9991095280498664, + "grad_norm": 1.7951223850250244, + "learning_rate": 1.2077958254919087e-11, + "loss": 0.0145, + "step": 3366, + "video_reward_cumulative_accuracy": 0.8585858585858586 + }, + { + "epoch": 0.9994063520332442, + "grad_norm": 2.862751007080078, + "learning_rate": 5.367983847981428e-12, + "loss": 0.0244, + "step": 3367, + "video_reward_cumulative_accuracy": 0.8584793584793585 + }, + { + "epoch": 0.9997031760166222, + "grad_norm": 1.4409502744674683, + "learning_rate": 1.3419963221239506e-12, + "loss": 0.0109, + "step": 3368, + "video_reward_cumulative_accuracy": 0.858521377672209 + }, + { + "epoch": 1.0, + "grad_norm": 1.1117860078811646, + "learning_rate": 0.0, + "loss": 0.0134, + "step": 3369, + "video_reward_cumulative_accuracy": 0.8585633719204512 + } + ], + "logging_steps": 1, + "max_steps": 3369, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6663849150126752e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}