Upload exp_phase5_stage_a_v7_reasoning_t1_r256_FIXED_20260502_121414/log.jsonl with huggingface_hub
Browse files
exp_phase5_stage_a_v7_reasoning_t1_r256_FIXED_20260502_121414/log.jsonl
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "loss": 1.9774447679519653, "elapsed_s": 7.060880184173584}
|
| 2 |
+
{"step": 40, "loss": 1.8001002073287964, "elapsed_s": 10.964189052581787}
|
| 3 |
+
{"step": 60, "loss": 1.6066473722457886, "elapsed_s": 14.794878005981445}
|
| 4 |
+
{"step": 80, "loss": 1.7254116535186768, "elapsed_s": 18.51723074913025}
|
| 5 |
+
{"step": 100, "loss": 1.230107069015503, "elapsed_s": 22.617149829864502}
|
| 6 |
+
{"step": 120, "loss": 1.297135829925537, "elapsed_s": 26.377729892730713}
|
| 7 |
+
{"step": 140, "loss": 0.9509831666946411, "elapsed_s": 29.918986558914185}
|
| 8 |
+
{"step": 160, "loss": 0.7812271118164062, "elapsed_s": 33.68785047531128}
|
| 9 |
+
{"step": 180, "loss": 1.1528620719909668, "elapsed_s": 37.36045265197754}
|
| 10 |
+
{"step": 200, "loss": 0.9863306283950806, "elapsed_s": 40.95762276649475}
|
| 11 |
+
{"step": 220, "loss": 1.3034881353378296, "elapsed_s": 44.60539698600769}
|
| 12 |
+
{"step": 240, "loss": 1.2247743606567383, "elapsed_s": 48.20915627479553}
|
| 13 |
+
{"step": 260, "loss": 0.9104486703872681, "elapsed_s": 51.81660294532776}
|
| 14 |
+
{"step": 280, "loss": 0.7686321139335632, "elapsed_s": 55.51269817352295}
|
| 15 |
+
{"step": 300, "loss": 1.1527222394943237, "elapsed_s": 59.04076623916626}
|
| 16 |
+
{"step": 320, "loss": 1.2258694171905518, "elapsed_s": 62.5277373790741}
|
| 17 |
+
{"step": 340, "loss": 1.147497296333313, "elapsed_s": 66.16446542739868}
|
| 18 |
+
{"step": 360, "loss": 0.8179966807365417, "elapsed_s": 69.78625106811523}
|
| 19 |
+
{"step": 380, "loss": 0.9413669109344482, "elapsed_s": 73.33791327476501}
|
| 20 |
+
{"step": 400, "loss": 1.2409865856170654, "elapsed_s": 76.77564096450806}
|
| 21 |
+
{"step": 420, "loss": 0.9674402475357056, "elapsed_s": 80.22393083572388}
|
| 22 |
+
{"step": 440, "loss": 0.6676923632621765, "elapsed_s": 83.5635404586792}
|
| 23 |
+
{"step": 460, "loss": 0.7688860893249512, "elapsed_s": 87.09018087387085}
|
| 24 |
+
{"step": 480, "loss": 0.7909336686134338, "elapsed_s": 90.70071530342102}
|
| 25 |
+
{"step": 500, "loss": 0.7822729349136353, "elapsed_s": 94.18792486190796}
|
| 26 |
+
{"step": 520, "loss": 0.7914894223213196, "elapsed_s": 97.67328023910522}
|
| 27 |
+
{"step": 540, "loss": 1.074857473373413, "elapsed_s": 101.13494873046875}
|
| 28 |
+
{"step": 560, "loss": 0.7601761817932129, "elapsed_s": 104.80744767189026}
|
| 29 |
+
{"step": 580, "loss": 0.6478149890899658, "elapsed_s": 108.21289086341858}
|
| 30 |
+
{"step": 600, "loss": 0.8634665012359619, "elapsed_s": 111.52514505386353}
|
| 31 |
+
{"step": 620, "loss": 0.670783519744873, "elapsed_s": 115.12612986564636}
|
| 32 |
+
{"step": 640, "loss": 0.5818716883659363, "elapsed_s": 118.45429110527039}
|
| 33 |
+
{"step": 660, "loss": 1.0098869800567627, "elapsed_s": 121.90421795845032}
|
| 34 |
+
{"step": 680, "loss": 0.8480299115180969, "elapsed_s": 125.34604239463806}
|
| 35 |
+
{"step": 700, "loss": 0.7909002900123596, "elapsed_s": 128.74657273292542}
|
| 36 |
+
{"step": 720, "loss": 0.8845273852348328, "elapsed_s": 132.07222366333008}
|
| 37 |
+
{"step": 740, "loss": 0.6812204718589783, "elapsed_s": 135.50419187545776}
|
| 38 |
+
{"step": 760, "loss": 0.6597556471824646, "elapsed_s": 138.8681619167328}
|
| 39 |
+
{"step": 780, "loss": 0.9638181924819946, "elapsed_s": 142.20667123794556}
|
| 40 |
+
{"step": 800, "loss": 0.6828988194465637, "elapsed_s": 145.54660964012146}
|
| 41 |
+
{"step": 820, "loss": 0.8073567748069763, "elapsed_s": 148.7208433151245}
|
| 42 |
+
{"step": 840, "loss": 0.7933673858642578, "elapsed_s": 151.9730761051178}
|
| 43 |
+
{"step": 860, "loss": 0.8765690326690674, "elapsed_s": 155.4128439426422}
|
| 44 |
+
{"step": 880, "loss": 0.5836101174354553, "elapsed_s": 158.86644887924194}
|
| 45 |
+
{"step": 900, "loss": 0.692015528678894, "elapsed_s": 162.1712589263916}
|
| 46 |
+
{"step": 920, "loss": 0.7565768957138062, "elapsed_s": 165.58387684822083}
|
| 47 |
+
{"step": 940, "loss": 0.8986669182777405, "elapsed_s": 169.04263496398926}
|
| 48 |
+
{"step": 960, "loss": 0.9206324219703674, "elapsed_s": 172.34876918792725}
|
| 49 |
+
{"step": 980, "loss": 1.1026443243026733, "elapsed_s": 175.5754954814911}
|
| 50 |
+
{"step": 1000, "loss": 0.7296143770217896, "elapsed_s": 178.79400658607483}
|
| 51 |
+
{"step": 1020, "loss": 0.6172620058059692, "elapsed_s": 182.32546162605286}
|
| 52 |
+
{"step": 1040, "loss": 0.8027003407478333, "elapsed_s": 185.63901567459106}
|
| 53 |
+
{"step": 1060, "loss": 0.8336893916130066, "elapsed_s": 188.90677881240845}
|
| 54 |
+
{"step": 1080, "loss": 0.5698167085647583, "elapsed_s": 192.12601804733276}
|
| 55 |
+
{"step": 1100, "loss": 0.5825605988502502, "elapsed_s": 195.39535212516785}
|
| 56 |
+
{"step": 1120, "loss": 0.634892463684082, "elapsed_s": 198.76903414726257}
|
| 57 |
+
{"step": 1140, "loss": 0.450910359621048, "elapsed_s": 202.1686065196991}
|
| 58 |
+
{"step": 1160, "loss": 0.6793229579925537, "elapsed_s": 205.3421483039856}
|
| 59 |
+
{"step": 1180, "loss": 0.8239572644233704, "elapsed_s": 208.66398167610168}
|
| 60 |
+
{"step": 1200, "loss": 1.021950364112854, "elapsed_s": 211.89100980758667}
|
| 61 |
+
{"step": 1220, "loss": 0.7072740197181702, "elapsed_s": 215.18245005607605}
|
| 62 |
+
{"step": 1240, "loss": 0.7504220604896545, "elapsed_s": 218.57585906982422}
|
| 63 |
+
{"step": 1260, "loss": 0.46929630637168884, "elapsed_s": 221.85744833946228}
|
| 64 |
+
{"step": 1280, "loss": 0.6561456322669983, "elapsed_s": 225.18012404441833}
|
| 65 |
+
{"step": 1300, "loss": 0.8426336646080017, "elapsed_s": 228.52220582962036}
|
| 66 |
+
{"step": 1320, "loss": 0.5124194622039795, "elapsed_s": 231.77366542816162}
|
| 67 |
+
{"step": 1340, "loss": 0.8788986802101135, "elapsed_s": 234.99401450157166}
|
| 68 |
+
{"step": 1360, "loss": 0.7607057690620422, "elapsed_s": 238.23633003234863}
|
| 69 |
+
{"step": 1380, "loss": 0.7068525552749634, "elapsed_s": 241.5894844532013}
|
| 70 |
+
{"step": 1400, "loss": 0.6355752348899841, "elapsed_s": 244.86072993278503}
|
| 71 |
+
{"step": 1420, "loss": 0.5642361640930176, "elapsed_s": 248.18115901947021}
|
| 72 |
+
{"step": 1440, "loss": 0.606167197227478, "elapsed_s": 251.47394227981567}
|
| 73 |
+
{"step": 1460, "loss": 0.6251195073127747, "elapsed_s": 254.6563014984131}
|
| 74 |
+
{"step": 1480, "loss": 0.681820809841156, "elapsed_s": 257.85839200019836}
|
| 75 |
+
{"step": 1500, "loss": 0.5975695848464966, "elapsed_s": 260.92788195610046}
|
| 76 |
+
{"step": 1520, "loss": 0.7802863717079163, "elapsed_s": 264.1795175075531}
|
| 77 |
+
{"step": 1540, "loss": 0.5139927268028259, "elapsed_s": 267.4874050617218}
|
| 78 |
+
{"step": 1560, "loss": 0.6520804166793823, "elapsed_s": 270.61988043785095}
|
| 79 |
+
{"step": 1580, "loss": 1.022620677947998, "elapsed_s": 273.94956827163696}
|
| 80 |
+
{"step": 1600, "loss": 0.8647533655166626, "elapsed_s": 277.1666066646576}
|
| 81 |
+
{"step": 1620, "loss": 0.6097546815872192, "elapsed_s": 280.27260088920593}
|
| 82 |
+
{"step": 1640, "loss": 0.6338759064674377, "elapsed_s": 283.6018397808075}
|
| 83 |
+
{"step": 1660, "loss": 0.6845680475234985, "elapsed_s": 286.9227509498596}
|
| 84 |
+
{"step": 1680, "loss": 0.6087700724601746, "elapsed_s": 290.0667157173157}
|
| 85 |
+
{"step": 1700, "loss": 0.9896318912506104, "elapsed_s": 293.40989327430725}
|
| 86 |
+
{"step": 1720, "loss": 0.6001601219177246, "elapsed_s": 296.61187529563904}
|
| 87 |
+
{"step": 1740, "loss": 0.717741072177887, "elapsed_s": 299.7739837169647}
|
| 88 |
+
{"step": 1760, "loss": 0.70790696144104, "elapsed_s": 303.0345802307129}
|
| 89 |
+
{"step": 1780, "loss": 0.7461473345756531, "elapsed_s": 306.15549659729004}
|
| 90 |
+
{"step": 1800, "loss": 0.7627187967300415, "elapsed_s": 309.370224237442}
|
| 91 |
+
{"step": 1820, "loss": 0.8616876006126404, "elapsed_s": 312.5532078742981}
|
| 92 |
+
{"step": 1840, "loss": 0.9856967926025391, "elapsed_s": 315.7647068500519}
|
| 93 |
+
{"step": 1860, "loss": 0.5373476147651672, "elapsed_s": 319.05323028564453}
|
| 94 |
+
{"step": 1880, "loss": 0.6587440967559814, "elapsed_s": 322.482027053833}
|
| 95 |
+
{"step": 1900, "loss": 0.6262999773025513, "elapsed_s": 325.72165036201477}
|
| 96 |
+
{"step": 1920, "loss": 0.9090701937675476, "elapsed_s": 328.9079270362854}
|
| 97 |
+
{"step": 1940, "loss": 0.6410947442054749, "elapsed_s": 332.0704896450043}
|
| 98 |
+
{"step": 1960, "loss": 0.435920774936676, "elapsed_s": 335.2602422237396}
|
| 99 |
+
{"step": 1980, "loss": 0.5406534671783447, "elapsed_s": 338.4276361465454}
|
| 100 |
+
{"step": 2000, "loss": 0.5773667693138123, "elapsed_s": 341.55367493629456}
|
| 101 |
+
{"step": 2020, "loss": 0.6169989109039307, "elapsed_s": 344.89976716041565}
|
| 102 |
+
{"step": 2040, "loss": 0.6530997157096863, "elapsed_s": 348.08249282836914}
|
| 103 |
+
{"step": 2060, "loss": 0.5530748963356018, "elapsed_s": 351.2829647064209}
|
| 104 |
+
{"step": 2080, "loss": 0.603181779384613, "elapsed_s": 354.49969816207886}
|
| 105 |
+
{"step": 2100, "loss": 0.498020738363266, "elapsed_s": 357.6935017108917}
|
| 106 |
+
{"step": 2120, "loss": 0.5354554653167725, "elapsed_s": 360.85755252838135}
|
| 107 |
+
{"step": 2140, "loss": 0.8288055658340454, "elapsed_s": 364.0057420730591}
|
| 108 |
+
{"step": 2160, "loss": 0.5955426692962646, "elapsed_s": 367.1199309825897}
|
| 109 |
+
{"step": 2180, "loss": 0.725967526435852, "elapsed_s": 370.28251242637634}
|
| 110 |
+
{"step": 2200, "loss": 0.6121144890785217, "elapsed_s": 373.53574419021606}
|
| 111 |
+
{"step": 2220, "loss": 0.8256104588508606, "elapsed_s": 376.77718019485474}
|
| 112 |
+
{"step": 2240, "loss": 0.5698134899139404, "elapsed_s": 379.9467816352844}
|
| 113 |
+
{"step": 2260, "loss": 0.6788922548294067, "elapsed_s": 383.1890392303467}
|
| 114 |
+
{"step": 2280, "loss": 0.5523660182952881, "elapsed_s": 386.48220205307007}
|
| 115 |
+
{"step": 2300, "loss": 0.6382620334625244, "elapsed_s": 389.6579256057739}
|
| 116 |
+
{"step": 2320, "loss": 0.577393651008606, "elapsed_s": 392.7342321872711}
|
| 117 |
+
{"step": 2340, "loss": 0.649537205696106, "elapsed_s": 395.9158036708832}
|
| 118 |
+
{"step": 2360, "loss": 0.6187525391578674, "elapsed_s": 399.06821155548096}
|
| 119 |
+
{"step": 2380, "loss": 0.7006586790084839, "elapsed_s": 402.1479959487915}
|
| 120 |
+
{"step": 2400, "loss": 0.7813540697097778, "elapsed_s": 405.28808522224426}
|
| 121 |
+
{"step": 2420, "loss": 0.6868652701377869, "elapsed_s": 408.41245913505554}
|
| 122 |
+
{"step": 2440, "loss": 0.686132550239563, "elapsed_s": 411.54040241241455}
|
| 123 |
+
{"step": 2460, "loss": 0.9299863576889038, "elapsed_s": 414.96952724456787}
|
| 124 |
+
{"step": 2480, "loss": 0.6651830673217773, "elapsed_s": 418.04893255233765}
|
| 125 |
+
{"step": 2500, "loss": 0.7336837649345398, "elapsed_s": 421.2211227416992}
|
| 126 |
+
{"step": 2520, "loss": 0.7012184262275696, "elapsed_s": 424.4099967479706}
|
| 127 |
+
{"step": 2540, "loss": 0.6246220469474792, "elapsed_s": 427.59114623069763}
|
| 128 |
+
{"step": 2560, "loss": 0.7059709429740906, "elapsed_s": 430.68378591537476}
|
| 129 |
+
{"step": 2580, "loss": 0.5579457879066467, "elapsed_s": 433.8242359161377}
|
| 130 |
+
{"step": 2600, "loss": 0.4734269082546234, "elapsed_s": 436.9621512889862}
|
| 131 |
+
{"step": 2620, "loss": 0.5161091685295105, "elapsed_s": 440.09408831596375}
|
| 132 |
+
{"step": 2640, "loss": 0.36350110173225403, "elapsed_s": 443.20841789245605}
|
| 133 |
+
{"step": 2660, "loss": 0.6496555209159851, "elapsed_s": 446.38027691841125}
|
| 134 |
+
{"step": 2680, "loss": 0.813278317451477, "elapsed_s": 449.5309491157532}
|
| 135 |
+
{"step": 2700, "loss": 0.4930036962032318, "elapsed_s": 452.720495223999}
|
| 136 |
+
{"step": 2720, "loss": 0.5410283803939819, "elapsed_s": 455.93883752822876}
|
| 137 |
+
{"step": 2740, "loss": 0.5784133672714233, "elapsed_s": 459.0784754753113}
|
| 138 |
+
{"step": 2760, "loss": 0.6152942776679993, "elapsed_s": 462.22138381004333}
|
| 139 |
+
{"step": 2780, "loss": 0.48594051599502563, "elapsed_s": 465.3520166873932}
|
| 140 |
+
{"step": 2800, "loss": 0.5207051038742065, "elapsed_s": 468.50602436065674}
|
| 141 |
+
{"step": 2820, "loss": 0.7204872369766235, "elapsed_s": 471.59734988212585}
|
| 142 |
+
{"step": 2840, "loss": 0.6046464443206787, "elapsed_s": 474.67349123954773}
|
| 143 |
+
{"step": 2860, "loss": 0.7046807408332825, "elapsed_s": 477.8796308040619}
|
| 144 |
+
{"step": 2880, "loss": 0.738498866558075, "elapsed_s": 481.0243833065033}
|
| 145 |
+
{"step": 2900, "loss": 0.5067113637924194, "elapsed_s": 484.1894190311432}
|
| 146 |
+
{"step": 2920, "loss": 0.5920405387878418, "elapsed_s": 487.36884593963623}
|
| 147 |
+
{"step": 2940, "loss": 0.4744235575199127, "elapsed_s": 490.54821157455444}
|
| 148 |
+
{"step": 2960, "loss": 0.5193720459938049, "elapsed_s": 493.7349932193756}
|
| 149 |
+
{"step": 2980, "loss": 0.7624729871749878, "elapsed_s": 496.94343400001526}
|
| 150 |
+
{"step": 3000, "loss": 0.5966009497642517, "elapsed_s": 499.9988257884979}
|