Upload exp_phase5_stage_a_v7_reasoning_t1_r128_FIXED_20260502_104222/log.jsonl with huggingface_hub
Browse files
exp_phase5_stage_a_v7_reasoning_t1_r128_FIXED_20260502_104222/log.jsonl
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "loss": 2.202322006225586, "elapsed_s": 8.068516731262207}
|
| 2 |
+
{"step": 40, "loss": 1.9049887657165527, "elapsed_s": 12.802325963973999}
|
| 3 |
+
{"step": 60, "loss": 2.2096848487854004, "elapsed_s": 17.480759620666504}
|
| 4 |
+
{"step": 80, "loss": 1.5484510660171509, "elapsed_s": 21.957751750946045}
|
| 5 |
+
{"step": 100, "loss": 1.457550048828125, "elapsed_s": 26.64709210395813}
|
| 6 |
+
{"step": 120, "loss": 1.162634015083313, "elapsed_s": 31.26972508430481}
|
| 7 |
+
{"step": 140, "loss": 1.2464854717254639, "elapsed_s": 35.48639941215515}
|
| 8 |
+
{"step": 160, "loss": 1.4211183786392212, "elapsed_s": 40.0321581363678}
|
| 9 |
+
{"step": 180, "loss": 1.2450991868972778, "elapsed_s": 44.51671004295349}
|
| 10 |
+
{"step": 200, "loss": 1.425645351409912, "elapsed_s": 49.02465200424194}
|
| 11 |
+
{"step": 220, "loss": 1.1186270713806152, "elapsed_s": 53.512261390686035}
|
| 12 |
+
{"step": 240, "loss": 1.110804796218872, "elapsed_s": 58.05427169799805}
|
| 13 |
+
{"step": 260, "loss": 1.1730107069015503, "elapsed_s": 62.70865345001221}
|
| 14 |
+
{"step": 280, "loss": 1.287969708442688, "elapsed_s": 67.04114842414856}
|
| 15 |
+
{"step": 300, "loss": 0.9371508955955505, "elapsed_s": 71.0534987449646}
|
| 16 |
+
{"step": 320, "loss": 1.2246824502944946, "elapsed_s": 73.9830870628357}
|
| 17 |
+
{"step": 340, "loss": 0.9295085668563843, "elapsed_s": 76.76304697990417}
|
| 18 |
+
{"step": 360, "loss": 0.8869633674621582, "elapsed_s": 79.67706656455994}
|
| 19 |
+
{"step": 380, "loss": 0.9282847046852112, "elapsed_s": 82.42340469360352}
|
| 20 |
+
{"step": 400, "loss": 1.0696427822113037, "elapsed_s": 85.04908680915833}
|
| 21 |
+
{"step": 420, "loss": 1.0172228813171387, "elapsed_s": 87.88851737976074}
|
| 22 |
+
{"step": 440, "loss": 0.6957511305809021, "elapsed_s": 90.56218552589417}
|
| 23 |
+
{"step": 460, "loss": 0.9947313070297241, "elapsed_s": 93.0493712425232}
|
| 24 |
+
{"step": 480, "loss": 0.9986017942428589, "elapsed_s": 95.70555472373962}
|
| 25 |
+
{"step": 500, "loss": 1.0452866554260254, "elapsed_s": 98.57984614372253}
|
| 26 |
+
{"step": 520, "loss": 0.9874975681304932, "elapsed_s": 101.2092912197113}
|
| 27 |
+
{"step": 540, "loss": 0.6997621059417725, "elapsed_s": 103.82567572593689}
|
| 28 |
+
{"step": 560, "loss": 0.9506767392158508, "elapsed_s": 106.55046057701111}
|
| 29 |
+
{"step": 580, "loss": 1.0515797138214111, "elapsed_s": 109.06288242340088}
|
| 30 |
+
{"step": 600, "loss": 0.7966925501823425, "elapsed_s": 111.84230089187622}
|
| 31 |
+
{"step": 620, "loss": 0.8643088340759277, "elapsed_s": 114.531001329422}
|
| 32 |
+
{"step": 640, "loss": 0.7618396282196045, "elapsed_s": 117.23952126502991}
|
| 33 |
+
{"step": 660, "loss": 0.8909296989440918, "elapsed_s": 119.85855078697205}
|
| 34 |
+
{"step": 680, "loss": 0.8046140670776367, "elapsed_s": 122.53493285179138}
|
| 35 |
+
{"step": 700, "loss": 0.6990590691566467, "elapsed_s": 125.10298776626587}
|
| 36 |
+
{"step": 720, "loss": 0.8425854444503784, "elapsed_s": 127.64088010787964}
|
| 37 |
+
{"step": 740, "loss": 0.6527794599533081, "elapsed_s": 130.24441981315613}
|
| 38 |
+
{"step": 760, "loss": 0.6643272042274475, "elapsed_s": 132.81543278694153}
|
| 39 |
+
{"step": 780, "loss": 1.0508562326431274, "elapsed_s": 135.2940113544464}
|
| 40 |
+
{"step": 800, "loss": 0.7018465399742126, "elapsed_s": 137.92540550231934}
|
| 41 |
+
{"step": 820, "loss": 0.8643544316291809, "elapsed_s": 140.41546893119812}
|
| 42 |
+
{"step": 840, "loss": 0.8483926057815552, "elapsed_s": 142.97425031661987}
|
| 43 |
+
{"step": 860, "loss": 0.847130537033081, "elapsed_s": 145.5282425880432}
|
| 44 |
+
{"step": 880, "loss": 0.8131736516952515, "elapsed_s": 148.1882529258728}
|
| 45 |
+
{"step": 900, "loss": 0.7206243276596069, "elapsed_s": 150.7061734199524}
|
| 46 |
+
{"step": 920, "loss": 1.1386528015136719, "elapsed_s": 153.1884515285492}
|
| 47 |
+
{"step": 940, "loss": 0.5656690001487732, "elapsed_s": 155.64203000068665}
|
| 48 |
+
{"step": 960, "loss": 0.7620475888252258, "elapsed_s": 158.2638282775879}
|
| 49 |
+
{"step": 980, "loss": 0.7873883247375488, "elapsed_s": 160.8760335445404}
|
| 50 |
+
{"step": 1000, "loss": 0.908439576625824, "elapsed_s": 163.27594542503357}
|
| 51 |
+
{"step": 1020, "loss": 0.9677100777626038, "elapsed_s": 165.61307096481323}
|
| 52 |
+
{"step": 1040, "loss": 0.90561443567276, "elapsed_s": 168.0649311542511}
|
| 53 |
+
{"step": 1060, "loss": 0.780480682849884, "elapsed_s": 170.37006449699402}
|
| 54 |
+
{"step": 1080, "loss": 1.0314478874206543, "elapsed_s": 172.9392056465149}
|
| 55 |
+
{"step": 1100, "loss": 0.9421583414077759, "elapsed_s": 175.3266565799713}
|
| 56 |
+
{"step": 1120, "loss": 0.5605111718177795, "elapsed_s": 177.78286480903625}
|
| 57 |
+
{"step": 1140, "loss": 0.5297425389289856, "elapsed_s": 180.17284560203552}
|
| 58 |
+
{"step": 1160, "loss": 0.8475654721260071, "elapsed_s": 182.63426613807678}
|
| 59 |
+
{"step": 1180, "loss": 0.6529051065444946, "elapsed_s": 185.25115537643433}
|
| 60 |
+
{"step": 1200, "loss": 0.9733421802520752, "elapsed_s": 187.48249197006226}
|
| 61 |
+
{"step": 1220, "loss": 0.5475715398788452, "elapsed_s": 189.91303849220276}
|
| 62 |
+
{"step": 1240, "loss": 0.7890834212303162, "elapsed_s": 192.28043603897095}
|
| 63 |
+
{"step": 1260, "loss": 0.8984258770942688, "elapsed_s": 194.74862909317017}
|
| 64 |
+
{"step": 1280, "loss": 0.823652982711792, "elapsed_s": 197.26314783096313}
|
| 65 |
+
{"step": 1300, "loss": 1.241571068763733, "elapsed_s": 199.68783330917358}
|
| 66 |
+
{"step": 1320, "loss": 0.8544700741767883, "elapsed_s": 202.14582777023315}
|
| 67 |
+
{"step": 1340, "loss": 0.705428421497345, "elapsed_s": 204.72348141670227}
|
| 68 |
+
{"step": 1360, "loss": 0.6074650287628174, "elapsed_s": 207.08247423171997}
|
| 69 |
+
{"step": 1380, "loss": 0.7451711893081665, "elapsed_s": 209.39916610717773}
|
| 70 |
+
{"step": 1400, "loss": 0.9869830012321472, "elapsed_s": 211.83866381645203}
|
| 71 |
+
{"step": 1420, "loss": 0.7240380048751831, "elapsed_s": 214.11703944206238}
|
| 72 |
+
{"step": 1440, "loss": 1.601798415184021, "elapsed_s": 216.56427717208862}
|
| 73 |
+
{"step": 1460, "loss": 0.9507465362548828, "elapsed_s": 218.90848064422607}
|
| 74 |
+
{"step": 1480, "loss": 0.6565518379211426, "elapsed_s": 221.15885877609253}
|
| 75 |
+
{"step": 1500, "loss": 0.6873279809951782, "elapsed_s": 223.42951011657715}
|
| 76 |
+
{"step": 1520, "loss": 0.9432864189147949, "elapsed_s": 225.8092725276947}
|
| 77 |
+
{"step": 1540, "loss": 1.066676139831543, "elapsed_s": 228.17720127105713}
|
| 78 |
+
{"step": 1560, "loss": 0.7735134959220886, "elapsed_s": 230.66011929512024}
|
| 79 |
+
{"step": 1580, "loss": 0.7439104914665222, "elapsed_s": 233.04117274284363}
|
| 80 |
+
{"step": 1600, "loss": 0.5365155339241028, "elapsed_s": 235.41436219215393}
|
| 81 |
+
{"step": 1620, "loss": 0.9830235242843628, "elapsed_s": 237.74253010749817}
|
| 82 |
+
{"step": 1640, "loss": 0.5660899877548218, "elapsed_s": 240.82435989379883}
|
| 83 |
+
{"step": 1660, "loss": 0.5424519777297974, "elapsed_s": 243.022780418396}
|
| 84 |
+
{"step": 1680, "loss": 0.9700843095779419, "elapsed_s": 245.37768483161926}
|
| 85 |
+
{"step": 1700, "loss": 0.6830655932426453, "elapsed_s": 247.76059103012085}
|
| 86 |
+
{"step": 1720, "loss": 0.6878824234008789, "elapsed_s": 250.04659032821655}
|
| 87 |
+
{"step": 1740, "loss": 0.7137572765350342, "elapsed_s": 252.4363090991974}
|
| 88 |
+
{"step": 1760, "loss": 0.7030797600746155, "elapsed_s": 254.70366787910461}
|
| 89 |
+
{"step": 1780, "loss": 0.5040646195411682, "elapsed_s": 257.0819311141968}
|
| 90 |
+
{"step": 1800, "loss": 0.8512486815452576, "elapsed_s": 259.5026316642761}
|
| 91 |
+
{"step": 1820, "loss": 1.0612444877624512, "elapsed_s": 262.01344442367554}
|
| 92 |
+
{"step": 1840, "loss": 0.7402116060256958, "elapsed_s": 264.44833731651306}
|
| 93 |
+
{"step": 1860, "loss": 0.9219162464141846, "elapsed_s": 266.7960126399994}
|
| 94 |
+
{"step": 1880, "loss": 0.9056073427200317, "elapsed_s": 269.0323603153229}
|
| 95 |
+
{"step": 1900, "loss": 1.1353096961975098, "elapsed_s": 271.34942531585693}
|
| 96 |
+
{"step": 1920, "loss": 0.6707111597061157, "elapsed_s": 273.6354241371155}
|
| 97 |
+
{"step": 1940, "loss": 0.6909093260765076, "elapsed_s": 275.8811547756195}
|
| 98 |
+
{"step": 1960, "loss": 0.5704458951950073, "elapsed_s": 278.1117763519287}
|
| 99 |
+
{"step": 1980, "loss": 0.6369386315345764, "elapsed_s": 280.36421608924866}
|
| 100 |
+
{"step": 2000, "loss": 0.8341976404190063, "elapsed_s": 282.70127749443054}
|
| 101 |
+
{"step": 2020, "loss": 0.9534465074539185, "elapsed_s": 285.03411197662354}
|
| 102 |
+
{"step": 2040, "loss": 0.7341903448104858, "elapsed_s": 287.24849700927734}
|
| 103 |
+
{"step": 2060, "loss": 0.8844702243804932, "elapsed_s": 289.557368516922}
|
| 104 |
+
{"step": 2080, "loss": 0.684898316860199, "elapsed_s": 291.7943947315216}
|
| 105 |
+
{"step": 2100, "loss": 0.8408010601997375, "elapsed_s": 294.20451736450195}
|
| 106 |
+
{"step": 2120, "loss": 0.5095275044441223, "elapsed_s": 296.5270690917969}
|
| 107 |
+
{"step": 2140, "loss": 0.6561337113380432, "elapsed_s": 298.830194234848}
|
| 108 |
+
{"step": 2160, "loss": 0.6959529519081116, "elapsed_s": 301.0671739578247}
|
| 109 |
+
{"step": 2180, "loss": 1.2064918279647827, "elapsed_s": 303.4138615131378}
|
| 110 |
+
{"step": 2200, "loss": 0.9527730941772461, "elapsed_s": 305.688688993454}
|
| 111 |
+
{"step": 2220, "loss": 0.734599769115448, "elapsed_s": 307.91563725471497}
|
| 112 |
+
{"step": 2240, "loss": 0.5510088801383972, "elapsed_s": 310.2139894962311}
|
| 113 |
+
{"step": 2260, "loss": 0.7135998606681824, "elapsed_s": 312.47889971733093}
|
| 114 |
+
{"step": 2280, "loss": 0.7499587535858154, "elapsed_s": 314.7604830265045}
|
| 115 |
+
{"step": 2300, "loss": 0.49959662556648254, "elapsed_s": 317.03707122802734}
|
| 116 |
+
{"step": 2320, "loss": 0.7008457779884338, "elapsed_s": 319.2195107936859}
|
| 117 |
+
{"step": 2340, "loss": 0.7360463738441467, "elapsed_s": 321.53323340415955}
|
| 118 |
+
{"step": 2360, "loss": 0.6017570495605469, "elapsed_s": 323.8350667953491}
|
| 119 |
+
{"step": 2380, "loss": 0.5641533732414246, "elapsed_s": 326.01655864715576}
|
| 120 |
+
{"step": 2400, "loss": 0.7830227613449097, "elapsed_s": 328.33099389076233}
|
| 121 |
+
{"step": 2420, "loss": 0.8194772601127625, "elapsed_s": 330.654661655426}
|
| 122 |
+
{"step": 2440, "loss": 0.8186568021774292, "elapsed_s": 332.96458411216736}
|
| 123 |
+
{"step": 2460, "loss": 0.7382733821868896, "elapsed_s": 335.2771816253662}
|
| 124 |
+
{"step": 2480, "loss": 0.6406037211418152, "elapsed_s": 337.55707025527954}
|
| 125 |
+
{"step": 2500, "loss": 0.7257729172706604, "elapsed_s": 339.7473740577698}
|
| 126 |
+
{"step": 2520, "loss": 0.9197375774383545, "elapsed_s": 342.1266133785248}
|
| 127 |
+
{"step": 2540, "loss": 0.8234144449234009, "elapsed_s": 344.3270652294159}
|
| 128 |
+
{"step": 2560, "loss": 0.7239054441452026, "elapsed_s": 346.5580265522003}
|
| 129 |
+
{"step": 2580, "loss": 0.6573614478111267, "elapsed_s": 348.83669900894165}
|
| 130 |
+
{"step": 2600, "loss": 0.5115126967430115, "elapsed_s": 351.04119062423706}
|
| 131 |
+
{"step": 2620, "loss": 0.6841891407966614, "elapsed_s": 353.3731095790863}
|
| 132 |
+
{"step": 2640, "loss": 0.4693373441696167, "elapsed_s": 355.64368176460266}
|
| 133 |
+
{"step": 2660, "loss": 0.6603880524635315, "elapsed_s": 357.930406332016}
|
| 134 |
+
{"step": 2680, "loss": 0.5674179792404175, "elapsed_s": 360.19948506355286}
|
| 135 |
+
{"step": 2700, "loss": 0.5704478025436401, "elapsed_s": 362.46419072151184}
|
| 136 |
+
{"step": 2720, "loss": 0.8556029200553894, "elapsed_s": 364.7355716228485}
|
| 137 |
+
{"step": 2740, "loss": 0.6067067980766296, "elapsed_s": 366.99259662628174}
|
| 138 |
+
{"step": 2760, "loss": 0.6326735615730286, "elapsed_s": 369.35428643226624}
|
| 139 |
+
{"step": 2780, "loss": 0.8130509257316589, "elapsed_s": 371.6213490962982}
|
| 140 |
+
{"step": 2800, "loss": 0.6584718823432922, "elapsed_s": 373.7778697013855}
|
| 141 |
+
{"step": 2820, "loss": 0.5811054706573486, "elapsed_s": 375.98267793655396}
|
| 142 |
+
{"step": 2840, "loss": 0.7197288870811462, "elapsed_s": 378.21638083457947}
|
| 143 |
+
{"step": 2860, "loss": 0.6227004528045654, "elapsed_s": 380.45498847961426}
|
| 144 |
+
{"step": 2880, "loss": 0.5917772054672241, "elapsed_s": 382.81819200515747}
|
| 145 |
+
{"step": 2900, "loss": 0.5877758860588074, "elapsed_s": 384.9969575405121}
|
| 146 |
+
{"step": 2920, "loss": 0.6115587949752808, "elapsed_s": 387.1873667240143}
|
| 147 |
+
{"step": 2940, "loss": 0.7582858800888062, "elapsed_s": 389.362286567688}
|
| 148 |
+
{"step": 2960, "loss": 0.6297599077224731, "elapsed_s": 391.5322082042694}
|
| 149 |
+
{"step": 2980, "loss": 0.7046560645103455, "elapsed_s": 393.8774299621582}
|
| 150 |
+
{"step": 3000, "loss": 0.6998088955879211, "elapsed_s": 396.1864001750946}
|