Upload exp_phase5_stage_a_v7_reasoning_t1_smallscale_20260502_072354/log.jsonl with huggingface_hub
Browse files
exp_phase5_stage_a_v7_reasoning_t1_smallscale_20260502_072354/log.jsonl
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "loss": NaN, "elapsed_s": 7.046496391296387}
|
| 2 |
+
{"step": 40, "loss": 1.883128046989441, "elapsed_s": 10.89460277557373}
|
| 3 |
+
{"step": 60, "loss": NaN, "elapsed_s": 14.954717636108398}
|
| 4 |
+
{"step": 80, "loss": NaN, "elapsed_s": 18.966348886489868}
|
| 5 |
+
{"step": 100, "loss": NaN, "elapsed_s": 23.28045630455017}
|
| 6 |
+
{"step": 120, "loss": NaN, "elapsed_s": 27.13900637626648}
|
| 7 |
+
{"step": 140, "loss": NaN, "elapsed_s": 30.971492290496826}
|
| 8 |
+
{"step": 160, "loss": NaN, "elapsed_s": 34.92390704154968}
|
| 9 |
+
{"step": 180, "loss": 2.180785655975342, "elapsed_s": 38.78481125831604}
|
| 10 |
+
{"step": 200, "loss": NaN, "elapsed_s": 42.748425245285034}
|
| 11 |
+
{"step": 220, "loss": 3.9301598072052, "elapsed_s": 46.67312669754028}
|
| 12 |
+
{"step": 240, "loss": NaN, "elapsed_s": 50.605058670043945}
|
| 13 |
+
{"step": 260, "loss": 1.5636593103408813, "elapsed_s": 54.452040910720825}
|
| 14 |
+
{"step": 280, "loss": NaN, "elapsed_s": 58.31750297546387}
|
| 15 |
+
{"step": 300, "loss": NaN, "elapsed_s": 62.28974771499634}
|
| 16 |
+
{"step": 320, "loss": NaN, "elapsed_s": 66.18788957595825}
|
| 17 |
+
{"step": 340, "loss": 1.7059087753295898, "elapsed_s": 70.14261794090271}
|
| 18 |
+
{"step": 360, "loss": NaN, "elapsed_s": 74.08298683166504}
|
| 19 |
+
{"step": 380, "loss": NaN, "elapsed_s": 78.00297689437866}
|
| 20 |
+
{"step": 400, "loss": 1.2791242599487305, "elapsed_s": 82.04931640625}
|
| 21 |
+
{"step": 420, "loss": NaN, "elapsed_s": 85.88797760009766}
|
| 22 |
+
{"step": 440, "loss": NaN, "elapsed_s": 89.77120447158813}
|
| 23 |
+
{"step": 460, "loss": NaN, "elapsed_s": 93.64342927932739}
|
| 24 |
+
{"step": 480, "loss": NaN, "elapsed_s": 97.66012573242188}
|
| 25 |
+
{"step": 500, "loss": 1.1725105047225952, "elapsed_s": 101.59122109413147}
|
| 26 |
+
{"step": 520, "loss": NaN, "elapsed_s": 105.61851263046265}
|
| 27 |
+
{"step": 540, "loss": NaN, "elapsed_s": 109.52174282073975}
|
| 28 |
+
{"step": 560, "loss": NaN, "elapsed_s": 113.42886066436768}
|
| 29 |
+
{"step": 580, "loss": NaN, "elapsed_s": 117.3463990688324}
|
| 30 |
+
{"step": 600, "loss": NaN, "elapsed_s": 121.19623136520386}
|
| 31 |
+
{"step": 620, "loss": NaN, "elapsed_s": 125.11007475852966}
|
| 32 |
+
{"step": 640, "loss": 1.4129315614700317, "elapsed_s": 129.10240411758423}
|
| 33 |
+
{"step": 660, "loss": 0.5568574666976929, "elapsed_s": 133.0407407283783}
|
| 34 |
+
{"step": 680, "loss": 1.5337448120117188, "elapsed_s": 136.99234747886658}
|
| 35 |
+
{"step": 700, "loss": NaN, "elapsed_s": 140.9094705581665}
|
| 36 |
+
{"step": 720, "loss": NaN, "elapsed_s": 144.7622675895691}
|
| 37 |
+
{"step": 740, "loss": NaN, "elapsed_s": 148.64524507522583}
|
| 38 |
+
{"step": 760, "loss": 1.2852839231491089, "elapsed_s": 152.56872749328613}
|
| 39 |
+
{"step": 780, "loss": NaN, "elapsed_s": 156.4635124206543}
|
| 40 |
+
{"step": 800, "loss": NaN, "elapsed_s": 160.3264615535736}
|
| 41 |
+
{"step": 820, "loss": NaN, "elapsed_s": 164.20602416992188}
|
| 42 |
+
{"step": 840, "loss": 1.3029004335403442, "elapsed_s": 168.08221769332886}
|
| 43 |
+
{"step": 860, "loss": NaN, "elapsed_s": 171.94904017448425}
|
| 44 |
+
{"step": 880, "loss": NaN, "elapsed_s": 175.91217255592346}
|
| 45 |
+
{"step": 900, "loss": NaN, "elapsed_s": 180.02082419395447}
|
| 46 |
+
{"step": 920, "loss": 1.0680805444717407, "elapsed_s": 183.89619088172913}
|
| 47 |
+
{"step": 940, "loss": NaN, "elapsed_s": 187.8524787425995}
|
| 48 |
+
{"step": 960, "loss": 0.8346782326698303, "elapsed_s": 191.73251175880432}
|
| 49 |
+
{"step": 980, "loss": NaN, "elapsed_s": 195.60576224327087}
|
| 50 |
+
{"step": 1000, "loss": NaN, "elapsed_s": 199.50892162322998}
|
| 51 |
+
{"step": 1020, "loss": 0.9821131825447083, "elapsed_s": 203.58017349243164}
|
| 52 |
+
{"step": 1040, "loss": NaN, "elapsed_s": 207.43412160873413}
|
| 53 |
+
{"step": 1060, "loss": NaN, "elapsed_s": 211.3195035457611}
|
| 54 |
+
{"step": 1080, "loss": NaN, "elapsed_s": 215.20072293281555}
|
| 55 |
+
{"step": 1100, "loss": 1.6384103298187256, "elapsed_s": 219.06908512115479}
|
| 56 |
+
{"step": 1120, "loss": NaN, "elapsed_s": 222.9537754058838}
|
| 57 |
+
{"step": 1140, "loss": 0.8619658350944519, "elapsed_s": 226.82253670692444}
|
| 58 |
+
{"step": 1160, "loss": 0.7067429423332214, "elapsed_s": 230.71015810966492}
|
| 59 |
+
{"step": 1180, "loss": NaN, "elapsed_s": 234.5830454826355}
|
| 60 |
+
{"step": 1200, "loss": NaN, "elapsed_s": 238.44837474822998}
|
| 61 |
+
{"step": 1220, "loss": 0.6564204692840576, "elapsed_s": 242.3402111530304}
|
| 62 |
+
{"step": 1240, "loss": NaN, "elapsed_s": 246.1985592842102}
|
| 63 |
+
{"step": 1260, "loss": NaN, "elapsed_s": 250.06778693199158}
|
| 64 |
+
{"step": 1280, "loss": NaN, "elapsed_s": 253.90845155715942}
|
| 65 |
+
{"step": 1300, "loss": 0.8227419257164001, "elapsed_s": 257.7313253879547}
|
| 66 |
+
{"step": 1320, "loss": NaN, "elapsed_s": 261.59926772117615}
|
| 67 |
+
{"step": 1340, "loss": NaN, "elapsed_s": 265.47069573402405}
|
| 68 |
+
{"step": 1360, "loss": NaN, "elapsed_s": 269.3667929172516}
|
| 69 |
+
{"step": 1380, "loss": NaN, "elapsed_s": 273.2494463920593}
|
| 70 |
+
{"step": 1400, "loss": NaN, "elapsed_s": 277.1233515739441}
|
| 71 |
+
{"step": 1420, "loss": NaN, "elapsed_s": 280.7957773208618}
|
| 72 |
+
{"step": 1440, "loss": NaN, "elapsed_s": 284.66238617897034}
|
| 73 |
+
{"step": 1460, "loss": NaN, "elapsed_s": 288.51220417022705}
|
| 74 |
+
{"step": 1480, "loss": NaN, "elapsed_s": 292.3973722457886}
|
| 75 |
+
{"step": 1500, "loss": NaN, "elapsed_s": 296.2711820602417}
|
| 76 |
+
{"step": 1520, "loss": 0.3594600260257721, "elapsed_s": 300.1500482559204}
|
| 77 |
+
{"step": 1540, "loss": NaN, "elapsed_s": 303.9989743232727}
|
| 78 |
+
{"step": 1560, "loss": NaN, "elapsed_s": 307.8348693847656}
|
| 79 |
+
{"step": 1580, "loss": NaN, "elapsed_s": 311.70453214645386}
|
| 80 |
+
{"step": 1600, "loss": NaN, "elapsed_s": 315.5782346725464}
|
| 81 |
+
{"step": 1620, "loss": NaN, "elapsed_s": 319.4706244468689}
|
| 82 |
+
{"step": 1640, "loss": 0.5584579110145569, "elapsed_s": 323.3663320541382}
|
| 83 |
+
{"step": 1660, "loss": 1.7984603643417358, "elapsed_s": 327.2288627624512}
|
| 84 |
+
{"step": 1680, "loss": NaN, "elapsed_s": 331.0545070171356}
|
| 85 |
+
{"step": 1700, "loss": 0.9963536858558655, "elapsed_s": 334.91862750053406}
|
| 86 |
+
{"step": 1720, "loss": NaN, "elapsed_s": 338.8110411167145}
|
| 87 |
+
{"step": 1740, "loss": NaN, "elapsed_s": 342.6977026462555}
|
| 88 |
+
{"step": 1760, "loss": NaN, "elapsed_s": 346.53812623023987}
|
| 89 |
+
{"step": 1780, "loss": NaN, "elapsed_s": 350.43041706085205}
|
| 90 |
+
{"step": 1800, "loss": NaN, "elapsed_s": 354.32639598846436}
|
| 91 |
+
{"step": 1820, "loss": NaN, "elapsed_s": 358.2091929912567}
|
| 92 |
+
{"step": 1840, "loss": NaN, "elapsed_s": 362.0776467323303}
|
| 93 |
+
{"step": 1860, "loss": NaN, "elapsed_s": 365.95827317237854}
|
| 94 |
+
{"step": 1880, "loss": NaN, "elapsed_s": 370.13679552078247}
|
| 95 |
+
{"step": 1900, "loss": NaN, "elapsed_s": 374.0098886489868}
|
| 96 |
+
{"step": 1920, "loss": NaN, "elapsed_s": 377.88127970695496}
|
| 97 |
+
{"step": 1940, "loss": NaN, "elapsed_s": 381.77360105514526}
|
| 98 |
+
{"step": 1960, "loss": NaN, "elapsed_s": 385.6535875797272}
|
| 99 |
+
{"step": 1980, "loss": 1.302137851715088, "elapsed_s": 389.5378613471985}
|
| 100 |
+
{"step": 2000, "loss": NaN, "elapsed_s": 393.3899636268616}
|