{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03333333333333333, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "step": 0, "train_flow_matching_loss": 0.326171875, "train_physics_loss": 0.0 }, { "grad_norm": 4.608675956726074, "learning_rate": 3.0000000000000004e-07, "loss": 0.2961, "step": 10 }, { "step": 10, "train_flow_matching_loss": 0.453125, "train_physics_loss": 1.1655902862548828 }, { "grad_norm": 6.491508960723877, "learning_rate": 6.333333333333333e-07, "loss": 0.269, "step": 20 }, { "step": 20, "train_flow_matching_loss": 0.29296875, "train_physics_loss": 0.0 }, { "grad_norm": 20.94609260559082, "learning_rate": 9.666666666666668e-07, "loss": 0.2762, "step": 30 }, { "step": 30, "train_flow_matching_loss": 0.236328125, "train_physics_loss": 0.0 }, { "grad_norm": 14.161486625671387, "learning_rate": 1.3e-06, "loss": 0.2519, "step": 40 }, { "step": 40, "train_flow_matching_loss": 0.279296875, "train_physics_loss": 0.0 }, { "grad_norm": 2.596518039703369, "learning_rate": 1.6333333333333333e-06, "loss": 0.2743, "step": 50 }, { "step": 50, "train_flow_matching_loss": 0.373046875, "train_physics_loss": 1.1627117395401 }, { "grad_norm": 1.793614149093628, "learning_rate": 1.9666666666666668e-06, "loss": 0.2641, "step": 60 }, { "step": 60, "train_flow_matching_loss": 0.193359375, "train_physics_loss": 0.0 }, { "grad_norm": 2.097046375274658, "learning_rate": 2.3e-06, "loss": 0.2583, "step": 70 }, { "step": 70, "train_flow_matching_loss": 0.2412109375, "train_physics_loss": 0.0 }, { "grad_norm": 2.097332239151001, "learning_rate": 2.6333333333333337e-06, "loss": 0.2466, "step": 80 }, { "step": 80, "train_flow_matching_loss": 0.1591796875, "train_physics_loss": 0.0 }, { "grad_norm": 1.5931936502456665, "learning_rate": 2.966666666666667e-06, "loss": 0.2396, "step": 90 }, { "step": 90, "train_flow_matching_loss": 0.193359375, "train_physics_loss": 1.241982102394104 }, { "grad_norm": 0.4996283948421478, "learning_rate": 3.3e-06, "loss": 0.2646, "step": 100 }, { "step": 100, "train_flow_matching_loss": 0.212890625, "train_physics_loss": 0.0 }, { "grad_norm": 0.8610798120498657, "learning_rate": 3.633333333333334e-06, "loss": 0.2498, "step": 110 }, { "step": 110, "train_flow_matching_loss": 0.193359375, "train_physics_loss": 0.0 }, { "grad_norm": 0.8145138621330261, "learning_rate": 3.966666666666667e-06, "loss": 0.2608, "step": 120 }, { "step": 120, "train_flow_matching_loss": 0.2041015625, "train_physics_loss": 0.0 }, { "grad_norm": 0.5870611071586609, "learning_rate": 4.2999999999999995e-06, "loss": 0.2163, "step": 130 }, { "step": 130, "train_flow_matching_loss": 0.294921875, "train_physics_loss": 1.3060048818588257 }, { "grad_norm": 0.4015425741672516, "learning_rate": 4.633333333333334e-06, "loss": 0.2404, "step": 140 }, { "step": 140, "train_flow_matching_loss": 0.2021484375, "train_physics_loss": 0.0 }, { "grad_norm": 0.8249984383583069, "learning_rate": 4.966666666666667e-06, "loss": 0.2589, "step": 150 }, { "step": 150, "train_flow_matching_loss": 0.181640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.8456190228462219, "learning_rate": 5.3e-06, "loss": 0.245, "step": 160 }, { "step": 160, "train_flow_matching_loss": 0.197265625, "train_physics_loss": 0.0 }, { "grad_norm": 0.5049912929534912, "learning_rate": 5.633333333333333e-06, "loss": 0.2202, "step": 170 }, { "step": 170, "train_flow_matching_loss": 0.33984375, "train_physics_loss": 1.0072226524353027 }, { "grad_norm": 0.3911513686180115, "learning_rate": 5.9666666666666666e-06, "loss": 0.2456, "step": 180 }, { "step": 180, "train_flow_matching_loss": 0.1611328125, "train_physics_loss": 0.0 }, { "grad_norm": 0.4234141707420349, "learning_rate": 6.300000000000001e-06, "loss": 0.2408, "step": 190 }, { "step": 190, "train_flow_matching_loss": 0.181640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.4998401701450348, "learning_rate": 6.633333333333333e-06, "loss": 0.2404, "step": 200 }, { "step": 200, "train_flow_matching_loss": 0.2099609375, "train_physics_loss": 0.0 }, { "grad_norm": 0.3860056698322296, "learning_rate": 6.966666666666667e-06, "loss": 0.218, "step": 210 }, { "step": 210, "train_flow_matching_loss": 0.1806640625, "train_physics_loss": 1.2362585067749023 }, { "grad_norm": 0.23343786597251892, "learning_rate": 7.2999999999999996e-06, "loss": 0.2417, "step": 220 }, { "step": 220, "train_flow_matching_loss": 0.1669921875, "train_physics_loss": 0.0 }, { "grad_norm": 0.4194835126399994, "learning_rate": 7.633333333333334e-06, "loss": 0.2401, "step": 230 }, { "step": 230, "train_flow_matching_loss": 0.1884765625, "train_physics_loss": 0.0 }, { "grad_norm": 0.34282320737838745, "learning_rate": 7.966666666666666e-06, "loss": 0.2376, "step": 240 }, { "step": 240, "train_flow_matching_loss": 0.142578125, "train_physics_loss": 0.0 }, { "grad_norm": 0.3080269992351532, "learning_rate": 8.3e-06, "loss": 0.2253, "step": 250 }, { "step": 250, "train_flow_matching_loss": 0.1728515625, "train_physics_loss": 1.1502143144607544 }, { "grad_norm": 0.2572602331638336, "learning_rate": 8.633333333333334e-06, "loss": 0.2245, "step": 260 }, { "step": 260, "train_flow_matching_loss": 0.232421875, "train_physics_loss": 0.0 }, { "grad_norm": 0.3650667071342468, "learning_rate": 8.966666666666668e-06, "loss": 0.2423, "step": 270 }, { "step": 270, "train_flow_matching_loss": 0.205078125, "train_physics_loss": 0.0 }, { "grad_norm": 0.3034899830818176, "learning_rate": 9.3e-06, "loss": 0.2305, "step": 280 }, { "step": 280, "train_flow_matching_loss": 0.259765625, "train_physics_loss": 0.0 }, { "grad_norm": 0.5119873881340027, "learning_rate": 9.633333333333335e-06, "loss": 0.2384, "step": 290 }, { "step": 290, "train_flow_matching_loss": 0.138671875, "train_physics_loss": 1.1380618810653687 }, { "grad_norm": 0.3853524923324585, "learning_rate": 9.966666666666667e-06, "loss": 0.2341, "step": 300 }, { "step": 300, "train_flow_matching_loss": 0.140625, "train_physics_loss": 0.0 }, { "grad_norm": 0.37617555260658264, "learning_rate": 1.03e-05, "loss": 0.2242, "step": 310 }, { "step": 310, "train_flow_matching_loss": 0.1787109375, "train_physics_loss": 0.0 }, { "grad_norm": 0.37467101216316223, "learning_rate": 1.0633333333333334e-05, "loss": 0.223, "step": 320 }, { "step": 320, "train_flow_matching_loss": 0.1845703125, "train_physics_loss": 0.0 }, { "grad_norm": 0.24817711114883423, "learning_rate": 1.0966666666666666e-05, "loss": 0.2219, "step": 330 }, { "step": 330, "train_flow_matching_loss": 0.251953125, "train_physics_loss": 1.1071827411651611 }, { "grad_norm": 0.2167033553123474, "learning_rate": 1.13e-05, "loss": 0.2104, "step": 340 }, { "step": 340, "train_flow_matching_loss": 0.1943359375, "train_physics_loss": 0.0 }, { "grad_norm": 0.5935896635055542, "learning_rate": 1.1633333333333334e-05, "loss": 0.2388, "step": 350 }, { "step": 350, "train_flow_matching_loss": 0.154296875, "train_physics_loss": 0.0 }, { "grad_norm": 0.350766658782959, "learning_rate": 1.1966666666666668e-05, "loss": 0.22, "step": 360 }, { "step": 360, "train_flow_matching_loss": 0.279296875, "train_physics_loss": 0.0 }, { "grad_norm": 0.24915441870689392, "learning_rate": 1.23e-05, "loss": 0.2195, "step": 370 }, { "step": 370, "train_flow_matching_loss": 0.43359375, "train_physics_loss": 0.0 }, { "grad_norm": 0.3285883367061615, "learning_rate": 1.2633333333333333e-05, "loss": 0.2257, "step": 380 }, { "step": 380, "train_flow_matching_loss": 0.1455078125, "train_physics_loss": 0.0 }, { "grad_norm": 0.2852630615234375, "learning_rate": 1.2966666666666669e-05, "loss": 0.2277, "step": 390 }, { "step": 390, "train_flow_matching_loss": 0.11669921875, "train_physics_loss": 0.0 }, { "grad_norm": 0.17581817507743835, "learning_rate": 1.3300000000000001e-05, "loss": 0.2136, "step": 400 }, { "step": 400, "train_flow_matching_loss": 0.2021484375, "train_physics_loss": 0.0 }, { "grad_norm": 0.27428138256073, "learning_rate": 1.3633333333333334e-05, "loss": 0.2146, "step": 410 }, { "step": 410, "train_flow_matching_loss": 0.296875, "train_physics_loss": 1.2539079189300537 }, { "grad_norm": 0.16316437721252441, "learning_rate": 1.3966666666666666e-05, "loss": 0.2154, "step": 420 }, { "step": 420, "train_flow_matching_loss": 0.1630859375, "train_physics_loss": 0.0 }, { "grad_norm": 0.815127432346344, "learning_rate": 1.43e-05, "loss": 0.2357, "step": 430 }, { "step": 430, "train_flow_matching_loss": 0.318359375, "train_physics_loss": 0.0 }, { "grad_norm": 0.460473895072937, "learning_rate": 1.4633333333333334e-05, "loss": 0.2258, "step": 440 }, { "step": 440, "train_flow_matching_loss": 0.234375, "train_physics_loss": 0.0 }, { "grad_norm": 0.1416216492652893, "learning_rate": 1.4966666666666668e-05, "loss": 0.2165, "step": 450 }, { "step": 450, "train_flow_matching_loss": 0.34375, "train_physics_loss": 0.9498165249824524 }, { "grad_norm": 0.24502915143966675, "learning_rate": 1.53e-05, "loss": 0.2286, "step": 460 }, { "step": 460, "train_flow_matching_loss": 0.1494140625, "train_physics_loss": 0.0 }, { "grad_norm": 0.2592589259147644, "learning_rate": 1.563333333333333e-05, "loss": 0.2263, "step": 470 }, { "step": 470, "train_flow_matching_loss": 0.1796875, "train_physics_loss": 0.0 }, { "grad_norm": 0.90907883644104, "learning_rate": 1.5966666666666667e-05, "loss": 0.2325, "step": 480 }, { "step": 480, "train_flow_matching_loss": 0.244140625, "train_physics_loss": 0.0 }, { "grad_norm": 0.2792417109012604, "learning_rate": 1.63e-05, "loss": 0.2354, "step": 490 }, { "step": 490, "train_flow_matching_loss": 0.310546875, "train_physics_loss": 1.0009559392929077 }, { "grad_norm": 0.26415398716926575, "learning_rate": 1.6633333333333336e-05, "loss": 0.2251, "step": 500 }, { "step": 500, "train_flow_matching_loss": 0.1552734375, "train_physics_loss": 0.0 }, { "grad_norm": 0.1738256961107254, "learning_rate": 1.6966666666666668e-05, "loss": 0.2237, "step": 510 }, { "step": 510, "train_flow_matching_loss": 0.208984375, "train_physics_loss": 0.0 }, { "grad_norm": 0.19109994173049927, "learning_rate": 1.73e-05, "loss": 0.2211, "step": 520 }, { "step": 520, "train_flow_matching_loss": 0.177734375, "train_physics_loss": 0.0 }, { "grad_norm": 0.19409514963626862, "learning_rate": 1.7633333333333336e-05, "loss": 0.221, "step": 530 }, { "step": 530, "train_flow_matching_loss": 0.2314453125, "train_physics_loss": 1.2211284637451172 }, { "grad_norm": 0.15948127210140228, "learning_rate": 1.796666666666667e-05, "loss": 0.2248, "step": 540 }, { "step": 540, "train_flow_matching_loss": 0.181640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.18917964398860931, "learning_rate": 1.83e-05, "loss": 0.2302, "step": 550 }, { "step": 550, "train_flow_matching_loss": 0.25390625, "train_physics_loss": 0.0 }, { "grad_norm": 0.367927223443985, "learning_rate": 1.8633333333333333e-05, "loss": 0.2435, "step": 560 }, { "step": 560, "train_flow_matching_loss": 0.1357421875, "train_physics_loss": 0.0 }, { "grad_norm": 0.16830769181251526, "learning_rate": 1.896666666666667e-05, "loss": 0.216, "step": 570 }, { "step": 570, "train_flow_matching_loss": 0.1708984375, "train_physics_loss": 1.1493914127349854 }, { "grad_norm": 0.2152395248413086, "learning_rate": 1.93e-05, "loss": 0.2205, "step": 580 }, { "step": 580, "train_flow_matching_loss": 0.140625, "train_physics_loss": 0.0 }, { "grad_norm": 0.22404998540878296, "learning_rate": 1.9633333333333334e-05, "loss": 0.2322, "step": 590 }, { "step": 590, "train_flow_matching_loss": 0.2314453125, "train_physics_loss": 0.0 }, { "grad_norm": 0.23788565397262573, "learning_rate": 1.9966666666666666e-05, "loss": 0.2235, "step": 600 }, { "step": 600, "train_flow_matching_loss": 0.185546875, "train_physics_loss": 0.0 }, { "grad_norm": 0.14863210916519165, "learning_rate": 2.0300000000000002e-05, "loss": 0.2163, "step": 610 }, { "step": 610, "train_flow_matching_loss": 0.220703125, "train_physics_loss": 0.9689019918441772 }, { "grad_norm": 0.11917997896671295, "learning_rate": 2.0633333333333335e-05, "loss": 0.2123, "step": 620 }, { "step": 620, "train_flow_matching_loss": 0.302734375, "train_physics_loss": 0.0 }, { "grad_norm": 0.23156337440013885, "learning_rate": 2.0966666666666667e-05, "loss": 0.2265, "step": 630 }, { "step": 630, "train_flow_matching_loss": 0.240234375, "train_physics_loss": 0.0 }, { "grad_norm": 0.21961408853530884, "learning_rate": 2.13e-05, "loss": 0.2207, "step": 640 }, { "step": 640, "train_flow_matching_loss": 0.224609375, "train_physics_loss": 0.0 }, { "grad_norm": 0.1400945484638214, "learning_rate": 2.1633333333333332e-05, "loss": 0.2121, "step": 650 }, { "step": 650, "train_flow_matching_loss": 0.322265625, "train_physics_loss": 1.0760555267333984 }, { "grad_norm": 0.18436451256275177, "learning_rate": 2.1966666666666668e-05, "loss": 0.2279, "step": 660 }, { "step": 660, "train_flow_matching_loss": 0.2060546875, "train_physics_loss": 0.0 }, { "grad_norm": 0.19369827210903168, "learning_rate": 2.23e-05, "loss": 0.2094, "step": 670 }, { "step": 670, "train_flow_matching_loss": 0.2451171875, "train_physics_loss": 0.0 }, { "grad_norm": 0.1745651811361313, "learning_rate": 2.2633333333333336e-05, "loss": 0.2323, "step": 680 }, { "step": 680, "train_flow_matching_loss": 0.1689453125, "train_physics_loss": 0.0 }, { "grad_norm": 0.22019273042678833, "learning_rate": 2.2966666666666668e-05, "loss": 0.2192, "step": 690 }, { "step": 690, "train_flow_matching_loss": 0.234375, "train_physics_loss": 1.089104175567627 }, { "grad_norm": 0.12370491772890091, "learning_rate": 2.3300000000000004e-05, "loss": 0.234, "step": 700 }, { "step": 700, "train_flow_matching_loss": 0.2138671875, "train_physics_loss": 0.0 }, { "grad_norm": 0.2408670336008072, "learning_rate": 2.3633333333333336e-05, "loss": 0.2253, "step": 710 }, { "step": 710, "train_flow_matching_loss": 0.2255859375, "train_physics_loss": 0.0 }, { "grad_norm": 0.16252945363521576, "learning_rate": 2.396666666666667e-05, "loss": 0.2238, "step": 720 }, { "step": 720, "train_flow_matching_loss": 0.1806640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.11820516735315323, "learning_rate": 2.43e-05, "loss": 0.2215, "step": 730 }, { "step": 730, "train_flow_matching_loss": 0.1806640625, "train_physics_loss": 1.0379387140274048 }, { "grad_norm": 0.20267561078071594, "learning_rate": 2.4633333333333334e-05, "loss": 0.2231, "step": 740 }, { "step": 740, "train_flow_matching_loss": 0.2392578125, "train_physics_loss": 0.0 }, { "grad_norm": 0.2036902904510498, "learning_rate": 2.496666666666667e-05, "loss": 0.2372, "step": 750 }, { "step": 750, "train_flow_matching_loss": 0.1904296875, "train_physics_loss": 0.0 }, { "grad_norm": 0.270289808511734, "learning_rate": 2.5300000000000002e-05, "loss": 0.2215, "step": 760 }, { "step": 760, "train_flow_matching_loss": 0.1533203125, "train_physics_loss": 0.0 }, { "grad_norm": 0.13340096175670624, "learning_rate": 2.5633333333333338e-05, "loss": 0.2246, "step": 770 }, { "step": 770, "train_flow_matching_loss": 0.1689453125, "train_physics_loss": 1.0493122339248657 }, { "grad_norm": 0.13536763191223145, "learning_rate": 2.5966666666666667e-05, "loss": 0.2324, "step": 780 }, { "step": 780, "train_flow_matching_loss": 0.1904296875, "train_physics_loss": 0.0 }, { "grad_norm": 0.14027945697307587, "learning_rate": 2.6300000000000002e-05, "loss": 0.2158, "step": 790 }, { "step": 790, "train_flow_matching_loss": 0.2197265625, "train_physics_loss": 0.0 }, { "grad_norm": 0.23080722987651825, "learning_rate": 2.663333333333333e-05, "loss": 0.215, "step": 800 }, { "step": 800, "train_flow_matching_loss": 0.1748046875, "train_physics_loss": 0.0 }, { "grad_norm": 0.12672141194343567, "learning_rate": 2.6966666666666667e-05, "loss": 0.2059, "step": 810 }, { "step": 810, "train_flow_matching_loss": 0.1005859375, "train_physics_loss": 1.0300136804580688 }, { "grad_norm": 0.14777213335037231, "learning_rate": 2.7300000000000003e-05, "loss": 0.2307, "step": 820 }, { "step": 820, "train_flow_matching_loss": 0.25390625, "train_physics_loss": 0.0 }, { "grad_norm": 0.24080197513103485, "learning_rate": 2.7633333333333332e-05, "loss": 0.2369, "step": 830 }, { "step": 830, "train_flow_matching_loss": 0.2431640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.2703765630722046, "learning_rate": 2.7966666666666668e-05, "loss": 0.214, "step": 840 }, { "step": 840, "train_flow_matching_loss": 0.228515625, "train_physics_loss": 0.0 }, { "grad_norm": 0.21210747957229614, "learning_rate": 2.83e-05, "loss": 0.2176, "step": 850 }, { "step": 850, "train_flow_matching_loss": 0.1904296875, "train_physics_loss": 1.1258294582366943 }, { "grad_norm": 0.19727841019630432, "learning_rate": 2.8633333333333336e-05, "loss": 0.2321, "step": 860 }, { "step": 860, "train_flow_matching_loss": 0.345703125, "train_physics_loss": 0.0 }, { "grad_norm": 0.17061693966388702, "learning_rate": 2.8966666666666668e-05, "loss": 0.2445, "step": 870 }, { "step": 870, "train_flow_matching_loss": 0.1552734375, "train_physics_loss": 0.0 }, { "grad_norm": 0.13961118459701538, "learning_rate": 2.93e-05, "loss": 0.2176, "step": 880 }, { "step": 880, "train_flow_matching_loss": 0.2265625, "train_physics_loss": 0.0 }, { "grad_norm": 0.21723823249340057, "learning_rate": 2.9633333333333336e-05, "loss": 0.216, "step": 890 }, { "step": 890, "train_flow_matching_loss": 0.1787109375, "train_physics_loss": 1.0784062147140503 }, { "grad_norm": 0.1270458996295929, "learning_rate": 2.9966666666666672e-05, "loss": 0.2123, "step": 900 }, { "step": 900, "train_flow_matching_loss": 0.1806640625, "train_physics_loss": 0.0 }, { "grad_norm": 0.29031702876091003, "learning_rate": 3.03e-05, "loss": 0.2136, "step": 910 }, { "step": 910, "train_flow_matching_loss": 0.2099609375, "train_physics_loss": 0.0 }, { "grad_norm": 0.16241152584552765, "learning_rate": 3.063333333333334e-05, "loss": 0.2122, "step": 920 }, { "step": 920, "train_flow_matching_loss": 0.263671875, "train_physics_loss": 0.0 }, { "grad_norm": 0.1760573536157608, "learning_rate": 3.096666666666666e-05, "loss": 0.2106, "step": 930 }, { "step": 930, "train_flow_matching_loss": 0.27734375, "train_physics_loss": 1.1044496297836304 }, { "grad_norm": 0.10349904000759125, "learning_rate": 3.13e-05, "loss": 0.2143, "step": 940 }, { "step": 940, "train_flow_matching_loss": 0.2119140625, "train_physics_loss": 0.0 }, { "grad_norm": 0.1912197321653366, "learning_rate": 3.1633333333333334e-05, "loss": 0.2239, "step": 950 }, { "step": 950, "train_flow_matching_loss": 0.2138671875, "train_physics_loss": 0.0 }, { "grad_norm": 0.12100717425346375, "learning_rate": 3.196666666666667e-05, "loss": 0.2167, "step": 960 }, { "step": 960, "train_flow_matching_loss": 0.146484375, "train_physics_loss": 0.0 }, { "grad_norm": 0.10931448638439178, "learning_rate": 3.2300000000000006e-05, "loss": 0.2247, "step": 970 }, { "step": 970, "train_flow_matching_loss": 0.400390625, "train_physics_loss": 1.1122357845306396 }, { "grad_norm": 0.13774625957012177, "learning_rate": 3.263333333333333e-05, "loss": 0.214, "step": 980 }, { "step": 980, "train_flow_matching_loss": 0.2333984375, "train_physics_loss": 0.0 }, { "grad_norm": 0.13239137828350067, "learning_rate": 3.296666666666667e-05, "loss": 0.2072, "step": 990 }, { "step": 990, "train_flow_matching_loss": 0.12451171875, "train_physics_loss": 0.0 }, { "grad_norm": 0.17724885046482086, "learning_rate": 3.33e-05, "loss": 0.2073, "step": 1000 } ], "logging_steps": 10, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }