{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999781770568124,
"eval_steps": 500,
"global_step": 1718,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0058194515166945515,
"grad_norm": 292.5841369628906,
"learning_rate": 1.1654988945205933e-07,
"loss": 3.4335,
"step": 10
},
{
"epoch": 0.011638903033389103,
"grad_norm": 310.4792785644531,
"learning_rate": 1.5163490216845022e-07,
"loss": 3.3043,
"step": 20
},
{
"epoch": 0.017458354550083655,
"grad_norm": 255.72767639160156,
"learning_rate": 1.721583189448638e-07,
"loss": 2.6653,
"step": 30
},
{
"epoch": 0.023277806066778206,
"grad_norm": 148.07073974609375,
"learning_rate": 1.867199148848411e-07,
"loss": 1.4797,
"step": 40
},
{
"epoch": 0.029097257583472758,
"grad_norm": 68.8450927734375,
"learning_rate": 1.9801476618772772e-07,
"loss": 0.8688,
"step": 50
},
{
"epoch": 0.03491670910016731,
"grad_norm": 17.056119918823242,
"learning_rate": 2e-07,
"loss": 0.4755,
"step": 60
},
{
"epoch": 0.04073616061686186,
"grad_norm": 18.346956253051758,
"learning_rate": 2e-07,
"loss": 0.4493,
"step": 70
},
{
"epoch": 0.04655561213355641,
"grad_norm": 12.33590316772461,
"learning_rate": 2e-07,
"loss": 0.4181,
"step": 80
},
{
"epoch": 0.052375063650250964,
"grad_norm": 10.863751411437988,
"learning_rate": 2e-07,
"loss": 0.423,
"step": 90
},
{
"epoch": 0.058194515166945515,
"grad_norm": 12.51539421081543,
"learning_rate": 2e-07,
"loss": 0.4138,
"step": 100
},
{
"epoch": 0.06401396668364007,
"grad_norm": 10.982370376586914,
"learning_rate": 2e-07,
"loss": 0.3545,
"step": 110
},
{
"epoch": 0.06983341820033462,
"grad_norm": 14.997520446777344,
"learning_rate": 2e-07,
"loss": 0.4036,
"step": 120
},
{
"epoch": 0.07565286971702917,
"grad_norm": 15.653407096862793,
"learning_rate": 2e-07,
"loss": 0.3872,
"step": 130
},
{
"epoch": 0.08147232123372372,
"grad_norm": 14.640938758850098,
"learning_rate": 2e-07,
"loss": 0.3816,
"step": 140
},
{
"epoch": 0.08729177275041827,
"grad_norm": 12.07015609741211,
"learning_rate": 2e-07,
"loss": 0.3119,
"step": 150
},
{
"epoch": 0.09311122426711282,
"grad_norm": 23.116605758666992,
"learning_rate": 2e-07,
"loss": 0.3326,
"step": 160
},
{
"epoch": 0.09893067578380738,
"grad_norm": 14.743234634399414,
"learning_rate": 2e-07,
"loss": 0.3304,
"step": 170
},
{
"epoch": 0.10475012730050193,
"grad_norm": 13.361212730407715,
"learning_rate": 2e-07,
"loss": 0.3284,
"step": 180
},
{
"epoch": 0.11056957881719648,
"grad_norm": 14.600347518920898,
"learning_rate": 2e-07,
"loss": 0.2876,
"step": 190
},
{
"epoch": 0.11638903033389103,
"grad_norm": 16.508502960205078,
"learning_rate": 2e-07,
"loss": 0.3143,
"step": 200
},
{
"epoch": 0.12220848185058558,
"grad_norm": 11.057723999023438,
"learning_rate": 2e-07,
"loss": 0.2918,
"step": 210
},
{
"epoch": 0.12802793336728013,
"grad_norm": 14.558637619018555,
"learning_rate": 2e-07,
"loss": 0.3074,
"step": 220
},
{
"epoch": 0.13384738488397468,
"grad_norm": 11.879530906677246,
"learning_rate": 2e-07,
"loss": 0.28,
"step": 230
},
{
"epoch": 0.13966683640066924,
"grad_norm": 11.994890213012695,
"learning_rate": 2e-07,
"loss": 0.2601,
"step": 240
},
{
"epoch": 0.1454862879173638,
"grad_norm": 15.544328689575195,
"learning_rate": 2e-07,
"loss": 0.281,
"step": 250
},
{
"epoch": 0.15130573943405834,
"grad_norm": 11.14696979522705,
"learning_rate": 2e-07,
"loss": 0.2891,
"step": 260
},
{
"epoch": 0.1571251909507529,
"grad_norm": 8.271623611450195,
"learning_rate": 2e-07,
"loss": 0.2932,
"step": 270
},
{
"epoch": 0.16294464246744744,
"grad_norm": 16.06687355041504,
"learning_rate": 2e-07,
"loss": 0.2363,
"step": 280
},
{
"epoch": 0.168764093984142,
"grad_norm": 8.106555938720703,
"learning_rate": 2e-07,
"loss": 0.2313,
"step": 290
},
{
"epoch": 0.17458354550083655,
"grad_norm": 13.634657859802246,
"learning_rate": 2e-07,
"loss": 0.2727,
"step": 300
},
{
"epoch": 0.1804029970175311,
"grad_norm": 14.710253715515137,
"learning_rate": 2e-07,
"loss": 0.2649,
"step": 310
},
{
"epoch": 0.18622244853422565,
"grad_norm": 9.026782035827637,
"learning_rate": 2e-07,
"loss": 0.2547,
"step": 320
},
{
"epoch": 0.1920419000509202,
"grad_norm": 10.011273384094238,
"learning_rate": 2e-07,
"loss": 0.2771,
"step": 330
},
{
"epoch": 0.19786135156761475,
"grad_norm": 13.526799201965332,
"learning_rate": 2e-07,
"loss": 0.2589,
"step": 340
},
{
"epoch": 0.2036808030843093,
"grad_norm": 16.426071166992188,
"learning_rate": 2e-07,
"loss": 0.2436,
"step": 350
},
{
"epoch": 0.20950025460100385,
"grad_norm": 14.218461036682129,
"learning_rate": 2e-07,
"loss": 0.2593,
"step": 360
},
{
"epoch": 0.2153197061176984,
"grad_norm": 6.507007122039795,
"learning_rate": 2e-07,
"loss": 0.2245,
"step": 370
},
{
"epoch": 0.22113915763439296,
"grad_norm": 19.18690299987793,
"learning_rate": 2e-07,
"loss": 0.2447,
"step": 380
},
{
"epoch": 0.2269586091510875,
"grad_norm": 7.621412754058838,
"learning_rate": 2e-07,
"loss": 0.2131,
"step": 390
},
{
"epoch": 0.23277806066778206,
"grad_norm": 9.732011795043945,
"learning_rate": 2e-07,
"loss": 0.2495,
"step": 400
},
{
"epoch": 0.2385975121844766,
"grad_norm": 15.301701545715332,
"learning_rate": 2e-07,
"loss": 0.2427,
"step": 410
},
{
"epoch": 0.24441696370117116,
"grad_norm": 8.665528297424316,
"learning_rate": 2e-07,
"loss": 0.2281,
"step": 420
},
{
"epoch": 0.2502364152178657,
"grad_norm": 9.586000442504883,
"learning_rate": 2e-07,
"loss": 0.232,
"step": 430
},
{
"epoch": 0.25605586673456027,
"grad_norm": 13.73252010345459,
"learning_rate": 2e-07,
"loss": 0.2326,
"step": 440
},
{
"epoch": 0.2618753182512548,
"grad_norm": 14.053579330444336,
"learning_rate": 2e-07,
"loss": 0.2483,
"step": 450
},
{
"epoch": 0.26769476976794937,
"grad_norm": 9.641685485839844,
"learning_rate": 2e-07,
"loss": 0.2461,
"step": 460
},
{
"epoch": 0.2735142212846439,
"grad_norm": 13.011364936828613,
"learning_rate": 2e-07,
"loss": 0.1929,
"step": 470
},
{
"epoch": 0.2793336728013385,
"grad_norm": 13.232110023498535,
"learning_rate": 2e-07,
"loss": 0.204,
"step": 480
},
{
"epoch": 0.285153124318033,
"grad_norm": 10.551194190979004,
"learning_rate": 2e-07,
"loss": 0.2341,
"step": 490
},
{
"epoch": 0.2909725758347276,
"grad_norm": 11.238757133483887,
"learning_rate": 2e-07,
"loss": 0.2342,
"step": 500
},
{
"epoch": 0.2967920273514221,
"grad_norm": 14.688443183898926,
"learning_rate": 2e-07,
"loss": 0.2177,
"step": 510
},
{
"epoch": 0.3026114788681167,
"grad_norm": 14.724873542785645,
"learning_rate": 2e-07,
"loss": 0.1969,
"step": 520
},
{
"epoch": 0.30843093038481123,
"grad_norm": 11.847085952758789,
"learning_rate": 2e-07,
"loss": 0.1847,
"step": 530
},
{
"epoch": 0.3142503819015058,
"grad_norm": 11.497530937194824,
"learning_rate": 2e-07,
"loss": 0.2017,
"step": 540
},
{
"epoch": 0.32006983341820033,
"grad_norm": 11.654367446899414,
"learning_rate": 2e-07,
"loss": 0.2206,
"step": 550
},
{
"epoch": 0.3258892849348949,
"grad_norm": 12.561141967773438,
"learning_rate": 2e-07,
"loss": 0.2275,
"step": 560
},
{
"epoch": 0.33170873645158944,
"grad_norm": 10.0696382522583,
"learning_rate": 2e-07,
"loss": 0.2008,
"step": 570
},
{
"epoch": 0.337528187968284,
"grad_norm": 11.895383834838867,
"learning_rate": 2e-07,
"loss": 0.2192,
"step": 580
},
{
"epoch": 0.34334763948497854,
"grad_norm": 15.39511489868164,
"learning_rate": 2e-07,
"loss": 0.2113,
"step": 590
},
{
"epoch": 0.3491670910016731,
"grad_norm": 11.026963233947754,
"learning_rate": 2e-07,
"loss": 0.2042,
"step": 600
},
{
"epoch": 0.35498654251836764,
"grad_norm": 10.234797477722168,
"learning_rate": 2e-07,
"loss": 0.2026,
"step": 610
},
{
"epoch": 0.3608059940350622,
"grad_norm": 11.014677047729492,
"learning_rate": 2e-07,
"loss": 0.2051,
"step": 620
},
{
"epoch": 0.36662544555175675,
"grad_norm": 9.119762420654297,
"learning_rate": 2e-07,
"loss": 0.2137,
"step": 630
},
{
"epoch": 0.3724448970684513,
"grad_norm": 6.889257907867432,
"learning_rate": 2e-07,
"loss": 0.1647,
"step": 640
},
{
"epoch": 0.37826434858514585,
"grad_norm": 7.318251132965088,
"learning_rate": 2e-07,
"loss": 0.1973,
"step": 650
},
{
"epoch": 0.3840838001018404,
"grad_norm": 13.953539848327637,
"learning_rate": 2e-07,
"loss": 0.1962,
"step": 660
},
{
"epoch": 0.38990325161853495,
"grad_norm": 11.493965148925781,
"learning_rate": 2e-07,
"loss": 0.2017,
"step": 670
},
{
"epoch": 0.3957227031352295,
"grad_norm": 9.594313621520996,
"learning_rate": 2e-07,
"loss": 0.2475,
"step": 680
},
{
"epoch": 0.40154215465192405,
"grad_norm": 8.401884078979492,
"learning_rate": 2e-07,
"loss": 0.1946,
"step": 690
},
{
"epoch": 0.4073616061686186,
"grad_norm": 9.508219718933105,
"learning_rate": 2e-07,
"loss": 0.2113,
"step": 700
},
{
"epoch": 0.41318105768531316,
"grad_norm": 11.010660171508789,
"learning_rate": 2e-07,
"loss": 0.2058,
"step": 710
},
{
"epoch": 0.4190005092020077,
"grad_norm": 15.212239265441895,
"learning_rate": 2e-07,
"loss": 0.2155,
"step": 720
},
{
"epoch": 0.42481996071870226,
"grad_norm": 8.596692085266113,
"learning_rate": 2e-07,
"loss": 0.2143,
"step": 730
},
{
"epoch": 0.4306394122353968,
"grad_norm": 18.07278823852539,
"learning_rate": 2e-07,
"loss": 0.1929,
"step": 740
},
{
"epoch": 0.43645886375209136,
"grad_norm": 11.786556243896484,
"learning_rate": 2e-07,
"loss": 0.2149,
"step": 750
},
{
"epoch": 0.4422783152687859,
"grad_norm": 5.36111307144165,
"learning_rate": 2e-07,
"loss": 0.201,
"step": 760
},
{
"epoch": 0.44809776678548047,
"grad_norm": 17.974634170532227,
"learning_rate": 2e-07,
"loss": 0.2216,
"step": 770
},
{
"epoch": 0.453917218302175,
"grad_norm": 7.572098731994629,
"learning_rate": 2e-07,
"loss": 0.2137,
"step": 780
},
{
"epoch": 0.45973666981886957,
"grad_norm": 10.07806396484375,
"learning_rate": 2e-07,
"loss": 0.2011,
"step": 790
},
{
"epoch": 0.4655561213355641,
"grad_norm": 11.195019721984863,
"learning_rate": 2e-07,
"loss": 0.1976,
"step": 800
},
{
"epoch": 0.4713755728522587,
"grad_norm": 13.922369956970215,
"learning_rate": 2e-07,
"loss": 0.1755,
"step": 810
},
{
"epoch": 0.4771950243689532,
"grad_norm": 14.419588088989258,
"learning_rate": 2e-07,
"loss": 0.2261,
"step": 820
},
{
"epoch": 0.4830144758856478,
"grad_norm": 7.6839070320129395,
"learning_rate": 2e-07,
"loss": 0.1957,
"step": 830
},
{
"epoch": 0.4888339274023423,
"grad_norm": 18.723840713500977,
"learning_rate": 2e-07,
"loss": 0.1964,
"step": 840
},
{
"epoch": 0.4946533789190369,
"grad_norm": 11.51264476776123,
"learning_rate": 2e-07,
"loss": 0.2429,
"step": 850
},
{
"epoch": 0.5004728304357314,
"grad_norm": 14.401782989501953,
"learning_rate": 2e-07,
"loss": 0.2077,
"step": 860
},
{
"epoch": 0.506292281952426,
"grad_norm": 7.909298896789551,
"learning_rate": 2e-07,
"loss": 0.1981,
"step": 870
},
{
"epoch": 0.5121117334691205,
"grad_norm": 9.616816520690918,
"learning_rate": 2e-07,
"loss": 0.2059,
"step": 880
},
{
"epoch": 0.5179311849858151,
"grad_norm": 14.229058265686035,
"learning_rate": 2e-07,
"loss": 0.1838,
"step": 890
},
{
"epoch": 0.5237506365025096,
"grad_norm": 8.7423734664917,
"learning_rate": 2e-07,
"loss": 0.1883,
"step": 900
},
{
"epoch": 0.5295700880192042,
"grad_norm": 7.891780853271484,
"learning_rate": 2e-07,
"loss": 0.2177,
"step": 910
},
{
"epoch": 0.5353895395358987,
"grad_norm": 15.503663063049316,
"learning_rate": 2e-07,
"loss": 0.2028,
"step": 920
},
{
"epoch": 0.5412089910525933,
"grad_norm": 10.924641609191895,
"learning_rate": 2e-07,
"loss": 0.1923,
"step": 930
},
{
"epoch": 0.5470284425692878,
"grad_norm": 14.79828929901123,
"learning_rate": 2e-07,
"loss": 0.2322,
"step": 940
},
{
"epoch": 0.5528478940859824,
"grad_norm": 11.492549896240234,
"learning_rate": 2e-07,
"loss": 0.2306,
"step": 950
},
{
"epoch": 0.558667345602677,
"grad_norm": 5.280726909637451,
"learning_rate": 2e-07,
"loss": 0.1846,
"step": 960
},
{
"epoch": 0.5644867971193716,
"grad_norm": 7.49058198928833,
"learning_rate": 2e-07,
"loss": 0.186,
"step": 970
},
{
"epoch": 0.570306248636066,
"grad_norm": 17.624052047729492,
"learning_rate": 2e-07,
"loss": 0.2145,
"step": 980
},
{
"epoch": 0.5761257001527607,
"grad_norm": 11.165759086608887,
"learning_rate": 2e-07,
"loss": 0.201,
"step": 990
},
{
"epoch": 0.5819451516694552,
"grad_norm": 7.842067718505859,
"learning_rate": 2e-07,
"loss": 0.1968,
"step": 1000
},
{
"epoch": 0.5877646031861498,
"grad_norm": 9.594118118286133,
"learning_rate": 2e-07,
"loss": 0.2136,
"step": 1010
},
{
"epoch": 0.5935840547028443,
"grad_norm": 14.04518985748291,
"learning_rate": 2e-07,
"loss": 0.2075,
"step": 1020
},
{
"epoch": 0.5994035062195389,
"grad_norm": 15.331314086914062,
"learning_rate": 2e-07,
"loss": 0.2023,
"step": 1030
},
{
"epoch": 0.6052229577362334,
"grad_norm": 8.51887321472168,
"learning_rate": 2e-07,
"loss": 0.1713,
"step": 1040
},
{
"epoch": 0.611042409252928,
"grad_norm": 11.08820629119873,
"learning_rate": 2e-07,
"loss": 0.1868,
"step": 1050
},
{
"epoch": 0.6168618607696225,
"grad_norm": 12.212711334228516,
"learning_rate": 2e-07,
"loss": 0.1876,
"step": 1060
},
{
"epoch": 0.6226813122863171,
"grad_norm": 11.730500221252441,
"learning_rate": 2e-07,
"loss": 0.1934,
"step": 1070
},
{
"epoch": 0.6285007638030116,
"grad_norm": 9.89484977722168,
"learning_rate": 2e-07,
"loss": 0.2097,
"step": 1080
},
{
"epoch": 0.6343202153197062,
"grad_norm": 7.291867256164551,
"learning_rate": 2e-07,
"loss": 0.1582,
"step": 1090
},
{
"epoch": 0.6401396668364007,
"grad_norm": 7.9038920402526855,
"learning_rate": 2e-07,
"loss": 0.21,
"step": 1100
},
{
"epoch": 0.6459591183530953,
"grad_norm": 18.028404235839844,
"learning_rate": 2e-07,
"loss": 0.2293,
"step": 1110
},
{
"epoch": 0.6517785698697898,
"grad_norm": 12.41182804107666,
"learning_rate": 2e-07,
"loss": 0.2048,
"step": 1120
},
{
"epoch": 0.6575980213864844,
"grad_norm": 11.216751098632812,
"learning_rate": 2e-07,
"loss": 0.1903,
"step": 1130
},
{
"epoch": 0.6634174729031789,
"grad_norm": 13.232751846313477,
"learning_rate": 2e-07,
"loss": 0.19,
"step": 1140
},
{
"epoch": 0.6692369244198735,
"grad_norm": 9.882608413696289,
"learning_rate": 2e-07,
"loss": 0.1936,
"step": 1150
},
{
"epoch": 0.675056375936568,
"grad_norm": 9.839518547058105,
"learning_rate": 2e-07,
"loss": 0.2019,
"step": 1160
},
{
"epoch": 0.6808758274532626,
"grad_norm": 12.619885444641113,
"learning_rate": 2e-07,
"loss": 0.2124,
"step": 1170
},
{
"epoch": 0.6866952789699571,
"grad_norm": 12.874058723449707,
"learning_rate": 2e-07,
"loss": 0.1875,
"step": 1180
},
{
"epoch": 0.6925147304866517,
"grad_norm": 13.787298202514648,
"learning_rate": 2e-07,
"loss": 0.2071,
"step": 1190
},
{
"epoch": 0.6983341820033462,
"grad_norm": 11.749211311340332,
"learning_rate": 2e-07,
"loss": 0.1957,
"step": 1200
},
{
"epoch": 0.7041536335200408,
"grad_norm": 12.892156600952148,
"learning_rate": 2e-07,
"loss": 0.1748,
"step": 1210
},
{
"epoch": 0.7099730850367353,
"grad_norm": 14.410128593444824,
"learning_rate": 2e-07,
"loss": 0.1934,
"step": 1220
},
{
"epoch": 0.7157925365534299,
"grad_norm": 11.821023941040039,
"learning_rate": 2e-07,
"loss": 0.2113,
"step": 1230
},
{
"epoch": 0.7216119880701244,
"grad_norm": 7.469109058380127,
"learning_rate": 2e-07,
"loss": 0.177,
"step": 1240
},
{
"epoch": 0.727431439586819,
"grad_norm": 11.144964218139648,
"learning_rate": 2e-07,
"loss": 0.2026,
"step": 1250
},
{
"epoch": 0.7332508911035135,
"grad_norm": 8.659164428710938,
"learning_rate": 2e-07,
"loss": 0.1866,
"step": 1260
},
{
"epoch": 0.7390703426202081,
"grad_norm": 12.093481063842773,
"learning_rate": 2e-07,
"loss": 0.1819,
"step": 1270
},
{
"epoch": 0.7448897941369026,
"grad_norm": 9.76320743560791,
"learning_rate": 2e-07,
"loss": 0.1826,
"step": 1280
},
{
"epoch": 0.7507092456535972,
"grad_norm": 9.821404457092285,
"learning_rate": 2e-07,
"loss": 0.1916,
"step": 1290
},
{
"epoch": 0.7565286971702917,
"grad_norm": 13.206873893737793,
"learning_rate": 2e-07,
"loss": 0.2037,
"step": 1300
},
{
"epoch": 0.7623481486869863,
"grad_norm": 11.560912132263184,
"learning_rate": 2e-07,
"loss": 0.179,
"step": 1310
},
{
"epoch": 0.7681676002036808,
"grad_norm": 14.256608009338379,
"learning_rate": 2e-07,
"loss": 0.1879,
"step": 1320
},
{
"epoch": 0.7739870517203754,
"grad_norm": 17.668615341186523,
"learning_rate": 2e-07,
"loss": 0.1772,
"step": 1330
},
{
"epoch": 0.7798065032370699,
"grad_norm": 11.932788848876953,
"learning_rate": 2e-07,
"loss": 0.1657,
"step": 1340
},
{
"epoch": 0.7856259547537645,
"grad_norm": 8.989192008972168,
"learning_rate": 2e-07,
"loss": 0.18,
"step": 1350
},
{
"epoch": 0.791445406270459,
"grad_norm": 8.768953323364258,
"learning_rate": 2e-07,
"loss": 0.2002,
"step": 1360
},
{
"epoch": 0.7972648577871536,
"grad_norm": 16.538890838623047,
"learning_rate": 2e-07,
"loss": 0.1693,
"step": 1370
},
{
"epoch": 0.8030843093038481,
"grad_norm": 7.928899765014648,
"learning_rate": 2e-07,
"loss": 0.1926,
"step": 1380
},
{
"epoch": 0.8089037608205427,
"grad_norm": 11.980950355529785,
"learning_rate": 2e-07,
"loss": 0.2042,
"step": 1390
},
{
"epoch": 0.8147232123372372,
"grad_norm": 14.4302978515625,
"learning_rate": 2e-07,
"loss": 0.1797,
"step": 1400
},
{
"epoch": 0.8205426638539318,
"grad_norm": 11.229952812194824,
"learning_rate": 2e-07,
"loss": 0.17,
"step": 1410
},
{
"epoch": 0.8263621153706263,
"grad_norm": 11.317793846130371,
"learning_rate": 2e-07,
"loss": 0.1728,
"step": 1420
},
{
"epoch": 0.8321815668873209,
"grad_norm": 10.537130355834961,
"learning_rate": 2e-07,
"loss": 0.1787,
"step": 1430
},
{
"epoch": 0.8380010184040154,
"grad_norm": 11.120368003845215,
"learning_rate": 2e-07,
"loss": 0.1621,
"step": 1440
},
{
"epoch": 0.84382046992071,
"grad_norm": 13.397139549255371,
"learning_rate": 2e-07,
"loss": 0.1639,
"step": 1450
},
{
"epoch": 0.8496399214374045,
"grad_norm": 12.438237190246582,
"learning_rate": 2e-07,
"loss": 0.1769,
"step": 1460
},
{
"epoch": 0.8554593729540991,
"grad_norm": 11.550435066223145,
"learning_rate": 2e-07,
"loss": 0.2065,
"step": 1470
},
{
"epoch": 0.8612788244707936,
"grad_norm": 6.573584079742432,
"learning_rate": 2e-07,
"loss": 0.2021,
"step": 1480
},
{
"epoch": 0.8670982759874882,
"grad_norm": 6.305631637573242,
"learning_rate": 2e-07,
"loss": 0.1811,
"step": 1490
},
{
"epoch": 0.8729177275041827,
"grad_norm": 9.760597229003906,
"learning_rate": 2e-07,
"loss": 0.1937,
"step": 1500
},
{
"epoch": 0.8787371790208773,
"grad_norm": 13.0894193649292,
"learning_rate": 2e-07,
"loss": 0.176,
"step": 1510
},
{
"epoch": 0.8845566305375718,
"grad_norm": 7.848855495452881,
"learning_rate": 2e-07,
"loss": 0.1773,
"step": 1520
},
{
"epoch": 0.8903760820542664,
"grad_norm": 14.45218563079834,
"learning_rate": 2e-07,
"loss": 0.2096,
"step": 1530
},
{
"epoch": 0.8961955335709609,
"grad_norm": 7.301393032073975,
"learning_rate": 2e-07,
"loss": 0.1656,
"step": 1540
},
{
"epoch": 0.9020149850876555,
"grad_norm": 10.425517082214355,
"learning_rate": 2e-07,
"loss": 0.1802,
"step": 1550
},
{
"epoch": 0.90783443660435,
"grad_norm": 12.547024726867676,
"learning_rate": 2e-07,
"loss": 0.1922,
"step": 1560
},
{
"epoch": 0.9136538881210446,
"grad_norm": 12.041275978088379,
"learning_rate": 2e-07,
"loss": 0.1827,
"step": 1570
},
{
"epoch": 0.9194733396377391,
"grad_norm": 10.960613250732422,
"learning_rate": 2e-07,
"loss": 0.2234,
"step": 1580
},
{
"epoch": 0.9252927911544337,
"grad_norm": 11.155454635620117,
"learning_rate": 2e-07,
"loss": 0.1615,
"step": 1590
},
{
"epoch": 0.9311122426711282,
"grad_norm": 17.65553092956543,
"learning_rate": 2e-07,
"loss": 0.1892,
"step": 1600
},
{
"epoch": 0.9369316941878228,
"grad_norm": 18.907163619995117,
"learning_rate": 2e-07,
"loss": 0.1915,
"step": 1610
},
{
"epoch": 0.9427511457045173,
"grad_norm": 14.12991714477539,
"learning_rate": 2e-07,
"loss": 0.2156,
"step": 1620
},
{
"epoch": 0.948570597221212,
"grad_norm": 9.105369567871094,
"learning_rate": 2e-07,
"loss": 0.1808,
"step": 1630
},
{
"epoch": 0.9543900487379064,
"grad_norm": 10.135030746459961,
"learning_rate": 2e-07,
"loss": 0.1842,
"step": 1640
},
{
"epoch": 0.9602095002546011,
"grad_norm": 33.01081466674805,
"learning_rate": 2e-07,
"loss": 0.1757,
"step": 1650
},
{
"epoch": 0.9660289517712956,
"grad_norm": 7.512867450714111,
"learning_rate": 2e-07,
"loss": 0.1591,
"step": 1660
},
{
"epoch": 0.9718484032879902,
"grad_norm": 7.911075592041016,
"learning_rate": 2e-07,
"loss": 0.1885,
"step": 1670
},
{
"epoch": 0.9776678548046847,
"grad_norm": 4.568904399871826,
"learning_rate": 2e-07,
"loss": 0.1929,
"step": 1680
},
{
"epoch": 0.9834873063213793,
"grad_norm": 14.508501052856445,
"learning_rate": 2e-07,
"loss": 0.2354,
"step": 1690
},
{
"epoch": 0.9893067578380738,
"grad_norm": 9.957372665405273,
"learning_rate": 2e-07,
"loss": 0.1523,
"step": 1700
},
{
"epoch": 0.9951262093547684,
"grad_norm": 8.227477073669434,
"learning_rate": 2e-07,
"loss": 0.1816,
"step": 1710
},
{
"epoch": 0.999781770568124,
"step": 1718,
"total_flos": 6.783175773021798e+16,
"train_loss": 0.28378574687193936,
"train_runtime": 68908.423,
"train_samples_per_second": 0.798,
"train_steps_per_second": 0.025
}
],
"logging_steps": 10,
"max_steps": 1718,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.783175773021798e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}