{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9922822491730982,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011025358324145534,
"grad_norm": 22.754425048828125,
"learning_rate": 4.945054945054946e-07,
"loss": 1.2587,
"step": 10
},
{
"epoch": 0.022050716648291068,
"grad_norm": 7.113337993621826,
"learning_rate": 1.0439560439560442e-06,
"loss": 0.945,
"step": 20
},
{
"epoch": 0.03307607497243661,
"grad_norm": 6.807334899902344,
"learning_rate": 1.5934065934065933e-06,
"loss": 0.7046,
"step": 30
},
{
"epoch": 0.044101433296582136,
"grad_norm": 4.160985946655273,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.4965,
"step": 40
},
{
"epoch": 0.05512679162072767,
"grad_norm": 2.954188108444214,
"learning_rate": 2.6923076923076923e-06,
"loss": 0.4281,
"step": 50
},
{
"epoch": 0.06615214994487321,
"grad_norm": 3.6939523220062256,
"learning_rate": 3.2417582417582424e-06,
"loss": 0.3876,
"step": 60
},
{
"epoch": 0.07717750826901874,
"grad_norm": 3.4522147178649902,
"learning_rate": 3.7912087912087915e-06,
"loss": 0.3595,
"step": 70
},
{
"epoch": 0.08820286659316427,
"grad_norm": 3.375483274459839,
"learning_rate": 4.340659340659341e-06,
"loss": 0.3622,
"step": 80
},
{
"epoch": 0.09922822491730982,
"grad_norm": 3.3890573978424072,
"learning_rate": 4.890109890109891e-06,
"loss": 0.3491,
"step": 90
},
{
"epoch": 0.11025358324145534,
"grad_norm": 2.7892234325408936,
"learning_rate": 4.998814299283415e-06,
"loss": 0.3274,
"step": 100
},
{
"epoch": 0.12127894156560089,
"grad_norm": 3.0670788288116455,
"learning_rate": 4.993999317659293e-06,
"loss": 0.3478,
"step": 110
},
{
"epoch": 0.13230429988974643,
"grad_norm": 2.922692060470581,
"learning_rate": 4.985488079432037e-06,
"loss": 0.3286,
"step": 120
},
{
"epoch": 0.14332965821389196,
"grad_norm": 2.717001438140869,
"learning_rate": 4.973293198767286e-06,
"loss": 0.324,
"step": 130
},
{
"epoch": 0.1543550165380375,
"grad_norm": 2.99025821685791,
"learning_rate": 4.957432749209755e-06,
"loss": 0.3256,
"step": 140
},
{
"epoch": 0.16538037486218302,
"grad_norm": 2.956892251968384,
"learning_rate": 4.937930236897151e-06,
"loss": 0.323,
"step": 150
},
{
"epoch": 0.17640573318632854,
"grad_norm": 2.796661615371704,
"learning_rate": 4.914814565722671e-06,
"loss": 0.3198,
"step": 160
},
{
"epoch": 0.1874310915104741,
"grad_norm": 2.8440957069396973,
"learning_rate": 4.888119994497701e-06,
"loss": 0.3164,
"step": 170
},
{
"epoch": 0.19845644983461963,
"grad_norm": 2.8787901401519775,
"learning_rate": 4.857886086178194e-06,
"loss": 0.3155,
"step": 180
},
{
"epoch": 0.20948180815876516,
"grad_norm": 3.005969524383545,
"learning_rate": 4.824157649230005e-06,
"loss": 0.314,
"step": 190
},
{
"epoch": 0.2205071664829107,
"grad_norm": 2.5972912311553955,
"learning_rate": 4.786984671220053e-06,
"loss": 0.2983,
"step": 200
},
{
"epoch": 0.23153252480705622,
"grad_norm": 2.5450024604797363,
"learning_rate": 4.746422244731743e-06,
"loss": 0.3146,
"step": 210
},
{
"epoch": 0.24255788313120177,
"grad_norm": 2.8743643760681152,
"learning_rate": 4.702530485714462e-06,
"loss": 0.3144,
"step": 220
},
{
"epoch": 0.2535832414553473,
"grad_norm": 2.759552001953125,
"learning_rate": 4.655374444388127e-06,
"loss": 0.2969,
"step": 230
},
{
"epoch": 0.26460859977949286,
"grad_norm": 2.5727880001068115,
"learning_rate": 4.6050240088348634e-06,
"loss": 0.3076,
"step": 240
},
{
"epoch": 0.2756339581036384,
"grad_norm": 2.36820387840271,
"learning_rate": 4.551553801420671e-06,
"loss": 0.2894,
"step": 250
},
{
"epoch": 0.2866593164277839,
"grad_norm": 2.892141103744507,
"learning_rate": 4.4950430682005995e-06,
"loss": 0.3084,
"step": 260
},
{
"epoch": 0.29768467475192945,
"grad_norm": 2.46459698677063,
"learning_rate": 4.435575561471346e-06,
"loss": 0.2968,
"step": 270
},
{
"epoch": 0.308710033076075,
"grad_norm": 2.428185224533081,
"learning_rate": 4.373239415645324e-06,
"loss": 0.2923,
"step": 280
},
{
"epoch": 0.3197353914002205,
"grad_norm": 2.903369188308716,
"learning_rate": 4.308127016630176e-06,
"loss": 0.3055,
"step": 290
},
{
"epoch": 0.33076074972436603,
"grad_norm": 2.4219541549682617,
"learning_rate": 4.240334864907317e-06,
"loss": 0.2966,
"step": 300
},
{
"epoch": 0.34178610804851156,
"grad_norm": 2.812272548675537,
"learning_rate": 4.169963432512436e-06,
"loss": 0.2833,
"step": 310
},
{
"epoch": 0.3528114663726571,
"grad_norm": 2.7986788749694824,
"learning_rate": 4.097117014129903e-06,
"loss": 0.2946,
"step": 320
},
{
"epoch": 0.3638368246968026,
"grad_norm": 2.4811017513275146,
"learning_rate": 4.021903572521802e-06,
"loss": 0.28,
"step": 330
},
{
"epoch": 0.3748621830209482,
"grad_norm": 2.9100232124328613,
"learning_rate": 3.9444345785206285e-06,
"loss": 0.2973,
"step": 340
},
{
"epoch": 0.38588754134509373,
"grad_norm": 2.6646060943603516,
"learning_rate": 3.864824845822837e-06,
"loss": 0.2914,
"step": 350
},
{
"epoch": 0.39691289966923926,
"grad_norm": 2.480253219604492,
"learning_rate": 3.7831923608280516e-06,
"loss": 0.278,
"step": 360
},
{
"epoch": 0.4079382579933848,
"grad_norm": 2.5499134063720703,
"learning_rate": 3.699658107776148e-06,
"loss": 0.2827,
"step": 370
},
{
"epoch": 0.4189636163175303,
"grad_norm": 3.0008158683776855,
"learning_rate": 3.6143458894413463e-06,
"loss": 0.2829,
"step": 380
},
{
"epoch": 0.42998897464167585,
"grad_norm": 2.6385183334350586,
"learning_rate": 3.527382143649075e-06,
"loss": 0.2823,
"step": 390
},
{
"epoch": 0.4410143329658214,
"grad_norm": 2.9152820110321045,
"learning_rate": 3.438895755887532e-06,
"loss": 0.2723,
"step": 400
},
{
"epoch": 0.4520396912899669,
"grad_norm": 2.694945812225342,
"learning_rate": 3.3490178682916534e-06,
"loss": 0.2836,
"step": 410
},
{
"epoch": 0.46306504961411243,
"grad_norm": 2.779081106185913,
"learning_rate": 3.257881685282609e-06,
"loss": 0.2522,
"step": 420
},
{
"epoch": 0.474090407938258,
"grad_norm": 2.4859516620635986,
"learning_rate": 3.1656222761508525e-06,
"loss": 0.2783,
"step": 430
},
{
"epoch": 0.48511576626240355,
"grad_norm": 2.886754035949707,
"learning_rate": 3.0723763748753354e-06,
"loss": 0.2619,
"step": 440
},
{
"epoch": 0.4961411245865491,
"grad_norm": 2.810600519180298,
"learning_rate": 2.9782821774755454e-06,
"loss": 0.2847,
"step": 450
},
{
"epoch": 0.5071664829106945,
"grad_norm": 2.813720226287842,
"learning_rate": 2.883479137196714e-06,
"loss": 0.2665,
"step": 460
},
{
"epoch": 0.5181918412348401,
"grad_norm": 2.1755166053771973,
"learning_rate": 2.7881077578317445e-06,
"loss": 0.2701,
"step": 470
},
{
"epoch": 0.5292171995589857,
"grad_norm": 2.8941328525543213,
"learning_rate": 2.6923093854861597e-06,
"loss": 0.2584,
"step": 480
},
{
"epoch": 0.5402425578831312,
"grad_norm": 2.368398427963257,
"learning_rate": 2.596225999094696e-06,
"loss": 0.2684,
"step": 490
},
{
"epoch": 0.5512679162072768,
"grad_norm": 2.5928003787994385,
"learning_rate": 2.5e-06,
"loss": 0.2546,
"step": 500
},
{
"epoch": 0.5622932745314223,
"grad_norm": 2.835794687271118,
"learning_rate": 2.4037740009053053e-06,
"loss": 0.2653,
"step": 510
},
{
"epoch": 0.5733186328555678,
"grad_norm": 2.550260543823242,
"learning_rate": 2.3076906145138407e-06,
"loss": 0.264,
"step": 520
},
{
"epoch": 0.5843439911797134,
"grad_norm": 2.7682080268859863,
"learning_rate": 2.2118922421682563e-06,
"loss": 0.2613,
"step": 530
},
{
"epoch": 0.5953693495038589,
"grad_norm": 2.712428569793701,
"learning_rate": 2.1165208628032863e-06,
"loss": 0.2488,
"step": 540
},
{
"epoch": 0.6063947078280044,
"grad_norm": 2.650545358657837,
"learning_rate": 2.0217178225244554e-06,
"loss": 0.258,
"step": 550
},
{
"epoch": 0.61742006615215,
"grad_norm": 2.616968870162964,
"learning_rate": 1.9276236251246655e-06,
"loss": 0.2552,
"step": 560
},
{
"epoch": 0.6284454244762955,
"grad_norm": 2.836118698120117,
"learning_rate": 1.8343777238491477e-06,
"loss": 0.251,
"step": 570
},
{
"epoch": 0.639470782800441,
"grad_norm": 2.6420810222625732,
"learning_rate": 1.7421183147173915e-06,
"loss": 0.2587,
"step": 580
},
{
"epoch": 0.6504961411245865,
"grad_norm": 2.2103841304779053,
"learning_rate": 1.6509821317083466e-06,
"loss": 0.2528,
"step": 590
},
{
"epoch": 0.6615214994487321,
"grad_norm": 2.5041706562042236,
"learning_rate": 1.5611042441124687e-06,
"loss": 0.2466,
"step": 600
},
{
"epoch": 0.6725468577728776,
"grad_norm": 3.019406795501709,
"learning_rate": 1.4726178563509258e-06,
"loss": 0.247,
"step": 610
},
{
"epoch": 0.6835722160970231,
"grad_norm": 2.844505548477173,
"learning_rate": 1.3856541105586545e-06,
"loss": 0.2487,
"step": 620
},
{
"epoch": 0.6945975744211687,
"grad_norm": 2.6431515216827393,
"learning_rate": 1.300341892223852e-06,
"loss": 0.2596,
"step": 630
},
{
"epoch": 0.7056229327453142,
"grad_norm": 2.4335758686065674,
"learning_rate": 1.2168076391719492e-06,
"loss": 0.2467,
"step": 640
},
{
"epoch": 0.7166482910694597,
"grad_norm": 2.8177690505981445,
"learning_rate": 1.1351751541771644e-06,
"loss": 0.2345,
"step": 650
},
{
"epoch": 0.7276736493936052,
"grad_norm": 2.927981376647949,
"learning_rate": 1.0555654214793723e-06,
"loss": 0.249,
"step": 660
},
{
"epoch": 0.7386990077177509,
"grad_norm": 3.3543944358825684,
"learning_rate": 9.780964274781984e-07,
"loss": 0.2612,
"step": 670
},
{
"epoch": 0.7497243660418964,
"grad_norm": 3.0488715171813965,
"learning_rate": 9.028829858700974e-07,
"loss": 0.2355,
"step": 680
},
{
"epoch": 0.7607497243660419,
"grad_norm": 2.7804830074310303,
"learning_rate": 8.300365674875652e-07,
"loss": 0.2562,
"step": 690
},
{
"epoch": 0.7717750826901875,
"grad_norm": 2.2365427017211914,
"learning_rate": 7.596651350926837e-07,
"loss": 0.2355,
"step": 700
},
{
"epoch": 0.782800441014333,
"grad_norm": 2.474679708480835,
"learning_rate": 6.91872983369826e-07,
"loss": 0.2279,
"step": 710
},
{
"epoch": 0.7938257993384785,
"grad_norm": 2.5682637691497803,
"learning_rate": 6.267605843546768e-07,
"loss": 0.2561,
"step": 720
},
{
"epoch": 0.804851157662624,
"grad_norm": 2.0722885131835938,
"learning_rate": 5.644244385286548e-07,
"loss": 0.2311,
"step": 730
},
{
"epoch": 0.8158765159867696,
"grad_norm": 2.517178773880005,
"learning_rate": 5.049569317994013e-07,
"loss": 0.2356,
"step": 740
},
{
"epoch": 0.8269018743109151,
"grad_norm": 2.4816508293151855,
"learning_rate": 4.484461985793298e-07,
"loss": 0.2499,
"step": 750
},
{
"epoch": 0.8379272326350606,
"grad_norm": 2.7292091846466064,
"learning_rate": 3.9497599116513714e-07,
"loss": 0.2577,
"step": 760
},
{
"epoch": 0.8489525909592062,
"grad_norm": 2.906921148300171,
"learning_rate": 3.446255556118736e-07,
"loss": 0.2316,
"step": 770
},
{
"epoch": 0.8599779492833517,
"grad_norm": 2.4659173488616943,
"learning_rate": 2.9746951428553884e-07,
"loss": 0.2407,
"step": 780
},
{
"epoch": 0.8710033076074972,
"grad_norm": 2.541038751602173,
"learning_rate": 2.535777552682578e-07,
"loss": 0.2399,
"step": 790
},
{
"epoch": 0.8820286659316428,
"grad_norm": 2.69195556640625,
"learning_rate": 2.1301532877994747e-07,
"loss": 0.2339,
"step": 800
},
{
"epoch": 0.8930540242557883,
"grad_norm": 2.950699806213379,
"learning_rate": 1.7584235076999468e-07,
"loss": 0.2384,
"step": 810
},
{
"epoch": 0.9040793825799338,
"grad_norm": 3.1038763523101807,
"learning_rate": 1.421139138218064e-07,
"loss": 0.2414,
"step": 820
},
{
"epoch": 0.9151047409040793,
"grad_norm": 2.633563756942749,
"learning_rate": 1.1188000550230005e-07,
"loss": 0.2364,
"step": 830
},
{
"epoch": 0.9261300992282249,
"grad_norm": 2.5586025714874268,
"learning_rate": 8.518543427732951e-08,
"loss": 0.2324,
"step": 840
},
{
"epoch": 0.9371554575523704,
"grad_norm": 2.448216199874878,
"learning_rate": 6.206976310284996e-08,
"loss": 0.242,
"step": 850
},
{
"epoch": 0.948180815876516,
"grad_norm": 2.557136297225952,
"learning_rate": 4.256725079024554e-08,
"loss": 0.2349,
"step": 860
},
{
"epoch": 0.9592061742006616,
"grad_norm": 2.8758692741394043,
"learning_rate": 2.670680123271402e-08,
"loss": 0.2337,
"step": 870
},
{
"epoch": 0.9702315325248071,
"grad_norm": 2.5542616844177246,
"learning_rate": 1.4511920567963911e-08,
"loss": 0.2404,
"step": 880
},
{
"epoch": 0.9812568908489526,
"grad_norm": 2.937741994857788,
"learning_rate": 6.00068234070772e-09,
"loss": 0.2434,
"step": 890
},
{
"epoch": 0.9922822491730982,
"grad_norm": 2.797497272491455,
"learning_rate": 1.1857007165852475e-09,
"loss": 0.2483,
"step": 900
}
],
"logging_steps": 10,
"max_steps": 907,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.777891824668508e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}