{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.291005291005291,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05291005291005291,
"grad_norm": 12.614516258239746,
"learning_rate": 9e-07,
"loss": 1.0027,
"step": 10
},
{
"epoch": 0.10582010582010581,
"grad_norm": 7.134069919586182,
"learning_rate": 1.9e-06,
"loss": 0.9062,
"step": 20
},
{
"epoch": 0.15873015873015872,
"grad_norm": 4.088052749633789,
"learning_rate": 2.9e-06,
"loss": 0.6868,
"step": 30
},
{
"epoch": 0.21164021164021163,
"grad_norm": 1.5182710886001587,
"learning_rate": 3.9e-06,
"loss": 0.4484,
"step": 40
},
{
"epoch": 0.26455026455026454,
"grad_norm": 1.0434691905975342,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.2952,
"step": 50
},
{
"epoch": 0.31746031746031744,
"grad_norm": 1.0803996324539185,
"learning_rate": 5.9e-06,
"loss": 0.2199,
"step": 60
},
{
"epoch": 0.37037037037037035,
"grad_norm": 1.0268789529800415,
"learning_rate": 6.900000000000001e-06,
"loss": 0.1769,
"step": 70
},
{
"epoch": 0.42328042328042326,
"grad_norm": 1.586670994758606,
"learning_rate": 7.9e-06,
"loss": 0.1669,
"step": 80
},
{
"epoch": 0.47619047619047616,
"grad_norm": 1.1374748945236206,
"learning_rate": 8.9e-06,
"loss": 0.145,
"step": 90
},
{
"epoch": 0.5291005291005291,
"grad_norm": 1.359847903251648,
"learning_rate": 9.900000000000002e-06,
"loss": 0.1334,
"step": 100
},
{
"epoch": 0.582010582010582,
"grad_norm": 0.8251723647117615,
"learning_rate": 1.09e-05,
"loss": 0.1212,
"step": 110
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.7170350551605225,
"learning_rate": 1.19e-05,
"loss": 0.1069,
"step": 120
},
{
"epoch": 0.6878306878306878,
"grad_norm": 0.7338834404945374,
"learning_rate": 1.29e-05,
"loss": 0.0926,
"step": 130
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.7547397017478943,
"learning_rate": 1.3900000000000002e-05,
"loss": 0.0854,
"step": 140
},
{
"epoch": 0.7936507936507936,
"grad_norm": 0.7571271061897278,
"learning_rate": 1.49e-05,
"loss": 0.0805,
"step": 150
},
{
"epoch": 0.8465608465608465,
"grad_norm": 0.6611447334289551,
"learning_rate": 1.59e-05,
"loss": 0.074,
"step": 160
},
{
"epoch": 0.8994708994708994,
"grad_norm": 0.725141704082489,
"learning_rate": 1.69e-05,
"loss": 0.0735,
"step": 170
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.7854066491127014,
"learning_rate": 1.79e-05,
"loss": 0.0657,
"step": 180
},
{
"epoch": 1.0052910052910053,
"grad_norm": 0.689892053604126,
"learning_rate": 1.8900000000000002e-05,
"loss": 0.0576,
"step": 190
},
{
"epoch": 1.0582010582010581,
"grad_norm": 0.4899508059024811,
"learning_rate": 1.9900000000000003e-05,
"loss": 0.0554,
"step": 200
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.44909000396728516,
"learning_rate": 2.09e-05,
"loss": 0.0555,
"step": 210
},
{
"epoch": 1.164021164021164,
"grad_norm": 0.5986359119415283,
"learning_rate": 2.19e-05,
"loss": 0.0551,
"step": 220
},
{
"epoch": 1.216931216931217,
"grad_norm": 0.6715332269668579,
"learning_rate": 2.29e-05,
"loss": 0.0503,
"step": 230
},
{
"epoch": 1.2698412698412698,
"grad_norm": 0.6094868779182434,
"learning_rate": 2.39e-05,
"loss": 0.0476,
"step": 240
},
{
"epoch": 1.3227513227513228,
"grad_norm": 0.5420939922332764,
"learning_rate": 2.4900000000000002e-05,
"loss": 0.0453,
"step": 250
},
{
"epoch": 1.3756613756613756,
"grad_norm": 0.506049394607544,
"learning_rate": 2.5900000000000003e-05,
"loss": 0.0428,
"step": 260
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.42280900478363037,
"learning_rate": 2.6900000000000003e-05,
"loss": 0.0441,
"step": 270
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.5239710211753845,
"learning_rate": 2.7900000000000004e-05,
"loss": 0.04,
"step": 280
},
{
"epoch": 1.5343915343915344,
"grad_norm": 0.5714675784111023,
"learning_rate": 2.8899999999999998e-05,
"loss": 0.042,
"step": 290
},
{
"epoch": 1.5873015873015874,
"grad_norm": 0.41054368019104004,
"learning_rate": 2.9900000000000002e-05,
"loss": 0.0388,
"step": 300
},
{
"epoch": 1.6402116402116402,
"grad_norm": 0.5580154061317444,
"learning_rate": 3.09e-05,
"loss": 0.0377,
"step": 310
},
{
"epoch": 1.693121693121693,
"grad_norm": 0.538187563419342,
"learning_rate": 3.19e-05,
"loss": 0.0388,
"step": 320
},
{
"epoch": 1.746031746031746,
"grad_norm": 0.47221389412879944,
"learning_rate": 3.29e-05,
"loss": 0.037,
"step": 330
},
{
"epoch": 1.798941798941799,
"grad_norm": 0.4035741686820984,
"learning_rate": 3.3900000000000004e-05,
"loss": 0.033,
"step": 340
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.46619656682014465,
"learning_rate": 3.49e-05,
"loss": 0.032,
"step": 350
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.4589271545410156,
"learning_rate": 3.59e-05,
"loss": 0.0338,
"step": 360
},
{
"epoch": 1.9576719576719577,
"grad_norm": 0.428501158952713,
"learning_rate": 3.69e-05,
"loss": 0.0343,
"step": 370
},
{
"epoch": 2.0105820105820107,
"grad_norm": 0.44803091883659363,
"learning_rate": 3.79e-05,
"loss": 0.033,
"step": 380
},
{
"epoch": 2.0634920634920633,
"grad_norm": 0.4423496127128601,
"learning_rate": 3.8900000000000004e-05,
"loss": 0.0346,
"step": 390
},
{
"epoch": 2.1164021164021163,
"grad_norm": 0.3414062261581421,
"learning_rate": 3.99e-05,
"loss": 0.0321,
"step": 400
},
{
"epoch": 2.1693121693121693,
"grad_norm": 0.4166780710220337,
"learning_rate": 4.09e-05,
"loss": 0.0294,
"step": 410
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.41433510184288025,
"learning_rate": 4.19e-05,
"loss": 0.0309,
"step": 420
},
{
"epoch": 2.2751322751322753,
"grad_norm": 0.3357546627521515,
"learning_rate": 4.29e-05,
"loss": 0.0308,
"step": 430
},
{
"epoch": 2.328042328042328,
"grad_norm": 0.3968923091888428,
"learning_rate": 4.39e-05,
"loss": 0.0333,
"step": 440
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.39435455203056335,
"learning_rate": 4.49e-05,
"loss": 0.0333,
"step": 450
},
{
"epoch": 2.433862433862434,
"grad_norm": 0.4199584126472473,
"learning_rate": 4.5900000000000004e-05,
"loss": 0.0292,
"step": 460
},
{
"epoch": 2.4867724867724865,
"grad_norm": 0.4035734534263611,
"learning_rate": 4.69e-05,
"loss": 0.0308,
"step": 470
},
{
"epoch": 2.5396825396825395,
"grad_norm": 0.43274185061454773,
"learning_rate": 4.79e-05,
"loss": 0.0307,
"step": 480
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.4387704133987427,
"learning_rate": 4.89e-05,
"loss": 0.0308,
"step": 490
},
{
"epoch": 2.6455026455026456,
"grad_norm": 0.33311206102371216,
"learning_rate": 4.99e-05,
"loss": 0.0313,
"step": 500
},
{
"epoch": 2.6984126984126986,
"grad_norm": 0.379742294549942,
"learning_rate": 5.0900000000000004e-05,
"loss": 0.0309,
"step": 510
},
{
"epoch": 2.751322751322751,
"grad_norm": 0.47922366857528687,
"learning_rate": 5.19e-05,
"loss": 0.0286,
"step": 520
},
{
"epoch": 2.804232804232804,
"grad_norm": 0.3975037932395935,
"learning_rate": 5.2900000000000005e-05,
"loss": 0.0256,
"step": 530
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.3734884262084961,
"learning_rate": 5.390000000000001e-05,
"loss": 0.0286,
"step": 540
},
{
"epoch": 2.91005291005291,
"grad_norm": 0.38052114844322205,
"learning_rate": 5.4900000000000006e-05,
"loss": 0.0273,
"step": 550
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.3779924809932709,
"learning_rate": 5.590000000000001e-05,
"loss": 0.0252,
"step": 560
},
{
"epoch": 3.015873015873016,
"grad_norm": 0.47244957089424133,
"learning_rate": 5.69e-05,
"loss": 0.027,
"step": 570
},
{
"epoch": 3.068783068783069,
"grad_norm": 0.44039979577064514,
"learning_rate": 5.79e-05,
"loss": 0.026,
"step": 580
},
{
"epoch": 3.121693121693122,
"grad_norm": 0.35815173387527466,
"learning_rate": 5.89e-05,
"loss": 0.0288,
"step": 590
},
{
"epoch": 3.1746031746031744,
"grad_norm": 0.4196639657020569,
"learning_rate": 5.99e-05,
"loss": 0.0276,
"step": 600
},
{
"epoch": 3.2275132275132274,
"grad_norm": 0.41070035099983215,
"learning_rate": 6.09e-05,
"loss": 0.0254,
"step": 610
},
{
"epoch": 3.2804232804232805,
"grad_norm": 0.377328097820282,
"learning_rate": 6.19e-05,
"loss": 0.0277,
"step": 620
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.3343811631202698,
"learning_rate": 6.29e-05,
"loss": 0.0278,
"step": 630
},
{
"epoch": 3.386243386243386,
"grad_norm": 0.30622223019599915,
"learning_rate": 6.390000000000001e-05,
"loss": 0.0267,
"step": 640
},
{
"epoch": 3.439153439153439,
"grad_norm": 0.29412469267845154,
"learning_rate": 6.49e-05,
"loss": 0.0267,
"step": 650
},
{
"epoch": 3.492063492063492,
"grad_norm": 0.3135109841823578,
"learning_rate": 6.59e-05,
"loss": 0.0251,
"step": 660
},
{
"epoch": 3.544973544973545,
"grad_norm": 0.314832866191864,
"learning_rate": 6.690000000000001e-05,
"loss": 0.0285,
"step": 670
},
{
"epoch": 3.597883597883598,
"grad_norm": 0.39124566316604614,
"learning_rate": 6.790000000000001e-05,
"loss": 0.0263,
"step": 680
},
{
"epoch": 3.6507936507936507,
"grad_norm": 0.37374603748321533,
"learning_rate": 6.89e-05,
"loss": 0.0238,
"step": 690
},
{
"epoch": 3.7037037037037037,
"grad_norm": 0.3832198679447174,
"learning_rate": 6.99e-05,
"loss": 0.0231,
"step": 700
},
{
"epoch": 3.7566137566137567,
"grad_norm": 0.40413761138916016,
"learning_rate": 7.09e-05,
"loss": 0.0239,
"step": 710
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.35467275977134705,
"learning_rate": 7.19e-05,
"loss": 0.025,
"step": 720
},
{
"epoch": 3.8624338624338623,
"grad_norm": 0.35146641731262207,
"learning_rate": 7.29e-05,
"loss": 0.0253,
"step": 730
},
{
"epoch": 3.9153439153439153,
"grad_norm": 0.3469861149787903,
"learning_rate": 7.390000000000001e-05,
"loss": 0.0228,
"step": 740
},
{
"epoch": 3.9682539682539684,
"grad_norm": 0.3485950529575348,
"learning_rate": 7.49e-05,
"loss": 0.0236,
"step": 750
},
{
"epoch": 4.021164021164021,
"grad_norm": 0.35884398221969604,
"learning_rate": 7.59e-05,
"loss": 0.0242,
"step": 760
},
{
"epoch": 4.074074074074074,
"grad_norm": 0.34299910068511963,
"learning_rate": 7.69e-05,
"loss": 0.0249,
"step": 770
},
{
"epoch": 4.1269841269841265,
"grad_norm": 0.24837176501750946,
"learning_rate": 7.790000000000001e-05,
"loss": 0.0252,
"step": 780
},
{
"epoch": 4.1798941798941796,
"grad_norm": 0.3547382950782776,
"learning_rate": 7.890000000000001e-05,
"loss": 0.0241,
"step": 790
},
{
"epoch": 4.232804232804233,
"grad_norm": 0.32745105028152466,
"learning_rate": 7.99e-05,
"loss": 0.0238,
"step": 800
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.3326016366481781,
"learning_rate": 8.090000000000001e-05,
"loss": 0.023,
"step": 810
},
{
"epoch": 4.338624338624339,
"grad_norm": 0.3203228712081909,
"learning_rate": 8.19e-05,
"loss": 0.0239,
"step": 820
},
{
"epoch": 4.391534391534392,
"grad_norm": 0.31203749775886536,
"learning_rate": 8.29e-05,
"loss": 0.0216,
"step": 830
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.31825166940689087,
"learning_rate": 8.39e-05,
"loss": 0.0233,
"step": 840
},
{
"epoch": 4.497354497354498,
"grad_norm": 0.3035692572593689,
"learning_rate": 8.49e-05,
"loss": 0.0262,
"step": 850
},
{
"epoch": 4.550264550264551,
"grad_norm": 0.32904911041259766,
"learning_rate": 8.59e-05,
"loss": 0.0243,
"step": 860
},
{
"epoch": 4.603174603174603,
"grad_norm": 0.2948061227798462,
"learning_rate": 8.69e-05,
"loss": 0.0241,
"step": 870
},
{
"epoch": 4.656084656084656,
"grad_norm": 0.28630343079566956,
"learning_rate": 8.790000000000001e-05,
"loss": 0.0244,
"step": 880
},
{
"epoch": 4.708994708994709,
"grad_norm": 0.36151307821273804,
"learning_rate": 8.89e-05,
"loss": 0.0247,
"step": 890
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.3373434543609619,
"learning_rate": 8.99e-05,
"loss": 0.0245,
"step": 900
},
{
"epoch": 4.814814814814815,
"grad_norm": 0.3014078736305237,
"learning_rate": 9.090000000000001e-05,
"loss": 0.0244,
"step": 910
},
{
"epoch": 4.867724867724868,
"grad_norm": 0.3851953148841858,
"learning_rate": 9.190000000000001e-05,
"loss": 0.0232,
"step": 920
},
{
"epoch": 4.920634920634921,
"grad_norm": 0.2808712124824524,
"learning_rate": 9.290000000000001e-05,
"loss": 0.0265,
"step": 930
},
{
"epoch": 4.973544973544973,
"grad_norm": 0.2859858274459839,
"learning_rate": 9.39e-05,
"loss": 0.0237,
"step": 940
},
{
"epoch": 5.026455026455026,
"grad_norm": 0.31913334131240845,
"learning_rate": 9.49e-05,
"loss": 0.0245,
"step": 950
},
{
"epoch": 5.079365079365079,
"grad_norm": 0.3442356288433075,
"learning_rate": 9.59e-05,
"loss": 0.0257,
"step": 960
},
{
"epoch": 5.132275132275132,
"grad_norm": 0.406459242105484,
"learning_rate": 9.69e-05,
"loss": 0.0254,
"step": 970
},
{
"epoch": 5.185185185185185,
"grad_norm": 0.34004539251327515,
"learning_rate": 9.790000000000001e-05,
"loss": 0.024,
"step": 980
},
{
"epoch": 5.238095238095238,
"grad_norm": 0.3003678321838379,
"learning_rate": 9.89e-05,
"loss": 0.0248,
"step": 990
},
{
"epoch": 5.291005291005291,
"grad_norm": 0.3703750967979431,
"learning_rate": 9.99e-05,
"loss": 0.0239,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 106,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}