{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 41466,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03617421501953408,
"grad_norm": 74.71255493164062,
"learning_rate": 2.963825784980466e-05,
"loss": 2.4781,
"step": 500
},
{
"epoch": 0.07234843003906816,
"grad_norm": 42.52278137207031,
"learning_rate": 2.927651569960932e-05,
"loss": 1.5533,
"step": 1000
},
{
"epoch": 0.10852264505860224,
"grad_norm": 45.66154479980469,
"learning_rate": 2.8914773549413978e-05,
"loss": 1.4003,
"step": 1500
},
{
"epoch": 0.1446968600781363,
"grad_norm": 93.42730712890625,
"learning_rate": 2.855303139921864e-05,
"loss": 1.3584,
"step": 2000
},
{
"epoch": 0.1808710750976704,
"grad_norm": 105.06791687011719,
"learning_rate": 2.8191289249023298e-05,
"loss": 1.3801,
"step": 2500
},
{
"epoch": 0.21704529011720447,
"grad_norm": 24.335142135620117,
"learning_rate": 2.782954709882796e-05,
"loss": 1.2891,
"step": 3000
},
{
"epoch": 0.2532195051367385,
"grad_norm": 190.3346710205078,
"learning_rate": 2.7467804948632617e-05,
"loss": 1.2827,
"step": 3500
},
{
"epoch": 0.2893937201562726,
"grad_norm": 81.25160217285156,
"learning_rate": 2.7106062798437272e-05,
"loss": 1.3404,
"step": 4000
},
{
"epoch": 0.3255679351758067,
"grad_norm": 0.048206575214862823,
"learning_rate": 2.6744320648241933e-05,
"loss": 1.1371,
"step": 4500
},
{
"epoch": 0.3617421501953408,
"grad_norm": 113.88823699951172,
"learning_rate": 2.638257849804659e-05,
"loss": 1.1761,
"step": 5000
},
{
"epoch": 0.39791636521487483,
"grad_norm": 100.29290771484375,
"learning_rate": 2.6020836347851253e-05,
"loss": 1.2511,
"step": 5500
},
{
"epoch": 0.43409058023440894,
"grad_norm": 41.36812973022461,
"learning_rate": 2.565909419765591e-05,
"loss": 1.1207,
"step": 6000
},
{
"epoch": 0.470264795253943,
"grad_norm": 95.61148071289062,
"learning_rate": 2.5297352047460572e-05,
"loss": 1.2887,
"step": 6500
},
{
"epoch": 0.506439010273477,
"grad_norm": 16.942440032958984,
"learning_rate": 2.493560989726523e-05,
"loss": 1.0208,
"step": 7000
},
{
"epoch": 0.5426132252930111,
"grad_norm": 0.014189004898071289,
"learning_rate": 2.4573867747069888e-05,
"loss": 1.0714,
"step": 7500
},
{
"epoch": 0.5787874403125453,
"grad_norm": 0.005269407294690609,
"learning_rate": 2.421212559687455e-05,
"loss": 1.2271,
"step": 8000
},
{
"epoch": 0.6149616553320792,
"grad_norm": 240.70460510253906,
"learning_rate": 2.3850383446679208e-05,
"loss": 1.1933,
"step": 8500
},
{
"epoch": 0.6511358703516134,
"grad_norm": 26.586158752441406,
"learning_rate": 2.348864129648387e-05,
"loss": 1.0857,
"step": 9000
},
{
"epoch": 0.6873100853711475,
"grad_norm": 7.732858180999756,
"learning_rate": 2.3126899146288527e-05,
"loss": 1.0202,
"step": 9500
},
{
"epoch": 0.7234843003906816,
"grad_norm": 49.43616485595703,
"learning_rate": 2.2765156996093185e-05,
"loss": 1.1122,
"step": 10000
},
{
"epoch": 0.7596585154102156,
"grad_norm": 26.370601654052734,
"learning_rate": 2.2403414845897847e-05,
"loss": 0.9863,
"step": 10500
},
{
"epoch": 0.7958327304297497,
"grad_norm": 55.836578369140625,
"learning_rate": 2.20416726957025e-05,
"loss": 1.0598,
"step": 11000
},
{
"epoch": 0.8320069454492838,
"grad_norm": 305.2657165527344,
"learning_rate": 2.1679930545507163e-05,
"loss": 1.0554,
"step": 11500
},
{
"epoch": 0.8681811604688179,
"grad_norm": 116.24004364013672,
"learning_rate": 2.131818839531182e-05,
"loss": 1.0478,
"step": 12000
},
{
"epoch": 0.9043553754883519,
"grad_norm": 152.19540405273438,
"learning_rate": 2.0956446245116482e-05,
"loss": 0.992,
"step": 12500
},
{
"epoch": 0.940529590507886,
"grad_norm": 148.80564880371094,
"learning_rate": 2.059470409492114e-05,
"loss": 0.9916,
"step": 13000
},
{
"epoch": 0.9767038055274201,
"grad_norm": 95.73086547851562,
"learning_rate": 2.0232961944725798e-05,
"loss": 1.13,
"step": 13500
},
{
"epoch": 1.012878020546954,
"grad_norm": 0.02437233179807663,
"learning_rate": 1.987121979453046e-05,
"loss": 0.8849,
"step": 14000
},
{
"epoch": 1.0490522355664882,
"grad_norm": 6.869604110717773,
"learning_rate": 1.9509477644335118e-05,
"loss": 0.6752,
"step": 14500
},
{
"epoch": 1.0852264505860223,
"grad_norm": 0.5347580909729004,
"learning_rate": 1.914773549413978e-05,
"loss": 0.6661,
"step": 15000
},
{
"epoch": 1.1214006656055564,
"grad_norm": 210.64053344726562,
"learning_rate": 1.8785993343944437e-05,
"loss": 0.7726,
"step": 15500
},
{
"epoch": 1.1575748806250905,
"grad_norm": 14.351602554321289,
"learning_rate": 1.84242511937491e-05,
"loss": 0.7875,
"step": 16000
},
{
"epoch": 1.1937490956446246,
"grad_norm": 6.646307945251465,
"learning_rate": 1.8062509043553756e-05,
"loss": 0.675,
"step": 16500
},
{
"epoch": 1.2299233106641585,
"grad_norm": 0.7076003551483154,
"learning_rate": 1.7700766893358414e-05,
"loss": 0.6639,
"step": 17000
},
{
"epoch": 1.2660975256836926,
"grad_norm": 0.0005671944818459451,
"learning_rate": 1.7339024743163076e-05,
"loss": 0.6347,
"step": 17500
},
{
"epoch": 1.3022717407032267,
"grad_norm": 2.3750717639923096,
"learning_rate": 1.6977282592967734e-05,
"loss": 0.6203,
"step": 18000
},
{
"epoch": 1.3384459557227608,
"grad_norm": 21.23076820373535,
"learning_rate": 1.6615540442772392e-05,
"loss": 0.651,
"step": 18500
},
{
"epoch": 1.374620170742295,
"grad_norm": 2.104569673538208,
"learning_rate": 1.625379829257705e-05,
"loss": 0.715,
"step": 19000
},
{
"epoch": 1.410794385761829,
"grad_norm": 95.23528289794922,
"learning_rate": 1.589205614238171e-05,
"loss": 0.7183,
"step": 19500
},
{
"epoch": 1.446968600781363,
"grad_norm": 0.14208447933197021,
"learning_rate": 1.553031399218637e-05,
"loss": 0.7521,
"step": 20000
},
{
"epoch": 1.483142815800897,
"grad_norm": 0.04999758303165436,
"learning_rate": 1.5168571841991027e-05,
"loss": 0.7045,
"step": 20500
},
{
"epoch": 1.5193170308204311,
"grad_norm": 60.915618896484375,
"learning_rate": 1.4806829691795689e-05,
"loss": 0.708,
"step": 21000
},
{
"epoch": 1.5554912458399652,
"grad_norm": 300.2060546875,
"learning_rate": 1.4445087541600347e-05,
"loss": 0.6256,
"step": 21500
},
{
"epoch": 1.5916654608594993,
"grad_norm": 0.028166990727186203,
"learning_rate": 1.4083345391405007e-05,
"loss": 0.6552,
"step": 22000
},
{
"epoch": 1.6278396758790334,
"grad_norm": 113.1443099975586,
"learning_rate": 1.3721603241209666e-05,
"loss": 0.6614,
"step": 22500
},
{
"epoch": 1.6640138908985675,
"grad_norm": 0.030826840549707413,
"learning_rate": 1.3359861091014326e-05,
"loss": 0.6465,
"step": 23000
},
{
"epoch": 1.7001881059181017,
"grad_norm": 14.439390182495117,
"learning_rate": 1.2998118940818984e-05,
"loss": 0.5828,
"step": 23500
},
{
"epoch": 1.7363623209376358,
"grad_norm": 124.41860961914062,
"learning_rate": 1.2636376790623644e-05,
"loss": 0.6032,
"step": 24000
},
{
"epoch": 1.7725365359571699,
"grad_norm": 0.004580104723572731,
"learning_rate": 1.2274634640428302e-05,
"loss": 0.5891,
"step": 24500
},
{
"epoch": 1.8087107509767038,
"grad_norm": 0.020617296919226646,
"learning_rate": 1.1912892490232962e-05,
"loss": 0.6729,
"step": 25000
},
{
"epoch": 1.8448849659962379,
"grad_norm": 0.0205403883010149,
"learning_rate": 1.1551150340037621e-05,
"loss": 0.6454,
"step": 25500
},
{
"epoch": 1.881059181015772,
"grad_norm": 54.97515869140625,
"learning_rate": 1.1189408189842281e-05,
"loss": 0.5438,
"step": 26000
},
{
"epoch": 1.917233396035306,
"grad_norm": 4.894663333892822,
"learning_rate": 1.082766603964694e-05,
"loss": 0.6154,
"step": 26500
},
{
"epoch": 1.95340761105484,
"grad_norm": 0.05438984930515289,
"learning_rate": 1.0465923889451599e-05,
"loss": 0.5989,
"step": 27000
},
{
"epoch": 1.989581826074374,
"grad_norm": 3.7340047359466553,
"learning_rate": 1.0104181739256258e-05,
"loss": 0.6849,
"step": 27500
},
{
"epoch": 2.025756041093908,
"grad_norm": 0.0035042744129896164,
"learning_rate": 9.742439589060917e-06,
"loss": 0.3786,
"step": 28000
},
{
"epoch": 2.0619302561134423,
"grad_norm": 1.7143951654434204,
"learning_rate": 9.380697438865576e-06,
"loss": 0.2773,
"step": 28500
},
{
"epoch": 2.0981044711329764,
"grad_norm": 128.7152862548828,
"learning_rate": 9.018955288670236e-06,
"loss": 0.4028,
"step": 29000
},
{
"epoch": 2.1342786861525105,
"grad_norm": 0.0006326949223875999,
"learning_rate": 8.657213138474896e-06,
"loss": 0.3371,
"step": 29500
},
{
"epoch": 2.1704529011720446,
"grad_norm": 0.17621225118637085,
"learning_rate": 8.295470988279555e-06,
"loss": 0.3158,
"step": 30000
},
{
"epoch": 2.2066271161915787,
"grad_norm": 0.005477548111230135,
"learning_rate": 7.933728838084215e-06,
"loss": 0.354,
"step": 30500
},
{
"epoch": 2.242801331211113,
"grad_norm": 0.009746459312736988,
"learning_rate": 7.571986687888874e-06,
"loss": 0.3242,
"step": 31000
},
{
"epoch": 2.278975546230647,
"grad_norm": 0.0019139773212373257,
"learning_rate": 7.210244537693533e-06,
"loss": 0.2838,
"step": 31500
},
{
"epoch": 2.315149761250181,
"grad_norm": 0.013819812797009945,
"learning_rate": 6.848502387498191e-06,
"loss": 0.375,
"step": 32000
},
{
"epoch": 2.351323976269715,
"grad_norm": 134.9909210205078,
"learning_rate": 6.486760237302851e-06,
"loss": 0.3825,
"step": 32500
},
{
"epoch": 2.3874981912892492,
"grad_norm": 0.00616120221093297,
"learning_rate": 6.12501808710751e-06,
"loss": 0.2966,
"step": 33000
},
{
"epoch": 2.423672406308783,
"grad_norm": 8.21721363067627,
"learning_rate": 5.763275936912168e-06,
"loss": 0.3228,
"step": 33500
},
{
"epoch": 2.459846621328317,
"grad_norm": 0.01578143984079361,
"learning_rate": 5.401533786716828e-06,
"loss": 0.2585,
"step": 34000
},
{
"epoch": 2.496020836347851,
"grad_norm": 0.0018479038262739778,
"learning_rate": 5.039791636521488e-06,
"loss": 0.3045,
"step": 34500
},
{
"epoch": 2.532195051367385,
"grad_norm": 10.980083465576172,
"learning_rate": 4.678049486326148e-06,
"loss": 0.3528,
"step": 35000
},
{
"epoch": 2.5683692663869193,
"grad_norm": 0.006408748682588339,
"learning_rate": 4.316307336130806e-06,
"loss": 0.2403,
"step": 35500
},
{
"epoch": 2.6045434814064534,
"grad_norm": 0.002309318631887436,
"learning_rate": 3.954565185935465e-06,
"loss": 0.3001,
"step": 36000
},
{
"epoch": 2.6407176964259875,
"grad_norm": 0.07963691651821136,
"learning_rate": 3.5928230357401242e-06,
"loss": 0.3115,
"step": 36500
},
{
"epoch": 2.6768919114455216,
"grad_norm": 0.0058697545900940895,
"learning_rate": 3.231080885544784e-06,
"loss": 0.2219,
"step": 37000
},
{
"epoch": 2.7130661264650557,
"grad_norm": 0.053734105080366135,
"learning_rate": 2.869338735349443e-06,
"loss": 0.3538,
"step": 37500
},
{
"epoch": 2.74924034148459,
"grad_norm": 470.7662048339844,
"learning_rate": 2.507596585154102e-06,
"loss": 0.4097,
"step": 38000
},
{
"epoch": 2.785414556504124,
"grad_norm": 0.008624177426099777,
"learning_rate": 2.1458544349587614e-06,
"loss": 0.2888,
"step": 38500
},
{
"epoch": 2.821588771523658,
"grad_norm": 2.1397125720977783,
"learning_rate": 1.7841122847634205e-06,
"loss": 0.2796,
"step": 39000
},
{
"epoch": 2.857762986543192,
"grad_norm": 0.005030774511396885,
"learning_rate": 1.4223701345680798e-06,
"loss": 0.2816,
"step": 39500
},
{
"epoch": 2.893937201562726,
"grad_norm": 36.75562286376953,
"learning_rate": 1.0606279843727391e-06,
"loss": 0.2714,
"step": 40000
},
{
"epoch": 2.9301114165822604,
"grad_norm": 0.02432202361524105,
"learning_rate": 6.988858341773984e-07,
"loss": 0.2777,
"step": 40500
},
{
"epoch": 2.966285631601794,
"grad_norm": 0.0958167314529419,
"learning_rate": 3.371436839820576e-07,
"loss": 0.291,
"step": 41000
},
{
"epoch": 3.0,
"step": 41466,
"total_flos": 2.1669078739359744e+16,
"train_loss": 0.730605952883344,
"train_runtime": 5598.5562,
"train_samples_per_second": 14.813,
"train_steps_per_second": 7.407
}
],
"logging_steps": 500,
"max_steps": 41466,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1669078739359744e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}