ForzaRoma / checkpoint-600 /trainer_state.json
mjf-su's picture
Upload folder using huggingface_hub
82862c3 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5644402634054563,
"eval_steps": 500,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval/E1/acc_on_CONSISTENT": 0.16435185185185186,
"eval/E1/acc_on_INCONSISTENT": 0.9875,
"eval/E1/accuracy": 0.29296875,
"eval/E1/f1_INC": 0.3038461538461539,
"eval/E1/precision_INC": 0.17954545454545454,
"eval/E1/recall_INC": 0.9875,
"eval/E2/acc_on_CONSISTENT": 0.6785714285714286,
"eval/E2/acc_on_INCONSISTENT": 0.9594594594594594,
"eval/E2/accuracy": 0.759765625,
"eval/E2/f1_INC": 0.6977886977886979,
"eval/E2/precision_INC": 0.5482625482625483,
"eval/E2/recall_INC": 0.9594594594594594,
"eval/E3/acc_on_CONSISTENT": 0.07098765432098765,
"eval/E3/acc_on_INCONSISTENT": 0.9680851063829787,
"eval/E3/accuracy": 0.400390625,
"eval/E3/f1_INC": 0.5424739195230999,
"eval/E3/precision_INC": 0.37681159420289856,
"eval/E3/recall_INC": 0.9680851063829787,
"eval/E4/acc_on_CONSISTENT": 0.0529595015576324,
"eval/E4/acc_on_INCONSISTENT": 0.9476439790575916,
"eval/E4/accuracy": 0.38671875,
"eval/E4/f1_INC": 0.5355029585798816,
"eval/E4/precision_INC": 0.3731958762886598,
"eval/E4/recall_INC": 0.9476439790575916,
"eval/edge_macro_accuracy": 0.4599609375,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.08133971291866028,
"eval/overall/acc_on_INCONSISTENT": 0.966996699669967,
"eval/overall/accuracy": 0.60546875,
"eval/overall/f1_INC": 0.7436548223350254,
"eval/overall/precision_INC": 0.6041237113402061,
"eval/overall/recall_INC": 0.966996699669967,
"eval/overall_exact_match": 0.064453125,
"eval_label": "start",
"step": 0
},
{
"epoch": 0.01881467544684854,
"grad_norm": 0.13588111102581024,
"learning_rate": 5.9375e-05,
"loss": 0.0936,
"step": 20
},
{
"epoch": 0.03762935089369708,
"grad_norm": 0.15987196564674377,
"learning_rate": 9.932104752667314e-05,
"loss": 0.0348,
"step": 40
},
{
"epoch": 0.05644402634054563,
"grad_norm": 0.14962074160575867,
"learning_rate": 9.73811833171678e-05,
"loss": 0.0273,
"step": 60
},
{
"epoch": 0.07525870178739416,
"grad_norm": 0.1912640929222107,
"learning_rate": 9.544131910766246e-05,
"loss": 0.0248,
"step": 80
},
{
"epoch": 0.09407337723424271,
"eval/E1/acc_on_CONSISTENT": 0.8819444444444444,
"eval/E1/acc_on_INCONSISTENT": 0.4625,
"eval/E1/accuracy": 0.81640625,
"eval/E1/f1_INC": 0.44047619047619047,
"eval/E1/precision_INC": 0.42045454545454547,
"eval/E1/recall_INC": 0.4625,
"eval/E2/acc_on_CONSISTENT": 0.9752747252747253,
"eval/E2/acc_on_INCONSISTENT": 0.8243243243243243,
"eval/E2/accuracy": 0.931640625,
"eval/E2/f1_INC": 0.8745519713261649,
"eval/E2/precision_INC": 0.9312977099236641,
"eval/E2/recall_INC": 0.8243243243243243,
"eval/E3/acc_on_CONSISTENT": 0.8179012345679012,
"eval/E3/acc_on_INCONSISTENT": 0.6968085106382979,
"eval/E3/accuracy": 0.7734375,
"eval/E3/f1_INC": 0.6931216931216931,
"eval/E3/precision_INC": 0.6894736842105263,
"eval/E3/recall_INC": 0.6968085106382979,
"eval/E4/acc_on_CONSISTENT": 0.8629283489096573,
"eval/E4/acc_on_INCONSISTENT": 0.7068062827225131,
"eval/E4/accuracy": 0.8046875,
"eval/E4/f1_INC": 0.7297297297297297,
"eval/E4/precision_INC": 0.7541899441340782,
"eval/E4/recall_INC": 0.7068062827225131,
"eval/edge_macro_accuracy": 0.83154296875,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.8325358851674641,
"eval/overall/acc_on_INCONSISTENT": 0.7887788778877888,
"eval/overall/accuracy": 0.806640625,
"eval/overall/f1_INC": 0.82842287694974,
"eval/overall/precision_INC": 0.8722627737226277,
"eval/overall/recall_INC": 0.7887788778877888,
"eval/overall_exact_match": 0.576171875,
"eval_label": "step_100",
"step": 100
},
{
"epoch": 0.09407337723424271,
"grad_norm": 0.10156096518039703,
"learning_rate": 9.350145489815713e-05,
"loss": 0.0248,
"step": 100
},
{
"epoch": 0.11288805268109126,
"grad_norm": 0.15984299778938293,
"learning_rate": 9.15615906886518e-05,
"loss": 0.0241,
"step": 120
},
{
"epoch": 0.1317027281279398,
"grad_norm": 0.14268864691257477,
"learning_rate": 8.962172647914647e-05,
"loss": 0.0222,
"step": 140
},
{
"epoch": 0.15051740357478832,
"grad_norm": 0.09480800479650497,
"learning_rate": 8.768186226964112e-05,
"loss": 0.022,
"step": 160
},
{
"epoch": 0.16933207902163688,
"grad_norm": 0.09908359497785568,
"learning_rate": 8.57419980601358e-05,
"loss": 0.0205,
"step": 180
},
{
"epoch": 0.18814675446848542,
"eval/E1/acc_on_CONSISTENT": 0.9236111111111112,
"eval/E1/acc_on_INCONSISTENT": 0.4125,
"eval/E1/accuracy": 0.84375,
"eval/E1/f1_INC": 0.4520547945205479,
"eval/E1/precision_INC": 0.5,
"eval/E1/recall_INC": 0.4125,
"eval/E2/acc_on_CONSISTENT": 0.945054945054945,
"eval/E2/acc_on_INCONSISTENT": 0.9391891891891891,
"eval/E2/accuracy": 0.943359375,
"eval/E2/f1_INC": 0.9055374592833876,
"eval/E2/precision_INC": 0.8742138364779874,
"eval/E2/recall_INC": 0.9391891891891891,
"eval/E3/acc_on_CONSISTENT": 0.8703703703703703,
"eval/E3/acc_on_INCONSISTENT": 0.8191489361702128,
"eval/E3/accuracy": 0.8515625,
"eval/E3/f1_INC": 0.8020833333333333,
"eval/E3/precision_INC": 0.7857142857142857,
"eval/E3/recall_INC": 0.8191489361702128,
"eval/E4/acc_on_CONSISTENT": 0.8411214953271028,
"eval/E4/acc_on_INCONSISTENT": 0.7905759162303665,
"eval/E4/accuracy": 0.822265625,
"eval/E4/f1_INC": 0.7684478371501273,
"eval/E4/precision_INC": 0.7475247524752475,
"eval/E4/recall_INC": 0.7905759162303665,
"eval/edge_macro_accuracy": 0.865234375,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.7559808612440191,
"eval/overall/acc_on_INCONSISTENT": 0.8712871287128713,
"eval/overall/accuracy": 0.82421875,
"eval/overall/f1_INC": 0.854368932038835,
"eval/overall/precision_INC": 0.8380952380952381,
"eval/overall/recall_INC": 0.8712871287128713,
"eval/overall_exact_match": 0.63671875,
"eval_label": "step_200",
"step": 200
},
{
"epoch": 0.18814675446848542,
"grad_norm": 0.1302443891763687,
"learning_rate": 8.380213385063046e-05,
"loss": 0.0211,
"step": 200
},
{
"epoch": 0.20696142991533395,
"grad_norm": 0.14304408431053162,
"learning_rate": 8.186226964112513e-05,
"loss": 0.0187,
"step": 220
},
{
"epoch": 0.2257761053621825,
"grad_norm": 0.20394913852214813,
"learning_rate": 7.99224054316198e-05,
"loss": 0.0208,
"step": 240
},
{
"epoch": 0.24459078080903104,
"grad_norm": 0.11161988973617554,
"learning_rate": 7.798254122211446e-05,
"loss": 0.019,
"step": 260
},
{
"epoch": 0.2634054562558796,
"grad_norm": 0.11904545873403549,
"learning_rate": 7.604267701260912e-05,
"loss": 0.0204,
"step": 280
},
{
"epoch": 0.28222013170272814,
"eval/E1/acc_on_CONSISTENT": 0.9652777777777778,
"eval/E1/acc_on_INCONSISTENT": 0.3375,
"eval/E1/accuracy": 0.8671875,
"eval/E1/f1_INC": 0.4426229508196722,
"eval/E1/precision_INC": 0.6428571428571429,
"eval/E1/recall_INC": 0.3375,
"eval/E2/acc_on_CONSISTENT": 0.967032967032967,
"eval/E2/acc_on_INCONSISTENT": 0.9324324324324325,
"eval/E2/accuracy": 0.95703125,
"eval/E2/f1_INC": 0.9261744966442953,
"eval/E2/precision_INC": 0.92,
"eval/E2/recall_INC": 0.9324324324324325,
"eval/E3/acc_on_CONSISTENT": 0.9382716049382716,
"eval/E3/acc_on_INCONSISTENT": 0.8138297872340425,
"eval/E3/accuracy": 0.892578125,
"eval/E3/f1_INC": 0.8476454293628809,
"eval/E3/precision_INC": 0.884393063583815,
"eval/E3/recall_INC": 0.8138297872340425,
"eval/E4/acc_on_CONSISTENT": 0.8940809968847352,
"eval/E4/acc_on_INCONSISTENT": 0.8010471204188482,
"eval/E4/accuracy": 0.859375,
"eval/E4/f1_INC": 0.8095238095238095,
"eval/E4/precision_INC": 0.8181818181818182,
"eval/E4/recall_INC": 0.8010471204188482,
"eval/edge_macro_accuracy": 0.89404296875,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.8851674641148325,
"eval/overall/acc_on_INCONSISTENT": 0.8448844884488449,
"eval/overall/accuracy": 0.861328125,
"eval/overall/f1_INC": 0.8782161234991424,
"eval/overall/precision_INC": 0.9142857142857143,
"eval/overall/recall_INC": 0.8448844884488449,
"eval/overall_exact_match": 0.697265625,
"eval_label": "step_300",
"step": 300
},
{
"epoch": 0.28222013170272814,
"grad_norm": 0.1075235903263092,
"learning_rate": 7.410281280310378e-05,
"loss": 0.0212,
"step": 300
},
{
"epoch": 0.30103480714957664,
"grad_norm": 0.08106118440628052,
"learning_rate": 7.216294859359845e-05,
"loss": 0.0177,
"step": 320
},
{
"epoch": 0.3198494825964252,
"grad_norm": 0.07731106132268906,
"learning_rate": 7.022308438409312e-05,
"loss": 0.0182,
"step": 340
},
{
"epoch": 0.33866415804327377,
"grad_norm": 0.1482355296611786,
"learning_rate": 6.828322017458779e-05,
"loss": 0.0178,
"step": 360
},
{
"epoch": 0.35747883349012227,
"grad_norm": 0.07897721230983734,
"learning_rate": 6.634335596508244e-05,
"loss": 0.018,
"step": 380
},
{
"epoch": 0.37629350893697083,
"eval/E1/acc_on_CONSISTENT": 0.9722222222222222,
"eval/E1/acc_on_INCONSISTENT": 0.45,
"eval/E1/accuracy": 0.890625,
"eval/E1/f1_INC": 0.5625000000000001,
"eval/E1/precision_INC": 0.75,
"eval/E1/recall_INC": 0.45,
"eval/E2/acc_on_CONSISTENT": 0.9697802197802198,
"eval/E2/acc_on_INCONSISTENT": 0.9324324324324325,
"eval/E2/accuracy": 0.958984375,
"eval/E2/f1_INC": 0.9292929292929293,
"eval/E2/precision_INC": 0.9261744966442953,
"eval/E2/recall_INC": 0.9324324324324325,
"eval/E3/acc_on_CONSISTENT": 0.9598765432098766,
"eval/E3/acc_on_INCONSISTENT": 0.8138297872340425,
"eval/E3/accuracy": 0.90625,
"eval/E3/f1_INC": 0.8644067796610171,
"eval/E3/precision_INC": 0.9216867469879518,
"eval/E3/recall_INC": 0.8138297872340425,
"eval/E4/acc_on_CONSISTENT": 0.9096573208722741,
"eval/E4/acc_on_INCONSISTENT": 0.806282722513089,
"eval/E4/accuracy": 0.87109375,
"eval/E4/f1_INC": 0.823529411764706,
"eval/E4/precision_INC": 0.8415300546448088,
"eval/E4/recall_INC": 0.806282722513089,
"eval/edge_macro_accuracy": 0.90673828125,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.9282296650717703,
"eval/overall/acc_on_INCONSISTENT": 0.8481848184818482,
"eval/overall/accuracy": 0.880859375,
"eval/overall/f1_INC": 0.8939130434782608,
"eval/overall/precision_INC": 0.9448529411764706,
"eval/overall/recall_INC": 0.8481848184818482,
"eval/overall_exact_match": 0.728515625,
"eval_label": "step_400",
"step": 400
},
{
"epoch": 0.37629350893697083,
"grad_norm": 0.1120094284415245,
"learning_rate": 6.440349175557712e-05,
"loss": 0.0168,
"step": 400
},
{
"epoch": 0.3951081843838194,
"grad_norm": 0.16071178019046783,
"learning_rate": 6.246362754607178e-05,
"loss": 0.016,
"step": 420
},
{
"epoch": 0.4139228598306679,
"grad_norm": 0.10165177285671234,
"learning_rate": 6.0523763336566445e-05,
"loss": 0.0159,
"step": 440
},
{
"epoch": 0.43273753527751646,
"grad_norm": 0.08946932107210159,
"learning_rate": 5.8583899127061106e-05,
"loss": 0.0167,
"step": 460
},
{
"epoch": 0.451552210724365,
"grad_norm": 0.07830841839313507,
"learning_rate": 5.664403491755578e-05,
"loss": 0.016,
"step": 480
},
{
"epoch": 0.4703668861712135,
"eval/E1/acc_on_CONSISTENT": 0.9444444444444444,
"eval/E1/acc_on_INCONSISTENT": 0.575,
"eval/E1/accuracy": 0.88671875,
"eval/E1/f1_INC": 0.6133333333333333,
"eval/E1/precision_INC": 0.6571428571428571,
"eval/E1/recall_INC": 0.575,
"eval/E2/acc_on_CONSISTENT": 0.9285714285714286,
"eval/E2/acc_on_INCONSISTENT": 0.9594594594594594,
"eval/E2/accuracy": 0.9375,
"eval/E2/f1_INC": 0.8987341772151899,
"eval/E2/precision_INC": 0.8452380952380952,
"eval/E2/recall_INC": 0.9594594594594594,
"eval/E3/acc_on_CONSISTENT": 0.8919753086419753,
"eval/E3/acc_on_INCONSISTENT": 0.8882978723404256,
"eval/E3/accuracy": 0.890625,
"eval/E3/f1_INC": 0.8564102564102564,
"eval/E3/precision_INC": 0.8267326732673267,
"eval/E3/recall_INC": 0.8882978723404256,
"eval/E4/acc_on_CONSISTENT": 0.8847352024922118,
"eval/E4/acc_on_INCONSISTENT": 0.8638743455497382,
"eval/E4/accuracy": 0.876953125,
"eval/E4/f1_INC": 0.8396946564885497,
"eval/E4/precision_INC": 0.8168316831683168,
"eval/E4/recall_INC": 0.8638743455497382,
"eval/edge_macro_accuracy": 0.89794921875,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.8325358851674641,
"eval/overall/acc_on_INCONSISTENT": 0.8976897689768977,
"eval/overall/accuracy": 0.87109375,
"eval/overall/f1_INC": 0.8918032786885246,
"eval/overall/precision_INC": 0.8859934853420195,
"eval/overall/recall_INC": 0.8976897689768977,
"eval/overall_exact_match": 0.71484375,
"eval_label": "step_500",
"step": 500
},
{
"epoch": 0.4703668861712135,
"grad_norm": 0.23986610770225525,
"learning_rate": 5.470417070805044e-05,
"loss": 0.0168,
"step": 500
},
{
"epoch": 0.4891815616180621,
"grad_norm": 0.07184334099292755,
"learning_rate": 5.27643064985451e-05,
"loss": 0.0169,
"step": 520
},
{
"epoch": 0.5079962370649106,
"grad_norm": 0.0486169196665287,
"learning_rate": 5.0824442289039763e-05,
"loss": 0.0148,
"step": 540
},
{
"epoch": 0.5268109125117592,
"grad_norm": 0.11095824092626572,
"learning_rate": 4.888457807953444e-05,
"loss": 0.0173,
"step": 560
},
{
"epoch": 0.5456255879586077,
"grad_norm": 0.11410392075777054,
"learning_rate": 4.69447138700291e-05,
"loss": 0.0149,
"step": 580
},
{
"epoch": 0.5644402634054563,
"eval/E1/acc_on_CONSISTENT": 0.9722222222222222,
"eval/E1/acc_on_INCONSISTENT": 0.4375,
"eval/E1/accuracy": 0.888671875,
"eval/E1/f1_INC": 0.5511811023622046,
"eval/E1/precision_INC": 0.7446808510638298,
"eval/E1/recall_INC": 0.4375,
"eval/E2/acc_on_CONSISTENT": 0.978021978021978,
"eval/E2/acc_on_INCONSISTENT": 0.9391891891891891,
"eval/E2/accuracy": 0.966796875,
"eval/E2/f1_INC": 0.9423728813559321,
"eval/E2/precision_INC": 0.9455782312925171,
"eval/E2/recall_INC": 0.9391891891891891,
"eval/E3/acc_on_CONSISTENT": 0.9691358024691358,
"eval/E3/acc_on_INCONSISTENT": 0.75,
"eval/E3/accuracy": 0.888671875,
"eval/E3/f1_INC": 0.831858407079646,
"eval/E3/precision_INC": 0.9337748344370861,
"eval/E3/recall_INC": 0.75,
"eval/E4/acc_on_CONSISTENT": 0.956386292834891,
"eval/E4/acc_on_INCONSISTENT": 0.7801047120418848,
"eval/E4/accuracy": 0.890625,
"eval/E4/f1_INC": 0.8418079096045198,
"eval/E4/precision_INC": 0.9141104294478528,
"eval/E4/recall_INC": 0.7801047120418848,
"eval/edge_macro_accuracy": 0.90869140625,
"eval/malformed_rate": 0.0,
"eval/n_eval": 512.0,
"eval/overall/acc_on_CONSISTENT": 0.9473684210526315,
"eval/overall/acc_on_INCONSISTENT": 0.8118811881188119,
"eval/overall/accuracy": 0.8671875,
"eval/overall/f1_INC": 0.8785714285714287,
"eval/overall/precision_INC": 0.9571984435797666,
"eval/overall/recall_INC": 0.8118811881188119,
"eval/overall_exact_match": 0.728515625,
"eval_label": "step_600",
"step": 600
},
{
"epoch": 0.5644402634054563,
"grad_norm": 0.13476960361003876,
"learning_rate": 4.500484966052377e-05,
"loss": 0.0159,
"step": 600
}
],
"logging_steps": 20,
"max_steps": 1063,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.738632608964936e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}