Instructions to use Deehan1866/PR-pass-structbert-large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Deehan1866/PR-pass-structbert-large with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("question-answering", model="Deehan1866/PR-pass-structbert-large")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("Deehan1866/PR-pass-structbert-large")
model = AutoModelForQuestionAnswering.from_pretrained("Deehan1866/PR-pass-structbert-large")
```

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 93.7, | |
| "best_model_checkpoint": "../results/phrase_retrieval/PR-pass/qa/mpsDistillbert/finetuned/checkpoint-5000", | |
| "epoch": 2.0, | |
| "eval_steps": 100, | |
| "global_step": 5066, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03947887879984208, | |
| "grad_norm": 36.08970642089844, | |
| "learning_rate": 2.9407816818002367e-05, | |
| "loss": 2.4898, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03947887879984208, | |
| "eval_exact_match": 79.53333333333333, | |
| "eval_f1": 85.07605280322392, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07895775759968417, | |
| "grad_norm": 15.822399139404297, | |
| "learning_rate": 2.8815633636004737e-05, | |
| "loss": 0.7877, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07895775759968417, | |
| "eval_exact_match": 82.16666666666667, | |
| "eval_f1": 86.96412271126349, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11843663639952626, | |
| "grad_norm": 53.009056091308594, | |
| "learning_rate": 2.8223450454007107e-05, | |
| "loss": 0.6493, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11843663639952626, | |
| "eval_exact_match": 86.96666666666667, | |
| "eval_f1": 90.26251606941636, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15791551519936833, | |
| "grad_norm": 33.941368103027344, | |
| "learning_rate": 2.7631267272009477e-05, | |
| "loss": 0.6859, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15791551519936833, | |
| "eval_exact_match": 87.7, | |
| "eval_f1": 90.70352596039436, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1973943939992104, | |
| "grad_norm": 33.127262115478516, | |
| "learning_rate": 2.7039084090011843e-05, | |
| "loss": 0.6207, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1973943939992104, | |
| "eval_exact_match": 88.76666666666667, | |
| "eval_f1": 91.3789169814247, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23687327279905251, | |
| "grad_norm": 73.58187103271484, | |
| "learning_rate": 2.6446900908014213e-05, | |
| "loss": 0.4846, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.23687327279905251, | |
| "eval_exact_match": 89.6, | |
| "eval_f1": 92.12643429555197, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2763521515988946, | |
| "grad_norm": 38.15833282470703, | |
| "learning_rate": 2.585471772601658e-05, | |
| "loss": 0.4907, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2763521515988946, | |
| "eval_exact_match": 89.5, | |
| "eval_f1": 92.18318144579082, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.31583103039873667, | |
| "grad_norm": 48.2036018371582, | |
| "learning_rate": 2.5262534544018952e-05, | |
| "loss": 0.4449, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.31583103039873667, | |
| "eval_exact_match": 89.36666666666666, | |
| "eval_f1": 92.03679853056012, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.35530990919857874, | |
| "grad_norm": 48.83040237426758, | |
| "learning_rate": 2.467035136202132e-05, | |
| "loss": 0.4882, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.35530990919857874, | |
| "eval_exact_match": 91.0, | |
| "eval_f1": 93.24476194351007, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3947887879984208, | |
| "grad_norm": 33.257354736328125, | |
| "learning_rate": 2.407816818002369e-05, | |
| "loss": 0.4357, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3947887879984208, | |
| "eval_exact_match": 91.16666666666667, | |
| "eval_f1": 93.38561670865543, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.43426766679826295, | |
| "grad_norm": 36.499168395996094, | |
| "learning_rate": 2.3485984998026055e-05, | |
| "loss": 0.499, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.43426766679826295, | |
| "eval_exact_match": 90.96666666666667, | |
| "eval_f1": 93.31853006155642, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.47374654559810503, | |
| "grad_norm": 9.117573738098145, | |
| "learning_rate": 2.2893801816028428e-05, | |
| "loss": 0.4294, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.47374654559810503, | |
| "eval_exact_match": 90.83333333333333, | |
| "eval_f1": 93.1301494526572, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.513225424397947, | |
| "grad_norm": 47.918365478515625, | |
| "learning_rate": 2.2301618634030794e-05, | |
| "loss": 0.4528, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.513225424397947, | |
| "eval_exact_match": 91.8, | |
| "eval_f1": 93.93781839408155, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5527043031977892, | |
| "grad_norm": 2.020205020904541, | |
| "learning_rate": 2.1709435452033164e-05, | |
| "loss": 0.3965, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5527043031977892, | |
| "eval_exact_match": 91.46666666666667, | |
| "eval_f1": 93.66177518095938, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5921831819976313, | |
| "grad_norm": 16.072200775146484, | |
| "learning_rate": 2.111725227003553e-05, | |
| "loss": 0.3609, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5921831819976313, | |
| "eval_exact_match": 91.7, | |
| "eval_f1": 93.75143838703438, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6316620607974733, | |
| "grad_norm": 48.690391540527344, | |
| "learning_rate": 2.05250690880379e-05, | |
| "loss": 0.4318, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6316620607974733, | |
| "eval_exact_match": 91.46666666666667, | |
| "eval_f1": 93.55887712871922, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6711409395973155, | |
| "grad_norm": 3.189640998840332, | |
| "learning_rate": 1.993288590604027e-05, | |
| "loss": 0.3602, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6711409395973155, | |
| "eval_exact_match": 92.43333333333334, | |
| "eval_f1": 94.2249514830154, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7106198183971575, | |
| "grad_norm": 0.2732882499694824, | |
| "learning_rate": 1.9340702724042637e-05, | |
| "loss": 0.3656, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7106198183971575, | |
| "eval_exact_match": 92.3, | |
| "eval_f1": 94.17740849673203, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7500986971969996, | |
| "grad_norm": 25.344755172729492, | |
| "learning_rate": 1.8748519542045006e-05, | |
| "loss": 0.3825, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7500986971969996, | |
| "eval_exact_match": 92.16666666666667, | |
| "eval_f1": 94.0608726088726, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7895775759968416, | |
| "grad_norm": 48.43545913696289, | |
| "learning_rate": 1.8156336360047373e-05, | |
| "loss": 0.3447, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7895775759968416, | |
| "eval_exact_match": 92.53333333333333, | |
| "eval_f1": 94.25784554225729, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8290564547966838, | |
| "grad_norm": 38.915374755859375, | |
| "learning_rate": 1.7564153178049743e-05, | |
| "loss": 0.3033, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8290564547966838, | |
| "eval_exact_match": 92.0, | |
| "eval_f1": 94.01127170868344, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8685353335965259, | |
| "grad_norm": 3.7626020908355713, | |
| "learning_rate": 1.6971969996052112e-05, | |
| "loss": 0.3126, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8685353335965259, | |
| "eval_exact_match": 92.4, | |
| "eval_f1": 94.20074145705928, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9080142123963679, | |
| "grad_norm": 11.15784740447998, | |
| "learning_rate": 1.6379786814054482e-05, | |
| "loss": 0.368, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9080142123963679, | |
| "eval_exact_match": 92.46666666666667, | |
| "eval_f1": 94.47528310578315, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9474930911962101, | |
| "grad_norm": 55.31396484375, | |
| "learning_rate": 1.578760363205685e-05, | |
| "loss": 0.3398, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9474930911962101, | |
| "eval_exact_match": 92.46666666666667, | |
| "eval_f1": 94.40168950153159, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9869719699960521, | |
| "grad_norm": 32.73415756225586, | |
| "learning_rate": 1.5195420450059218e-05, | |
| "loss": 0.3177, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9869719699960521, | |
| "eval_exact_match": 92.46666666666667, | |
| "eval_f1": 94.35837233137235, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.026450848795894, | |
| "grad_norm": 24.14990234375, | |
| "learning_rate": 1.4603237268061586e-05, | |
| "loss": 0.2105, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.026450848795894, | |
| "eval_exact_match": 92.7, | |
| "eval_f1": 94.4984410208528, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.0659297275957362, | |
| "grad_norm": 0.0023591353092342615, | |
| "learning_rate": 1.4011054086063956e-05, | |
| "loss": 0.1006, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0659297275957362, | |
| "eval_exact_match": 92.36666666666666, | |
| "eval_f1": 94.37342015392016, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.1054086063955784, | |
| "grad_norm": 20.859468460083008, | |
| "learning_rate": 1.3418870904066324e-05, | |
| "loss": 0.1476, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.1054086063955784, | |
| "eval_exact_match": 92.63333333333334, | |
| "eval_f1": 94.58407021341235, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.1448874851954205, | |
| "grad_norm": 0.3791426420211792, | |
| "learning_rate": 1.2826687722068692e-05, | |
| "loss": 0.1295, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.1448874851954205, | |
| "eval_exact_match": 93.13333333333334, | |
| "eval_f1": 94.95533549783549, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.1843663639952626, | |
| "grad_norm": 4.289775371551514, | |
| "learning_rate": 1.2234504540071062e-05, | |
| "loss": 0.1727, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.1843663639952626, | |
| "eval_exact_match": 92.66666666666667, | |
| "eval_f1": 94.65173544973547, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2238452427951045, | |
| "grad_norm": 0.0014754978474229574, | |
| "learning_rate": 1.164232135807343e-05, | |
| "loss": 0.1345, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2238452427951045, | |
| "eval_exact_match": 92.96666666666667, | |
| "eval_f1": 94.9088371686793, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2633241215949467, | |
| "grad_norm": 43.49968338012695, | |
| "learning_rate": 1.10501381760758e-05, | |
| "loss": 0.202, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2633241215949467, | |
| "eval_exact_match": 92.66666666666667, | |
| "eval_f1": 94.57915070481636, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.3028030003947888, | |
| "grad_norm": 13.5677490234375, | |
| "learning_rate": 1.0457954994078168e-05, | |
| "loss": 0.1898, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.3028030003947888, | |
| "eval_exact_match": 92.86666666666666, | |
| "eval_f1": 94.8515283790284, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.342281879194631, | |
| "grad_norm": 76.90211486816406, | |
| "learning_rate": 9.865771812080538e-06, | |
| "loss": 0.1433, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.342281879194631, | |
| "eval_exact_match": 93.46666666666667, | |
| "eval_f1": 95.2914963359081, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.3817607579944728, | |
| "grad_norm": 7.809657096862793, | |
| "learning_rate": 9.273588630082906e-06, | |
| "loss": 0.1693, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.3817607579944728, | |
| "eval_exact_match": 93.0, | |
| "eval_f1": 94.87400432900435, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.421239636794315, | |
| "grad_norm": 4.223247051239014, | |
| "learning_rate": 8.681405448085274e-06, | |
| "loss": 0.1827, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.421239636794315, | |
| "eval_exact_match": 93.33333333333333, | |
| "eval_f1": 95.12902597402599, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.460718515594157, | |
| "grad_norm": 27.452770233154297, | |
| "learning_rate": 8.089222266087644e-06, | |
| "loss": 0.1843, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.460718515594157, | |
| "eval_exact_match": 93.36666666666666, | |
| "eval_f1": 95.09415584415585, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.5001973943939992, | |
| "grad_norm": 20.997011184692383, | |
| "learning_rate": 7.497039084090013e-06, | |
| "loss": 0.1656, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5001973943939992, | |
| "eval_exact_match": 93.7, | |
| "eval_f1": 95.35056277056279, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5396762731938414, | |
| "grad_norm": 0.005699894856661558, | |
| "learning_rate": 6.90485590209238e-06, | |
| "loss": 0.1013, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.5396762731938414, | |
| "eval_exact_match": 93.4, | |
| "eval_f1": 95.10358730158731, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.5791551519936835, | |
| "grad_norm": 4.07551383972168, | |
| "learning_rate": 6.312672720094749e-06, | |
| "loss": 0.1632, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.5791551519936835, | |
| "eval_exact_match": 93.43333333333334, | |
| "eval_f1": 95.18676190476195, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6186340307935254, | |
| "grad_norm": 0.002155046910047531, | |
| "learning_rate": 5.720489538097118e-06, | |
| "loss": 0.1702, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.6186340307935254, | |
| "eval_exact_match": 93.36666666666666, | |
| "eval_f1": 95.09804473304477, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.6581129095933675, | |
| "grad_norm": 0.017265846952795982, | |
| "learning_rate": 5.128306356099487e-06, | |
| "loss": 0.1308, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6581129095933675, | |
| "eval_exact_match": 93.23333333333333, | |
| "eval_f1": 94.99049422799425, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6975917883932097, | |
| "grad_norm": 0.009932265616953373, | |
| "learning_rate": 4.536123174101856e-06, | |
| "loss": 0.1608, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.6975917883932097, | |
| "eval_exact_match": 93.63333333333334, | |
| "eval_f1": 95.32610028860032, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.7370706671930516, | |
| "grad_norm": 56.21049880981445, | |
| "learning_rate": 3.943939992104225e-06, | |
| "loss": 0.1296, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.7370706671930516, | |
| "eval_exact_match": 93.43333333333334, | |
| "eval_f1": 95.17288023088024, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.7765495459928937, | |
| "grad_norm": 78.09048461914062, | |
| "learning_rate": 3.3517568101065932e-06, | |
| "loss": 0.1752, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.7765495459928937, | |
| "eval_exact_match": 93.66666666666667, | |
| "eval_f1": 95.27616883116887, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8160284247927359, | |
| "grad_norm": 2.959677219390869, | |
| "learning_rate": 2.7595736281089617e-06, | |
| "loss": 0.1195, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.8160284247927359, | |
| "eval_exact_match": 93.43333333333334, | |
| "eval_f1": 95.0532770562771, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.855507303592578, | |
| "grad_norm": 69.6814193725586, | |
| "learning_rate": 2.1673904461113303e-06, | |
| "loss": 0.1849, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.855507303592578, | |
| "eval_exact_match": 93.5, | |
| "eval_f1": 95.10591774891775, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.8949861823924201, | |
| "grad_norm": 0.19182445108890533, | |
| "learning_rate": 1.5752072641136992e-06, | |
| "loss": 0.09, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.8949861823924201, | |
| "eval_exact_match": 93.46666666666667, | |
| "eval_f1": 95.12782467532469, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.9344650611922622, | |
| "grad_norm": 65.0757827758789, | |
| "learning_rate": 9.83024082116068e-07, | |
| "loss": 0.1393, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.9344650611922622, | |
| "eval_exact_match": 93.7, | |
| "eval_f1": 95.2371066252588, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.9739439399921044, | |
| "grad_norm": 96.50936889648438, | |
| "learning_rate": 3.9084090011843665e-07, | |
| "loss": 0.149, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.9739439399921044, | |
| "eval_exact_match": 93.7, | |
| "eval_f1": 95.25543995859213, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 5066, | |
| "total_flos": 3.763306473501082e+16, | |
| "train_loss": 0.339278704556278, | |
| "train_runtime": 7807.973, | |
| "train_samples_per_second": 5.19, | |
| "train_steps_per_second": 0.649 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 5066, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.763306473501082e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |