Instructions to use deu05232/repllama-llama2-7B-RQ1-only_q_version with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use deu05232/repllama-llama2-7B-RQ1-only_q_version with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996979764421625, | |
| "eval_steps": 500, | |
| "global_step": 1655, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006040471156750227, | |
| "grad_norm": 6.352119345594041, | |
| "learning_rate": 4.9999999999999996e-05, | |
| "loss": 5.7625, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012080942313500454, | |
| "grad_norm": 3.5107589040708578, | |
| "learning_rate": 6.505149978319905e-05, | |
| "loss": 3.1477, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.018121413470250678, | |
| "grad_norm": 1.7597502982401718, | |
| "learning_rate": 7.385606273598311e-05, | |
| "loss": 1.8234, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.024161884627000908, | |
| "grad_norm": 2.0430582664699704, | |
| "learning_rate": 8.01029995663981e-05, | |
| "loss": 1.482, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.030202355783751134, | |
| "grad_norm": 2.30138022647804, | |
| "learning_rate": 8.494850021680092e-05, | |
| "loss": 1.427, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.036242826940501356, | |
| "grad_norm": 1.961300808555845, | |
| "learning_rate": 8.890756251918216e-05, | |
| "loss": 1.3652, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.042283298097251586, | |
| "grad_norm": 1.5670057056368645, | |
| "learning_rate": 9.225490200071284e-05, | |
| "loss": 1.2859, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.048323769254001815, | |
| "grad_norm": 1.5648604780816326, | |
| "learning_rate": 9.515449934959716e-05, | |
| "loss": 1.2166, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05436424041075204, | |
| "grad_norm": 1.5594489363452173, | |
| "learning_rate": 9.771212547196623e-05, | |
| "loss": 1.2299, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06040471156750227, | |
| "grad_norm": 1.478498798125124, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 1.1406, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0664451827242525, | |
| "grad_norm": 1.3456415286113537, | |
| "learning_rate": 9.942122186495178e-05, | |
| "loss": 1.1512, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07248565388100271, | |
| "grad_norm": 1.4169868695799486, | |
| "learning_rate": 9.877813504823152e-05, | |
| "loss": 1.2473, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07852612503775294, | |
| "grad_norm": 1.5442406663559958, | |
| "learning_rate": 9.813504823151127e-05, | |
| "loss": 1.1609, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08456659619450317, | |
| "grad_norm": 1.3840740480619829, | |
| "learning_rate": 9.7491961414791e-05, | |
| "loss": 1.1617, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0906070673512534, | |
| "grad_norm": 1.1099163069476086, | |
| "learning_rate": 9.684887459807074e-05, | |
| "loss": 1.1746, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09664753850800363, | |
| "grad_norm": 1.2420434112131873, | |
| "learning_rate": 9.620578778135049e-05, | |
| "loss": 1.1504, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10268800966475385, | |
| "grad_norm": 1.081321720135299, | |
| "learning_rate": 9.556270096463023e-05, | |
| "loss": 1.1605, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.10872848082150408, | |
| "grad_norm": 1.0982034537770153, | |
| "learning_rate": 9.491961414790998e-05, | |
| "loss": 1.0799, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1147689519782543, | |
| "grad_norm": 1.1736811633063773, | |
| "learning_rate": 9.427652733118972e-05, | |
| "loss": 1.1242, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12080942313500453, | |
| "grad_norm": 1.3595797342301437, | |
| "learning_rate": 9.363344051446946e-05, | |
| "loss": 1.1016, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12684989429175475, | |
| "grad_norm": 1.2621841618934533, | |
| "learning_rate": 9.29903536977492e-05, | |
| "loss": 1.0695, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.132890365448505, | |
| "grad_norm": 1.2070699106064489, | |
| "learning_rate": 9.234726688102894e-05, | |
| "loss": 1.1049, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1389308366052552, | |
| "grad_norm": 1.0416383062176056, | |
| "learning_rate": 9.17041800643087e-05, | |
| "loss": 1.1467, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.14497130776200542, | |
| "grad_norm": 1.1856097509180394, | |
| "learning_rate": 9.106109324758843e-05, | |
| "loss": 1.0746, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.15101177891875567, | |
| "grad_norm": 1.144246874115688, | |
| "learning_rate": 9.041800643086817e-05, | |
| "loss": 1.2018, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15705225007550588, | |
| "grad_norm": 0.9353587652819314, | |
| "learning_rate": 8.977491961414792e-05, | |
| "loss": 1.0789, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.16309272123225613, | |
| "grad_norm": 1.0285527563460544, | |
| "learning_rate": 8.913183279742766e-05, | |
| "loss": 1.0555, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.16913319238900634, | |
| "grad_norm": 0.9852353032151006, | |
| "learning_rate": 8.848874598070739e-05, | |
| "loss": 1.1451, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.17517366354575656, | |
| "grad_norm": 1.175932073617665, | |
| "learning_rate": 8.784565916398714e-05, | |
| "loss": 1.0789, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1812141347025068, | |
| "grad_norm": 1.213820312892746, | |
| "learning_rate": 8.720257234726688e-05, | |
| "loss": 1.0475, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.18725460585925702, | |
| "grad_norm": 1.1846528600649535, | |
| "learning_rate": 8.655948553054663e-05, | |
| "loss": 1.0521, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.19329507701600726, | |
| "grad_norm": 1.1497197430459096, | |
| "learning_rate": 8.591639871382637e-05, | |
| "loss": 1.082, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.19933554817275748, | |
| "grad_norm": 1.1612797319381625, | |
| "learning_rate": 8.52733118971061e-05, | |
| "loss": 1.0793, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2053760193295077, | |
| "grad_norm": 1.082739707844854, | |
| "learning_rate": 8.463022508038586e-05, | |
| "loss": 1.1061, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.21141649048625794, | |
| "grad_norm": 1.2757899011878036, | |
| "learning_rate": 8.398713826366559e-05, | |
| "loss": 1.073, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.21745696164300815, | |
| "grad_norm": 0.9540956853203417, | |
| "learning_rate": 8.334405144694534e-05, | |
| "loss": 1.0135, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2234974327997584, | |
| "grad_norm": 1.0200709241255603, | |
| "learning_rate": 8.270096463022508e-05, | |
| "loss": 1.1389, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2295379039565086, | |
| "grad_norm": 0.8455317026975807, | |
| "learning_rate": 8.205787781350482e-05, | |
| "loss": 1.0598, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.23557837511325883, | |
| "grad_norm": 1.1381349305391164, | |
| "learning_rate": 8.141479099678457e-05, | |
| "loss": 1.0777, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.24161884627000907, | |
| "grad_norm": 1.103747383663609, | |
| "learning_rate": 8.07717041800643e-05, | |
| "loss": 1.0617, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.24765931742675928, | |
| "grad_norm": 1.138904626814912, | |
| "learning_rate": 8.012861736334406e-05, | |
| "loss": 1.0324, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2536997885835095, | |
| "grad_norm": 1.0781428262426909, | |
| "learning_rate": 7.94855305466238e-05, | |
| "loss": 1.0859, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2597402597402597, | |
| "grad_norm": 1.0846995831727089, | |
| "learning_rate": 7.884244372990353e-05, | |
| "loss": 1.0553, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.26578073089701, | |
| "grad_norm": 0.8641072003497396, | |
| "learning_rate": 7.819935691318328e-05, | |
| "loss": 1.0068, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2718212020537602, | |
| "grad_norm": 1.110226877216393, | |
| "learning_rate": 7.755627009646302e-05, | |
| "loss": 1.0115, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2778616732105104, | |
| "grad_norm": 1.1418636977442045, | |
| "learning_rate": 7.691318327974277e-05, | |
| "loss": 1.0373, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.28390214436726063, | |
| "grad_norm": 0.9476740742119087, | |
| "learning_rate": 7.62700964630225e-05, | |
| "loss": 1.0023, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.28994261552401085, | |
| "grad_norm": 1.1736350313366837, | |
| "learning_rate": 7.562700964630224e-05, | |
| "loss": 1.0338, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2959830866807611, | |
| "grad_norm": 1.113923619981689, | |
| "learning_rate": 7.4983922829582e-05, | |
| "loss": 0.9855, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.30202355783751134, | |
| "grad_norm": 1.0389339180007815, | |
| "learning_rate": 7.434083601286173e-05, | |
| "loss": 1.0525, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.30806402899426155, | |
| "grad_norm": 1.161270624803982, | |
| "learning_rate": 7.369774919614148e-05, | |
| "loss": 1.0697, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.31410450015101177, | |
| "grad_norm": 0.8892068789215148, | |
| "learning_rate": 7.305466237942122e-05, | |
| "loss": 0.9889, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.320144971307762, | |
| "grad_norm": 1.0455859065719944, | |
| "learning_rate": 7.241157556270097e-05, | |
| "loss": 0.9898, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.32618544246451225, | |
| "grad_norm": 1.0186894188406512, | |
| "learning_rate": 7.17684887459807e-05, | |
| "loss": 0.9904, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.33222591362126247, | |
| "grad_norm": 1.0072092839183986, | |
| "learning_rate": 7.112540192926044e-05, | |
| "loss": 0.9873, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3382663847780127, | |
| "grad_norm": 0.9663462582485958, | |
| "learning_rate": 7.04823151125402e-05, | |
| "loss": 1.0375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3443068559347629, | |
| "grad_norm": 0.9103712505082897, | |
| "learning_rate": 6.983922829581993e-05, | |
| "loss": 1.0383, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3503473270915131, | |
| "grad_norm": 1.0485963233672557, | |
| "learning_rate": 6.919614147909968e-05, | |
| "loss": 0.9758, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3563877982482634, | |
| "grad_norm": 0.9912979171159205, | |
| "learning_rate": 6.855305466237942e-05, | |
| "loss": 0.993, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3624282694050136, | |
| "grad_norm": 1.1550179596081722, | |
| "learning_rate": 6.790996784565917e-05, | |
| "loss": 1.032, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3684687405617638, | |
| "grad_norm": 1.0176105219446732, | |
| "learning_rate": 6.726688102893891e-05, | |
| "loss": 1.0094, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.37450921171851403, | |
| "grad_norm": 1.1210756115920897, | |
| "learning_rate": 6.662379421221864e-05, | |
| "loss": 0.968, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.38054968287526425, | |
| "grad_norm": 0.9310657150450071, | |
| "learning_rate": 6.59807073954984e-05, | |
| "loss": 1.0102, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3865901540320145, | |
| "grad_norm": 1.1633016127800648, | |
| "learning_rate": 6.533762057877813e-05, | |
| "loss": 1.0256, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.39263062518876474, | |
| "grad_norm": 0.8195008101746646, | |
| "learning_rate": 6.469453376205788e-05, | |
| "loss": 0.9906, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.39867109634551495, | |
| "grad_norm": 0.9310761172857367, | |
| "learning_rate": 6.405144694533762e-05, | |
| "loss": 0.9754, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.40471156750226517, | |
| "grad_norm": 1.0516630367824455, | |
| "learning_rate": 6.340836012861736e-05, | |
| "loss": 1.0275, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4107520386590154, | |
| "grad_norm": 1.0629036969985795, | |
| "learning_rate": 6.276527331189711e-05, | |
| "loss": 1.0088, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.41679250981576565, | |
| "grad_norm": 1.0110994904370134, | |
| "learning_rate": 6.212218649517684e-05, | |
| "loss": 1.0416, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.42283298097251587, | |
| "grad_norm": 0.8895584335650736, | |
| "learning_rate": 6.14790996784566e-05, | |
| "loss": 1.0762, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4288734521292661, | |
| "grad_norm": 1.0448634604827216, | |
| "learning_rate": 6.083601286173633e-05, | |
| "loss": 0.9771, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4349139232860163, | |
| "grad_norm": 0.7905625978703504, | |
| "learning_rate": 6.019292604501608e-05, | |
| "loss": 1.0104, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4409543944427665, | |
| "grad_norm": 1.0522004404580236, | |
| "learning_rate": 5.954983922829582e-05, | |
| "loss": 0.9977, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4469948655995168, | |
| "grad_norm": 0.989475716298655, | |
| "learning_rate": 5.8906752411575564e-05, | |
| "loss": 0.9752, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.453035336756267, | |
| "grad_norm": 1.1463565668787514, | |
| "learning_rate": 5.826366559485531e-05, | |
| "loss": 0.9922, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4590758079130172, | |
| "grad_norm": 1.189865689753586, | |
| "learning_rate": 5.7620578778135045e-05, | |
| "loss": 1.0211, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 1.0920686333427474, | |
| "learning_rate": 5.6977491961414796e-05, | |
| "loss": 0.9664, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.47115675022651765, | |
| "grad_norm": 1.0174913677757231, | |
| "learning_rate": 5.633440514469453e-05, | |
| "loss": 0.9992, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4771972213832679, | |
| "grad_norm": 1.1913080680366728, | |
| "learning_rate": 5.5691318327974284e-05, | |
| "loss": 1.033, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.48323769254001814, | |
| "grad_norm": 1.0204323369307062, | |
| "learning_rate": 5.504823151125402e-05, | |
| "loss": 0.9807, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.48927816369676835, | |
| "grad_norm": 1.118864062431584, | |
| "learning_rate": 5.440514469453376e-05, | |
| "loss": 0.974, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.49531863485351857, | |
| "grad_norm": 1.0350012597705058, | |
| "learning_rate": 5.376205787781351e-05, | |
| "loss": 0.9705, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5013591060102688, | |
| "grad_norm": 1.0845595520731828, | |
| "learning_rate": 5.3118971061093246e-05, | |
| "loss": 0.9582, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.507399577167019, | |
| "grad_norm": 0.9453749574005488, | |
| "learning_rate": 5.2475884244372996e-05, | |
| "loss": 1.0293, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5134400483237692, | |
| "grad_norm": 1.042699724920899, | |
| "learning_rate": 5.183279742765273e-05, | |
| "loss": 0.952, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5194805194805194, | |
| "grad_norm": 1.024020216858902, | |
| "learning_rate": 5.118971061093247e-05, | |
| "loss": 1.018, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5255209906372698, | |
| "grad_norm": 1.0360224854752502, | |
| "learning_rate": 5.054662379421222e-05, | |
| "loss": 0.9295, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.53156146179402, | |
| "grad_norm": 1.1683508369643563, | |
| "learning_rate": 4.9903536977491965e-05, | |
| "loss": 1.0133, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5376019329507702, | |
| "grad_norm": 0.8614966755997071, | |
| "learning_rate": 4.92604501607717e-05, | |
| "loss": 0.993, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5436424041075204, | |
| "grad_norm": 0.9857361206695465, | |
| "learning_rate": 4.8617363344051446e-05, | |
| "loss": 0.951, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5496828752642706, | |
| "grad_norm": 0.8956078547131043, | |
| "learning_rate": 4.797427652733119e-05, | |
| "loss": 0.9268, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5557233464210208, | |
| "grad_norm": 0.8885268581836658, | |
| "learning_rate": 4.7331189710610934e-05, | |
| "loss": 0.9688, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.561763817577771, | |
| "grad_norm": 1.136712299735102, | |
| "learning_rate": 4.668810289389068e-05, | |
| "loss": 0.9545, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5678042887345213, | |
| "grad_norm": 1.114040839440962, | |
| "learning_rate": 4.604501607717042e-05, | |
| "loss": 1.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5738447598912715, | |
| "grad_norm": 1.2120423884348053, | |
| "learning_rate": 4.5401929260450165e-05, | |
| "loss": 0.9525, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5798852310480217, | |
| "grad_norm": 0.9841344976466476, | |
| "learning_rate": 4.47588424437299e-05, | |
| "loss": 1.0389, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.585925702204772, | |
| "grad_norm": 1.0459005071658793, | |
| "learning_rate": 4.4115755627009646e-05, | |
| "loss": 1.0041, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5919661733615222, | |
| "grad_norm": 1.2096290639870764, | |
| "learning_rate": 4.347266881028939e-05, | |
| "loss": 0.9939, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5980066445182725, | |
| "grad_norm": 1.0171142436328486, | |
| "learning_rate": 4.2829581993569134e-05, | |
| "loss": 0.9432, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6040471156750227, | |
| "grad_norm": 1.0557726010616861, | |
| "learning_rate": 4.218649517684888e-05, | |
| "loss": 1.0041, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6100875868317729, | |
| "grad_norm": 0.9854936325370102, | |
| "learning_rate": 4.154340836012862e-05, | |
| "loss": 0.9598, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6161280579885231, | |
| "grad_norm": 1.0002881708013533, | |
| "learning_rate": 4.090032154340836e-05, | |
| "loss": 0.9537, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6221685291452733, | |
| "grad_norm": 1.0163254545569222, | |
| "learning_rate": 4.02572347266881e-05, | |
| "loss": 0.9781, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6282090003020235, | |
| "grad_norm": 1.0700856083800017, | |
| "learning_rate": 3.961414790996785e-05, | |
| "loss": 0.9418, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6342494714587738, | |
| "grad_norm": 0.925802326406659, | |
| "learning_rate": 3.897106109324759e-05, | |
| "loss": 0.9355, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.640289942615524, | |
| "grad_norm": 1.0173964783122673, | |
| "learning_rate": 3.8327974276527335e-05, | |
| "loss": 0.9311, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6463304137722743, | |
| "grad_norm": 1.0886760638416628, | |
| "learning_rate": 3.768488745980708e-05, | |
| "loss": 1.0104, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6523708849290245, | |
| "grad_norm": 0.8784480294988184, | |
| "learning_rate": 3.7041800643086816e-05, | |
| "loss": 0.9631, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6584113560857747, | |
| "grad_norm": 1.0061477546516637, | |
| "learning_rate": 3.639871382636656e-05, | |
| "loss": 0.9209, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6644518272425249, | |
| "grad_norm": 1.0443361002098694, | |
| "learning_rate": 3.5755627009646303e-05, | |
| "loss": 0.9494, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6704922983992752, | |
| "grad_norm": 1.064188874483296, | |
| "learning_rate": 3.511254019292605e-05, | |
| "loss": 0.9871, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6765327695560254, | |
| "grad_norm": 0.9804777210337282, | |
| "learning_rate": 3.446945337620579e-05, | |
| "loss": 0.9357, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6825732407127756, | |
| "grad_norm": 1.0053083581127769, | |
| "learning_rate": 3.3826366559485535e-05, | |
| "loss": 0.9637, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6886137118695258, | |
| "grad_norm": 1.0062328413826747, | |
| "learning_rate": 3.318327974276528e-05, | |
| "loss": 0.9914, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.694654183026276, | |
| "grad_norm": 1.0659942748249296, | |
| "learning_rate": 3.2540192926045016e-05, | |
| "loss": 0.9332, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7006946541830262, | |
| "grad_norm": 1.2425227959394274, | |
| "learning_rate": 3.189710610932476e-05, | |
| "loss": 0.9951, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7067351253397764, | |
| "grad_norm": 1.0269910163244402, | |
| "learning_rate": 3.1254019292604504e-05, | |
| "loss": 0.9805, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7127755964965268, | |
| "grad_norm": 1.067485855484705, | |
| "learning_rate": 3.061093247588425e-05, | |
| "loss": 0.975, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.718816067653277, | |
| "grad_norm": 0.9819234749980894, | |
| "learning_rate": 2.996784565916399e-05, | |
| "loss": 0.9805, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.7248565388100272, | |
| "grad_norm": 0.9915476687600292, | |
| "learning_rate": 2.9324758842443732e-05, | |
| "loss": 0.9365, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7308970099667774, | |
| "grad_norm": 1.048821106029052, | |
| "learning_rate": 2.8681672025723473e-05, | |
| "loss": 0.9861, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7369374811235276, | |
| "grad_norm": 0.9121022219179783, | |
| "learning_rate": 2.8038585209003217e-05, | |
| "loss": 0.9854, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7429779522802779, | |
| "grad_norm": 0.9525288844616361, | |
| "learning_rate": 2.739549839228296e-05, | |
| "loss": 0.948, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.7490184234370281, | |
| "grad_norm": 1.1391100357014714, | |
| "learning_rate": 2.67524115755627e-05, | |
| "loss": 0.8887, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7550588945937783, | |
| "grad_norm": 1.1078585866746686, | |
| "learning_rate": 2.6109324758842445e-05, | |
| "loss": 0.9715, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7610993657505285, | |
| "grad_norm": 0.9323091046055703, | |
| "learning_rate": 2.546623794212219e-05, | |
| "loss": 0.9359, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7671398369072787, | |
| "grad_norm": 1.146046378807531, | |
| "learning_rate": 2.482315112540193e-05, | |
| "loss": 0.8967, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.773180308064029, | |
| "grad_norm": 1.14163561365635, | |
| "learning_rate": 2.4180064308681673e-05, | |
| "loss": 0.9707, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7792207792207793, | |
| "grad_norm": 1.0034307361335086, | |
| "learning_rate": 2.3536977491961414e-05, | |
| "loss": 0.9668, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7852612503775295, | |
| "grad_norm": 1.1199578096409222, | |
| "learning_rate": 2.2893890675241158e-05, | |
| "loss": 0.968, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7913017215342797, | |
| "grad_norm": 1.0809213063777794, | |
| "learning_rate": 2.22508038585209e-05, | |
| "loss": 0.9264, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7973421926910299, | |
| "grad_norm": 1.1266044230874084, | |
| "learning_rate": 2.1607717041800642e-05, | |
| "loss": 1.0203, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8033826638477801, | |
| "grad_norm": 0.8540728431635001, | |
| "learning_rate": 2.0964630225080386e-05, | |
| "loss": 0.924, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.8094231350045303, | |
| "grad_norm": 1.0063045365537524, | |
| "learning_rate": 2.032154340836013e-05, | |
| "loss": 0.9656, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8154636061612806, | |
| "grad_norm": 0.9432512605760064, | |
| "learning_rate": 1.967845659163987e-05, | |
| "loss": 1.0254, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8215040773180308, | |
| "grad_norm": 1.162199676010242, | |
| "learning_rate": 1.9035369774919614e-05, | |
| "loss": 0.9549, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.827544548474781, | |
| "grad_norm": 1.0184734737311432, | |
| "learning_rate": 1.8392282958199358e-05, | |
| "loss": 0.8937, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8335850196315313, | |
| "grad_norm": 1.0321283898574098, | |
| "learning_rate": 1.77491961414791e-05, | |
| "loss": 0.9641, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8396254907882815, | |
| "grad_norm": 0.948342731761788, | |
| "learning_rate": 1.7106109324758842e-05, | |
| "loss": 0.907, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.8456659619450317, | |
| "grad_norm": 1.016072519476065, | |
| "learning_rate": 1.6463022508038586e-05, | |
| "loss": 0.999, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.851706433101782, | |
| "grad_norm": 0.8891894724654406, | |
| "learning_rate": 1.581993569131833e-05, | |
| "loss": 0.9238, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.8577469042585322, | |
| "grad_norm": 1.0116016221718374, | |
| "learning_rate": 1.517684887459807e-05, | |
| "loss": 0.9488, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.8637873754152824, | |
| "grad_norm": 1.092739541532347, | |
| "learning_rate": 1.4533762057877815e-05, | |
| "loss": 0.9971, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.8698278465720326, | |
| "grad_norm": 0.8563957174375877, | |
| "learning_rate": 1.3890675241157558e-05, | |
| "loss": 1.0133, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.8758683177287828, | |
| "grad_norm": 1.089288756096709, | |
| "learning_rate": 1.3247588424437299e-05, | |
| "loss": 0.9127, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.881908788885533, | |
| "grad_norm": 0.9458299067214011, | |
| "learning_rate": 1.2604501607717043e-05, | |
| "loss": 0.9668, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.8879492600422833, | |
| "grad_norm": 1.2229020868039389, | |
| "learning_rate": 1.1961414790996785e-05, | |
| "loss": 0.9506, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.8939897311990336, | |
| "grad_norm": 1.0615217830735095, | |
| "learning_rate": 1.1318327974276529e-05, | |
| "loss": 0.9219, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.9000302023557838, | |
| "grad_norm": 1.1622944573297567, | |
| "learning_rate": 1.0675241157556271e-05, | |
| "loss": 0.9391, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.906070673512534, | |
| "grad_norm": 0.9443566657008828, | |
| "learning_rate": 1.0032154340836013e-05, | |
| "loss": 0.8955, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9121111446692842, | |
| "grad_norm": 1.1367332066610845, | |
| "learning_rate": 9.389067524115757e-06, | |
| "loss": 0.9385, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9181516158260344, | |
| "grad_norm": 0.9414922696266849, | |
| "learning_rate": 8.7459807073955e-06, | |
| "loss": 0.9318, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9241920869827847, | |
| "grad_norm": 1.0844935096035118, | |
| "learning_rate": 8.102893890675242e-06, | |
| "loss": 0.9311, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 1.120509038145209, | |
| "learning_rate": 7.459807073954985e-06, | |
| "loss": 0.9422, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.9362730292962851, | |
| "grad_norm": 0.9486397409946379, | |
| "learning_rate": 6.816720257234727e-06, | |
| "loss": 0.9162, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.9423135004530353, | |
| "grad_norm": 1.1412479158621043, | |
| "learning_rate": 6.17363344051447e-06, | |
| "loss": 0.9676, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.9483539716097855, | |
| "grad_norm": 1.050318112867144, | |
| "learning_rate": 5.530546623794212e-06, | |
| "loss": 0.9754, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.9543944427665358, | |
| "grad_norm": 1.0466438135565226, | |
| "learning_rate": 4.887459807073955e-06, | |
| "loss": 0.926, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.9604349139232861, | |
| "grad_norm": 0.9720115456232871, | |
| "learning_rate": 4.244372990353698e-06, | |
| "loss": 0.9447, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.9664753850800363, | |
| "grad_norm": 0.8786923129454015, | |
| "learning_rate": 3.6012861736334403e-06, | |
| "loss": 0.9443, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.9725158562367865, | |
| "grad_norm": 1.0733513985052048, | |
| "learning_rate": 2.9581993569131834e-06, | |
| "loss": 0.9053, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.9785563273935367, | |
| "grad_norm": 0.8731368575153022, | |
| "learning_rate": 2.315112540192926e-06, | |
| "loss": 0.9803, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.9845967985502869, | |
| "grad_norm": 1.0060959243542735, | |
| "learning_rate": 1.6720257234726688e-06, | |
| "loss": 0.9563, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.9906372697070371, | |
| "grad_norm": 0.9113226806468382, | |
| "learning_rate": 1.0289389067524116e-06, | |
| "loss": 0.9191, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.9966777408637874, | |
| "grad_norm": 0.9160377841463557, | |
| "learning_rate": 3.8585209003215437e-07, | |
| "loss": 0.9117, | |
| "step": 1650 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1655, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |