Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": 0.9310132935790502, | |
| "best_model_checkpoint": "../models/phq_cls\\checkpoint-20500", | |
| "epoch": 4.929577464788732, | |
| "eval_steps": 500, | |
| "global_step": 21000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.5858579277992249, | |
| "learning_rate": 4.997652582159625e-05, | |
| "loss": 0.6356, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.6038872599601746, | |
| "learning_rate": 4.995305164319249e-05, | |
| "loss": 0.526, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 3.9445767402648926, | |
| "learning_rate": 4.992957746478874e-05, | |
| "loss": 0.4502, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.5415595769882202, | |
| "learning_rate": 4.990610328638498e-05, | |
| "loss": 0.4029, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.5211673378944397, | |
| "learning_rate": 4.988262910798122e-05, | |
| "loss": 0.3697, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.3754492402076721, | |
| "learning_rate": 4.9859154929577466e-05, | |
| "loss": 0.3559, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.48204684257507324, | |
| "learning_rate": 4.9835680751173713e-05, | |
| "loss": 0.3499, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.3834236264228821, | |
| "learning_rate": 4.9812206572769954e-05, | |
| "loss": 0.3487, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.40969759225845337, | |
| "learning_rate": 4.97887323943662e-05, | |
| "loss": 0.3514, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.3595544695854187, | |
| "learning_rate": 4.976525821596245e-05, | |
| "loss": 0.3505, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.41510289907455444, | |
| "learning_rate": 4.974178403755869e-05, | |
| "loss": 0.3538, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.40279024839401245, | |
| "learning_rate": 4.971830985915493e-05, | |
| "loss": 0.3504, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.43963101506233215, | |
| "learning_rate": 4.969483568075118e-05, | |
| "loss": 0.3474, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.514215886592865, | |
| "learning_rate": 4.967136150234742e-05, | |
| "loss": 0.3488, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.49667927622795105, | |
| "learning_rate": 4.9647887323943665e-05, | |
| "loss": 0.343, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.8667863607406616, | |
| "learning_rate": 4.962441314553991e-05, | |
| "loss": 0.3427, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.5492832660675049, | |
| "learning_rate": 4.960093896713615e-05, | |
| "loss": 0.3432, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.45692935585975647, | |
| "learning_rate": 4.95774647887324e-05, | |
| "loss": 0.3328, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.5771167874336243, | |
| "learning_rate": 4.955399061032864e-05, | |
| "loss": 0.3274, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.586593747138977, | |
| "learning_rate": 4.953051643192488e-05, | |
| "loss": 0.3427, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.45685452222824097, | |
| "learning_rate": 4.950704225352113e-05, | |
| "loss": 0.3278, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.40281644463539124, | |
| "learning_rate": 4.9483568075117376e-05, | |
| "loss": 0.3412, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.6066292524337769, | |
| "learning_rate": 4.946009389671362e-05, | |
| "loss": 0.3256, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 1.0048617124557495, | |
| "learning_rate": 4.9436619718309864e-05, | |
| "loss": 0.3296, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.4742206633090973, | |
| "learning_rate": 4.941314553990611e-05, | |
| "loss": 0.3181, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.44790124893188477, | |
| "learning_rate": 4.938967136150235e-05, | |
| "loss": 0.3116, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.3388730585575104, | |
| "learning_rate": 4.936619718309859e-05, | |
| "loss": 0.3311, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 1.0921857357025146, | |
| "learning_rate": 4.934272300469484e-05, | |
| "loss": 0.3167, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.29578328132629395, | |
| "learning_rate": 4.931924882629108e-05, | |
| "loss": 0.3104, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.5786669254302979, | |
| "learning_rate": 4.929577464788733e-05, | |
| "loss": 0.3032, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.3513028919696808, | |
| "learning_rate": 4.927230046948357e-05, | |
| "loss": 0.3187, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.6156922578811646, | |
| "learning_rate": 4.9248826291079816e-05, | |
| "loss": 0.3145, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.43208229541778564, | |
| "learning_rate": 4.9225352112676056e-05, | |
| "loss": 0.3178, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.3528173863887787, | |
| "learning_rate": 4.92018779342723e-05, | |
| "loss": 0.3074, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.6173387765884399, | |
| "learning_rate": 4.9178403755868544e-05, | |
| "loss": 0.3121, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.8469752073287964, | |
| "learning_rate": 4.915492957746479e-05, | |
| "loss": 0.3007, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.5218201875686646, | |
| "learning_rate": 4.913145539906103e-05, | |
| "loss": 0.3022, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.46562883257865906, | |
| "learning_rate": 4.910798122065728e-05, | |
| "loss": 0.3074, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.3317212760448456, | |
| "learning_rate": 4.908450704225353e-05, | |
| "loss": 0.2938, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.5221473574638367, | |
| "learning_rate": 4.906103286384977e-05, | |
| "loss": 0.2981, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.4833567142486572, | |
| "learning_rate": 4.903755868544601e-05, | |
| "loss": 0.2911, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.3665942847728729, | |
| "learning_rate": 4.9014084507042255e-05, | |
| "loss": 0.2836, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.9034627079963684, | |
| "learning_rate": 4.8990610328638496e-05, | |
| "loss": 0.2948, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.40714317560195923, | |
| "learning_rate": 4.896713615023474e-05, | |
| "loss": 0.3087, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.4227840304374695, | |
| "learning_rate": 4.894366197183099e-05, | |
| "loss": 0.3115, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.471699982881546, | |
| "learning_rate": 4.892018779342723e-05, | |
| "loss": 0.2815, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.5239991545677185, | |
| "learning_rate": 4.889671361502348e-05, | |
| "loss": 0.2906, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.5088714361190796, | |
| "learning_rate": 4.887323943661972e-05, | |
| "loss": 0.2837, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.568097710609436, | |
| "learning_rate": 4.884976525821596e-05, | |
| "loss": 0.2848, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.5387840867042542, | |
| "learning_rate": 4.882629107981221e-05, | |
| "loss": 0.2885, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 0.27685049176216125, | |
| "eval_macro/f1": 0.09752839011356046, | |
| "eval_macro/precision": 0.10432297249017507, | |
| "eval_macro/recall": 0.09156475384132957, | |
| "eval_micro/f1": 0.21273753870438952, | |
| "eval_micro/precision": 0.9389067524115756, | |
| "eval_micro/recall": 0.11995891817870592, | |
| "eval_runtime": 27.609, | |
| "eval_samples/accuracy": 0.11995891817870592, | |
| "eval_samples_per_second": 528.994, | |
| "eval_steps_per_second": 16.553, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.117136001586914, | |
| "learning_rate": 4.8802816901408454e-05, | |
| "loss": 0.3016, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.5608183145523071, | |
| "learning_rate": 4.8779342723004695e-05, | |
| "loss": 0.2758, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.1747539043426514, | |
| "learning_rate": 4.875586854460094e-05, | |
| "loss": 0.2871, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.1754446029663086, | |
| "learning_rate": 4.873239436619719e-05, | |
| "loss": 0.27, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.45414578914642334, | |
| "learning_rate": 4.870892018779343e-05, | |
| "loss": 0.2702, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.651540219783783, | |
| "learning_rate": 4.868544600938967e-05, | |
| "loss": 0.265, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.6937834024429321, | |
| "learning_rate": 4.866197183098592e-05, | |
| "loss": 0.278, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.1861662864685059, | |
| "learning_rate": 4.863849765258216e-05, | |
| "loss": 0.26, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.8335360884666443, | |
| "learning_rate": 4.8615023474178406e-05, | |
| "loss": 0.2595, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.48560401797294617, | |
| "learning_rate": 4.8591549295774653e-05, | |
| "loss": 0.2769, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.0077592134475708, | |
| "learning_rate": 4.8568075117370894e-05, | |
| "loss": 0.2473, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.5798254013061523, | |
| "learning_rate": 4.854460093896714e-05, | |
| "loss": 0.2486, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.9621915817260742, | |
| "learning_rate": 4.852112676056338e-05, | |
| "loss": 0.264, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.764593780040741, | |
| "learning_rate": 4.849765258215962e-05, | |
| "loss": 0.2519, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.6093132495880127, | |
| "learning_rate": 4.847417840375587e-05, | |
| "loss": 0.2366, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 5.682877063751221, | |
| "learning_rate": 4.845774647887324e-05, | |
| "loss": 0.2781, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.4239857196807861, | |
| "learning_rate": 4.843427230046948e-05, | |
| "loss": 0.2548, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.2523099184036255, | |
| "learning_rate": 4.841079812206573e-05, | |
| "loss": 0.24, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.2140272855758667, | |
| "learning_rate": 4.838732394366197e-05, | |
| "loss": 0.2195, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.9842355847358704, | |
| "learning_rate": 4.836384976525822e-05, | |
| "loss": 0.2268, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 16.670461654663086, | |
| "learning_rate": 4.8340375586854466e-05, | |
| "loss": 0.2344, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.6551809310913086, | |
| "learning_rate": 4.8316901408450706e-05, | |
| "loss": 0.2304, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.0985769033432007, | |
| "learning_rate": 4.8293427230046953e-05, | |
| "loss": 0.2318, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 2.296762228012085, | |
| "learning_rate": 4.8269953051643194e-05, | |
| "loss": 0.233, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.6605547666549683, | |
| "learning_rate": 4.8246478873239435e-05, | |
| "loss": 0.2134, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.3795205354690552, | |
| "learning_rate": 4.822300469483568e-05, | |
| "loss": 0.2099, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 2.0488667488098145, | |
| "learning_rate": 4.819953051643193e-05, | |
| "loss": 0.2432, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 2.1046578884124756, | |
| "learning_rate": 4.817605633802817e-05, | |
| "loss": 0.2393, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.8910871744155884, | |
| "learning_rate": 4.815258215962442e-05, | |
| "loss": 0.2256, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 2.1071548461914062, | |
| "learning_rate": 4.8129107981220665e-05, | |
| "loss": 0.231, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.6893961429595947, | |
| "learning_rate": 4.8105633802816905e-05, | |
| "loss": 0.2251, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.9333511590957642, | |
| "learning_rate": 4.8082159624413146e-05, | |
| "loss": 0.2202, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.001318335533142, | |
| "learning_rate": 4.805868544600939e-05, | |
| "loss": 0.2055, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.0097473859786987, | |
| "learning_rate": 4.8035211267605634e-05, | |
| "loss": 0.2062, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.3152905702590942, | |
| "learning_rate": 4.801173708920188e-05, | |
| "loss": 0.193, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 2.033956527709961, | |
| "learning_rate": 4.798826291079813e-05, | |
| "loss": 0.2181, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.7312398552894592, | |
| "learning_rate": 4.796478873239437e-05, | |
| "loss": 0.1852, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.898751139640808, | |
| "learning_rate": 4.794131455399061e-05, | |
| "loss": 0.2077, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.6988235712051392, | |
| "learning_rate": 4.791784037558686e-05, | |
| "loss": 0.2234, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.8618746399879456, | |
| "learning_rate": 4.78943661971831e-05, | |
| "loss": 0.1829, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.307588577270508, | |
| "learning_rate": 4.7870892018779345e-05, | |
| "loss": 0.1791, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.8917971849441528, | |
| "learning_rate": 4.784741784037559e-05, | |
| "loss": 0.1817, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.6609673500061035, | |
| "learning_rate": 4.782394366197183e-05, | |
| "loss": 0.2039, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.1030490398406982, | |
| "learning_rate": 4.780046948356808e-05, | |
| "loss": 0.1924, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.8905339241027832, | |
| "learning_rate": 4.777699530516432e-05, | |
| "loss": 0.1902, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.4277052879333496, | |
| "learning_rate": 4.775352112676056e-05, | |
| "loss": 0.1888, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.2068595886230469, | |
| "learning_rate": 4.773004694835681e-05, | |
| "loss": 0.1963, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 2.084404706954956, | |
| "learning_rate": 4.7706572769953056e-05, | |
| "loss": 0.18, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.7552419900894165, | |
| "learning_rate": 4.7683098591549296e-05, | |
| "loss": 0.1897, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.4683459997177124, | |
| "learning_rate": 4.7659624413145544e-05, | |
| "loss": 0.19, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 0.17012138664722443, | |
| "eval_macro/f1": 0.6974816804549718, | |
| "eval_macro/precision": 0.814657194389621, | |
| "eval_macro/recall": 0.6377254831311044, | |
| "eval_micro/f1": 0.7211732077048745, | |
| "eval_micro/precision": 0.8100887674974394, | |
| "eval_micro/recall": 0.6498459431701472, | |
| "eval_runtime": 27.2589, | |
| "eval_samples/accuracy": 0.6475179732968162, | |
| "eval_samples_per_second": 535.787, | |
| "eval_steps_per_second": 16.765, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9634861350059509, | |
| "learning_rate": 4.763615023474179e-05, | |
| "loss": 0.1842, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.3621294498443604, | |
| "learning_rate": 4.761267605633803e-05, | |
| "loss": 0.1755, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.732932686805725, | |
| "learning_rate": 4.758920187793427e-05, | |
| "loss": 0.1896, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 3.365919589996338, | |
| "learning_rate": 4.756572769953052e-05, | |
| "loss": 0.1627, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.9022215604782104, | |
| "learning_rate": 4.754225352112676e-05, | |
| "loss": 0.177, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.9240831136703491, | |
| "learning_rate": 4.751877934272301e-05, | |
| "loss": 0.1495, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.7557075023651123, | |
| "learning_rate": 4.7495305164319255e-05, | |
| "loss": 0.1884, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.368794322013855, | |
| "learning_rate": 4.7471830985915495e-05, | |
| "loss": 0.1852, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 3.1905553340911865, | |
| "learning_rate": 4.744835680751174e-05, | |
| "loss": 0.1558, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.3366222381591797, | |
| "learning_rate": 4.742488262910798e-05, | |
| "loss": 0.1658, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.9878789782524109, | |
| "learning_rate": 4.7401408450704224e-05, | |
| "loss": 0.1988, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.590110182762146, | |
| "learning_rate": 4.737793427230047e-05, | |
| "loss": 0.1652, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.6078647375106812, | |
| "learning_rate": 4.735446009389671e-05, | |
| "loss": 0.1874, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.7055974006652832, | |
| "learning_rate": 4.733098591549296e-05, | |
| "loss": 0.1679, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 2.226369619369507, | |
| "learning_rate": 4.7307511737089206e-05, | |
| "loss": 0.1648, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.7244293689727783, | |
| "learning_rate": 4.728403755868545e-05, | |
| "loss": 0.1594, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 2.184124231338501, | |
| "learning_rate": 4.7260563380281694e-05, | |
| "loss": 0.1648, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.080694556236267, | |
| "learning_rate": 4.7237089201877935e-05, | |
| "loss": 0.1849, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.224308490753174, | |
| "learning_rate": 4.7213615023474176e-05, | |
| "loss": 0.1697, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.618312120437622, | |
| "learning_rate": 4.719014084507042e-05, | |
| "loss": 0.1416, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.3569130897521973, | |
| "learning_rate": 4.716666666666667e-05, | |
| "loss": 0.1557, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 2.247316598892212, | |
| "learning_rate": 4.714319248826291e-05, | |
| "loss": 0.1846, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.6453408002853394, | |
| "learning_rate": 4.711971830985916e-05, | |
| "loss": 0.1483, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.5334419012069702, | |
| "learning_rate": 4.7096244131455405e-05, | |
| "loss": 0.1663, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.6418590545654297, | |
| "learning_rate": 4.7072769953051646e-05, | |
| "loss": 0.147, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.0844509601593018, | |
| "learning_rate": 4.704929577464789e-05, | |
| "loss": 0.1543, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.806553602218628, | |
| "learning_rate": 4.7025821596244134e-05, | |
| "loss": 0.1799, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.5278924703598022, | |
| "learning_rate": 4.7002347417840375e-05, | |
| "loss": 0.1527, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.797590434551239, | |
| "learning_rate": 4.697887323943662e-05, | |
| "loss": 0.1468, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.9068496823310852, | |
| "learning_rate": 4.695539906103287e-05, | |
| "loss": 0.1606, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.6918179988861084, | |
| "learning_rate": 4.693192488262911e-05, | |
| "loss": 0.1602, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.412083864212036, | |
| "learning_rate": 4.690845070422536e-05, | |
| "loss": 0.1583, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.339053153991699, | |
| "learning_rate": 4.68849765258216e-05, | |
| "loss": 0.1834, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 4.236501693725586, | |
| "learning_rate": 4.686150234741784e-05, | |
| "loss": 0.1337, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.246243000030518, | |
| "learning_rate": 4.6838028169014086e-05, | |
| "loss": 0.156, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.2402584552764893, | |
| "learning_rate": 4.681455399061033e-05, | |
| "loss": 0.1529, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.787749767303467, | |
| "learning_rate": 4.6791079812206574e-05, | |
| "loss": 0.1547, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.6329225897789001, | |
| "learning_rate": 4.676760563380282e-05, | |
| "loss": 0.1333, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.7299346923828125, | |
| "learning_rate": 4.674413145539907e-05, | |
| "loss": 0.1383, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.7260222434997559, | |
| "learning_rate": 4.672065727699531e-05, | |
| "loss": 0.1457, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.683088779449463, | |
| "learning_rate": 4.669718309859155e-05, | |
| "loss": 0.1243, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.2119216918945312, | |
| "learning_rate": 4.66737089201878e-05, | |
| "loss": 0.1469, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.2248650789260864, | |
| "learning_rate": 4.665023474178404e-05, | |
| "loss": 0.1628, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.9862676858901978, | |
| "learning_rate": 4.6626760563380285e-05, | |
| "loss": 0.1234, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 3.197272539138794, | |
| "learning_rate": 4.660328638497653e-05, | |
| "loss": 0.1371, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.4832003116607666, | |
| "learning_rate": 4.657981220657277e-05, | |
| "loss": 0.1424, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.848587989807129, | |
| "learning_rate": 4.655633802816901e-05, | |
| "loss": 0.1151, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.1081154346466064, | |
| "learning_rate": 4.653286384976526e-05, | |
| "loss": 0.1193, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 3.711557388305664, | |
| "learning_rate": 4.65093896713615e-05, | |
| "loss": 0.1575, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.7196974754333496, | |
| "learning_rate": 4.648591549295775e-05, | |
| "loss": 0.1425, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 0.13336510956287384, | |
| "eval_macro/f1": 0.77789474745571, | |
| "eval_macro/precision": 0.8167214646472566, | |
| "eval_macro/recall": 0.7518631015976748, | |
| "eval_micro/f1": 0.7850268139361438, | |
| "eval_micro/precision": 0.8155253837072018, | |
| "eval_micro/recall": 0.7567271482369051, | |
| "eval_runtime": 27.3098, | |
| "eval_samples/accuracy": 0.7492639507018144, | |
| "eval_samples_per_second": 534.79, | |
| "eval_steps_per_second": 16.734, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.3112411499023438, | |
| "learning_rate": 4.6462441314553996e-05, | |
| "loss": 0.1383, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.371739387512207, | |
| "learning_rate": 4.6438967136150236e-05, | |
| "loss": 0.169, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 2.0370471477508545, | |
| "learning_rate": 4.6415492957746484e-05, | |
| "loss": 0.1304, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 2.2070887088775635, | |
| "learning_rate": 4.6392018779342724e-05, | |
| "loss": 0.156, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 10.297237396240234, | |
| "learning_rate": 4.6368544600938965e-05, | |
| "loss": 0.1527, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.114290475845337, | |
| "learning_rate": 4.634507042253521e-05, | |
| "loss": 0.1351, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.2772897481918335, | |
| "learning_rate": 4.632159624413146e-05, | |
| "loss": 0.1397, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.7256558537483215, | |
| "learning_rate": 4.62981220657277e-05, | |
| "loss": 0.1394, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 2.603022575378418, | |
| "learning_rate": 4.627464788732395e-05, | |
| "loss": 0.1641, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.835552453994751, | |
| "learning_rate": 4.6251173708920195e-05, | |
| "loss": 0.1645, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 3.006970167160034, | |
| "learning_rate": 4.6227699530516435e-05, | |
| "loss": 0.1458, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.926814079284668, | |
| "learning_rate": 4.6204225352112676e-05, | |
| "loss": 0.164, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 3.104510545730591, | |
| "learning_rate": 4.618075117370892e-05, | |
| "loss": 0.1687, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.2601518630981445, | |
| "learning_rate": 4.6157276995305164e-05, | |
| "loss": 0.1615, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.0919325351715088, | |
| "learning_rate": 4.613380281690141e-05, | |
| "loss": 0.1519, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 3.507404327392578, | |
| "learning_rate": 4.611032863849766e-05, | |
| "loss": 0.133, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.8300056457519531, | |
| "learning_rate": 4.60868544600939e-05, | |
| "loss": 0.1772, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.9435104131698608, | |
| "learning_rate": 4.6063380281690146e-05, | |
| "loss": 0.1298, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.371901035308838, | |
| "learning_rate": 4.603990610328639e-05, | |
| "loss": 0.1652, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.4579296112060547, | |
| "learning_rate": 4.601643192488263e-05, | |
| "loss": 0.1144, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.280348300933838, | |
| "learning_rate": 4.5992957746478875e-05, | |
| "loss": 0.1363, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 3.6556637287139893, | |
| "learning_rate": 4.5969483568075115e-05, | |
| "loss": 0.1297, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.8788136839866638, | |
| "learning_rate": 4.594600938967136e-05, | |
| "loss": 0.134, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.5975239276885986, | |
| "learning_rate": 4.592253521126761e-05, | |
| "loss": 0.1058, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 3.037933588027954, | |
| "learning_rate": 4.589906103286385e-05, | |
| "loss": 0.1139, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.6861093044281006, | |
| "learning_rate": 4.58755868544601e-05, | |
| "loss": 0.134, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.547356128692627, | |
| "learning_rate": 4.585211267605634e-05, | |
| "loss": 0.1278, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.218045473098755, | |
| "learning_rate": 4.582863849765258e-05, | |
| "loss": 0.1167, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.8614739179611206, | |
| "learning_rate": 4.5805164319248827e-05, | |
| "loss": 0.1134, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.04622483253479, | |
| "learning_rate": 4.5781690140845074e-05, | |
| "loss": 0.1331, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.9503240585327148, | |
| "learning_rate": 4.5758215962441315e-05, | |
| "loss": 0.1151, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.473653554916382, | |
| "learning_rate": 4.573474178403756e-05, | |
| "loss": 0.1188, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 4.5746307373046875, | |
| "learning_rate": 4.571126760563381e-05, | |
| "loss": 0.109, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.2897071838378906, | |
| "learning_rate": 4.568779342723005e-05, | |
| "loss": 0.135, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.783514976501465, | |
| "learning_rate": 4.566431924882629e-05, | |
| "loss": 0.1216, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.9944408535957336, | |
| "learning_rate": 4.564084507042254e-05, | |
| "loss": 0.1105, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.4792580604553223, | |
| "learning_rate": 4.561737089201878e-05, | |
| "loss": 0.1215, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 3.559095621109009, | |
| "learning_rate": 4.5593896713615026e-05, | |
| "loss": 0.1097, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.6624674797058105, | |
| "learning_rate": 4.557042253521127e-05, | |
| "loss": 0.1206, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.1126134395599365, | |
| "learning_rate": 4.5546948356807514e-05, | |
| "loss": 0.1314, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.4518086910247803, | |
| "learning_rate": 4.552347417840376e-05, | |
| "loss": 0.1317, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.835390329360962, | |
| "learning_rate": 4.55e-05, | |
| "loss": 0.1224, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.0483720302581787, | |
| "learning_rate": 4.547652582159624e-05, | |
| "loss": 0.1148, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.8186525106430054, | |
| "learning_rate": 4.545305164319249e-05, | |
| "loss": 0.1364, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.4727530479431152, | |
| "learning_rate": 4.542957746478874e-05, | |
| "loss": 0.1349, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.574378490447998, | |
| "learning_rate": 4.540610328638498e-05, | |
| "loss": 0.128, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.9332356452941895, | |
| "learning_rate": 4.5382629107981225e-05, | |
| "loss": 0.1286, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.3091564178466797, | |
| "learning_rate": 4.535915492957747e-05, | |
| "loss": 0.1274, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.5543984174728394, | |
| "learning_rate": 4.533568075117371e-05, | |
| "loss": 0.1205, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 4.30750846862793, | |
| "learning_rate": 4.531220657276995e-05, | |
| "loss": 0.1399, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 0.11542148888111115, | |
| "eval_macro/f1": 0.8150834145504557, | |
| "eval_macro/precision": 0.8413019593078327, | |
| "eval_macro/recall": 0.7991287240846648, | |
| "eval_micro/f1": 0.8217569126437585, | |
| "eval_micro/precision": 0.8394515069275817, | |
| "eval_micro/recall": 0.8047928791509757, | |
| "eval_runtime": 28.9918, | |
| "eval_samples/accuracy": 0.796644984594317, | |
| "eval_samples_per_second": 503.764, | |
| "eval_steps_per_second": 15.763, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 2.2433559894561768, | |
| "learning_rate": 4.52887323943662e-05, | |
| "loss": 0.1168, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0208545923233032, | |
| "learning_rate": 4.526525821596244e-05, | |
| "loss": 0.1003, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.44397255778312683, | |
| "learning_rate": 4.524178403755869e-05, | |
| "loss": 0.0873, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.4248991012573242, | |
| "learning_rate": 4.5218309859154936e-05, | |
| "loss": 0.0988, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.7967787981033325, | |
| "learning_rate": 4.5194835680751176e-05, | |
| "loss": 0.1254, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.8488617539405823, | |
| "learning_rate": 4.517136150234742e-05, | |
| "loss": 0.1011, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.6594550609588623, | |
| "learning_rate": 4.5147887323943664e-05, | |
| "loss": 0.121, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.3737144470214844, | |
| "learning_rate": 4.5124413145539905e-05, | |
| "loss": 0.1021, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.1709132194519043, | |
| "learning_rate": 4.510093896713615e-05, | |
| "loss": 0.112, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 3.5635571479797363, | |
| "learning_rate": 4.50774647887324e-05, | |
| "loss": 0.0889, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.1667410135269165, | |
| "learning_rate": 4.505399061032864e-05, | |
| "loss": 0.1027, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 3.7648849487304688, | |
| "learning_rate": 4.503051643192489e-05, | |
| "loss": 0.1391, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.023391604423523, | |
| "learning_rate": 4.500704225352113e-05, | |
| "loss": 0.1178, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.9849759340286255, | |
| "learning_rate": 4.498356807511737e-05, | |
| "loss": 0.1134, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.2011280059814453, | |
| "learning_rate": 4.4960093896713616e-05, | |
| "loss": 0.1243, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 5.394866943359375, | |
| "learning_rate": 4.493661971830986e-05, | |
| "loss": 0.1278, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.788053512573242, | |
| "learning_rate": 4.4913145539906104e-05, | |
| "loss": 0.1166, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.9931975603103638, | |
| "learning_rate": 4.488967136150235e-05, | |
| "loss": 0.1044, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.5912959575653076, | |
| "learning_rate": 4.48661971830986e-05, | |
| "loss": 0.1193, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 2.024529218673706, | |
| "learning_rate": 4.484272300469484e-05, | |
| "loss": 0.1286, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.237317442893982, | |
| "learning_rate": 4.481924882629108e-05, | |
| "loss": 0.1266, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.3059284687042236, | |
| "learning_rate": 4.479577464788733e-05, | |
| "loss": 0.1151, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 2.9779324531555176, | |
| "learning_rate": 4.477230046948357e-05, | |
| "loss": 0.0948, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 2.533174991607666, | |
| "learning_rate": 4.4748826291079815e-05, | |
| "loss": 0.1128, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.6116623878479004, | |
| "learning_rate": 4.472535211267606e-05, | |
| "loss": 0.0999, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 2.3070313930511475, | |
| "learning_rate": 4.47018779342723e-05, | |
| "loss": 0.1286, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.7318587899208069, | |
| "learning_rate": 4.467840375586855e-05, | |
| "loss": 0.0921, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.8361902832984924, | |
| "learning_rate": 4.465492957746479e-05, | |
| "loss": 0.1057, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.4417285919189453, | |
| "learning_rate": 4.463145539906103e-05, | |
| "loss": 0.1166, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 2.194974184036255, | |
| "learning_rate": 4.460798122065728e-05, | |
| "loss": 0.1176, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 3.3667984008789062, | |
| "learning_rate": 4.4584507042253526e-05, | |
| "loss": 0.1341, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.3247355222702026, | |
| "learning_rate": 4.4561032863849767e-05, | |
| "loss": 0.113, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 2.8170759677886963, | |
| "learning_rate": 4.4537558685446014e-05, | |
| "loss": 0.1253, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 2.810574769973755, | |
| "learning_rate": 4.4514084507042254e-05, | |
| "loss": 0.1287, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.5883653163909912, | |
| "learning_rate": 4.44906103286385e-05, | |
| "loss": 0.1312, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.6999726295471191, | |
| "learning_rate": 4.446713615023474e-05, | |
| "loss": 0.1092, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.7833150625228882, | |
| "learning_rate": 4.444366197183098e-05, | |
| "loss": 0.1055, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.1052982807159424, | |
| "learning_rate": 4.442018779342723e-05, | |
| "loss": 0.1007, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.897437334060669, | |
| "learning_rate": 4.439671361502348e-05, | |
| "loss": 0.1058, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.2218818664550781, | |
| "learning_rate": 4.437323943661972e-05, | |
| "loss": 0.1223, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.2371973991394043, | |
| "learning_rate": 4.4349765258215966e-05, | |
| "loss": 0.13, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.4405248165130615, | |
| "learning_rate": 4.432629107981221e-05, | |
| "loss": 0.1087, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 3.625314712524414, | |
| "learning_rate": 4.4302816901408453e-05, | |
| "loss": 0.1129, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.4910762310028076, | |
| "learning_rate": 4.4279342723004694e-05, | |
| "loss": 0.101, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 3.9263997077941895, | |
| "learning_rate": 4.425586854460094e-05, | |
| "loss": 0.1112, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.6563661098480225, | |
| "learning_rate": 4.423239436619718e-05, | |
| "loss": 0.1268, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.192418098449707, | |
| "learning_rate": 4.420892018779343e-05, | |
| "loss": 0.1131, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.195281505584717, | |
| "learning_rate": 4.418544600938968e-05, | |
| "loss": 0.1094, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.4269959926605225, | |
| "learning_rate": 4.416197183098592e-05, | |
| "loss": 0.101, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.625380516052246, | |
| "learning_rate": 4.4138497652582165e-05, | |
| "loss": 0.0976, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 0.10971714556217194, | |
| "eval_macro/f1": 0.8284099206756298, | |
| "eval_macro/precision": 0.8512250265581908, | |
| "eval_macro/recall": 0.8153409474616256, | |
| "eval_micro/f1": 0.8356522648812638, | |
| "eval_micro/precision": 0.8514781125639568, | |
| "eval_micro/recall": 0.8204039712427251, | |
| "eval_runtime": 28.2017, | |
| "eval_samples/accuracy": 0.8112290311537145, | |
| "eval_samples_per_second": 517.876, | |
| "eval_steps_per_second": 16.205, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 3.6350128650665283, | |
| "learning_rate": 4.4115023474178405e-05, | |
| "loss": 0.1133, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.4444243907928467, | |
| "learning_rate": 4.4091549295774646e-05, | |
| "loss": 0.1404, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.2758675813674927, | |
| "learning_rate": 4.406807511737089e-05, | |
| "loss": 0.1049, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.4315747022628784, | |
| "learning_rate": 4.404460093896714e-05, | |
| "loss": 0.1005, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.812558174133301, | |
| "learning_rate": 4.402112676056338e-05, | |
| "loss": 0.1143, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.0793673992156982, | |
| "learning_rate": 4.399765258215963e-05, | |
| "loss": 0.1275, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.506557583808899, | |
| "learning_rate": 4.3974178403755876e-05, | |
| "loss": 0.0949, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 2.994401216506958, | |
| "learning_rate": 4.395070422535211e-05, | |
| "loss": 0.11, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 3.2654285430908203, | |
| "learning_rate": 4.392723004694836e-05, | |
| "loss": 0.1286, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.5115245580673218, | |
| "learning_rate": 4.3903755868544604e-05, | |
| "loss": 0.0963, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.503602147102356, | |
| "learning_rate": 4.3880281690140845e-05, | |
| "loss": 0.1148, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.3735108375549316, | |
| "learning_rate": 4.385680751173709e-05, | |
| "loss": 0.1222, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.604314088821411, | |
| "learning_rate": 4.383333333333334e-05, | |
| "loss": 0.104, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.7355499267578125, | |
| "learning_rate": 4.380985915492958e-05, | |
| "loss": 0.1145, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 3.0043768882751465, | |
| "learning_rate": 4.378638497652582e-05, | |
| "loss": 0.1341, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.7208704948425293, | |
| "learning_rate": 4.376291079812207e-05, | |
| "loss": 0.1132, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.2890592813491821, | |
| "learning_rate": 4.373943661971831e-05, | |
| "loss": 0.1036, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.7100173234939575, | |
| "learning_rate": 4.3715962441314556e-05, | |
| "loss": 0.1012, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 3.7833025455474854, | |
| "learning_rate": 4.36924882629108e-05, | |
| "loss": 0.0968, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.8149163722991943, | |
| "learning_rate": 4.3669014084507044e-05, | |
| "loss": 0.111, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.475219488143921, | |
| "learning_rate": 4.364553990610329e-05, | |
| "loss": 0.0996, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.110396146774292, | |
| "learning_rate": 4.362206572769953e-05, | |
| "loss": 0.0994, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.823807716369629, | |
| "learning_rate": 4.359859154929577e-05, | |
| "loss": 0.1095, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.6784656047821045, | |
| "learning_rate": 4.357511737089202e-05, | |
| "loss": 0.1321, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.6397862434387207, | |
| "learning_rate": 4.355164319248827e-05, | |
| "loss": 0.1109, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.6760945320129395, | |
| "learning_rate": 4.352816901408451e-05, | |
| "loss": 0.1076, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.7687478065490723, | |
| "learning_rate": 4.3504694835680755e-05, | |
| "loss": 0.1094, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.2985103130340576, | |
| "learning_rate": 4.3481220657277e-05, | |
| "loss": 0.0959, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.3562726974487305, | |
| "learning_rate": 4.345774647887324e-05, | |
| "loss": 0.091, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.858865737915039, | |
| "learning_rate": 4.343427230046948e-05, | |
| "loss": 0.1, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 4.3310136795043945, | |
| "learning_rate": 4.341079812206573e-05, | |
| "loss": 0.0756, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.1979329586029053, | |
| "learning_rate": 4.338732394366197e-05, | |
| "loss": 0.0742, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.7807092666625977, | |
| "learning_rate": 4.336384976525822e-05, | |
| "loss": 0.0804, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 3.8421008586883545, | |
| "learning_rate": 4.3340375586854466e-05, | |
| "loss": 0.0784, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 2.183363199234009, | |
| "learning_rate": 4.3316901408450707e-05, | |
| "loss": 0.1078, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.3300306797027588, | |
| "learning_rate": 4.3293427230046954e-05, | |
| "loss": 0.1222, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.2840020656585693, | |
| "learning_rate": 4.3269953051643194e-05, | |
| "loss": 0.1102, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.7970964908599854, | |
| "learning_rate": 4.3246478873239435e-05, | |
| "loss": 0.0765, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.5625609159469604, | |
| "learning_rate": 4.322300469483568e-05, | |
| "loss": 0.116, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.1956239938735962, | |
| "learning_rate": 4.319953051643193e-05, | |
| "loss": 0.1024, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.1357309818267822, | |
| "learning_rate": 4.317605633802817e-05, | |
| "loss": 0.1128, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.9150160551071167, | |
| "learning_rate": 4.315258215962442e-05, | |
| "loss": 0.1298, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.3721535205841064, | |
| "learning_rate": 4.312910798122066e-05, | |
| "loss": 0.0973, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 3.3223726749420166, | |
| "learning_rate": 4.3105633802816906e-05, | |
| "loss": 0.1276, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.278697967529297, | |
| "learning_rate": 4.3082159624413146e-05, | |
| "loss": 0.1016, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.2410049438476562, | |
| "learning_rate": 4.305868544600939e-05, | |
| "loss": 0.0774, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.6037675142288208, | |
| "learning_rate": 4.3035211267605634e-05, | |
| "loss": 0.0978, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.202610969543457, | |
| "learning_rate": 4.301173708920188e-05, | |
| "loss": 0.0845, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.6229338645935059, | |
| "learning_rate": 4.298826291079812e-05, | |
| "loss": 0.1009, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.605353355407715, | |
| "learning_rate": 4.296478873239437e-05, | |
| "loss": 0.154, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 0.09926234930753708, | |
| "eval_macro/f1": 0.8497144892634887, | |
| "eval_macro/precision": 0.866627156962533, | |
| "eval_macro/recall": 0.8443244774442097, | |
| "eval_micro/f1": 0.8532182916307162, | |
| "eval_micro/precision": 0.8601948503827418, | |
| "eval_micro/recall": 0.8463539883601506, | |
| "eval_runtime": 27.4955, | |
| "eval_samples/accuracy": 0.8319753509072235, | |
| "eval_samples_per_second": 531.178, | |
| "eval_steps_per_second": 16.621, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.2412047386169434, | |
| "learning_rate": 4.294131455399062e-05, | |
| "loss": 0.09, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 2.657113790512085, | |
| "learning_rate": 4.291784037558686e-05, | |
| "loss": 0.0903, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.5215303897857666, | |
| "learning_rate": 4.28943661971831e-05, | |
| "loss": 0.0713, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 4.21558141708374, | |
| "learning_rate": 4.2870892018779345e-05, | |
| "loss": 0.0989, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 4.106936454772949, | |
| "learning_rate": 4.2847417840375586e-05, | |
| "loss": 0.0929, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.9461392164230347, | |
| "learning_rate": 4.282394366197183e-05, | |
| "loss": 0.102, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.7253563404083252, | |
| "learning_rate": 4.280046948356808e-05, | |
| "loss": 0.1059, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.55907142162323, | |
| "learning_rate": 4.277699530516432e-05, | |
| "loss": 0.0858, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.3519967794418335, | |
| "learning_rate": 4.275352112676057e-05, | |
| "loss": 0.1043, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.8086038827896118, | |
| "learning_rate": 4.273004694835681e-05, | |
| "loss": 0.0993, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.8035309314727783, | |
| "learning_rate": 4.270657276995305e-05, | |
| "loss": 0.1009, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.5046508312225342, | |
| "learning_rate": 4.26830985915493e-05, | |
| "loss": 0.0797, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 2.27606463432312, | |
| "learning_rate": 4.2659624413145544e-05, | |
| "loss": 0.0974, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.8333800435066223, | |
| "learning_rate": 4.2636150234741785e-05, | |
| "loss": 0.0811, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.6786904335021973, | |
| "learning_rate": 4.261267605633803e-05, | |
| "loss": 0.1141, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 2.6342034339904785, | |
| "learning_rate": 4.258920187793428e-05, | |
| "loss": 0.0979, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.9367939233779907, | |
| "learning_rate": 4.256572769953051e-05, | |
| "loss": 0.0953, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.6788667440414429, | |
| "learning_rate": 4.254225352112676e-05, | |
| "loss": 0.0959, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.9545440673828125, | |
| "learning_rate": 4.251877934272301e-05, | |
| "loss": 0.1059, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.865529179573059, | |
| "learning_rate": 4.249530516431925e-05, | |
| "loss": 0.1049, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.6784844398498535, | |
| "learning_rate": 4.2471830985915496e-05, | |
| "loss": 0.1029, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.8157294988632202, | |
| "learning_rate": 4.244835680751174e-05, | |
| "loss": 0.1113, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.7033796310424805, | |
| "learning_rate": 4.2424882629107984e-05, | |
| "loss": 0.1181, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.7018625736236572, | |
| "learning_rate": 4.2401408450704224e-05, | |
| "loss": 0.0979, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.8627302646636963, | |
| "learning_rate": 4.237793427230047e-05, | |
| "loss": 0.1359, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 3.45668363571167, | |
| "learning_rate": 4.235446009389671e-05, | |
| "loss": 0.0977, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.8866633772850037, | |
| "learning_rate": 4.233098591549296e-05, | |
| "loss": 0.1024, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 2.3429834842681885, | |
| "learning_rate": 4.230751173708921e-05, | |
| "loss": 0.1123, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 34.54017639160156, | |
| "learning_rate": 4.228403755868545e-05, | |
| "loss": 0.1015, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1823272705078125, | |
| "learning_rate": 4.2260563380281695e-05, | |
| "loss": 0.119, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.785077452659607, | |
| "learning_rate": 4.2237089201877935e-05, | |
| "loss": 0.1086, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.6036972999572754, | |
| "learning_rate": 4.2213615023474176e-05, | |
| "loss": 0.1138, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 8.906991004943848, | |
| "learning_rate": 4.219014084507042e-05, | |
| "loss": 0.1071, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.8581056594848633, | |
| "learning_rate": 4.216666666666667e-05, | |
| "loss": 0.0766, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 7.854555130004883, | |
| "learning_rate": 4.214319248826291e-05, | |
| "loss": 0.1175, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 2.5612292289733887, | |
| "learning_rate": 4.211971830985916e-05, | |
| "loss": 0.0868, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 5.368772983551025, | |
| "learning_rate": 4.2096244131455406e-05, | |
| "loss": 0.1131, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.4319589138031006, | |
| "learning_rate": 4.2072769953051646e-05, | |
| "loss": 0.1003, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.323556661605835, | |
| "learning_rate": 4.204929577464789e-05, | |
| "loss": 0.1189, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.330096483230591, | |
| "learning_rate": 4.2025821596244134e-05, | |
| "loss": 0.0943, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.2431845664978027, | |
| "learning_rate": 4.2002347417840375e-05, | |
| "loss": 0.1233, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.7425540685653687, | |
| "learning_rate": 4.197887323943662e-05, | |
| "loss": 0.1105, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.7905280590057373, | |
| "learning_rate": 4.195539906103287e-05, | |
| "loss": 0.1138, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.4292432069778442, | |
| "learning_rate": 4.193192488262911e-05, | |
| "loss": 0.0902, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.4348337650299072, | |
| "learning_rate": 4.190845070422536e-05, | |
| "loss": 0.1234, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 3.0525081157684326, | |
| "learning_rate": 4.18849765258216e-05, | |
| "loss": 0.0942, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.9704358577728271, | |
| "learning_rate": 4.186150234741784e-05, | |
| "loss": 0.1039, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.6399110555648804, | |
| "learning_rate": 4.1838028169014086e-05, | |
| "loss": 0.0806, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.9704186916351318, | |
| "learning_rate": 4.1814553990610333e-05, | |
| "loss": 0.0744, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 2.6479651927948, | |
| "learning_rate": 4.1791079812206574e-05, | |
| "loss": 0.1027, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 0.1158306822180748, | |
| "eval_macro/f1": 0.8195684810983835, | |
| "eval_macro/precision": 0.8380379108916444, | |
| "eval_macro/recall": 0.8105275723641988, | |
| "eval_micro/f1": 0.8262169680111265, | |
| "eval_micro/precision": 0.839350052984811, | |
| "eval_micro/recall": 0.8134885313248887, | |
| "eval_runtime": 27.9369, | |
| "eval_samples/accuracy": 0.8013009243409791, | |
| "eval_samples_per_second": 522.784, | |
| "eval_steps_per_second": 16.358, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 2.463714122772217, | |
| "learning_rate": 4.176760563380282e-05, | |
| "loss": 0.126, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.4390833377838135, | |
| "learning_rate": 4.174413145539906e-05, | |
| "loss": 0.1077, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.8290138244628906, | |
| "learning_rate": 4.172065727699531e-05, | |
| "loss": 0.1226, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 7.175014495849609, | |
| "learning_rate": 4.169718309859155e-05, | |
| "loss": 0.1347, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.000025987625122, | |
| "learning_rate": 4.167370892018779e-05, | |
| "loss": 0.1227, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.2527687549591064, | |
| "learning_rate": 4.165023474178404e-05, | |
| "loss": 0.0835, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 3.775047540664673, | |
| "learning_rate": 4.1626760563380285e-05, | |
| "loss": 0.1268, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.8067278861999512, | |
| "learning_rate": 4.1603286384976526e-05, | |
| "loss": 0.1167, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 7.179357051849365, | |
| "learning_rate": 4.158215962441315e-05, | |
| "loss": 0.1395, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 14.411430358886719, | |
| "learning_rate": 4.155868544600939e-05, | |
| "loss": 0.1371, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 4.457677841186523, | |
| "learning_rate": 4.153755868544601e-05, | |
| "loss": 0.0889, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 3.6479544639587402, | |
| "learning_rate": 4.1514084507042256e-05, | |
| "loss": 0.0836, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.3600658178329468, | |
| "learning_rate": 4.1490610328638503e-05, | |
| "loss": 0.0988, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.5027318000793457, | |
| "learning_rate": 4.1467136150234744e-05, | |
| "loss": 0.0974, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 3.9970905780792236, | |
| "learning_rate": 4.144366197183099e-05, | |
| "loss": 0.0921, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 3.5798285007476807, | |
| "learning_rate": 4.142018779342723e-05, | |
| "loss": 0.0596, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.3576560020446777, | |
| "learning_rate": 4.139671361502347e-05, | |
| "loss": 0.0888, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.278519630432129, | |
| "learning_rate": 4.137323943661972e-05, | |
| "loss": 0.0754, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.012185573577881, | |
| "learning_rate": 4.134976525821597e-05, | |
| "loss": 0.0949, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.5081164836883545, | |
| "learning_rate": 4.132629107981221e-05, | |
| "loss": 0.1032, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.6326484680175781, | |
| "learning_rate": 4.1302816901408455e-05, | |
| "loss": 0.0848, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.9340505599975586, | |
| "learning_rate": 4.12793427230047e-05, | |
| "loss": 0.0902, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.5919960737228394, | |
| "learning_rate": 4.125586854460094e-05, | |
| "loss": 0.1134, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.7612159848213196, | |
| "learning_rate": 4.1232394366197184e-05, | |
| "loss": 0.1145, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.8657788634300232, | |
| "learning_rate": 4.120892018779343e-05, | |
| "loss": 0.0921, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.0533640384674072, | |
| "learning_rate": 4.118544600938967e-05, | |
| "loss": 0.1018, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.4787542819976807, | |
| "learning_rate": 4.116197183098592e-05, | |
| "loss": 0.0953, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 2.6822516918182373, | |
| "learning_rate": 4.1138497652582166e-05, | |
| "loss": 0.0891, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 3.1771156787872314, | |
| "learning_rate": 4.111502347417841e-05, | |
| "loss": 0.108, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 2.4537932872772217, | |
| "learning_rate": 4.109154929577465e-05, | |
| "loss": 0.0889, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.7221810817718506, | |
| "learning_rate": 4.1068075117370895e-05, | |
| "loss": 0.0918, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.9804364442825317, | |
| "learning_rate": 4.1044600938967135e-05, | |
| "loss": 0.0967, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.1556975841522217, | |
| "learning_rate": 4.102112676056338e-05, | |
| "loss": 0.1045, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.696667194366455, | |
| "learning_rate": 4.099765258215963e-05, | |
| "loss": 0.1076, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 4.1449432373046875, | |
| "learning_rate": 4.097417840375587e-05, | |
| "loss": 0.1015, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.9939990043640137, | |
| "learning_rate": 4.095070422535212e-05, | |
| "loss": 0.1221, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.6629542112350464, | |
| "learning_rate": 4.092723004694836e-05, | |
| "loss": 0.13, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.9935959577560425, | |
| "learning_rate": 4.09037558685446e-05, | |
| "loss": 0.091, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.3307180404663086, | |
| "learning_rate": 4.0880281690140846e-05, | |
| "loss": 0.0947, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 2.5752205848693848, | |
| "learning_rate": 4.085680751173709e-05, | |
| "loss": 0.0781, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 3.055501699447632, | |
| "learning_rate": 4.0833333333333334e-05, | |
| "loss": 0.117, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.8486618995666504, | |
| "learning_rate": 4.080985915492958e-05, | |
| "loss": 0.092, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 2.8639369010925293, | |
| "learning_rate": 4.078638497652582e-05, | |
| "loss": 0.0883, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 2.1113569736480713, | |
| "learning_rate": 4.076291079812207e-05, | |
| "loss": 0.1089, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.2119710445404053, | |
| "learning_rate": 4.073943661971831e-05, | |
| "loss": 0.1111, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 3.3505148887634277, | |
| "learning_rate": 4.071596244131455e-05, | |
| "loss": 0.1011, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.162949562072754, | |
| "learning_rate": 4.06924882629108e-05, | |
| "loss": 0.1035, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 2.1703877449035645, | |
| "learning_rate": 4.0669014084507045e-05, | |
| "loss": 0.0842, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.3537096977233887, | |
| "learning_rate": 4.0645539906103286e-05, | |
| "loss": 0.0932, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.504040002822876, | |
| "learning_rate": 4.062206572769953e-05, | |
| "loss": 0.1043, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 0.08268122375011444, | |
| "eval_macro/f1": 0.8728380733950062, | |
| "eval_macro/precision": 0.88235722533482, | |
| "eval_macro/recall": 0.8667865568824481, | |
| "eval_micro/f1": 0.876, | |
| "eval_micro/precision": 0.8823897186523099, | |
| "eval_micro/recall": 0.8697021567956179, | |
| "eval_runtime": 28.2041, | |
| "eval_samples/accuracy": 0.8590893529613146, | |
| "eval_samples_per_second": 517.832, | |
| "eval_steps_per_second": 16.203, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 2.4709396362304688, | |
| "learning_rate": 4.059859154929578e-05, | |
| "loss": 0.081, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.5959906578063965, | |
| "learning_rate": 4.057511737089202e-05, | |
| "loss": 0.1023, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.6471996307373047, | |
| "learning_rate": 4.055164319248826e-05, | |
| "loss": 0.0955, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.5212956666946411, | |
| "learning_rate": 4.052816901408451e-05, | |
| "loss": 0.094, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.4765437841415405, | |
| "learning_rate": 4.050469483568075e-05, | |
| "loss": 0.1121, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.833054542541504, | |
| "learning_rate": 4.0481220657277e-05, | |
| "loss": 0.0835, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 5.180490970611572, | |
| "learning_rate": 4.0457746478873244e-05, | |
| "loss": 0.1079, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.6298426389694214, | |
| "learning_rate": 4.0434272300469485e-05, | |
| "loss": 0.0757, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 16.718875885009766, | |
| "learning_rate": 4.041079812206573e-05, | |
| "loss": 0.0913, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 4.327826023101807, | |
| "learning_rate": 4.038732394366197e-05, | |
| "loss": 0.0929, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 9.035626411437988, | |
| "learning_rate": 4.0363849765258213e-05, | |
| "loss": 0.1165, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 2.8778882026672363, | |
| "learning_rate": 4.034272300469484e-05, | |
| "loss": 0.0693, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 2.3383145332336426, | |
| "learning_rate": 4.0319248826291085e-05, | |
| "loss": 0.0896, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 32.227989196777344, | |
| "learning_rate": 4.0295774647887326e-05, | |
| "loss": 0.0792, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 4.050955772399902, | |
| "learning_rate": 4.027230046948357e-05, | |
| "loss": 0.1008, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.629568576812744, | |
| "learning_rate": 4.0248826291079814e-05, | |
| "loss": 0.0825, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 3.6457648277282715, | |
| "learning_rate": 4.0225352112676054e-05, | |
| "loss": 0.08, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 9.004801750183105, | |
| "learning_rate": 4.02018779342723e-05, | |
| "loss": 0.0986, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.4670467376708984, | |
| "learning_rate": 4.017840375586855e-05, | |
| "loss": 0.1026, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 2.2789056301116943, | |
| "learning_rate": 4.015492957746479e-05, | |
| "loss": 0.0832, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.7449488639831543, | |
| "learning_rate": 4.013145539906104e-05, | |
| "loss": 0.0842, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 2.188464879989624, | |
| "learning_rate": 4.0107981220657284e-05, | |
| "loss": 0.0756, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.3610471487045288, | |
| "learning_rate": 4.0084507042253525e-05, | |
| "loss": 0.1, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 7.353645324707031, | |
| "learning_rate": 4.0061032863849766e-05, | |
| "loss": 0.0816, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 4.01577091217041, | |
| "learning_rate": 4.003755868544601e-05, | |
| "loss": 0.0783, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.917956590652466, | |
| "learning_rate": 4.0014084507042254e-05, | |
| "loss": 0.0916, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.5377771854400635, | |
| "learning_rate": 3.99906103286385e-05, | |
| "loss": 0.0902, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.7792444229125977, | |
| "learning_rate": 3.996713615023475e-05, | |
| "loss": 0.0695, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 6.497312545776367, | |
| "learning_rate": 3.994366197183099e-05, | |
| "loss": 0.0961, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 2.327802896499634, | |
| "learning_rate": 3.9920187793427236e-05, | |
| "loss": 0.0965, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 2.7272486686706543, | |
| "learning_rate": 3.989671361502348e-05, | |
| "loss": 0.0985, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 1.634901762008667, | |
| "learning_rate": 3.987323943661972e-05, | |
| "loss": 0.0871, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 5.812564849853516, | |
| "learning_rate": 3.9849765258215965e-05, | |
| "loss": 0.0622, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 2.1237666606903076, | |
| "learning_rate": 3.9826291079812205e-05, | |
| "loss": 0.0695, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.3049256801605225, | |
| "learning_rate": 3.980281690140845e-05, | |
| "loss": 0.1182, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 2.1295623779296875, | |
| "learning_rate": 3.97793427230047e-05, | |
| "loss": 0.1011, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.530081033706665, | |
| "learning_rate": 3.975586854460094e-05, | |
| "loss": 0.0848, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 2.473222017288208, | |
| "learning_rate": 3.973239436619718e-05, | |
| "loss": 0.0813, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.927211880683899, | |
| "learning_rate": 3.970892018779343e-05, | |
| "loss": 0.077, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 2.43953800201416, | |
| "learning_rate": 3.968544600938967e-05, | |
| "loss": 0.0779, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 2.9721133708953857, | |
| "learning_rate": 3.9661971830985916e-05, | |
| "loss": 0.1128, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 2.547895669937134, | |
| "learning_rate": 3.9638497652582164e-05, | |
| "loss": 0.0869, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 9.629310607910156, | |
| "learning_rate": 3.9615023474178404e-05, | |
| "loss": 0.0872, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.8918728232383728, | |
| "learning_rate": 3.959154929577465e-05, | |
| "loss": 0.0784, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.12496773898601532, | |
| "learning_rate": 3.956807511737089e-05, | |
| "loss": 0.0596, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 2.4438138008117676, | |
| "learning_rate": 3.954460093896713e-05, | |
| "loss": 0.0811, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 2.845428228378296, | |
| "learning_rate": 3.952112676056338e-05, | |
| "loss": 0.1081, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 5.136063098907471, | |
| "learning_rate": 3.949765258215963e-05, | |
| "loss": 0.0791, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 2.6351871490478516, | |
| "learning_rate": 3.947417840375587e-05, | |
| "loss": 0.0745, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 2.7584948539733887, | |
| "learning_rate": 3.9450704225352115e-05, | |
| "loss": 0.1022, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 0.0945679247379303, | |
| "eval_macro/f1": 0.8653927311909163, | |
| "eval_macro/precision": 0.8720764510455892, | |
| "eval_macro/recall": 0.8665681005259545, | |
| "eval_micro/f1": 0.8702337234444177, | |
| "eval_micro/precision": 0.8724194880264244, | |
| "eval_micro/recall": 0.8680588839438549, | |
| "eval_runtime": 27.8503, | |
| "eval_samples/accuracy": 0.8558712769599452, | |
| "eval_samples_per_second": 524.411, | |
| "eval_steps_per_second": 16.409, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 2.672335624694824, | |
| "learning_rate": 3.942723004694836e-05, | |
| "loss": 0.069, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.058993935585022, | |
| "learning_rate": 3.94037558685446e-05, | |
| "loss": 0.0767, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.6829683780670166, | |
| "learning_rate": 3.9380281690140844e-05, | |
| "loss": 0.0691, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 2.168950080871582, | |
| "learning_rate": 3.935680751173709e-05, | |
| "loss": 0.0567, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.44047364592552185, | |
| "learning_rate": 3.933333333333333e-05, | |
| "loss": 0.064, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.05623297393321991, | |
| "learning_rate": 3.930985915492958e-05, | |
| "loss": 0.0668, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 3.3752222061157227, | |
| "learning_rate": 3.9286384976525826e-05, | |
| "loss": 0.0586, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 3.060328722000122, | |
| "learning_rate": 3.926291079812207e-05, | |
| "loss": 0.0718, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 3.963909149169922, | |
| "learning_rate": 3.9239436619718314e-05, | |
| "loss": 0.0704, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.7161349058151245, | |
| "learning_rate": 3.9215962441314555e-05, | |
| "loss": 0.1007, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.8788623809814453, | |
| "learning_rate": 3.9192488262910795e-05, | |
| "loss": 0.1044, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.5107207298278809, | |
| "learning_rate": 3.916901408450704e-05, | |
| "loss": 0.0918, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.8452281951904297, | |
| "learning_rate": 3.914553990610329e-05, | |
| "loss": 0.0709, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.724955677986145, | |
| "learning_rate": 3.912206572769953e-05, | |
| "loss": 0.0869, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 5.611421585083008, | |
| "learning_rate": 3.909859154929578e-05, | |
| "loss": 0.09, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.4529111385345459, | |
| "learning_rate": 3.9075117370892025e-05, | |
| "loss": 0.0806, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 2.281949043273926, | |
| "learning_rate": 3.9051643192488266e-05, | |
| "loss": 0.084, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 2.5894758701324463, | |
| "learning_rate": 3.9028169014084507e-05, | |
| "loss": 0.0837, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 3.316633701324463, | |
| "learning_rate": 3.9004694835680754e-05, | |
| "loss": 0.0899, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.6215742826461792, | |
| "learning_rate": 3.8981220657276994e-05, | |
| "loss": 0.0773, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 3.763181447982788, | |
| "learning_rate": 3.895774647887324e-05, | |
| "loss": 0.0898, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.0760018825531006, | |
| "learning_rate": 3.893427230046949e-05, | |
| "loss": 0.0872, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 4.817967414855957, | |
| "learning_rate": 3.891079812206573e-05, | |
| "loss": 0.0703, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 3.2826614379882812, | |
| "learning_rate": 3.888732394366198e-05, | |
| "loss": 0.0863, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 1.4272204637527466, | |
| "learning_rate": 3.886384976525822e-05, | |
| "loss": 0.0737, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 2.626129150390625, | |
| "learning_rate": 3.884037558685446e-05, | |
| "loss": 0.0682, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.8139760494232178, | |
| "learning_rate": 3.8816901408450706e-05, | |
| "loss": 0.0705, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 7.998354911804199, | |
| "learning_rate": 3.879342723004695e-05, | |
| "loss": 0.0622, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 2.174318790435791, | |
| "learning_rate": 3.8769953051643193e-05, | |
| "loss": 0.059, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 2.3592910766601562, | |
| "learning_rate": 3.874647887323944e-05, | |
| "loss": 0.0842, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.0818564891815186, | |
| "learning_rate": 3.872300469483569e-05, | |
| "loss": 0.092, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 3.985733985900879, | |
| "learning_rate": 3.869953051643193e-05, | |
| "loss": 0.0579, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 6.383425712585449, | |
| "learning_rate": 3.867605633802817e-05, | |
| "loss": 0.0818, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.3395494222640991, | |
| "learning_rate": 3.865258215962442e-05, | |
| "loss": 0.0903, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 2.034086227416992, | |
| "learning_rate": 3.862910798122066e-05, | |
| "loss": 0.0989, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0606484413146973, | |
| "learning_rate": 3.8605633802816905e-05, | |
| "loss": 0.0978, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 0.6535605788230896, | |
| "learning_rate": 3.858215962441315e-05, | |
| "loss": 0.1039, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.6977512836456299, | |
| "learning_rate": 3.855868544600939e-05, | |
| "loss": 0.0869, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.8797959089279175, | |
| "learning_rate": 3.853521126760564e-05, | |
| "loss": 0.0797, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.8291544914245605, | |
| "learning_rate": 3.851173708920188e-05, | |
| "loss": 0.0658, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.1057475805282593, | |
| "learning_rate": 3.848826291079812e-05, | |
| "loss": 0.0814, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 3.3656809329986572, | |
| "learning_rate": 3.846478873239437e-05, | |
| "loss": 0.1156, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.4543434381484985, | |
| "learning_rate": 3.844131455399061e-05, | |
| "loss": 0.1014, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.2256841659545898, | |
| "learning_rate": 3.8417840375586856e-05, | |
| "loss": 0.0735, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 2.286642551422119, | |
| "learning_rate": 3.8394366197183104e-05, | |
| "loss": 0.0762, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.6697473526000977, | |
| "learning_rate": 3.8370892018779344e-05, | |
| "loss": 0.0759, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 3.3569507598876953, | |
| "learning_rate": 3.8347417840375585e-05, | |
| "loss": 0.064, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.3700305223464966, | |
| "learning_rate": 3.832394366197183e-05, | |
| "loss": 0.0722, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 6.224872589111328, | |
| "learning_rate": 3.830046948356807e-05, | |
| "loss": 0.09, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 3.2396225929260254, | |
| "learning_rate": 3.827699530516432e-05, | |
| "loss": 0.0857, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.07859091460704803, | |
| "eval_macro/f1": 0.8827431167400268, | |
| "eval_macro/precision": 0.886261334665688, | |
| "eval_macro/recall": 0.8813339379181391, | |
| "eval_micro/f1": 0.886541363698724, | |
| "eval_micro/precision": 0.888186378943028, | |
| "eval_micro/recall": 0.8849024306744265, | |
| "eval_runtime": 27.7668, | |
| "eval_samples/accuracy": 0.8712769599452242, | |
| "eval_samples_per_second": 525.988, | |
| "eval_steps_per_second": 16.459, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 0.7627536058425903, | |
| "learning_rate": 3.825352112676057e-05, | |
| "loss": 0.0839, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.6807057857513428, | |
| "learning_rate": 3.823004694835681e-05, | |
| "loss": 0.0613, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 2.8506741523742676, | |
| "learning_rate": 3.8206572769953055e-05, | |
| "loss": 0.0706, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.1173980236053467, | |
| "learning_rate": 3.8183098591549296e-05, | |
| "loss": 0.0982, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.0861316919326782, | |
| "learning_rate": 3.8159624413145536e-05, | |
| "loss": 0.0768, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 2.1186795234680176, | |
| "learning_rate": 3.8136150234741784e-05, | |
| "loss": 0.0914, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 5.411003112792969, | |
| "learning_rate": 3.811267605633803e-05, | |
| "loss": 0.1255, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.6623032093048096, | |
| "learning_rate": 3.808920187793427e-05, | |
| "loss": 0.0482, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 2.952754497528076, | |
| "learning_rate": 3.806572769953052e-05, | |
| "loss": 0.0589, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 19.873538970947266, | |
| "learning_rate": 3.8042253521126766e-05, | |
| "loss": 0.0817, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 2.0706191062927246, | |
| "learning_rate": 3.801877934272301e-05, | |
| "loss": 0.0642, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.5746757984161377, | |
| "learning_rate": 3.799530516431925e-05, | |
| "loss": 0.0788, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 2.0287206172943115, | |
| "learning_rate": 3.7971830985915495e-05, | |
| "loss": 0.0894, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.4267714023590088, | |
| "learning_rate": 3.7948356807511735e-05, | |
| "loss": 0.0921, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 0.6256331205368042, | |
| "learning_rate": 3.792488262910798e-05, | |
| "loss": 0.0611, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.0450356006622314, | |
| "learning_rate": 3.790140845070423e-05, | |
| "loss": 0.077, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.0859485864639282, | |
| "learning_rate": 3.787793427230047e-05, | |
| "loss": 0.0848, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 6.269891262054443, | |
| "learning_rate": 3.785446009389672e-05, | |
| "loss": 0.0766, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 3.0235748291015625, | |
| "learning_rate": 3.783098591549296e-05, | |
| "loss": 0.0828, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.3449300527572632, | |
| "learning_rate": 3.78075117370892e-05, | |
| "loss": 0.0751, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.6258209943771362, | |
| "learning_rate": 3.7784037558685447e-05, | |
| "loss": 0.0878, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.2452788352966309, | |
| "learning_rate": 3.7760563380281694e-05, | |
| "loss": 0.0524, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.5970511436462402, | |
| "learning_rate": 3.7737089201877934e-05, | |
| "loss": 0.077, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.322287917137146, | |
| "learning_rate": 3.771361502347418e-05, | |
| "loss": 0.0859, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.2838901281356812, | |
| "learning_rate": 3.769014084507043e-05, | |
| "loss": 0.0747, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 2.0644261837005615, | |
| "learning_rate": 3.766666666666667e-05, | |
| "loss": 0.0632, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.1590847969055176, | |
| "learning_rate": 3.764319248826291e-05, | |
| "loss": 0.1018, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.9163933992385864, | |
| "learning_rate": 3.761971830985916e-05, | |
| "loss": 0.078, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.6301237344741821, | |
| "learning_rate": 3.75962441314554e-05, | |
| "loss": 0.0484, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 2.292043447494507, | |
| "learning_rate": 3.7572769953051646e-05, | |
| "loss": 0.0777, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.5556319952011108, | |
| "learning_rate": 3.754929577464789e-05, | |
| "loss": 0.0919, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.770605444908142, | |
| "learning_rate": 3.7525821596244133e-05, | |
| "loss": 0.0928, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.8810251355171204, | |
| "learning_rate": 3.750234741784038e-05, | |
| "loss": 0.0662, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.905276358127594, | |
| "learning_rate": 3.747887323943662e-05, | |
| "loss": 0.073, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 3.4868996143341064, | |
| "learning_rate": 3.745539906103286e-05, | |
| "loss": 0.0914, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.6897212266921997, | |
| "learning_rate": 3.743192488262911e-05, | |
| "loss": 0.0569, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 2.168696165084839, | |
| "learning_rate": 3.740845070422536e-05, | |
| "loss": 0.092, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 2.419448137283325, | |
| "learning_rate": 3.73849765258216e-05, | |
| "loss": 0.0787, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.9727363586425781, | |
| "learning_rate": 3.7361502347417845e-05, | |
| "loss": 0.0748, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.079543948173523, | |
| "learning_rate": 3.733802816901409e-05, | |
| "loss": 0.0745, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 2.7353591918945312, | |
| "learning_rate": 3.731455399061033e-05, | |
| "loss": 0.0616, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 2.7964367866516113, | |
| "learning_rate": 3.729107981220657e-05, | |
| "loss": 0.0858, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.155333399772644, | |
| "learning_rate": 3.726760563380282e-05, | |
| "loss": 0.0755, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.547913670539856, | |
| "learning_rate": 3.724413145539906e-05, | |
| "loss": 0.0718, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 4.3765764236450195, | |
| "learning_rate": 3.722065727699531e-05, | |
| "loss": 0.0945, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.7049620151519775, | |
| "learning_rate": 3.7197183098591556e-05, | |
| "loss": 0.0717, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.8908655643463135, | |
| "learning_rate": 3.7173708920187796e-05, | |
| "loss": 0.0909, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.6085829734802246, | |
| "learning_rate": 3.7150234741784044e-05, | |
| "loss": 0.0557, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.4436389207839966, | |
| "learning_rate": 3.7126760563380284e-05, | |
| "loss": 0.072, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 2.450096607208252, | |
| "learning_rate": 3.7103286384976525e-05, | |
| "loss": 0.0787, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 0.07993102073669434, | |
| "eval_macro/f1": 0.8823025838255318, | |
| "eval_macro/precision": 0.8854401633374165, | |
| "eval_macro/recall": 0.8817389137045657, | |
| "eval_micro/f1": 0.8849351626920313, | |
| "eval_micro/precision": 0.884420735877445, | |
| "eval_micro/recall": 0.885450188291681, | |
| "eval_runtime": 28.6405, | |
| "eval_samples/accuracy": 0.8701129750085587, | |
| "eval_samples_per_second": 509.943, | |
| "eval_steps_per_second": 15.956, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 3.5232038497924805, | |
| "learning_rate": 3.707981220657277e-05, | |
| "loss": 0.0636, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.5219241380691528, | |
| "learning_rate": 3.705633802816901e-05, | |
| "loss": 0.0742, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.5301398038864136, | |
| "learning_rate": 3.703286384976526e-05, | |
| "loss": 0.0802, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.245137333869934, | |
| "learning_rate": 3.700938967136151e-05, | |
| "loss": 0.0793, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.6079393625259399, | |
| "learning_rate": 3.698591549295775e-05, | |
| "loss": 0.0769, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 2.262885570526123, | |
| "learning_rate": 3.696244131455399e-05, | |
| "loss": 0.0707, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 2.196953296661377, | |
| "learning_rate": 3.6938967136150236e-05, | |
| "loss": 0.0815, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.519397258758545, | |
| "learning_rate": 3.6915492957746476e-05, | |
| "loss": 0.1008, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.4691829681396484, | |
| "learning_rate": 3.6892018779342724e-05, | |
| "loss": 0.0678, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 3.34647274017334, | |
| "learning_rate": 3.686854460093897e-05, | |
| "loss": 0.0717, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.3005377054214478, | |
| "learning_rate": 3.684507042253521e-05, | |
| "loss": 0.0772, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 3.112577199935913, | |
| "learning_rate": 3.682159624413146e-05, | |
| "loss": 0.0722, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 2.1536271572113037, | |
| "learning_rate": 3.67981220657277e-05, | |
| "loss": 0.0619, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.5694202780723572, | |
| "learning_rate": 3.677464788732394e-05, | |
| "loss": 0.0658, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 5.5259504318237305, | |
| "learning_rate": 3.675117370892019e-05, | |
| "loss": 0.0631, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 2.356536388397217, | |
| "learning_rate": 3.6727699530516435e-05, | |
| "loss": 0.0575, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 0.8688063025474548, | |
| "learning_rate": 3.6704225352112675e-05, | |
| "loss": 0.0678, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 5.551433086395264, | |
| "learning_rate": 3.668075117370892e-05, | |
| "loss": 0.0609, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 2.565351963043213, | |
| "learning_rate": 3.665727699530517e-05, | |
| "loss": 0.0656, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.8359733819961548, | |
| "learning_rate": 3.663380281690141e-05, | |
| "loss": 0.0659, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.4640568196773529, | |
| "learning_rate": 3.661032863849765e-05, | |
| "loss": 0.0466, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.10229873657226562, | |
| "learning_rate": 3.65868544600939e-05, | |
| "loss": 0.0794, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 4.01750373840332, | |
| "learning_rate": 3.656338028169014e-05, | |
| "loss": 0.1165, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 2.3960859775543213, | |
| "learning_rate": 3.6539906103286386e-05, | |
| "loss": 0.0757, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.8701328039169312, | |
| "learning_rate": 3.6516431924882634e-05, | |
| "loss": 0.046, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.9990036487579346, | |
| "learning_rate": 3.6492957746478874e-05, | |
| "loss": 0.1002, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 2.334117889404297, | |
| "learning_rate": 3.646948356807512e-05, | |
| "loss": 0.0724, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 2.3866279125213623, | |
| "learning_rate": 3.644600938967136e-05, | |
| "loss": 0.0994, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 0.8514377474784851, | |
| "learning_rate": 3.64225352112676e-05, | |
| "loss": 0.0744, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.0646476745605469, | |
| "learning_rate": 3.639906103286385e-05, | |
| "loss": 0.0785, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.6509699821472168, | |
| "learning_rate": 3.63755868544601e-05, | |
| "loss": 0.0968, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 2.1369731426239014, | |
| "learning_rate": 3.635211267605634e-05, | |
| "loss": 0.0722, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 2.6235861778259277, | |
| "learning_rate": 3.6328638497652585e-05, | |
| "loss": 0.0725, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.6085470914840698, | |
| "learning_rate": 3.630516431924883e-05, | |
| "loss": 0.0916, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.7220445275306702, | |
| "learning_rate": 3.6281690140845073e-05, | |
| "loss": 0.0779, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.6964889764785767, | |
| "learning_rate": 3.6258215962441314e-05, | |
| "loss": 0.0682, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.0012626647949219, | |
| "learning_rate": 3.623474178403756e-05, | |
| "loss": 0.0573, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.5465306043624878, | |
| "learning_rate": 3.62112676056338e-05, | |
| "loss": 0.0912, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 3.804412841796875, | |
| "learning_rate": 3.618779342723005e-05, | |
| "loss": 0.0683, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 2.037912130355835, | |
| "learning_rate": 3.6164319248826297e-05, | |
| "loss": 0.0655, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.2398627996444702, | |
| "learning_rate": 3.614084507042254e-05, | |
| "loss": 0.0772, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.238403081893921, | |
| "learning_rate": 3.6117370892018785e-05, | |
| "loss": 0.0758, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 2.2002389430999756, | |
| "learning_rate": 3.6093896713615025e-05, | |
| "loss": 0.0794, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 0.43891093134880066, | |
| "learning_rate": 3.6070422535211266e-05, | |
| "loss": 0.0737, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 2.9602551460266113, | |
| "learning_rate": 3.604694835680751e-05, | |
| "loss": 0.0638, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 2.5585052967071533, | |
| "learning_rate": 3.602347417840376e-05, | |
| "loss": 0.0899, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.7965030670166016, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.0717, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.0735397338867188, | |
| "learning_rate": 3.597652582159625e-05, | |
| "loss": 0.0664, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 0.42542290687561035, | |
| "learning_rate": 3.5953051643192496e-05, | |
| "loss": 0.0667, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.6038751602172852, | |
| "learning_rate": 3.5929577464788736e-05, | |
| "loss": 0.0603, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.07968219369649887, | |
| "eval_macro/f1": 0.8841156142067595, | |
| "eval_macro/precision": 0.8863706352948096, | |
| "eval_macro/recall": 0.8858079234329682, | |
| "eval_micro/f1": 0.8880147763031878, | |
| "eval_micro/precision": 0.8872257535370105, | |
| "eval_micro/recall": 0.8888052036973639, | |
| "eval_runtime": 28.6019, | |
| "eval_samples/accuracy": 0.8748373844573776, | |
| "eval_samples_per_second": 510.63, | |
| "eval_steps_per_second": 15.978, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.2723647356033325, | |
| "learning_rate": 3.590610328638498e-05, | |
| "loss": 0.0638, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.2588775157928467, | |
| "learning_rate": 3.5882629107981224e-05, | |
| "loss": 0.0768, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.3960028886795044, | |
| "learning_rate": 3.5859154929577465e-05, | |
| "loss": 0.0589, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.8584194183349609, | |
| "learning_rate": 3.583568075117371e-05, | |
| "loss": 0.0816, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 3.710993528366089, | |
| "learning_rate": 3.581220657276996e-05, | |
| "loss": 0.0818, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.5568336844444275, | |
| "learning_rate": 3.57887323943662e-05, | |
| "loss": 0.0659, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 6.254613876342773, | |
| "learning_rate": 3.576525821596244e-05, | |
| "loss": 0.0765, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.4226367473602295, | |
| "learning_rate": 3.574178403755869e-05, | |
| "loss": 0.0702, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.3769171237945557, | |
| "learning_rate": 3.571830985915493e-05, | |
| "loss": 0.0631, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.0111894607543945, | |
| "learning_rate": 3.5694835680751176e-05, | |
| "loss": 0.0525, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.731039524078369, | |
| "learning_rate": 3.5671361502347416e-05, | |
| "loss": 0.0434, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.3040751218795776, | |
| "learning_rate": 3.5647887323943664e-05, | |
| "loss": 0.0738, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 3.0165939331054688, | |
| "learning_rate": 3.562441314553991e-05, | |
| "loss": 0.084, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 4.147961139678955, | |
| "learning_rate": 3.560093896713615e-05, | |
| "loss": 0.1147, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.984157681465149, | |
| "learning_rate": 3.557746478873239e-05, | |
| "loss": 0.1099, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 3.322106122970581, | |
| "learning_rate": 3.555399061032864e-05, | |
| "loss": 0.0787, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.5959970951080322, | |
| "learning_rate": 3.553051643192488e-05, | |
| "loss": 0.0503, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 3.4334895610809326, | |
| "learning_rate": 3.550704225352113e-05, | |
| "loss": 0.0617, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.8281345963478088, | |
| "learning_rate": 3.5483568075117375e-05, | |
| "loss": 0.0814, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.6435192823410034, | |
| "learning_rate": 3.5460093896713615e-05, | |
| "loss": 0.0671, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 5.341729164123535, | |
| "learning_rate": 3.543661971830986e-05, | |
| "loss": 0.0629, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 4.257093906402588, | |
| "learning_rate": 3.54131455399061e-05, | |
| "loss": 0.0403, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.0729011297225952, | |
| "learning_rate": 3.5389671361502344e-05, | |
| "loss": 0.0995, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 4.933761119842529, | |
| "learning_rate": 3.536619718309859e-05, | |
| "loss": 0.0797, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.1815464496612549, | |
| "learning_rate": 3.534272300469484e-05, | |
| "loss": 0.0641, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.0844922065734863, | |
| "learning_rate": 3.531924882629108e-05, | |
| "loss": 0.0736, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.1614429950714111, | |
| "learning_rate": 3.5295774647887326e-05, | |
| "loss": 0.0666, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.0777498483657837, | |
| "learning_rate": 3.5272300469483574e-05, | |
| "loss": 0.0705, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 4.16050910949707, | |
| "learning_rate": 3.5248826291079814e-05, | |
| "loss": 0.0661, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.9127310514450073, | |
| "learning_rate": 3.5225352112676055e-05, | |
| "loss": 0.0742, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 2.6476809978485107, | |
| "learning_rate": 3.52018779342723e-05, | |
| "loss": 0.0535, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 3.1948328018188477, | |
| "learning_rate": 3.517840375586854e-05, | |
| "loss": 0.0828, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.5699864625930786, | |
| "learning_rate": 3.515492957746479e-05, | |
| "loss": 0.0855, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 2.835286855697632, | |
| "learning_rate": 3.513145539906104e-05, | |
| "loss": 0.0442, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.8206992149353027, | |
| "learning_rate": 3.510798122065728e-05, | |
| "loss": 0.0483, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 2.245073080062866, | |
| "learning_rate": 3.5084507042253525e-05, | |
| "loss": 0.0694, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 2.3250207901000977, | |
| "learning_rate": 3.5061032863849766e-05, | |
| "loss": 0.0776, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.4506942331790924, | |
| "learning_rate": 3.5037558685446007e-05, | |
| "loss": 0.0757, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.560647964477539, | |
| "learning_rate": 3.5014084507042254e-05, | |
| "loss": 0.0581, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 2.318333148956299, | |
| "learning_rate": 3.499295774647888e-05, | |
| "loss": 0.048, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.2419353723526, | |
| "learning_rate": 3.496948356807512e-05, | |
| "loss": 0.0668, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 2.8134100437164307, | |
| "learning_rate": 3.4946009389671367e-05, | |
| "loss": 0.0848, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 0.7494829297065735, | |
| "learning_rate": 3.492253521126761e-05, | |
| "loss": 0.0731, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 2.2532362937927246, | |
| "learning_rate": 3.489906103286385e-05, | |
| "loss": 0.0912, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.5315017700195312, | |
| "learning_rate": 3.4875586854460095e-05, | |
| "loss": 0.0655, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.3311911821365356, | |
| "learning_rate": 3.485211267605634e-05, | |
| "loss": 0.0685, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.0309032201766968, | |
| "learning_rate": 3.482863849765258e-05, | |
| "loss": 0.0778, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.9944983720779419, | |
| "learning_rate": 3.480516431924883e-05, | |
| "loss": 0.0757, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 2.2548060417175293, | |
| "learning_rate": 3.478169014084508e-05, | |
| "loss": 0.0747, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 3.4860355854034424, | |
| "learning_rate": 3.475821596244132e-05, | |
| "loss": 0.0654, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.07351405918598175, | |
| "eval_macro/f1": 0.8905284200569832, | |
| "eval_macro/precision": 0.8936967336400916, | |
| "eval_macro/recall": 0.8890971037530004, | |
| "eval_micro/f1": 0.8933296771097553, | |
| "eval_micro/precision": 0.8944333859564829, | |
| "eval_micro/recall": 0.8922286888052037, | |
| "eval_runtime": 28.7347, | |
| "eval_samples/accuracy": 0.8788086271824718, | |
| "eval_samples_per_second": 508.27, | |
| "eval_steps_per_second": 15.904, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.7109043598175049, | |
| "learning_rate": 3.473474178403756e-05, | |
| "loss": 0.0717, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 0.9149212837219238, | |
| "learning_rate": 3.4711267605633806e-05, | |
| "loss": 0.0474, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.6851046085357666, | |
| "learning_rate": 3.468779342723005e-05, | |
| "loss": 0.0694, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 44.414886474609375, | |
| "learning_rate": 3.4664319248826294e-05, | |
| "loss": 0.0653, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 3.4222354888916016, | |
| "learning_rate": 3.464084507042254e-05, | |
| "loss": 0.0882, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 2.1840708255767822, | |
| "learning_rate": 3.461737089201878e-05, | |
| "loss": 0.0579, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": null, | |
| "learning_rate": 3.45962441314554e-05, | |
| "loss": 0.0563, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 1.8609527349472046, | |
| "learning_rate": 3.457276995305164e-05, | |
| "loss": 0.0694, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.5708072185516357, | |
| "learning_rate": 3.454929577464789e-05, | |
| "loss": 0.0888, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.0683521032333374, | |
| "learning_rate": 3.4525821596244135e-05, | |
| "loss": 0.0611, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 5.111327171325684, | |
| "learning_rate": 3.4502347417840376e-05, | |
| "loss": 0.0508, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.7825844287872314, | |
| "learning_rate": 3.447887323943662e-05, | |
| "loss": 0.0691, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 3.4711480140686035, | |
| "learning_rate": 3.445539906103287e-05, | |
| "loss": 0.1069, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.459466814994812, | |
| "learning_rate": 3.4431924882629104e-05, | |
| "loss": 0.083, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 3.5742270946502686, | |
| "learning_rate": 3.440845070422535e-05, | |
| "loss": 0.0809, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.6339036226272583, | |
| "learning_rate": 3.43849765258216e-05, | |
| "loss": 0.0442, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 2.5935568809509277, | |
| "learning_rate": 3.436150234741784e-05, | |
| "loss": 0.0746, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 3.0025858879089355, | |
| "learning_rate": 3.433802816901409e-05, | |
| "loss": 0.0781, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.0843034982681274, | |
| "learning_rate": 3.4314553990610334e-05, | |
| "loss": 0.061, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 2.5993733406066895, | |
| "learning_rate": 3.4291079812206575e-05, | |
| "loss": 0.0881, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 3.3276283740997314, | |
| "learning_rate": 3.4267605633802815e-05, | |
| "loss": 0.0815, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 3.007833957672119, | |
| "learning_rate": 3.424413145539906e-05, | |
| "loss": 0.0699, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 3.273491621017456, | |
| "learning_rate": 3.42206572769953e-05, | |
| "loss": 0.0873, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.6225374341011047, | |
| "learning_rate": 3.419718309859155e-05, | |
| "loss": 0.0877, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 2.3699939250946045, | |
| "learning_rate": 3.41737089201878e-05, | |
| "loss": 0.0628, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 3.982626438140869, | |
| "learning_rate": 3.415023474178404e-05, | |
| "loss": 0.0818, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 2.5500738620758057, | |
| "learning_rate": 3.4126760563380286e-05, | |
| "loss": 0.077, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.9729875326156616, | |
| "learning_rate": 3.4103286384976526e-05, | |
| "loss": 0.0584, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 3.429597854614258, | |
| "learning_rate": 3.407981220657277e-05, | |
| "loss": 0.0692, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 3.9442617893218994, | |
| "learning_rate": 3.4056338028169014e-05, | |
| "loss": 0.0758, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 3.5854039192199707, | |
| "learning_rate": 3.403286384976526e-05, | |
| "loss": 0.0759, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.9469636678695679, | |
| "learning_rate": 3.40093896713615e-05, | |
| "loss": 0.0546, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.9262510538101196, | |
| "learning_rate": 3.398591549295775e-05, | |
| "loss": 0.0852, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 3.2512049674987793, | |
| "learning_rate": 3.3962441314554e-05, | |
| "loss": 0.0679, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 2.976078987121582, | |
| "learning_rate": 3.393896713615024e-05, | |
| "loss": 0.0551, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.684304118156433, | |
| "learning_rate": 3.391549295774648e-05, | |
| "loss": 0.0569, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 2.2591466903686523, | |
| "learning_rate": 3.3892018779342725e-05, | |
| "loss": 0.0594, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 2.1120476722717285, | |
| "learning_rate": 3.3868544600938966e-05, | |
| "loss": 0.0895, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.734380304813385, | |
| "learning_rate": 3.384507042253521e-05, | |
| "loss": 0.0515, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 2.5207552909851074, | |
| "learning_rate": 3.382159624413146e-05, | |
| "loss": 0.0824, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.5820809602737427, | |
| "learning_rate": 3.37981220657277e-05, | |
| "loss": 0.0984, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 2.8833916187286377, | |
| "learning_rate": 3.377464788732395e-05, | |
| "loss": 0.0718, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.4902230501174927, | |
| "learning_rate": 3.375117370892019e-05, | |
| "loss": 0.059, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 2.578580141067505, | |
| "learning_rate": 3.372769953051643e-05, | |
| "loss": 0.0768, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.8747726678848267, | |
| "learning_rate": 3.370422535211268e-05, | |
| "loss": 0.0996, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.386628270149231, | |
| "learning_rate": 3.3680751173708924e-05, | |
| "loss": 0.0448, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.6875271797180176, | |
| "learning_rate": 3.3657276995305165e-05, | |
| "loss": 0.1286, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.8919246196746826, | |
| "learning_rate": 3.363380281690141e-05, | |
| "loss": 0.0644, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.1084660291671753, | |
| "learning_rate": 3.361032863849766e-05, | |
| "loss": 0.0652, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 2.3537535667419434, | |
| "learning_rate": 3.35868544600939e-05, | |
| "loss": 0.0844, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.06981974095106125, | |
| "eval_macro/f1": 0.895093218534528, | |
| "eval_macro/precision": 0.8993744432665862, | |
| "eval_macro/recall": 0.893346057078023, | |
| "eval_micro/f1": 0.8977924262711573, | |
| "eval_micro/precision": 0.9003580773998072, | |
| "eval_micro/recall": 0.8952413557001027, | |
| "eval_runtime": 28.2589, | |
| "eval_samples/accuracy": 0.885792536802465, | |
| "eval_samples_per_second": 516.828, | |
| "eval_steps_per_second": 16.172, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 2.0603086948394775, | |
| "learning_rate": 3.356338028169014e-05, | |
| "loss": 0.08, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 3.941000461578369, | |
| "learning_rate": 3.353990610328639e-05, | |
| "loss": 0.0561, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.4806817770004272, | |
| "learning_rate": 3.351643192488263e-05, | |
| "loss": 0.0699, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 0.9418378472328186, | |
| "learning_rate": 3.3492957746478876e-05, | |
| "loss": 0.062, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 2.2413206100463867, | |
| "learning_rate": 3.3469483568075117e-05, | |
| "loss": 0.0643, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 0.5685548782348633, | |
| "learning_rate": 3.3446009389671364e-05, | |
| "loss": 0.0621, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 2.528632640838623, | |
| "learning_rate": 3.342253521126761e-05, | |
| "loss": 0.1233, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 0.1424780786037445, | |
| "learning_rate": 3.339906103286385e-05, | |
| "loss": 0.0732, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.396640658378601, | |
| "learning_rate": 3.337558685446009e-05, | |
| "loss": 0.0749, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 0.4843108057975769, | |
| "learning_rate": 3.335211267605634e-05, | |
| "loss": 0.0604, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.4272027015686035, | |
| "learning_rate": 3.332863849765258e-05, | |
| "loss": 0.0476, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.5079143047332764, | |
| "learning_rate": 3.330516431924883e-05, | |
| "loss": 0.0747, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 2.5758399963378906, | |
| "learning_rate": 3.3281690140845075e-05, | |
| "loss": 0.0578, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 3.6596169471740723, | |
| "learning_rate": 3.3258215962441316e-05, | |
| "loss": 0.0586, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 2.591481924057007, | |
| "learning_rate": 3.323474178403756e-05, | |
| "loss": 0.0571, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 0.9179452657699585, | |
| "learning_rate": 3.3211267605633804e-05, | |
| "loss": 0.0722, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 2.251352071762085, | |
| "learning_rate": 3.3187793427230044e-05, | |
| "loss": 0.0784, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 2.1384119987487793, | |
| "learning_rate": 3.316431924882629e-05, | |
| "loss": 0.0814, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.7491868138313293, | |
| "learning_rate": 3.314084507042254e-05, | |
| "loss": 0.0701, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 2.3708689212799072, | |
| "learning_rate": 3.311737089201878e-05, | |
| "loss": 0.0816, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.7296544313430786, | |
| "learning_rate": 3.309389671361503e-05, | |
| "loss": 0.0704, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.8387882709503174, | |
| "learning_rate": 3.307042253521127e-05, | |
| "loss": 0.0669, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.6314681768417358, | |
| "learning_rate": 3.304694835680751e-05, | |
| "loss": 0.0657, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.852002739906311, | |
| "learning_rate": 3.3023474178403755e-05, | |
| "loss": 0.0562, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 2.0356638431549072, | |
| "learning_rate": 3.3e-05, | |
| "loss": 0.0793, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.4071274995803833, | |
| "learning_rate": 3.297652582159624e-05, | |
| "loss": 0.062, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 3.241372585296631, | |
| "learning_rate": 3.295305164319249e-05, | |
| "loss": 0.0774, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.8536049127578735, | |
| "learning_rate": 3.292957746478874e-05, | |
| "loss": 0.0507, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 2.601881980895996, | |
| "learning_rate": 3.290610328638498e-05, | |
| "loss": 0.0837, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.7648743391036987, | |
| "learning_rate": 3.288262910798122e-05, | |
| "loss": 0.0598, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.050346851348877, | |
| "learning_rate": 3.2859154929577466e-05, | |
| "loss": 0.0676, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.15019965171813965, | |
| "learning_rate": 3.283568075117371e-05, | |
| "loss": 0.0539, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.2535006999969482, | |
| "learning_rate": 3.2812206572769954e-05, | |
| "loss": 0.0478, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.989013433456421, | |
| "learning_rate": 3.27887323943662e-05, | |
| "loss": 0.0599, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 4.4332756996154785, | |
| "learning_rate": 3.276525821596244e-05, | |
| "loss": 0.0609, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 2.3076884746551514, | |
| "learning_rate": 3.274178403755869e-05, | |
| "loss": 0.058, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.2508978843688965, | |
| "learning_rate": 3.271830985915493e-05, | |
| "loss": 0.0802, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.3526707887649536, | |
| "learning_rate": 3.269483568075117e-05, | |
| "loss": 0.048, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.801395058631897, | |
| "learning_rate": 3.267136150234742e-05, | |
| "loss": 0.1073, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 2.8772401809692383, | |
| "learning_rate": 3.2647887323943665e-05, | |
| "loss": 0.089, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.380043387413025, | |
| "learning_rate": 3.2624413145539906e-05, | |
| "loss": 0.0525, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.199769377708435, | |
| "learning_rate": 3.260093896713615e-05, | |
| "loss": 0.0799, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 2.3984384536743164, | |
| "learning_rate": 3.25774647887324e-05, | |
| "loss": 0.0465, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.5681291222572327, | |
| "learning_rate": 3.255399061032864e-05, | |
| "loss": 0.0328, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 6.884750843048096, | |
| "learning_rate": 3.253051643192488e-05, | |
| "loss": 0.0789, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 5.364428520202637, | |
| "learning_rate": 3.250704225352113e-05, | |
| "loss": 0.1007, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.7261794805526733, | |
| "learning_rate": 3.248356807511737e-05, | |
| "loss": 0.0461, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.6595466136932373, | |
| "learning_rate": 3.246009389671362e-05, | |
| "loss": 0.0682, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 3.9795382022857666, | |
| "learning_rate": 3.2436619718309864e-05, | |
| "loss": 0.0717, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.648000955581665, | |
| "learning_rate": 3.2413145539906105e-05, | |
| "loss": 0.0751, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.07527824491262436, | |
| "eval_macro/f1": 0.8894495203537635, | |
| "eval_macro/precision": 0.8925620624824648, | |
| "eval_macro/recall": 0.8900694450587358, | |
| "eval_micro/f1": 0.8929048156894959, | |
| "eval_micro/precision": 0.8926909389542842, | |
| "eval_micro/recall": 0.8931187949332421, | |
| "eval_runtime": 29.2466, | |
| "eval_samples/accuracy": 0.8813420061622732, | |
| "eval_samples_per_second": 499.375, | |
| "eval_steps_per_second": 15.626, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.2241559028625488, | |
| "learning_rate": 3.238967136150235e-05, | |
| "loss": 0.0636, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.2783581018447876, | |
| "learning_rate": 3.236619718309859e-05, | |
| "loss": 0.058, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.1444569826126099, | |
| "learning_rate": 3.234272300469483e-05, | |
| "loss": 0.0529, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 3.06350040435791, | |
| "learning_rate": 3.231924882629108e-05, | |
| "loss": 0.083, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 0.7670800685882568, | |
| "learning_rate": 3.229577464788733e-05, | |
| "loss": 0.056, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 2.1597325801849365, | |
| "learning_rate": 3.227230046948357e-05, | |
| "loss": 0.0796, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.469767451286316, | |
| "learning_rate": 3.2248826291079816e-05, | |
| "loss": 0.0595, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.9761245250701904, | |
| "learning_rate": 3.222535211267606e-05, | |
| "loss": 0.0757, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.3863333463668823, | |
| "learning_rate": 3.2201877934272304e-05, | |
| "loss": 0.102, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 2.2688632011413574, | |
| "learning_rate": 3.2178403755868544e-05, | |
| "loss": 0.0489, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.1232688426971436, | |
| "learning_rate": 3.215492957746479e-05, | |
| "loss": 0.0621, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.6261709928512573, | |
| "learning_rate": 3.213145539906103e-05, | |
| "loss": 0.0464, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.0990047454833984, | |
| "learning_rate": 3.210798122065728e-05, | |
| "loss": 0.0756, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.7575910091400146, | |
| "learning_rate": 3.208450704225353e-05, | |
| "loss": 0.0865, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.6897794008255005, | |
| "learning_rate": 3.206103286384977e-05, | |
| "loss": 0.0874, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 3.900545120239258, | |
| "learning_rate": 3.2037558685446015e-05, | |
| "loss": 0.0491, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.0206152200698853, | |
| "learning_rate": 3.2014084507042256e-05, | |
| "loss": 0.0678, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.4470140933990479, | |
| "learning_rate": 3.1990610328638496e-05, | |
| "loss": 0.0817, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.211830496788025, | |
| "learning_rate": 3.1967136150234743e-05, | |
| "loss": 0.0651, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.4572837352752686, | |
| "learning_rate": 3.1943661971830984e-05, | |
| "loss": 0.0527, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.8414143323898315, | |
| "learning_rate": 3.192018779342723e-05, | |
| "loss": 0.0535, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 0.916439950466156, | |
| "learning_rate": 3.189671361502348e-05, | |
| "loss": 0.0653, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 2.0691421031951904, | |
| "learning_rate": 3.187323943661972e-05, | |
| "loss": 0.0717, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 0.38336944580078125, | |
| "learning_rate": 3.184976525821597e-05, | |
| "loss": 0.0677, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.506400465965271, | |
| "learning_rate": 3.182629107981221e-05, | |
| "loss": 0.0585, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 0.8172094821929932, | |
| "learning_rate": 3.180281690140845e-05, | |
| "loss": 0.0807, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.8917680978775024, | |
| "learning_rate": 3.1779342723004695e-05, | |
| "loss": 0.0714, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.2013388872146606, | |
| "learning_rate": 3.175586854460094e-05, | |
| "loss": 0.0703, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 0.5137236714363098, | |
| "learning_rate": 3.173239436619718e-05, | |
| "loss": 0.063, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 4.698399543762207, | |
| "learning_rate": 3.170892018779343e-05, | |
| "loss": 0.0573, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 2.3770229816436768, | |
| "learning_rate": 3.168544600938967e-05, | |
| "loss": 0.0771, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.3372191190719604, | |
| "learning_rate": 3.166197183098591e-05, | |
| "loss": 0.074, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.439131259918213, | |
| "learning_rate": 3.163849765258216e-05, | |
| "loss": 0.0552, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 9.400777816772461, | |
| "learning_rate": 3.1615023474178406e-05, | |
| "loss": 0.0633, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.817305326461792, | |
| "learning_rate": 3.159154929577465e-05, | |
| "loss": 0.0608, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 3.0985467433929443, | |
| "learning_rate": 3.1568075117370894e-05, | |
| "loss": 0.0856, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 2.4275131225585938, | |
| "learning_rate": 3.154460093896714e-05, | |
| "loss": 0.0829, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 2.892664909362793, | |
| "learning_rate": 3.152112676056338e-05, | |
| "loss": 0.0714, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.899727702140808, | |
| "learning_rate": 3.149765258215962e-05, | |
| "loss": 0.0689, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 2.081380844116211, | |
| "learning_rate": 3.147417840375587e-05, | |
| "loss": 0.0618, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.7399240732192993, | |
| "learning_rate": 3.145070422535211e-05, | |
| "loss": 0.062, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 0.36951398849487305, | |
| "learning_rate": 3.142723004694836e-05, | |
| "loss": 0.0501, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 0.5229721069335938, | |
| "learning_rate": 3.1403755868544605e-05, | |
| "loss": 0.088, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 2.4102940559387207, | |
| "learning_rate": 3.1380281690140846e-05, | |
| "loss": 0.0403, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.0259218215942383, | |
| "learning_rate": 3.135680751173709e-05, | |
| "loss": 0.07, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 2.9200522899627686, | |
| "learning_rate": 3.1333333333333334e-05, | |
| "loss": 0.0934, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.3367905616760254, | |
| "learning_rate": 3.1309859154929574e-05, | |
| "loss": 0.0799, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 0.7008821964263916, | |
| "learning_rate": 3.128638497652582e-05, | |
| "loss": 0.0581, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 6.872345447540283, | |
| "learning_rate": 3.126291079812207e-05, | |
| "loss": 0.0762, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 2.593233346939087, | |
| "learning_rate": 3.123943661971831e-05, | |
| "loss": 0.065, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 0.07052791863679886, | |
| "eval_macro/f1": 0.8992477199985428, | |
| "eval_macro/precision": 0.8994462156866554, | |
| "eval_macro/recall": 0.9003938152127393, | |
| "eval_micro/f1": 0.9019728519164358, | |
| "eval_micro/precision": 0.9008332195055321, | |
| "eval_micro/recall": 0.9031153714481343, | |
| "eval_runtime": 28.0078, | |
| "eval_samples/accuracy": 0.8907223553577542, | |
| "eval_samples_per_second": 521.462, | |
| "eval_steps_per_second": 16.317, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 5.146605491638184, | |
| "learning_rate": 3.121596244131456e-05, | |
| "loss": 0.0658, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.41272249817848206, | |
| "learning_rate": 3.1192488262910804e-05, | |
| "loss": 0.066, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.21056464314460754, | |
| "learning_rate": 3.1169014084507045e-05, | |
| "loss": 0.0414, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 2.248830556869507, | |
| "learning_rate": 3.1145539906103285e-05, | |
| "loss": 0.0526, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.4600979089736938, | |
| "learning_rate": 3.112206572769953e-05, | |
| "loss": 0.0498, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 0.836912214756012, | |
| "learning_rate": 3.109859154929577e-05, | |
| "loss": 0.0711, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.9406927824020386, | |
| "learning_rate": 3.107511737089202e-05, | |
| "loss": 0.0398, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.8109996318817139, | |
| "learning_rate": 3.105164319248827e-05, | |
| "loss": 0.0607, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 3.5531466007232666, | |
| "learning_rate": 3.102816901408451e-05, | |
| "loss": 0.057, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.3769063651561737, | |
| "learning_rate": 3.1004694835680756e-05, | |
| "loss": 0.0649, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.7092906832695007, | |
| "learning_rate": 3.0981220657276997e-05, | |
| "loss": 0.0722, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.2704371213912964, | |
| "learning_rate": 3.095774647887324e-05, | |
| "loss": 0.0629, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 0.6935750842094421, | |
| "learning_rate": 3.0934272300469484e-05, | |
| "loss": 0.0519, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.7571748495101929, | |
| "learning_rate": 3.091079812206573e-05, | |
| "loss": 0.0436, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 0.3458290100097656, | |
| "learning_rate": 3.088732394366197e-05, | |
| "loss": 0.0628, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 2.861957311630249, | |
| "learning_rate": 3.086384976525822e-05, | |
| "loss": 0.0603, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 2.1730966567993164, | |
| "learning_rate": 3.084037558685447e-05, | |
| "loss": 0.1088, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.8724583983421326, | |
| "learning_rate": 3.081690140845071e-05, | |
| "loss": 0.0419, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 4.432601451873779, | |
| "learning_rate": 3.079342723004695e-05, | |
| "loss": 0.0556, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.5362699031829834, | |
| "learning_rate": 3.0769953051643196e-05, | |
| "loss": 0.0514, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 3.248563766479492, | |
| "learning_rate": 3.0746478873239436e-05, | |
| "loss": 0.0713, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 2.2945611476898193, | |
| "learning_rate": 3.0723004694835683e-05, | |
| "loss": 0.0553, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.3250812292099, | |
| "learning_rate": 3.069953051643193e-05, | |
| "loss": 0.0577, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 2.426212787628174, | |
| "learning_rate": 3.067605633802817e-05, | |
| "loss": 0.086, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.70464289188385, | |
| "learning_rate": 3.065258215962442e-05, | |
| "loss": 0.0542, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.791068434715271, | |
| "learning_rate": 3.062910798122066e-05, | |
| "loss": 0.0697, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.9915051460266113, | |
| "learning_rate": 3.06056338028169e-05, | |
| "loss": 0.0711, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.4869524836540222, | |
| "learning_rate": 3.058215962441315e-05, | |
| "loss": 0.0541, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 0.5803549885749817, | |
| "learning_rate": 3.055868544600939e-05, | |
| "loss": 0.0392, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.3323622941970825, | |
| "learning_rate": 3.0535211267605635e-05, | |
| "loss": 0.0631, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 2.372645139694214, | |
| "learning_rate": 3.051173708920188e-05, | |
| "loss": 0.0815, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.5064605474472046, | |
| "learning_rate": 3.048826291079812e-05, | |
| "loss": 0.0925, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 2.408501625061035, | |
| "learning_rate": 3.0464788732394367e-05, | |
| "loss": 0.0769, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.6267452239990234, | |
| "learning_rate": 3.0441314553990614e-05, | |
| "loss": 0.0574, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 2.341489791870117, | |
| "learning_rate": 3.0417840375586855e-05, | |
| "loss": 0.0665, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 2.8234708309173584, | |
| "learning_rate": 3.03943661971831e-05, | |
| "loss": 0.0629, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.7846190333366394, | |
| "learning_rate": 3.0370892018779346e-05, | |
| "loss": 0.0646, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 2.0671329498291016, | |
| "learning_rate": 3.0347417840375587e-05, | |
| "loss": 0.0667, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.061323881149292, | |
| "learning_rate": 3.032394366197183e-05, | |
| "loss": 0.0676, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 2.8302061557769775, | |
| "learning_rate": 3.0300469483568078e-05, | |
| "loss": 0.0454, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 2.0348949432373047, | |
| "learning_rate": 3.027699530516432e-05, | |
| "loss": 0.0839, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 4.4613494873046875, | |
| "learning_rate": 3.0253521126760566e-05, | |
| "loss": 0.0617, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.2178605794906616, | |
| "learning_rate": 3.023004694835681e-05, | |
| "loss": 0.0877, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.9891900420188904, | |
| "learning_rate": 3.020657276995305e-05, | |
| "loss": 0.0658, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.4533942937850952, | |
| "learning_rate": 3.0183098591549298e-05, | |
| "loss": 0.0901, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.6907552480697632, | |
| "learning_rate": 3.0159624413145542e-05, | |
| "loss": 0.053, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 0.9100379943847656, | |
| "learning_rate": 3.0136150234741782e-05, | |
| "loss": 0.0392, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.1074556112289429, | |
| "learning_rate": 3.011267605633803e-05, | |
| "loss": 0.0781, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 2.8616244792938232, | |
| "learning_rate": 3.0089201877934277e-05, | |
| "loss": 0.0448, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.9619174003601074, | |
| "learning_rate": 3.0065727699530514e-05, | |
| "loss": 0.0803, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.06560152769088745, | |
| "eval_macro/f1": 0.9052200757632112, | |
| "eval_macro/precision": 0.9052297506739012, | |
| "eval_macro/recall": 0.905709010268969, | |
| "eval_micro/f1": 0.9075227599425012, | |
| "eval_micro/precision": 0.9072743447615137, | |
| "eval_micro/recall": 0.9077713111947963, | |
| "eval_runtime": 27.8697, | |
| "eval_samples/accuracy": 0.8985963711057857, | |
| "eval_samples_per_second": 524.046, | |
| "eval_steps_per_second": 16.398, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.557591199874878, | |
| "learning_rate": 3.004225352112676e-05, | |
| "loss": 0.072, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9702019691467285, | |
| "learning_rate": 3.001877934272301e-05, | |
| "loss": 0.0418, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.3909103870391846, | |
| "learning_rate": 2.999530516431925e-05, | |
| "loss": 0.0548, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.390047073364258, | |
| "learning_rate": 2.9971830985915494e-05, | |
| "loss": 0.0602, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9028347134590149, | |
| "learning_rate": 2.994835680751174e-05, | |
| "loss": 0.0489, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 1.160230278968811, | |
| "learning_rate": 2.992488262910798e-05, | |
| "loss": 0.0542, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.7624329924583435, | |
| "learning_rate": 2.9901408450704225e-05, | |
| "loss": 0.0568, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 1.706424593925476, | |
| "learning_rate": 2.9877934272300473e-05, | |
| "loss": 0.0593, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 2.120983839035034, | |
| "learning_rate": 2.9854460093896713e-05, | |
| "loss": 0.045, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 1.4256010055541992, | |
| "learning_rate": 2.983098591549296e-05, | |
| "loss": 0.0559, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 7.589344501495361, | |
| "learning_rate": 2.9807511737089205e-05, | |
| "loss": 0.0348, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 2.1067638397216797, | |
| "learning_rate": 2.9784037558685445e-05, | |
| "loss": 0.057, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.43838655948638916, | |
| "learning_rate": 2.9760563380281693e-05, | |
| "loss": 0.0371, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 2.4295332431793213, | |
| "learning_rate": 2.9737089201877936e-05, | |
| "loss": 0.077, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 3.2406866550445557, | |
| "learning_rate": 2.9713615023474177e-05, | |
| "loss": 0.0472, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 2.5891144275665283, | |
| "learning_rate": 2.9690140845070424e-05, | |
| "loss": 0.0882, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.5570282340049744, | |
| "learning_rate": 2.9666666666666672e-05, | |
| "loss": 0.0601, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 1.3984103202819824, | |
| "learning_rate": 2.9643192488262912e-05, | |
| "loss": 0.0484, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 8.100823402404785, | |
| "learning_rate": 2.9619718309859156e-05, | |
| "loss": 0.0383, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 3.0917282104492188, | |
| "learning_rate": 2.9596244131455404e-05, | |
| "loss": 0.0452, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 1.646676778793335, | |
| "learning_rate": 2.9572769953051644e-05, | |
| "loss": 0.0407, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.6801753640174866, | |
| "learning_rate": 2.9549295774647888e-05, | |
| "loss": 0.0493, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.7821123600006104, | |
| "learning_rate": 2.9525821596244135e-05, | |
| "loss": 0.0599, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 1.8716830015182495, | |
| "learning_rate": 2.9502347417840376e-05, | |
| "loss": 0.0515, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.6820869445800781, | |
| "learning_rate": 2.9478873239436623e-05, | |
| "loss": 0.0392, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 2.376614809036255, | |
| "learning_rate": 2.9455399061032867e-05, | |
| "loss": 0.0568, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.6240296363830566, | |
| "learning_rate": 2.9431924882629108e-05, | |
| "loss": 0.063, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 2.458850145339966, | |
| "learning_rate": 2.9408450704225355e-05, | |
| "loss": 0.0508, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 2.6562559604644775, | |
| "learning_rate": 2.93849765258216e-05, | |
| "loss": 0.0689, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 3.8325881958007812, | |
| "learning_rate": 2.936150234741784e-05, | |
| "loss": 0.055, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 3.6671085357666016, | |
| "learning_rate": 2.9338028169014087e-05, | |
| "loss": 0.0546, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 3.2882981300354004, | |
| "learning_rate": 2.931455399061033e-05, | |
| "loss": 0.0687, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 1.7834620475769043, | |
| "learning_rate": 2.9291079812206572e-05, | |
| "loss": 0.0535, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 2.3183083534240723, | |
| "learning_rate": 2.926760563380282e-05, | |
| "loss": 0.0494, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.2512354850769043, | |
| "learning_rate": 2.924413145539906e-05, | |
| "loss": 0.0537, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 10.471619606018066, | |
| "learning_rate": 2.9220657276995307e-05, | |
| "loss": 0.0644, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 2.114579439163208, | |
| "learning_rate": 2.919718309859155e-05, | |
| "loss": 0.0466, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.7307925224304199, | |
| "learning_rate": 2.917370892018779e-05, | |
| "loss": 0.0536, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.281708836555481, | |
| "learning_rate": 2.915023474178404e-05, | |
| "loss": 0.0627, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 4.101240634918213, | |
| "learning_rate": 2.9126760563380283e-05, | |
| "loss": 0.0685, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 0.2623077929019928, | |
| "learning_rate": 2.9103286384976523e-05, | |
| "loss": 0.0415, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 2.209989547729492, | |
| "learning_rate": 2.907981220657277e-05, | |
| "loss": 0.063, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 1.111786961555481, | |
| "learning_rate": 2.9056338028169018e-05, | |
| "loss": 0.0753, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 2.8421497344970703, | |
| "learning_rate": 2.903286384976526e-05, | |
| "loss": 0.0259, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 3.9581027030944824, | |
| "learning_rate": 2.9009389671361503e-05, | |
| "loss": 0.0534, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 1.7602144479751587, | |
| "learning_rate": 2.898591549295775e-05, | |
| "loss": 0.0633, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 3.0346012115478516, | |
| "learning_rate": 2.896244131455399e-05, | |
| "loss": 0.0488, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 4.6578850746154785, | |
| "learning_rate": 2.8938967136150234e-05, | |
| "loss": 0.0563, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.955411434173584, | |
| "learning_rate": 2.8915492957746482e-05, | |
| "loss": 0.0544, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.6909317970275879, | |
| "learning_rate": 2.8892018779342722e-05, | |
| "loss": 0.0744, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 0.0659680888056755, | |
| "eval_macro/f1": 0.9030170471313492, | |
| "eval_macro/precision": 0.9076760486157719, | |
| "eval_macro/recall": 0.8993950760206066, | |
| "eval_micro/f1": 0.9036940221503749, | |
| "eval_micro/precision": 0.9079411154882853, | |
| "eval_micro/recall": 0.899486477233824, | |
| "eval_runtime": 28.0326, | |
| "eval_samples/accuracy": 0.8906538856555974, | |
| "eval_samples_per_second": 521.0, | |
| "eval_steps_per_second": 16.302, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.8493853807449341, | |
| "learning_rate": 2.886854460093897e-05, | |
| "loss": 0.0391, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.0768928527832031, | |
| "learning_rate": 2.8845070422535214e-05, | |
| "loss": 0.051, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.8907384872436523, | |
| "learning_rate": 2.8821596244131454e-05, | |
| "loss": 0.0382, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 3.2443666458129883, | |
| "learning_rate": 2.87981220657277e-05, | |
| "loss": 0.0441, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.6289181709289551, | |
| "learning_rate": 2.8774647887323946e-05, | |
| "loss": 0.0429, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 2.1197361946105957, | |
| "learning_rate": 2.8751173708920186e-05, | |
| "loss": 0.0364, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 4.240948677062988, | |
| "learning_rate": 2.8727699530516433e-05, | |
| "loss": 0.0597, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 2.6654043197631836, | |
| "learning_rate": 2.870422535211268e-05, | |
| "loss": 0.0571, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 1.7354888916015625, | |
| "learning_rate": 2.8680751173708918e-05, | |
| "loss": 0.0363, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.7101438045501709, | |
| "learning_rate": 2.8657276995305165e-05, | |
| "loss": 0.0507, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.45135313272476196, | |
| "learning_rate": 2.8633802816901413e-05, | |
| "loss": 0.0473, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.211514949798584, | |
| "learning_rate": 2.8610328638497653e-05, | |
| "loss": 0.0565, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.7397087812423706, | |
| "learning_rate": 2.8586854460093897e-05, | |
| "loss": 0.0608, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.08779401332139969, | |
| "learning_rate": 2.8563380281690145e-05, | |
| "loss": 0.0606, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 1.5981838703155518, | |
| "learning_rate": 2.8539906103286385e-05, | |
| "loss": 0.0359, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 1.314452052116394, | |
| "learning_rate": 2.851643192488263e-05, | |
| "loss": 0.0521, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.439706414937973, | |
| "learning_rate": 2.8492957746478876e-05, | |
| "loss": 0.0746, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 2.364208221435547, | |
| "learning_rate": 2.8469483568075117e-05, | |
| "loss": 0.0672, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.9845068454742432, | |
| "learning_rate": 2.8446009389671364e-05, | |
| "loss": 0.1084, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.915307641029358, | |
| "learning_rate": 2.842253521126761e-05, | |
| "loss": 0.0886, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 2.353233814239502, | |
| "learning_rate": 2.839906103286385e-05, | |
| "loss": 0.0528, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 4.984065055847168, | |
| "learning_rate": 2.8375586854460096e-05, | |
| "loss": 0.0632, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 2.3609771728515625, | |
| "learning_rate": 2.835211267605634e-05, | |
| "loss": 0.039, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.2718889713287354, | |
| "learning_rate": 2.832863849765258e-05, | |
| "loss": 0.0316, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 0.3682420551776886, | |
| "learning_rate": 2.8305164319248828e-05, | |
| "loss": 0.0469, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 2.0798330307006836, | |
| "learning_rate": 2.8281690140845075e-05, | |
| "loss": 0.0343, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 2.4822909832000732, | |
| "learning_rate": 2.8258215962441316e-05, | |
| "loss": 0.0608, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.664831280708313, | |
| "learning_rate": 2.823474178403756e-05, | |
| "loss": 0.0674, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.6971834897994995, | |
| "learning_rate": 2.8211267605633807e-05, | |
| "loss": 0.0561, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.227864146232605, | |
| "learning_rate": 2.8187793427230048e-05, | |
| "loss": 0.0258, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.0127270221710205, | |
| "learning_rate": 2.8164319248826292e-05, | |
| "loss": 0.0724, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.144787073135376, | |
| "learning_rate": 2.814084507042254e-05, | |
| "loss": 0.0502, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 3.193720817565918, | |
| "learning_rate": 2.811737089201878e-05, | |
| "loss": 0.0624, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.395615816116333, | |
| "learning_rate": 2.8093896713615027e-05, | |
| "loss": 0.0616, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.2624863386154175, | |
| "learning_rate": 2.807042253521127e-05, | |
| "loss": 0.0405, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.1176111698150635, | |
| "learning_rate": 2.804694835680751e-05, | |
| "loss": 0.0425, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.4671870470046997, | |
| "learning_rate": 2.802347417840376e-05, | |
| "loss": 0.0456, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.19849862158298492, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.0388, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.17306649684906, | |
| "learning_rate": 2.7976525821596244e-05, | |
| "loss": 0.0593, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.8869905471801758, | |
| "learning_rate": 2.795305164319249e-05, | |
| "loss": 0.0549, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 3.7512552738189697, | |
| "learning_rate": 2.7929577464788735e-05, | |
| "loss": 0.0546, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 2.4991276264190674, | |
| "learning_rate": 2.7906103286384975e-05, | |
| "loss": 0.057, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.2474638223648071, | |
| "learning_rate": 2.7882629107981223e-05, | |
| "loss": 0.0546, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 1.6727144718170166, | |
| "learning_rate": 2.7859154929577463e-05, | |
| "loss": 0.0352, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.7062831521034241, | |
| "learning_rate": 2.783568075117371e-05, | |
| "loss": 0.0234, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 1.569942593574524, | |
| "learning_rate": 2.7812206572769955e-05, | |
| "loss": 0.045, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 3.6019442081451416, | |
| "learning_rate": 2.7788732394366195e-05, | |
| "loss": 0.058, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.731298565864563, | |
| "learning_rate": 2.7765258215962443e-05, | |
| "loss": 0.0549, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 2.845656156539917, | |
| "learning_rate": 2.7741784037558687e-05, | |
| "loss": 0.0388, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 3.3697431087493896, | |
| "learning_rate": 2.7718309859154927e-05, | |
| "loss": 0.0476, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.07065404951572418, | |
| "eval_macro/f1": 0.9069429246419818, | |
| "eval_macro/precision": 0.9071087424181236, | |
| "eval_macro/recall": 0.910052009047393, | |
| "eval_micro/f1": 0.9090350128095644, | |
| "eval_micro/precision": 0.9070211315610088, | |
| "eval_micro/recall": 0.9110578568983225, | |
| "eval_runtime": 30.3133, | |
| "eval_samples/accuracy": 0.900513522766176, | |
| "eval_samples_per_second": 481.802, | |
| "eval_steps_per_second": 15.076, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 5.096194744110107, | |
| "learning_rate": 2.7694835680751174e-05, | |
| "loss": 0.0354, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 4.457573890686035, | |
| "learning_rate": 2.7671361502347422e-05, | |
| "loss": 0.0826, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.3853645324707031, | |
| "learning_rate": 2.7647887323943662e-05, | |
| "loss": 0.0493, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 6.033750534057617, | |
| "learning_rate": 2.7624413145539906e-05, | |
| "loss": 0.0437, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.1589934825897217, | |
| "learning_rate": 2.7600938967136154e-05, | |
| "loss": 0.026, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.9086150527000427, | |
| "learning_rate": 2.7577464788732394e-05, | |
| "loss": 0.058, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 7.624779224395752, | |
| "learning_rate": 2.7553990610328638e-05, | |
| "loss": 0.0731, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 2.7912135124206543, | |
| "learning_rate": 2.7530516431924886e-05, | |
| "loss": 0.0668, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.7742284536361694, | |
| "learning_rate": 2.7507042253521126e-05, | |
| "loss": 0.0359, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.47039780020713806, | |
| "learning_rate": 2.7483568075117373e-05, | |
| "loss": 0.0595, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 2.1637165546417236, | |
| "learning_rate": 2.7460093896713617e-05, | |
| "loss": 0.0523, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.5711060762405396, | |
| "learning_rate": 2.7436619718309858e-05, | |
| "loss": 0.0298, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.5349418520927429, | |
| "learning_rate": 2.7413145539906105e-05, | |
| "loss": 0.0472, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.7174102067947388, | |
| "learning_rate": 2.738967136150235e-05, | |
| "loss": 0.0619, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 0.6406490802764893, | |
| "learning_rate": 2.736619718309859e-05, | |
| "loss": 0.0228, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.3063549995422363, | |
| "learning_rate": 2.7342723004694837e-05, | |
| "loss": 0.0576, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 2.3705832958221436, | |
| "learning_rate": 2.731924882629108e-05, | |
| "loss": 0.0528, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.0235642194747925, | |
| "learning_rate": 2.7295774647887322e-05, | |
| "loss": 0.0736, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 0.5643028616905212, | |
| "learning_rate": 2.727230046948357e-05, | |
| "loss": 0.0494, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 5.157418251037598, | |
| "learning_rate": 2.7248826291079816e-05, | |
| "loss": 0.0589, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 0.2736777663230896, | |
| "learning_rate": 2.7225352112676057e-05, | |
| "loss": 0.063, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 2.397003650665283, | |
| "learning_rate": 2.72018779342723e-05, | |
| "loss": 0.0536, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 2.01479172706604, | |
| "learning_rate": 2.7178403755868548e-05, | |
| "loss": 0.0529, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 4.520587921142578, | |
| "learning_rate": 2.715492957746479e-05, | |
| "loss": 0.0524, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.8048969507217407, | |
| "learning_rate": 2.7131455399061033e-05, | |
| "loss": 0.0475, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.0479780435562134, | |
| "learning_rate": 2.710798122065728e-05, | |
| "loss": 0.0621, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 2.384406089782715, | |
| "learning_rate": 2.708450704225352e-05, | |
| "loss": 0.0566, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.4049185514450073, | |
| "learning_rate": 2.7061032863849768e-05, | |
| "loss": 0.0356, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 1.6596524715423584, | |
| "learning_rate": 2.7037558685446012e-05, | |
| "loss": 0.0702, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 3.465924024581909, | |
| "learning_rate": 2.7014084507042253e-05, | |
| "loss": 0.0482, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 3.690901756286621, | |
| "learning_rate": 2.69906103286385e-05, | |
| "loss": 0.0486, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 2.503009557723999, | |
| "learning_rate": 2.6967136150234744e-05, | |
| "loss": 0.0382, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.9366639256477356, | |
| "learning_rate": 2.6943661971830984e-05, | |
| "loss": 0.0501, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.9588242769241333, | |
| "learning_rate": 2.6920187793427232e-05, | |
| "loss": 0.0731, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 3.305715799331665, | |
| "learning_rate": 2.689671361502348e-05, | |
| "loss": 0.0458, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 4.218424320220947, | |
| "learning_rate": 2.687323943661972e-05, | |
| "loss": 0.0685, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0759609937667847, | |
| "learning_rate": 2.6849765258215964e-05, | |
| "loss": 0.0473, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.3329447507858276, | |
| "learning_rate": 2.682629107981221e-05, | |
| "loss": 0.0581, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 3.64178466796875, | |
| "learning_rate": 2.680281690140845e-05, | |
| "loss": 0.0244, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0093239545822144, | |
| "learning_rate": 2.6779342723004696e-05, | |
| "loss": 0.0315, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 2.4063069820404053, | |
| "learning_rate": 2.6755868544600943e-05, | |
| "loss": 0.0416, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 0.29868409037590027, | |
| "learning_rate": 2.6732394366197184e-05, | |
| "loss": 0.0354, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0405687093734741, | |
| "learning_rate": 2.670892018779343e-05, | |
| "loss": 0.0498, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 0.8380749821662903, | |
| "learning_rate": 2.6685446009389675e-05, | |
| "loss": 0.0426, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.7580487728118896, | |
| "learning_rate": 2.6661971830985915e-05, | |
| "loss": 0.0596, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.4274970293045044, | |
| "learning_rate": 2.6638497652582163e-05, | |
| "loss": 0.0754, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.6633018255233765, | |
| "learning_rate": 2.6615023474178407e-05, | |
| "loss": 0.0713, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.3542613983154297, | |
| "learning_rate": 2.6591549295774647e-05, | |
| "loss": 0.0604, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 2.1828737258911133, | |
| "learning_rate": 2.6568075117370895e-05, | |
| "loss": 0.0426, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.2581530213356018, | |
| "learning_rate": 2.654460093896714e-05, | |
| "loss": 0.041, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 0.06118783354759216, | |
| "eval_macro/f1": 0.9121583171919195, | |
| "eval_macro/precision": 0.9130776209618738, | |
| "eval_macro/recall": 0.9122677551293914, | |
| "eval_micro/f1": 0.9137872049264454, | |
| "eval_micro/precision": 0.9131623931623931, | |
| "eval_micro/recall": 0.9144128723040055, | |
| "eval_runtime": 30.3668, | |
| "eval_samples/accuracy": 0.905169462512838, | |
| "eval_samples_per_second": 480.952, | |
| "eval_steps_per_second": 15.049, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 3.0459187030792236, | |
| "learning_rate": 2.652112676056338e-05, | |
| "loss": 0.0516, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 2.2514278888702393, | |
| "learning_rate": 2.6497652582159626e-05, | |
| "loss": 0.0527, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.5657632946968079, | |
| "learning_rate": 2.6474178403755867e-05, | |
| "loss": 0.0612, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.468795895576477, | |
| "learning_rate": 2.6450704225352114e-05, | |
| "loss": 0.0426, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.9087652564048767, | |
| "learning_rate": 2.642723004694836e-05, | |
| "loss": 0.0446, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 2.5247867107391357, | |
| "learning_rate": 2.64037558685446e-05, | |
| "loss": 0.0724, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.4081361293792725, | |
| "learning_rate": 2.6380281690140846e-05, | |
| "loss": 0.0492, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 202.60423278808594, | |
| "learning_rate": 2.635680751173709e-05, | |
| "loss": 0.033, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 2.9532530307769775, | |
| "learning_rate": 2.633333333333333e-05, | |
| "loss": 0.0627, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.42042076587677, | |
| "learning_rate": 2.6309859154929578e-05, | |
| "loss": 0.046, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 0.46867698431015015, | |
| "learning_rate": 2.6286384976525825e-05, | |
| "loss": 0.0549, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 2.7932143211364746, | |
| "learning_rate": 2.6262910798122066e-05, | |
| "loss": 0.0465, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 3.8150582313537598, | |
| "learning_rate": 2.623943661971831e-05, | |
| "loss": 0.0421, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.6926825642585754, | |
| "learning_rate": 2.6215962441314557e-05, | |
| "loss": 0.0536, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.488939642906189, | |
| "learning_rate": 2.6192488262910798e-05, | |
| "loss": 0.0572, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.9060409069061279, | |
| "learning_rate": 2.6169014084507042e-05, | |
| "loss": 0.05, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 2.041475296020508, | |
| "learning_rate": 2.614553990610329e-05, | |
| "loss": 0.0604, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 0.3654033839702606, | |
| "learning_rate": 2.612206572769953e-05, | |
| "loss": 0.045, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 5.171931266784668, | |
| "learning_rate": 2.6098591549295777e-05, | |
| "loss": 0.0303, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 2.96879243850708, | |
| "learning_rate": 2.607511737089202e-05, | |
| "loss": 0.0634, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.6135741472244263, | |
| "learning_rate": 2.605164319248826e-05, | |
| "loss": 0.0519, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.8153369426727295, | |
| "learning_rate": 2.602816901408451e-05, | |
| "loss": 0.0564, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 2.526282787322998, | |
| "learning_rate": 2.6004694835680753e-05, | |
| "loss": 0.0459, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 2.152677059173584, | |
| "learning_rate": 2.5981220657276994e-05, | |
| "loss": 0.0656, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.8138638734817505, | |
| "learning_rate": 2.595774647887324e-05, | |
| "loss": 0.0433, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 2.1275670528411865, | |
| "learning_rate": 2.5934272300469485e-05, | |
| "loss": 0.0776, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.4768202304840088, | |
| "learning_rate": 2.5910798122065725e-05, | |
| "loss": 0.0495, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 5.845564365386963, | |
| "learning_rate": 2.5887323943661973e-05, | |
| "loss": 0.0506, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.0060453414916992, | |
| "learning_rate": 2.586384976525822e-05, | |
| "loss": 0.0663, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.312296986579895, | |
| "learning_rate": 2.584037558685446e-05, | |
| "loss": 0.0515, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 0.9539171457290649, | |
| "learning_rate": 2.5816901408450705e-05, | |
| "loss": 0.0706, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 2.121685266494751, | |
| "learning_rate": 2.5793427230046952e-05, | |
| "loss": 0.04, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 2.037627696990967, | |
| "learning_rate": 2.5769953051643193e-05, | |
| "loss": 0.0423, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.45661213994026184, | |
| "learning_rate": 2.5746478873239437e-05, | |
| "loss": 0.0451, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 2.1163055896759033, | |
| "learning_rate": 2.5723004694835684e-05, | |
| "loss": 0.0662, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.9851250052452087, | |
| "learning_rate": 2.5699530516431924e-05, | |
| "loss": 0.0421, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 1.8168209791183472, | |
| "learning_rate": 2.5676056338028172e-05, | |
| "loss": 0.0577, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.1686525344848633, | |
| "learning_rate": 2.5652582159624416e-05, | |
| "loss": 0.0542, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.2663167715072632, | |
| "learning_rate": 2.5629107981220656e-05, | |
| "loss": 0.0577, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 2.535994529724121, | |
| "learning_rate": 2.5605633802816904e-05, | |
| "loss": 0.0528, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 97.92497253417969, | |
| "learning_rate": 2.5582159624413148e-05, | |
| "loss": 0.0661, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.2818212509155273, | |
| "learning_rate": 2.5558685446009388e-05, | |
| "loss": 0.0483, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.5373280048370361, | |
| "learning_rate": 2.5535211267605636e-05, | |
| "loss": 0.0508, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.694848656654358, | |
| "learning_rate": 2.5511737089201883e-05, | |
| "loss": 0.0591, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 3.430144786834717, | |
| "learning_rate": 2.5488262910798123e-05, | |
| "loss": 0.0621, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 3.0223701000213623, | |
| "learning_rate": 2.5464788732394367e-05, | |
| "loss": 0.0578, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.3250190019607544, | |
| "learning_rate": 2.5441314553990615e-05, | |
| "loss": 0.0413, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.6544115543365479, | |
| "learning_rate": 2.5417840375586855e-05, | |
| "loss": 0.0567, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.7126723527908325, | |
| "learning_rate": 2.53943661971831e-05, | |
| "loss": 0.0626, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.39465492963790894, | |
| "learning_rate": 2.5370892018779347e-05, | |
| "loss": 0.0384, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 0.06695384532213211, | |
| "eval_macro/f1": 0.9068315076135826, | |
| "eval_macro/precision": 0.9077032497066345, | |
| "eval_macro/recall": 0.9072453550876894, | |
| "eval_micro/f1": 0.908891928864569, | |
| "eval_micro/precision": 0.9079603689784762, | |
| "eval_micro/recall": 0.9098254022595001, | |
| "eval_runtime": 30.7367, | |
| "eval_samples/accuracy": 0.8998972954467648, | |
| "eval_samples_per_second": 475.165, | |
| "eval_steps_per_second": 14.868, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.648552656173706, | |
| "learning_rate": 2.5347417840375587e-05, | |
| "loss": 0.0684, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.2032274007797241, | |
| "learning_rate": 2.5323943661971835e-05, | |
| "loss": 0.0668, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 2.3236911296844482, | |
| "learning_rate": 2.530046948356808e-05, | |
| "loss": 0.053, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.6463286876678467, | |
| "learning_rate": 2.527699530516432e-05, | |
| "loss": 0.0502, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.206963300704956, | |
| "learning_rate": 2.5253521126760566e-05, | |
| "loss": 0.0613, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.3414201736450195, | |
| "learning_rate": 2.523004694835681e-05, | |
| "loss": 0.0704, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.45636922121047974, | |
| "learning_rate": 2.520657276995305e-05, | |
| "loss": 0.0272, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.4351228475570679, | |
| "learning_rate": 2.51830985915493e-05, | |
| "loss": 0.0348, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 0.7133152484893799, | |
| "learning_rate": 2.5159624413145542e-05, | |
| "loss": 0.0471, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 2.3375461101531982, | |
| "learning_rate": 2.5136150234741783e-05, | |
| "loss": 0.0593, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.4897431135177612, | |
| "learning_rate": 2.511267605633803e-05, | |
| "loss": 0.062, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 0.9795855283737183, | |
| "learning_rate": 2.508920187793427e-05, | |
| "loss": 0.0647, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 2.317823886871338, | |
| "learning_rate": 2.5065727699530518e-05, | |
| "loss": 0.0556, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 1.6250512599945068, | |
| "learning_rate": 2.5042253521126762e-05, | |
| "loss": 0.0564, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 2.342941999435425, | |
| "learning_rate": 2.5018779342723003e-05, | |
| "loss": 0.0425, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 3.147282838821411, | |
| "learning_rate": 2.499530516431925e-05, | |
| "loss": 0.0601, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.04555138200521469, | |
| "learning_rate": 2.4971830985915494e-05, | |
| "loss": 0.0489, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 0.3302970230579376, | |
| "learning_rate": 2.4948356807511738e-05, | |
| "loss": 0.0598, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 2.386502504348755, | |
| "learning_rate": 2.4924882629107982e-05, | |
| "loss": 0.0612, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.918862223625183, | |
| "learning_rate": 2.490140845070423e-05, | |
| "loss": 0.0522, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.9548710584640503, | |
| "learning_rate": 2.487793427230047e-05, | |
| "loss": 0.0559, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 2.2457354068756104, | |
| "learning_rate": 2.4854460093896714e-05, | |
| "loss": 0.0473, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 2.8347527980804443, | |
| "learning_rate": 2.483098591549296e-05, | |
| "loss": 0.0529, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.7313975095748901, | |
| "learning_rate": 2.4807511737089205e-05, | |
| "loss": 0.0702, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 2.26188325881958, | |
| "learning_rate": 2.4784037558685446e-05, | |
| "loss": 0.0473, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 0.3577061891555786, | |
| "learning_rate": 2.476056338028169e-05, | |
| "loss": 0.0426, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.3557612895965576, | |
| "learning_rate": 2.4737089201877937e-05, | |
| "loss": 0.0294, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 2.5885844230651855, | |
| "learning_rate": 2.471361502347418e-05, | |
| "loss": 0.0546, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 2.567915201187134, | |
| "learning_rate": 2.469014084507042e-05, | |
| "loss": 0.0527, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 3.9320240020751953, | |
| "learning_rate": 2.466666666666667e-05, | |
| "loss": 0.0528, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 1.2177060842514038, | |
| "learning_rate": 2.4643192488262913e-05, | |
| "loss": 0.0594, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 2.4854683876037598, | |
| "learning_rate": 2.4619718309859153e-05, | |
| "loss": 0.0631, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 2.2723195552825928, | |
| "learning_rate": 2.45962441314554e-05, | |
| "loss": 0.04, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 3.3895249366760254, | |
| "learning_rate": 2.4572769953051645e-05, | |
| "loss": 0.0461, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.03994426131248474, | |
| "learning_rate": 2.454929577464789e-05, | |
| "loss": 0.0333, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.1587481498718262, | |
| "learning_rate": 2.4525821596244133e-05, | |
| "loss": 0.0378, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 2.166707754135132, | |
| "learning_rate": 2.4502347417840377e-05, | |
| "loss": 0.0627, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.7003379464149475, | |
| "learning_rate": 2.447887323943662e-05, | |
| "loss": 0.038, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 2.285989284515381, | |
| "learning_rate": 2.4455399061032864e-05, | |
| "loss": 0.0562, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 4.629007816314697, | |
| "learning_rate": 2.443192488262911e-05, | |
| "loss": 0.0419, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 3.119145154953003, | |
| "learning_rate": 2.4408450704225352e-05, | |
| "loss": 0.039, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 1.251133680343628, | |
| "learning_rate": 2.43849765258216e-05, | |
| "loss": 0.083, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 0.3550426661968231, | |
| "learning_rate": 2.436150234741784e-05, | |
| "loss": 0.0387, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.6787813901901245, | |
| "learning_rate": 2.4338028169014084e-05, | |
| "loss": 0.0566, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 11.414258003234863, | |
| "learning_rate": 2.431455399061033e-05, | |
| "loss": 0.0418, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 4.182372093200684, | |
| "learning_rate": 2.4291079812206576e-05, | |
| "loss": 0.0413, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.34739282727241516, | |
| "learning_rate": 2.4267605633802816e-05, | |
| "loss": 0.0407, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.3519314229488373, | |
| "learning_rate": 2.4244131455399063e-05, | |
| "loss": 0.0236, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.8023984432220459, | |
| "learning_rate": 2.4220657276995307e-05, | |
| "loss": 0.0592, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.21758234500885, | |
| "learning_rate": 2.419718309859155e-05, | |
| "loss": 0.0469, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 0.06562425941228867, | |
| "eval_macro/f1": 0.9144028854078985, | |
| "eval_macro/precision": 0.9130815641679679, | |
| "eval_macro/recall": 0.9169312081190664, | |
| "eval_micro/f1": 0.9161118508655126, | |
| "eval_micro/precision": 0.9136475074911469, | |
| "eval_micro/recall": 0.91858952413557, | |
| "eval_runtime": 29.3898, | |
| "eval_samples/accuracy": 0.9083875385142075, | |
| "eval_samples_per_second": 496.94, | |
| "eval_steps_per_second": 15.55, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 2.0988662242889404, | |
| "learning_rate": 2.4173708920187795e-05, | |
| "loss": 0.0357, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 5.835433483123779, | |
| "learning_rate": 2.415023474178404e-05, | |
| "loss": 0.0669, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 3.476304531097412, | |
| "learning_rate": 2.4126760563380283e-05, | |
| "loss": 0.0633, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.9310297966003418, | |
| "learning_rate": 2.4103286384976527e-05, | |
| "loss": 0.0461, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.138595700263977, | |
| "learning_rate": 2.407981220657277e-05, | |
| "loss": 0.0579, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.2649039030075073, | |
| "learning_rate": 2.4056338028169015e-05, | |
| "loss": 0.03, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.3273512125015259, | |
| "learning_rate": 2.403286384976526e-05, | |
| "loss": 0.0563, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 4.17151403427124, | |
| "learning_rate": 2.4009389671361503e-05, | |
| "loss": 0.069, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.5479555130004883, | |
| "learning_rate": 2.3985915492957747e-05, | |
| "loss": 0.0344, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 2.591539144515991, | |
| "learning_rate": 2.396244131455399e-05, | |
| "loss": 0.0374, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.1022748947143555, | |
| "learning_rate": 2.3938967136150235e-05, | |
| "loss": 0.0411, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.20835240185260773, | |
| "learning_rate": 2.391549295774648e-05, | |
| "loss": 0.0528, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.4419134855270386, | |
| "learning_rate": 2.3892018779342723e-05, | |
| "loss": 0.0402, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.681576132774353, | |
| "learning_rate": 2.386854460093897e-05, | |
| "loss": 0.0273, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 2.0418591499328613, | |
| "learning_rate": 2.384507042253521e-05, | |
| "loss": 0.059, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.7147711515426636, | |
| "learning_rate": 2.3821596244131455e-05, | |
| "loss": 0.0911, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 2.6744132041931152, | |
| "learning_rate": 2.3798122065727702e-05, | |
| "loss": 0.0551, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 0.905580461025238, | |
| "learning_rate": 2.3774647887323946e-05, | |
| "loss": 0.0537, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 2.434372663497925, | |
| "learning_rate": 2.3751173708920187e-05, | |
| "loss": 0.05, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.8721539378166199, | |
| "learning_rate": 2.3727699530516434e-05, | |
| "loss": 0.0351, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 5.539384365081787, | |
| "learning_rate": 2.3704225352112678e-05, | |
| "loss": 0.035, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.04614145681262016, | |
| "learning_rate": 2.3680751173708922e-05, | |
| "loss": 0.0502, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 7.171780109405518, | |
| "learning_rate": 2.3657276995305166e-05, | |
| "loss": 0.043, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.1372716426849365, | |
| "learning_rate": 2.363380281690141e-05, | |
| "loss": 0.037, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.49709656834602356, | |
| "learning_rate": 2.3610328638497654e-05, | |
| "loss": 0.0735, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 3.8573594093322754, | |
| "learning_rate": 2.3586854460093898e-05, | |
| "loss": 0.0439, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.6819432973861694, | |
| "learning_rate": 2.356338028169014e-05, | |
| "loss": 0.038, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.4849399924278259, | |
| "learning_rate": 2.3539906103286386e-05, | |
| "loss": 0.0423, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 2.0761923789978027, | |
| "learning_rate": 2.3516431924882633e-05, | |
| "loss": 0.0359, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.2302016168832779, | |
| "learning_rate": 2.3492957746478874e-05, | |
| "loss": 0.0437, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.9668428301811218, | |
| "learning_rate": 2.3469483568075117e-05, | |
| "loss": 0.0711, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 2.1143290996551514, | |
| "learning_rate": 2.3446009389671365e-05, | |
| "loss": 0.0559, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.4595099687576294, | |
| "learning_rate": 2.342253521126761e-05, | |
| "loss": 0.04, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 4.2474775314331055, | |
| "learning_rate": 2.339906103286385e-05, | |
| "loss": 0.071, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.3273273706436157, | |
| "learning_rate": 2.3375586854460093e-05, | |
| "loss": 0.0637, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 2.783879280090332, | |
| "learning_rate": 2.335211267605634e-05, | |
| "loss": 0.0379, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.8824636936187744, | |
| "learning_rate": 2.3328638497652585e-05, | |
| "loss": 0.0643, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.5119823217391968, | |
| "learning_rate": 2.3305164319248825e-05, | |
| "loss": 0.0412, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.3782035112380981, | |
| "learning_rate": 2.3281690140845073e-05, | |
| "loss": 0.0433, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.557918906211853, | |
| "learning_rate": 2.3258215962441316e-05, | |
| "loss": 0.056, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.31254222989082336, | |
| "learning_rate": 2.3234741784037557e-05, | |
| "loss": 0.046, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.7209023833274841, | |
| "learning_rate": 2.3211267605633804e-05, | |
| "loss": 0.048, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.2861871123313904, | |
| "learning_rate": 2.318779342723005e-05, | |
| "loss": 0.0624, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.4058027267456055, | |
| "learning_rate": 2.3164319248826292e-05, | |
| "loss": 0.0424, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 2.840806007385254, | |
| "learning_rate": 2.3140845070422536e-05, | |
| "loss": 0.0492, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.5399413108825684, | |
| "learning_rate": 2.311737089201878e-05, | |
| "loss": 0.041, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 3.8651294708251953, | |
| "learning_rate": 2.3093896713615024e-05, | |
| "loss": 0.0494, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 4.223033428192139, | |
| "learning_rate": 2.3070422535211268e-05, | |
| "loss": 0.0745, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.967286229133606, | |
| "learning_rate": 2.3046948356807512e-05, | |
| "loss": 0.043, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 2.8095009326934814, | |
| "learning_rate": 2.3023474178403756e-05, | |
| "loss": 0.0556, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_loss": 0.0602407269179821, | |
| "eval_macro/f1": 0.9160111322025722, | |
| "eval_macro/precision": 0.9174229990455038, | |
| "eval_macro/recall": 0.9169255864513151, | |
| "eval_micro/f1": 0.9172736910502377, | |
| "eval_micro/precision": 0.9163022683793386, | |
| "eval_micro/recall": 0.9182471756247861, | |
| "eval_runtime": 30.1851, | |
| "eval_samples/accuracy": 0.907839780896953, | |
| "eval_samples_per_second": 483.847, | |
| "eval_steps_per_second": 15.14, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 0.8641358017921448, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.0303, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.8955144882202148, | |
| "learning_rate": 2.2976525821596244e-05, | |
| "loss": 0.0304, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 2.131303310394287, | |
| "learning_rate": 2.2953051643192488e-05, | |
| "loss": 0.0781, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 2.817549467086792, | |
| "learning_rate": 2.2929577464788735e-05, | |
| "loss": 0.0379, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 1.6803205013275146, | |
| "learning_rate": 2.290610328638498e-05, | |
| "loss": 0.0526, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 1.1056827306747437, | |
| "learning_rate": 2.288262910798122e-05, | |
| "loss": 0.0408, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 3.293531894683838, | |
| "learning_rate": 2.2859154929577467e-05, | |
| "loss": 0.0737, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 0.13934262096881866, | |
| "learning_rate": 2.283568075117371e-05, | |
| "loss": 0.0687, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.664345622062683, | |
| "learning_rate": 2.2812206572769955e-05, | |
| "loss": 0.0529, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 2.634932518005371, | |
| "learning_rate": 2.27887323943662e-05, | |
| "loss": 0.0725, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.9947081804275513, | |
| "learning_rate": 2.2765258215962443e-05, | |
| "loss": 0.0424, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 2.1717348098754883, | |
| "learning_rate": 2.2741784037558687e-05, | |
| "loss": 0.0511, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 3.0661449432373047, | |
| "learning_rate": 2.271830985915493e-05, | |
| "loss": 0.0267, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 2.451406478881836, | |
| "learning_rate": 2.2694835680751175e-05, | |
| "loss": 0.0553, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.1184929609298706, | |
| "learning_rate": 2.267136150234742e-05, | |
| "loss": 0.0526, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 2.386408567428589, | |
| "learning_rate": 2.2647887323943663e-05, | |
| "loss": 0.0566, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.6783018112182617, | |
| "learning_rate": 2.2624413145539907e-05, | |
| "loss": 0.0299, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.21903283894062042, | |
| "learning_rate": 2.260093896713615e-05, | |
| "loss": 0.0508, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.843197226524353, | |
| "learning_rate": 2.2577464788732395e-05, | |
| "loss": 0.0572, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.7903924584388733, | |
| "learning_rate": 2.255399061032864e-05, | |
| "loss": 0.0434, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.0302156209945679, | |
| "learning_rate": 2.2530516431924883e-05, | |
| "loss": 0.0529, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 3.6759471893310547, | |
| "learning_rate": 2.2507042253521127e-05, | |
| "loss": 0.0303, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 2.0208933353424072, | |
| "learning_rate": 2.2483568075117374e-05, | |
| "loss": 0.0316, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 1.2156116962432861, | |
| "learning_rate": 2.2460093896713614e-05, | |
| "loss": 0.033, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.3793451488018036, | |
| "learning_rate": 2.243661971830986e-05, | |
| "loss": 0.0397, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 2.972238779067993, | |
| "learning_rate": 2.2413145539906106e-05, | |
| "loss": 0.0561, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 3.2234435081481934, | |
| "learning_rate": 2.238967136150235e-05, | |
| "loss": 0.0487, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.875913143157959, | |
| "learning_rate": 2.236619718309859e-05, | |
| "loss": 0.0578, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.3704244792461395, | |
| "learning_rate": 2.2342723004694838e-05, | |
| "loss": 0.05, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 2.043782949447632, | |
| "learning_rate": 2.231924882629108e-05, | |
| "loss": 0.0488, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.9040173888206482, | |
| "learning_rate": 2.2295774647887326e-05, | |
| "loss": 0.0644, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 2.0497360229492188, | |
| "learning_rate": 2.227230046948357e-05, | |
| "loss": 0.043, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.3397915363311768, | |
| "learning_rate": 2.2248826291079813e-05, | |
| "loss": 0.0451, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 2.3610448837280273, | |
| "learning_rate": 2.2225352112676057e-05, | |
| "loss": 0.0585, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.3045170307159424, | |
| "learning_rate": 2.22018779342723e-05, | |
| "loss": 0.0704, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.5577067136764526, | |
| "learning_rate": 2.2178403755868545e-05, | |
| "loss": 0.0583, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.5495411157608032, | |
| "learning_rate": 2.215492957746479e-05, | |
| "loss": 0.0431, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.3377631902694702, | |
| "learning_rate": 2.2131455399061037e-05, | |
| "loss": 0.0348, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 0.4540291726589203, | |
| "learning_rate": 2.2107981220657277e-05, | |
| "loss": 0.051, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.256713628768921, | |
| "learning_rate": 2.208450704225352e-05, | |
| "loss": 0.058, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.3505330085754395, | |
| "learning_rate": 2.206103286384977e-05, | |
| "loss": 0.0474, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 2.184422254562378, | |
| "learning_rate": 2.2037558685446012e-05, | |
| "loss": 0.0347, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.9837118983268738, | |
| "learning_rate": 2.2014084507042253e-05, | |
| "loss": 0.0486, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.9448314905166626, | |
| "learning_rate": 2.1990610328638497e-05, | |
| "loss": 0.0508, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.18796034157276154, | |
| "learning_rate": 2.1967136150234744e-05, | |
| "loss": 0.0365, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 2.7096333503723145, | |
| "learning_rate": 2.1943661971830985e-05, | |
| "loss": 0.0447, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.3839486837387085, | |
| "learning_rate": 2.192018779342723e-05, | |
| "loss": 0.0606, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.7544217705726624, | |
| "learning_rate": 2.1896713615023476e-05, | |
| "loss": 0.0517, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 3.9837958812713623, | |
| "learning_rate": 2.187323943661972e-05, | |
| "loss": 0.0491, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 4.055257797241211, | |
| "learning_rate": 2.184976525821596e-05, | |
| "loss": 0.0509, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.057348866015672684, | |
| "eval_macro/f1": 0.9190607485607669, | |
| "eval_macro/precision": 0.91942223429283, | |
| "eval_macro/recall": 0.9200309369578695, | |
| "eval_micro/f1": 0.9205556315861503, | |
| "eval_micro/precision": 0.9199890583327635, | |
| "eval_micro/recall": 0.9211229031153715, | |
| "eval_runtime": 29.9261, | |
| "eval_samples/accuracy": 0.912221841834988, | |
| "eval_samples_per_second": 488.036, | |
| "eval_steps_per_second": 15.271, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.9651139974594116, | |
| "learning_rate": 2.1826291079812208e-05, | |
| "loss": 0.046, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 1.4122140407562256, | |
| "learning_rate": 2.1802816901408452e-05, | |
| "loss": 0.0427, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 3.4565532207489014, | |
| "learning_rate": 2.1779342723004696e-05, | |
| "loss": 0.0496, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 2.368950366973877, | |
| "learning_rate": 2.175586854460094e-05, | |
| "loss": 0.05, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.5657429695129395, | |
| "learning_rate": 2.1732394366197184e-05, | |
| "loss": 0.0387, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 2.9971156120300293, | |
| "learning_rate": 2.1708920187793428e-05, | |
| "loss": 0.0729, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 2.455146074295044, | |
| "learning_rate": 2.1685446009389672e-05, | |
| "loss": 0.0594, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 2.0647032260894775, | |
| "learning_rate": 2.1661971830985916e-05, | |
| "loss": 0.0489, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.9192816019058228, | |
| "learning_rate": 2.163849765258216e-05, | |
| "loss": 0.069, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.2612496614456177, | |
| "learning_rate": 2.1615023474178407e-05, | |
| "loss": 0.0447, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.4851101636886597, | |
| "learning_rate": 2.1591549295774648e-05, | |
| "loss": 0.0357, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 3.6981115341186523, | |
| "learning_rate": 2.156807511737089e-05, | |
| "loss": 0.0763, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.6905491352081299, | |
| "learning_rate": 2.154460093896714e-05, | |
| "loss": 0.0509, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0096628665924072, | |
| "learning_rate": 2.1521126760563383e-05, | |
| "loss": 0.051, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 2.6806323528289795, | |
| "learning_rate": 2.1497652582159624e-05, | |
| "loss": 0.0418, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.4341468811035156, | |
| "learning_rate": 2.147417840375587e-05, | |
| "loss": 0.0551, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 0.7659865617752075, | |
| "learning_rate": 2.1450704225352115e-05, | |
| "loss": 0.0529, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.6196166276931763, | |
| "learning_rate": 2.142723004694836e-05, | |
| "loss": 0.0426, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 4.964820861816406, | |
| "learning_rate": 2.1403755868544603e-05, | |
| "loss": 0.0764, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.1169065237045288, | |
| "learning_rate": 2.1380281690140847e-05, | |
| "loss": 0.0661, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 2.428039073944092, | |
| "learning_rate": 2.135680751173709e-05, | |
| "loss": 0.0362, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 2.051858901977539, | |
| "learning_rate": 2.1333333333333335e-05, | |
| "loss": 0.0698, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.6289446353912354, | |
| "learning_rate": 2.130985915492958e-05, | |
| "loss": 0.0688, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0273969173431396, | |
| "learning_rate": 2.1286384976525823e-05, | |
| "loss": 0.0412, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 2.8337440490722656, | |
| "learning_rate": 2.1262910798122067e-05, | |
| "loss": 0.0464, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 3.0352559089660645, | |
| "learning_rate": 2.123943661971831e-05, | |
| "loss": 0.0361, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 3.7453436851501465, | |
| "learning_rate": 2.1215962441314554e-05, | |
| "loss": 0.05, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 3.489884853363037, | |
| "learning_rate": 2.11924882629108e-05, | |
| "loss": 0.075, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.7656161785125732, | |
| "learning_rate": 2.1169014084507042e-05, | |
| "loss": 0.0621, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 2.6409249305725098, | |
| "learning_rate": 2.1145539906103286e-05, | |
| "loss": 0.06, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.3973041772842407, | |
| "learning_rate": 2.112206572769953e-05, | |
| "loss": 0.059, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.4552676677703857, | |
| "learning_rate": 2.1098591549295778e-05, | |
| "loss": 0.0559, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 0.8865125179290771, | |
| "learning_rate": 2.1075117370892018e-05, | |
| "loss": 0.0409, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.3112214803695679, | |
| "learning_rate": 2.1051643192488262e-05, | |
| "loss": 0.0515, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.4582147598266602, | |
| "learning_rate": 2.102816901408451e-05, | |
| "loss": 0.0499, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.7377904057502747, | |
| "learning_rate": 2.1004694835680753e-05, | |
| "loss": 0.0419, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.7822595238685608, | |
| "learning_rate": 2.0981220657276994e-05, | |
| "loss": 0.0492, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 2.651425361633301, | |
| "learning_rate": 2.095774647887324e-05, | |
| "loss": 0.0292, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 2.326988697052002, | |
| "learning_rate": 2.0934272300469485e-05, | |
| "loss": 0.0613, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 2.0126523971557617, | |
| "learning_rate": 2.091079812206573e-05, | |
| "loss": 0.0554, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 4.0273566246032715, | |
| "learning_rate": 2.0887323943661973e-05, | |
| "loss": 0.0483, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 3.330744981765747, | |
| "learning_rate": 2.0863849765258217e-05, | |
| "loss": 0.052, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.2141571044921875, | |
| "learning_rate": 2.084037558685446e-05, | |
| "loss": 0.0583, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.20693588256835938, | |
| "learning_rate": 2.0816901408450705e-05, | |
| "loss": 0.0526, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 3.2535054683685303, | |
| "learning_rate": 2.079342723004695e-05, | |
| "loss": 0.0344, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.5202856063842773, | |
| "learning_rate": 2.0769953051643193e-05, | |
| "loss": 0.0478, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 3.9802026748657227, | |
| "learning_rate": 2.074647887323944e-05, | |
| "loss": 0.0459, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.4574544429779053, | |
| "learning_rate": 2.072300469483568e-05, | |
| "loss": 0.0425, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 0.9468692541122437, | |
| "learning_rate": 2.0699530516431925e-05, | |
| "loss": 0.0327, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.450246810913086, | |
| "learning_rate": 2.0676056338028172e-05, | |
| "loss": 0.0266, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 0.06408223509788513, | |
| "eval_macro/f1": 0.9174916425127692, | |
| "eval_macro/precision": 0.9170797303677747, | |
| "eval_macro/recall": 0.9189415678502407, | |
| "eval_micro/f1": 0.9193680755026672, | |
| "eval_micro/precision": 0.9183004303572648, | |
| "eval_micro/recall": 0.9204382060938034, | |
| "eval_runtime": 27.9407, | |
| "eval_samples/accuracy": 0.9120849024306744, | |
| "eval_samples_per_second": 522.714, | |
| "eval_steps_per_second": 16.356, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.10892952233552933, | |
| "learning_rate": 2.0652582159624416e-05, | |
| "loss": 0.0349, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 2.7067954540252686, | |
| "learning_rate": 2.0629107981220657e-05, | |
| "loss": 0.0429, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.4395139813423157, | |
| "learning_rate": 2.06056338028169e-05, | |
| "loss": 0.0499, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.1774306297302246, | |
| "learning_rate": 2.0582159624413148e-05, | |
| "loss": 0.0576, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 3.046229124069214, | |
| "learning_rate": 2.055868544600939e-05, | |
| "loss": 0.0463, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.7256075143814087, | |
| "learning_rate": 2.0535211267605633e-05, | |
| "loss": 0.0594, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 2.626065254211426, | |
| "learning_rate": 2.051173708920188e-05, | |
| "loss": 0.0451, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.4180030822753906, | |
| "learning_rate": 2.0488262910798124e-05, | |
| "loss": 0.0485, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 4.02133846282959, | |
| "learning_rate": 2.0464788732394364e-05, | |
| "loss": 0.049, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 1.9987022876739502, | |
| "learning_rate": 2.0441314553990612e-05, | |
| "loss": 0.0397, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.2887463867664337, | |
| "learning_rate": 2.0417840375586856e-05, | |
| "loss": 0.0369, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 2.342617988586426, | |
| "learning_rate": 2.03943661971831e-05, | |
| "loss": 0.0337, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 2.309424638748169, | |
| "learning_rate": 2.0370892018779344e-05, | |
| "loss": 0.0535, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 0.7142930030822754, | |
| "learning_rate": 2.0347417840375588e-05, | |
| "loss": 0.0418, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 2.1817173957824707, | |
| "learning_rate": 2.032394366197183e-05, | |
| "loss": 0.0357, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 4.096397399902344, | |
| "learning_rate": 2.0300469483568076e-05, | |
| "loss": 0.072, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 4.028958320617676, | |
| "learning_rate": 2.027699530516432e-05, | |
| "loss": 0.0518, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.7069728374481201, | |
| "learning_rate": 2.0253521126760563e-05, | |
| "loss": 0.0317, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 2.568117380142212, | |
| "learning_rate": 2.023004694835681e-05, | |
| "loss": 0.0317, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.4156475067138672, | |
| "learning_rate": 2.020657276995305e-05, | |
| "loss": 0.0356, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 3.9762303829193115, | |
| "learning_rate": 2.0183098591549295e-05, | |
| "loss": 0.0545, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 0.18147669732570648, | |
| "learning_rate": 2.0159624413145543e-05, | |
| "loss": 0.0356, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 2.423311948776245, | |
| "learning_rate": 2.0136150234741787e-05, | |
| "loss": 0.0732, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 2.2960498332977295, | |
| "learning_rate": 2.0112676056338027e-05, | |
| "loss": 0.0579, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 2.083404064178467, | |
| "learning_rate": 2.0089201877934275e-05, | |
| "loss": 0.0575, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.8938828706741333, | |
| "learning_rate": 2.006572769953052e-05, | |
| "loss": 0.0336, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.287044048309326, | |
| "learning_rate": 2.0042253521126763e-05, | |
| "loss": 0.0484, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.1882569044828415, | |
| "learning_rate": 2.0018779342723006e-05, | |
| "loss": 0.0404, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.9885941743850708, | |
| "learning_rate": 1.999530516431925e-05, | |
| "loss": 0.0436, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.3820126056671143, | |
| "learning_rate": 1.9971830985915494e-05, | |
| "loss": 0.0363, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 4.282595634460449, | |
| "learning_rate": 1.994835680751174e-05, | |
| "loss": 0.0485, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 2.2270476818084717, | |
| "learning_rate": 1.9924882629107982e-05, | |
| "loss": 0.0389, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 1.3999806642532349, | |
| "learning_rate": 1.9901408450704226e-05, | |
| "loss": 0.0393, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 0.3673723042011261, | |
| "learning_rate": 1.987793427230047e-05, | |
| "loss": 0.0418, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 3.573798179626465, | |
| "learning_rate": 1.9854460093896714e-05, | |
| "loss": 0.0384, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 1.584816336631775, | |
| "learning_rate": 1.9830985915492958e-05, | |
| "loss": 0.0354, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 2.1099088191986084, | |
| "learning_rate": 1.9807511737089202e-05, | |
| "loss": 0.0479, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 2.267026424407959, | |
| "learning_rate": 1.9784037558685446e-05, | |
| "loss": 0.0612, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 4.091300964355469, | |
| "learning_rate": 1.976056338028169e-05, | |
| "loss": 0.0298, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.49155735969543457, | |
| "learning_rate": 1.9737089201877934e-05, | |
| "loss": 0.0341, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 2.704821825027466, | |
| "learning_rate": 1.971361502347418e-05, | |
| "loss": 0.0414, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.24696892499923706, | |
| "learning_rate": 1.9690140845070422e-05, | |
| "loss": 0.025, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 1.6729565858840942, | |
| "learning_rate": 1.9666666666666666e-05, | |
| "loss": 0.0288, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 1.8321446180343628, | |
| "learning_rate": 1.9643192488262913e-05, | |
| "loss": 0.0463, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.1947106420993805, | |
| "learning_rate": 1.9619718309859157e-05, | |
| "loss": 0.0223, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 4.319494724273682, | |
| "learning_rate": 1.9596244131455398e-05, | |
| "loss": 0.0379, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.17218045890331268, | |
| "learning_rate": 1.9572769953051645e-05, | |
| "loss": 0.0214, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 1.846875548362732, | |
| "learning_rate": 1.954929577464789e-05, | |
| "loss": 0.044, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 2.1127357482910156, | |
| "learning_rate": 1.9525821596244133e-05, | |
| "loss": 0.0289, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 2.4004576206207275, | |
| "learning_rate": 1.9502347417840377e-05, | |
| "loss": 0.0468, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "eval_loss": 0.06455881893634796, | |
| "eval_macro/f1": 0.9173124672477516, | |
| "eval_macro/precision": 0.9169905051673943, | |
| "eval_macro/recall": 0.9183038895525318, | |
| "eval_micro/f1": 0.9185863695399549, | |
| "eval_micro/precision": 0.9170818262471849, | |
| "eval_micro/recall": 0.9200958575830195, | |
| "eval_runtime": 28.2634, | |
| "eval_samples/accuracy": 0.9116056145155769, | |
| "eval_samples_per_second": 516.745, | |
| "eval_steps_per_second": 16.169, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 2.3656160831451416, | |
| "learning_rate": 1.947887323943662e-05, | |
| "loss": 0.0468, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 2.0503287315368652, | |
| "learning_rate": 1.9455399061032865e-05, | |
| "loss": 0.0346, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 1.2296708822250366, | |
| "learning_rate": 1.943192488262911e-05, | |
| "loss": 0.043, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 1.9218964576721191, | |
| "learning_rate": 1.9408450704225353e-05, | |
| "loss": 0.0504, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 0.4588683247566223, | |
| "learning_rate": 1.9384976525821597e-05, | |
| "loss": 0.0339, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 0.322399377822876, | |
| "learning_rate": 1.9361502347417844e-05, | |
| "loss": 0.0484, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 1.4765088558197021, | |
| "learning_rate": 1.9338028169014085e-05, | |
| "loss": 0.0368, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 2.2785489559173584, | |
| "learning_rate": 1.931455399061033e-05, | |
| "loss": 0.027, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 0.05628859996795654, | |
| "learning_rate": 1.9291079812206576e-05, | |
| "loss": 0.0442, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 2.550320863723755, | |
| "learning_rate": 1.926760563380282e-05, | |
| "loss": 0.0475, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 1.3099887371063232, | |
| "learning_rate": 1.924413145539906e-05, | |
| "loss": 0.0333, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 1.2200229167938232, | |
| "learning_rate": 1.9220657276995304e-05, | |
| "loss": 0.037, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 1.5526323318481445, | |
| "learning_rate": 1.9197183098591552e-05, | |
| "loss": 0.0599, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.1484403908252716, | |
| "learning_rate": 1.9173708920187792e-05, | |
| "loss": 0.035, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 2.4299137592315674, | |
| "learning_rate": 1.9150234741784036e-05, | |
| "loss": 0.0423, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 0.21701346337795258, | |
| "learning_rate": 1.9126760563380284e-05, | |
| "loss": 0.0282, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 0.13961966335773468, | |
| "learning_rate": 1.9103286384976528e-05, | |
| "loss": 0.025, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 0.26682791113853455, | |
| "learning_rate": 1.9079812206572768e-05, | |
| "loss": 0.0302, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.1746547371149063, | |
| "learning_rate": 1.9056338028169016e-05, | |
| "loss": 0.0376, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.3016582429409027, | |
| "learning_rate": 1.903286384976526e-05, | |
| "loss": 0.0567, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.5560840964317322, | |
| "learning_rate": 1.9009389671361503e-05, | |
| "loss": 0.0342, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.5322542786598206, | |
| "learning_rate": 1.8985915492957747e-05, | |
| "loss": 0.036, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 3.8124992847442627, | |
| "learning_rate": 1.896244131455399e-05, | |
| "loss": 0.0345, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 2.32499623298645, | |
| "learning_rate": 1.8938967136150235e-05, | |
| "loss": 0.0221, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 0.9310886263847351, | |
| "learning_rate": 1.891549295774648e-05, | |
| "loss": 0.0179, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 2.592772960662842, | |
| "learning_rate": 1.8892018779342723e-05, | |
| "loss": 0.0627, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.32913756370544434, | |
| "learning_rate": 1.8868544600938967e-05, | |
| "loss": 0.064, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.5899044871330261, | |
| "learning_rate": 1.8845070422535215e-05, | |
| "loss": 0.041, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 4.0505170822143555, | |
| "learning_rate": 1.8821596244131455e-05, | |
| "loss": 0.0325, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 2.8459744453430176, | |
| "learning_rate": 1.87981220657277e-05, | |
| "loss": 0.0351, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 1.0423197746276855, | |
| "learning_rate": 1.8774647887323946e-05, | |
| "loss": 0.0356, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "grad_norm": 1.0360386371612549, | |
| "learning_rate": 1.875117370892019e-05, | |
| "loss": 0.0366, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "grad_norm": 1.490789771080017, | |
| "learning_rate": 1.872769953051643e-05, | |
| "loss": 0.0456, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "grad_norm": 2.266568422317505, | |
| "learning_rate": 1.870422535211268e-05, | |
| "loss": 0.0383, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "grad_norm": 2.135704755783081, | |
| "learning_rate": 1.8680751173708922e-05, | |
| "loss": 0.0539, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 2.030789852142334, | |
| "learning_rate": 1.8657276995305166e-05, | |
| "loss": 0.0316, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 2.624379873275757, | |
| "learning_rate": 1.863380281690141e-05, | |
| "loss": 0.0507, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 0.05331215262413025, | |
| "learning_rate": 1.8610328638497654e-05, | |
| "loss": 0.058, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 4.441781520843506, | |
| "learning_rate": 1.8586854460093898e-05, | |
| "loss": 0.0248, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 3.5211195945739746, | |
| "learning_rate": 1.8563380281690142e-05, | |
| "loss": 0.0393, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 1.485267996788025, | |
| "learning_rate": 1.8539906103286386e-05, | |
| "loss": 0.0476, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 1.912190556526184, | |
| "learning_rate": 1.851643192488263e-05, | |
| "loss": 0.0334, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 0.3232327699661255, | |
| "learning_rate": 1.8492957746478874e-05, | |
| "loss": 0.039, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 3.1284468173980713, | |
| "learning_rate": 1.8469483568075118e-05, | |
| "loss": 0.0458, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.6203511357307434, | |
| "learning_rate": 1.8446009389671362e-05, | |
| "loss": 0.0263, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.8480092883110046, | |
| "learning_rate": 1.8422535211267606e-05, | |
| "loss": 0.03, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.9211137890815735, | |
| "learning_rate": 1.839906103286385e-05, | |
| "loss": 0.0331, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 2.1461546421051025, | |
| "learning_rate": 1.8375586854460094e-05, | |
| "loss": 0.0464, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 1.1663808822631836, | |
| "learning_rate": 1.8352112676056338e-05, | |
| "loss": 0.051, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 0.1547522097826004, | |
| "learning_rate": 1.8328638497652585e-05, | |
| "loss": 0.0327, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "eval_loss": 0.0615035742521286, | |
| "eval_macro/f1": 0.9206273949130451, | |
| "eval_macro/precision": 0.919397009262914, | |
| "eval_macro/recall": 0.9224895351325427, | |
| "eval_micro/f1": 0.9222305747283537, | |
| "eval_micro/precision": 0.9204692722188118, | |
| "eval_micro/recall": 0.9239986306059569, | |
| "eval_runtime": 29.134, | |
| "eval_samples/accuracy": 0.915576857240671, | |
| "eval_samples_per_second": 501.304, | |
| "eval_steps_per_second": 15.686, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 0.1404021680355072, | |
| "learning_rate": 1.8305164319248826e-05, | |
| "loss": 0.0341, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 1.9744393825531006, | |
| "learning_rate": 1.828169014084507e-05, | |
| "loss": 0.0404, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 3.0998284816741943, | |
| "learning_rate": 1.8258215962441317e-05, | |
| "loss": 0.0467, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 2.285637855529785, | |
| "learning_rate": 1.823474178403756e-05, | |
| "loss": 0.0409, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 2.0926291942596436, | |
| "learning_rate": 1.82112676056338e-05, | |
| "loss": 0.0314, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 1.4188313484191895, | |
| "learning_rate": 1.818779342723005e-05, | |
| "loss": 0.0382, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 2.5279481410980225, | |
| "learning_rate": 1.8164319248826293e-05, | |
| "loss": 0.019, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 1.5087552070617676, | |
| "learning_rate": 1.8140845070422537e-05, | |
| "loss": 0.0221, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 1.3917876482009888, | |
| "learning_rate": 1.811737089201878e-05, | |
| "loss": 0.028, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 1.1080641746520996, | |
| "learning_rate": 1.8093896713615025e-05, | |
| "loss": 0.0451, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 0.10831775516271591, | |
| "learning_rate": 1.807042253521127e-05, | |
| "loss": 0.0291, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 1.2383673191070557, | |
| "learning_rate": 1.8046948356807513e-05, | |
| "loss": 0.0353, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 2.3589017391204834, | |
| "learning_rate": 1.8023474178403756e-05, | |
| "loss": 0.0368, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.8582547307014465, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.043, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 1.0989599227905273, | |
| "learning_rate": 1.7976525821596248e-05, | |
| "loss": 0.0699, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 1.3760502338409424, | |
| "learning_rate": 1.795305164319249e-05, | |
| "loss": 0.0447, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 1.222806692123413, | |
| "learning_rate": 1.7929577464788732e-05, | |
| "loss": 0.0353, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 5.3245368003845215, | |
| "learning_rate": 1.790610328638498e-05, | |
| "loss": 0.039, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 0.2474091649055481, | |
| "learning_rate": 1.788262910798122e-05, | |
| "loss": 0.0246, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 0.02522147260606289, | |
| "learning_rate": 1.7859154929577464e-05, | |
| "loss": 0.0226, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 0.7612372636795044, | |
| "learning_rate": 1.7835680751173708e-05, | |
| "loss": 0.026, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 0.646409273147583, | |
| "learning_rate": 1.7812206572769956e-05, | |
| "loss": 0.0292, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 1.2514673471450806, | |
| "learning_rate": 1.7788732394366196e-05, | |
| "loss": 0.0415, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 1.112414836883545, | |
| "learning_rate": 1.776525821596244e-05, | |
| "loss": 0.0257, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 1.2794502973556519, | |
| "learning_rate": 1.7741784037558687e-05, | |
| "loss": 0.0403, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 2.2429542541503906, | |
| "learning_rate": 1.771830985915493e-05, | |
| "loss": 0.0329, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 0.08626211434602737, | |
| "learning_rate": 1.7694835680751172e-05, | |
| "loss": 0.0322, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 0.9771416783332825, | |
| "learning_rate": 1.767136150234742e-05, | |
| "loss": 0.0223, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 29.060523986816406, | |
| "learning_rate": 1.7647887323943663e-05, | |
| "loss": 0.041, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.3592706322669983, | |
| "learning_rate": 1.7624413145539907e-05, | |
| "loss": 0.0342, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 1.4272717237472534, | |
| "learning_rate": 1.760093896713615e-05, | |
| "loss": 0.0263, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 1.0685161352157593, | |
| "learning_rate": 1.7577464788732395e-05, | |
| "loss": 0.0516, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 2.785543203353882, | |
| "learning_rate": 1.755399061032864e-05, | |
| "loss": 0.0533, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 2.7001149654388428, | |
| "learning_rate": 1.7530516431924883e-05, | |
| "loss": 0.048, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 1.7706950902938843, | |
| "learning_rate": 1.7507042253521127e-05, | |
| "loss": 0.0407, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 2.3187646865844727, | |
| "learning_rate": 1.748356807511737e-05, | |
| "loss": 0.0212, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 1.5856642723083496, | |
| "learning_rate": 1.7460093896713618e-05, | |
| "loss": 0.0345, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 0.3232921361923218, | |
| "learning_rate": 1.743661971830986e-05, | |
| "loss": 0.0326, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 0.44799041748046875, | |
| "learning_rate": 1.7413145539906103e-05, | |
| "loss": 0.0329, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 6.359521389007568, | |
| "learning_rate": 1.738967136150235e-05, | |
| "loss": 0.041, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 0.3691033720970154, | |
| "learning_rate": 1.7366197183098594e-05, | |
| "loss": 0.0281, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 2.6704986095428467, | |
| "learning_rate": 1.7342723004694835e-05, | |
| "loss": 0.0579, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 2.6575472354888916, | |
| "learning_rate": 1.7319248826291082e-05, | |
| "loss": 0.0325, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 3.007859468460083, | |
| "learning_rate": 1.7295774647887326e-05, | |
| "loss": 0.0338, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 1.2672277688980103, | |
| "learning_rate": 1.727230046948357e-05, | |
| "loss": 0.0242, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "grad_norm": 1.3012404441833496, | |
| "learning_rate": 1.7248826291079814e-05, | |
| "loss": 0.0577, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "grad_norm": 1.5620529651641846, | |
| "learning_rate": 1.7225352112676058e-05, | |
| "loss": 0.0474, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "grad_norm": 0.07768592238426208, | |
| "learning_rate": 1.7201877934272302e-05, | |
| "loss": 0.0414, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "grad_norm": 2.158637046813965, | |
| "learning_rate": 1.7178403755868546e-05, | |
| "loss": 0.0545, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 0.3285406827926636, | |
| "learning_rate": 1.715492957746479e-05, | |
| "loss": 0.0223, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_loss": 0.06010037660598755, | |
| "eval_macro/f1": 0.9183699014717978, | |
| "eval_macro/precision": 0.9199256679585665, | |
| "eval_macro/recall": 0.9176130775774011, | |
| "eval_micro/f1": 0.9193316441827022, | |
| "eval_micro/precision": 0.9194575713992192, | |
| "eval_micro/recall": 0.9192057514549812, | |
| "eval_runtime": 28.7682, | |
| "eval_samples/accuracy": 0.9106470386853818, | |
| "eval_samples_per_second": 507.678, | |
| "eval_steps_per_second": 15.886, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 1.8858875036239624, | |
| "learning_rate": 1.7131455399061034e-05, | |
| "loss": 0.0265, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 1.7713499069213867, | |
| "learning_rate": 1.7107981220657278e-05, | |
| "loss": 0.0581, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 0.39804449677467346, | |
| "learning_rate": 1.708450704225352e-05, | |
| "loss": 0.026, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 0.16412827372550964, | |
| "learning_rate": 1.7061032863849766e-05, | |
| "loss": 0.0249, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 2.8789305686950684, | |
| "learning_rate": 1.703755868544601e-05, | |
| "loss": 0.0495, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 3.439175605773926, | |
| "learning_rate": 1.7014084507042253e-05, | |
| "loss": 0.0444, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 0.4579373300075531, | |
| "learning_rate": 1.6990610328638497e-05, | |
| "loss": 0.054, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 2.6613855361938477, | |
| "learning_rate": 1.696713615023474e-05, | |
| "loss": 0.0411, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 1.7336716651916504, | |
| "learning_rate": 1.694366197183099e-05, | |
| "loss": 0.0532, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 2.034172773361206, | |
| "learning_rate": 1.692018779342723e-05, | |
| "loss": 0.02, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 1.298073410987854, | |
| "learning_rate": 1.6896713615023473e-05, | |
| "loss": 0.0449, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 0.09053938835859299, | |
| "learning_rate": 1.687323943661972e-05, | |
| "loss": 0.0132, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 1.7225825786590576, | |
| "learning_rate": 1.6849765258215965e-05, | |
| "loss": 0.0345, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.9660710692405701, | |
| "learning_rate": 1.6826291079812205e-05, | |
| "loss": 0.0331, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 2.697035312652588, | |
| "learning_rate": 1.6802816901408453e-05, | |
| "loss": 0.0385, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 3.3782408237457275, | |
| "learning_rate": 1.6779342723004696e-05, | |
| "loss": 0.0559, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 2.809810161590576, | |
| "learning_rate": 1.675586854460094e-05, | |
| "loss": 0.0395, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 0.35062745213508606, | |
| "learning_rate": 1.6732394366197184e-05, | |
| "loss": 0.0229, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 0.4234026372432709, | |
| "learning_rate": 1.670892018779343e-05, | |
| "loss": 0.0468, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 1.09328293800354, | |
| "learning_rate": 1.6685446009389672e-05, | |
| "loss": 0.0286, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 0.16104546189308167, | |
| "learning_rate": 1.6661971830985916e-05, | |
| "loss": 0.0317, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 1.0694267749786377, | |
| "learning_rate": 1.663849765258216e-05, | |
| "loss": 0.0383, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 2.887817621231079, | |
| "learning_rate": 1.6615023474178404e-05, | |
| "loss": 0.0273, | |
| "step": 14230 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 1.8802268505096436, | |
| "learning_rate": 1.659154929577465e-05, | |
| "loss": 0.025, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 0.8812164664268494, | |
| "learning_rate": 1.6568075117370892e-05, | |
| "loss": 0.0298, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 0.48760342597961426, | |
| "learning_rate": 1.6544600938967136e-05, | |
| "loss": 0.0247, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 1.062553882598877, | |
| "learning_rate": 1.6521126760563383e-05, | |
| "loss": 0.0319, | |
| "step": 14270 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 1.5591585636138916, | |
| "learning_rate": 1.6497652582159624e-05, | |
| "loss": 0.0356, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 1.9570565223693848, | |
| "learning_rate": 1.6474178403755868e-05, | |
| "loss": 0.0356, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.9120977520942688, | |
| "learning_rate": 1.6450704225352112e-05, | |
| "loss": 0.0229, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 2.795611619949341, | |
| "learning_rate": 1.642723004694836e-05, | |
| "loss": 0.0505, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.9692701101303101, | |
| "learning_rate": 1.64037558685446e-05, | |
| "loss": 0.0404, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.7188575267791748, | |
| "learning_rate": 1.6380281690140844e-05, | |
| "loss": 0.0237, | |
| "step": 14330 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 0.2590070366859436, | |
| "learning_rate": 1.635680751173709e-05, | |
| "loss": 0.019, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 0.07495612651109695, | |
| "learning_rate": 1.6333333333333335e-05, | |
| "loss": 0.0306, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 4.267418384552002, | |
| "learning_rate": 1.6309859154929576e-05, | |
| "loss": 0.0445, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 0.051151078194379807, | |
| "learning_rate": 1.6286384976525823e-05, | |
| "loss": 0.0346, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 3.307426929473877, | |
| "learning_rate": 1.6262910798122067e-05, | |
| "loss": 0.0602, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 0.3463471829891205, | |
| "learning_rate": 1.623943661971831e-05, | |
| "loss": 0.0391, | |
| "step": 14390 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 4.471025466918945, | |
| "learning_rate": 1.6215962441314555e-05, | |
| "loss": 0.0551, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 0.8686420321464539, | |
| "learning_rate": 1.61924882629108e-05, | |
| "loss": 0.038, | |
| "step": 14410 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 0.5848603248596191, | |
| "learning_rate": 1.6169014084507043e-05, | |
| "loss": 0.0276, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 1.7548906803131104, | |
| "learning_rate": 1.6145539906103287e-05, | |
| "loss": 0.0565, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 0.0470554418861866, | |
| "learning_rate": 1.612206572769953e-05, | |
| "loss": 0.0671, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 0.6341148614883423, | |
| "learning_rate": 1.6098591549295775e-05, | |
| "loss": 0.0286, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 1.4463139772415161, | |
| "learning_rate": 1.6075117370892022e-05, | |
| "loss": 0.024, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 2.489950180053711, | |
| "learning_rate": 1.6051643192488263e-05, | |
| "loss": 0.0512, | |
| "step": 14470 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 1.4375927448272705, | |
| "learning_rate": 1.6028169014084507e-05, | |
| "loss": 0.027, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 2.1097233295440674, | |
| "learning_rate": 1.6004694835680754e-05, | |
| "loss": 0.0186, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 3.670124053955078, | |
| "learning_rate": 1.5981220657276998e-05, | |
| "loss": 0.0252, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_loss": 0.06111237779259682, | |
| "eval_macro/f1": 0.920550554269432, | |
| "eval_macro/precision": 0.9202120705749197, | |
| "eval_macro/recall": 0.9219955061647757, | |
| "eval_micro/f1": 0.9220566115137426, | |
| "eval_micro/precision": 0.9207346214241824, | |
| "eval_micro/recall": 0.9233824032865457, | |
| "eval_runtime": 30.7947, | |
| "eval_samples/accuracy": 0.9153714481342006, | |
| "eval_samples_per_second": 474.271, | |
| "eval_steps_per_second": 14.84, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 0.2565593421459198, | |
| "learning_rate": 1.595774647887324e-05, | |
| "loss": 0.0526, | |
| "step": 14510 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 0.10212003439664841, | |
| "learning_rate": 1.5934272300469486e-05, | |
| "loss": 0.0296, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 1.9271149635314941, | |
| "learning_rate": 1.591079812206573e-05, | |
| "loss": 0.0342, | |
| "step": 14530 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 1.296161413192749, | |
| "learning_rate": 1.588732394366197e-05, | |
| "loss": 0.0505, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 0.2935797870159149, | |
| "learning_rate": 1.5863849765258218e-05, | |
| "loss": 0.0316, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 0.09829127788543701, | |
| "learning_rate": 1.584037558685446e-05, | |
| "loss": 0.0393, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 1.6144376993179321, | |
| "learning_rate": 1.5816901408450706e-05, | |
| "loss": 0.0618, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 0.642356812953949, | |
| "learning_rate": 1.579342723004695e-05, | |
| "loss": 0.0553, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "grad_norm": 1.8286004066467285, | |
| "learning_rate": 1.5769953051643193e-05, | |
| "loss": 0.0349, | |
| "step": 14590 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 0.8908696174621582, | |
| "learning_rate": 1.5746478873239437e-05, | |
| "loss": 0.0424, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 0.6125556230545044, | |
| "learning_rate": 1.572300469483568e-05, | |
| "loss": 0.0315, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 0.11555056273937225, | |
| "learning_rate": 1.5699530516431925e-05, | |
| "loss": 0.042, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 1.4078800678253174, | |
| "learning_rate": 1.567605633802817e-05, | |
| "loss": 0.0206, | |
| "step": 14630 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.036080729216337204, | |
| "learning_rate": 1.5652582159624413e-05, | |
| "loss": 0.0247, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.10510585457086563, | |
| "learning_rate": 1.5629107981220657e-05, | |
| "loss": 0.0416, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 1.879212498664856, | |
| "learning_rate": 1.56056338028169e-05, | |
| "loss": 0.0364, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 10.751227378845215, | |
| "learning_rate": 1.5582159624413145e-05, | |
| "loss": 0.0471, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 0.3223772346973419, | |
| "learning_rate": 1.5558685446009392e-05, | |
| "loss": 0.0438, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 2.0051674842834473, | |
| "learning_rate": 1.5535211267605633e-05, | |
| "loss": 0.0373, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 2.243884325027466, | |
| "learning_rate": 1.5511737089201877e-05, | |
| "loss": 0.0402, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 1.409742832183838, | |
| "learning_rate": 1.5488262910798124e-05, | |
| "loss": 0.0452, | |
| "step": 14710 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 1.0107128620147705, | |
| "learning_rate": 1.546478873239437e-05, | |
| "loss": 0.0268, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 0.6701316237449646, | |
| "learning_rate": 1.544131455399061e-05, | |
| "loss": 0.0398, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 1.3181270360946655, | |
| "learning_rate": 1.5417840375586856e-05, | |
| "loss": 0.0526, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 2.4367566108703613, | |
| "learning_rate": 1.53943661971831e-05, | |
| "loss": 0.0254, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 0.1977778524160385, | |
| "learning_rate": 1.5370892018779344e-05, | |
| "loss": 0.0533, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 0.5955461263656616, | |
| "learning_rate": 1.5347417840375588e-05, | |
| "loss": 0.0332, | |
| "step": 14770 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 0.13952666521072388, | |
| "learning_rate": 1.5323943661971832e-05, | |
| "loss": 0.0521, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 1.3798327445983887, | |
| "learning_rate": 1.5300469483568076e-05, | |
| "loss": 0.066, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 0.49183735251426697, | |
| "learning_rate": 1.527699530516432e-05, | |
| "loss": 0.0346, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 1.1325535774230957, | |
| "learning_rate": 1.5253521126760564e-05, | |
| "loss": 0.0517, | |
| "step": 14810 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 1.626822829246521, | |
| "learning_rate": 1.5230046948356808e-05, | |
| "loss": 0.062, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 1.8618102073669434, | |
| "learning_rate": 1.5206572769953054e-05, | |
| "loss": 0.0363, | |
| "step": 14830 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 1.8805667161941528, | |
| "learning_rate": 1.5183098591549298e-05, | |
| "loss": 0.0251, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 1.918205976486206, | |
| "learning_rate": 1.515962441314554e-05, | |
| "loss": 0.0425, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 2.2678277492523193, | |
| "learning_rate": 1.5136150234741785e-05, | |
| "loss": 0.0356, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 2.2082948684692383, | |
| "learning_rate": 1.511267605633803e-05, | |
| "loss": 0.0367, | |
| "step": 14870 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 0.748775839805603, | |
| "learning_rate": 1.5089201877934273e-05, | |
| "loss": 0.0278, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 2.2440285682678223, | |
| "learning_rate": 1.5065727699530516e-05, | |
| "loss": 0.0636, | |
| "step": 14890 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 1.5458263158798218, | |
| "learning_rate": 1.5042253521126761e-05, | |
| "loss": 0.0364, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.6863703727722168, | |
| "learning_rate": 1.5018779342723005e-05, | |
| "loss": 0.0127, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.5138887166976929, | |
| "learning_rate": 1.4995305164319247e-05, | |
| "loss": 0.0265, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.6225917339324951, | |
| "learning_rate": 1.4971830985915495e-05, | |
| "loss": 0.047, | |
| "step": 14930 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 1.9543975591659546, | |
| "learning_rate": 1.4948356807511737e-05, | |
| "loss": 0.0324, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 0.7699962854385376, | |
| "learning_rate": 1.4924882629107981e-05, | |
| "loss": 0.0357, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 0.03310937061905861, | |
| "learning_rate": 1.4901408450704227e-05, | |
| "loss": 0.0138, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 3.659010171890259, | |
| "learning_rate": 1.487793427230047e-05, | |
| "loss": 0.0365, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.07815925031900406, | |
| "learning_rate": 1.4854460093896713e-05, | |
| "loss": 0.0212, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.031491972506046295, | |
| "learning_rate": 1.4830985915492959e-05, | |
| "loss": 0.0272, | |
| "step": 14990 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 4.178372859954834, | |
| "learning_rate": 1.4807511737089203e-05, | |
| "loss": 0.0216, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "eval_loss": 0.058485910296440125, | |
| "eval_macro/f1": 0.924584638738032, | |
| "eval_macro/precision": 0.9249554071277554, | |
| "eval_macro/recall": 0.9245806833373952, | |
| "eval_micro/f1": 0.9260513978715396, | |
| "eval_micro/precision": 0.925639622383363, | |
| "eval_micro/recall": 0.9264635398836015, | |
| "eval_runtime": 30.7706, | |
| "eval_samples/accuracy": 0.919274221157138, | |
| "eval_samples_per_second": 474.641, | |
| "eval_steps_per_second": 14.852, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.2273653894662857, | |
| "learning_rate": 1.4784037558685446e-05, | |
| "loss": 0.0458, | |
| "step": 15010 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 1.6062771081924438, | |
| "learning_rate": 1.4760563380281692e-05, | |
| "loss": 0.0333, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 6.850545406341553, | |
| "learning_rate": 1.4737089201877934e-05, | |
| "loss": 0.0098, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 0.8278430104255676, | |
| "learning_rate": 1.4713615023474178e-05, | |
| "loss": 0.0311, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 0.17055070400238037, | |
| "learning_rate": 1.4690140845070424e-05, | |
| "loss": 0.0377, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 2.156358242034912, | |
| "learning_rate": 1.4666666666666668e-05, | |
| "loss": 0.0533, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 2.780056953430176, | |
| "learning_rate": 1.464319248826291e-05, | |
| "loss": 0.0318, | |
| "step": 15070 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 1.9434489011764526, | |
| "learning_rate": 1.4619718309859156e-05, | |
| "loss": 0.0388, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 0.6632909178733826, | |
| "learning_rate": 1.45962441314554e-05, | |
| "loss": 0.0197, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 4.248073577880859, | |
| "learning_rate": 1.4572769953051644e-05, | |
| "loss": 0.0538, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 2.3261492252349854, | |
| "learning_rate": 1.454929577464789e-05, | |
| "loss": 0.0356, | |
| "step": 15110 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 2.4972357749938965, | |
| "learning_rate": 1.4525821596244132e-05, | |
| "loss": 0.0505, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 2.1739044189453125, | |
| "learning_rate": 1.4502347417840376e-05, | |
| "loss": 0.0451, | |
| "step": 15130 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 0.1503843516111374, | |
| "learning_rate": 1.4478873239436621e-05, | |
| "loss": 0.0455, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.8404219150543213, | |
| "learning_rate": 1.4455399061032865e-05, | |
| "loss": 0.0345, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.3419097065925598, | |
| "learning_rate": 1.4431924882629108e-05, | |
| "loss": 0.0443, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.6726291179656982, | |
| "learning_rate": 1.4408450704225355e-05, | |
| "loss": 0.0221, | |
| "step": 15170 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.051636260002851486, | |
| "learning_rate": 1.4384976525821597e-05, | |
| "loss": 0.0381, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 1.508140206336975, | |
| "learning_rate": 1.4361502347417841e-05, | |
| "loss": 0.0427, | |
| "step": 15190 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 2.3500542640686035, | |
| "learning_rate": 1.4338028169014083e-05, | |
| "loss": 0.0426, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 0.3720274567604065, | |
| "learning_rate": 1.4314553990610329e-05, | |
| "loss": 0.0211, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 0.3215431272983551, | |
| "learning_rate": 1.4291079812206573e-05, | |
| "loss": 0.025, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 4.001272678375244, | |
| "learning_rate": 1.4267605633802817e-05, | |
| "loss": 0.0434, | |
| "step": 15230 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 0.29120638966560364, | |
| "learning_rate": 1.4244131455399063e-05, | |
| "loss": 0.0309, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 0.6027160882949829, | |
| "learning_rate": 1.4220657276995305e-05, | |
| "loss": 0.0304, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 0.1958913952112198, | |
| "learning_rate": 1.4197183098591549e-05, | |
| "loss": 0.0381, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 0.306017130613327, | |
| "learning_rate": 1.4173708920187794e-05, | |
| "loss": 0.0402, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 3.3421289920806885, | |
| "learning_rate": 1.4150234741784038e-05, | |
| "loss": 0.0313, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 0.6600431799888611, | |
| "learning_rate": 1.412676056338028e-05, | |
| "loss": 0.0408, | |
| "step": 15290 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 1.1957746744155884, | |
| "learning_rate": 1.4103286384976528e-05, | |
| "loss": 0.0367, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 2.5639190673828125, | |
| "learning_rate": 1.407981220657277e-05, | |
| "loss": 0.0337, | |
| "step": 15310 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 1.6588279008865356, | |
| "learning_rate": 1.4056338028169014e-05, | |
| "loss": 0.0521, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.6526573896408081, | |
| "learning_rate": 1.403286384976526e-05, | |
| "loss": 0.0238, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 1.4112497568130493, | |
| "learning_rate": 1.4009389671361502e-05, | |
| "loss": 0.0274, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.15742766857147217, | |
| "learning_rate": 1.3985915492957746e-05, | |
| "loss": 0.0424, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 2.976287841796875, | |
| "learning_rate": 1.3962441314553992e-05, | |
| "loss": 0.0314, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 1.613516926765442, | |
| "learning_rate": 1.3938967136150236e-05, | |
| "loss": 0.0352, | |
| "step": 15370 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 2.304145336151123, | |
| "learning_rate": 1.3915492957746478e-05, | |
| "loss": 0.0329, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 2.545947551727295, | |
| "learning_rate": 1.3892018779342725e-05, | |
| "loss": 0.0279, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 0.5519721508026123, | |
| "learning_rate": 1.3868544600938968e-05, | |
| "loss": 0.0393, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 0.135732501745224, | |
| "learning_rate": 1.3845070422535212e-05, | |
| "loss": 0.0287, | |
| "step": 15410 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 0.8303437829017639, | |
| "learning_rate": 1.3821596244131457e-05, | |
| "loss": 0.0371, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 2.1903645992279053, | |
| "learning_rate": 1.3798122065727701e-05, | |
| "loss": 0.0334, | |
| "step": 15430 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 2.582015037536621, | |
| "learning_rate": 1.3774647887323943e-05, | |
| "loss": 0.0433, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 0.15312707424163818, | |
| "learning_rate": 1.3751173708920189e-05, | |
| "loss": 0.0527, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 2.6689505577087402, | |
| "learning_rate": 1.3727699530516433e-05, | |
| "loss": 0.0503, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 2.9017531871795654, | |
| "learning_rate": 1.3704225352112677e-05, | |
| "loss": 0.0719, | |
| "step": 15470 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 0.6398376822471619, | |
| "learning_rate": 1.3680751173708923e-05, | |
| "loss": 0.0221, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 1.0251731872558594, | |
| "learning_rate": 1.3657276995305165e-05, | |
| "loss": 0.0326, | |
| "step": 15490 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 1.954060435295105, | |
| "learning_rate": 1.3633802816901409e-05, | |
| "loss": 0.0346, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_loss": 0.05969972163438797, | |
| "eval_macro/f1": 0.9216894852226254, | |
| "eval_macro/precision": 0.9223725171238232, | |
| "eval_macro/recall": 0.9214463830733233, | |
| "eval_micro/f1": 0.9234086242299795, | |
| "eval_micro/precision": 0.9230927129661307, | |
| "eval_micro/recall": 0.9237247517973297, | |
| "eval_runtime": 27.8691, | |
| "eval_samples/accuracy": 0.9168777815816501, | |
| "eval_samples_per_second": 524.057, | |
| "eval_steps_per_second": 16.398, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 2.410942792892456, | |
| "learning_rate": 1.3610328638497651e-05, | |
| "loss": 0.0299, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.29106903076171875, | |
| "learning_rate": 1.3586854460093899e-05, | |
| "loss": 0.0311, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 1.473370909690857, | |
| "learning_rate": 1.356338028169014e-05, | |
| "loss": 0.0221, | |
| "step": 15530 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 3.0741446018218994, | |
| "learning_rate": 1.3539906103286385e-05, | |
| "loss": 0.0407, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 1.4671398401260376, | |
| "learning_rate": 1.351643192488263e-05, | |
| "loss": 0.0318, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 1.6664915084838867, | |
| "learning_rate": 1.3492957746478874e-05, | |
| "loss": 0.0484, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 3.2199301719665527, | |
| "learning_rate": 1.3469483568075117e-05, | |
| "loss": 0.0558, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 1.3700677156448364, | |
| "learning_rate": 1.3446009389671362e-05, | |
| "loss": 0.0346, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 0.7739179730415344, | |
| "learning_rate": 1.3422535211267606e-05, | |
| "loss": 0.0504, | |
| "step": 15590 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 0.4577118754386902, | |
| "learning_rate": 1.339906103286385e-05, | |
| "loss": 0.0258, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 0.44119253754615784, | |
| "learning_rate": 1.3375586854460096e-05, | |
| "loss": 0.0405, | |
| "step": 15610 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 4.451091766357422, | |
| "learning_rate": 1.3352112676056338e-05, | |
| "loss": 0.0683, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 3.0964515209198, | |
| "learning_rate": 1.3328638497652582e-05, | |
| "loss": 0.0373, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 2.7771458625793457, | |
| "learning_rate": 1.3305164319248828e-05, | |
| "loss": 0.0745, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 2.804838180541992, | |
| "learning_rate": 1.3281690140845072e-05, | |
| "loss": 0.0397, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 1.006893277168274, | |
| "learning_rate": 1.3258215962441314e-05, | |
| "loss": 0.0428, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.8771440982818604, | |
| "learning_rate": 1.323474178403756e-05, | |
| "loss": 0.0282, | |
| "step": 15670 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.4935603737831116, | |
| "learning_rate": 1.3211267605633804e-05, | |
| "loss": 0.0293, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.03460874408483505, | |
| "learning_rate": 1.3187793427230048e-05, | |
| "loss": 0.0509, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 0.2056482583284378, | |
| "learning_rate": 1.3164319248826293e-05, | |
| "loss": 0.0358, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 3.385071039199829, | |
| "learning_rate": 1.3140845070422535e-05, | |
| "loss": 0.043, | |
| "step": 15710 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 1.2948657274246216, | |
| "learning_rate": 1.311737089201878e-05, | |
| "loss": 0.0186, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 0.2789139151573181, | |
| "learning_rate": 1.3093896713615025e-05, | |
| "loss": 0.0441, | |
| "step": 15730 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 2.7602319717407227, | |
| "learning_rate": 1.3070422535211269e-05, | |
| "loss": 0.0468, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.09470643103122711, | |
| "learning_rate": 1.3046948356807511e-05, | |
| "loss": 0.0173, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 2.7615208625793457, | |
| "learning_rate": 1.3023474178403759e-05, | |
| "loss": 0.0472, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 1.1640163660049438, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.0328, | |
| "step": 15770 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.09145846962928772, | |
| "learning_rate": 1.2976525821596245e-05, | |
| "loss": 0.0282, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 0.874416172504425, | |
| "learning_rate": 1.2953051643192487e-05, | |
| "loss": 0.0524, | |
| "step": 15790 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 1.1191848516464233, | |
| "learning_rate": 1.2929577464788733e-05, | |
| "loss": 0.0483, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 0.3583777844905853, | |
| "learning_rate": 1.2906103286384977e-05, | |
| "loss": 0.0227, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 4.477143287658691, | |
| "learning_rate": 1.288262910798122e-05, | |
| "loss": 0.0354, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 1.5109944343566895, | |
| "learning_rate": 1.2859154929577466e-05, | |
| "loss": 0.0257, | |
| "step": 15830 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 0.3320276141166687, | |
| "learning_rate": 1.2835680751173709e-05, | |
| "loss": 0.0173, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 1.1068660020828247, | |
| "learning_rate": 1.2812206572769953e-05, | |
| "loss": 0.0243, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 4.6560773849487305, | |
| "learning_rate": 1.2788732394366198e-05, | |
| "loss": 0.0407, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 1.1727485656738281, | |
| "learning_rate": 1.2765258215962442e-05, | |
| "loss": 0.0409, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 1.807950735092163, | |
| "learning_rate": 1.2741784037558684e-05, | |
| "loss": 0.0177, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 0.8868434429168701, | |
| "learning_rate": 1.2718309859154932e-05, | |
| "loss": 0.0377, | |
| "step": 15890 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 0.8664716482162476, | |
| "learning_rate": 1.2694835680751174e-05, | |
| "loss": 0.0251, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 0.5700992345809937, | |
| "learning_rate": 1.2671361502347418e-05, | |
| "loss": 0.045, | |
| "step": 15910 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 2.4590039253234863, | |
| "learning_rate": 1.2647887323943664e-05, | |
| "loss": 0.0379, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 0.2064366489648819, | |
| "learning_rate": 1.2624413145539906e-05, | |
| "loss": 0.0275, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 0.28647980093955994, | |
| "learning_rate": 1.260093896713615e-05, | |
| "loss": 0.0333, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 0.4945986568927765, | |
| "learning_rate": 1.2577464788732396e-05, | |
| "loss": 0.0267, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 3.3648037910461426, | |
| "learning_rate": 1.255399061032864e-05, | |
| "loss": 0.0359, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 2.6217010021209717, | |
| "learning_rate": 1.2530516431924882e-05, | |
| "loss": 0.035, | |
| "step": 15970 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 1.323452115058899, | |
| "learning_rate": 1.2507042253521129e-05, | |
| "loss": 0.0249, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.818280816078186, | |
| "learning_rate": 1.2483568075117371e-05, | |
| "loss": 0.046, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 1.3233391046524048, | |
| "learning_rate": 1.2460093896713615e-05, | |
| "loss": 0.027, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "eval_loss": 0.060248348861932755, | |
| "eval_macro/f1": 0.9231307675203939, | |
| "eval_macro/precision": 0.9238576142304777, | |
| "eval_macro/recall": 0.9234269768144926, | |
| "eval_micro/f1": 0.9243082395594623, | |
| "eval_micro/precision": 0.9234554401312193, | |
| "eval_micro/recall": 0.9251626155426224, | |
| "eval_runtime": 29.6965, | |
| "eval_samples/accuracy": 0.9184525847312565, | |
| "eval_samples_per_second": 491.809, | |
| "eval_steps_per_second": 15.389, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.4075230360031128, | |
| "learning_rate": 1.243661971830986e-05, | |
| "loss": 0.0353, | |
| "step": 16010 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 1.8514373302459717, | |
| "learning_rate": 1.2413145539906105e-05, | |
| "loss": 0.0317, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.3068770170211792, | |
| "learning_rate": 1.2389671361502347e-05, | |
| "loss": 0.0262, | |
| "step": 16030 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 1.1337624788284302, | |
| "learning_rate": 1.2366197183098593e-05, | |
| "loss": 0.0446, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 0.8808763027191162, | |
| "learning_rate": 1.2342723004694837e-05, | |
| "loss": 0.0255, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 2.083991527557373, | |
| "learning_rate": 1.2319248826291079e-05, | |
| "loss": 0.0243, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 2.384486198425293, | |
| "learning_rate": 1.2295774647887325e-05, | |
| "loss": 0.0348, | |
| "step": 16070 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 1.3079556226730347, | |
| "learning_rate": 1.2272300469483569e-05, | |
| "loss": 0.0574, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 0.15147972106933594, | |
| "learning_rate": 1.2248826291079813e-05, | |
| "loss": 0.0221, | |
| "step": 16090 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 2.0932321548461914, | |
| "learning_rate": 1.2225352112676057e-05, | |
| "loss": 0.0345, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 1.885918140411377, | |
| "learning_rate": 1.2201877934272302e-05, | |
| "loss": 0.0303, | |
| "step": 16110 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 1.7613309621810913, | |
| "learning_rate": 1.2178403755868545e-05, | |
| "loss": 0.0449, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 0.49504420161247253, | |
| "learning_rate": 1.215492957746479e-05, | |
| "loss": 0.0318, | |
| "step": 16130 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 1.404921054840088, | |
| "learning_rate": 1.2131455399061034e-05, | |
| "loss": 0.0368, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 1.087222933769226, | |
| "learning_rate": 1.2107981220657278e-05, | |
| "loss": 0.0251, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 0.017098503187298775, | |
| "learning_rate": 1.2084507042253522e-05, | |
| "loss": 0.0265, | |
| "step": 16160 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 2.125025987625122, | |
| "learning_rate": 1.2061032863849766e-05, | |
| "loss": 0.0361, | |
| "step": 16170 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 1.9100775718688965, | |
| "learning_rate": 1.203755868544601e-05, | |
| "loss": 0.0572, | |
| "step": 16180 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.4402145445346832, | |
| "learning_rate": 1.2014084507042254e-05, | |
| "loss": 0.0463, | |
| "step": 16190 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.9230586886405945, | |
| "learning_rate": 1.1990610328638498e-05, | |
| "loss": 0.0328, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 0.3850882053375244, | |
| "learning_rate": 1.1967136150234742e-05, | |
| "loss": 0.0488, | |
| "step": 16210 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 0.6668741703033447, | |
| "learning_rate": 1.1943661971830987e-05, | |
| "loss": 0.0282, | |
| "step": 16220 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 0.18775498867034912, | |
| "learning_rate": 1.192018779342723e-05, | |
| "loss": 0.0257, | |
| "step": 16230 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 4.097379684448242, | |
| "learning_rate": 1.1896713615023475e-05, | |
| "loss": 0.0369, | |
| "step": 16240 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 3.4568750858306885, | |
| "learning_rate": 1.187323943661972e-05, | |
| "loss": 0.0487, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.5609110593795776, | |
| "learning_rate": 1.1849765258215963e-05, | |
| "loss": 0.0358, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.8959105014801025, | |
| "learning_rate": 1.1826291079812207e-05, | |
| "loss": 0.0333, | |
| "step": 16270 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.7780373096466064, | |
| "learning_rate": 1.1802816901408451e-05, | |
| "loss": 0.0389, | |
| "step": 16280 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 0.4024539291858673, | |
| "learning_rate": 1.1779342723004695e-05, | |
| "loss": 0.0317, | |
| "step": 16290 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 2.487910747528076, | |
| "learning_rate": 1.175586854460094e-05, | |
| "loss": 0.0473, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 0.5158886313438416, | |
| "learning_rate": 1.1732394366197183e-05, | |
| "loss": 0.0493, | |
| "step": 16310 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 1.9161232709884644, | |
| "learning_rate": 1.1708920187793427e-05, | |
| "loss": 0.0421, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 1.4477870464324951, | |
| "learning_rate": 1.1685446009389673e-05, | |
| "loss": 0.0442, | |
| "step": 16330 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 3.5894601345062256, | |
| "learning_rate": 1.1661971830985915e-05, | |
| "loss": 0.0483, | |
| "step": 16340 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.6837550401687622, | |
| "learning_rate": 1.163849765258216e-05, | |
| "loss": 0.0396, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.9177005290985107, | |
| "learning_rate": 1.1615023474178405e-05, | |
| "loss": 0.0159, | |
| "step": 16360 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 1.36934494972229, | |
| "learning_rate": 1.1591549295774649e-05, | |
| "loss": 0.02, | |
| "step": 16370 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.41758379340171814, | |
| "learning_rate": 1.1568075117370893e-05, | |
| "loss": 0.0305, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.9088613390922546, | |
| "learning_rate": 1.1544600938967136e-05, | |
| "loss": 0.0282, | |
| "step": 16390 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.19039836525917053, | |
| "learning_rate": 1.152112676056338e-05, | |
| "loss": 0.0276, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 3.383873462677002, | |
| "learning_rate": 1.1497652582159624e-05, | |
| "loss": 0.0529, | |
| "step": 16410 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.04074535891413689, | |
| "learning_rate": 1.147417840375587e-05, | |
| "loss": 0.0341, | |
| "step": 16420 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 2.3034849166870117, | |
| "learning_rate": 1.1450704225352112e-05, | |
| "loss": 0.068, | |
| "step": 16430 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 1.1354528665542603, | |
| "learning_rate": 1.1427230046948358e-05, | |
| "loss": 0.0133, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 1.0032627582550049, | |
| "learning_rate": 1.14037558685446e-05, | |
| "loss": 0.0327, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 1.8904528617858887, | |
| "learning_rate": 1.1380281690140846e-05, | |
| "loss": 0.039, | |
| "step": 16460 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 2.2675487995147705, | |
| "learning_rate": 1.135680751173709e-05, | |
| "loss": 0.0538, | |
| "step": 16470 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 1.8295910358428955, | |
| "learning_rate": 1.1333333333333334e-05, | |
| "loss": 0.0551, | |
| "step": 16480 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 1.6022156476974487, | |
| "learning_rate": 1.1309859154929578e-05, | |
| "loss": 0.0266, | |
| "step": 16490 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 1.1555670499801636, | |
| "learning_rate": 1.1286384976525822e-05, | |
| "loss": 0.0454, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "eval_loss": 0.05463627353310585, | |
| "eval_macro/f1": 0.9259127675842223, | |
| "eval_macro/precision": 0.9267482357187499, | |
| "eval_macro/recall": 0.92584653320531, | |
| "eval_micro/f1": 0.9269678302532512, | |
| "eval_micro/precision": 0.9266507013342457, | |
| "eval_micro/recall": 0.927285176309483, | |
| "eval_runtime": 29.5895, | |
| "eval_samples/accuracy": 0.9209859637110579, | |
| "eval_samples_per_second": 493.587, | |
| "eval_steps_per_second": 15.445, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 1.2500646114349365, | |
| "learning_rate": 1.1262910798122066e-05, | |
| "loss": 0.0334, | |
| "step": 16510 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 1.2711575031280518, | |
| "learning_rate": 1.123943661971831e-05, | |
| "loss": 0.0318, | |
| "step": 16520 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 1.3326259851455688, | |
| "learning_rate": 1.1215962441314555e-05, | |
| "loss": 0.014, | |
| "step": 16530 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.792326033115387, | |
| "learning_rate": 1.1192488262910798e-05, | |
| "loss": 0.0391, | |
| "step": 16540 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.5718660354614258, | |
| "learning_rate": 1.1169014084507043e-05, | |
| "loss": 0.0382, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 0.5356886982917786, | |
| "learning_rate": 1.1145539906103287e-05, | |
| "loss": 0.0388, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 1.5403183698654175, | |
| "learning_rate": 1.1122065727699531e-05, | |
| "loss": 0.0301, | |
| "step": 16570 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 4.721304893493652, | |
| "learning_rate": 1.1098591549295775e-05, | |
| "loss": 0.0655, | |
| "step": 16580 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 1.5049669742584229, | |
| "learning_rate": 1.1075117370892019e-05, | |
| "loss": 0.0249, | |
| "step": 16590 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 0.16248831152915955, | |
| "learning_rate": 1.1051643192488263e-05, | |
| "loss": 0.0391, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 1.5317431688308716, | |
| "learning_rate": 1.1028169014084509e-05, | |
| "loss": 0.0336, | |
| "step": 16610 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 4.947154998779297, | |
| "learning_rate": 1.1004694835680751e-05, | |
| "loss": 0.0512, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 2.626268148422241, | |
| "learning_rate": 1.0981220657276995e-05, | |
| "loss": 0.0394, | |
| "step": 16630 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 3.1185550689697266, | |
| "learning_rate": 1.095774647887324e-05, | |
| "loss": 0.036, | |
| "step": 16640 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 1.6116607189178467, | |
| "learning_rate": 1.0934272300469483e-05, | |
| "loss": 0.0396, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 0.3400775194168091, | |
| "learning_rate": 1.0910798122065728e-05, | |
| "loss": 0.0316, | |
| "step": 16660 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 0.26096880435943604, | |
| "learning_rate": 1.0887323943661972e-05, | |
| "loss": 0.026, | |
| "step": 16670 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 2.0492286682128906, | |
| "learning_rate": 1.0863849765258216e-05, | |
| "loss": 0.0336, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 2.7371644973754883, | |
| "learning_rate": 1.084037558685446e-05, | |
| "loss": 0.0474, | |
| "step": 16690 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.3516647219657898, | |
| "learning_rate": 1.0816901408450706e-05, | |
| "loss": 0.0233, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 1.8178682327270508, | |
| "learning_rate": 1.0793427230046948e-05, | |
| "loss": 0.0429, | |
| "step": 16710 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.6841596961021423, | |
| "learning_rate": 1.0769953051643194e-05, | |
| "loss": 0.0171, | |
| "step": 16720 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 0.06779265403747559, | |
| "learning_rate": 1.0746478873239438e-05, | |
| "loss": 0.0233, | |
| "step": 16730 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 1.7634962797164917, | |
| "learning_rate": 1.0723004694835682e-05, | |
| "loss": 0.0191, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 1.6839202642440796, | |
| "learning_rate": 1.0699530516431926e-05, | |
| "loss": 0.0508, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 0.2583700716495514, | |
| "learning_rate": 1.067605633802817e-05, | |
| "loss": 0.0283, | |
| "step": 16760 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 2.4837722778320312, | |
| "learning_rate": 1.0652582159624414e-05, | |
| "loss": 0.0249, | |
| "step": 16770 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 3.755295753479004, | |
| "learning_rate": 1.0629107981220658e-05, | |
| "loss": 0.0399, | |
| "step": 16780 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 1.3216301202774048, | |
| "learning_rate": 1.0605633802816902e-05, | |
| "loss": 0.0495, | |
| "step": 16790 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 0.964759886264801, | |
| "learning_rate": 1.0582159624413146e-05, | |
| "loss": 0.0277, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 0.13690391182899475, | |
| "learning_rate": 1.0558685446009391e-05, | |
| "loss": 0.0453, | |
| "step": 16810 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 0.9749215245246887, | |
| "learning_rate": 1.0535211267605633e-05, | |
| "loss": 0.0202, | |
| "step": 16820 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 1.2910761833190918, | |
| "learning_rate": 1.0511737089201879e-05, | |
| "loss": 0.0255, | |
| "step": 16830 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 1.0894033908843994, | |
| "learning_rate": 1.0488262910798123e-05, | |
| "loss": 0.0286, | |
| "step": 16840 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.8152934908866882, | |
| "learning_rate": 1.0464788732394367e-05, | |
| "loss": 0.0375, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 2.546592950820923, | |
| "learning_rate": 1.0441314553990611e-05, | |
| "loss": 0.0217, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 2.69392466545105, | |
| "learning_rate": 1.0417840375586855e-05, | |
| "loss": 0.0374, | |
| "step": 16870 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 2.5685863494873047, | |
| "learning_rate": 1.0394366197183099e-05, | |
| "loss": 0.039, | |
| "step": 16880 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 2.294158697128296, | |
| "learning_rate": 1.0370892018779343e-05, | |
| "loss": 0.0268, | |
| "step": 16890 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "grad_norm": 1.6547008752822876, | |
| "learning_rate": 1.0347417840375587e-05, | |
| "loss": 0.0163, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "grad_norm": 2.758500576019287, | |
| "learning_rate": 1.032394366197183e-05, | |
| "loss": 0.0382, | |
| "step": 16910 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "grad_norm": 0.08954645693302155, | |
| "learning_rate": 1.0300469483568076e-05, | |
| "loss": 0.0409, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "grad_norm": 1.6631460189819336, | |
| "learning_rate": 1.0276995305164319e-05, | |
| "loss": 0.0429, | |
| "step": 16930 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 0.3024432063102722, | |
| "learning_rate": 1.0253521126760564e-05, | |
| "loss": 0.0466, | |
| "step": 16940 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 2.2123658657073975, | |
| "learning_rate": 1.0230046948356808e-05, | |
| "loss": 0.0389, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 2.56610369682312, | |
| "learning_rate": 1.0206572769953052e-05, | |
| "loss": 0.0516, | |
| "step": 16960 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 2.63765549659729, | |
| "learning_rate": 1.0183098591549296e-05, | |
| "loss": 0.0288, | |
| "step": 16970 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 1.9523321390151978, | |
| "learning_rate": 1.015962441314554e-05, | |
| "loss": 0.0274, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 1.6789344549179077, | |
| "learning_rate": 1.0136150234741784e-05, | |
| "loss": 0.0283, | |
| "step": 16990 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 2.195377826690674, | |
| "learning_rate": 1.0112676056338028e-05, | |
| "loss": 0.0362, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_loss": 0.05907091498374939, | |
| "eval_macro/f1": 0.9221598708411745, | |
| "eval_macro/precision": 0.9243403631803216, | |
| "eval_macro/recall": 0.9208585140851037, | |
| "eval_micro/f1": 0.9233931752775113, | |
| "eval_micro/precision": 0.9240896934787081, | |
| "eval_micro/recall": 0.9226977062649777, | |
| "eval_runtime": 28.8245, | |
| "eval_samples/accuracy": 0.915576857240671, | |
| "eval_samples_per_second": 506.687, | |
| "eval_steps_per_second": 15.855, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 1.6797447204589844, | |
| "learning_rate": 1.0089201877934274e-05, | |
| "loss": 0.0542, | |
| "step": 17010 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.0108383893966675, | |
| "learning_rate": 1.0065727699530516e-05, | |
| "loss": 0.0444, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 4.3218183517456055, | |
| "learning_rate": 1.0042253521126762e-05, | |
| "loss": 0.0333, | |
| "step": 17030 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.9301080703735352, | |
| "learning_rate": 1.0018779342723004e-05, | |
| "loss": 0.0269, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.6253621578216553, | |
| "learning_rate": 9.99530516431925e-06, | |
| "loss": 0.0384, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.4339439868927, | |
| "learning_rate": 9.971830985915494e-06, | |
| "loss": 0.0321, | |
| "step": 17060 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 3.879636287689209, | |
| "learning_rate": 9.948356807511738e-06, | |
| "loss": 0.023, | |
| "step": 17070 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 2.669905424118042, | |
| "learning_rate": 9.924882629107981e-06, | |
| "loss": 0.0337, | |
| "step": 17080 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 0.813624382019043, | |
| "learning_rate": 9.901408450704225e-06, | |
| "loss": 0.0326, | |
| "step": 17090 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 1.2216477394104004, | |
| "learning_rate": 9.87793427230047e-06, | |
| "loss": 0.0185, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 1.9044301509857178, | |
| "learning_rate": 9.854460093896713e-06, | |
| "loss": 0.0439, | |
| "step": 17110 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 1.891352653503418, | |
| "learning_rate": 9.830985915492959e-06, | |
| "loss": 0.0384, | |
| "step": 17120 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 1.359215259552002, | |
| "learning_rate": 9.807511737089201e-06, | |
| "loss": 0.0127, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 0.31286099553108215, | |
| "learning_rate": 9.784037558685447e-06, | |
| "loss": 0.0182, | |
| "step": 17140 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 1.1283832788467407, | |
| "learning_rate": 9.760563380281691e-06, | |
| "loss": 0.0183, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.06705621629953384, | |
| "learning_rate": 9.737089201877935e-06, | |
| "loss": 0.0259, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.1862085461616516, | |
| "learning_rate": 9.713615023474179e-06, | |
| "loss": 0.0183, | |
| "step": 17170 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 2.7586522102355957, | |
| "learning_rate": 9.690140845070424e-06, | |
| "loss": 0.0405, | |
| "step": 17180 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 4.007384777069092, | |
| "learning_rate": 9.666666666666667e-06, | |
| "loss": 0.0303, | |
| "step": 17190 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 1.2727338075637817, | |
| "learning_rate": 9.643192488262912e-06, | |
| "loss": 0.0324, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.08033546805381775, | |
| "learning_rate": 9.619718309859155e-06, | |
| "loss": 0.031, | |
| "step": 17210 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 4.440250873565674, | |
| "learning_rate": 9.596244131455399e-06, | |
| "loss": 0.04, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.09834202378988266, | |
| "learning_rate": 9.572769953051644e-06, | |
| "loss": 0.0302, | |
| "step": 17230 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 1.6602915525436401, | |
| "learning_rate": 9.549295774647887e-06, | |
| "loss": 0.0329, | |
| "step": 17240 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.055043309926986694, | |
| "learning_rate": 9.525821596244132e-06, | |
| "loss": 0.0307, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 1.081846833229065, | |
| "learning_rate": 9.502347417840376e-06, | |
| "loss": 0.0381, | |
| "step": 17260 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.8104713559150696, | |
| "learning_rate": 9.47887323943662e-06, | |
| "loss": 0.0359, | |
| "step": 17270 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 0.40220922231674194, | |
| "learning_rate": 9.455399061032864e-06, | |
| "loss": 0.0247, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 4.490058898925781, | |
| "learning_rate": 9.43192488262911e-06, | |
| "loss": 0.0616, | |
| "step": 17290 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 1.7871899604797363, | |
| "learning_rate": 9.408450704225352e-06, | |
| "loss": 0.0212, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 3.075772523880005, | |
| "learning_rate": 9.384976525821598e-06, | |
| "loss": 0.0312, | |
| "step": 17310 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 3.081249237060547, | |
| "learning_rate": 9.361502347417842e-06, | |
| "loss": 0.0294, | |
| "step": 17320 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 0.10270259529352188, | |
| "learning_rate": 9.338028169014086e-06, | |
| "loss": 0.0138, | |
| "step": 17330 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 2.901857614517212, | |
| "learning_rate": 9.31455399061033e-06, | |
| "loss": 0.046, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 1.5097789764404297, | |
| "learning_rate": 9.291079812206572e-06, | |
| "loss": 0.034, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 3.355926513671875, | |
| "learning_rate": 9.267605633802817e-06, | |
| "loss": 0.0209, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.142938494682312, | |
| "learning_rate": 9.244131455399061e-06, | |
| "loss": 0.0305, | |
| "step": 17370 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 1.179494023323059, | |
| "learning_rate": 9.220657276995305e-06, | |
| "loss": 0.0341, | |
| "step": 17380 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.5627864599227905, | |
| "learning_rate": 9.19718309859155e-06, | |
| "loss": 0.0428, | |
| "step": 17390 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 1.4507837295532227, | |
| "learning_rate": 9.173708920187795e-06, | |
| "loss": 0.0387, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 0.5361911058425903, | |
| "learning_rate": 9.150234741784037e-06, | |
| "loss": 0.0198, | |
| "step": 17410 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 0.12988106906414032, | |
| "learning_rate": 9.126760563380283e-06, | |
| "loss": 0.0191, | |
| "step": 17420 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 0.06474238634109497, | |
| "learning_rate": 9.103286384976527e-06, | |
| "loss": 0.0303, | |
| "step": 17430 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 1.0567501783370972, | |
| "learning_rate": 9.07981220657277e-06, | |
| "loss": 0.0416, | |
| "step": 17440 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.2987545132637024, | |
| "learning_rate": 9.056338028169015e-06, | |
| "loss": 0.0256, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 4.924028396606445, | |
| "learning_rate": 9.032863849765259e-06, | |
| "loss": 0.0165, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 2.6408851146698, | |
| "learning_rate": 9.009389671361503e-06, | |
| "loss": 0.0278, | |
| "step": 17470 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.41200679540634155, | |
| "learning_rate": 8.985915492957747e-06, | |
| "loss": 0.0392, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 3.0683581829071045, | |
| "learning_rate": 8.96244131455399e-06, | |
| "loss": 0.0311, | |
| "step": 17490 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 0.34089791774749756, | |
| "learning_rate": 8.938967136150235e-06, | |
| "loss": 0.0174, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "eval_loss": 0.0591842420399189, | |
| "eval_macro/f1": 0.9271865348802009, | |
| "eval_macro/precision": 0.9263216362517599, | |
| "eval_macro/recall": 0.9283185590241645, | |
| "eval_micro/f1": 0.9284102564102564, | |
| "eval_micro/precision": 0.9271423694093547, | |
| "eval_micro/recall": 0.9296816158849709, | |
| "eval_runtime": 28.0804, | |
| "eval_samples/accuracy": 0.9222184183498802, | |
| "eval_samples_per_second": 520.115, | |
| "eval_steps_per_second": 16.275, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 0.23726418614387512, | |
| "learning_rate": 8.91549295774648e-06, | |
| "loss": 0.0348, | |
| "step": 17510 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 0.15017840266227722, | |
| "learning_rate": 8.892018779342722e-06, | |
| "loss": 0.0207, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 2.7328882217407227, | |
| "learning_rate": 8.868544600938968e-06, | |
| "loss": 0.0293, | |
| "step": 17530 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.2770448923110962, | |
| "learning_rate": 8.845070422535212e-06, | |
| "loss": 0.027, | |
| "step": 17540 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 1.4732258319854736, | |
| "learning_rate": 8.821596244131456e-06, | |
| "loss": 0.0481, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 2.0700390338897705, | |
| "learning_rate": 8.7981220657277e-06, | |
| "loss": 0.0226, | |
| "step": 17560 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.1949704885482788, | |
| "learning_rate": 8.774647887323944e-06, | |
| "loss": 0.0369, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 1.3958638906478882, | |
| "learning_rate": 8.751173708920188e-06, | |
| "loss": 0.0326, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 2.3745298385620117, | |
| "learning_rate": 8.727699530516432e-06, | |
| "loss": 0.0277, | |
| "step": 17590 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 0.185311421751976, | |
| "learning_rate": 8.704225352112677e-06, | |
| "loss": 0.0203, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 2.408078670501709, | |
| "learning_rate": 8.68075117370892e-06, | |
| "loss": 0.0268, | |
| "step": 17610 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 0.8216488361358643, | |
| "learning_rate": 8.657276995305165e-06, | |
| "loss": 0.0319, | |
| "step": 17620 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 0.5919995307922363, | |
| "learning_rate": 8.633802816901408e-06, | |
| "loss": 0.0339, | |
| "step": 17630 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 2.0640909671783447, | |
| "learning_rate": 8.610328638497653e-06, | |
| "loss": 0.0238, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 0.6420736908912659, | |
| "learning_rate": 8.586854460093897e-06, | |
| "loss": 0.0241, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 2.002295732498169, | |
| "learning_rate": 8.563380281690141e-06, | |
| "loss": 0.0363, | |
| "step": 17660 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 0.12747663259506226, | |
| "learning_rate": 8.539906103286385e-06, | |
| "loss": 0.0212, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 1.799019694328308, | |
| "learning_rate": 8.51643192488263e-06, | |
| "loss": 0.0369, | |
| "step": 17680 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 0.7480834722518921, | |
| "learning_rate": 8.492957746478873e-06, | |
| "loss": 0.0383, | |
| "step": 17690 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 1.7629315853118896, | |
| "learning_rate": 8.469483568075117e-06, | |
| "loss": 0.0385, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 2.2089579105377197, | |
| "learning_rate": 8.446009389671363e-06, | |
| "loss": 0.0287, | |
| "step": 17710 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.43293681740760803, | |
| "learning_rate": 8.422535211267605e-06, | |
| "loss": 0.0293, | |
| "step": 17720 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.13332372903823853, | |
| "learning_rate": 8.39906103286385e-06, | |
| "loss": 0.0307, | |
| "step": 17730 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.055583804845809937, | |
| "learning_rate": 8.375586854460095e-06, | |
| "loss": 0.0256, | |
| "step": 17740 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 3.303671360015869, | |
| "learning_rate": 8.352112676056339e-06, | |
| "loss": 0.0207, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 3.193225383758545, | |
| "learning_rate": 8.328638497652583e-06, | |
| "loss": 0.0242, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 0.5734906196594238, | |
| "learning_rate": 8.305164319248828e-06, | |
| "loss": 0.0206, | |
| "step": 17770 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 0.11326544731855392, | |
| "learning_rate": 8.28169014084507e-06, | |
| "loss": 0.0364, | |
| "step": 17780 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 0.0245682280510664, | |
| "learning_rate": 8.258215962441314e-06, | |
| "loss": 0.0187, | |
| "step": 17790 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 0.11930684745311737, | |
| "learning_rate": 8.234741784037558e-06, | |
| "loss": 0.0085, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 0.308901309967041, | |
| "learning_rate": 8.211267605633802e-06, | |
| "loss": 0.0146, | |
| "step": 17810 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 0.14321398735046387, | |
| "learning_rate": 8.187793427230048e-06, | |
| "loss": 0.0226, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 0.437814325094223, | |
| "learning_rate": 8.16431924882629e-06, | |
| "loss": 0.0427, | |
| "step": 17830 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 4.8848876953125, | |
| "learning_rate": 8.140845070422536e-06, | |
| "loss": 0.0292, | |
| "step": 17840 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 1.6253747940063477, | |
| "learning_rate": 8.11737089201878e-06, | |
| "loss": 0.0385, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 0.15667077898979187, | |
| "learning_rate": 8.093896713615024e-06, | |
| "loss": 0.0305, | |
| "step": 17860 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 0.06896039843559265, | |
| "learning_rate": 8.070422535211268e-06, | |
| "loss": 0.0158, | |
| "step": 17870 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.7833272218704224, | |
| "learning_rate": 8.046948356807513e-06, | |
| "loss": 0.0332, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 2.794677495956421, | |
| "learning_rate": 8.023474178403756e-06, | |
| "loss": 0.0243, | |
| "step": 17890 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.9475429654121399, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0249, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 4.429962158203125, | |
| "learning_rate": 7.976525821596245e-06, | |
| "loss": 0.0311, | |
| "step": 17910 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 0.1972973346710205, | |
| "learning_rate": 7.95305164319249e-06, | |
| "loss": 0.0299, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 0.13434672355651855, | |
| "learning_rate": 7.929577464788733e-06, | |
| "loss": 0.0255, | |
| "step": 17930 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 0.22750389575958252, | |
| "learning_rate": 7.906103286384975e-06, | |
| "loss": 0.0242, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 1.5128264427185059, | |
| "learning_rate": 7.882629107981221e-06, | |
| "loss": 0.0158, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 0.3299107551574707, | |
| "learning_rate": 7.859154929577465e-06, | |
| "loss": 0.0355, | |
| "step": 17960 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 1.590198040008545, | |
| "learning_rate": 7.835680751173709e-06, | |
| "loss": 0.0358, | |
| "step": 17970 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 0.29909548163414, | |
| "learning_rate": 7.812206572769953e-06, | |
| "loss": 0.0238, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 0.14423009753227234, | |
| "learning_rate": 7.788732394366199e-06, | |
| "loss": 0.0218, | |
| "step": 17990 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 1.6336926221847534, | |
| "learning_rate": 7.765258215962441e-06, | |
| "loss": 0.0149, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_loss": 0.06184852123260498, | |
| "eval_macro/f1": 0.9275533285987433, | |
| "eval_macro/precision": 0.927621636500257, | |
| "eval_macro/recall": 0.9283999031694936, | |
| "eval_micro/f1": 0.9285030103995622, | |
| "eval_micro/precision": 0.9278047446503043, | |
| "eval_micro/recall": 0.9292023279698733, | |
| "eval_runtime": 29.8927, | |
| "eval_samples/accuracy": 0.9221499486477234, | |
| "eval_samples_per_second": 488.58, | |
| "eval_steps_per_second": 15.288, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 7.740451335906982, | |
| "learning_rate": 7.741784037558687e-06, | |
| "loss": 0.0434, | |
| "step": 18010 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 3.4866607189178467, | |
| "learning_rate": 7.71830985915493e-06, | |
| "loss": 0.0362, | |
| "step": 18020 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 3.1101765632629395, | |
| "learning_rate": 7.694835680751174e-06, | |
| "loss": 0.0091, | |
| "step": 18030 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 2.40651798248291, | |
| "learning_rate": 7.671361502347418e-06, | |
| "loss": 0.0197, | |
| "step": 18040 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 4.570631980895996, | |
| "learning_rate": 7.647887323943662e-06, | |
| "loss": 0.0127, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 1.8891502618789673, | |
| "learning_rate": 7.624413145539906e-06, | |
| "loss": 0.0386, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 2.7779672145843506, | |
| "learning_rate": 7.600938967136151e-06, | |
| "loss": 0.0221, | |
| "step": 18070 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 1.5719915628433228, | |
| "learning_rate": 7.577464788732394e-06, | |
| "loss": 0.024, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 2.32023286819458, | |
| "learning_rate": 7.553990610328639e-06, | |
| "loss": 0.0352, | |
| "step": 18090 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 0.3694073259830475, | |
| "learning_rate": 7.530516431924883e-06, | |
| "loss": 0.021, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 1.49236261844635, | |
| "learning_rate": 7.507042253521126e-06, | |
| "loss": 0.032, | |
| "step": 18110 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 0.02265837788581848, | |
| "learning_rate": 7.483568075117371e-06, | |
| "loss": 0.0375, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 0.5602453947067261, | |
| "learning_rate": 7.460093896713616e-06, | |
| "loss": 0.0295, | |
| "step": 18130 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 0.8204029202461243, | |
| "learning_rate": 7.436619718309859e-06, | |
| "loss": 0.0221, | |
| "step": 18140 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 0.03510039299726486, | |
| "learning_rate": 7.413145539906104e-06, | |
| "loss": 0.0392, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 0.16863106191158295, | |
| "learning_rate": 7.3896713615023485e-06, | |
| "loss": 0.0119, | |
| "step": 18160 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 0.08002981543540955, | |
| "learning_rate": 7.366197183098592e-06, | |
| "loss": 0.0129, | |
| "step": 18170 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 2.261359214782715, | |
| "learning_rate": 7.342723004694836e-06, | |
| "loss": 0.0292, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 3.8216466903686523, | |
| "learning_rate": 7.31924882629108e-06, | |
| "loss": 0.0416, | |
| "step": 18190 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 0.11059773713350296, | |
| "learning_rate": 7.295774647887324e-06, | |
| "loss": 0.0225, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 2.422400712966919, | |
| "learning_rate": 7.272300469483568e-06, | |
| "loss": 0.0331, | |
| "step": 18210 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 1.4523348808288574, | |
| "learning_rate": 7.248826291079812e-06, | |
| "loss": 0.0205, | |
| "step": 18220 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 2.5690364837646484, | |
| "learning_rate": 7.225352112676056e-06, | |
| "loss": 0.0328, | |
| "step": 18230 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 1.9039969444274902, | |
| "learning_rate": 7.201877934272301e-06, | |
| "loss": 0.0307, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 3.139665365219116, | |
| "learning_rate": 7.178403755868544e-06, | |
| "loss": 0.0299, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 0.07591520249843597, | |
| "learning_rate": 7.154929577464789e-06, | |
| "loss": 0.0228, | |
| "step": 18260 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 0.07442035526037216, | |
| "learning_rate": 7.131455399061034e-06, | |
| "loss": 0.0245, | |
| "step": 18270 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 0.11448445916175842, | |
| "learning_rate": 7.107981220657277e-06, | |
| "loss": 0.0265, | |
| "step": 18280 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 2.6437320709228516, | |
| "learning_rate": 7.084507042253522e-06, | |
| "loss": 0.0238, | |
| "step": 18290 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 1.5147851705551147, | |
| "learning_rate": 7.0610328638497664e-06, | |
| "loss": 0.0258, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 0.25056353211402893, | |
| "learning_rate": 7.0375586854460096e-06, | |
| "loss": 0.0129, | |
| "step": 18310 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 2.1855599880218506, | |
| "learning_rate": 7.014084507042254e-06, | |
| "loss": 0.0317, | |
| "step": 18320 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 3.4039382934570312, | |
| "learning_rate": 6.990610328638498e-06, | |
| "loss": 0.0191, | |
| "step": 18330 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 0.2336108237504959, | |
| "learning_rate": 6.9671361502347414e-06, | |
| "loss": 0.0323, | |
| "step": 18340 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 0.5614345669746399, | |
| "learning_rate": 6.943661971830986e-06, | |
| "loss": 0.0338, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 2.2648632526397705, | |
| "learning_rate": 6.920187793427231e-06, | |
| "loss": 0.0264, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 1.8318125009536743, | |
| "learning_rate": 6.896713615023474e-06, | |
| "loss": 0.0356, | |
| "step": 18370 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 0.24477006494998932, | |
| "learning_rate": 6.873239436619719e-06, | |
| "loss": 0.011, | |
| "step": 18380 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.3729543685913086, | |
| "learning_rate": 6.849765258215962e-06, | |
| "loss": 0.04, | |
| "step": 18390 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 2.3714241981506348, | |
| "learning_rate": 6.826291079812207e-06, | |
| "loss": 0.0194, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 8.464815139770508, | |
| "learning_rate": 6.802816901408452e-06, | |
| "loss": 0.0304, | |
| "step": 18410 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.38195595145225525, | |
| "learning_rate": 6.779342723004695e-06, | |
| "loss": 0.0253, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 2.6723573207855225, | |
| "learning_rate": 6.75586854460094e-06, | |
| "loss": 0.047, | |
| "step": 18430 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 0.3452346920967102, | |
| "learning_rate": 6.7323943661971836e-06, | |
| "loss": 0.0235, | |
| "step": 18440 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 0.28022298216819763, | |
| "learning_rate": 6.7089201877934275e-06, | |
| "loss": 0.0353, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 0.5109660625457764, | |
| "learning_rate": 6.6854460093896715e-06, | |
| "loss": 0.0281, | |
| "step": 18460 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 3.8708913326263428, | |
| "learning_rate": 6.661971830985916e-06, | |
| "loss": 0.0285, | |
| "step": 18470 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 1.4034435749053955, | |
| "learning_rate": 6.638497652582159e-06, | |
| "loss": 0.0391, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 1.097798466682434, | |
| "learning_rate": 6.615023474178404e-06, | |
| "loss": 0.0405, | |
| "step": 18490 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 3.1356542110443115, | |
| "learning_rate": 6.591549295774649e-06, | |
| "loss": 0.018, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "eval_loss": 0.060135386884212494, | |
| "eval_macro/f1": 0.927526156864245, | |
| "eval_macro/precision": 0.9275914439000685, | |
| "eval_macro/recall": 0.927618083612261, | |
| "eval_micro/f1": 0.9285934238888699, | |
| "eval_micro/precision": 0.9280536178361374, | |
| "eval_micro/recall": 0.9291338582677166, | |
| "eval_runtime": 28.74, | |
| "eval_samples/accuracy": 0.922286888052037, | |
| "eval_samples_per_second": 508.177, | |
| "eval_steps_per_second": 15.901, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 2.763146162033081, | |
| "learning_rate": 6.568075117370892e-06, | |
| "loss": 0.0352, | |
| "step": 18510 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 0.015551486052572727, | |
| "learning_rate": 6.544600938967137e-06, | |
| "loss": 0.0163, | |
| "step": 18520 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 2.646799325942993, | |
| "learning_rate": 6.52112676056338e-06, | |
| "loss": 0.0225, | |
| "step": 18530 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 0.29067888855934143, | |
| "learning_rate": 6.497652582159625e-06, | |
| "loss": 0.0174, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 0.14220267534255981, | |
| "learning_rate": 6.474178403755869e-06, | |
| "loss": 0.0196, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.1823500692844391, | |
| "learning_rate": 6.450704225352113e-06, | |
| "loss": 0.0115, | |
| "step": 18560 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.6547729969024658, | |
| "learning_rate": 6.427230046948357e-06, | |
| "loss": 0.037, | |
| "step": 18570 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.19114431738853455, | |
| "learning_rate": 6.4037558685446015e-06, | |
| "loss": 0.0221, | |
| "step": 18580 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 4.823287010192871, | |
| "learning_rate": 6.380281690140845e-06, | |
| "loss": 0.0166, | |
| "step": 18590 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 0.13954699039459229, | |
| "learning_rate": 6.3568075117370894e-06, | |
| "loss": 0.0455, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 0.08931886404752731, | |
| "learning_rate": 6.333333333333334e-06, | |
| "loss": 0.0354, | |
| "step": 18610 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 3.5649523735046387, | |
| "learning_rate": 6.309859154929577e-06, | |
| "loss": 0.024, | |
| "step": 18620 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 2.356419324874878, | |
| "learning_rate": 6.286384976525822e-06, | |
| "loss": 0.0164, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 1.0465301275253296, | |
| "learning_rate": 6.262910798122067e-06, | |
| "loss": 0.0258, | |
| "step": 18640 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 0.01890731416642666, | |
| "learning_rate": 6.23943661971831e-06, | |
| "loss": 0.0142, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 2.5224521160125732, | |
| "learning_rate": 6.215962441314555e-06, | |
| "loss": 0.032, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 1.1495766639709473, | |
| "learning_rate": 6.192488262910799e-06, | |
| "loss": 0.0314, | |
| "step": 18670 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 0.7142943143844604, | |
| "learning_rate": 6.169014084507042e-06, | |
| "loss": 0.0272, | |
| "step": 18680 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 0.24826817214488983, | |
| "learning_rate": 6.145539906103287e-06, | |
| "loss": 0.0187, | |
| "step": 18690 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 0.063688263297081, | |
| "learning_rate": 6.122065727699531e-06, | |
| "loss": 0.0208, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 2.3652358055114746, | |
| "learning_rate": 6.098591549295775e-06, | |
| "loss": 0.035, | |
| "step": 18710 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 0.7242918014526367, | |
| "learning_rate": 6.075117370892019e-06, | |
| "loss": 0.0352, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 4.603453636169434, | |
| "learning_rate": 6.0516431924882634e-06, | |
| "loss": 0.0543, | |
| "step": 18730 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.45003360509872437, | |
| "learning_rate": 6.028169014084507e-06, | |
| "loss": 0.0233, | |
| "step": 18740 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 2.918506383895874, | |
| "learning_rate": 6.004694835680751e-06, | |
| "loss": 0.0357, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 2.2364795207977295, | |
| "learning_rate": 5.981220657276996e-06, | |
| "loss": 0.0345, | |
| "step": 18760 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 1.1748833656311035, | |
| "learning_rate": 5.95774647887324e-06, | |
| "loss": 0.031, | |
| "step": 18770 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 0.20631471276283264, | |
| "learning_rate": 5.934272300469484e-06, | |
| "loss": 0.0411, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 2.653383255004883, | |
| "learning_rate": 5.910798122065728e-06, | |
| "loss": 0.0313, | |
| "step": 18790 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 3.3780014514923096, | |
| "learning_rate": 5.887323943661972e-06, | |
| "loss": 0.0299, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 0.190080463886261, | |
| "learning_rate": 5.863849765258216e-06, | |
| "loss": 0.0246, | |
| "step": 18810 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 3.175036907196045, | |
| "learning_rate": 5.84037558685446e-06, | |
| "loss": 0.0198, | |
| "step": 18820 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 1.293860912322998, | |
| "learning_rate": 5.816901408450705e-06, | |
| "loss": 0.0413, | |
| "step": 18830 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 3.4616804122924805, | |
| "learning_rate": 5.793427230046949e-06, | |
| "loss": 0.0294, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 0.1567220538854599, | |
| "learning_rate": 5.769953051643193e-06, | |
| "loss": 0.0348, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 0.14279071986675262, | |
| "learning_rate": 5.746478873239437e-06, | |
| "loss": 0.0112, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 4.581559181213379, | |
| "learning_rate": 5.723004694835681e-06, | |
| "loss": 0.0398, | |
| "step": 18870 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 1.2094709873199463, | |
| "learning_rate": 5.699530516431925e-06, | |
| "loss": 0.0274, | |
| "step": 18880 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 1.6614265441894531, | |
| "learning_rate": 5.676056338028169e-06, | |
| "loss": 0.0513, | |
| "step": 18890 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 1.3133680820465088, | |
| "learning_rate": 5.652582159624414e-06, | |
| "loss": 0.0221, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 1.085091233253479, | |
| "learning_rate": 5.629107981220657e-06, | |
| "loss": 0.0297, | |
| "step": 18910 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.05348065122961998, | |
| "learning_rate": 5.605633802816901e-06, | |
| "loss": 0.0119, | |
| "step": 18920 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.11949329078197479, | |
| "learning_rate": 5.582159624413145e-06, | |
| "loss": 0.023, | |
| "step": 18930 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 0.890052080154419, | |
| "learning_rate": 5.55868544600939e-06, | |
| "loss": 0.0343, | |
| "step": 18940 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 2.084806442260742, | |
| "learning_rate": 5.535211267605634e-06, | |
| "loss": 0.0165, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 0.42421096563339233, | |
| "learning_rate": 5.511737089201878e-06, | |
| "loss": 0.0273, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 4.482558250427246, | |
| "learning_rate": 5.488262910798123e-06, | |
| "loss": 0.0347, | |
| "step": 18970 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 1.6675491333007812, | |
| "learning_rate": 5.464788732394367e-06, | |
| "loss": 0.0365, | |
| "step": 18980 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 0.973112940788269, | |
| "learning_rate": 5.441314553990611e-06, | |
| "loss": 0.0287, | |
| "step": 18990 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 1.0578975677490234, | |
| "learning_rate": 5.4178403755868546e-06, | |
| "loss": 0.0237, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "eval_loss": 0.05804529786109924, | |
| "eval_macro/f1": 0.9300467382231257, | |
| "eval_macro/precision": 0.9304321443218644, | |
| "eval_macro/recall": 0.9299328529924998, | |
| "eval_micro/f1": 0.9309872655073258, | |
| "eval_micro/precision": 0.9309235298144725, | |
| "eval_micro/recall": 0.9310510099281069, | |
| "eval_runtime": 29.2424, | |
| "eval_samples/accuracy": 0.9250256761383088, | |
| "eval_samples_per_second": 499.446, | |
| "eval_steps_per_second": 15.628, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 0.13687680661678314, | |
| "learning_rate": 5.394366197183099e-06, | |
| "loss": 0.0209, | |
| "step": 19010 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 0.7590184807777405, | |
| "learning_rate": 5.370892018779343e-06, | |
| "loss": 0.0328, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 2.366809606552124, | |
| "learning_rate": 5.347417840375587e-06, | |
| "loss": 0.0288, | |
| "step": 19030 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 0.280494749546051, | |
| "learning_rate": 5.323943661971831e-06, | |
| "loss": 0.0253, | |
| "step": 19040 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 2.3025805950164795, | |
| "learning_rate": 5.300469483568075e-06, | |
| "loss": 0.014, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 1.3467135429382324, | |
| "learning_rate": 5.276995305164319e-06, | |
| "loss": 0.0291, | |
| "step": 19060 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.5404326915740967, | |
| "learning_rate": 5.253521126760563e-06, | |
| "loss": 0.0359, | |
| "step": 19070 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 1.894284725189209, | |
| "learning_rate": 5.230046948356808e-06, | |
| "loss": 0.0294, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.44013938307762146, | |
| "learning_rate": 5.206572769953052e-06, | |
| "loss": 0.0294, | |
| "step": 19090 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.9481649398803711, | |
| "learning_rate": 5.183098591549296e-06, | |
| "loss": 0.016, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 2.767650604248047, | |
| "learning_rate": 5.159624413145541e-06, | |
| "loss": 0.0107, | |
| "step": 19110 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 0.19418075680732727, | |
| "learning_rate": 5.136150234741785e-06, | |
| "loss": 0.0288, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 3.5764660835266113, | |
| "learning_rate": 5.1126760563380286e-06, | |
| "loss": 0.0385, | |
| "step": 19130 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 0.20306912064552307, | |
| "learning_rate": 5.0892018779342725e-06, | |
| "loss": 0.0276, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.028659334406256676, | |
| "learning_rate": 5.0657276995305165e-06, | |
| "loss": 0.021, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 3.5171916484832764, | |
| "learning_rate": 5.0422535211267604e-06, | |
| "loss": 0.0216, | |
| "step": 19160 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.7346154451370239, | |
| "learning_rate": 5.018779342723004e-06, | |
| "loss": 0.0361, | |
| "step": 19170 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.5785157680511475, | |
| "learning_rate": 4.995305164319249e-06, | |
| "loss": 0.0248, | |
| "step": 19180 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 1.10182785987854, | |
| "learning_rate": 4.971830985915493e-06, | |
| "loss": 0.0284, | |
| "step": 19190 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 0.02137897163629532, | |
| "learning_rate": 4.948356807511737e-06, | |
| "loss": 0.0266, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 0.47869399189949036, | |
| "learning_rate": 4.924882629107982e-06, | |
| "loss": 0.0344, | |
| "step": 19210 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 3.633575916290283, | |
| "learning_rate": 4.901408450704226e-06, | |
| "loss": 0.0205, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 3.7721774578094482, | |
| "learning_rate": 4.87793427230047e-06, | |
| "loss": 0.038, | |
| "step": 19230 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 2.5708956718444824, | |
| "learning_rate": 4.854460093896714e-06, | |
| "loss": 0.0364, | |
| "step": 19240 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.7661235332489014, | |
| "learning_rate": 4.830985915492959e-06, | |
| "loss": 0.0297, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.4676876366138458, | |
| "learning_rate": 4.807511737089202e-06, | |
| "loss": 0.026, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 1.8789290189743042, | |
| "learning_rate": 4.784037558685446e-06, | |
| "loss": 0.0343, | |
| "step": 19270 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 4.177642822265625, | |
| "learning_rate": 4.7605633802816905e-06, | |
| "loss": 0.0264, | |
| "step": 19280 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 1.572719931602478, | |
| "learning_rate": 4.7370892018779344e-06, | |
| "loss": 0.0475, | |
| "step": 19290 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 2.2648651599884033, | |
| "learning_rate": 4.713615023474178e-06, | |
| "loss": 0.0262, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 0.05855144187808037, | |
| "learning_rate": 4.690140845070422e-06, | |
| "loss": 0.0078, | |
| "step": 19310 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 0.0676891878247261, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.0285, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 2.633129596710205, | |
| "learning_rate": 4.643192488262911e-06, | |
| "loss": 0.0277, | |
| "step": 19330 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 4.038257598876953, | |
| "learning_rate": 4.619718309859155e-06, | |
| "loss": 0.026, | |
| "step": 19340 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 1.5584205389022827, | |
| "learning_rate": 4.5962441314554e-06, | |
| "loss": 0.0154, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 2.5847558975219727, | |
| "learning_rate": 4.572769953051644e-06, | |
| "loss": 0.0115, | |
| "step": 19360 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 0.19932971894741058, | |
| "learning_rate": 4.549295774647888e-06, | |
| "loss": 0.03, | |
| "step": 19370 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 2.389786720275879, | |
| "learning_rate": 4.525821596244132e-06, | |
| "loss": 0.0281, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 5.445662975311279, | |
| "learning_rate": 4.502347417840376e-06, | |
| "loss": 0.0157, | |
| "step": 19390 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 0.8284715414047241, | |
| "learning_rate": 4.47887323943662e-06, | |
| "loss": 0.0417, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "grad_norm": 0.39633145928382874, | |
| "learning_rate": 4.455399061032864e-06, | |
| "loss": 0.0254, | |
| "step": 19410 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "grad_norm": 3.238429069519043, | |
| "learning_rate": 4.4319248826291084e-06, | |
| "loss": 0.0439, | |
| "step": 19420 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "grad_norm": 0.1122848391532898, | |
| "learning_rate": 4.408450704225352e-06, | |
| "loss": 0.0215, | |
| "step": 19430 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "grad_norm": 2.4247405529022217, | |
| "learning_rate": 4.384976525821596e-06, | |
| "loss": 0.0115, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 2.5062484741210938, | |
| "learning_rate": 4.36150234741784e-06, | |
| "loss": 0.037, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 2.332712411880493, | |
| "learning_rate": 4.338028169014085e-06, | |
| "loss": 0.0327, | |
| "step": 19460 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 2.1124820709228516, | |
| "learning_rate": 4.314553990610329e-06, | |
| "loss": 0.0244, | |
| "step": 19470 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 0.824579656124115, | |
| "learning_rate": 4.291079812206573e-06, | |
| "loss": 0.024, | |
| "step": 19480 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 1.3833364248275757, | |
| "learning_rate": 4.267605633802817e-06, | |
| "loss": 0.0387, | |
| "step": 19490 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 0.021480737254023552, | |
| "learning_rate": 4.244131455399061e-06, | |
| "loss": 0.0085, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_loss": 0.056427136063575745, | |
| "eval_macro/f1": 0.9295635716052294, | |
| "eval_macro/precision": 0.9300451915383702, | |
| "eval_macro/recall": 0.9294580707864561, | |
| "eval_micro/f1": 0.9307310392056155, | |
| "eval_micro/precision": 0.9308904109589041, | |
| "eval_micro/recall": 0.9305717220130092, | |
| "eval_runtime": 29.8549, | |
| "eval_samples/accuracy": 0.923313933584389, | |
| "eval_samples_per_second": 489.2, | |
| "eval_steps_per_second": 15.307, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 0.8990393280982971, | |
| "learning_rate": 4.220657276995305e-06, | |
| "loss": 0.0254, | |
| "step": 19510 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 3.251713514328003, | |
| "learning_rate": 4.197183098591549e-06, | |
| "loss": 0.0367, | |
| "step": 19520 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 1.872117280960083, | |
| "learning_rate": 4.173708920187794e-06, | |
| "loss": 0.0244, | |
| "step": 19530 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 0.2993544340133667, | |
| "learning_rate": 4.150234741784038e-06, | |
| "loss": 0.0209, | |
| "step": 19540 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 2.650440216064453, | |
| "learning_rate": 4.126760563380282e-06, | |
| "loss": 0.019, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 3.6848528385162354, | |
| "learning_rate": 4.103286384976526e-06, | |
| "loss": 0.0194, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 2.082089424133301, | |
| "learning_rate": 4.07981220657277e-06, | |
| "loss": 0.0293, | |
| "step": 19570 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 2.0846469402313232, | |
| "learning_rate": 4.056338028169014e-06, | |
| "loss": 0.0336, | |
| "step": 19580 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 1.2705551385879517, | |
| "learning_rate": 4.032863849765258e-06, | |
| "loss": 0.0237, | |
| "step": 19590 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 1.0715053081512451, | |
| "learning_rate": 4.009389671361503e-06, | |
| "loss": 0.0205, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 2.3154516220092773, | |
| "learning_rate": 3.985915492957747e-06, | |
| "loss": 0.0349, | |
| "step": 19610 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 2.9410946369171143, | |
| "learning_rate": 3.96244131455399e-06, | |
| "loss": 0.0232, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 0.5876528024673462, | |
| "learning_rate": 3.938967136150235e-06, | |
| "loss": 0.0327, | |
| "step": 19630 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 0.43626344203948975, | |
| "learning_rate": 3.915492957746479e-06, | |
| "loss": 0.0124, | |
| "step": 19640 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 2.820923328399658, | |
| "learning_rate": 3.892018779342723e-06, | |
| "loss": 0.0136, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 3.3606836795806885, | |
| "learning_rate": 3.868544600938967e-06, | |
| "loss": 0.0295, | |
| "step": 19660 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 3.3373594284057617, | |
| "learning_rate": 3.845070422535212e-06, | |
| "loss": 0.0289, | |
| "step": 19670 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 0.04505685716867447, | |
| "learning_rate": 3.821596244131456e-06, | |
| "loss": 0.0103, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 0.7895365357398987, | |
| "learning_rate": 3.7981220657276996e-06, | |
| "loss": 0.0145, | |
| "step": 19690 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 0.8932519555091858, | |
| "learning_rate": 3.774647887323944e-06, | |
| "loss": 0.0316, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 0.3159659504890442, | |
| "learning_rate": 3.751173708920188e-06, | |
| "loss": 0.0216, | |
| "step": 19710 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 2.177913188934326, | |
| "learning_rate": 3.727699530516432e-06, | |
| "loss": 0.0212, | |
| "step": 19720 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 1.621932864189148, | |
| "learning_rate": 3.7042253521126767e-06, | |
| "loss": 0.0316, | |
| "step": 19730 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 0.0901319831609726, | |
| "learning_rate": 3.6807511737089206e-06, | |
| "loss": 0.0254, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 2.3561832904815674, | |
| "learning_rate": 3.6572769953051646e-06, | |
| "loss": 0.043, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 1.95290207862854, | |
| "learning_rate": 3.6338028169014085e-06, | |
| "loss": 0.0276, | |
| "step": 19760 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 2.1155362129211426, | |
| "learning_rate": 3.610328638497653e-06, | |
| "loss": 0.0475, | |
| "step": 19770 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.561687707901001, | |
| "learning_rate": 3.586854460093897e-06, | |
| "loss": 0.0227, | |
| "step": 19780 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 1.620744228363037, | |
| "learning_rate": 3.563380281690141e-06, | |
| "loss": 0.042, | |
| "step": 19790 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 3.074017286300659, | |
| "learning_rate": 3.5399061032863856e-06, | |
| "loss": 0.0436, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 0.6212040781974792, | |
| "learning_rate": 3.5164319248826296e-06, | |
| "loss": 0.0421, | |
| "step": 19810 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 5.314796447753906, | |
| "learning_rate": 3.492957746478873e-06, | |
| "loss": 0.0372, | |
| "step": 19820 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 0.4144018292427063, | |
| "learning_rate": 3.469483568075117e-06, | |
| "loss": 0.025, | |
| "step": 19830 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 3.012010097503662, | |
| "learning_rate": 3.446009389671362e-06, | |
| "loss": 0.0127, | |
| "step": 19840 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 1.6481027603149414, | |
| "learning_rate": 3.422535211267606e-06, | |
| "loss": 0.0379, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 1.0246399641036987, | |
| "learning_rate": 3.39906103286385e-06, | |
| "loss": 0.0243, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 2.079977512359619, | |
| "learning_rate": 3.375586854460094e-06, | |
| "loss": 0.0382, | |
| "step": 19870 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 2.253732204437256, | |
| "learning_rate": 3.352112676056338e-06, | |
| "loss": 0.0308, | |
| "step": 19880 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 2.0721428394317627, | |
| "learning_rate": 3.328638497652582e-06, | |
| "loss": 0.018, | |
| "step": 19890 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 1.23079252243042, | |
| "learning_rate": 3.305164319248826e-06, | |
| "loss": 0.0317, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 1.093954086303711, | |
| "learning_rate": 3.281690140845071e-06, | |
| "loss": 0.0226, | |
| "step": 19910 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 1.5708380937576294, | |
| "learning_rate": 3.258215962441315e-06, | |
| "loss": 0.03, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.7118130922317505, | |
| "learning_rate": 3.234741784037559e-06, | |
| "loss": 0.02, | |
| "step": 19930 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 3.4391887187957764, | |
| "learning_rate": 3.211267605633803e-06, | |
| "loss": 0.0383, | |
| "step": 19940 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.6392399072647095, | |
| "learning_rate": 3.187793427230047e-06, | |
| "loss": 0.0525, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 2.6325254440307617, | |
| "learning_rate": 3.164319248826291e-06, | |
| "loss": 0.0307, | |
| "step": 19960 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 1.9415335655212402, | |
| "learning_rate": 3.140845070422535e-06, | |
| "loss": 0.0145, | |
| "step": 19970 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 0.5345707535743713, | |
| "learning_rate": 3.1173708920187794e-06, | |
| "loss": 0.0371, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 0.3850795030593872, | |
| "learning_rate": 3.093896713615024e-06, | |
| "loss": 0.011, | |
| "step": 19990 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 3.576430082321167, | |
| "learning_rate": 3.0704225352112678e-06, | |
| "loss": 0.032, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "eval_loss": 0.0599445179104805, | |
| "eval_macro/f1": 0.9280226732438912, | |
| "eval_macro/precision": 0.929679536317452, | |
| "eval_macro/recall": 0.9267250235346973, | |
| "eval_micro/f1": 0.9290442059055524, | |
| "eval_micro/precision": 0.9306719802116257, | |
| "eval_micro/recall": 0.9274221157137966, | |
| "eval_runtime": 29.022, | |
| "eval_samples/accuracy": 0.9212598425196851, | |
| "eval_samples_per_second": 503.24, | |
| "eval_steps_per_second": 15.747, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.2423238307237625, | |
| "learning_rate": 3.0469483568075117e-06, | |
| "loss": 0.0142, | |
| "step": 20010 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 1.0201228857040405, | |
| "learning_rate": 3.023474178403756e-06, | |
| "loss": 0.0293, | |
| "step": 20020 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.4529697895050049, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0359, | |
| "step": 20030 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.8453211784362793, | |
| "learning_rate": 2.9765258215962445e-06, | |
| "loss": 0.032, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 0.892306923866272, | |
| "learning_rate": 2.9530516431924884e-06, | |
| "loss": 0.0281, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 1.364424467086792, | |
| "learning_rate": 2.9295774647887324e-06, | |
| "loss": 0.0364, | |
| "step": 20060 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 0.5051470398902893, | |
| "learning_rate": 2.9061032863849768e-06, | |
| "loss": 0.0311, | |
| "step": 20070 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 2.131049156188965, | |
| "learning_rate": 2.8826291079812207e-06, | |
| "loss": 0.0251, | |
| "step": 20080 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.45606279373168945, | |
| "learning_rate": 2.859154929577465e-06, | |
| "loss": 0.0228, | |
| "step": 20090 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 3.494180202484131, | |
| "learning_rate": 2.835680751173709e-06, | |
| "loss": 0.0205, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.07420266419649124, | |
| "learning_rate": 2.812206572769953e-06, | |
| "loss": 0.0189, | |
| "step": 20110 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 1.7509255409240723, | |
| "learning_rate": 2.7887323943661974e-06, | |
| "loss": 0.0237, | |
| "step": 20120 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 3.2737362384796143, | |
| "learning_rate": 2.7652582159624414e-06, | |
| "loss": 0.0323, | |
| "step": 20130 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 1.4535030126571655, | |
| "learning_rate": 2.7417840375586857e-06, | |
| "loss": 0.025, | |
| "step": 20140 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 0.9434374570846558, | |
| "learning_rate": 2.7183098591549297e-06, | |
| "loss": 0.0309, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 0.07359094172716141, | |
| "learning_rate": 2.694835680751174e-06, | |
| "loss": 0.0291, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 0.20729191601276398, | |
| "learning_rate": 2.671361502347418e-06, | |
| "loss": 0.012, | |
| "step": 20170 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 0.7258203625679016, | |
| "learning_rate": 2.647887323943662e-06, | |
| "loss": 0.0233, | |
| "step": 20180 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 0.2592746913433075, | |
| "learning_rate": 2.6244131455399064e-06, | |
| "loss": 0.032, | |
| "step": 20190 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 1.7571505308151245, | |
| "learning_rate": 2.6009389671361503e-06, | |
| "loss": 0.0433, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 0.1218971237540245, | |
| "learning_rate": 2.5774647887323947e-06, | |
| "loss": 0.0263, | |
| "step": 20210 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 1.3359134197235107, | |
| "learning_rate": 2.5539906103286387e-06, | |
| "loss": 0.0245, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 0.33871036767959595, | |
| "learning_rate": 2.5305164319248826e-06, | |
| "loss": 0.0397, | |
| "step": 20230 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 2.2899630069732666, | |
| "learning_rate": 2.5070422535211266e-06, | |
| "loss": 0.0327, | |
| "step": 20240 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 0.3078570067882538, | |
| "learning_rate": 2.483568075117371e-06, | |
| "loss": 0.0254, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.25189924240112305, | |
| "learning_rate": 2.4600938967136154e-06, | |
| "loss": 0.0242, | |
| "step": 20260 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.4151531755924225, | |
| "learning_rate": 2.4366197183098593e-06, | |
| "loss": 0.0303, | |
| "step": 20270 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.4815005958080292, | |
| "learning_rate": 2.4131455399061037e-06, | |
| "loss": 0.0203, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 3.175389289855957, | |
| "learning_rate": 2.3896713615023472e-06, | |
| "loss": 0.0359, | |
| "step": 20290 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.38591518998146057, | |
| "learning_rate": 2.3661971830985916e-06, | |
| "loss": 0.027, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 3.8292973041534424, | |
| "learning_rate": 2.3427230046948356e-06, | |
| "loss": 0.0432, | |
| "step": 20310 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.09365264326334, | |
| "learning_rate": 2.31924882629108e-06, | |
| "loss": 0.0236, | |
| "step": 20320 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.9030979871749878, | |
| "learning_rate": 2.2957746478873243e-06, | |
| "loss": 0.0224, | |
| "step": 20330 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 5.00977087020874, | |
| "learning_rate": 2.2723004694835683e-06, | |
| "loss": 0.0375, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 0.6654548048973083, | |
| "learning_rate": 2.2488262910798123e-06, | |
| "loss": 0.0454, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 0.40298229455947876, | |
| "learning_rate": 2.2253521126760562e-06, | |
| "loss": 0.0323, | |
| "step": 20360 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 0.07516142725944519, | |
| "learning_rate": 2.2018779342723006e-06, | |
| "loss": 0.0265, | |
| "step": 20370 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 7.442012786865234, | |
| "learning_rate": 2.1784037558685446e-06, | |
| "loss": 0.0236, | |
| "step": 20380 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 0.4781692624092102, | |
| "learning_rate": 2.154929577464789e-06, | |
| "loss": 0.0392, | |
| "step": 20390 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 0.02968725562095642, | |
| "learning_rate": 2.131455399061033e-06, | |
| "loss": 0.0345, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 2.6985020637512207, | |
| "learning_rate": 2.107981220657277e-06, | |
| "loss": 0.0171, | |
| "step": 20410 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 0.02516743168234825, | |
| "learning_rate": 2.0845070422535212e-06, | |
| "loss": 0.0232, | |
| "step": 20420 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 2.1955928802490234, | |
| "learning_rate": 2.061032863849765e-06, | |
| "loss": 0.0251, | |
| "step": 20430 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 2.6991193294525146, | |
| "learning_rate": 2.0375586854460096e-06, | |
| "loss": 0.024, | |
| "step": 20440 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 1.7629855871200562, | |
| "learning_rate": 2.014084507042254e-06, | |
| "loss": 0.0395, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.22058716416358948, | |
| "learning_rate": 1.990610328638498e-06, | |
| "loss": 0.0065, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 1.8587628602981567, | |
| "learning_rate": 1.967136150234742e-06, | |
| "loss": 0.0259, | |
| "step": 20470 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 0.107975073158741, | |
| "learning_rate": 1.943661971830986e-06, | |
| "loss": 0.0279, | |
| "step": 20480 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 0.9777230620384216, | |
| "learning_rate": 1.9201877934272302e-06, | |
| "loss": 0.0225, | |
| "step": 20490 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 0.5380791425704956, | |
| "learning_rate": 1.8967136150234742e-06, | |
| "loss": 0.0399, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_loss": 0.05661395192146301, | |
| "eval_macro/f1": 0.9310132935790502, | |
| "eval_macro/precision": 0.931598994557163, | |
| "eval_macro/recall": 0.9305680939976558, | |
| "eval_micro/f1": 0.932027531418005, | |
| "eval_micro/precision": 0.9322509932867517, | |
| "eval_micro/recall": 0.9318041766518316, | |
| "eval_runtime": 29.363, | |
| "eval_samples/accuracy": 0.9252995549469359, | |
| "eval_samples_per_second": 497.394, | |
| "eval_steps_per_second": 15.564, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 2.409182071685791, | |
| "learning_rate": 1.8732394366197183e-06, | |
| "loss": 0.0139, | |
| "step": 20510 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 0.6202594041824341, | |
| "learning_rate": 1.8497652582159627e-06, | |
| "loss": 0.0259, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 1.8184819221496582, | |
| "learning_rate": 1.8262910798122067e-06, | |
| "loss": 0.0322, | |
| "step": 20530 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 0.19951772689819336, | |
| "learning_rate": 1.8028169014084509e-06, | |
| "loss": 0.0269, | |
| "step": 20540 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 1.1214886903762817, | |
| "learning_rate": 1.7793427230046948e-06, | |
| "loss": 0.0308, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 1.819677472114563, | |
| "learning_rate": 1.7558685446009392e-06, | |
| "loss": 0.033, | |
| "step": 20560 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 0.6370648145675659, | |
| "learning_rate": 1.732394366197183e-06, | |
| "loss": 0.0211, | |
| "step": 20570 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 0.18035587668418884, | |
| "learning_rate": 1.7089201877934273e-06, | |
| "loss": 0.0328, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 1.0719821453094482, | |
| "learning_rate": 1.6854460093896715e-06, | |
| "loss": 0.0298, | |
| "step": 20590 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.222098246216774, | |
| "learning_rate": 1.6619718309859155e-06, | |
| "loss": 0.0465, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 4.420154094696045, | |
| "learning_rate": 1.6384976525821598e-06, | |
| "loss": 0.0252, | |
| "step": 20610 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 1.6321355104446411, | |
| "learning_rate": 1.6150234741784038e-06, | |
| "loss": 0.0306, | |
| "step": 20620 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.06074954941868782, | |
| "learning_rate": 1.591549295774648e-06, | |
| "loss": 0.0375, | |
| "step": 20630 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 4.066643238067627, | |
| "learning_rate": 1.568075117370892e-06, | |
| "loss": 0.0352, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 0.1419898420572281, | |
| "learning_rate": 1.5446009389671363e-06, | |
| "loss": 0.0258, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 0.1670309603214264, | |
| "learning_rate": 1.5211267605633803e-06, | |
| "loss": 0.0137, | |
| "step": 20660 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 0.8675075173377991, | |
| "learning_rate": 1.4976525821596244e-06, | |
| "loss": 0.022, | |
| "step": 20670 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 0.5059126615524292, | |
| "learning_rate": 1.4741784037558686e-06, | |
| "loss": 0.045, | |
| "step": 20680 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 1.8234846591949463, | |
| "learning_rate": 1.4507042253521128e-06, | |
| "loss": 0.0285, | |
| "step": 20690 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 0.09939184039831161, | |
| "learning_rate": 1.427230046948357e-06, | |
| "loss": 0.0313, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 1.7367419004440308, | |
| "learning_rate": 1.4061032863849765e-06, | |
| "loss": 0.0307, | |
| "step": 20710 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 0.7291856408119202, | |
| "learning_rate": 1.3826291079812207e-06, | |
| "loss": 0.025, | |
| "step": 20720 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 0.6269216537475586, | |
| "learning_rate": 1.3591549295774648e-06, | |
| "loss": 0.0377, | |
| "step": 20730 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 0.03957618027925491, | |
| "learning_rate": 1.335680751173709e-06, | |
| "loss": 0.0235, | |
| "step": 20740 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 0.7715898752212524, | |
| "learning_rate": 1.3122065727699532e-06, | |
| "loss": 0.0339, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 0.8872296810150146, | |
| "learning_rate": 1.2887323943661974e-06, | |
| "loss": 0.0167, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.6660736799240112, | |
| "learning_rate": 1.2652582159624413e-06, | |
| "loss": 0.0384, | |
| "step": 20770 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 1.6280009746551514, | |
| "learning_rate": 1.2417840375586855e-06, | |
| "loss": 0.0247, | |
| "step": 20780 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.618594229221344, | |
| "learning_rate": 1.2183098591549297e-06, | |
| "loss": 0.0327, | |
| "step": 20790 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 1.0769779682159424, | |
| "learning_rate": 1.1948356807511736e-06, | |
| "loss": 0.024, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 1.8020055294036865, | |
| "learning_rate": 1.1713615023474178e-06, | |
| "loss": 0.0316, | |
| "step": 20810 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 0.07099230587482452, | |
| "learning_rate": 1.1478873239436622e-06, | |
| "loss": 0.0189, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 0.5301911234855652, | |
| "learning_rate": 1.1244131455399061e-06, | |
| "loss": 0.0248, | |
| "step": 20830 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 2.444852113723755, | |
| "learning_rate": 1.1009389671361503e-06, | |
| "loss": 0.0317, | |
| "step": 20840 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 1.6938620805740356, | |
| "learning_rate": 1.0774647887323945e-06, | |
| "loss": 0.0216, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 1.9859988689422607, | |
| "learning_rate": 1.0539906103286384e-06, | |
| "loss": 0.0323, | |
| "step": 20860 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.8528733849525452, | |
| "learning_rate": 1.0305164319248826e-06, | |
| "loss": 0.0214, | |
| "step": 20870 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 8.605490684509277, | |
| "learning_rate": 1.007042253521127e-06, | |
| "loss": 0.017, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.17768090963363647, | |
| "learning_rate": 9.83568075117371e-07, | |
| "loss": 0.0289, | |
| "step": 20890 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 0.3029472529888153, | |
| "learning_rate": 9.600938967136151e-07, | |
| "loss": 0.0165, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 4.979481220245361, | |
| "learning_rate": 9.366197183098592e-07, | |
| "loss": 0.0133, | |
| "step": 20910 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 0.1385805457830429, | |
| "learning_rate": 9.131455399061033e-07, | |
| "loss": 0.0207, | |
| "step": 20920 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 2.0309174060821533, | |
| "learning_rate": 8.896713615023474e-07, | |
| "loss": 0.0274, | |
| "step": 20930 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.3579356372356415, | |
| "learning_rate": 8.661971830985915e-07, | |
| "loss": 0.03, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 1.9798082113265991, | |
| "learning_rate": 8.427230046948357e-07, | |
| "loss": 0.0183, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 3.616589069366455, | |
| "learning_rate": 8.192488262910799e-07, | |
| "loss": 0.029, | |
| "step": 20960 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.07770455628633499, | |
| "learning_rate": 7.95774647887324e-07, | |
| "loss": 0.016, | |
| "step": 20970 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.17096202075481415, | |
| "learning_rate": 7.723004694835682e-07, | |
| "loss": 0.0204, | |
| "step": 20980 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "grad_norm": 0.9039891362190247, | |
| "learning_rate": 7.488262910798122e-07, | |
| "loss": 0.0286, | |
| "step": 20990 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "grad_norm": 0.06406420469284058, | |
| "learning_rate": 7.253521126760564e-07, | |
| "loss": 0.0261, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "eval_loss": 0.057017967104911804, | |
| "eval_macro/f1": 0.9299257465348796, | |
| "eval_macro/precision": 0.9302157930278783, | |
| "eval_macro/recall": 0.9298030271698045, | |
| "eval_micro/f1": 0.9309872655073258, | |
| "eval_micro/precision": 0.9309235298144725, | |
| "eval_micro/recall": 0.9310510099281069, | |
| "eval_runtime": 28.8502, | |
| "eval_samples/accuracy": 0.9243409791167408, | |
| "eval_samples_per_second": 506.236, | |
| "eval_steps_per_second": 15.84, | |
| "step": 21000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 21300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 2.2102454595032064e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |